Diffstat (limited to 'qa'): 3788 files changed, 185755 insertions, 0 deletions
diff --git a/qa/.gitignore b/qa/.gitignore new file mode 100644 index 000000000..e80d9d42a --- /dev/null +++ b/qa/.gitignore @@ -0,0 +1,4 @@ +*~ +.*.sw[nmop] +*.pyc +.tox @@ -0,0 +1 @@ +.
\ No newline at end of file
diff --git a/qa/CMakeLists.txt b/qa/CMakeLists.txt
new file mode 100644
index 000000000..a8726141b
--- /dev/null
+++ b/qa/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(CEPH_BUILD_VIRTUALENV $ENV{TMPDIR})
+if(NOT CEPH_BUILD_VIRTUALENV)
+  set(CEPH_BUILD_VIRTUALENV ${CMAKE_BINARY_DIR})
+endif()
+
+if(WITH_TESTS)
+  include(AddCephTest)
+  add_tox_test(qa TOX_ENVS flake8 mypy deadsymlinks)
+endif()
diff --git a/qa/Makefile b/qa/Makefile
new file mode 100644
index 000000000..ad655b7e7
--- /dev/null
+++ b/qa/Makefile
@@ -0,0 +1,4 @@
+DIRS= workunits btrfs
+
+all:
+	for d in $(DIRS) ; do ( cd $$d ; $(MAKE) all ) ; done
diff --git a/qa/README b/qa/README
new file mode 100644
index 000000000..f9b8988c6
--- /dev/null
+++ b/qa/README
@@ -0,0 +1,85 @@
+ceph-qa-suite
+-------------
+
+clusters/ - some predefined cluster layouts
+suites/   - the test suites
+
+The suites directory has a hierarchical collection of tests. This can be
+freeform, but generally follows the convention of
+
+    suites/<test suite name>/<test group>/...
+
+A test is described by a yaml fragment (a minimal example fragment is
+sketched below).
+
+A test can exist as a single .yaml file in the directory tree. For example:
+
+    suites/foo/one.yaml
+    suites/foo/two.yaml
+
+is a simple group of two tests.
+
+A directory with a magic '+' file represents a test that combines all
+other items in the directory into a single yaml fragment. For example:
+
+    suites/foo/bar/+
+    suites/foo/bar/a.yaml
+    suites/foo/bar/b.yaml
+    suites/foo/bar/c.yaml
+
+is a single test consisting of a + b + c.
+
+A directory with a magic '%' file represents a test matrix formed from
+all other items in the directory. For example,
+
+    suites/baz/%
+    suites/baz/a.yaml
+    suites/baz/b/b1.yaml
+    suites/baz/b/b2.yaml
+    suites/baz/c.yaml
+    suites/baz/d/d1.yaml
+    suites/baz/d/d2.yaml
+
+is a 4-dimensional test matrix. Two dimensions (a, c) are trivial (1
+item), so this is really 2x2 = 4 tests, which are
+
+    a + b1 + c + d1
+    a + b1 + c + d2
+    a + b2 + c + d1
+    a + b2 + c + d2
+
+A directory with a magic '$' file, or a directory whose name ends with '$',
+represents a test where one of the non-magic items is chosen randomly. For
+example, both
+
+    suites/foo/$
+    suites/foo/a.yaml
+    suites/foo/b.yaml
+    suites/foo/c.yaml
+
+and
+
+    suites/foo$/a.yaml
+    suites/foo$/b.yaml
+    suites/foo$/c.yaml
+
+describe a single test, either a, b or c. This can be used in conjunction
+with the '%' file in the same directory (see below) or in other directories
+to run a series of tests without causing an unwanted increase in the total
+number of jobs run.
+
+Symlinks are okay.
+
+One particular use of symlinks is to combine the '%' feature with the latter
+form of '$'. Consider a supported_distros directory containing fragments that
+define os_type and os_version:
+
+    supported_distros/%
+    supported_distros/centos.yaml
+    supported_distros/rhel.yaml
+    supported_distros/ubuntu.yaml
+
+A test that links supported_distros as distros (a name that doesn't end with
+'$') will be run three times: on centos, rhel and ubuntu. A test that links
+supported_distros as distros$ will be run just once: either on centos, rhel or
+ubuntu, chosen randomly.
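For illustration only, a standalone test fragment of the kind described above might look like the following sketch. The path suites/foo/one.yaml is hypothetical; the install, ceph, and workunit tasks are modelled on fragments added elsewhere in this commit (e.g. qa/cephfs/begin/0-install.yaml and qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml):

    # suites/foo/one.yaml -- hypothetical standalone test fragment
    tasks:
    - install:          # install the ceph packages on the test nodes
    - ceph:             # bring up a cluster using the roles from the cluster yaml
    - workunit:
        clients:
          all:
            - suites/fsstress.sh   # run the fsstress workunit on every client

When teuthology builds a job from several fragments (via '+', '%' or '$'), the fragments are merged, so a real suite usually splits the install/cluster/mount pieces and the workload into separate yaml files as the directories in this commit do.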
+ +The teuthology code can be found in https://github.com/ceph/teuthology.git diff --git a/qa/archs/aarch64.yaml b/qa/archs/aarch64.yaml new file mode 100644 index 000000000..6399b9959 --- /dev/null +++ b/qa/archs/aarch64.yaml @@ -0,0 +1 @@ +arch: aarch64 diff --git a/qa/archs/armv7.yaml b/qa/archs/armv7.yaml new file mode 100644 index 000000000..c261ebd52 --- /dev/null +++ b/qa/archs/armv7.yaml @@ -0,0 +1 @@ +arch: armv7l diff --git a/qa/archs/i686.yaml b/qa/archs/i686.yaml new file mode 100644 index 000000000..a920e5a9e --- /dev/null +++ b/qa/archs/i686.yaml @@ -0,0 +1 @@ +arch: i686 diff --git a/qa/archs/x86_64.yaml b/qa/archs/x86_64.yaml new file mode 100644 index 000000000..c2409f5d0 --- /dev/null +++ b/qa/archs/x86_64.yaml @@ -0,0 +1 @@ +arch: x86_64 diff --git a/qa/btrfs/.gitignore b/qa/btrfs/.gitignore new file mode 100644 index 000000000..530c1b5b4 --- /dev/null +++ b/qa/btrfs/.gitignore @@ -0,0 +1,3 @@ +/clone_range +/test_async_snap +/create_async_snap diff --git a/qa/btrfs/Makefile b/qa/btrfs/Makefile new file mode 100644 index 000000000..be95ecfd3 --- /dev/null +++ b/qa/btrfs/Makefile @@ -0,0 +1,11 @@ +CFLAGS = -Wall -Wextra -D_GNU_SOURCE + +TARGETS = clone_range test_async_snap create_async_snap + +.c: + $(CC) $(CFLAGS) $@.c -o $@ + +all: $(TARGETS) + +clean: + rm $(TARGETS) diff --git a/qa/btrfs/clone_range.c b/qa/btrfs/clone_range.c new file mode 100644 index 000000000..0a88e1601 --- /dev/null +++ b/qa/btrfs/clone_range.c @@ -0,0 +1,35 @@ +#include <fcntl.h> +#include <stdlib.h> +#include <sys/ioctl.h> +#include <string.h> + +#include <linux/types.h> +#include "../../src/os/btrfs_ioctl.h" +#include <stdio.h> +#include <errno.h> + +int main(int argc, char **argv) +{ + struct btrfs_ioctl_clone_range_args ca; + int dfd; + int r; + + if (argc < 6) { + printf("usage: %s <srcfn> <srcoffset> <srclen> <destfn> <destoffset>\n", argv[0]); + exit(1); + } + + ca.src_fd = open(argv[1], O_RDONLY); + ca.src_offset = atoi(argv[2]); + ca.src_length = atoi(argv[3]); + dfd = open(argv[4], O_WRONLY|O_CREAT); + ca.dest_offset = atoi(argv[5]); + + r = ioctl(dfd, BTRFS_IOC_CLONE_RANGE, &ca); + printf("clone_range %s %lld %lld~%lld to %s %d %lld = %d %s\n", + argv[1], ca.src_fd, + ca.src_offset, ca.src_length, + argv[4], dfd, + ca.dest_offset, r, strerror(errno)); + return r; +} diff --git a/qa/btrfs/create_async_snap.c b/qa/btrfs/create_async_snap.c new file mode 100644 index 000000000..2ef22af7b --- /dev/null +++ b/qa/btrfs/create_async_snap.c @@ -0,0 +1,34 @@ +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <string.h> + +#include <linux/ioctl.h> +#include <linux/types.h> +#include "../../src/os/btrfs_ioctl.h" + +struct btrfs_ioctl_vol_args_v2 va; + +int main(int argc, char **argv) +{ + int fd; + int r; + + if (argc != 3) { + printf("usage: %s <source subvol> <name>\n", argv[0]); + return 1; + } + printf("creating snap ./%s from %s\n", argv[2], argv[1]); + fd = open(".", O_RDONLY); + va.fd = open(argv[1], O_RDONLY); + va.flags = BTRFS_SUBVOL_CREATE_ASYNC; + strcpy(va.name, argv[2]); + r = ioctl(fd, BTRFS_IOC_SNAP_CREATE_V2, (unsigned long long)&va); + printf("result %d\n", r ? 
-errno:0); + return r; +} diff --git a/qa/btrfs/test_async_snap.c b/qa/btrfs/test_async_snap.c new file mode 100644 index 000000000..211be95a6 --- /dev/null +++ b/qa/btrfs/test_async_snap.c @@ -0,0 +1,83 @@ +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <string.h> + +#include <linux/ioctl.h> +#include <linux/types.h> +#include "../../src/os/btrfs_ioctl.h" + +struct btrfs_ioctl_vol_args_v2 va; +struct btrfs_ioctl_vol_args vold; +int max = 4; + +void check_return(int r) +{ + if (r < 0) { + printf("********* failed with %d %s ********\n", errno, strerror(errno)); + exit(1); + } +} + +int main(int argc, char **argv) +{ + int num = 1000; + + if (argc > 1) + num = atoi(argv[1]); + printf("will do %d iterations\n", num); + + int cwd = open(".", O_RDONLY); + printf("cwd = %d\n", cwd); + while (num-- > 0) { + if (rand() % 10 == 0) { + __u64 transid; + int r; + printf("sync starting\n"); + r = ioctl(cwd, BTRFS_IOC_START_SYNC, &transid); + check_return(r); + printf("sync started, transid %lld, waiting\n", transid); + r = ioctl(cwd, BTRFS_IOC_WAIT_SYNC, &transid); + check_return(r); + printf("sync finished\n"); + } + + int i = rand() % max; + struct stat st; + va.fd = cwd; + sprintf(va.name, "test.%d", i); + va.transid = 0; + int r = stat(va.name, &st); + if (r < 0) { + if (rand() % 3 == 0) { + printf("snap create (sync) %s\n", va.name); + va.flags = 0; + r = ioctl(cwd, BTRFS_IOC_SNAP_CREATE_V2, &va); + check_return(r); + } else { + printf("snap create (async) %s\n", va.name); + va.flags = BTRFS_SUBVOL_CREATE_ASYNC; + r = ioctl(cwd, BTRFS_IOC_SNAP_CREATE_V2, &va); + check_return(r); + printf("snap created, transid %lld\n", va.transid); + if (rand() % 2 == 0) { + printf("waiting for async snap create\n"); + r = ioctl(cwd, BTRFS_IOC_WAIT_SYNC, &va.transid); + check_return(r); + } + } + } else { + printf("snap remove %s\n", va.name); + vold.fd = va.fd; + strcpy(vold.name, va.name); + r = ioctl(cwd, BTRFS_IOC_SNAP_DESTROY, &vold); + check_return(r); + } + } + return 0; +} diff --git a/qa/btrfs/test_rmdir_async_snap.c b/qa/btrfs/test_rmdir_async_snap.c new file mode 100644 index 000000000..5dafaacaa --- /dev/null +++ b/qa/btrfs/test_rmdir_async_snap.c @@ -0,0 +1,62 @@ +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <string.h> + +#include <linux/ioctl.h> +#include <linux/types.h> +#include "../../src/os/btrfs_ioctl.h" + +struct btrfs_ioctl_vol_args_v2 va; +struct btrfs_ioctl_vol_args vold; + +int main(int argc, char **argv) +{ + int num = 1000; + int i, r, fd; + char buf[30]; + + if (argc > 1) + num = atoi(argv[1]); + printf("will do %d iterations\n", num); + + fd = open(".", O_RDONLY); + vold.fd = 0; + strcpy(vold.name, "current"); + r = ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, (unsigned long int)&vold); + printf("create current ioctl got %d\n", r ? errno:0); + if (r) + return 1; + + for (i=0; i<num; i++) { + sprintf(buf, "current/dir.%d", i); + r = mkdir(buf, 0755); + printf("mkdir got %d\n", r ? errno:0); + if (r) + return 1; + } + + va.fd = open("current", O_RDONLY); + va.flags = BTRFS_SUBVOL_CREATE_ASYNC; + for (i=0; i<num; i++) { + system("/bin/cp /boot/vmlinuz-3.2.0-ceph-00142-g9e98323 current/foo"); + sprintf(buf, "current/dir.%d", i); + r = rmdir(buf); + printf("rmdir got %d\n", r ? 
errno:0); + if (r) + return 1; + + if (i % 10) continue; + sprintf(va.name, "snap.%d", i); + r = ioctl(fd, BTRFS_IOC_SNAP_CREATE_V2, (unsigned long long)&va); + printf("ioctl got %d\n", r ? errno:0); + if (r) + return 1; + } + return 0; +} diff --git a/qa/cephfs/.qa b/qa/cephfs/.qa new file mode 120000 index 000000000..b870225aa --- /dev/null +++ b/qa/cephfs/.qa @@ -0,0 +1 @@ +../
\ No newline at end of file diff --git a/qa/cephfs/begin/+ b/qa/cephfs/begin/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/cephfs/begin/+ diff --git a/qa/cephfs/begin/0-install.yaml b/qa/cephfs/begin/0-install.yaml new file mode 100644 index 000000000..413958f11 --- /dev/null +++ b/qa/cephfs/begin/0-install.yaml @@ -0,0 +1,65 @@ +tasks: + - install: + extra_packages: + rpm: + - python3-cephfs + - cephfs-top + - cephfs-mirror + deb: + - python3-cephfs + - cephfs-shell + - cephfs-top + - cephfs-mirror + # For kernel_untar_build workunit + extra_system_packages: + deb: + - bison + - flex + - libelf-dev + - libssl-dev + - network-manager + - iproute2 + - util-linux + # for xfstests-dev + - dump + - indent + # for fsx + - libaio-dev + - libtool-bin + - uuid-dev + - xfslibs-dev + # for postgres + - postgresql + - postgresql-client + - postgresql-common + - postgresql-contrib + rpm: + - bison + - flex + - elfutils-libelf-devel + - openssl-devel + - NetworkManager + - iproute + - util-linux + # for xfstests-dev + - libacl-devel + - libaio-devel + - libattr-devel + - libtool + - libuuid-devel + - xfsdump + - xfsprogs + - xfsprogs-devel + # for fsx + - libaio-devel + - libtool + - libuuid-devel + - xfsprogs-devel + # for postgres + - postgresql + - postgresql-server + - postgresql-contrib +syslog: + ignorelist: + - WARNING*.*check_session_state + - WARNING*.*__ceph_remove_cap diff --git a/qa/cephfs/begin/1-ceph.yaml b/qa/cephfs/begin/1-ceph.yaml new file mode 100644 index 000000000..531c8e3e0 --- /dev/null +++ b/qa/cephfs/begin/1-ceph.yaml @@ -0,0 +1,6 @@ +log-rotate: + ceph-mds: 10G + ceph-osd: 10G +tasks: + - ceph: + create_rbd_pool: false diff --git a/qa/cephfs/begin/2-logrotate.yaml b/qa/cephfs/begin/2-logrotate.yaml new file mode 100644 index 000000000..3b876f13b --- /dev/null +++ b/qa/cephfs/begin/2-logrotate.yaml @@ -0,0 +1,3 @@ +log-rotate: + ceph-mds: 10G + ceph-osd: 10G diff --git a/qa/cephfs/clusters/.qa b/qa/cephfs/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/clusters/1-mds-1-client-coloc.yaml b/qa/cephfs/clusters/1-mds-1-client-coloc.yaml new file mode 100644 index 000000000..d295dc3d3 --- /dev/null +++ b/qa/cephfs/clusters/1-mds-1-client-coloc.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mon.c, mgr.x, mds.b, osd.4, osd.5, osd.6, osd.7] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1-mds-1-client-micro.yaml b/qa/cephfs/clusters/1-mds-1-client-micro.yaml new file mode 100644 index 000000000..8b66c3906 --- /dev/null +++ b/qa/cephfs/clusters/1-mds-1-client-micro.yaml @@ -0,0 +1,7 @@ +roles: +- [mon.a, mon.b, mon.c, mgr.x, mds.a, osd.0, osd.1, osd.2, osd.3] +- [client.0] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/cephfs/clusters/1-mds-1-client.yaml b/qa/cephfs/clusters/1-mds-1-client.yaml new file mode 100644 index 000000000..d7701815f --- /dev/null +++ b/qa/cephfs/clusters/1-mds-1-client.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.y, mds.a, mds.c, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mon.c, mgr.x, mds.b, osd.4, osd.5, osd.6, osd.7] +- [client.0] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1-mds-2-client-coloc.yaml b/qa/cephfs/clusters/1-mds-2-client-coloc.yaml new file mode 100644 index 000000000..5ce128cbd --- /dev/null +++ b/qa/cephfs/clusters/1-mds-2-client-coloc.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mon.c, mgr.x, mds.b, osd.4, osd.5, osd.6, osd.7, client.1] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1-mds-2-client-micro.yaml b/qa/cephfs/clusters/1-mds-2-client-micro.yaml new file mode 100644 index 000000000..42ad4efcd --- /dev/null +++ b/qa/cephfs/clusters/1-mds-2-client-micro.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mon.b, mon.c, mgr.x, mgr.y, mds.a, mds.b, mds.c, osd.0, osd.1, osd.2, osd.3] +- [client.0] +- [client.1] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/cephfs/clusters/1-mds-2-client.yaml b/qa/cephfs/clusters/1-mds-2-client.yaml new file mode 100644 index 000000000..6e996244e --- /dev/null +++ b/qa/cephfs/clusters/1-mds-2-client.yaml @@ -0,0 +1,11 @@ +roles: +- [mon.a, mgr.y, mds.a, mds.c, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mon.c, mgr.x, mds.b, osd.4, osd.5, osd.6, osd.7] +- [client.0] +- [client.1] +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1-mds-3-client.yaml b/qa/cephfs/clusters/1-mds-3-client.yaml new file mode 100644 index 000000000..200df1fe0 --- /dev/null +++ b/qa/cephfs/clusters/1-mds-3-client.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mon.c, mgr.x, mds.b, osd.4, osd.5, osd.6, osd.7] +- [client.0] +- [client.1] +- [client.2] +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1-mds-4-client-coloc.yaml b/qa/cephfs/clusters/1-mds-4-client-coloc.yaml new file mode 100644 index 000000000..3e2ee7870 --- /dev/null +++ b/qa/cephfs/clusters/1-mds-4-client-coloc.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3, 
client.0, client.1] +- [mon.b, mon.c, mgr.x, mds.b, osd.4, osd.5, osd.6, osd.7, client.2, client.3] +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1-mds-4-client.yaml b/qa/cephfs/clusters/1-mds-4-client.yaml new file mode 100644 index 000000000..9addfe3b0 --- /dev/null +++ b/qa/cephfs/clusters/1-mds-4-client.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.a, mgr.y, mds.a, mds.b, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mon.c, mgr.x, mds.c, osd.4, osd.5, osd.6, osd.7] +- [client.0] +- [client.1] +- [client.2] +- [client.3] +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1-node-1-mds-1-osd.yaml b/qa/cephfs/clusters/1-node-1-mds-1-osd.yaml new file mode 100644 index 000000000..865b976c6 --- /dev/null +++ b/qa/cephfs/clusters/1-node-1-mds-1-osd.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mgr.x, mds.a, osd.0, client.0] +openstack: +- volumes: # attached to each instance + count: 1 + size: 5 # GB +- machine: + disk: 10 # GB diff --git a/qa/cephfs/clusters/1a11s-mds-1c-client-3node.yaml b/qa/cephfs/clusters/1a11s-mds-1c-client-3node.yaml new file mode 100644 index 000000000..ca6f79ba2 --- /dev/null +++ b/qa/cephfs/clusters/1a11s-mds-1c-client-3node.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.x, mds.a, mds.d, mds.g, mds.j, osd.0, osd.3, osd.6, osd.9, client.0] +- [mon.b, mgr.y, mds.b, mds.e, mds.h, mds.k, osd.1, osd.4, osd.7, osd.10] +- [mon.c, mgr.z, mds.c, mds.f, mds.i, mds.l, osd.2, osd.5, osd.8, osd.11] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1a2s-mds-1c-client-3node.yaml b/qa/cephfs/clusters/1a2s-mds-1c-client-3node.yaml new file mode 100644 index 000000000..3a9ff34f8 --- /dev/null +++ b/qa/cephfs/clusters/1a2s-mds-1c-client-3node.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.x, mds.a, osd.0, osd.3, osd.6, osd.9, client.0] +- [mon.b, mgr.y, mds.b, osd.1, osd.4, osd.7, osd.10] +- [mon.c, mgr.z, mds.c, osd.2, osd.5, osd.8, osd.11] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1a3s-mds-1c-client.yaml b/qa/cephfs/clusters/1a3s-mds-1c-client.yaml new file mode 100644 index 000000000..87867daa4 --- /dev/null +++ b/qa/cephfs/clusters/1a3s-mds-1c-client.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, mds.c, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mon.c, mgr.x, mds.b, mds.d, osd.4, osd.5, osd.6, osd.7] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1a3s-mds-2c-client.yaml b/qa/cephfs/clusters/1a3s-mds-2c-client.yaml new file mode 100644 index 000000000..ba17f05d2 --- /dev/null +++ b/qa/cephfs/clusters/1a3s-mds-2c-client.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, mds.c, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mon.c, mgr.x, mds.b, mds.d, osd.4, osd.5, osd.6, osd.7, client.1] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1a3s-mds-4c-client.yaml b/qa/cephfs/clusters/1a3s-mds-4c-client.yaml new file mode 100644 index 000000000..51fd809b4 --- /dev/null +++ b/qa/cephfs/clusters/1a3s-mds-4c-client.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, mds.c, osd.0, osd.1, osd.2, osd.3, client.0, client.2] +- [mon.b, mon.c, mgr.x, mds.b, 
mds.d, osd.4, osd.5, osd.6, osd.7, client.1, client.3] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1a5s-mds-1c-client-3node.yaml b/qa/cephfs/clusters/1a5s-mds-1c-client-3node.yaml new file mode 100644 index 000000000..ae723c160 --- /dev/null +++ b/qa/cephfs/clusters/1a5s-mds-1c-client-3node.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.x, mds.a, mds.d, osd.0, osd.3, osd.6, osd.9, client.0] +- [mon.b, mgr.y, mds.b, mds.e, osd.1, osd.4, osd.7, osd.10] +- [mon.c, mgr.z, mds.c, mds.f, osd.2, osd.5, osd.8, osd.11] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/1a5s-mds-1c-client.yaml b/qa/cephfs/clusters/1a5s-mds-1c-client.yaml new file mode 100644 index 000000000..79fd84cf4 --- /dev/null +++ b/qa/cephfs/clusters/1a5s-mds-1c-client.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, mds.c, mds.e, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mon.c, mgr.x, mds.b, mds.d, mds.f, osd.4, osd.5, osd.6, osd.7] +openstack: +- volumes: # attached to each instance + count: 4 + size: 20 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/3-mds.yaml b/qa/cephfs/clusters/3-mds.yaml new file mode 100644 index 000000000..8dfacb4e7 --- /dev/null +++ b/qa/cephfs/clusters/3-mds.yaml @@ -0,0 +1,14 @@ +roles: +- [mon.a, mon.c, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mgr.x, mds.b, mds.c, osd.4, osd.5, osd.6, osd.7] +- [client.0, client.1] +overrides: + ceph: + cephfs: + max_mds: 3 +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/9-mds.yaml b/qa/cephfs/clusters/9-mds.yaml new file mode 100644 index 000000000..60b18c251 --- /dev/null +++ b/qa/cephfs/clusters/9-mds.yaml @@ -0,0 +1,14 @@ +roles: +- [mon.a, mon.c, mgr.y, mds.a, mds.b, mds.c, mds.d, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mgr.x, mds.e, mds.f, mds.g, mds.h, mds.i, osd.4, osd.5, osd.6, osd.7] +- [client.0, client.1] +overrides: + ceph: + cephfs: + max_mds: 9 +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/clusters/fixed-2-ucephfs.yaml b/qa/cephfs/clusters/fixed-2-ucephfs.yaml new file mode 100644 index 000000000..dff37a51f --- /dev/null +++ b/qa/cephfs/clusters/fixed-2-ucephfs.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mon.c, mgr.x, mds.b, osd.4, osd.5, osd.6, osd.7] +openstack: +- volumes: # attached to each instance + count: 4 + size: 30 # GB +- machine: + disk: 200 # GB diff --git a/qa/cephfs/conf/+ b/qa/cephfs/conf/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/cephfs/conf/+ diff --git a/qa/cephfs/conf/.qa b/qa/cephfs/conf/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/conf/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/conf/client.yaml b/qa/cephfs/conf/client.yaml new file mode 100644 index 000000000..4b37d03b4 --- /dev/null +++ b/qa/cephfs/conf/client.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + client: + client mount timeout: 600 + debug ms: 1 + debug client: 20 + rados mon op timeout: 900 + rados osd op timeout: 900 diff --git a/qa/cephfs/conf/mds.yaml b/qa/cephfs/conf/mds.yaml new file mode 100644 index 000000000..b1c7a5869 --- /dev/null +++ b/qa/cephfs/conf/mds.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + mds: + debug mds: 20 + debug mds balancer: 20 + debug ms: 1 + mds debug frag: true + mds debug scatterstat: true + mds op complaint time: 180 + mds verify scatter: true + osd op complaint time: 180 + rados mon op timeout: 900 + rados osd op timeout: 900 diff --git a/qa/cephfs/conf/mon.yaml b/qa/cephfs/conf/mon.yaml new file mode 100644 index 000000000..eea56004a --- /dev/null +++ b/qa/cephfs/conf/mon.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mon: + mon op complaint time: 120 diff --git a/qa/cephfs/conf/osd.yaml b/qa/cephfs/conf/osd.yaml new file mode 100644 index 000000000..1087202f9 --- /dev/null +++ b/qa/cephfs/conf/osd.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd op complaint time: 180 diff --git a/qa/cephfs/mount/.qa b/qa/cephfs/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/mount/fuse.yaml b/qa/cephfs/mount/fuse.yaml new file mode 100644 index 000000000..9ffb5129d --- /dev/null +++ b/qa/cephfs/mount/fuse.yaml @@ -0,0 +1,16 @@ +teuthology: + postmerge: + - local function is_kupstream() + return false + end + - local function is_kdistro() + return false + end + - local function is_fuse() + return true + end + - local function syntax_version() + return '' + end +tasks: + - ceph-fuse: diff --git a/qa/cephfs/mount/kclient/% b/qa/cephfs/mount/kclient/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/cephfs/mount/kclient/% diff --git a/qa/cephfs/mount/kclient/.qa b/qa/cephfs/mount/kclient/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/mount/kclient/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/mount/kclient/mount-syntax/$ b/qa/cephfs/mount/kclient/mount-syntax/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/cephfs/mount/kclient/mount-syntax/$ diff --git a/qa/cephfs/mount/kclient/mount-syntax/v1.yaml b/qa/cephfs/mount/kclient/mount-syntax/v1.yaml new file mode 100644 index 000000000..84d5d43b2 --- /dev/null +++ b/qa/cephfs/mount/kclient/mount-syntax/v1.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + syntax: 'v1' diff --git a/qa/cephfs/mount/kclient/mount-syntax/v2.yaml b/qa/cephfs/mount/kclient/mount-syntax/v2.yaml new file mode 100644 index 000000000..ef7d30424 --- /dev/null +++ b/qa/cephfs/mount/kclient/mount-syntax/v2.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + syntax: 'v2' diff --git a/qa/cephfs/mount/kclient/mount.yaml b/qa/cephfs/mount/kclient/mount.yaml new file mode 100644 index 000000000..a130ff9d5 --- /dev/null +++ b/qa/cephfs/mount/kclient/mount.yaml @@ -0,0 +1,16 @@ +teuthology: + postmerge: + - local function is_kupstream() + return yaml.ktype == 'upstream' + end + - local function is_kdistro() + return yaml.ktype == 'distro' + end + - local function is_fuse() + return false + end + - local function syntax_version() + return yaml.overrides.kclient.syntax + end +tasks: +- kclient: diff --git a/qa/cephfs/mount/kclient/overrides/% b/qa/cephfs/mount/kclient/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/% diff --git a/qa/cephfs/mount/kclient/overrides/.qa b/qa/cephfs/mount/kclient/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/mount/kclient/overrides/distro/.qa b/qa/cephfs/mount/kclient/overrides/distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/mount/kclient/overrides/distro/stock/% b/qa/cephfs/mount/kclient/overrides/distro/stock/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/distro/stock/% diff --git a/qa/cephfs/mount/kclient/overrides/distro/stock/.qa b/qa/cephfs/mount/kclient/overrides/distro/stock/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/distro/stock/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/mount/kclient/overrides/distro/stock/k-stock.yaml b/qa/cephfs/mount/kclient/overrides/distro/stock/k-stock.yaml new file mode 100644 index 000000000..ca2d688bb --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/distro/stock/k-stock.yaml @@ -0,0 +1,4 @@ +kernel: + client: + sha1: distro +ktype: distro diff --git a/qa/cephfs/mount/kclient/overrides/distro/stock/rhel_8.yaml b/qa/cephfs/mount/kclient/overrides/distro/stock/rhel_8.yaml new file mode 120000 index 000000000..133acf27b --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/distro/stock/rhel_8.yaml @@ -0,0 +1 @@ +.qa/distros/all/rhel_8.yaml
\ No newline at end of file diff --git a/qa/cephfs/mount/kclient/overrides/distro/testing/.qa b/qa/cephfs/mount/kclient/overrides/distro/testing/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/distro/testing/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml b/qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml new file mode 100644 index 000000000..2ee219125 --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml @@ -0,0 +1,4 @@ +kernel: + client: + branch: testing +ktype: upstream diff --git a/qa/cephfs/mount/kclient/overrides/ms-die-on-skipped.yaml b/qa/cephfs/mount/kclient/overrides/ms-die-on-skipped.yaml new file mode 100644 index 000000000..30da870b2 --- /dev/null +++ b/qa/cephfs/mount/kclient/overrides/ms-die-on-skipped.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false diff --git a/qa/cephfs/objectstore-ec/.qa b/qa/cephfs/objectstore-ec/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/objectstore-ec/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/objectstore-ec/bluestore-bitmap.yaml b/qa/cephfs/objectstore-ec/bluestore-bitmap.yaml new file mode 120000 index 000000000..9fb86b9fe --- /dev/null +++ b/qa/cephfs/objectstore-ec/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../../objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/cephfs/objectstore-ec/bluestore-comp-ec-root.yaml b/qa/cephfs/objectstore-ec/bluestore-comp-ec-root.yaml new file mode 100644 index 000000000..512eb117c --- /dev/null +++ b/qa/cephfs/objectstore-ec/bluestore-comp-ec-root.yaml @@ -0,0 +1,29 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + cephfs: + ec_profile: + - m=2 + - k=2 + - crush-failure-domain=osd + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + diff --git a/qa/cephfs/objectstore-ec/bluestore-comp.yaml b/qa/cephfs/objectstore-ec/bluestore-comp.yaml new file mode 100644 index 000000000..b408032fd --- /dev/null +++ b/qa/cephfs/objectstore-ec/bluestore-comp.yaml @@ -0,0 +1,23 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/cephfs/objectstore-ec/bluestore-ec-root.yaml b/qa/cephfs/objectstore-ec/bluestore-ec-root.yaml new file mode 100644 index 000000000..b89c4c711 --- /dev/null +++ b/qa/cephfs/objectstore-ec/bluestore-ec-root.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + cephfs: + ec_profile: + - m=2 + - k=2 + - crush-failure-domain=osd + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + diff --git a/qa/cephfs/overrides/.qa b/qa/cephfs/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/overrides/frag.yaml b/qa/cephfs/overrides/frag.yaml new file mode 100644 index 000000000..f05b3f48f --- /dev/null +++ b/qa/cephfs/overrides/frag.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + mds: + mds bal fragment size max: 10000 + mds bal merge size: 5 + mds bal split bits: 3 + mds bal split size: 100 + diff --git a/qa/cephfs/overrides/fuse/.qa b/qa/cephfs/overrides/fuse/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/overrides/fuse/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/overrides/fuse/default-perm/% b/qa/cephfs/overrides/fuse/default-perm/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/cephfs/overrides/fuse/default-perm/% diff --git a/qa/cephfs/overrides/fuse/default-perm/.qa b/qa/cephfs/overrides/fuse/default-perm/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/overrides/fuse/default-perm/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/overrides/fuse/default-perm/no.yaml b/qa/cephfs/overrides/fuse/default-perm/no.yaml new file mode 100644 index 000000000..445e93652 --- /dev/null +++ b/qa/cephfs/overrides/fuse/default-perm/no.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + fuse default permissions: false diff --git a/qa/cephfs/overrides/fuse/default-perm/yes.yaml b/qa/cephfs/overrides/fuse/default-perm/yes.yaml new file mode 100644 index 000000000..2fd210a39 --- /dev/null +++ b/qa/cephfs/overrides/fuse/default-perm/yes.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + fuse default permissions: true diff --git a/qa/cephfs/overrides/ignorelist_health.yaml b/qa/cephfs/overrides/ignorelist_health.yaml new file mode 100644 index 000000000..d8b819288 --- /dev/null +++ b/qa/cephfs/overrides/ignorelist_health.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_DEGRADED\) + - \(MDS_FAILED\) + - \(MDS_DEGRADED\) + - \(FS_WITH_FAILED_MDS\) + - \(MDS_DAMAGE\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(FS_INLINE_DATA_DEPRECATED\) + - \(POOL_APP_NOT_ENABLED\) diff --git a/qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml b/qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml new file mode 100644 index 000000000..41ba84f04 --- /dev/null +++ b/qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(OSD_DOWN\) + - \(OSD_ + - but it is still running +# MDS daemon 'b' is not responding, replacing it as rank 0 with standby 'a' + - is not responding diff --git a/qa/cephfs/overrides/osd-asserts.yaml b/qa/cephfs/overrides/osd-asserts.yaml new file mode 100644 index 000000000..8c16e6e1c --- /dev/null +++ b/qa/cephfs/overrides/osd-asserts.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/cephfs/overrides/prefetch_entire_dirfrags/no.yaml b/qa/cephfs/overrides/prefetch_entire_dirfrags/no.yaml new file mode 100644 index 000000000..71d6d73ba --- /dev/null +++ b/qa/cephfs/overrides/prefetch_entire_dirfrags/no.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mds: + mds_dir_prefetch: false diff --git a/qa/cephfs/overrides/prefetch_entire_dirfrags/yes.yaml b/qa/cephfs/overrides/prefetch_entire_dirfrags/yes.yaml new file mode 100644 index 000000000..5d25b61d1 --- /dev/null +++ b/qa/cephfs/overrides/prefetch_entire_dirfrags/yes.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mds: + mds_dir_prefetch: true diff --git a/qa/cephfs/overrides/session_timeout.yaml b/qa/cephfs/overrides/session_timeout.yaml new file mode 100644 index 000000000..a7a163337 --- /dev/null +++ b/qa/cephfs/overrides/session_timeout.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + session_timeout: 300 diff --git a/qa/cephfs/tasks/.qa b/qa/cephfs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/cephfs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/cephfs/tasks/cfuse_workunit_suites_blogbench.yaml b/qa/cephfs/tasks/cfuse_workunit_suites_blogbench.yaml new file mode 100644 index 000000000..2d370d7ef --- /dev/null +++ b/qa/cephfs/tasks/cfuse_workunit_suites_blogbench.yaml @@ -0,0 +1,9 @@ +tasks: +- check-counter: + counters: + mds: + - "mds.dir_split" +- workunit: + clients: + all: + - suites/blogbench.sh diff --git a/qa/cephfs/tasks/cfuse_workunit_suites_dbench.yaml b/qa/cephfs/tasks/cfuse_workunit_suites_dbench.yaml new file mode 100644 index 000000000..41b2bc8ed --- /dev/null +++ b/qa/cephfs/tasks/cfuse_workunit_suites_dbench.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/cephfs/tasks/cfuse_workunit_suites_ffsb.yaml b/qa/cephfs/tasks/cfuse_workunit_suites_ffsb.yaml new file mode 100644 index 000000000..6a2b35a18 --- /dev/null +++ b/qa/cephfs/tasks/cfuse_workunit_suites_ffsb.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - SLOW_OPS + - slow request + conf: + osd: + filestore flush min: 0 +tasks: +- check-counter: + counters: + mds: + - "mds.dir_split" +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml b/qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml new file mode 100644 index 000000000..bae220292 --- /dev/null +++ b/qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/fsstress.sh diff --git a/qa/cephfs/tasks/cfuse_workunit_trivial_sync.yaml b/qa/cephfs/tasks/cfuse_workunit_trivial_sync.yaml new file mode 100644 index 000000000..e51542022 --- /dev/null +++ b/qa/cephfs/tasks/cfuse_workunit_trivial_sync.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - fs/misc/trivial_sync.sh diff --git a/qa/cephfs/tasks/libcephfs_interface_tests.yaml b/qa/cephfs/tasks/libcephfs_interface_tests.yaml new file mode 100644 index 000000000..c59775259 --- /dev/null +++ b/qa/cephfs/tasks/libcephfs_interface_tests.yaml @@ -0,0 +1,14 @@ +overrides: + ceph-fuse: + disabled: true + kclient: + disabled: true +tasks: +- check-counter: + counters: + mds: + - "mds.dir_split" +- workunit: + clients: + client.0: + - libcephfs/test.sh diff --git a/qa/cephfs/unshare_ns_mount.sh b/qa/cephfs/unshare_ns_mount.sh new file mode 100755 index 000000000..88ac3e933 --- /dev/null +++ b/qa/cephfs/unshare_ns_mount.sh @@ -0,0 +1,594 @@ +#!/usr/bin/env bash + +# This is one helper for mounting the ceph-fuse/kernel clients by +# unsharing the network namespace, let's call it netns container. +# With the netns container, you can easily suspend or resume the +# virtual network interface to simulate the client node hard +# shutdown for some test cases. +# +# netnsX netnsY netnsZ +# -------------- -------------- -------------- +# | mount client | | mount client | | mount client | +# | default | ... | default | ... | default | +# |192.168.0.1/16| |192.168.0.2/16| |192.168.0.3/16| +# | veth0 | | veth0 | | veth0 | +# -------------- -------------- ------------- +# | | | +# \ | brx.Y / +# \ ---------------------- / +# \ brx.X | ceph-brx | brx.Z / +# \------>| default |<------/ +# | | 192.168.255.254/16 | | +# | ---------------------- | +# (suspend/resume) | (suspend/resume) +# ----------- +# | Physical | +# | A.B.C.D/M | +# ----------- +# +# Defaultly it will use the 192.168.X.Y/16 private network IPs for +# the ceph-brx and netnses as above. 
And you can also specify your +# own new ip/mask for the ceph-brx, like: +# +# $ unshare_ns_mount.sh --fuse /mnt/cephfs --brxip 172.19.100.100/12 +# +# Then the each netns will get a new ip from the ranges: +# [172.16.0.1 ~ 172.19.100.99]/12 and [172.19.100.101 ~ 172.31.255.254]/12 + +usage() { + echo "" + echo "This will help to isolate the network namespace from OS for the mount client!" + echo "" + echo "usage: unshare_ns_mount.sh [OPTIONS [paramters]] [--brxip <ip_address/mask>]" + echo "OPTIONS:" + echo -e " --fuse <ceph-fuse options>" + echo -e "\tThe ceph-fuse command options" + echo -e "\t $ unshare_ns_mount.sh --fuse -m 192.168.0.1:6789 /mnt/cephfs -o nonempty" + echo "" + echo -e " --kernel <mount options>" + echo -e "\tThe mount command options" + echo -e "\t $ unshare_ns_mount.sh --kernel -t ceph 192.168.0.1:6789:/ /mnt/cephfs -o fs=a" + echo "" + echo -e " --suspend <mountpoint>" + echo -e "\tDown the veth interface in the network namespace" + echo -e "\t $ unshare_ns_mount.sh --suspend /mnt/cephfs" + echo "" + echo -e " --resume <mountpoint>" + echo -e "\tUp the veth interface in the network namespace" + echo -e "\t $ unshare_ns_mount.sh --resume /mnt/cephfs" + echo "" + echo -e " --umount <mountpoint>" + echo -e "\tUmount and delete the network namespace" + echo -e "\t $ unshare_ns_mount.sh --umount /mnt/cephfs" + echo "" + echo -e " --brxip <ip_address/mask>" + echo -e "\tSpecify ip/mask for ceph-brx and it only makes sense for --fuse/--kernel options" + echo -e "\t(default: 192.168.255.254/16, netns ip: 192.168.0.1/16 ~ 192.168.255.253/16)" + echo -e "\t $ unshare_ns_mount.sh --fuse -m 192.168.0.1:6789 /mnt/cephfs --brxip 172.19.255.254/12" + echo -e "\t $ unshare_ns_mount.sh --kernel 192.168.0.1:6789:/ /mnt/cephfs --brxip 172.19.255.254/12" + echo "" + echo -e " -h, --help" + echo -e "\tPrint help" + echo "" +} + +CEPH_BRX=ceph-brx +CEPH_BRX_IP_DEF=192.168.255.254 +NET_MASK_DEF=16 +BRD_DEF=192.168.255.255 + +CEPH_BRX_IP=$CEPH_BRX_IP_DEF +NET_MASK=$NET_MASK_DEF +BRD=$BRD_DEF + +mountpoint="" +new_netns="" +fuse_type=false + +function get_mountpoint() { + for param in $@ + do + if [ -d $param ]; then + # skipping "--client_mountpoint/-r root_directory" + # option for ceph-fuse command + if [ "$last" == "-r" -o "$last" == "--client_mountpoint" ]; then + last=$param + continue + fi + if [ "0$mountpoint" != "0" ]; then + echo "Oops: too many mountpiont options!" + exit 1 + fi + mountpoint=$param + fi + last=$param + done + + if [ "0$mountpoint" == "0" ]; then + echo "Oops: mountpoint path is not a directory or no mountpoint specified!" 
+ exit 1 + fi +} + +function get_new_netns() { + # prune the repeating slashes: + # "/mnt///cephfs///" --> "/mnt/cephfs/" + __mountpoint=`echo "$mountpoint" | sed 's/\/\+/\//g'` + + # prune the leading slashes + while [ ${__mountpoint:0:1} == "/" ] + do + __mountpoint=${__mountpoint:1} + done + + # prune the last slashes + while [ ${__mountpoint: -1} == "/" ] + do + __mountpoint=${__mountpoint:0:-1} + done + + # replace '/' with '-' + __mountpoint=${__mountpoint//\//-} + + # "mnt/cephfs" --> "ceph-fuse-mnt-cephfs" + if [ "$1" == "--fuse" ]; then + new_netns=`echo ceph-fuse-$__mountpoint` + fuse_type=true + return + fi + + # "mnt/cephfs" --> "ceph-kernel-mnt-cephfs" + if [ "$1" == "--kernel" ]; then + new_netns=`echo ceph-kernel-$__mountpoint` + return + fi + + # we are in umount/suspend/resume routines + for ns in `ip netns list | awk '{print $1}'` + do + if [ "$ns" == "ceph-fuse-$__mountpoint" ]; then + new_netns=$ns + fuse_type=true + return + fi + if [ "$ns" == "ceph-kernel-$__mountpoint" ]; then + new_netns=$ns + return + fi + done + + if [ "0$new_netns" == "0" ]; then + echo "Oops, netns 'ceph-{fuse/kernel}-$__mountpoint' does not exists!" + exit 1 + fi +} + +# the peer veth name will be "brx.$nsid" on host node +function get_netns_brx() { + get_new_netns + + nsid=`ip netns list-id | grep "$new_netns" | awk '{print $2}'` + netns_veth=brx.$nsid + eval $1="$netns_veth" +} + +function suspend_netns_veth() { + get_mountpoint $@ + + get_netns_brx brx + ip link set $brx down + exit 0 +} + +function resume_netns_veth() { + get_mountpoint $@ + + get_netns_brx brx + ip link set $brx up + exit 0 +} + +# help and usage +if [ $# == 0 -o "$1" == "-h" -o "$1" == "--help" ]; then + usage + exit 0 +fi + +# suspend the veth from network namespace +if [ $1 == "--suspend" ]; then + suspend_netns_veth $@ + exit 0 +fi + +# resume the veth from network namespace +if [ $1 == "--resume" ]; then + resume_netns_veth $@ + exit 0 +fi + +function ceph_umount() { + get_mountpoint $@ + get_new_netns + + if [ $fuse_type == true ]; then + nsenter --net=/var/run/netns/$new_netns fusermount -u $mountpoint 2>/dev/null + else + nsenter --net=/var/run/netns/$new_netns umount $mountpoint 2>/dev/null + fi + + # let's wait for a while to let the umount operation + # to finish before deleting the netns + while [ 1 ] + do + for pid in `ip netns pids $new_netns 2>/dev/null` + do + name=`cat /proc/$pid/comm 2>/dev/null` + if [ "$name" == "ceph-fuse" ]; then + break + fi + done + + if [ "$name" == "ceph-fuse" ]; then + name="" + usleep 100000 + continue + fi + + break + done + + nsid=`ip netns list-id | grep "$new_netns" | awk '{print $2}'` + netns_brx=brx.$nsid + + # brctl delif $CEPH_BRX $netns_brx 2>/dev/null + nmcli connection down $netns_brx down 2>/dev/null + nmcli connection delete $netns_brx 2>/dev/null + + ip netns delete $new_netns 2>/dev/null + + # if this is the last netns_brx, will delete + # the $CEPH_BRX and restore the OS configure + # rc=`brctl show ceph-brx 2>/dev/null | grep 'brx\.'|wc -l` + rc=`nmcli connection show 2>/dev/null | grep 'brx\.' | wc -l` + if [ $rc == 0 ]; then + ip link set $CEPH_BRX down 2>/dev/null + # brctl delbr $CEPH_BRX 2>/dev/null + nmcli connection delete $CEPH_BRX 2>/dev/null + + # restore the ip forward + tmpfile=`ls /tmp/ | grep "$CEPH_BRX\."` + tmpfile=/tmp/$tmpfile + if [ ! -f $tmpfile ]; then + echo "Oops, the $CEPH_BRX.XXX temp file does not exist!" 
+ else + save=`cat $tmpfile` + echo $save > /proc/sys/net/ipv4/ip_forward + rm -rf $tmpfile + fi + + # drop the iptables NAT rules + host_nic=`route | grep default | awk '{print $8}'` + iptables -D FORWARD -o $host_nic -i $CEPH_BRX -j ACCEPT + iptables -D FORWARD -i $host_nic -o $CEPH_BRX -j ACCEPT + iptables -t nat -D POSTROUTING -s $CEPH_BRX_IP/$NET_MASK -o $host_nic -j MASQUERADE + fi +} + +function get_brd_mask() { + first=`echo "$CEPH_BRX_IP" | awk -F. '{print $1}'` + second=`echo "$CEPH_BRX_IP" | awk -F. '{print $2}'` + third=`echo "$CEPH_BRX_IP" | awk -F. '{print $3}'` + fourth=`echo "$CEPH_BRX_IP" | awk -F. '{print $4}'` + + if [ "$first" == "172" ]; then + second_max=31 + else + second_max=255 + fi + third_max=255 + fourth_max=255 + + if [ $NET_MASK -lt 16 ]; then + let power=16-$NET_MASK + m=`awk 'BEGIN{printf 2^"'$power'"-1}'` + second=$((second&~m)) + let second_max=$second+$m + elif [ $NET_MASK -lt 24 ]; then + let power=24-$NET_MASK + m=`awk 'BEGIN{printf 2^"'$power'"-1}'` + third=$((third&~m)) + let third_max=$third+$m + second_max=$second + elif [ $NET_MASK -lt 32 ]; then + let power=32-$NET_MASK + m=`awk 'BEGIN{printf 2^"'$power'"-1}'` + fourth=$((fourth&~m)) + let fourth_max=$fourth+$m + second_max=$second + third_max=$third + fi + + BRD=$first.$second_max.$third_max.$fourth_max +} + +# As default: +# The netns IP will be 192.168.0.1 ~ 192.168.255.253, +# and 192.168.255.254 is saved for $CEPH_BRX +function get_new_ns_ip() { + first=`echo "$CEPH_BRX_IP" | awk -F. '{print $1}'` + second=`echo "$CEPH_BRX_IP" | awk -F. '{print $2}'` + third=`echo "$CEPH_BRX_IP" | awk -F. '{print $3}'` + fourth=`echo "$CEPH_BRX_IP" | awk -F. '{print $4}'` + + if [ "$first" == ""172 ]; then + second_max=31 + else + second_max=255 + fi + third_max=255 + fourth_max=254 + + if [ $NET_MASK -lt 16 ]; then + let power=16-$NET_MASK + m=`awk 'BEGIN{printf 2^"'$power'"-1}'` + second=$((second&~m)) + let second_max=$second+$m + third=0 + fourth=1 + elif [ $NET_MASK -lt 24 ]; then + let power=24-$NET_MASK + m=`awk 'BEGIN{printf 2^"'$power'"-1}'` + third=$((third&~m)) + let third_max=$third+$m + second_max=$second + fourth=1 + elif [ $NET_MASK -lt 32 ]; then + let power=32-$NET_MASK + m=`awk 'BEGIN{printf 2^"'$power'"-1}'` + fourth=$((fourth&~m)) + let fourth+=1 + let fourth_max=$fourth+$m-1 + second_max=$second + third_max=$third + fi + + while [ $second -le $second_max -a $third -le $third_max -a $fourth -le $fourth_max ] + do + conflict=false + + # check from the existing network namespaces + for netns in `ip netns list | awk '{print $1}'` + do + ip=`ip netns exec $netns ip addr | grep "inet " | grep "veth0"` + ip=`echo "$ip" | awk '{print $2}' | awk -F/ '{print $1}'` + if [ "0$ip" == "0" ]; then + continue + fi + if [ "$first.$second.$third.$fourth" == "$ip" ]; then + conflict=true + + let fourth+=1 + if [ $fourth -le $fourth_max ]; then + break + fi + + fourth=0 + let third+=1 + if [ $third -le $third_max ]; then + break + fi + + third=0 + let second+=1 + if [ $second -le $second_max ]; then + break + fi + + echo "Oops: we have ran out of the ip addresses!" + exit 1 + fi + done + + # have we found one ? + if [ $conflict == false ]; then + break + fi + done + + ip=$first.$second.$third.$fourth + max=$first.$second_max.$third_max.$fourth_max + if [ "$ip" == "$max" ]; then + echo "Oops: we have ran out of the ip addresses!" + exit 1 + fi + + eval $1="$ip" +} + +function check_valid_private_ip() { + first=`echo "$1" | awk -F. '{print $1}'` + second=`echo "$1" | awk -F. 
'{print $2}'` + + # private network class A 10.0.0.0 - 10.255.255.255 + if [ "$first" == "10" -a $NET_MASK -ge 8 ]; then + return + fi + + # private network class B 172.16.0.0 - 172.31.255.255 + if [ "$first" == "172" -a $second -ge 16 -a $second -le 31 -a $NET_MASK -ge 12 ]; then + return + fi + + # private network class C 192.168.0.0 - 192.168.255.255 + if [ "$first" == "192" -a "$second" == "168" -a $NET_MASK -ge 16 ]; then + return + fi + + echo "Oops: invalid private ip address '$CEPH_BRX_IP/$NET_MASK'!" + exit 1 +} + +function setup_bridge_and_nat() { + # check and parse the --brxip parameter + is_brxip=false + for ip in $@ + do + if [ "$ip" == "--brxip" ]; then + is_brxip=true + continue + fi + if [ $is_brxip == true ]; then + new_brxip=$ip + break + fi + done + + # if the $CEPH_BRX already exists, then check the new + # brxip, if not match fail it without doing anything. + rc=`ip addr | grep "inet " | grep " $CEPH_BRX"` + if [ "0$rc" != "0" ]; then + existing_brxip=`echo "$rc" | awk '{print $2}'` + if [ "0$new_brxip" != "0" -a "$existing_brxip" != "$new_brxip" ]; then + echo "Oops: conflict with the existing $CEPH_BRX ip '$existing_brxip', new '$new_brxip'!" + exit 1 + fi + + CEPH_BRX_IP=`echo "$existing_brxip" | awk -F/ '{print $1}'` + NET_MASK=`echo "$existing_brxip" | awk -F/ '{print $2}'` + get_brd_mask + return + fi + + # if it is the first time to run the the script or there + # is no any network namespace exists, we need to setup + # the $CEPH_BRX, if no --brxip is specified will use the + # default $CEPH_BRX_IP/$NET_MASK + if [ "0$new_brxip" != "0" ]; then + CEPH_BRX_IP=`echo "$new_brxip" | awk -F/ '{print $1}'` + NET_MASK=`echo "$new_brxip" | awk -F/ '{print $2}'` + get_brd_mask + check_valid_private_ip $CEPH_BRX_IP + fi + + # brctl addbr $CEPH_BRX + nmcli connection add type bridge con-name $CEPH_BRX ifname $CEPH_BRX stp no + # ip link set $CEPH_BRX up + # ip addr add $CEPH_BRX_IP/$NET_MASK brd $BRD dev $CEPH_BRX + nmcli connection modify $CEPH_BRX ipv4.addresses $CEPH_BRX_IP/$NET_MASK ipv4.method manual + nmcli connection up $CEPH_BRX + + # setup the NAT + rm -rf /tmp/ceph-brx.* + tmpfile=$(mktemp /tmp/ceph-brx.XXXXXXXX) + save=`cat /proc/sys/net/ipv4/ip_forward` + echo $save > $tmpfile + echo 1 > /proc/sys/net/ipv4/ip_forward + + host_nic=`route | grep default | awk '{print $8}'` + iptables -A FORWARD -o $host_nic -i $CEPH_BRX -j ACCEPT + iptables -A FORWARD -i $host_nic -o $CEPH_BRX -j ACCEPT + iptables -t nat -A POSTROUTING -s $CEPH_BRX_IP/$NET_MASK -o $host_nic -j MASQUERADE +} + +function __ceph_mount() { + # for some options like the '-t' in mount command + # the nsenter command will take over it, so it is + # hard to pass it direct to the netns. + # here we will create one temp file with x mode + tmpfile=$(mktemp /tmp/ceph-nsenter.XXXXXXXX) + chmod +x $tmpfile + if [ "$1" == "--kernel" ]; then + cmd=`echo "$@" | sed 's/--kernel/mount/'` + else + cmd=`echo "$@" | sed 's/--fuse/ceph-fuse/'` + fi + + # remove the --brxip parameter + cmd=`echo "$cmd" | sed 's/--brxip.*\/[0-9]* //'` + + # enter $new_netns and run ceph fuse client mount, + # we couldn't use 'ip netns exec' here because it + # will unshare the mount namespace. + echo "$cmd" > $tmpfile + nsenter --net=/var/run/netns/$new_netns /bin/bash $tmpfile ; echo $? 
> $tmpfile + rc=`cat $tmpfile` + rm -f $tmpfile + + # fall back + if [ $rc != 0 ]; then + m=$mountpoint + mountpoint="" + ceph_umount $m + fi +} + +function get_new_nsid() { + # get one uniq netns id + uniq_id=0 + while [ 1 ] + do + rc=`ip netns list-id | grep "nsid $uniq_id "` + if [ "0$rc" == "0" ]; then + break + fi + let uniq_id+=1 + done + + eval $1="$uniq_id" +} + +function ceph_mount() { + get_mountpoint $@ + setup_bridge_and_nat $@ + + get_new_netns $1 + rc=`ip netns list | grep "$new_netns" | awk '{print $1}'` + if [ "0$rc" != "0" ]; then + echo "Oops: the netns "$new_netns" already exists!" + exit 1 + fi + + get_new_nsid new_nsid + + # create a new network namespace + ip netns add $new_netns + ip netns set $new_netns $new_nsid + + get_new_ns_ip ns_ip + if [ 0"$ns_ip" == "0" ]; then + echo "Oops: there is no ip address could be used any more!" + exit 1 + fi + + # veth interface in netns + ns_veth=veth0 + netns_brx=brx.$new_nsid + + # setup veth interfaces + ip link add $ns_veth netns $new_netns type veth peer name $netns_brx + ip netns exec $new_netns ip addr add $ns_ip/$NET_MASK brd $BRD dev $ns_veth + ip netns exec $new_netns ip link set $ns_veth up + ip netns exec $new_netns ip link set lo up + ip netns exec $new_netns ip route add default via $CEPH_BRX_IP + + # bring up the bridge interface and join it to $CEPH_BRX + # brctl addif $CEPH_BRX $netns_brx + nmcli connection add type bridge-slave con-name $netns_brx ifname $netns_brx master $CEPH_BRX + nmcli connection up $netns_brx + # ip link set $netns_brx up + + __ceph_mount $@ +} + +if [ "$1" == "--umount" ]; then + ceph_umount $@ + exit 0 +fi + +# mount in the netns +if [ "$1" != "--kernel" -a "$1" != "--fuse" ]; then + echo "Oops: invalid mount options '$1'!" + exit 1 +fi + +ceph_mount $@ diff --git a/qa/client/30_subdir_mount.sh b/qa/client/30_subdir_mount.sh new file mode 100755 index 000000000..0bdf2ed1a --- /dev/null +++ b/qa/client/30_subdir_mount.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +set -x + +basedir=`echo $0 | sed 's/[^/]*$//g'`. +. $basedir/common.sh + +client_mount +mkdir -p $mnt/sub +echo sub > $mnt/sub/file +client_umount + +mkdir -p $mnt/1 +mkdir -p $mnt/2 +/bin/mount -t ceph $monhost:/sub $mnt/1 +grep sub $mnt/1/file + +/bin/mount -t ceph $monhost:/ $mnt/2 +grep sub $mnt/2/sub/file + +/bin/umount $mnt/1 +grep sub $mnt/2/sub/file + +/bin/umount $mnt/2 diff --git a/qa/client/common.sh b/qa/client/common.sh new file mode 100644 index 000000000..d06368e6e --- /dev/null +++ b/qa/client/common.sh @@ -0,0 +1,58 @@ + +# defaults +[ -z "$bindir" ] && bindir=$PWD # location of init-ceph +[ -z "$conf" ] && conf="$basedir/ceph.conf" +[ -z "$mnt" ] && mnt="/c" +[ -z "$monhost" ] && monhost="cosd0" + +set -e + +mydir=`hostname`_`echo $0 | sed 's/\//_/g'` + +client_mount() +{ + /bin/mount -t ceph $monhost:/ $mnt +} + +client_umount() +{ + /bin/umount $mnt + # look for VFS complaints + if dmesg | tail -n 50 | grep -c "VFS: Busy inodes" ; then + echo "looks like we left inodes pinned" + exit 1 + fi +} + +ceph_start() +{ + $bindir/init-ceph -c $conf start ${1} +} + +ceph_stop() +{ + $bindir/init-ceph -c $conf stop ${1} +} + +ceph_restart() +{ + $bindir/init-ceph -c $conf restart ${1} +} + +ceph_command() +{ + $bindir/ceph -c $conf $* +} + +client_enter_mydir() +{ + pushd . 
+ test -d $mnt/$mydir && rm -r $mnt/$mydir + mkdir $mnt/$mydir + cd $mnt/$mydir +} + +client_leave_mydir() +{ + popd +} diff --git a/qa/client/gen-1774.sh b/qa/client/gen-1774.sh new file mode 100644 index 000000000..3ee5bc90d --- /dev/null +++ b/qa/client/gen-1774.sh @@ -0,0 +1,2068 @@ +#!/usr/bin/env bash +set -e + +mount () { :; } +umount () { :; } + +list="\ +abiword.control +abiword.list +abiword-plugin-latex.control +abiword-plugin-latex.list +abiword-plugin-opendocument.control +abiword-plugin-opendocument.list +abiword-plugin-openxml.control +abiword-plugin-openxml.list +abiword-plugin-pdf.control +abiword-plugin-pdf.list +abiword-plugin-wikipedia.control +abiword-plugin-wikipedia.list +abiword.postinst +aceofpenguins.control +aceofpenguins-launcher.control +aceofpenguins-launcher.list +aceofpenguins.list +aceofpenguins.postinst +alsa-conf-base.control +alsa-conf-base.list +alsa-scenarii-shr.conffiles +alsa-scenarii-shr.control +alsa-scenarii-shr.list +alsa-utils-alsactl.control +alsa-utils-alsactl.list +alsa-utils-alsamixer.control +alsa-utils-alsamixer.list +alsa-utils-amixer.control +alsa-utils-amixer.list +alsa-utils-aplay.control +alsa-utils-aplay.list +angstrom-libc-fixup-hack.control +angstrom-libc-fixup-hack.list +angstrom-libc-fixup-hack.postinst +apmd.control +apmd.list +apmd.postinst +apmd.postrm +apmd.prerm +aspell.control +aspell.list +atd-over-fso.control +atd-over-fso.list +atd-over-fso.postinst +atd-over-fso.postrm +atd-over-fso.prerm +base-files.conffiles +base-files.control +base-files.list +base-passwd.control +base-passwd.list +base-passwd.postinst +bash.control +bash.list +bash.postinst +bluez4.control +bluez4.list +bluez4.postinst +bluez4.postrm +bluez4.prerm +boost-signals.control +boost-signals.list +boost-signals.postinst +busybox.control +busybox.list +busybox-mountall.control +busybox-mountall.list +busybox-mountall.postinst +busybox-mountall.prerm +busybox.postinst +busybox.prerm +busybox-syslog.conffiles +busybox-syslog.control +busybox-syslog.list +busybox-syslog.postinst +busybox-syslog.postrm +busybox-syslog.prerm +ca-certificates.conffiles +ca-certificates.control +ca-certificates.list +ca-certificates.postinst +calc.control +calc.list +connman.control +connman.list +connman-plugin-udhcp.control +connman-plugin-udhcp.list +connman-plugin-wifi.control +connman-plugin-wifi.list +connman.postinst +connman.postrm +connman.prerm +connman-scripts.control +connman-scripts.list +cpio.control +cpio.list +cpio.postinst +cpio.prerm +cpp.control +cpp.list +cpp-symlinks.control +cpp-symlinks.list +cron.control +cron.list +cron.postinst +cron.postrm +cron.prerm +curl.control +curl.list +dbus.conffiles +dbus.control +dbus-daemon-proxy.control +dbus-daemon-proxy.list +dbus-hlid.control +dbus-hlid.list +dbus.list +dbus.postinst +dbus.postrm +dbus.prerm +dbus-x11.control +dbus-x11.list +devmem2.control +devmem2.list +distro-feed-configs.conffiles +distro-feed-configs.control +distro-feed-configs.list +dosfstools.control +dosfstools.list +e2fsprogs-badblocks.control +e2fsprogs-badblocks.list +e2fsprogs.control +e2fsprogs-e2fsck.control +e2fsprogs-e2fsck.list +e2fsprogs-e2fsck.postinst +e2fsprogs-e2fsck.prerm +e2fsprogs.list +e2fsprogs-mke2fs.control +e2fsprogs-mke2fs.list +e2fsprogs-mke2fs.postinst +e2fsprogs-mke2fs.prerm +e2fsprogs.postinst +e2fsprogs.prerm +ecore-con.control +ecore-con.list +ecore-con.postinst +ecore.control +ecore-evas.control +ecore-evas.list +ecore-evas.postinst +ecore-fb.control +ecore-fb.list +ecore-fb.postinst +ecore-file.control 
+ecore-file.list +ecore-file.postinst +ecore-imf.control +ecore-imf-evas.control +ecore-imf-evas.list +ecore-imf-evas.postinst +ecore-imf.list +ecore-imf.postinst +ecore-input.control +ecore-input.list +ecore-input.postinst +ecore-ipc.control +ecore-ipc.list +ecore-ipc.postinst +ecore.list +ecore.postinst +ecore-x.control +ecore-x.list +ecore-x.postinst +edbus.control +edbus.list +edbus.postinst +edje.control +edje.list +edje.postinst +edje-utils.control +edje-utils.list +efreet.control +efreet.list +efreet.postinst +eggdbus.control +eggdbus.list +eggdbus.postinst +eglibc-binary-localedata-en-us.control +eglibc-binary-localedata-en-us.list +eglibc-charmap-utf-8.control +eglibc-charmap-utf-8.list +eglibc-gconv.control +eglibc-gconv-cp1252.control +eglibc-gconv-cp1252.list +eglibc-gconv-ibm850.control +eglibc-gconv-ibm850.list +eglibc-gconv-iso8859-15.control +eglibc-gconv-iso8859-15.list +eglibc-gconv-iso8859-1.control +eglibc-gconv-iso8859-1.list +eglibc-gconv.list +eglibc-localedata-i18n.control +eglibc-localedata-i18n.list +eglibc-localedata-iso14651-t1-common.control +eglibc-localedata-iso14651-t1-common.list +eglibc-localedata-iso14651-t1.control +eglibc-localedata-iso14651-t1.list +eglibc-localedata-translit-circle.control +eglibc-localedata-translit-circle.list +eglibc-localedata-translit-cjk-compat.control +eglibc-localedata-translit-cjk-compat.list +eglibc-localedata-translit-compat.control +eglibc-localedata-translit-compat.list +eglibc-localedata-translit-font.control +eglibc-localedata-translit-font.list +eglibc-localedata-translit-fraction.control +eglibc-localedata-translit-fraction.list +eglibc-localedata-translit-narrow.control +eglibc-localedata-translit-narrow.list +eglibc-localedata-translit-neutral.control +eglibc-localedata-translit-neutral.list +eglibc-localedata-translit-small.control +eglibc-localedata-translit-small.list +eglibc-localedata-translit-wide.control +eglibc-localedata-translit-wide.list +eglibc-utils.control +eglibc-utils.list +eina.control +eina.list +eina.postinst +eject.control +eject.list +elementary-theme-gry.control +elementary-theme-gry.list +emacs-x11.control +emacs-x11.list +embryo.control +embryo.list +embryo.postinst +embryo-tests.control +embryo-tests.list +enchant.control +enchant.list +enchant.postinst +epdfview.control +epdfview.list +espeak.control +espeak.list +espeak.postinst +evas.control +evas-engine-buffer.control +evas-engine-buffer.list +evas-engine-fb.control +evas-engine-fb.list +evas-engine-software-16.control +evas-engine-software-16.list +evas-engine-software-16-x11.control +evas-engine-software-16-x11.list +evas-engine-software-generic.control +evas-engine-software-generic.list +evas-engine-software-x11.control +evas-engine-software-x11.list +evas-engine-xrender-x11.control +evas-engine-xrender-x11.list +evas.list +evas-loader-eet.control +evas-loader-eet.list +evas-loader-jpeg.control +evas-loader-jpeg.list +evas-loader-png.control +evas-loader-png.list +evas.postinst +evas-saver-eet.control +evas-saver-eet.list +evas-saver-jpeg.control +evas-saver-jpeg.list +evas-saver-png.control +evas-saver-png.list +evtest.control +evtest.list +e-wm-config-default.control +e-wm-config-default.list +e-wm-config-illume2-shr.control +e-wm-config-illume2-shr.list +e-wm-config-illume-shr.control +e-wm-config-illume-shr.list +e-wm.control +e-wm-icons.control +e-wm-icons.list +e-wm-images.control +e-wm-images.list +e-wm-input-methods.control +e-wm-input-methods.list +e-wm.list +e-wm-menu-shr.control +e-wm-menu-shr.list +e-wm-other.control 
+e-wm-other.list +e-wm.postinst +e-wm.postrm +e-wm-sysactions-shr.control +e-wm-sysactions-shr.list +e-wm-theme-default.control +e-wm-theme-default.list +e-wm-theme-illume-gry.control +e-wm-theme-illume-gry.list +e-wm-theme-illume-shr.control +e-wm-theme-illume-shr.list +e-wm-utils.control +e-wm-utils.list +fbreader.control +fbreader.list +fbreader.postinst +fbset.control +fbset.list +fbset-modes.conffiles +fbset-modes.control +fbset-modes.list +fbset.postinst +fbset.postrm +ffalarms.control +ffalarms.list +file.control +file.list +file.postinst +findutils.control +findutils.list +findutils.postinst +findutils.prerm +flac.control +flac.list +flite.control +flite.list +fontconfig-utils.control +fontconfig-utils.list +font-update-common.control +font-update-common.list +frameworkd-config-shr.conffiles +frameworkd-config-shr.control +frameworkd-config-shr.list +frameworkd.control +frameworkd.list +frameworkd.postinst +frameworkd.postrm +frameworkd.prerm +fso-abyss-config.conffiles +fso-abyss-config.control +fso-abyss-config.list +fso-abyss.control +fso-abyss.list +fso-apm.control +fso-apm.list +fsodatad.control +fsodatad.list +fsodatad.postinst +fsodeviced.control +fsodeviced.list +fsodeviced.postinst +fsodeviced.postrm +fsodeviced.prerm +fso-gpsd.control +fso-gpsd.list +fso-gpsd.postinst +fso-gpsd.postrm +fso-gpsd.prerm +fsogsmd.control +fsogsmd.list +fsogsmd.postinst +fsonetworkd.control +fsonetworkd.list +fsonetworkd.postinst +fsoraw.control +fsoraw.list +fsotdld.control +fsotdld.list +fsotdld.postinst +fsousaged.control +fsousaged.list +fsousaged.postinst +gcc.control +gcc.list +gconf.control +gconf.list +gconf.postinst +g++.control +gdb.control +gdb.list +gdk-pixbuf-loader-gif.control +gdk-pixbuf-loader-gif.list +gdk-pixbuf-loader-gif.postinst +gdk-pixbuf-loader-jpeg.control +gdk-pixbuf-loader-jpeg.list +gdk-pixbuf-loader-jpeg.postinst +gdk-pixbuf-loader-png.control +gdk-pixbuf-loader-png.list +gdk-pixbuf-loader-png.postinst +gdk-pixbuf-loader-xpm.control +gdk-pixbuf-loader-xpm.list +gdk-pixbuf-loader-xpm.postinst +git.control +git.list +g++.list +gnome-pty-helper.control +gnome-pty-helper.list +gnome-vfs.control +gnome-vfs.list +gnome-vfs-plugin-file.control +gnome-vfs-plugin-file.list +gnome-vfs.postinst +gnome-vfs.prerm +gnupg.control +gnupg.list +gpe-icons.control +gpe-icons.list +gpe-icons.postinst +gpe-icons.postrm +gpe-scap.control +gpe-scap.list +gpe-sketchbook.control +gpe-sketchbook.list +gpgv.control +gpgv.list +gridpad.control +gridpad.list +gst-plugin-alsa.control +gst-plugin-alsa.list +gst-plugin-audioconvert.control +gst-plugin-audioconvert.list +gst-plugin-autodetect.control +gst-plugin-autodetect.list +gst-plugin-gconfelements.control +gst-plugin-gconfelements.list +gst-plugin-gconfelements.postinst +gst-plugin-gconfelements.prerm +gst-plugin-mad.control +gst-plugin-mad.list +gstreamer.control +gstreamer.list +gstreamer.postinst +gtk+.control +gtk+.list +gtk+.postinst +hal.control +hal-info.control +hal-info.list +hal.list +hal.postinst +hal.postrm +hdparm.control +hdparm.list +hdparm.postinst +hdparm.prerm +hicolor-icon-theme.control +hicolor-icon-theme.list +hicolor-icon-theme.postinst +hicolor-icon-theme.postrm +htop.control +htop.list +i2c-tools.control +i2c-tools.list +id3lib.control +id3lib.list +id3lib.postinst +iliwi.control +iliwi.list +illume-keyboard-default-alpha.control +illume-keyboard-default-alpha.list +illume-keyboard-default-terminal.control +illume-keyboard-default-terminal.list +illume-keyboard-numeric-alt.control +illume-keyboard-numeric-alt.list 
+imagemagick.control +imagemagick.list +imagemagick.postinst +initscripts-shr.control +initscripts-shr.list +intone.control +intone.list +iptables.control +iptables.list +iptables.postinst +kernel-2.6.29-rc3.control +kernel-2.6.29-rc3.list +kernel.control +kernel-image-2.6.29-rc3.control +kernel-image-2.6.29-rc3.list +kernel-image-2.6.29-rc3.postinst +kernel.list +kernel-module-ar6000.control +kernel-module-ar6000.list +kernel-module-ar6000.postinst +kernel-module-ar6000.postrm +kernel-module-arc4.control +kernel-module-arc4.list +kernel-module-arc4.postinst +kernel-module-arc4.postrm +kernel-module-asix.control +kernel-module-asix.list +kernel-module-asix.postinst +kernel-module-asix.postrm +kernel-module-bluetooth.control +kernel-module-bluetooth.list +kernel-module-bluetooth.postinst +kernel-module-bluetooth.postrm +kernel-module-bnep.control +kernel-module-bnep.list +kernel-module-bnep.postinst +kernel-module-bnep.postrm +kernel-module-btusb.control +kernel-module-btusb.list +kernel-module-btusb.postinst +kernel-module-btusb.postrm +kernel-module-crc-ccitt.control +kernel-module-crc-ccitt.list +kernel-module-crc-ccitt.postinst +kernel-module-crc-ccitt.postrm +kernel-module-ecb.control +kernel-module-ecb.list +kernel-module-ecb.postinst +kernel-module-ecb.postrm +kernel-module-exportfs.control +kernel-module-exportfs.list +kernel-module-exportfs.postinst +kernel-module-exportfs.postrm +kernel-module-gadgetfs.control +kernel-module-gadgetfs.list +kernel-module-gadgetfs.postinst +kernel-module-gadgetfs.postrm +kernel-module-g-ether.control +kernel-module-g-ether.list +kernel-module-g-ether.postinst +kernel-module-g-ether.postrm +kernel-module-g-file-storage.control +kernel-module-g-file-storage.list +kernel-module-g-file-storage.postinst +kernel-module-g-file-storage.postrm +kernel-module-g-serial.control +kernel-module-g-serial.list +kernel-module-g-serial.postinst +kernel-module-g-serial.postrm +kernel-module-hidp.control +kernel-module-hidp.list +kernel-module-hidp.postinst +kernel-module-hidp.postrm +kernel-module-iptable-filter.control +kernel-module-iptable-filter.list +kernel-module-iptable-filter.postinst +kernel-module-iptable-filter.postrm +kernel-module-iptable-nat.control +kernel-module-iptable-nat.list +kernel-module-iptable-nat.postinst +kernel-module-iptable-nat.postrm +kernel-module-ip-tables.control +kernel-module-ip-tables.list +kernel-module-ip-tables.postinst +kernel-module-ip-tables.postrm +kernel-module-ipt-masquerade.control +kernel-module-ipt-masquerade.list +kernel-module-ipt-masquerade.postinst +kernel-module-ipt-masquerade.postrm +kernel-module-l2cap.control +kernel-module-l2cap.list +kernel-module-l2cap.postinst +kernel-module-l2cap.postrm +kernel-module-lockd.control +kernel-module-lockd.list +kernel-module-lockd.postinst +kernel-module-lockd.postrm +kernel-module-michael-mic.control +kernel-module-michael-mic.list +kernel-module-michael-mic.postinst +kernel-module-michael-mic.postrm +kernel-module-nf-conntrack.control +kernel-module-nf-conntrack-ipv4.control +kernel-module-nf-conntrack-ipv4.list +kernel-module-nf-conntrack-ipv4.postinst +kernel-module-nf-conntrack-ipv4.postrm +kernel-module-nf-conntrack.list +kernel-module-nf-conntrack.postinst +kernel-module-nf-conntrack.postrm +kernel-module-nf-defrag-ipv4.control +kernel-module-nf-defrag-ipv4.list +kernel-module-nf-defrag-ipv4.postinst +kernel-module-nf-defrag-ipv4.postrm +kernel-module-nf-nat.control +kernel-module-nf-nat.list +kernel-module-nf-nat.postinst +kernel-module-nf-nat.postrm 
+kernel-module-nfs-acl.control +kernel-module-nfs-acl.list +kernel-module-nfs-acl.postinst +kernel-module-nfs-acl.postrm +kernel-module-nfsd.control +kernel-module-nfsd.list +kernel-module-nfsd.postinst +kernel-module-nfsd.postrm +kernel-module-nls-utf8.control +kernel-module-nls-utf8.list +kernel-module-nls-utf8.postinst +kernel-module-nls-utf8.postrm +kernel-module-ohci-hcd.control +kernel-module-ohci-hcd.list +kernel-module-ohci-hcd.postinst +kernel-module-ohci-hcd.postrm +kernel-module-pegasus.control +kernel-module-pegasus.list +kernel-module-pegasus.postinst +kernel-module-pegasus.postrm +kernel-module-ppp-async.control +kernel-module-ppp-async.list +kernel-module-ppp-async.postinst +kernel-module-ppp-async.postrm +kernel-module-ppp-deflate.control +kernel-module-ppp-deflate.list +kernel-module-ppp-deflate.postinst +kernel-module-ppp-deflate.postrm +kernel-module-ppp-generic.control +kernel-module-ppp-generic.list +kernel-module-ppp-generic.postinst +kernel-module-ppp-generic.postrm +kernel-module-ppp-mppe.control +kernel-module-ppp-mppe.list +kernel-module-ppp-mppe.postinst +kernel-module-ppp-mppe.postrm +kernel-module-rfcomm.control +kernel-module-rfcomm.list +kernel-module-rfcomm.postinst +kernel-module-rfcomm.postrm +kernel-module-s3cmci.control +kernel-module-s3cmci.list +kernel-module-s3cmci.postinst +kernel-module-s3cmci.postrm +kernel-module-sco.control +kernel-module-sco.list +kernel-module-sco.postinst +kernel-module-sco.postrm +kernel-module-scsi-mod.control +kernel-module-scsi-mod.list +kernel-module-scsi-mod.postinst +kernel-module-scsi-mod.postrm +kernel-module-sd-mod.control +kernel-module-sd-mod.list +kernel-module-sd-mod.postinst +kernel-module-sd-mod.postrm +kernel-module-slhc.control +kernel-module-slhc.list +kernel-module-slhc.postinst +kernel-module-slhc.postrm +kernel-module-snd.control +kernel-module-snd.list +kernel-module-snd-page-alloc.control +kernel-module-snd-page-alloc.list +kernel-module-snd-page-alloc.postinst +kernel-module-snd-page-alloc.postrm +kernel-module-snd-pcm.control +kernel-module-snd-pcm.list +kernel-module-snd-pcm.postinst +kernel-module-snd-pcm.postrm +kernel-module-snd.postinst +kernel-module-snd.postrm +kernel-module-snd-soc-core.control +kernel-module-snd-soc-core.list +kernel-module-snd-soc-core.postinst +kernel-module-snd-soc-core.postrm +kernel-module-snd-soc-neo1973-gta02-wm8753.control +kernel-module-snd-soc-neo1973-gta02-wm8753.list +kernel-module-snd-soc-neo1973-gta02-wm8753.postinst +kernel-module-snd-soc-neo1973-gta02-wm8753.postrm +kernel-module-snd-soc-s3c24xx.control +kernel-module-snd-soc-s3c24xx-i2s.control +kernel-module-snd-soc-s3c24xx-i2s.list +kernel-module-snd-soc-s3c24xx-i2s.postinst +kernel-module-snd-soc-s3c24xx-i2s.postrm +kernel-module-snd-soc-s3c24xx.list +kernel-module-snd-soc-s3c24xx.postinst +kernel-module-snd-soc-s3c24xx.postrm +kernel-module-snd-soc-wm8753.control +kernel-module-snd-soc-wm8753.list +kernel-module-snd-soc-wm8753.postinst +kernel-module-snd-soc-wm8753.postrm +kernel-module-snd-timer.control +kernel-module-snd-timer.list +kernel-module-snd-timer.postinst +kernel-module-snd-timer.postrm +kernel-module-sunrpc.control +kernel-module-sunrpc.list +kernel-module-sunrpc.postinst +kernel-module-sunrpc.postrm +kernel-module-tun.control +kernel-module-tun.list +kernel-module-tun.postinst +kernel-module-tun.postrm +kernel-module-uinput.control +kernel-module-uinput.list +kernel-module-uinput.postinst +kernel-module-uinput.postrm +kernel-module-usbserial.control +kernel-module-usbserial.list 
+kernel-module-usbserial.postinst +kernel-module-usbserial.postrm +kernel-module-usb-storage.control +kernel-module-usb-storage.list +kernel-module-usb-storage.postinst +kernel-module-usb-storage.postrm +kernel-module-x-tables.control +kernel-module-x-tables.list +kernel-module-x-tables.postinst +kernel-module-x-tables.postrm +kernel.postinst +kernel.postrm +lame.control +lame.list +liba52-0.control +liba52-0.list +liba52-0.postinst +libacl1.control +libacl1.list +libacl1.postinst +libapm1.control +libapm1.list +libapm1.postinst +libasound2.control +libasound2.list +libasound2.postinst +libaspell15.control +libaspell15.list +libaspell15.postinst +libatk-1.0-0.control +libatk-1.0-0.list +libatk-1.0-0.postinst +libattr1.control +libattr1.list +libattr1.postinst +libavahi-client3.control +libavahi-client3.list +libavahi-client3.postinst +libavahi-common3.control +libavahi-common3.list +libavahi-common3.postinst +libavahi-glib1.control +libavahi-glib1.list +libavahi-glib1.postinst +libavcodec52.control +libavcodec52.list +libavcodec52.postinst +libavformat52.control +libavformat52.list +libavformat52.postinst +libavutil50.control +libavutil50.list +libavutil50.postinst +libblkid1.control +libblkid1.list +libblkid1.postinst +libbz2-1.control +libbz2-1.list +libbz2-1.postinst +libc6.control +libc6.list +libc6.postinst +libcairo2.control +libcairo2.list +libcairo2.postinst +libcanberra0.control +libcanberra0.list +libcanberra0.postinst +libcanberra-alsa.control +libcanberra-alsa.list +libcom-err2.control +libcom-err2.list +libcom-err2.postinst +libcroco.control +libcroco.list +libcroco.postinst +libcrypto0.9.8.control +libcrypto0.9.8.list +libcrypto0.9.8.postinst +libcups2.control +libcups2.list +libcups2.postinst +libcurl4.control +libcurl4.list +libcurl4.postinst +libdbus-1-3.control +libdbus-1-3.list +libdbus-1-3.postinst +libdbus-glib-1-2.control +libdbus-glib-1-2.list +libdbus-glib-1-2.postinst +libdmx1.control +libdmx1.list +libdmx1.postinst +libdrm.control +libdrm.list +libdrm.postinst +libdvdcss2.control +libdvdcss2.list +libdvdcss2.postinst +libdvdread3.control +libdvdread3.list +libdvdread3.postinst +libeet1.control +libeet1.list +libeet1.postinst +libelementary-ver-pre-svn-05-0.control +libelementary-ver-pre-svn-05-0.list +libelementary-ver-pre-svn-05-0.postinst +libelementary-ver-pre-svn-05-themes.control +libelementary-ver-pre-svn-05-themes.list +libelf0.control +libelf0.list +libelf0.postinst +libewebkit0.control +libewebkit0.list +libewebkit0.postinst +libexif12.control +libexif12.list +libexif12.postinst +libexosip2.control +libexosip2.list +libexosip2.postinst +libexpat1.control +libexpat1.list +libexpat1.postinst +libfaac0.control +libfaac0.list +libfaac0.postinst +libfakekey0.control +libfakekey0.list +libfakekey0.postinst +libffi5.control +libffi5.list +libffi5.postinst +libflac8.control +libflac8.list +libflac8.postinst +libfontconfig1.control +libfontconfig1.list +libfontconfig1.postinst +libfontenc1.control +libfontenc1.list +libfontenc1.postinst +libframeworkd-glib0.control +libframeworkd-glib0.list +libframeworkd-glib0.postinst +libfreetype6.control +libfreetype6.list +libfreetype6.postinst +libfribidi0.control +libfribidi0.list +libfribidi0.postinst +libfsobasics0.control +libfsobasics0.list +libfsobasics0.postinst +libfsoframework0.control +libfsoframework0.list +libfsoframework0.postinst +libfso-glib0.control +libfso-glib0.list +libfso-glib0.postinst +libfsoresource0.control +libfsoresource0.list +libfsoresource0.postinst +libfsotransport0.control 
+libfsotransport0.list +libfsotransport0.postinst +libgcc1.control +libgcc1.list +libgcc1.postinst +libgcrypt11.control +libgcrypt11.list +libgcrypt11.postinst +libgee2.control +libgee2.list +libgee2.postinst +libgio-2.0-0.control +libgio-2.0-0.list +libgio-2.0-0.postinst +libgl1.control +libgl1.list +libgl1.postinst +libglade-2.0-0.control +libglade-2.0-0.list +libglade-2.0-0.postinst +libglib-2.0-0.control +libglib-2.0-0.list +libglib-2.0-0.postinst +libglu1.control +libglu1.list +libglu1.postinst +libgmodule-2.0-0.control +libgmodule-2.0-0.list +libgmodule-2.0-0.postinst +libgmp3.control +libgmp3.list +libgmp3.postinst +libgnt0.control +libgnt0.list +libgnt0.postinst +libgnutls26.control +libgnutls26.list +libgnutls26.postinst +libgnutls-extra26.control +libgnutls-extra26.list +libgnutls-extra26.postinst +libgobject-2.0-0.control +libgobject-2.0-0.list +libgobject-2.0-0.postinst +libgoffice-0.8-8.control +libgoffice-0.8-8.list +libgoffice-0.8-8.postinst +libgoffice-0.8-plugin-plot-barcol.control +libgoffice-0.8-plugin-plot-barcol.list +libgoffice-0.8-plugin-plot-distrib.control +libgoffice-0.8-plugin-plot-distrib.list +libgoffice-0.8-plugin-plot-pie.control +libgoffice-0.8-plugin-plot-pie.list +libgoffice-0.8-plugin-plot-radar.control +libgoffice-0.8-plugin-plot-radar.list +libgoffice-0.8-plugin-plot-surface.control +libgoffice-0.8-plugin-plot-surface.list +libgoffice-0.8-plugin-plot-xy.control +libgoffice-0.8-plugin-plot-xy.list +libgoffice-0.8-plugin-reg-linear.control +libgoffice-0.8-plugin-reg-linear.list +libgoffice-0.8-plugin-reg-logfit.control +libgoffice-0.8-plugin-reg-logfit.list +libgoffice-0.8-plugin-smoothing.control +libgoffice-0.8-plugin-smoothing.list +libgpewidget1.control +libgpewidget1.list +libgpewidget1.postinst +libgpg-error0.control +libgpg-error0.list +libgpg-error0.postinst +libgpgme11.control +libgpgme11.list +libgpgme11.postinst +libgsf.control +libgsf.list +libgsf.postinst +libgsf.prerm +libgsm0710-0.control +libgsm0710-0.list +libgsm0710-0.postinst +libgsm0710mux0.control +libgsm0710mux0.list +libgsm0710mux0.postinst +libgsm1.control +libgsm1.list +libgsm1.postinst +libgstaudio-0.10-0.control +libgstaudio-0.10-0.list +libgstaudio-0.10-0.postinst +libgstfarsight-0.10-0.control +libgstfarsight-0.10-0.list +libgstfarsight-0.10-0.postinst +libgstinterfaces-0.10-0.control +libgstinterfaces-0.10-0.list +libgstinterfaces-0.10-0.postinst +libgstnetbuffer-0.10-0.control +libgstnetbuffer-0.10-0.list +libgstnetbuffer-0.10-0.postinst +libgstpbutils-0.10-0.control +libgstpbutils-0.10-0.list +libgstpbutils-0.10-0.postinst +libgstrtp-0.10-0.control +libgstrtp-0.10-0.list +libgstrtp-0.10-0.postinst +libgsttag-0.10-0.control +libgsttag-0.10-0.list +libgsttag-0.10-0.postinst +libgstvideo-0.10-0.control +libgstvideo-0.10-0.list +libgstvideo-0.10-0.postinst +libgthread-2.0-0.control +libgthread-2.0-0.list +libgthread-2.0-0.postinst +libgypsy0.control +libgypsy0.list +libgypsy0.postinst +libical.control +libical.list +libical.postinst +libice6.control +libice6.list +libice6.postinst +libicudata36.control +libicudata36.list +libicudata36.postinst +libicui18n36.control +libicui18n36.list +libicui18n36.postinst +libicuuc36.control +libicuuc36.list +libicuuc36.postinst +libid3tag0.control +libid3tag0.list +libid3tag0.postinst +libidl-2-0.control +libidl-2-0.list +libidl-2-0.postinst +libidn.control +libidn.list +libidn.postinst +libimlib2-1.control +libimlib2-1.list +libimlib2-1.postinst +libjasper1.control +libjasper1.list +libjasper1.postinst +libjpeg62.control +libjpeg62.list 
+libjpeg62.postinst +liblinebreak1.control +liblinebreak1.list +liblinebreak1.postinst +liblinphone3.control +liblinphone3.list +liblinphone3.postinst +liblockfile.control +liblockfile.list +liblockfile.postinst +libltdl7.control +libltdl7.list +libltdl7.postinst +liblzo1.control +liblzo1.list +liblzo1.postinst +libmad0.control +libmad0.list +libmad0.postinst +libmediastreamer0.control +libmediastreamer0.list +libmediastreamer0.postinst +libmp3lame0.control +libmp3lame0.list +libmp3lame0.postinst +libmpfr1.control +libmpfr1.list +libmpfr1.postinst +libnice.control +libnice.list +libnice.postinst +libnl2.control +libnl2.list +libnl2.postinst +libnl-genl2.control +libnl-genl2.list +libnl-genl2.postinst +libnl-nf2.control +libnl-nf2.list +libnl-nf2.postinst +libnl-route2.control +libnl-route2.list +libnl-route2.postinst +libode0.control +libode0.list +libode0.postinst +libogg0.control +libogg0.list +libogg0.postinst +liboil.control +liboil.list +liboil.postinst +libopkg0.control +libopkg0.list +libopkg0.postinst +libortp8.control +libortp8.list +libortp8.postinst +libosip2-3.control +libosip2-3.list +libosip2-3.postinst +libpam-base-files.control +libpam-base-files.list +libpam.control +libpam.list +libpam-meta.control +libpam-meta.list +libpam.postinst +libpcap.control +libpcap.list +libpcap.postinst +libpciaccess0.control +libpciaccess0.list +libpciaccess0.postinst +libperl5.control +libperl5.list +libperl5.postinst +libphone-ui0.conffiles +libphone-ui0.control +libphone-ui0.list +libphone-ui0.postinst +libphone-ui-shr.control +libphone-ui-shr.list +libphone-utils0.conffiles +libphone-utils0.control +libphone-utils0.list +libphone-utils0.postinst +libpixman-1-0.control +libpixman-1-0.list +libpixman-1-0.postinst +libpng12-0.control +libpng12-0.list +libpng12-0.postinst +libpng.control +libpng.list +libpoppler5.control +libpoppler5.list +libpoppler5.postinst +libpoppler-glib4.control +libpoppler-glib4.list +libpoppler-glib4.postinst +libpopt0.control +libpopt0.list +libpopt0.postinst +libportaudio2.control +libportaudio2.list +libportaudio2.postinst +libpostproc51.control +libpostproc51.list +libpostproc51.postinst +libpthread-stubs0.control +libpthread-stubs0.list +libpthread-stubs0.postinst +libpurple.control +libpurple.list +libpurple-plugin-ssl.control +libpurple-plugin-ssl-gnutls.control +libpurple-plugin-ssl-gnutls.list +libpurple-plugin-ssl.list +libpurple.postinst +libpurple.prerm +libpurple-protocol-icq.control +libpurple-protocol-icq.list +libpurple-protocol-irc.control +libpurple-protocol-irc.list +libpurple-protocol-msn.control +libpurple-protocol-msn.list +libpurple-protocol-xmpp.control +libpurple-protocol-xmpp.list +libpyglib-2.0-python0.control +libpyglib-2.0-python0.list +libpyglib-2.0-python0.postinst +libpython2.6-1.0.control +libpython2.6-1.0.list +libpython2.6-1.0.postinst +libreadline5.control +libreadline5.list +libreadline5.postinst +librsvg-2-2.control +librsvg-2-2.list +librsvg-2-2.postinst +librsvg-2-gtk.control +librsvg-2-gtk.list +librsvg-2-gtk.postinst +libschroedinger-1.0-0.control +libschroedinger-1.0-0.list +libschroedinger-1.0-0.postinst +libsdl-1.2-0.control +libsdl-1.2-0.list +libsdl-1.2-0.postinst +libsdl-image-1.2-0.control +libsdl-image-1.2-0.list +libsdl-image-1.2-0.postinst +libsdl-mixer-1.2-0.control +libsdl-mixer-1.2-0.list +libsdl-mixer-1.2-0.postinst +libsdl-ttf-2.0-0.control +libsdl-ttf-2.0-0.list +libsdl-ttf-2.0-0.postinst +libsm6.control +libsm6.list +libsm6.postinst +libsoup-2.2-8.control +libsoup-2.2-8.list +libsoup-2.2-8.postinst 
+libsoup-2.4-1.control +libsoup-2.4-1.list +libsoup-2.4-1.postinst +libspeex1.control +libspeex1.list +libspeex1.postinst +libspeexdsp1.control +libspeexdsp1.list +libspeexdsp1.postinst +libsqlite0.control +libsqlite0.list +libsqlite0.postinst +libsqlite3-0.control +libsqlite3-0.list +libsqlite3-0.postinst +libss2.control +libss2.list +libss2.postinst +libssl0.9.8.control +libssl0.9.8.list +libssl0.9.8.postinst +libstartup-notification-1-0.control +libstartup-notification-1-0.list +libstartup-notification-1-0.postinst +libstdc++6.control +libstdc++6.list +libstdc++6.postinst +libswscale0.control +libswscale0.list +libswscale0.postinst +libsysfs2.control +libsysfs2.list +libsysfs2.postinst +libtheora0.control +libtheora0.list +libtheora0.postinst +libthread-db1.control +libthread-db1.list +libthread-db1.postinst +libtiff5.control +libtiff5.list +libtiff5.postinst +libts-1.0-0.control +libts-1.0-0.list +libts-1.0-0.postinst +libungif4.control +libungif4.list +libungif4.postinst +libusb-0.1-4.control +libusb-0.1-4.list +libusb-0.1-4.postinst +libuuid1.control +libuuid1.list +libuuid1.postinst +libvorbis0.control +libvorbis0.list +libvorbis0.postinst +libvte9.control +libvte9.list +libvte9.postinst +libwebkit-1.0-2.control +libwebkit-1.0-2.list +libwebkit-1.0-2.postinst +libwrap0.control +libwrap0.list +libwrap0.postinst +libx11-6.control +libx11-6.list +libx11-6.postinst +libx11-locale.control +libx11-locale.list +libxau6.control +libxau6.list +libxau6.postinst +libxaw7-7.control +libxaw7-7.list +libxaw7-7.postinst +libxcalibrate0.control +libxcalibrate0.list +libxcalibrate0.postinst +libxcomposite1.control +libxcomposite1.list +libxcomposite1.postinst +libxcursor1.control +libxcursor1.list +libxcursor1.postinst +libxdamage1.control +libxdamage1.list +libxdamage1.postinst +libxdmcp6.control +libxdmcp6.list +libxdmcp6.postinst +libxext6.control +libxext6.list +libxext6.postinst +libxfixes3.control +libxfixes3.list +libxfixes3.postinst +libxfont1.control +libxfont1.list +libxfont1.postinst +libxfontcache1.control +libxfontcache1.list +libxfontcache1.postinst +libxft2.control +libxft2.list +libxft2.postinst +libxi6.control +libxi6.list +libxi6.postinst +libxinerama1.control +libxinerama1.list +libxinerama1.postinst +libxkbfile1.control +libxkbfile1.list +libxkbfile1.postinst +libxml2.control +libxml2.list +libxml2.postinst +libxmu6.control +libxmu6.list +libxmu6.postinst +libxmuu1.control +libxmuu1.list +libxmuu1.postinst +libxp6.control +libxp6.list +libxp6.postinst +libxpm4.control +libxpm4.list +libxpm4.postinst +libxrandr2.control +libxrandr2.list +libxrandr2.postinst +libxrender1.control +libxrender1.list +libxrender1.postinst +libxslt.control +libxslt.list +libxslt.postinst +libxss1.control +libxss1.list +libxss1.postinst +libxt6.control +libxt6.list +libxt6.postinst +libxtst6.control +libxtst6.list +libxtst6.postinst +libxv1.control +libxv1.list +libxv1.postinst +libxxf86dga1.control +libxxf86dga1.list +libxxf86dga1.postinst +libxxf86misc1.control +libxxf86misc1.list +libxxf86misc1.postinst +libxxf86vm1.control +libxxf86vm1.list +libxxf86vm1.postinst +libyaml-0-2.control +libyaml-0-2.list +libyaml-0-2.postinst +libz1.control +libz1.list +libz1.postinst +linphone.control +linphone.list +locale-base-en-us.control +locale-base-en-us.list +logrotate.conffiles +logrotate.control +logrotate.list +logrotate.postinst +logrotate.postrm +lsof.control +lsof.list +ltrace.control +ltrace.list +make.control +make.list +matchbox-keyboard-im.control +matchbox-keyboard-im.list 
+matchbox-keyboard-im.postinst +matchbox-keyboard-im.postrm +mbuffer.control +mbuffer.list +mdbus2.control +mdbus2.list +mesa-dri.control +mesa-dri.list +mesa-dri.postinst +mime-support.control +mime-support.list +mioctl.control +mioctl.list +mkdump.control +mkdump.list +mobile-broadband-provider-info.control +mobile-broadband-provider-info.list +module-init-tools.control +module-init-tools-depmod.control +module-init-tools-depmod.list +module-init-tools-depmod.postinst +module-init-tools-depmod.prerm +module-init-tools.list +module-init-tools.postinst +module-init-tools.prerm +modutils-initscripts.control +modutils-initscripts.list +modutils-initscripts.postinst +modutils-initscripts.postrm +modutils-initscripts.prerm +mokomaze.control +mokomaze.list +mplayer-common.control +mplayer-common.list +mplayer.conffiles +mplayer.control +mplayer.list +mtd-utils.control +mtd-utils.list +mterm2.control +mterm2.list +nano.control +nano.list +navit.conffiles +navit.control +navit-icons.control +navit-icons.list +navit.list +ncurses.control +ncurses.list +ncurses.postinst +netbase.conffiles +netbase.control +netbase.list +netbase.postinst +netbase.postrm +netbase.prerm +nfs-utils-client.control +nfs-utils-client.list +nmon.control +nmon.list +numptyphysics.control +numptyphysics.list +openssh.control +openssh-keygen.control +openssh-keygen.list +openssh.list +openssh-scp.control +openssh-scp.list +openssh-scp.postinst +openssh-scp.postrm +openssh-sftp-server.control +openssh-sftp-server.list +openssh-ssh.conffiles +openssh-ssh.control +openssh-sshd.conffiles +openssh-sshd.control +openssh-sshd.list +openssh-sshd.postinst +openssh-sshd.postrm +openssh-ssh.list +openssh-ssh.postinst +openssh-ssh.postrm +openssl.control +openssl.list +openvpn.control +openvpn.list +opimd-utils-cli.control +opimd-utils-cli.list +opimd-utils-data.control +opimd-utils-data.list +opimd-utils-notes.control +opimd-utils-notes.list +opkg-collateral.conffiles +opkg-collateral.control +opkg-collateral.list +opkg.control +opkg.list +opkg.postinst +opkg.postrm +orbit2.control +orbit2.list +orbit2.postinst +pam-plugin-access.control +pam-plugin-access.list +pam-plugin-debug.control +pam-plugin-debug.list +pam-plugin-deny.control +pam-plugin-deny.list +pam-plugin-echo.control +pam-plugin-echo.list +pam-plugin-env.control +pam-plugin-env.list +pam-plugin-exec.control +pam-plugin-exec.list +pam-plugin-faildelay.control +pam-plugin-faildelay.list +pam-plugin-filter.control +pam-plugin-filter.list +pam-plugin-ftp.control +pam-plugin-ftp.list +pam-plugin-group.control +pam-plugin-group.list +pam-plugin-issue.control +pam-plugin-issue.list +pam-plugin-keyinit.control +pam-plugin-keyinit.list +pam-plugin-lastlog.control +pam-plugin-lastlog.list +pam-plugin-limits.control +pam-plugin-limits.list +pam-plugin-listfile.control +pam-plugin-listfile.list +pam-plugin-localuser.control +pam-plugin-localuser.list +pam-plugin-loginuid.control +pam-plugin-loginuid.list +pam-plugin-mail.control +pam-plugin-mail.list +pam-plugin-mkhomedir.control +pam-plugin-mkhomedir.list +pam-plugin-motd.control +pam-plugin-motd.list +pam-plugin-namespace.control +pam-plugin-namespace.list +pam-plugin-nologin.control +pam-plugin-nologin.list +pam-plugin-permit.control +pam-plugin-permit.list +pam-plugin-pwhistory.control +pam-plugin-pwhistory.list +pam-plugin-rhosts.control +pam-plugin-rhosts.list +pam-plugin-rootok.control +pam-plugin-rootok.list +pam-plugin-securetty.control +pam-plugin-securetty.list +pam-plugin-shells.control +pam-plugin-shells.list 
+pam-plugin-stress.control +pam-plugin-stress.list +pam-plugin-succeed-if.control +pam-plugin-succeed-if.list +pam-plugin-tally2.control +pam-plugin-tally2.list +pam-plugin-tally.control +pam-plugin-tally.list +pam-plugin-time.control +pam-plugin-time.list +pam-plugin-timestamp.control +pam-plugin-timestamp.list +pam-plugin-umask.control +pam-plugin-umask.list +pam-plugin-unix.control +pam-plugin-unix.list +pam-plugin-warn.control +pam-plugin-warn.list +pam-plugin-wheel.control +pam-plugin-wheel.list +pam-plugin-xauth.control +pam-plugin-xauth.list +pango.control +pango.list +pango-module-basic-fc.control +pango-module-basic-fc.list +pango-module-basic-fc.postinst +pango-module-basic-x.control +pango-module-basic-x.list +pango-module-basic-x.postinst +pango.postinst +perl.control +perl.list +perl-module-carp.control +perl-module-carp.list +perl-module-exporter.control +perl-module-exporter.list +perl-module-file-basename.control +perl-module-file-basename.list +perl-module-file-path.control +perl-module-file-path.list +perl-module-strict.control +perl-module-strict.list +perl-module-warnings.control +perl-module-warnings.list +phonefsod.conffiles +phonefsod.control +phonefsod.list +phonefsod.postinst +phonefsod.postrm +phonefsod.prerm +phoneui-apps-contacts.control +phoneui-apps-contacts.list +phoneui-apps-dialer.control +phoneui-apps-dialer.list +phoneui-apps-messages.control +phoneui-apps-messages.list +phoneui-apps-quick-settings.control +phoneui-apps-quick-settings.list +phoneuid.conffiles +phoneuid.control +phoneuid.list +pidgin.control +pidgin-data.control +pidgin-data.list +pidgin.list +pingus.control +pingus.list +pointercal.control +pointercal.list +policykit.control +policykit.list +policykit.postinst +policykit.postrm +poppler-data.control +poppler-data.list +portmap.control +portmap.list +portmap.postinst +portmap.postrm +portmap.prerm +powertop.control +powertop.list +ppp.conffiles +ppp.control +ppp-dialin.control +ppp-dialin.list +ppp-dialin.postinst +ppp-dialin.postrm +ppp.list +ppp.postinst +procps.conffiles +procps.control +procps.list +procps.postinst +procps.postrm +procps.prerm +pth.control +pth.list +pth.postinst +pxaregs.control +pxaregs.list +pyefl-sudoku.control +pyefl-sudoku.list +pyphonelog.control +pyphonelog.list +python-codecs.control +python-codecs.list +python-core.control +python-core.list +python-crypt.control +python-crypt.list +python-ctypes.control +python-ctypes.list +python-datetime.control +python-datetime.list +python-dateutil.control +python-dateutil.list +python-dbus.control +python-dbus.list +python-difflib.control +python-difflib.list +python-ecore.control +python-ecore.list +python-edbus.control +python-edbus.list +python-edje.control +python-edje.list +python-elementary.control +python-elementary.list +python-evas.control +python-evas.list +python-fcntl.control +python-fcntl.list +python-gst.control +python-gst.list +python-io.control +python-io.list +python-lang.control +python-lang.list +python-logging.control +python-logging.list +python-math.control +python-math.list +python-multiprocessing.control +python-multiprocessing.list +python-pexpect.control +python-pexpect.list +python-phoneutils.control +python-phoneutils.list +python-pickle.control +python-pickle.list +python-pprint.control +python-pprint.list +python-pyalsaaudio.control +python-pyalsaaudio.list +python-pycairo.control +python-pycairo.list +python-pygobject.control +python-pygobject.list +python-pygtk.control +python-pygtk.list +python-pyrtc.control +python-pyrtc.list 
+python-pyserial.control +python-pyserial.list +python-pyyaml.control +python-pyyaml.list +python-readline.control +python-readline.list +python-re.control +python-re.list +python-resource.control +python-resource.list +python-shell.control +python-shell.list +python-sqlite3.control +python-sqlite3.list +python-stringold.control +python-stringold.list +python-subprocess.control +python-subprocess.list +python-syslog.control +python-syslog.list +python-terminal.control +python-terminal.list +python-textutils.control +python-textutils.list +python-threading.control +python-threading.list +python-vobject.control +python-vobject.list +python-xml.control +python-xml.list +python-zlib.control +python-zlib.list +rgb.control +rgb.list +rsync.control +rsync.list +s3c24xx-gpio.control +s3c24xx-gpio.list +s3c64xx-gpio.control +s3c64xx-gpio.list +screen.control +screen.list +sed.control +sed.list +sed.postinst +sed.prerm +serial-forward.control +serial-forward.list +shared-mime-info.control +shared-mime-info.list +shr-settings-addons-illume.control +shr-settings-addons-illume.list +shr-settings-backup-configuration.conffiles +shr-settings-backup-configuration.control +shr-settings-backup-configuration.list +shr-settings.control +shr-settings.list +shr-splash.control +shr-splash.list +shr-splash.postinst +shr-splash.postrm +shr-splash.prerm +shr-splash-theme-simple.control +shr-splash-theme-simple.list +shr-splash-theme-simple.postinst +shr-splash-theme-simple.postrm +shr-theme.control +shr-theme-gry.control +shr-theme-gry.list +shr-theme-gtk-e17lookalike.control +shr-theme-gtk-e17lookalike.list +shr-theme-gtk-e17lookalike.postinst +shr-theme-gtk-e17lookalike.postrm +shr-theme.list +shr-wizard.control +shr-wizard.list +socat.control +socat.list +strace.control +strace.list +synergy.control +synergy.list +sysfsutils.control +sysfsutils.list +sysstat.control +sysstat.list +sysvinit.control +sysvinit-inittab.conffiles +sysvinit-inittab.control +sysvinit-inittab.list +sysvinit.list +sysvinit-pidof.control +sysvinit-pidof.list +sysvinit-pidof.postinst +sysvinit-pidof.prerm +sysvinit.postinst +sysvinit.postrm +sysvinit.prerm +sysvinit-utils.control +sysvinit-utils.list +sysvinit-utils.postinst +sysvinit-utils.prerm +tangogps.control +tangogps.list +task-base-apm.control +task-base-apm.list +task-base-bluetooth.control +task-base-bluetooth.list +task-base.control +task-base-ext2.control +task-base-ext2.list +task-base-kernel26.control +task-base-kernel26.list +task-base.list +task-base-ppp.control +task-base-ppp.list +task-base-usbgadget.control +task-base-usbgadget.list +task-base-usbhost.control +task-base-usbhost.list +task-base-vfat.control +task-base-vfat.list +task-base-wifi.control +task-base-wifi.list +task-boot.control +task-boot.list +task-cli-tools.control +task-cli-tools-debug.control +task-cli-tools-debug.list +task-cli-tools.list +task-distro-base.control +task-distro-base.list +task-fonts-truetype-core.control +task-fonts-truetype-core.list +task-fso2-compliance.control +task-fso2-compliance.list +task-machine-base.control +task-machine-base.list +task-shr-apps.control +task-shr-apps.list +task-shr-cli.control +task-shr-cli.list +task-shr-games.control +task-shr-games.list +task-shr-gtk.control +task-shr-gtk.list +task-shr-minimal-apps.control +task-shr-minimal-apps.list +task-shr-minimal-audio.control +task-shr-minimal-audio.list +task-shr-minimal-base.control +task-shr-minimal-base.list +task-shr-minimal-cli.control +task-shr-minimal-cli.list +task-shr-minimal-fso.control 
+task-shr-minimal-fso.list +task-shr-minimal-gtk.control +task-shr-minimal-gtk.list +task-shr-minimal-x.control +task-shr-minimal-x.list +task-x11-illume.control +task-x11-illume.list +task-x11-server.control +task-x11-server.list +task-x11-utils.control +task-x11-utils.list +tcpdump.control +tcpdump.list +tinylogin.control +tinylogin.list +tinylogin.postinst +tinylogin.prerm +tslib-calibrate.control +tslib-calibrate.list +tslib-conf.control +tslib-conf.list +ttf-dejavu-common.control +ttf-dejavu-common.list +ttf-dejavu-common.postinst +ttf-dejavu-common.postrm +ttf-dejavu-sans.control +ttf-dejavu-sans.list +ttf-dejavu-sans-mono.control +ttf-dejavu-sans-mono.list +ttf-dejavu-sans-mono.postinst +ttf-dejavu-sans-mono.postrm +ttf-dejavu-sans.postinst +ttf-dejavu-sans.postrm +ttf-liberation-mono.control +ttf-liberation-mono.list +ttf-liberation-mono.postinst +ttf-liberation-mono.postrm +tzdata-africa.control +tzdata-africa.list +tzdata-americas.control +tzdata-americas.list +tzdata-asia.control +tzdata-asia.list +tzdata-australia.control +tzdata-australia.list +tzdata.conffiles +tzdata.control +tzdata-europe.control +tzdata-europe.list +tzdata.list +udev.control +udev.list +udev.postinst +udev.postrm +udev.prerm +udev-utils.control +udev-utils.list +update-modules.control +update-modules.list +update-modules.postinst +update-rc.d.control +update-rc.d.list +usb-gadget-mode.control +usb-gadget-mode.list +usb-gadget-mode.postinst +usb-gadget-mode.postrm +usbutils.control +usbutils.list +util-linux-ng-blkid.control +util-linux-ng-blkid.list +util-linux-ng-blkid.postinst +util-linux-ng-blkid.prerm +util-linux-ng-cfdisk.control +util-linux-ng-cfdisk.list +util-linux-ng.control +util-linux-ng-fdisk.control +util-linux-ng-fdisk.list +util-linux-ng-fdisk.postinst +util-linux-ng-fdisk.prerm +util-linux-ng-fsck.control +util-linux-ng-fsck.list +util-linux-ng-fsck.postinst +util-linux-ng-fsck.prerm +util-linux-ng.list +util-linux-ng-losetup.control +util-linux-ng-losetup.list +util-linux-ng-losetup.postinst +util-linux-ng-losetup.prerm +util-linux-ng-mountall.control +util-linux-ng-mountall.list +util-linux-ng-mountall.postinst +util-linux-ng-mountall.prerm +util-linux-ng-mount.control +util-linux-ng-mount.list +util-linux-ng-mount.postinst +util-linux-ng-mount.prerm +util-linux-ng.postinst +util-linux-ng.prerm +util-linux-ng-readprofile.control +util-linux-ng-readprofile.list +util-linux-ng-readprofile.postinst +util-linux-ng-readprofile.prerm +util-linux-ng-sfdisk.control +util-linux-ng-sfdisk.list +util-linux-ng-swaponoff.control +util-linux-ng-swaponoff.list +util-linux-ng-swaponoff.postinst +util-linux-ng-swaponoff.prerm +util-linux-ng-umount.control +util-linux-ng-umount.list +util-linux-ng-umount.postinst +util-linux-ng-umount.prerm +vagalume.control +vagalume.list +vala-terminal.control +vala-terminal.list +ventura.control +ventura.list +vnc.control +vnc.list +vpnc.conffiles +vpnc.control +vpnc.list +vte-termcap.control +vte-termcap.list +wireless-tools.control +wireless-tools.list +wmiconfig.control +wmiconfig.list +wpa-supplicant.control +wpa-supplicant.list +wpa-supplicant-passphrase.control +wpa-supplicant-passphrase.list +wv.control +wv.list +wv.postinst +x11vnc.control +x11vnc.list +xauth.control +xauth.list +xcursor-transparent-theme.control +xcursor-transparent-theme.list +xdpyinfo.control +xdpyinfo.list +xf86-input-evdev.control +xf86-input-evdev.list +xf86-input-keyboard.control +xf86-input-keyboard.list +xf86-input-mouse.control +xf86-input-mouse.list +xf86-input-tslib.control 
+xf86-input-tslib.list +xf86-video-glamo.control +xf86-video-glamo.list +xhost.control +xhost.list +xinit.control +xinit.list +xinput-calibrator.control +xinput-calibrator.list +xinput.control +xinput.list +xkbcomp.control +xkbcomp.list +xkeyboard-config.control +xkeyboard-config.list +xmodmap.control +xmodmap.list +xorg-minimal-fonts.control +xorg-minimal-fonts.list +xrandr.control +xrandr.list +xserver-kdrive-common.control +xserver-kdrive-common.list +xserver-nodm-init.control +xserver-nodm-init.list +xserver-nodm-init.postinst +xserver-nodm-init.postrm +xserver-nodm-init.prerm +xserver-xorg-conf.conffiles +xserver-xorg-conf.control +xserver-xorg-conf.list +xserver-xorg.control +xserver-xorg-extension-dri2.control +xserver-xorg-extension-dri2.list +xserver-xorg-extension-dri.control +xserver-xorg-extension-dri.list +xserver-xorg-extension-glx.control +xserver-xorg-extension-glx.list +xserver-xorg.list +xset.control +xset.list +xtscal.control +xtscal.list" + +mount /mnt/ceph-fuse +: cd /mnt/ceph-fuse + +mkdir test-1774 +cd test-1774 +for f in $list; do + touch $f +done + +cd +umount /mnt/ceph-fuse +mount /mnt/ceph-fuse +cd - + +# this worked before the 1774 fix +diff <(ls) <(echo "$list") + +# but this failed, because we cached the dirlist wrong +# update-modules.postinst used to be the missing file, +# the last one in the first dirent set passed to ceph-fuse +diff <(ls) <(echo "$list") + +cd .. +rm -rf test-1774 + +cd +umount /mnt/ceph-fuse diff --git a/qa/clusters/2-node-mgr.yaml b/qa/clusters/2-node-mgr.yaml new file mode 100644 index 000000000..b1c29a866 --- /dev/null +++ b/qa/clusters/2-node-mgr.yaml @@ -0,0 +1,10 @@ +roles: +- [mgr.x, mon.a, mon.c, mds.a, mds.c, osd.0, client.0] +- [mgr.y, mgr.z, mon.b, mds.b, osd.1, osd.2, osd.3, client.1] +log-rotate: + ceph-mds: 10G + ceph-osd: 10G +openstack: + - volumes: # attached to each instance + count: 2 + size: 30 # GB diff --git a/qa/clusters/extra-client.yaml b/qa/clusters/extra-client.yaml new file mode 100644 index 000000000..33fa505b7 --- /dev/null +++ b/qa/clusters/extra-client.yaml @@ -0,0 +1,14 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, mds.a, osd.3, osd.4, osd.5] +- [client.0] +- [client.1] +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true
\ No newline at end of file diff --git a/qa/clusters/fixed-1.yaml b/qa/clusters/fixed-1.yaml new file mode 100644 index 000000000..d8e5898b9 --- /dev/null +++ b/qa/clusters/fixed-1.yaml @@ -0,0 +1,14 @@ +overrides: + ceph-deploy: + conf: + global: + osd pool default size: 2 + osd crush chooseleaf type: 0 + osd pool default pg num: 128 + osd pool default pgp num: 128 + ceph: + conf: + osd: + osd shutdown pgref assert: true +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] diff --git a/qa/clusters/fixed-2.yaml b/qa/clusters/fixed-2.yaml new file mode 100644 index 000000000..e4448bb20 --- /dev/null +++ b/qa/clusters/fixed-2.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mon.c, mgr.y, osd.0, osd.1, osd.2, osd.3, client.0, node-exporter.a] +- [mon.b, mgr.x, osd.4, osd.5, osd.6, osd.7, client.1, prometheus.a, node-exporter.b] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/clusters/fixed-3-cephfs.yaml b/qa/clusters/fixed-3-cephfs.yaml new file mode 100644 index 000000000..9e021b3bd --- /dev/null +++ b/qa/clusters/fixed-3-cephfs.yaml @@ -0,0 +1,16 @@ +roles: +- [mon.a, mds.a, mgr.x, osd.0, osd.1] +- [mon.b, mds.b, mon.c, mgr.y, osd.2, osd.3] +- [client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +log-rotate: + ceph-mds: 10G + ceph-osd: 10G +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/clusters/fixed-3.yaml b/qa/clusters/fixed-3.yaml new file mode 100644 index 000000000..ddc79a84b --- /dev/null +++ b/qa/clusters/fixed-3.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.a, mon.c, mgr.x, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mgr.y, osd.4, osd.5, osd.6, osd.7] +- [client.0] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/clusters/fixed-4.yaml b/qa/clusters/fixed-4.yaml new file mode 100644 index 000000000..df767f357 --- /dev/null +++ b/qa/clusters/fixed-4.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.y, osd.0, osd.4, osd.8, osd.12] +- [mon.b, osd.1, osd.5, osd.9, osd.13] +- [mon.c, osd.2, osd.6, osd.10, osd.14] +- [mgr.x, osd.3, osd.7, osd.11, osd.15, client.0] +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true
\ No newline at end of file diff --git a/qa/config/crimson_qa_overrides.yaml b/qa/config/crimson_qa_overrides.yaml new file mode 100644 index 000000000..670b98bc1 --- /dev/null +++ b/qa/config/crimson_qa_overrides.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + global: + enable experimental unrecoverable data corrupting features: crimson + mon: + osd pool default crimson: true + osd: + crimson seastar smp: 3 + flavor: crimson + workunit: + env: + CRIMSON_COMPAT: '1' diff --git a/qa/config/rados.yaml b/qa/config/rados.yaml new file mode 100644 index 000000000..710847f59 --- /dev/null +++ b/qa/config/rados.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + osd: + osd op queue: debug_random + osd op queue cut off: debug_random + osd debug verify missing on start: true + osd debug verify cached snaps: true + bluestore zero block detection: true + osd mclock override recovery settings: true + osd mclock profile: high_recovery_ops + mon: + mon scrub interval: 300 diff --git a/qa/crontab/teuthology-cronjobs b/qa/crontab/teuthology-cronjobs new file mode 100644 index 000000000..783dcbd78 --- /dev/null +++ b/qa/crontab/teuthology-cronjobs @@ -0,0 +1,143 @@ +PATH=/home/teuthology/src/teuthology_main/virtualenv/bin:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin +TEUTH_CEPH_REPO='https://github.com/ceph/ceph.git' +TEUTH_SUITE_REPO='https://github.com/ceph/ceph.git' +MAILTO="ceph-infra@redhat.com;yweinste@redhat.com" +CEPH_QA_EMAIL="ceph-qa@ceph.io" + +### !!!!!!!!!!!!!!!!!!!!!!!!!! +## THIS CRONTAB MUST NOT BE EDITED MANUALLY !!!! +## AUTOMATED CRONTAB UPDATING +## https://code.google.com/archive/p/chkcrontab/wikis/CheckCrontab.wiki +## https://github.com/ceph/ceph-cm-ansible/pull/391 +## crontab is in https://github.com/ceph/ceph/main/qa/crontab/teuthology-cronjobs +# chkcrontab: disable-msg=INVALID_USER +# chkcrontab: disable-msg=USER_NOT_FOUND +@daily /bin/bash /home/teuthology/bin/update-crontab.sh +### !!!!!!!!!!!!!!!!!!!!!!!!!! + + +# Ensure teuthology is up-to-date +@daily cd /home/teuthology/src/teuthology_main && /home/teuthology/bin/cron_wrapper git pull +@daily cd /home/teuthology/src/git.ceph.com_ceph_main && /home/teuthology/bin/cron_wrapper git pull +# Ensure ceph-sepia-secrets is up-to-date +*/5 * * * * cd /home/teuthology/ceph-sepia-secrets && /home/teuthology/bin/cron_wrapper git pull + + +#Publish this crontab to the Tracker page http://tracker.ceph.com/projects/ceph-releases/wiki/Crontab +@daily crontab=$(crontab -l | perl -p -e 's/</&lt;/g; s/>/&gt;/g; s/&/&amp;/g') ; header=$(echo h3. Crontab ; echo) ; curl --verbose -X PUT --header 'Content-type: application/xml' --data-binary '<?xml version="1.0"?><wiki_page><text>'"$header"'<pre>'"$crontab"'</pre></text></wiki_page>' http://tracker.ceph.com/projects/ceph-releases/wiki/sepia.xml?key=$(cat /etc/redmine-key) + +## This is an example only, don't remove ! +## to see result open http://tracker.ceph.com/projects/ceph-qa-suite/wiki/ceph-ansible +@daily SUITE_NAME=~/src/ceph-qa-suite_main/suites/ceph-ansible; crontab=$(teuthology-describe-tests --show-facet no $SUITE_NAME | perl -p -e 's/</&lt;/g; s/>/&gt;/g; s/&/&amp;/g') ; header=$(echo h4. 
$SUITE_NAME ; echo " "; echo " ") ; curl --verbose -X PUT --header 'Content-type: application/xml' --data-binary '<?xml version="1.0"?><wiki_page><text>'"$header"'<pre>'"$crontab"'</pre></text></wiki_page>' http://tracker.ceph.com/projects/ceph-qa-suite/wiki/ceph-ansible.xml?key=$(cat /etc/redmine-key) + + +## ********** smoke tests on main, octopus, and pacific branches +# 0 5 * * 0,2,4 CEPH_BRANCH=main; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s smoke -k distro -e $CEPH_QA_EMAIL -p 70 +# 0 8 * * 5 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -m $MACHINE_NAME -s smoke -k distro -e $CEPH_QA_EMAIL -p 70 +# 7 8 * * 6 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -m $MACHINE_NAME -s smoke -k distro -e $CEPH_QA_EMAIL -p 70 + + +## ********** windows tests on main branch - weekly +# 00 03 * * 1 CEPH_BRANCH=main; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s windows -k distro -e $CEPH_QA_EMAIL + +## ********** crimson tests on main branch - weekly +# 01 01 * * 0 CEPH_BRANCH=main; MACHINE_NAME=smithi; SUITE_NAME=crimson-rados; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 100000 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL + +## quincy branch runs - weekly +## suites rados and rbd use --subset arg and must be call with schedule_subset.sh +## see script in https://github.com/ceph/ceph/tree/main/qa/machine_types + +# 01 07 * * 0 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=rados; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 100000 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 07 07 * * 0 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=orch; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 100000 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 01 02 * * 1 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=rbd; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 100000 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 15 03 * * 2 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=fs; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 32 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 15 11 * * 3 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=powercycle; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 100000 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 05 03 * * 4 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=rgw; KERNEL=distro; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s $SUITE_NAME -k $KERNEL -e $CEPH_QA_EMAIL +# 20 03 * * 5 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=krbd; KERNEL=testing; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s $SUITE_NAME -k $KERNEL -e $CEPH_QA_EMAIL + +### The suite below must run on bare-metal because it's performance suite and run 3 times to produce more data points +# 57 03 * * 6 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s 
perf-basic -k distro -e $CEPH_QA_EMAIL -N 3 + + +########################## + +#********** nautilus branch START - weekly + +# 25 13 * * 5 CEPH_BRANCH=nautilus; MACHINE_NAME=smithi; SUITE_NAME=kcephfs; KERNEL=testing; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 2999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 15 05 * * 0 CEPH_BRANCH=nautilus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s krbd -k testing -e $CEPH_QA_EMAIL + + +#********** nautilus branch END + +#********** octopus branch START - weekly + +# 30 03 * * 3 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; SUITE_NAME=rados; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 9999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 00 06 * * 4 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; SUITE_NAME=rbd; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 9999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 10 04 * * 5 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; SUITE_NAME=fs; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 9999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 15 13 * * 6 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; SUITE_NAME=multimds; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 9999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 15 12 * * 0 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; SUITE_NAME=powercycle; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 9999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL +# 05 05 * * 1 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s rgw -k distro -e $CEPH_QA_EMAIL +# 15 05 * * 2 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s krbd -k testing -e $CEPH_QA_EMAIL + +## upgrades suites for on octopus +# 30 02 * * 4 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -k distro -m $MACHINE_NAME -s upgrade/mimic-x -e $CEPH_QA_EMAIL +# 23 14 * * 5 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -k distro -n 100 -m $MACHINE_NAME -s upgrade/nautilus-x -e $CEPH_QA_EMAIL +# 25 01 * * 6 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade/octopus-p2p -k distro -e $CEPH_QA_EMAIL + + +## !!!! 
three suites below MUST use --suite-branch luminous, mimic, nautilus (see https://tracker.ceph.com/issues/24021) +## The suites below run without filters + +# 47 01 * * 5 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade/client-upgrade-luminous-octopus -k distro -e $CEPH_QA_EMAIL --suite-branch luminous -t py2 +# 50 01 * * 5 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade/client-upgrade-mimic-octopus -k distro -e $CEPH_QA_EMAIL --suite-branch mimic -t py2 +# 50 01 * * 5 CEPH_BRANCH=octopus; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade-clients/client-upgrade-nautilus-octopus -k distro -e $CEPH_QA_EMAIL --suite-branch nautilus + +#********** octopus branch END + + +#********** pacific branch START - frequency 4(2) times a week + +# 31 03 * * 0 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; SUITE_NAME=rados; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 99999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 500 --force-priority +# 07 06 * * 1 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; SUITE_NAME=rbd; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 99999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 500 --force-priority +# 17 04 * * 2 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; SUITE_NAME=fs; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 32 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 500 --force-priority +# 17 12 * * 3 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; SUITE_NAME=powercycle; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 9999 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 500 --force-priority +# 07 05 * * 4 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s rgw -k distro -e $CEPH_QA_EMAIL -p 500 +# 17 05 * * 5 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s krbd -k testing -e $CEPH_QA_EMAIL -p 500 +# 23 14 * * 6 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -k distro -n 100 -m $MACHINE_NAME -s upgrade/nautilus-x -e $CEPH_QA_EMAIL -p 500 +# 20 01 * * 6 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade-clients/client-upgrade-octopus-pacific -k distro -e $CEPH_QA_EMAIL --suite-branch octopus -p 500 + +# 20 07 * * 6 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade-clients/client-upgrade-nautilus-pacific -k distro -e $CEPH_QA_EMAIL --suite-branch nautilus -p 500 + + +# 22 14 * * 6 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; SUITE_NAME=upgrade:octopus-x; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 10 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 500 --force-priority + +# 25 01 * * 7 CEPH_BRANCH=pacific; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 
100 -m $MACHINE_NAME -s upgrade/pacific-p2p -k distro -e $CEPH_QA_EMAIL + + +#********** pacific branch END + + +### upgrade runs for quincy release +###### on smithi + +## !!!! the client suites below MUST use --suite-branch octopus, pacific (see https://tracker.ceph.com/issues/24021) + +# 20 01 * * 4 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade-clients/client-upgrade-octopus-quincy -k distro -e $CEPH_QA_EMAIL --suite-branch octopus + +# 25 01 * * 4 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade-clients/client-upgrade-pacific-quincy -k distro -e $CEPH_QA_EMAIL --suite-branch pacific + +# 22 14 * * 5 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=upgrade:octopus-x; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 10 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 70 --force-priority + +# 23 14 * * 5 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; SUITE_NAME=upgrade:pacific-x; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 10 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 70 --force-priority + + +# 35 01 * * 7 CEPH_BRANCH=quincy; MACHINE_NAME=smithi; /home/teuthology/bin/cron_wrapper teuthology-suite -v -c $CEPH_BRANCH -n 100 -m $MACHINE_NAME -s upgrade/quincy-p2p -k distro -e $CEPH_QA_EMAIL + + +### upgrade runs for reef release +###### on smithi + + +# 23 14 * * 6 CEPH_BRANCH=main; MACHINE_NAME=smithi; SUITE_NAME=upgrade:pacific-x; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 10 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 70 --force-priority + +# 23 14 * * 6 CEPH_BRANCH=main; MACHINE_NAME=smithi; SUITE_NAME=upgrade:quincy-x; KERNEL=distro; /home/teuthology/bin/cron_wrapper /home/teuthology/bin/schedule_subset.sh 10 $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL -p 70 --force-priority diff --git a/qa/debug/buildpackages.yaml b/qa/debug/buildpackages.yaml new file mode 100644 index 000000000..527ed6627 --- /dev/null +++ b/qa/debug/buildpackages.yaml @@ -0,0 +1,6 @@ +tasks: + - buildpackages: + machine: + disk: 40 # GB + ram: 15000 # MB + cpus: 16 diff --git a/qa/debug/mds_client.yaml b/qa/debug/mds_client.yaml new file mode 100644 index 000000000..c6fec3fc6 --- /dev/null +++ b/qa/debug/mds_client.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + mds: + debug ms: 1 + debug mds: 20 + client: + debug ms: 1 + debug client: 20
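The qa/debug/mds_client.yaml fragment above raises MDS and client logging purely through conf overrides that the ceph task merges into the job's ceph.conf. For ad-hoc debugging outside a scheduled run, a roughly equivalent effect can be had at runtime through the config database; this is only a sketch that reuses the option names from the fragment, not part of the fragment itself:

    ceph config set mds debug_mds 20
    ceph config set mds debug_ms 1
    ceph config set client debug_client 20
    ceph config set client debug_ms 1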
\ No newline at end of file diff --git a/qa/debug/mgr.yaml b/qa/debug/mgr.yaml new file mode 100644 index 000000000..1f8e9cbc2 --- /dev/null +++ b/qa/debug/mgr.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + conf: + mon: + debug mon: 20 + mgr: + debug mgr: 20 + debug ms: 1 + debug client: 20 + client: + debug client: 20 + debug mgrc: 20 + debug ms: 1 + osd: + debug mgrc: 20 + mds: + debug mgrc: 20 diff --git a/qa/debug/openstack-15G.yaml b/qa/debug/openstack-15G.yaml new file mode 100644 index 000000000..857ad22a2 --- /dev/null +++ b/qa/debug/openstack-15G.yaml @@ -0,0 +1,3 @@ +openstack: + - machine: + ram: 15000 # MB diff --git a/qa/debug/openstack-30G.yaml b/qa/debug/openstack-30G.yaml new file mode 100644 index 000000000..da7ed803a --- /dev/null +++ b/qa/debug/openstack-30G.yaml @@ -0,0 +1,3 @@ +openstack: + - machine: + ram: 30000 # MB diff --git a/qa/distros/.qa b/qa/distros/.qa new file mode 120000 index 000000000..a96aa0ea9 --- /dev/null +++ b/qa/distros/.qa @@ -0,0 +1 @@ +..
\ No newline at end of file diff --git a/qa/distros/a-supported-distro.yaml b/qa/distros/a-supported-distro.yaml new file mode 120000 index 000000000..33a40b6e4 --- /dev/null +++ b/qa/distros/a-supported-distro.yaml @@ -0,0 +1 @@ +all/centos_7.2.yaml
\ No newline at end of file diff --git a/qa/distros/all/centos.yaml b/qa/distros/all/centos.yaml new file mode 100644 index 000000000..1efcfa192 --- /dev/null +++ b/qa/distros/all/centos.yaml @@ -0,0 +1,2 @@ +os_type: centos +ktype: distro diff --git a/qa/distros/all/centos_6.3.yaml b/qa/distros/all/centos_6.3.yaml new file mode 100644 index 000000000..ab441ebe4 --- /dev/null +++ b/qa/distros/all/centos_6.3.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "6.3" +ktype: distro diff --git a/qa/distros/all/centos_6.4.yaml b/qa/distros/all/centos_6.4.yaml new file mode 100644 index 000000000..c0675434f --- /dev/null +++ b/qa/distros/all/centos_6.4.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "6.4" +ktype: distro diff --git a/qa/distros/all/centos_6.5.yaml b/qa/distros/all/centos_6.5.yaml new file mode 100644 index 000000000..2500389ee --- /dev/null +++ b/qa/distros/all/centos_6.5.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "6.5" +ktype: distro diff --git a/qa/distros/all/centos_7.0.yaml b/qa/distros/all/centos_7.0.yaml new file mode 100644 index 000000000..357b11f0d --- /dev/null +++ b/qa/distros/all/centos_7.0.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "7.0" +ktype: distro diff --git a/qa/distros/all/centos_7.1.yaml b/qa/distros/all/centos_7.1.yaml new file mode 100644 index 000000000..022620d9e --- /dev/null +++ b/qa/distros/all/centos_7.1.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "7.1" +ktype: distro diff --git a/qa/distros/all/centos_7.2.yaml b/qa/distros/all/centos_7.2.yaml new file mode 100644 index 000000000..9a918f855 --- /dev/null +++ b/qa/distros/all/centos_7.2.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "7.2" +ktype: distro diff --git a/qa/distros/all/centos_7.3.yaml b/qa/distros/all/centos_7.3.yaml new file mode 100644 index 000000000..e86cbb80d --- /dev/null +++ b/qa/distros/all/centos_7.3.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "7.3" +ktype: distro diff --git a/qa/distros/all/centos_7.4.yaml b/qa/distros/all/centos_7.4.yaml new file mode 100644 index 000000000..3eb689611 --- /dev/null +++ b/qa/distros/all/centos_7.4.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "7.4" +ktype: distro diff --git a/qa/distros/all/centos_7.5.yaml b/qa/distros/all/centos_7.5.yaml new file mode 100644 index 000000000..2f067e462 --- /dev/null +++ b/qa/distros/all/centos_7.5.yaml @@ -0,0 +1,3 @@ +os_type: centos +os_version: "7.5" +ktype: distro diff --git a/qa/distros/all/centos_7.6.yaml b/qa/distros/all/centos_7.6.yaml new file mode 100644 index 000000000..81014e102 --- /dev/null +++ b/qa/distros/all/centos_7.6.yaml @@ -0,0 +1,7 @@ +os_type: centos +os_version: "7.6" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/centos_7.yaml b/qa/distros/all/centos_7.yaml new file mode 120000 index 000000000..23ef40d86 --- /dev/null +++ b/qa/distros/all/centos_7.yaml @@ -0,0 +1 @@ +centos_7.6.yaml
\ No newline at end of file diff --git a/qa/distros/all/centos_8.0.yaml b/qa/distros/all/centos_8.0.yaml new file mode 100644 index 000000000..1679bf0d5 --- /dev/null +++ b/qa/distros/all/centos_8.0.yaml @@ -0,0 +1,7 @@ +os_type: centos +os_version: "8.0" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/centos_8.1.yaml b/qa/distros/all/centos_8.1.yaml new file mode 100644 index 000000000..f764e5079 --- /dev/null +++ b/qa/distros/all/centos_8.1.yaml @@ -0,0 +1,7 @@ +os_type: centos +os_version: "8.1" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/centos_8.2.yaml b/qa/distros/all/centos_8.2.yaml new file mode 100644 index 000000000..1ccbd8abd --- /dev/null +++ b/qa/distros/all/centos_8.2.yaml @@ -0,0 +1,7 @@ +os_type: centos +os_version: "8.2" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/centos_8.3.yaml b/qa/distros/all/centos_8.3.yaml new file mode 100644 index 000000000..b9a7c2579 --- /dev/null +++ b/qa/distros/all/centos_8.3.yaml @@ -0,0 +1,7 @@ +os_type: centos +os_version: "8.3" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/centos_8.stream.yaml b/qa/distros/all/centos_8.stream.yaml new file mode 100644 index 000000000..5ae75c6be --- /dev/null +++ b/qa/distros/all/centos_8.stream.yaml @@ -0,0 +1,7 @@ +os_type: centos +os_version: "8.stream" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/centos_8.yaml b/qa/distros/all/centos_8.yaml new file mode 120000 index 000000000..8e7476153 --- /dev/null +++ b/qa/distros/all/centos_8.yaml @@ -0,0 +1 @@ +centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/distros/all/centos_9.stream.yaml b/qa/distros/all/centos_9.stream.yaml new file mode 100644 index 000000000..52d553bd6 --- /dev/null +++ b/qa/distros/all/centos_9.stream.yaml @@ -0,0 +1,2 @@ +os_type: centos +os_version: "9.stream" diff --git a/qa/distros/all/centos_latest.yaml b/qa/distros/all/centos_latest.yaml new file mode 120000 index 000000000..2f843a512 --- /dev/null +++ b/qa/distros/all/centos_latest.yaml @@ -0,0 +1 @@ +centos_9.stream.yaml
\ No newline at end of file diff --git a/qa/distros/all/debian_6.0.yaml b/qa/distros/all/debian_6.0.yaml new file mode 100644 index 000000000..e0d6f51f8 --- /dev/null +++ b/qa/distros/all/debian_6.0.yaml @@ -0,0 +1,3 @@ +os_type: debian +os_version: "6.0" +ktype: distro diff --git a/qa/distros/all/debian_7.0.yaml b/qa/distros/all/debian_7.0.yaml new file mode 100644 index 000000000..1eba6366d --- /dev/null +++ b/qa/distros/all/debian_7.0.yaml @@ -0,0 +1,3 @@ +os_type: debian +os_version: "7.0" +ktype: distro diff --git a/qa/distros/all/debian_8.0.yaml b/qa/distros/all/debian_8.0.yaml new file mode 100644 index 000000000..48f9e44d4 --- /dev/null +++ b/qa/distros/all/debian_8.0.yaml @@ -0,0 +1,3 @@ +os_type: debian +os_version: "8.0" +ktype: distro diff --git a/qa/distros/all/fedora_17.yaml b/qa/distros/all/fedora_17.yaml new file mode 100644 index 000000000..4124a1c0a --- /dev/null +++ b/qa/distros/all/fedora_17.yaml @@ -0,0 +1,3 @@ +os_type: fedora +os_version: "17" +ktype: distro diff --git a/qa/distros/all/fedora_18.yaml b/qa/distros/all/fedora_18.yaml new file mode 100644 index 000000000..7c87ae53f --- /dev/null +++ b/qa/distros/all/fedora_18.yaml @@ -0,0 +1,3 @@ +os_type: fedora +os_version: "18" +ktype: distro diff --git a/qa/distros/all/fedora_19.yaml b/qa/distros/all/fedora_19.yaml new file mode 100644 index 000000000..5ee62d861 --- /dev/null +++ b/qa/distros/all/fedora_19.yaml @@ -0,0 +1,3 @@ +os_type: fedora +os_version: "19" +ktype: distro diff --git a/qa/distros/all/opensuse_15.1.yaml b/qa/distros/all/opensuse_15.1.yaml new file mode 100644 index 000000000..05cb3d8e4 --- /dev/null +++ b/qa/distros/all/opensuse_15.1.yaml @@ -0,0 +1,3 @@ +os_type: opensuse +os_version: "15.1" +ktype: distro diff --git a/qa/distros/all/opensuse_15.2.yaml b/qa/distros/all/opensuse_15.2.yaml new file mode 100644 index 000000000..18ee83ba8 --- /dev/null +++ b/qa/distros/all/opensuse_15.2.yaml @@ -0,0 +1,3 @@ +os_type: opensuse +os_version: "15.2" +ktype: distro diff --git a/qa/distros/all/opensuse_42.3.yaml b/qa/distros/all/opensuse_42.3.yaml new file mode 100644 index 000000000..d3419fc95 --- /dev/null +++ b/qa/distros/all/opensuse_42.3.yaml @@ -0,0 +1,3 @@ +os_type: opensuse +os_version: "42.3" +ktype: distro diff --git a/qa/distros/all/rhel_6.3.yaml b/qa/distros/all/rhel_6.3.yaml new file mode 100644 index 000000000..f9171354f --- /dev/null +++ b/qa/distros/all/rhel_6.3.yaml @@ -0,0 +1,3 @@ +os_type: rhel +os_version: "6.3" +ktype: distro diff --git a/qa/distros/all/rhel_6.4.yaml b/qa/distros/all/rhel_6.4.yaml new file mode 100644 index 000000000..5b250a32f --- /dev/null +++ b/qa/distros/all/rhel_6.4.yaml @@ -0,0 +1,3 @@ +os_type: rhel +os_version: "6.4" +ktype: distro diff --git a/qa/distros/all/rhel_6.5.yaml b/qa/distros/all/rhel_6.5.yaml new file mode 100644 index 000000000..6e9f7fe5c --- /dev/null +++ b/qa/distros/all/rhel_6.5.yaml @@ -0,0 +1,3 @@ +os_type: rhel +os_version: "6.5" +ktype: distro diff --git a/qa/distros/all/rhel_6.yaml b/qa/distros/all/rhel_6.yaml new file mode 120000 index 000000000..850573949 --- /dev/null +++ b/qa/distros/all/rhel_6.yaml @@ -0,0 +1 @@ +rhel_6.5.yaml
\ No newline at end of file diff --git a/qa/distros/all/rhel_7.0.yaml b/qa/distros/all/rhel_7.0.yaml new file mode 100644 index 000000000..36b6d4282 --- /dev/null +++ b/qa/distros/all/rhel_7.0.yaml @@ -0,0 +1,3 @@ +os_type: rhel +os_version: "7.0" +ktype: distro diff --git a/qa/distros/all/rhel_7.5.yaml b/qa/distros/all/rhel_7.5.yaml new file mode 100644 index 000000000..6b1e60f80 --- /dev/null +++ b/qa/distros/all/rhel_7.5.yaml @@ -0,0 +1,3 @@ +os_type: rhel +os_version: "7.5" +ktype: distro diff --git a/qa/distros/all/rhel_7.6.yaml b/qa/distros/all/rhel_7.6.yaml new file mode 100644 index 000000000..37bc0fb4a --- /dev/null +++ b/qa/distros/all/rhel_7.6.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "7.6" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_7.7.yaml b/qa/distros/all/rhel_7.7.yaml new file mode 100644 index 000000000..ac44fe18e --- /dev/null +++ b/qa/distros/all/rhel_7.7.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "7.7" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_7.yaml b/qa/distros/all/rhel_7.yaml new file mode 120000 index 000000000..85ed1cd48 --- /dev/null +++ b/qa/distros/all/rhel_7.yaml @@ -0,0 +1 @@ +rhel_7.7.yaml
\ No newline at end of file diff --git a/qa/distros/all/rhel_8.0.yaml b/qa/distros/all/rhel_8.0.yaml new file mode 100644 index 000000000..da6a33ed0 --- /dev/null +++ b/qa/distros/all/rhel_8.0.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "8.0" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_8.1.yaml b/qa/distros/all/rhel_8.1.yaml new file mode 100644 index 000000000..c73893149 --- /dev/null +++ b/qa/distros/all/rhel_8.1.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "8.1" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_8.3.yaml b/qa/distros/all/rhel_8.3.yaml new file mode 100644 index 000000000..4e44bbff5 --- /dev/null +++ b/qa/distros/all/rhel_8.3.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "8.3" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_8.4.yaml b/qa/distros/all/rhel_8.4.yaml new file mode 100644 index 000000000..5a299ffc5 --- /dev/null +++ b/qa/distros/all/rhel_8.4.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "8.4" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_8.5.yaml b/qa/distros/all/rhel_8.5.yaml new file mode 100644 index 000000000..3e02bb196 --- /dev/null +++ b/qa/distros/all/rhel_8.5.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "8.5" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_8.6.yaml b/qa/distros/all/rhel_8.6.yaml new file mode 100644 index 000000000..1f9a6b73f --- /dev/null +++ b/qa/distros/all/rhel_8.6.yaml @@ -0,0 +1,7 @@ +os_type: rhel +os_version: "8.6" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +ktype: distro diff --git a/qa/distros/all/rhel_8.yaml b/qa/distros/all/rhel_8.yaml new file mode 120000 index 000000000..d49c09cc1 --- /dev/null +++ b/qa/distros/all/rhel_8.yaml @@ -0,0 +1 @@ +rhel_8.6.yaml
\ No newline at end of file diff --git a/qa/distros/all/sle_12.2.yaml b/qa/distros/all/sle_12.2.yaml new file mode 100644 index 000000000..720fc2dec --- /dev/null +++ b/qa/distros/all/sle_12.2.yaml @@ -0,0 +1,3 @@ +os_type: sle +os_version: "12.2" +ktype: distro diff --git a/qa/distros/all/sle_12.3.yaml b/qa/distros/all/sle_12.3.yaml new file mode 100644 index 000000000..baab97ed7 --- /dev/null +++ b/qa/distros/all/sle_12.3.yaml @@ -0,0 +1,3 @@ +os_type: sle +os_version: "12.3" +ktype: distro diff --git a/qa/distros/all/sle_15.1.yaml b/qa/distros/all/sle_15.1.yaml new file mode 100644 index 000000000..e2fe5427f --- /dev/null +++ b/qa/distros/all/sle_15.1.yaml @@ -0,0 +1,3 @@ +os_type: sle +os_version: "15.1" +ktype: distro diff --git a/qa/distros/all/sle_15.2.yaml b/qa/distros/all/sle_15.2.yaml new file mode 100644 index 000000000..ccaac1799 --- /dev/null +++ b/qa/distros/all/sle_15.2.yaml @@ -0,0 +1,3 @@ +os_type: sle +os_version: "15.2" +ktype: distro diff --git a/qa/distros/all/ubuntu_12.04.yaml b/qa/distros/all/ubuntu_12.04.yaml new file mode 100644 index 000000000..c7d076def --- /dev/null +++ b/qa/distros/all/ubuntu_12.04.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "12.04" +ktype: distro diff --git a/qa/distros/all/ubuntu_12.10.yaml b/qa/distros/all/ubuntu_12.10.yaml new file mode 100644 index 000000000..5b1c510a1 --- /dev/null +++ b/qa/distros/all/ubuntu_12.10.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "12.10" +ktype: distro diff --git a/qa/distros/all/ubuntu_14.04.yaml b/qa/distros/all/ubuntu_14.04.yaml new file mode 100644 index 000000000..3067dc0f7 --- /dev/null +++ b/qa/distros/all/ubuntu_14.04.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "14.04" +ktype: distro diff --git a/qa/distros/all/ubuntu_14.04_aarch64.yaml b/qa/distros/all/ubuntu_14.04_aarch64.yaml new file mode 100644 index 000000000..08ad4f50f --- /dev/null +++ b/qa/distros/all/ubuntu_14.04_aarch64.yaml @@ -0,0 +1,4 @@ +os_type: ubuntu +os_version: "14.04" +arch: aarch64 +ktype: distro diff --git a/qa/distros/all/ubuntu_14.04_i686.yaml b/qa/distros/all/ubuntu_14.04_i686.yaml new file mode 100644 index 000000000..905391e1b --- /dev/null +++ b/qa/distros/all/ubuntu_14.04_i686.yaml @@ -0,0 +1,4 @@ +os_type: ubuntu +os_version: "14.04" +arch: i686 +ktype: distro diff --git a/qa/distros/all/ubuntu_16.04.yaml b/qa/distros/all/ubuntu_16.04.yaml new file mode 100644 index 000000000..7a55a7735 --- /dev/null +++ b/qa/distros/all/ubuntu_16.04.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "16.04" +ktype: distro diff --git a/qa/distros/all/ubuntu_18.04.yaml b/qa/distros/all/ubuntu_18.04.yaml new file mode 100644 index 000000000..3a89b202d --- /dev/null +++ b/qa/distros/all/ubuntu_18.04.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "18.04" +ktype: distro diff --git a/qa/distros/all/ubuntu_20.04.yaml b/qa/distros/all/ubuntu_20.04.yaml new file mode 100644 index 000000000..f9fb375ad --- /dev/null +++ b/qa/distros/all/ubuntu_20.04.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "20.04" +ktype: distro diff --git a/qa/distros/all/ubuntu_22.04.yaml b/qa/distros/all/ubuntu_22.04.yaml new file mode 100644 index 000000000..a34ddad11 --- /dev/null +++ b/qa/distros/all/ubuntu_22.04.yaml @@ -0,0 +1,3 @@ +os_type: ubuntu +os_version: "22.04" +ktype: distro diff --git a/qa/distros/all/ubuntu_latest.yaml b/qa/distros/all/ubuntu_latest.yaml new file mode 120000 index 000000000..08ecf3afa --- /dev/null +++ b/qa/distros/all/ubuntu_latest.yaml @@ -0,0 +1 @@ +ubuntu_22.04.yaml
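Most of the convenience names under qa/distros/all are symlinks that pin a moving label to one concrete release, e.g. centos_8.yaml -> centos_8.stream.yaml, centos_latest.yaml -> centos_9.stream.yaml and ubuntu_latest.yaml -> ubuntu_22.04.yaml above. When it matters which release a suite will actually schedule, the target can be checked directly; a trivial check, assuming a checkout at ./ceph:

    readlink ceph/qa/distros/all/ubuntu_latest.yaml    # ubuntu_22.04.yaml
    readlink ceph/qa/distros/all/centos_latest.yaml    # centos_9.stream.yaml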
\ No newline at end of file diff --git a/qa/distros/container-hosts/.qa b/qa/distros/container-hosts/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/distros/container-hosts/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/distros/container-hosts/centos_8.stream_container_tools.yaml b/qa/distros/container-hosts/centos_8.stream_container_tools.yaml new file mode 100644 index 000000000..4a76306f1 --- /dev/null +++ b/qa/distros/container-hosts/centos_8.stream_container_tools.yaml @@ -0,0 +1,14 @@ +os_type: centos +os_version: "8.stream" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 + +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools --allowerasing --nobest + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf diff --git a/qa/distros/container-hosts/centos_8.stream_container_tools_crun.yaml b/qa/distros/container-hosts/centos_8.stream_container_tools_crun.yaml new file mode 100644 index 000000000..b06e1c87d --- /dev/null +++ b/qa/distros/container-hosts/centos_8.stream_container_tools_crun.yaml @@ -0,0 +1,16 @@ +os_type: centos +os_version: "8.stream" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 + +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools --allowerasing --nobest + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf + - sudo sed -i 's/runtime = "runc"/#runtime = "runc"/g' /usr/share/containers/containers.conf + - sudo sed -i 's/#runtime = "crun"/runtime = "crun"/g' /usr/share/containers/containers.conf diff --git a/qa/distros/container-hosts/rhel_8.6_container_tools_3.0.yaml b/qa/distros/container-hosts/rhel_8.6_container_tools_3.0.yaml new file mode 100644 index 000000000..361d8546e --- /dev/null +++ b/qa/distros/container-hosts/rhel_8.6_container_tools_3.0.yaml @@ -0,0 +1,13 @@ +os_type: rhel +os_version: "8.6" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools:3.0 --allowerasing --nobest + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf diff --git a/qa/distros/container-hosts/rhel_8.6_container_tools_rhel8.yaml b/qa/distros/container-hosts/rhel_8.6_container_tools_rhel8.yaml new file mode 100644 index 000000000..be94ed69e --- /dev/null +++ b/qa/distros/container-hosts/rhel_8.6_container_tools_rhel8.yaml @@ -0,0 +1,13 @@ +os_type: rhel +os_version: "8.6" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools:rhel8 --allowerasing --nobest + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf diff --git a/qa/distros/container-hosts/ubuntu_20.04.yaml b/qa/distros/container-hosts/ubuntu_20.04.yaml new file mode 100644 index 000000000..bb9f5c00a --- /dev/null +++ b/qa/distros/container-hosts/ubuntu_20.04.yaml @@ -0,0 +1,9 @@ +os_type: ubuntu +os_version: "20.04" +# the normal ubuntu 20.04 kernel (5.4.0-88-generic currently) have a bug that prevents the nvme_loop +# from behaving. 
I think it is this: +# https://lkml.org/lkml/2020/9/21/1456 +# (at least, that is the symptom: nvme nvme1: Connect command failed, error wo/DNR bit: 880) +overrides: + kernel: + hwe: true diff --git a/qa/distros/crimson-supported-all-distro/centos_8.yaml b/qa/distros/crimson-supported-all-distro/centos_8.yaml new file mode 120000 index 000000000..b7e6c9b4e --- /dev/null +++ b/qa/distros/crimson-supported-all-distro/centos_8.yaml @@ -0,0 +1 @@ +../all/centos_8.yaml
\ No newline at end of file diff --git a/qa/distros/crimson-supported-all-distro/centos_latest.yaml b/qa/distros/crimson-supported-all-distro/centos_latest.yaml new file mode 120000 index 000000000..2e29883f3 --- /dev/null +++ b/qa/distros/crimson-supported-all-distro/centos_latest.yaml @@ -0,0 +1 @@ +../all/centos_latest.yaml
\ No newline at end of file diff --git a/qa/distros/podman/centos_8.stream_container_tools.yaml b/qa/distros/podman/centos_8.stream_container_tools.yaml new file mode 100644 index 000000000..4a76306f1 --- /dev/null +++ b/qa/distros/podman/centos_8.stream_container_tools.yaml @@ -0,0 +1,14 @@ +os_type: centos +os_version: "8.stream" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 + +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools --allowerasing --nobest + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf diff --git a/qa/distros/podman/rhel_8.6_container_tools_3.0.yaml b/qa/distros/podman/rhel_8.6_container_tools_3.0.yaml new file mode 100644 index 000000000..361d8546e --- /dev/null +++ b/qa/distros/podman/rhel_8.6_container_tools_3.0.yaml @@ -0,0 +1,13 @@ +os_type: rhel +os_version: "8.6" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools:3.0 --allowerasing --nobest + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf diff --git a/qa/distros/podman/rhel_8.6_container_tools_rhel8.yaml b/qa/distros/podman/rhel_8.6_container_tools_rhel8.yaml new file mode 100644 index 000000000..be94ed69e --- /dev/null +++ b/qa/distros/podman/rhel_8.6_container_tools_rhel8.yaml @@ -0,0 +1,13 @@ +os_type: rhel +os_version: "8.6" +overrides: + selinux: + whitelist: + - scontext=system_u:system_r:logrotate_t:s0 +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools:rhel8 --allowerasing --nobest + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf diff --git a/qa/distros/single-container-host.yaml b/qa/distros/single-container-host.yaml new file mode 120000 index 000000000..f71756d42 --- /dev/null +++ b/qa/distros/single-container-host.yaml @@ -0,0 +1 @@ +container-hosts/centos_8.stream_container_tools_crun.yaml
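qa/distros/single-container-host.yaml above resolves to the crun variant, whose pexec task comments out runtime = "runc" and enables runtime = "crun" in /usr/share/containers/containers.conf. After that task has run, the runtime podman actually uses can be confirmed on the host; a sketch, assuming a podman 3.x style info template:

    podman info --format '{{ .Host.OCIRuntime.Name }}'    # expected: crun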
\ No newline at end of file diff --git a/qa/distros/supported-all-distro/centos_8.yaml b/qa/distros/supported-all-distro/centos_8.yaml new file mode 120000 index 000000000..b7e6c9b4e --- /dev/null +++ b/qa/distros/supported-all-distro/centos_8.yaml @@ -0,0 +1 @@ +../all/centos_8.yaml
\ No newline at end of file diff --git a/qa/distros/supported-all-distro/centos_latest.yaml b/qa/distros/supported-all-distro/centos_latest.yaml new file mode 120000 index 000000000..2e29883f3 --- /dev/null +++ b/qa/distros/supported-all-distro/centos_latest.yaml @@ -0,0 +1 @@ +../all/centos_latest.yaml
\ No newline at end of file diff --git a/qa/distros/supported-all-distro/rhel_8.yaml b/qa/distros/supported-all-distro/rhel_8.yaml new file mode 120000 index 000000000..f803f091e --- /dev/null +++ b/qa/distros/supported-all-distro/rhel_8.yaml @@ -0,0 +1 @@ +../all/rhel_8.yaml
\ No newline at end of file diff --git a/qa/distros/supported-all-distro/ubuntu_20.04.yaml b/qa/distros/supported-all-distro/ubuntu_20.04.yaml new file mode 120000 index 000000000..75d907e3b --- /dev/null +++ b/qa/distros/supported-all-distro/ubuntu_20.04.yaml @@ -0,0 +1 @@ +../all/ubuntu_20.04.yaml
\ No newline at end of file diff --git a/qa/distros/supported-all-distro/ubuntu_latest.yaml b/qa/distros/supported-all-distro/ubuntu_latest.yaml new file mode 120000 index 000000000..cfcd0d1a8 --- /dev/null +++ b/qa/distros/supported-all-distro/ubuntu_latest.yaml @@ -0,0 +1 @@ +../all/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/distros/supported-random-distro$/$ b/qa/distros/supported-random-distro$/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/distros/supported-random-distro$/$ diff --git a/qa/distros/supported-random-distro$/centos_8.yaml b/qa/distros/supported-random-distro$/centos_8.yaml new file mode 120000 index 000000000..b7e6c9b4e --- /dev/null +++ b/qa/distros/supported-random-distro$/centos_8.yaml @@ -0,0 +1 @@ +../all/centos_8.yaml
\ No newline at end of file diff --git a/qa/distros/supported-random-distro$/centos_latest.yaml b/qa/distros/supported-random-distro$/centos_latest.yaml new file mode 120000 index 000000000..2e29883f3 --- /dev/null +++ b/qa/distros/supported-random-distro$/centos_latest.yaml @@ -0,0 +1 @@ +../all/centos_latest.yaml
\ No newline at end of file diff --git a/qa/distros/supported-random-distro$/rhel_8.yaml b/qa/distros/supported-random-distro$/rhel_8.yaml new file mode 120000 index 000000000..f803f091e --- /dev/null +++ b/qa/distros/supported-random-distro$/rhel_8.yaml @@ -0,0 +1 @@ +../all/rhel_8.yaml
\ No newline at end of file diff --git a/qa/distros/supported-random-distro$/ubuntu_20.04.yaml b/qa/distros/supported-random-distro$/ubuntu_20.04.yaml new file mode 120000 index 000000000..75d907e3b --- /dev/null +++ b/qa/distros/supported-random-distro$/ubuntu_20.04.yaml @@ -0,0 +1 @@ +../all/ubuntu_20.04.yaml
\ No newline at end of file diff --git a/qa/distros/supported-random-distro$/ubuntu_latest.yaml b/qa/distros/supported-random-distro$/ubuntu_latest.yaml new file mode 120000 index 000000000..cfcd0d1a8 --- /dev/null +++ b/qa/distros/supported-random-distro$/ubuntu_latest.yaml @@ -0,0 +1 @@ +../all/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/distros/supported/centos_8.stream.yaml b/qa/distros/supported/centos_8.stream.yaml new file mode 120000 index 000000000..e96091bb1 --- /dev/null +++ b/qa/distros/supported/centos_8.stream.yaml @@ -0,0 +1 @@ +../all/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/distros/supported/centos_latest.yaml b/qa/distros/supported/centos_latest.yaml new file mode 120000 index 000000000..2e29883f3 --- /dev/null +++ b/qa/distros/supported/centos_latest.yaml @@ -0,0 +1 @@ +../all/centos_latest.yaml
\ No newline at end of file diff --git a/qa/distros/supported/rhel_latest.yaml b/qa/distros/supported/rhel_latest.yaml new file mode 120000 index 000000000..f803f091e --- /dev/null +++ b/qa/distros/supported/rhel_latest.yaml @@ -0,0 +1 @@ +../all/rhel_8.yaml
\ No newline at end of file diff --git a/qa/distros/supported/ubuntu_20.04.yaml b/qa/distros/supported/ubuntu_20.04.yaml new file mode 120000 index 000000000..75d907e3b --- /dev/null +++ b/qa/distros/supported/ubuntu_20.04.yaml @@ -0,0 +1 @@ +../all/ubuntu_20.04.yaml
\ No newline at end of file diff --git a/qa/distros/supported/ubuntu_latest.yaml b/qa/distros/supported/ubuntu_latest.yaml new file mode 120000 index 000000000..cfcd0d1a8 --- /dev/null +++ b/qa/distros/supported/ubuntu_latest.yaml @@ -0,0 +1 @@ +../all/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/erasure-code/ec-feature-plugins-v2.yaml b/qa/erasure-code/ec-feature-plugins-v2.yaml new file mode 100644 index 000000000..f2d374dd9 --- /dev/null +++ b/qa/erasure-code/ec-feature-plugins-v2.yaml @@ -0,0 +1,98 @@ +# +# Test the expected behavior of the +# +# CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 +# +# feature. +# +roles: +- - mon.a + - mon.b + - osd.0 + - osd.1 +- - osd.2 + - mon.c + - mgr.x +tasks: +# +# Install firefly +# +- install: + branch: firefly +- ceph: + fs: xfs +# +# We don't need mon.c for now: it will be used later to make sure an old +# mon cannot join the quorum once the feature has been activated +# +- ceph.stop: + daemons: [mon.c] +- exec: + mon.a: + - |- + ceph osd erasure-code-profile set WRONG plugin=WRONG + ceph osd pool create poolWRONG 12 12 erasure WRONG 2>&1 | grep "failed to load plugin using profile WRONG" +# +# Partial upgrade, osd.2 is not upgraded +# +- install.upgrade: + osd.0: +# +# a is the leader +# +- ceph.restart: + daemons: [mon.a] + wait-for-healthy: false +- exec: + mon.a: + - |- + ceph osd erasure-code-profile set profile-lrc plugin=lrc 2>&1 | grep "unsupported by: the monitor cluster" +- ceph.restart: + daemons: [mon.b, osd.1, osd.0] + wait-for-healthy: false + wait-for-osds-up: true +# +# The lrc plugin cannot be used because osd.2 is not upgraded yet +# and would crash. +# +- exec: + mon.a: + - |- + ceph osd erasure-code-profile set profile-lrc plugin=lrc 2>&1 | grep "unsupported by: osd.2" +# +# Taking osd.2 out, the rest of the cluster is upgraded +# +- ceph.stop: + daemons: [osd.2] +- sleep: + duration: 60 +# +# Creating an erasure code profile using the lrc plugin now works +# +- exec: + mon.a: + - "ceph osd erasure-code-profile set profile-lrc plugin=lrc" +# +# osd.2 won't be able to join the cluster because it does not support the feature +# +- ceph.restart: + daemons: [osd.2] + wait-for-healthy: false +- sleep: + duration: 60 +- exec: + osd.2: + - |- + grep "protocol feature.*missing 100000000000" /var/log/ceph/ceph-osd.2.log +# +# mon.c won't be able to join the quorum because it does not support the feature +# +- ceph.restart: + daemons: [mon.c] + wait-for-healthy: false +- sleep: + duration: 60 +- exec: + mon.c: + - |- + grep "missing.*feature" /var/log/ceph/ceph-mon.c.log diff --git a/qa/erasure-code/ec-rados-default.yaml b/qa/erasure-code/ec-rados-default.yaml new file mode 100644 index 000000000..cc62371e3 --- /dev/null +++ b/qa/erasure-code/ec-rados-default.yaml @@ -0,0 +1,19 @@ +tasks: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 + - print: "**** done rados ec task" diff --git a/qa/erasure-code/ec-rados-parallel.yaml b/qa/erasure-code/ec-rados-parallel.yaml new file mode 100644 index 000000000..0f01d8424 --- /dev/null +++ b/qa/erasure-code/ec-rados-parallel.yaml @@ -0,0 +1,20 @@ +workload: + parallel: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 + - print: "**** done rados ec parallel" diff --git a/qa/erasure-code/ec-rados-plugin=clay-k=4-m=2.yaml b/qa/erasure-code/ec-rados-plugin=clay-k=4-m=2.yaml new file mode 100644 index 000000000..2efb85436 --- /dev/null +++ 
b/qa/erasure-code/ec-rados-plugin=clay-k=4-m=2.yaml @@ -0,0 +1,25 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + erasure_code_profile: + name: clay42profile + plugin: clay + k: 4 + m: 2 + technique: reed_sol_van + crush-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml b/qa/erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml new file mode 100644 index 000000000..64b59705c --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml @@ -0,0 +1,26 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + min_size: 2 + write_append_excl: false + erasure_code_profile: + name: isaprofile + plugin: isa + k: 2 + m: 1 + technique: reed_sol_van + crush-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml b/qa/erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml new file mode 100644 index 000000000..d61b1c8af --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml @@ -0,0 +1,25 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + erasure_code_profile: + name: jerasure21profile + plugin: jerasure + k: 2 + m: 1 + technique: reed_sol_van + crush-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml b/qa/erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml new file mode 100644 index 000000000..2ca53a799 --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml @@ -0,0 +1,31 @@ +# +# k=3 implies a stripe_width of 1376*3 = 4128 which is different from +# the default value of 4096. It is also not a multiple of 1024*1024 and +# creates situations where rounding rules during recovery become +# necessary. 
+# +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + erasure_code_profile: + name: jerasure31profile + plugin: jerasure + k: 3 + m: 1 + technique: reed_sol_van + crush-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-plugin=jerasure-k=4-m=2.yaml b/qa/erasure-code/ec-rados-plugin=jerasure-k=4-m=2.yaml new file mode 100644 index 000000000..dfcc61607 --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=jerasure-k=4-m=2.yaml @@ -0,0 +1,25 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + erasure_code_profile: + name: jerasure21profile + plugin: jerasure + k: 4 + m: 2 + technique: reed_sol_van + crush-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml b/qa/erasure-code/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml new file mode 100644 index 000000000..86ae0568c --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml @@ -0,0 +1,25 @@ +tasks: +- rados: + clients: [client.0] + ops: 400 + objects: 50 + ec_pool: true + write_append_excl: false + erasure_code_profile: + name: lrcprofile + plugin: lrc + k: 4 + m: 2 + l: 3 + crush-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-plugin=shec-k=4-m=3-c=2.yaml b/qa/erasure-code/ec-rados-plugin=shec-k=4-m=3-c=2.yaml new file mode 100644 index 000000000..ee74c6e98 --- /dev/null +++ b/qa/erasure-code/ec-rados-plugin=shec-k=4-m=3-c=2.yaml @@ -0,0 +1,25 @@ +tasks: +- rados: + clients: [client.0] + ops: 400 + objects: 50 + ec_pool: true + write_append_excl: false + erasure_code_profile: + name: shecprofile + plugin: shec + k: 4 + m: 3 + c: 2 + crush-failure-domain: osd + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/erasure-code/ec-rados-sequential.yaml b/qa/erasure-code/ec-rados-sequential.yaml new file mode 100644 index 000000000..90536ee6f --- /dev/null +++ b/qa/erasure-code/ec-rados-sequential.yaml @@ -0,0 +1,20 @@ +workload: + sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 + - print: "**** done rados ec sequential" diff --git a/qa/find-used-ports.sh b/qa/find-used-ports.sh new file mode 100755 index 000000000..c57525cd4 --- /dev/null +++ b/qa/find-used-ports.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +git --no-pager grep -n '127.0.0.1:[0-9]\+' | sed -n 's/.*127.0.0.1:\([0-9]\+\).*/\1/p' | sort -n | uniq -u diff --git a/qa/libceph/Makefile b/qa/libceph/Makefile new file mode 100644 index 000000000..06e1b990b --- /dev/null +++ b/qa/libceph/Makefile @@ -0,0 +1,11 @@ +CFLAGS = -Wall -Wextra -D_GNU_SOURCE -lcephfs -L../../build/lib + +TARGETS = trivial_libceph + +.c: + $(CC) $(CFLAGS) $@.c -o $@ + +all: $(TARGETS) + +clean: + rm $(TARGETS) diff --git 
a/qa/libceph/trivial_libceph.c b/qa/libceph/trivial_libceph.c new file mode 100644 index 000000000..9093e97e7 --- /dev/null +++ b/qa/libceph/trivial_libceph.c @@ -0,0 +1,69 @@ +#define _FILE_OFFSET_BITS 64 +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/statvfs.h> +#include "../../src/include/cephfs/libcephfs.h" + +#define MB64 (1<<26) + +int main(int argc, const char **argv) +{ + struct ceph_mount_info *cmount; + int ret, fd, len; + char buf[1024]; + + if (argc < 3) { + fprintf(stderr, "usage: ./%s <conf> <file>\n", argv[0]); + exit(1); + } + + ret = ceph_create(&cmount, NULL); + if (ret) { + fprintf(stderr, "ceph_create=%d\n", ret); + exit(1); + } + + ret = ceph_conf_read_file(cmount, argv[1]); + if (ret) { + fprintf(stderr, "ceph_conf_read_file=%d\n", ret); + exit(1); + } + + ret = ceph_conf_parse_argv(cmount, argc, argv); + if (ret) { + fprintf(stderr, "ceph_conf_parse_argv=%d\n", ret); + exit(1); + } + + ret = ceph_mount(cmount, NULL); + if (ret) { + fprintf(stderr, "ceph_mount=%d\n", ret); + exit(1); + } + + ret = ceph_chdir(cmount, "/"); + if (ret) { + fprintf(stderr, "ceph_chdir=%d\n", ret); + exit(1); + } + + fd = ceph_open(cmount, argv[2], O_CREAT|O_TRUNC|O_RDWR, 0777); + if (fd < 0) { + fprintf(stderr, "ceph_open=%d\n", fd); + exit(1); + } + + memset(buf, 'a', sizeof(buf)); + + len = ceph_write(cmount, fd, buf, sizeof(buf), 0); + + fprintf(stdout, "wrote %d bytes\n", len); + + ceph_shutdown(cmount); + + return 0; +} diff --git a/qa/loopall.sh b/qa/loopall.sh new file mode 100755 index 000000000..d69e8c720 --- /dev/null +++ b/qa/loopall.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +set -ex + +basedir=`echo $0 | sed 's/[^/]*$//g'`. +testdir="$1" +[ -n "$2" ] && logdir=$2 || logdir=$1 + +[ ${basedir:0:1} == "." ] && basedir=`pwd`/${basedir:1} + +PATH="$basedir/src:$PATH" + +[ -z "$testdir" ] || [ ! -d "$testdir" ] && echo "specify test dir" && exit 1 +cd $testdir + +while true +do + for test in `cd $basedir/workunits && find . -executable -type f | $basedir/../src/script/permute` + do + echo "------ running test $test ------" + pwd + [ -d $test ] && rm -r $test + mkdir -p $test + mkdir -p `dirname $logdir/$test.log` + test -e $logdir/$test.log && rm $logdir/$test.log + sh -c "cd $test && $basedir/workunits/$test" 2>&1 | tee $logdir/$test.log + done +done diff --git a/qa/machine_types/schedule_rados_ovh.sh b/qa/machine_types/schedule_rados_ovh.sh new file mode 100755 index 000000000..aeb37162e --- /dev/null +++ b/qa/machine_types/schedule_rados_ovh.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# $1 - part +# $2 - branch name +# $3 - machine name +# $4 - email address +# $5 - filter out (this arg is to be at the end of the command line for now) + +## example #1 +## (date +%U) week number +## % 2 - mod 2 (e.g. 0,1,0,1 ...) +## * 7 - multiplied by 7 (e.g. 0,7,0,7...) +## $1 day of the week (0-6) +## /14 for 2 weeks + +## example #2 +## (date +%U) week number +## % 4 - mod 4 (e.g. 0,1,2,3,0,1,2,3 ...) +## * 7 - multiplied by 7 (e.g. 0,7,14,21,0,7,14,21...) 
+## $1 day of the week (0-6) +## /28 for 4 weeks + +echo "Scheduling " $2 " branch" +if [ $2 = "master" ] ; then + # run master branch with --newest option looking for good sha1 7 builds back + teuthology-suite -v -c $2 -m $3 -k distro -s rados --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/28 --newest 7 -e $4 ~/vps.yaml $5 +elif [ $2 = "jewel" ] ; then + # run jewel branch with /40 jobs + teuthology-suite -v -c $2 -m $3 -k distro -s rados --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/40 -e $4 ~/vps.yaml $5 +else + # run NON master branches without --newest + teuthology-suite -v -c $2 -m $3 -k distro -s rados --subset $(echo "(($(date +%U) % 4) * 7) + $1" | bc)/28 -e $4 ~/vps.yaml $5 +fi + diff --git a/qa/machine_types/schedule_subset.sh b/qa/machine_types/schedule_subset.sh new file mode 100755 index 000000000..7f18c81ef --- /dev/null +++ b/qa/machine_types/schedule_subset.sh @@ -0,0 +1,20 @@ +#!/bin/bash -e + +#command line => CEPH_BRANCH=<branch>; MACHINE_NAME=<machine_type>; SUITE_NAME=<suite>; ../schedule_subset.sh <day_of_week> $CEPH_BRANCH $MACHINE_NAME $SUITE_NAME $CEPH_QA_EMAIL $KERNEL <$FILTER> + +partitions="$1" +shift +branch="$1" +shift +machine="$1" +shift +suite="$1" +shift +email="$1" +shift +kernel="$1" +shift +# rest of arguments passed directly to teuthology-suite + +echo "Scheduling $branch branch" +teuthology-suite -v -c "$branch" -m "$machine" -k "$kernel" -s "$suite" --ceph-repo https://git.ceph.com/ceph.git --suite-repo https://git.ceph.com/ceph.git --subset "$((RANDOM % partitions))/$partitions" --newest 100 -e "$email" "$@" diff --git a/qa/machine_types/vps.yaml b/qa/machine_types/vps.yaml new file mode 100644 index 000000000..64a3da47d --- /dev/null +++ b/qa/machine_types/vps.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + global: + osd heartbeat grace: 100 + # this line to address issue #1017 + mon lease: 15 + mon lease ack timeout: 25 + s3tests: + idle_timeout: 1200 + ceph-fuse: + client.0: + mount_wait: 60 + mount_timeout: 120 diff --git a/qa/mds/test_anchortable.sh b/qa/mds/test_anchortable.sh new file mode 100755 index 000000000..1bf2494de --- /dev/null +++ b/qa/mds/test_anchortable.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +set -x + +mkdir links +for f in `seq 1 8` +do + mkdir $f + for g in `seq 1 20` + do + touch $f/$g + ln $f/$g links/$f.$g + done +done + +for f in `seq 1 8` +do + echo testing failure point $f + bash -c "pushd . ; cd $bindir ; sleep 10; ./ceph -c $conf mds tell \* injectargs \"--mds_kill_mdstable_at $f\" ; popd" & + bash -c "pushd . ; cd $bindir ; sleep 11 ; ./init-ceph -c $conf start mds ; popd" & + for g in `seq 1 20` + do + rm $f/$g + rm links/$f.$g + sleep 1 + done +done + diff --git a/qa/mds/test_mdstable_failures.sh b/qa/mds/test_mdstable_failures.sh new file mode 100755 index 000000000..c959995cf --- /dev/null +++ b/qa/mds/test_mdstable_failures.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -x + +for f in `seq 1 8` +do + echo testing failure point $f + pushd . ; cd $bindir ; ./ceph -c $conf mds tell \* injectargs "--mds_kill_mdstable_at $f" ; popd + sleep 1 # wait for mds command to go thru + bash -c "pushd . 
; cd $bindir ; sleep 10 ; ./init-ceph -c $conf start mds ; popd" & + touch $f + ln $f $f.link + sleep 10 +done + diff --git a/qa/mgr_ttl_cache/disable.yaml b/qa/mgr_ttl_cache/disable.yaml new file mode 100644 index 000000000..bbd78d53f --- /dev/null +++ b/qa/mgr_ttl_cache/disable.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mgr: + mgr ttl cache expire seconds: 0 diff --git a/qa/mgr_ttl_cache/enable.yaml b/qa/mgr_ttl_cache/enable.yaml new file mode 100644 index 000000000..2c1c0e053 --- /dev/null +++ b/qa/mgr_ttl_cache/enable.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mgr: + mgr ttl cache expire seconds: 5 diff --git a/qa/mon/bootstrap/host.sh b/qa/mon/bootstrap/host.sh new file mode 100755 index 000000000..ad4e327d1 --- /dev/null +++ b/qa/mon/bootstrap/host.sh @@ -0,0 +1,29 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[global] +mon host = 127.0.0.1:6789 + +[mon] +admin socket = +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +EOF + +rm -f mm +fsid=`uuidgen` + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. + +ceph-mon -c conf -i a --mkfs --fsid $fsid --mon-data mon.a -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a + +ceph -c conf -k keyring health + +killall ceph-mon +echo OK
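The qa/mgr_ttl_cache fragments above switch the manager's TTL cache by setting its expiry: 0 disables it, 5 keeps cached results for five seconds. For quick experiments the same knob can be flipped on a live cluster; a sketch using the underscore form of the option name from those fragments:

    ceph config set mgr mgr_ttl_cache_expire_seconds 5    # set back to 0 to disable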
\ No newline at end of file diff --git a/qa/mon/bootstrap/initial_members.sh b/qa/mon/bootstrap/initial_members.sh new file mode 100755 index 000000000..2dfa9e992 --- /dev/null +++ b/qa/mon/bootstrap/initial_members.sh @@ -0,0 +1,39 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[mon] +admin socket = +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +mon initial members = a,b,d +EOF + +rm -f mm +monmaptool --create mm \ + --add a 127.0.0.1:6789 \ + --add b 127.0.0.1:6790 \ + --add c 127.0.0.1:6791 + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. + +ceph-mon -c conf -i a --mkfs --monmap mm --mon-data $cwd/mon.a -k keyring +ceph-mon -c conf -i b --mkfs --monmap mm --mon-data $cwd/mon.b -k keyring +ceph-mon -c conf -i c --mkfs --monmap mm --mon-data $cwd/mon.c -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a +ceph-mon -c conf -i c --mon-data $cwd/mon.b +ceph-mon -c conf -i b --mon-data $cwd/mon.c + +ceph -c conf -k keyring --monmap mm health + +ceph -c conf -k keyring --monmap mm health +if ceph -c conf -k keyring --monmap mm mon stat | grep a= | grep b= | grep c= ; then + break +fi + +killall ceph-mon +echo OK diff --git a/qa/mon/bootstrap/initial_members_asok.sh b/qa/mon/bootstrap/initial_members_asok.sh new file mode 100755 index 000000000..618f4c5db --- /dev/null +++ b/qa/mon/bootstrap/initial_members_asok.sh @@ -0,0 +1,66 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[mon] +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +debug asok = 20 +mon initial members = a,b,d +admin socket = $cwd/\$name.asok +EOF + +rm -f mm +fsid=`uuidgen` + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. 
+ +ceph-mon -c conf -i a --mkfs --fsid $fsid --mon-data $cwd/mon.a -k keyring +ceph-mon -c conf -i b --mkfs --fsid $fsid --mon-data $cwd/mon.b -k keyring +ceph-mon -c conf -i c --mkfs --fsid $fsid --mon-data $cwd/mon.c -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a --public-addr 127.0.0.1:6789 +ceph-mon -c conf -i b --mon-data $cwd/mon.c --public-addr 127.0.0.1:6790 +ceph-mon -c conf -i c --mon-data $cwd/mon.b --public-addr 127.0.0.1:6791 + +sleep 1 + +if timeout 5 ceph -c conf -k keyring -m localhost mon stat | grep "a,b,c" ; then + echo WTF + exit 1 +fi + +ceph --admin-daemon mon.a.asok add_bootstrap_peer_hint 127.0.0.1:6790 + +while true; do + if ceph -c conf -k keyring -m 127.0.0.1 mon stat | grep 'a,b'; then + break + fi + sleep 1 +done + +ceph --admin-daemon mon.c.asok add_bootstrap_peer_hint 127.0.0.1:6790 + +while true; do + if ceph -c conf -k keyring -m 127.0.0.1 mon stat | grep 'a,b,c'; then + break + fi + sleep 1 +done + +ceph-mon -c conf -i d --mkfs --fsid $fsid --mon-data $cwd/mon.d -k keyring +ceph-mon -c conf -i d --mon-data $cwd/mon.d --public-addr 127.0.0.1:6792 +ceph --admin-daemon mon.d.asok add_bootstrap_peer_hint 127.0.0.1:6790 + +while true; do + if ceph -c conf -k keyring -m 127.0.0.1 mon stat | grep 'a,b,c,d'; then + break + fi + sleep 1 +done + +killall ceph-mon +echo OK diff --git a/qa/mon/bootstrap/simple.sh b/qa/mon/bootstrap/simple.sh new file mode 100755 index 000000000..2121301b9 --- /dev/null +++ b/qa/mon/bootstrap/simple.sh @@ -0,0 +1,36 @@ +#!/bin/sh -e + +cwd=`pwd` +cat > conf <<EOF +[mon] +admin socket = +EOF + +rm -f mm +monmaptool --create mm \ + --add a 127.0.0.1:6789 \ + --add b 127.0.0.1:6790 \ + --add c 127.0.0.1:6791 + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. + +ceph-mon -c conf -i a --mkfs --monmap mm --mon-data $cwd/mon.a -k keyring +ceph-mon -c conf -i b --mkfs --monmap mm --mon-data $cwd/mon.b -k keyring +ceph-mon -c conf -i c --mkfs --monmap mm --mon-data $cwd/mon.c -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a +ceph-mon -c conf -i c --mon-data $cwd/mon.b +ceph-mon -c conf -i b --mon-data $cwd/mon.c + +while true; do + ceph -c conf -k keyring --monmap mm health + if ceph -c conf -k keyring --monmap mm mon stat | grep 'quorum 0,1,2'; then + break + fi + sleep 1 +done + +killall ceph-mon +echo OK diff --git a/qa/mon/bootstrap/simple_expand.sh b/qa/mon/bootstrap/simple_expand.sh new file mode 100755 index 000000000..519d8ae8f --- /dev/null +++ b/qa/mon/bootstrap/simple_expand.sh @@ -0,0 +1,60 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[mon] +admin socket = +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +EOF + +rm -f mm +monmaptool --create mm \ + --add a 127.0.0.1:6789 \ + --add b 127.0.0.1:6790 \ + --add c 127.0.0.1:6791 + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. 
+ +ceph-mon -c conf -i a --mkfs --monmap mm --mon-data $cwd/mon.a -k keyring +ceph-mon -c conf -i b --mkfs --monmap mm --mon-data $cwd/mon.b -k keyring +ceph-mon -c conf -i c --mkfs --monmap mm --mon-data $cwd/mon.c -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a +ceph-mon -c conf -i c --mon-data $cwd/mon.b +ceph-mon -c conf -i b --mon-data $cwd/mon.c + +ceph -c conf -k keyring --monmap mm health + +## expand via a kludged monmap +monmaptool mm --add d 127.0.0.1:6792 +ceph-mon -c conf -i d --mkfs --monmap mm --mon-data $cwd/mon.d -k keyring +ceph-mon -c conf -i d --mon-data $cwd/mon.d + +while true; do + ceph -c conf -k keyring --monmap mm health + if ceph -c conf -k keyring --monmap mm mon stat | grep 'quorum 0,1,2,3'; then + break + fi + sleep 1 +done + +# again +monmaptool mm --add e 127.0.0.1:6793 +ceph-mon -c conf -i e --mkfs --monmap mm --mon-data $cwd/mon.e -k keyring +ceph-mon -c conf -i e --mon-data $cwd/mon.e + +while true; do + ceph -c conf -k keyring --monmap mm health + if ceph -c conf -k keyring --monmap mm mon stat | grep 'quorum 0,1,2,3,4'; then + break + fi + sleep 1 +done + + +killall ceph-mon +echo OK diff --git a/qa/mon/bootstrap/simple_expand_monmap.sh b/qa/mon/bootstrap/simple_expand_monmap.sh new file mode 100755 index 000000000..da24c02c2 --- /dev/null +++ b/qa/mon/bootstrap/simple_expand_monmap.sh @@ -0,0 +1,44 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[mon] +admin socket = +EOF + +rm -f mm +monmaptool --create mm \ + --add a 127.0.0.1:6789 \ + --add b 127.0.0.1:6790 \ + --add c 127.0.0.1:6791 + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. + +ceph-mon -c conf -i a --mkfs --monmap mm --mon-data $cwd/mon.a -k keyring +ceph-mon -c conf -i b --mkfs --monmap mm --mon-data $cwd/mon.b -k keyring +ceph-mon -c conf -i c --mkfs --monmap mm --mon-data $cwd/mon.c -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a +ceph-mon -c conf -i c --mon-data $cwd/mon.b +ceph-mon -c conf -i b --mon-data $cwd/mon.c + +ceph -c conf -k keyring --monmap mm health + +## expand via a kludged monmap +monmaptool mm --add d 127.0.0.1:6792 +ceph-mon -c conf -i d --mkfs --monmap mm --mon-data $cwd/mon.d -k keyring +ceph-mon -c conf -i d --mon-data $cwd/mon.d + +while true; do + ceph -c conf -k keyring --monmap mm health + if ceph -c conf -k keyring --monmap mm mon stat | grep d=; then + break + fi + sleep 1 +done + +killall ceph-mon + +echo OK diff --git a/qa/mon/bootstrap/simple_single_expand.sh b/qa/mon/bootstrap/simple_single_expand.sh new file mode 100755 index 000000000..99fe5645e --- /dev/null +++ b/qa/mon/bootstrap/simple_single_expand.sh @@ -0,0 +1,54 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[mon] +admin socket = +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +EOF + +rm -f mm +monmaptool --create mm \ + --add a 127.0.0.1:6789 + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. 
+ +ceph-mon -c conf -i a --mkfs --monmap mm --mon-data $cwd/mon.a -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a + +ceph -c conf -k keyring --monmap mm health + +## expand via a kludged monmap +monmaptool mm --add d 127.0.0.1:6702 +ceph-mon -c conf -i d --mkfs --monmap mm --mon-data $cwd/mon.d -k keyring +ceph-mon -c conf -i d --mon-data $cwd/mon.d + +while true; do + ceph -c conf -k keyring --monmap mm health + if ceph -c conf -k keyring --monmap mm mon stat | grep 'quorum 0,1'; then + break + fi + sleep 1 +done + +# again +monmaptool mm --add e 127.0.0.1:6793 +ceph-mon -c conf -i e --mkfs --monmap mm --mon-data $cwd/mon.e -k keyring +ceph-mon -c conf -i e --mon-data $cwd/mon.e + +while true; do + ceph -c conf -k keyring --monmap mm health + if ceph -c conf -k keyring --monmap mm mon stat | grep 'quorum 0,1,2'; then + break + fi + sleep 1 +done + + +killall ceph-mon +echo OK diff --git a/qa/mon/bootstrap/simple_single_expand2.sh b/qa/mon/bootstrap/simple_single_expand2.sh new file mode 100755 index 000000000..28d0c563b --- /dev/null +++ b/qa/mon/bootstrap/simple_single_expand2.sh @@ -0,0 +1,40 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[mon] +admin socket = +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +EOF + +rm -f mm +ip=`host \`hostname\` | awk '{print $4}'` +monmaptool --create mm \ + --add a $ip:6779 + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. + +ceph-mon -c conf -i a --mkfs --monmap mm --mon-data $cwd/mon.a -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a + +ceph -c conf -k keyring --monmap mm health + +## expand via a local_network +ceph-mon -c conf -i d --mkfs --monmap mm --mon-data $cwd/mon.d -k keyring +ceph-mon -c conf -i d --mon-data $cwd/mon.d --public-network 127.0.0.1/32 + +while true; do + ceph -c conf -k keyring --monmap mm health + if ceph -c conf -k keyring --monmap mm mon stat | grep 'quorum 0,1'; then + break + fi + sleep 1 +done + +killall ceph-mon +echo OK diff --git a/qa/mon/bootstrap/single_host.sh b/qa/mon/bootstrap/single_host.sh new file mode 100755 index 000000000..c40b5614f --- /dev/null +++ b/qa/mon/bootstrap/single_host.sh @@ -0,0 +1,29 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[global] +mon host = 127.0.0.1:6789 + +[mon] +admin socket = +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +EOF + +rm -f mm +fsid=`uuidgen` + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. + +ceph-mon -c conf -i a --mkfs --fsid $fsid --mon-data $cwd/mon.a -k keyring + +ceph-mon -c conf -i a --mon-data $cwd/mon.a + +ceph -c conf -k keyring health + +killall ceph-mon +echo OK
\ No newline at end of file diff --git a/qa/mon/bootstrap/single_host_multi.sh b/qa/mon/bootstrap/single_host_multi.sh new file mode 100755 index 000000000..864f3b179 --- /dev/null +++ b/qa/mon/bootstrap/single_host_multi.sh @@ -0,0 +1,39 @@ +#!/bin/sh -ex + +cwd=`pwd` +cat > conf <<EOF +[global] + +[mon] +admin socket = +log file = $cwd/\$name.log +debug mon = 20 +debug ms = 1 +mon host = 127.0.0.1:6789 127.0.0.1:6790 127.0.0.1:6791 +EOF + +rm -f mm +fsid=`uuidgen` + +rm -f keyring +ceph-authtool --create-keyring keyring --gen-key -n client.admin +ceph-authtool keyring --gen-key -n mon. + +ceph-mon -c conf -i a --mkfs --fsid $fsid --mon-data $cwd/mon.a -k keyring --public-addr 127.0.0.1:6789 +ceph-mon -c conf -i b --mkfs --fsid $fsid --mon-data $cwd/mon.b -k keyring --public-addr 127.0.0.1:6790 +ceph-mon -c conf -i c --mkfs --fsid $fsid --mon-data $cwd/mon.c -k keyring --public-addr 127.0.0.1:6791 + +ceph-mon -c conf -i a --mon-data $cwd/mon.a +ceph-mon -c conf -i b --mon-data $cwd/mon.b +ceph-mon -c conf -i c --mon-data $cwd/mon.c + +ceph -c conf -k keyring health -m 127.0.0.1 +while true; do + if ceph -c conf -k keyring -m 127.0.0.1 mon stat | grep 'a,b,c'; then + break + fi + sleep 1 +done + +killall ceph-mon +echo OK
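The expand scripts above all repeat the same poll-until-quorum loop. A small helper in the same style, shown here only as a sketch (it is not part of the tree), taking the expected quorum string as its argument:

#!/bin/sh -ex
# hypothetical wait_quorum.sh: poll `mon stat` until the expected quorum appears
# usage: ./wait_quorum.sh 'quorum 0,1,2'
expected=$1
while true; do
    ceph -c conf -k keyring --monmap mm health
    if ceph -c conf -k keyring --monmap mm mon stat | grep "$expected"; then
        break
    fi
    sleep 1
done

A timeout guard (giving up after N iterations) would be an obvious addition for unattended runs.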
\ No newline at end of file diff --git a/qa/mon_election/classic.yaml b/qa/mon_election/classic.yaml new file mode 100644 index 000000000..7ccd99830 --- /dev/null +++ b/qa/mon_election/classic.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon election default strategy: 1
\ No newline at end of file diff --git a/qa/mon_election/connectivity.yaml b/qa/mon_election/connectivity.yaml new file mode 100644 index 000000000..3b9f9e26c --- /dev/null +++ b/qa/mon_election/connectivity.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon election default strategy: 3
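These two fragments differ only in the strategy number (1 selects the classic election algorithm, 3 the connectivity-based one). On a running cluster the effect can be confirmed, and flipped, roughly as below; this assumes an Octopus-or-later ceph CLI and the command names are quoted from memory:

# show the strategy currently recorded in the monmap
ceph mon dump | grep election_strategy
# switch to the connectivity-based election (same effect as connectivity.yaml)
ceph mon set election_strategy connectivity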
\ No newline at end of file diff --git a/qa/msgr/async-v1only.yaml b/qa/msgr/async-v1only.yaml new file mode 100644 index 000000000..aa90cad67 --- /dev/null +++ b/qa/msgr/async-v1only.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + mon_bind_msgr2: false + conf: + global: + ms type: async + ms bind msgr2: false diff --git a/qa/msgr/async-v2only.yaml b/qa/msgr/async-v2only.yaml new file mode 100644 index 000000000..4de9f32ac --- /dev/null +++ b/qa/msgr/async-v2only.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + ms type: async + ms bind msgr2: true + ms bind msgr1: false diff --git a/qa/msgr/async.yaml b/qa/msgr/async.yaml new file mode 100644 index 000000000..9f93cab4f --- /dev/null +++ b/qa/msgr/async.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + ms type: async diff --git a/qa/mypy.ini b/qa/mypy.ini new file mode 100644 index 000000000..1215375ed --- /dev/null +++ b/qa/mypy.ini @@ -0,0 +1,2 @@ +[mypy] +ignore_missing_imports = True
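The two-line mypy.ini above only silences missing-stub errors. With mypy installed, a hand-run against the qa python sources might look like the following; the tasks/ target is an assumption, point it at whichever modules you care about:

# run mypy by hand with the qa tree's config
cd qa
mypy --config-file mypy.ini tasks/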
\ No newline at end of file diff --git a/qa/nightlies/cron_wrapper b/qa/nightlies/cron_wrapper new file mode 100755 index 000000000..29b4483e9 --- /dev/null +++ b/qa/nightlies/cron_wrapper @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# /nightlies/cron_wrapper.sh + +# check for no argument case and stop +if [ -z $1 ]; then + echo "need argument" + exit 1 +fi + +# set permanent $LOG file var +LOG="/var/log/crontab-nightlies-log/crontab.log" +# set $LOG_LOCKED_ERR in case locking failed +LOG_LOCK_ERR="/var/log/crontab-nightlies-log/crontab_lock_problem.$$" + +# temp files to store stdout and stderr +# named with the PID of this script in their name so they'll be unique +STDERR="/var/tmp/stderr.$$" +STDOUT="/var/tmp/stdout.$$" + +# $STDOUT and $STDERR are removed when the script exits for any reason +trap "rm -f $STDOUT $STDERR" 0 + +# run a command from this script's argument +# redirect stdout to $STDOUT file and redirect stderr to $STDERR file + +DATE=$(date) +echo -n "$DATE: " >> $STDOUT +echo "Running command: $@" >> $STDOUT +"$@" > $STDOUT 2> $STDERR + +# get return code from the command run +code=$? + +if [ $code != 0 ] ; then + # echoing to stdout/stderr makes cron send email + echo "stdout:" + cat $STDOUT + echo "stderr:" + cat $STDERR +else + # normal exit: just log stdout + + # lock $LOG with file descriptor 200 + exec 200>>$LOG + # if $LOG is locked by other process - wait for 20 sec + flock -w 20 200 || LOG=$LOG_LOCK_ERR + echo "stdout:" >> $LOG + cat $STDOUT >> $LOG + echo "stderr:" >> $LOG + cat $STDERR >> $LOG + # unlock + flock -u 200 +fi diff --git a/qa/objectstore/bluestore-bitmap.yaml b/qa/objectstore/bluestore-bitmap.yaml new file mode 100644 index 000000000..d50c50e76 --- /dev/null +++ b/qa/objectstore/bluestore-bitmap.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore fsck on mount: true + bluestore allocator: bitmap + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/objectstore/bluestore-comp-lz4.yaml b/qa/objectstore/bluestore-comp-lz4.yaml new file mode 100644 index 000000000..c45951984 --- /dev/null +++ b/qa/objectstore/bluestore-comp-lz4.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: lz4 + # lower the full ratios since we can 
fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore/bluestore-comp-snappy.yaml b/qa/objectstore/bluestore-comp-snappy.yaml new file mode 100644 index 000000000..e9cdd741d --- /dev/null +++ b/qa/objectstore/bluestore-comp-snappy.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: snappy + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore/bluestore-comp-zlib.yaml b/qa/objectstore/bluestore-comp-zlib.yaml new file mode 100644 index 000000000..b3628dea2 --- /dev/null +++ b/qa/objectstore/bluestore-comp-zlib.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: zlib + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore/bluestore-comp-zstd.yaml b/qa/objectstore/bluestore-comp-zstd.yaml new file mode 100644 index 000000000..7906c6045 --- /dev/null +++ b/qa/objectstore/bluestore-comp-zstd.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: zstd + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore/bluestore-hybrid.yaml b/qa/objectstore/bluestore-hybrid.yaml new file mode 100644 index 000000000..26e3302ce --- /dev/null +++ b/qa/objectstore/bluestore-hybrid.yaml @@ -0,0 +1,40 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore fsck on mount: true + bluestore allocator: hybrid + bluefs allocator: hybrid + # 
lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + diff --git a/qa/objectstore/bluestore-low-osd-mem-target.yaml b/qa/objectstore/bluestore-low-osd-mem-target.yaml new file mode 100644 index 000000000..e0efcbc36 --- /dev/null +++ b/qa/objectstore/bluestore-low-osd-mem-target.yaml @@ -0,0 +1,25 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + osd memory target: 1610612736 # reduced to 1.5_G + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + diff --git a/qa/objectstore/bluestore-stupid.yaml b/qa/objectstore/bluestore-stupid.yaml new file mode 100644 index 000000000..5441e9397 --- /dev/null +++ b/qa/objectstore/bluestore-stupid.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore fsck on mount: true + bluestore allocator: stupid + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 1/20 + debug bluefs: 1/20 + debug rocksdb: 4/10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/objectstore_cephfs/bluestore-bitmap.yaml b/qa/objectstore_cephfs/bluestore-bitmap.yaml new file mode 120000 index 000000000..951e65ac0 --- /dev/null +++ b/qa/objectstore_cephfs/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../objectstore/bluestore-bitmap.yaml
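All of these objectstore fragments steer OSD behaviour purely through conf overrides (allocator, compression, discard, full ratios). When debugging a job it can help to confirm what the OSD actually picked up; something along these lines, assuming osd.0 exists and the usual admin keyring is available on the node:

# via the mon/mgr
ceph config show osd.0 bluestore_allocator
ceph config show osd.0 bluestore_compression_algorithm
# or directly over the admin socket on the OSD host
ceph daemon osd.0 config get bluestore_allocator
ceph daemon osd.0 config get osd_failsafe_full_ratio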
\ No newline at end of file diff --git a/qa/objectstore_debug/bluestore-bitmap.yaml b/qa/objectstore_debug/bluestore-bitmap.yaml new file mode 100644 index 000000000..b18e04bee --- /dev/null +++ b/qa/objectstore_debug/bluestore-bitmap.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + bluestore allocator: bitmap + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/objectstore_debug/bluestore-comp-lz4.yaml b/qa/objectstore_debug/bluestore-comp-lz4.yaml new file mode 100644 index 000000000..46f993e68 --- /dev/null +++ b/qa/objectstore_debug/bluestore-comp-lz4.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: lz4 + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore_debug/bluestore-comp-snappy.yaml b/qa/objectstore_debug/bluestore-comp-snappy.yaml new file mode 100644 index 000000000..b5d58414e --- /dev/null +++ b/qa/objectstore_debug/bluestore-comp-snappy.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: snappy + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore_debug/bluestore-comp-zlib.yaml b/qa/objectstore_debug/bluestore-comp-zlib.yaml new file mode 100644 index 000000000..b47ebbb7c --- /dev/null +++ b/qa/objectstore_debug/bluestore-comp-zlib.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 
+ ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: zlib + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore_debug/bluestore-comp-zstd.yaml b/qa/objectstore_debug/bluestore-comp-zstd.yaml new file mode 100644 index 000000000..e2f5e4e5b --- /dev/null +++ b/qa/objectstore_debug/bluestore-comp-zstd.yaml @@ -0,0 +1,24 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: zstd + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/objectstore_debug/bluestore-hybrid.yaml b/qa/objectstore_debug/bluestore-hybrid.yaml new file mode 100644 index 000000000..68b9bc427 --- /dev/null +++ b/qa/objectstore_debug/bluestore-hybrid.yaml @@ -0,0 +1,40 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + bluestore allocator: hybrid + bluefs allocator: hybrid + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + diff --git a/qa/objectstore_debug/bluestore-low-osd-mem-target.yaml b/qa/objectstore_debug/bluestore-low-osd-mem-target.yaml new file mode 100644 index 000000000..b2a49790b --- /dev/null +++ b/qa/objectstore_debug/bluestore-low-osd-mem-target.yaml @@ -0,0 +1,25 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + osd memory target: 1610612736 # reduced to 1.5_G + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd 
failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + diff --git a/qa/objectstore_debug/bluestore-stupid.yaml b/qa/objectstore_debug/bluestore-stupid.yaml new file mode 100644 index 000000000..ca811f131 --- /dev/null +++ b/qa/objectstore_debug/bluestore-stupid.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + bluestore allocator: stupid + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/overrides/2-size-1-min-size.yaml b/qa/overrides/2-size-1-min-size.yaml new file mode 100644 index 000000000..d710aee22 --- /dev/null +++ b/qa/overrides/2-size-1-min-size.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_pool_default_size: 2 + osd_pool_default_min_size: 1 diff --git a/qa/overrides/2-size-2-min-size.yaml b/qa/overrides/2-size-2-min-size.yaml new file mode 100644 index 000000000..d46db3fad --- /dev/null +++ b/qa/overrides/2-size-2-min-size.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + osd_pool_default_size: 2 + osd_pool_default_min_size: 2 + log-ignorelist: + - \(REQUEST_STUCK\) diff --git a/qa/overrides/3-size-2-min-size.yaml b/qa/overrides/3-size-2-min-size.yaml new file mode 100644 index 000000000..02579060c --- /dev/null +++ b/qa/overrides/3-size-2-min-size.yaml @@ -0,0 +1,8 @@ +overrides: + thrashosds: + min_in: 4 + ceph: + conf: + global: + osd_pool_default_size: 3 + osd_pool_default_min_size: 2 diff --git a/qa/overrides/ignorelist_wrongly_marked_down.yaml b/qa/overrides/ignorelist_wrongly_marked_down.yaml new file mode 100644 index 000000000..2ce7ffd97 --- /dev/null +++ b/qa/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + conf: + mds: + debug mds: 20 + debug ms: 1 + client: + debug client: 10
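The 2-size-1-min-size and 3-size-2-min-size fragments above only change pool defaults; a pool that already exists keeps its own settings. A quick interactive equivalent, with 'testpool' standing in for whatever pool the job created (the name is a placeholder):

ceph osd pool set testpool size 2
ceph osd pool set testpool min_size 1
# confirm what the pool ended up with
ceph osd pool get testpool size
ceph osd pool get testpool min_size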
\ No newline at end of file diff --git a/qa/overrides/more-active-recovery.yaml b/qa/overrides/more-active-recovery.yaml new file mode 100644 index 000000000..bfe86e4de --- /dev/null +++ b/qa/overrides/more-active-recovery.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_recovery_max_active: 10 + osd_recovery_max_single_start: 10 diff --git a/qa/overrides/no_client_pidfile.yaml b/qa/overrides/no_client_pidfile.yaml new file mode 100644 index 000000000..4ea02f476 --- /dev/null +++ b/qa/overrides/no_client_pidfile.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + pid file: "" diff --git a/qa/overrides/nvme_loop.yaml b/qa/overrides/nvme_loop.yaml new file mode 100644 index 000000000..75f6cd454 --- /dev/null +++ b/qa/overrides/nvme_loop.yaml @@ -0,0 +1,2 @@ +tasks: +- nvme_loop: diff --git a/qa/overrides/short_pg_log.yaml b/qa/overrides/short_pg_log.yaml new file mode 100644 index 000000000..fa55e91ea --- /dev/null +++ b/qa/overrides/short_pg_log.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 1 + osd_max_pg_log_entries: 2 + osd_pg_log_trim_min: 0 diff --git a/qa/packages/packages.yaml b/qa/packages/packages.yaml new file mode 100644 index 000000000..866d59e6f --- /dev/null +++ b/qa/packages/packages.yaml @@ -0,0 +1,85 @@ +--- +ceph: + deb: + - ceph + - cephadm + - ceph-mds + - ceph-mgr + - ceph-common + - ceph-fuse + - ceph-test + - ceph-volume + - radosgw + - python3-rados + - python3-rgw + - python3-cephfs + - python3-rbd + - libcephfs2 + - libcephfs-dev + - librados2 + - librbd1 + - rbd-fuse + - ceph-common-dbg + - ceph-fuse-dbg + - ceph-mds-dbg + - ceph-mgr-dbg + - ceph-mon-dbg + - ceph-osd-dbg + - ceph-test-dbg + - libcephfs2-dbg + - librados2-dbg + - libradosstriper1-dbg + - librbd1-dbg + - librgw2-dbg + - radosgw-dbg + - rbd-fuse-dbg + - rbd-mirror-dbg + - rbd-nbd-dbg + rpm: + - ceph-radosgw + - ceph-test + - ceph + - ceph-base + - cephadm + - ceph-immutable-object-cache + - ceph-mgr + - ceph-mgr-dashboard + - ceph-mgr-diskprediction-local + - ceph-mgr-rook + - ceph-mgr-cephadm + - ceph-fuse + - ceph-volume + - librados-devel + - libcephfs2 + - libcephfs-devel + - librados2 + - librbd1 + - python3-rados + - python3-rgw + - python3-cephfs + - python3-rbd + - rbd-fuse + - rbd-mirror + - rbd-nbd + - ceph-base-debuginfo + - ceph-common-debuginfo + - ceph-immutable-object-cache-debuginfo + - ceph-radosgw-debuginfo + - ceph-test-debuginfo + - ceph-base-debuginfo + - ceph-mgr-debuginfo + - ceph-mds-debuginfo + - ceph-mon-debuginfo + - ceph-osd-debuginfo + - ceph-fuse-debuginfo + - librados-devel-debuginfo + - libcephfs2-debuginfo + - librados2-debuginfo + - librbd1-debuginfo + - python3-cephfs-debuginfo + - python3-rados-debuginfo + - python3-rbd-debuginfo + - python3-rgw-debuginfo + - rbd-fuse-debuginfo + - rbd-mirror-debuginfo + - rbd-nbd-debuginfo diff --git a/qa/qa_scripts/cephscrub.sh b/qa/qa_scripts/cephscrub.sh new file mode 100755 index 000000000..331d5ce32 --- /dev/null +++ b/qa/qa_scripts/cephscrub.sh @@ -0,0 +1,30 @@ +# remove the ceph directories +sudo rm -rf /var/log/ceph +sudo rm -rf /var/lib/ceph +sudo rm -rf /etc/ceph +sudo rm -rf /var/run/ceph +# remove the ceph packages +sudo apt-get -y purge ceph +sudo apt-get -y purge ceph-dbg +sudo apt-get -y purge ceph-mds +sudo apt-get -y purge ceph-mds-dbg +sudo apt-get -y purge ceph-fuse +sudo apt-get -y purge ceph-fuse-dbg +sudo apt-get -y purge ceph-common +sudo apt-get -y purge ceph-common-dbg +sudo apt-get -y purge ceph-resource-agents +sudo apt-get -y purge librados2 
+sudo apt-get -y purge librados2-dbg +sudo apt-get -y purge librados-dev +sudo apt-get -y purge librbd1 +sudo apt-get -y purge librbd1-dbg +sudo apt-get -y purge librbd-dev +sudo apt-get -y purge libcephfs2 +sudo apt-get -y purge libcephfs2-dbg +sudo apt-get -y purge libcephfs-dev +sudo apt-get -y purge radosgw +sudo apt-get -y purge radosgw-dbg +sudo apt-get -y purge obsync +sudo apt-get -y purge python-rados +sudo apt-get -y purge python-rbd +sudo apt-get -y purge python-cephfs diff --git a/qa/qa_scripts/openstack/README b/qa/qa_scripts/openstack/README new file mode 100644 index 000000000..63fe2d973 --- /dev/null +++ b/qa/qa_scripts/openstack/README @@ -0,0 +1,32 @@ +This directory contains scripts to quickly bring up an OpenStack instance, +attach a ceph cluster, create a nova compute node, and store the associated glance images, cinder volumes, nova vm, and cinder backup on ceph via rbd. + +execs is a directory that contains executables that are copied and remotely +run on the OpenStack instance + +files is a directory that contains templates used to initialize OpenStack +conf files. These templates reflect the state of these conf files on 5/17/2016. +If further development is necessary in the future, these templates should +probably be removed and direct editing of the OpenStack conf files should +probably be performed. + +These scripts also assume that either there is a rhel iso file named +rhel-server-7.2-x86_64-boot.iso in the user's home directory, or the +exported variable RHEL_ISO is set to point at an existing rhel iso file. +If one is also running the ceph-deploy based ceph_install.sh, this script +also assumes that there is a file named rhceph-1.3.1-rhel-7-x86_64-dvd.iso +in the files directory. These iso files can be obtained from the rhel site +and are not stored with these scripts. + +To install openstack: +./openstack.sh <openstack-admin-node> <ceph-monitor-node> + +This assumes that the ceph cluster is already set up. + +To setup a ceph-cluster using an iso and ceph-deploy: +./ceph_install.sh <admin-node> <mon-node> <osd-node> <osd-node> <osd-node> + +To setup a ceph-cluster using the cdn and ceph-ansible: +cd ceph_install_w_ansible +./ceph_install.sh <admin-node> <mon-node> <osd-node> <osd-node> <osd-node> + diff --git a/qa/qa_scripts/openstack/ceph_install.sh b/qa/qa_scripts/openstack/ceph_install.sh new file mode 100755 index 000000000..47831bd03 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# +# Install a simple ceph cluster upon which openstack images will be stored. +# +set -fv +ceph_node=${1} +source copy_func.sh +copy_file files/$OS_CEPH_ISO $ceph_node . +copy_file execs/ceph_cluster.sh $ceph_node . 0777 +copy_file execs/ceph-pool-create.sh $ceph_node . 0777 +ssh $ceph_node ./ceph_cluster.sh $* diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/README b/qa/qa_scripts/openstack/ceph_install_w_ansible/README new file mode 100644 index 000000000..282c46e48 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/README @@ -0,0 +1,32 @@ + +ceph_install.sh installs a ceph cluster using the cdn and ceph-ansible. + +Right now, it takes 5 parameters -- an admin node, a ceph mon node, and +three osd nodes. 
+ +In order to subscribe to the cdn, in your home directory create a file named +secrets, (~/secrets), that contains the following lines: + +subscrname=Your-Redhat-Cdn-Id +subscrpassword=Your-Redhat-Cdn-Password + +If you want to set the monitor_interface or the public_network values, +in your home directory create a file named ip_info (~/ip_info), that +contains the following lines: + +mon_intf=your-monitor-interface (default is eno1) +pub_netw=public-network (default is 10.8.128.0/21) + +This script first subscribes to the cdn, enables the rhel 7 repos, and does +a yum update. (multi_action.sh performs all the actions on all nodes at once, +staller.sh is used to make sure that all updates are complete before exiting, +and execs/cdn_setup.sh is used to remotely update the cdn information. + +After that, it makes sure that all nodes can connect via passwordless ssh +(using talknice.sh and config) and then installs the appropriate repos and +runs ceph_ansible on the admin node using execs/ceph_ansible.sh, +execs/edit_ansible_hosts.sh and execs/edit_groupvars_osds.sh. + +repolocs.sh contains the locations of repo files. These variables can +be changed if one wishes to use different urls. + diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/ceph_install.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/ceph_install.sh new file mode 100755 index 000000000..b4d14f9c2 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/ceph_install.sh @@ -0,0 +1,39 @@ +#! /usr/bin/env bash +if [ $# -ne 5 ]; then + echo 'Usage: ceph_install.sh <admin-node> <mon-node> <osd-node> <osd-node> <osd-node>' + exit -1 +fi +allnodes=$* +adminnode=$1 +shift +cephnodes=$* +monnode=$1 +shift +osdnodes=$* +./multi_action.sh cdn_setup.sh $allnodes +./talknice.sh $allnodes +for mac in $allnodes; do + ssh $mac sudo yum -y install yum-utils +done + +source ./repolocs.sh +ssh $adminnode sudo yum-config-manager --add ${CEPH_REPO_TOOLS} +ssh $monnode sudo yum-config-manager --add ${CEPH_REPO_MON} +for mac in $osdnodes; do + ssh $mac sudo yum-config-manager --add ${CEPH_REPO_OSD} +done +ssh $adminnode sudo yum-config-manager --add ${INSTALLER_REPO_LOC} + +for mac in $allnodes; do + ssh $mac sudo sed -i 's/gpgcheck=1/gpgcheck=0/' /etc/yum.conf +done + +source copy_func.sh +copy_file execs/ceph_ansible.sh $adminnode . 0777 ubuntu:ubuntu +copy_file execs/edit_ansible_hosts.sh $adminnode . 0777 ubuntu:ubuntu +copy_file execs/edit_groupvars_osds.sh $adminnode . 0777 ubuntu:ubuntu +copy_file ../execs/ceph-pool-create.sh $monnode . 0777 ubuntu:ubuntu +if [ -e ~/ip_info ]; then + copy_file ~/ip_info $adminnode . 0777 ubuntu:ubuntu +fi +ssh $adminnode ./ceph_ansible.sh $cephnodes diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/config b/qa/qa_scripts/openstack/ceph_install_w_ansible/config new file mode 100644 index 000000000..a7d819869 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/config @@ -0,0 +1,5 @@ +Host plana* mira* burnupi* tala* saya* vpm* names* gitbuilder* teuthology gw* senta* vercoi* rex* magna* + ServerAliveInterval 360 + StrictHostKeyChecking no + UserKnownHostsFile=/dev/null + User ubuntu diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/copy_func.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/copy_func.sh new file mode 120000 index 000000000..6a36be7b0 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/copy_func.sh @@ -0,0 +1 @@ +../copy_func.sh
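Matching the usage string that ceph_install.sh checks for, a typical invocation would look like the lines below; the host names are placeholders, and ~/secrets (plus, optionally, ~/ip_info) must already exist as described in the README above:

# admin node first, then the mon, then three osd hosts
./ceph_install.sh admin1.example.com mon1.example.com \
    osd1.example.com osd2.example.com osd3.example.com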
\ No newline at end of file diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/cdn_setup.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/cdn_setup.sh new file mode 100755 index 000000000..0c87039db --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/cdn_setup.sh @@ -0,0 +1,20 @@ +#! /usr/bin/env bash +if [ -f ~/secrets ]; then + source ~/secrets +fi +subm=`which subscription-manager` +if [ ${#subm} -eq 0 ]; then + sudo yum -y update + exit +fi +subst=`sudo subscription-manager status | grep "^Overall" | awk '{print $NF}'` +if [ $subst == 'Unknown' ]; then + mynameis=${subscrname:-'inigomontoya'} + mypassis=${subscrpassword:-'youkeelmyfatherpreparetodie'} + sudo subscription-manager register --username=$mynameis --password=$mypassis --force + sudo subscription-manager refresh + if [ $? -eq 1 ]; then exit 1; fi + sudo subscription-manager attach --pool=8a85f9823e3d5e43013e3ddd4e2a0977 +fi +sudo subscription-manager repos --enable=rhel-7-server-rpms +sudo yum -y update diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/ceph_ansible.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/ceph_ansible.sh new file mode 100755 index 000000000..8581de601 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/ceph_ansible.sh @@ -0,0 +1,36 @@ +#! /usr/bin/env bash +cephnodes=$* +monnode=$1 +sudo yum -y install ceph-ansible +cd +sudo ./edit_ansible_hosts.sh $cephnodes +mkdir ceph-ansible-keys +cd /usr/share/ceph-ansible/group_vars/ +if [ -f ~/ip_info ]; then + source ~/ip_info +fi +mon_intf=${mon_intf:-'eno1'} +pub_netw=${pub_netw:-'10.8.128.0\/21'} +sudo cp all.sample all +sudo sed -i 's/#ceph_origin:.*/ceph_origin: distro/' all +sudo sed -i 's/#fetch_directory:.*/fetch_directory: ~\/ceph-ansible-keys/' all +sudo sed -i 's/#ceph_stable:.*/ceph_stable: true/' all +sudo sed -i 's/#ceph_stable_rh_storage:.*/ceph_stable_rh_storage: false/' all +sudo sed -i 's/#ceph_stable_rh_storage_cdn_install:.*/ceph_stable_rh_storage_cdn_install: true/' all +sudo sed -i 's/#cephx:.*/cephx: true/' all +sudo sed -i "s/#monitor_interface:.*/monitor_interface: ${mon_intf}/" all +sudo sed -i 's/#journal_size:.*/journal_size: 1024/' all +sudo sed -i "s/#public_network:.*/public_network: ${pub_netw}/" all +sudo cp osds.sample osds +sudo sed -i 's/#fetch_directory:.*/fetch_directory: ~\/ceph-ansible-keys/' osds +sudo sed -i 's/#crush_location:/crush_location:/' osds +sudo sed -i 's/#osd_crush_location:/osd_crush_location:/' osds +sudo sed -i 's/#cephx:/cephx:/' osds +sudo sed -i 's/#devices:/devices:/' osds +sudo sed -i 's/#journal_collocation:.*/journal_collocation: true/' osds +cd +sudo ./edit_groupvars_osds.sh +cd /usr/share/ceph-ansible +sudo cp site.yml.sample site.yml +ansible-playbook site.yml +ssh $monnode ~/ceph-pool-create.sh diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/edit_ansible_hosts.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/edit_ansible_hosts.sh new file mode 100755 index 000000000..7eb0b7011 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/edit_ansible_hosts.sh @@ -0,0 +1,17 @@ +#! /usr/bin/env bash +ed /etc/ansible/hosts << EOF +$ +a + +[mons] +${1} + +[osds] +${2} +${3} +${4} + +. 
+w +q +EOF diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/edit_groupvars_osds.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/edit_groupvars_osds.sh new file mode 100755 index 000000000..751658b09 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/execs/edit_groupvars_osds.sh @@ -0,0 +1,13 @@ +#! /usr/bin/env bash +ed /usr/share/ceph-ansible/group_vars/osds << EOF +$ +/^devices: +.+1 +i + - /dev/sdb + - /dev/sdc + - /dev/sdd +. +w +q +EOF diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/multi_action.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/multi_action.sh new file mode 100755 index 000000000..abc368b0a --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/multi_action.sh @@ -0,0 +1,19 @@ +#! /usr/bin/env bash +source copy_func.sh +allparms=$* +cmdv=$1 +shift +sites=$* +for mac in $sites; do + echo $cmdv $mac + if [ -f ~/secrets ]; then + copy_file ~/secrets $mac . 0777 ubuntu:ubuntu + fi + copy_file execs/${cmdv} $mac . 0777 ubuntu:ubuntu + ssh $mac ./${cmdv} & +done +./staller.sh $allparms +for mac in $sites; do + ssh $mac sudo rm -rf secrets +done +echo "DONE" diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/repolocs.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/repolocs.sh new file mode 100755 index 000000000..5d82f35d9 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/repolocs.sh @@ -0,0 +1,8 @@ +#! /usr/bin/env bash +SPECIFIC_VERSION=latest-Ceph-2-RHEL-7 +#SPECIFIC_VERSION=Ceph-2-RHEL-7-20160630.t.0 +#SPECIFIC_VERSION=Ceph-2.0-RHEL-7-20160718.t.0 +export CEPH_REPO_TOOLS=http://download.eng.bos.redhat.com/rcm-guest/ceph-drops/auto/ceph-2-rhel-7-compose/${SPECIFIC_VERSION}/compose/Tools/x86_64/os/ +export CEPH_REPO_MON=http://download.eng.bos.redhat.com/rcm-guest/ceph-drops/auto/ceph-2-rhel-7-compose/${SPECIFIC_VERSION}/compose/MON/x86_64/os/ +export CEPH_REPO_OSD=http://download.eng.bos.redhat.com/rcm-guest/ceph-drops/auto/ceph-2-rhel-7-compose/${SPECIFIC_VERSION}/compose/OSD/x86_64/os/ +export INSTALLER_REPO_LOC=http://download.eng.bos.redhat.com/rcm-guest/ceph-drops/auto/rhscon-2-rhel-7-compose/latest-RHSCON-2-RHEL-7/compose/Installer/x86_64/os/ diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/staller.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/staller.sh new file mode 100755 index 000000000..99c00da33 --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/staller.sh @@ -0,0 +1,15 @@ +#! 
/usr/bin/env bash +cmd_wait=$1 +shift +sites=$* +donebit=0 +while [ $donebit -ne 1 ]; do + sleep 10 + donebit=1 + for rem in $sites; do + rval=`ssh $rem ps aux | grep $cmd_wait | wc -l` + if [ $rval -gt 0 ]; then + donebit=0 + fi + done +done diff --git a/qa/qa_scripts/openstack/ceph_install_w_ansible/talknice.sh b/qa/qa_scripts/openstack/ceph_install_w_ansible/talknice.sh new file mode 100755 index 000000000..ffed4f1dc --- /dev/null +++ b/qa/qa_scripts/openstack/ceph_install_w_ansible/talknice.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +declare -A rsapub +for fulln in $*; do + sname=`echo $fulln | sed 's/\..*//'` + nhead=`echo $sname | sed 's/[0-9]*//g'` + x=`ssh $fulln "ls .ssh/id_rsa"` + if [ -z $x ]; then + ssh $fulln "ssh-keygen -N '' -f .ssh/id_rsa"; + fi + xx=`ssh $fulln "ls .ssh/config"` + if [ -z $xx ]; then + scp config $fulln:/home/ubuntu/.ssh/config + fi + ssh $fulln "chown ubuntu:ubuntu .ssh/config" + ssh $fulln "chmod 0600 .ssh/config" + rsapub[$fulln]=`ssh $fulln "cat .ssh/id_rsa.pub"` +done +for ii in $*; do + ssh $ii sudo iptables -F + for jj in $*; do + pval=${rsapub[$jj]} + if [ "$ii" != "$jj" ]; then + xxxx=`ssh $ii "grep $jj .ssh/authorized_keys"` + if [ -z "$xxxx" ]; then + ssh $ii "echo '$pval' | sudo tee -a /home/ubuntu/.ssh/authorized_keys" + fi + fi + done; +done diff --git a/qa/qa_scripts/openstack/connectceph.sh b/qa/qa_scripts/openstack/connectceph.sh new file mode 100755 index 000000000..d975daada --- /dev/null +++ b/qa/qa_scripts/openstack/connectceph.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# +# Connect openstack node just installed to a ceph cluster. +# +# Essentially implements: +# +# http://docs.ceph.com/en/latest/rbd/rbd-openstack/ +# +# The directory named files contains templates for the /etc/glance/glance-api.conf, +# /etc/cinder/cinder.conf, /etc/nova/nova.conf Openstack files +# +set -fv +source ./copy_func.sh +source ./fix_conf_file.sh +openstack_node=${1} +ceph_node=${2} + +scp $ceph_node:/etc/ceph/ceph.conf ./ceph.conf +ssh $openstack_node sudo mkdir /etc/ceph +copy_file ceph.conf $openstack_node /etc/ceph 0644 +rm -f ceph.conf +ssh $openstack_node sudo yum -y install python-rbd +ssh $openstack_node sudo yum -y install ceph-common +ssh $ceph_node "sudo ceph auth get-or-create client.cinder mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=volumes, allow rwx pool=vms, allow rx pool=images'" +ssh $ceph_node "sudo ceph auth get-or-create client.glance mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=images'" +ssh $ceph_node "sudo ceph auth get-or-create client.cinder-backup mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=backups'" +ssh $ceph_node sudo ceph auth get-or-create client.glance mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=images' +ssh $ceph_node sudo ceph auth get-or-create client.cinder-backup mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=backups' +ssh $ceph_node sudo ceph auth get-or-create client.glance | ssh $openstack_node sudo tee /etc/ceph/ceph.client.glance.keyring +ssh $openstack_node sudo chown glance:glance /etc/ceph/ceph.client.glance.keyring +ssh $ceph_node sudo ceph auth get-or-create client.cinder | ssh $openstack_node sudo tee /etc/ceph/ceph.client.cinder.keyring +ssh $openstack_node sudo chown cinder:cinder /etc/ceph/ceph.client.cinder.keyring +ssh $ceph_node sudo ceph auth get-or-create client.cinder-backup | ssh $openstack_node sudo tee 
/etc/ceph/ceph.client.cinder-backup.keyring +ssh $openstack_node sudo chown cinder:cinder /etc/ceph/ceph.client.cinder-backup.keyring +ssh $ceph_node sudo ceph auth get-key client.cinder | ssh $openstack_node tee client.cinder.key +copy_file execs/libvirt-secret.sh $openstack_node . +secret_msg=`ssh $openstack_node sudo ./libvirt-secret.sh $openstack_node` +secret_virt=`echo $secret_msg | sed 's/.* set //'` +echo $secret_virt +fix_conf_file $openstack_node glance-api /etc/glance +fix_conf_file $openstack_node cinder /etc/cinder $secret_virt +fix_conf_file $openstack_node nova /etc/nova $secret_virt +copy_file execs/start_openstack.sh $openstack_node . 0755 +ssh $openstack_node ./start_openstack.sh diff --git a/qa/qa_scripts/openstack/copy_func.sh b/qa/qa_scripts/openstack/copy_func.sh new file mode 100755 index 000000000..571980262 --- /dev/null +++ b/qa/qa_scripts/openstack/copy_func.sh @@ -0,0 +1,22 @@ +# +# copy_file(<filename>, <node>, <directory>, [<permissions>], [<owner>] +# +# copy a file -- this is needed because passwordless ssh does not +# work when sudo'ing. +# <file> -- name of local file to be copied +# <node> -- node where we want the file +# <directory> -- location where we want the file on <node> +# <permissions> -- (optional) permissions on the copied file +# <owner> -- (optional) owner of the copied file +# +function copy_file() { + fname=`basename ${1}` + scp ${1} ${2}:/tmp/${fname} + ssh ${2} sudo cp /tmp/${fname} ${3} + if [ $# -gt 3 ]; then + ssh ${2} sudo chmod ${4} ${3}/${fname} + fi + if [ $# -gt 4 ]; then + ssh ${2} sudo chown ${5} ${3}/${fname} + fi +} diff --git a/qa/qa_scripts/openstack/execs/ceph-pool-create.sh b/qa/qa_scripts/openstack/execs/ceph-pool-create.sh new file mode 100755 index 000000000..723c83069 --- /dev/null +++ b/qa/qa_scripts/openstack/execs/ceph-pool-create.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +set -f + +# +# On the ceph site, make the pools required for Openstack +# + +# +# Make a pool, if it does not already exist. +# +function make_pool { + if [[ -z `sudo ceph osd lspools | grep " $1,"` ]]; then + echo "making $1" + sudo ceph osd pool create $1 128 + fi +} + +# +# Make sure the pg_num and pgp_num values are good. 
+# +count=`sudo ceph osd pool get rbd pg_num | sed 's/pg_num: //'` +while [ $count -lt 128 ]; do + sudo ceph osd pool set rbd pg_num $count + count=`expr $count + 32` + sleep 30 +done +sudo ceph osd pool set rbd pg_num 128 +sleep 30 +sudo ceph osd pool set rbd pgp_num 128 +sleep 30 +make_pool volumes +make_pool images +make_pool backups +make_pool vms diff --git a/qa/qa_scripts/openstack/execs/ceph_cluster.sh b/qa/qa_scripts/openstack/execs/ceph_cluster.sh new file mode 100755 index 000000000..5afb3c787 --- /dev/null +++ b/qa/qa_scripts/openstack/execs/ceph_cluster.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +set -f + +echo $OS_CEPH_ISO +if [[ $# -ne 4 ]]; then + echo "Usage: ceph_cluster mon.0 osd.0 osd.1 osd.2" + exit -1 +fi +allsites=$* +mon=$1 +shift +osds=$* +ISOVAL=${OS_CEPH_ISO-rhceph-1.3.1-rhel-7-x86_64-dvd.iso} +sudo mount -o loop ${ISOVAL} /mnt + +fqdn=`hostname -f` +lsetup=`ls /mnt/Installer | grep "^ice_setup"` +sudo yum -y install /mnt/Installer/${lsetup} +sudo ice_setup -d /mnt << EOF +yes +/mnt +$fqdn +http +EOF +ceph-deploy new ${mon} +ceph-deploy install --repo --release=ceph-mon ${mon} +ceph-deploy install --repo --release=ceph-osd ${allsites} +ceph-deploy install --mon ${mon} +ceph-deploy install --osd ${allsites} +ceph-deploy mon create-initial +sudo service ceph -a start osd +for d in b c d; do + for m in $osds; do + ceph-deploy disk zap ${m}:sd${d} + done + for m in $osds; do + ceph-deploy osd prepare ${m}:sd${d} + done + for m in $osds; do + ceph-deploy osd activate ${m}:sd${d}1:sd${d}2 + done +done + +sudo ./ceph-pool-create.sh + +hchk=`sudo ceph health` +while [[ $hchk != 'HEALTH_OK' ]]; do + sleep 30 + hchk=`sudo ceph health` +done diff --git a/qa/qa_scripts/openstack/execs/libvirt-secret.sh b/qa/qa_scripts/openstack/execs/libvirt-secret.sh new file mode 100755 index 000000000..75e9e91a0 --- /dev/null +++ b/qa/qa_scripts/openstack/execs/libvirt-secret.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -f + +# +# Generate a libvirt secret on the Openstack node. +# +openstack_node=${1} +uuid=`uuidgen` +cat > secret.xml <<EOF +<secret ephemeral='no' private='no'> + <uuid>${uuid}</uuid> + <usage type='ceph'> + <name>client.cinder secret</name> + </usage> +</secret> +EOF +sudo virsh secret-define --file secret.xml +sudo virsh secret-set-value --secret ${uuid} --base64 $(cat client.cinder.key) +echo ${uuid} diff --git a/qa/qa_scripts/openstack/execs/openstack-preinstall.sh b/qa/qa_scripts/openstack/execs/openstack-preinstall.sh new file mode 100755 index 000000000..a2b235e76 --- /dev/null +++ b/qa/qa_scripts/openstack/execs/openstack-preinstall.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +set -f + +# +# Remotely setup the stuff needed to run packstack. 
This should do items 1-4 in +# https://docs.google.com/document/d/1us18KR3LuLyINgGk2rmI-SVj9UksCE7y4C2D_68Aa8o/edit?ts=56a78fcb +# +yum remove -y rhos-release +rpm -ivh http://rhos-release.virt.bos.redhat.com/repos/rhos-release/rhos-release-latest.noarch.rpm +rm -rf /etc/yum.repos.d/* +rm -rf /var/cache/yum/* +rhos-release 8 +yum update -y +yum install -y nc puppet vim screen setroubleshoot crudini bpython openstack-packstack +systemctl disable ntpd +systemctl stop ntpd +reboot diff --git a/qa/qa_scripts/openstack/execs/run_openstack.sh b/qa/qa_scripts/openstack/execs/run_openstack.sh new file mode 100755 index 000000000..8764cbeb6 --- /dev/null +++ b/qa/qa_scripts/openstack/execs/run_openstack.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +set -fv + +# +# Create a glance image, a corresponding cinder volume, a nova instance, attach, the cinder volume to the +# nova instance, and create a backup. +# +image_name=${1}X +file_name=${2-rhel-server-7.2-x86_64-boot.iso} +source ./keystonerc_admin +glance image-create --name $image_name --disk-format iso --container-format bare --file $file_name +glance_id=`glance image-list | grep ${image_name} | sed 's/^| //' | sed 's/ |.*//'` +cinder create --image-id ${glance_id} --display-name ${image_name}-volume 8 +nova boot --image ${image_name} --flavor 1 ${image_name}-inst +cinder_id=`cinder list | grep ${image_name} | sed 's/^| //' | sed 's/ |.*//'` +chkr=`cinder list | grep ${image_name}-volume | grep available` +while [ -z "$chkr" ]; do + sleep 30 + chkr=`cinder list | grep ${image_name}-volume | grep available` +done +nova volume-attach ${image_name}-inst ${cinder_id} auto +sleep 30 +cinder backup-create --name ${image_name}-backup ${image_name}-volume --force diff --git a/qa/qa_scripts/openstack/execs/start_openstack.sh b/qa/qa_scripts/openstack/execs/start_openstack.sh new file mode 100755 index 000000000..f5f12fe50 --- /dev/null +++ b/qa/qa_scripts/openstack/execs/start_openstack.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -fv + +# +# start the Openstack services +# +sudo cp /root/keystonerc_admin ./keystonerc_admin +sudo chmod 0644 ./keystonerc_admin +source ./keystonerc_admin +sudo service httpd stop +sudo service openstack-keystone restart +sudo service openstack-glance-api restart +sudo service openstack-nova-compute restart +sudo service openstack-cinder-volume restart +sudo service openstack-cinder-backup restart diff --git a/qa/qa_scripts/openstack/files/cinder.template.conf b/qa/qa_scripts/openstack/files/cinder.template.conf new file mode 100644 index 000000000..807125ac3 --- /dev/null +++ b/qa/qa_scripts/openstack/files/cinder.template.conf @@ -0,0 +1,3481 @@ +[DEFAULT] + +# +# From cinder +# + +# Backup metadata version to be used when backing up volume metadata. If this +# number is bumped, make sure the service doing the restore supports the new +# version. 
(integer value) +#backup_metadata_version = 2 + +# The number of chunks or objects, for which one Ceilometer notification will +# be sent (integer value) +#backup_object_number_per_notification = 10 + +# Interval, in seconds, between two progress notifications reporting the backup +# status (integer value) +#backup_timer_interval = 120 + +# The maximum number of items that a collection resource returns in a single +# response (integer value) +#osapi_max_limit = 1000 + +# Base URL that will be presented to users in links to the OpenStack Volume API +# (string value) +# Deprecated group/name - [DEFAULT]/osapi_compute_link_prefix +#osapi_volume_base_URL = <None> + +# Ceph configuration file to use. (string value) +#backup_ceph_conf = /etc/ceph/ceph.conf +backup_ceph_conf = /etc/ceph/ceph.conf + +# The Ceph user to connect with. Default here is to use the same user as for +# Cinder volumes. If not using cephx this should be set to None. (string value) +#backup_ceph_user = cinder +backup_ceph_user = cinder-backup + +# The chunk size, in bytes, that a backup is broken into before transfer to the +# Ceph object store. (integer value) +#backup_ceph_chunk_size = 134217728 +backup_ceph_chunk_size = 134217728 + +# The Ceph pool where volume backups are stored. (string value) +#backup_ceph_pool = backups +backup_ceph_pool = backups + +# RBD stripe unit to use when creating a backup image. (integer value) +#backup_ceph_stripe_unit = 0 +backup_ceph_stripe_unit = 0 + +# RBD stripe count to use when creating a backup image. (integer value) +#backup_ceph_stripe_count = 0 +backup_ceph_stripe_count = 0 + +# If True, always discard excess bytes when restoring volumes i.e. pad with +# zeroes. (boolean value) +#restore_discard_excess_bytes = true +restore_discard_excess_bytes = true + +# File with the list of available smbfs shares. (string value) +#smbfs_shares_config = /etc/cinder/smbfs_shares + +# Default format that will be used when creating volumes if no volume format is +# specified. (string value) +# Allowed values: raw, qcow2, vhd, vhdx +#smbfs_default_volume_format = qcow2 + +# Create volumes as sparsed files which take no space rather than regular files +# when using raw format, in which case volume creation takes lot of time. +# (boolean value) +#smbfs_sparsed_volumes = true + +# Percent of ACTUAL usage of the underlying volume before no new volumes can be +# allocated to the volume destination. (floating point value) +#smbfs_used_ratio = 0.95 + +# This will compare the allocated to available space on the volume destination. +# If the ratio exceeds this number, the destination will no longer be valid. +# (floating point value) +#smbfs_oversub_ratio = 1.0 + +# Base dir containing mount points for smbfs shares. (string value) +#smbfs_mount_point_base = $state_path/mnt + +# Mount options passed to the smbfs client. See mount.cifs man page for +# details. (string value) +#smbfs_mount_options = noperm,file_mode=0775,dir_mode=0775 + +# Compression algorithm (None to disable) (string value) +#backup_compression_algorithm = zlib + +# Use thin provisioning for SAN volumes? 
(boolean value) +#san_thin_provision = true + +# IP address of SAN controller (string value) +#san_ip = + +# Username for SAN controller (string value) +#san_login = admin + +# Password for SAN controller (string value) +#san_password = + +# Filename of private key to use for SSH authentication (string value) +#san_private_key = + +# Cluster name to use for creating volumes (string value) +#san_clustername = + +# SSH port to use with SAN (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#san_ssh_port = 22 + +# Execute commands locally instead of over SSH; use if the volume service is +# running on the SAN device (boolean value) +#san_is_local = false + +# SSH connection timeout in seconds (integer value) +#ssh_conn_timeout = 30 + +# Minimum ssh connections in the pool (integer value) +#ssh_min_pool_conn = 1 + +# Maximum ssh connections in the pool (integer value) +#ssh_max_pool_conn = 5 + +# Configuration file for HDS NFS cinder plugin (string value) +#hds_hnas_nfs_config_file = /opt/hds/hnas/cinder_nfs_conf.xml + +# Global backend request timeout, in seconds. (integer value) +#violin_request_timeout = 300 + +# Option to enable strict host key checking. When set to "True" Cinder will +# only connect to systems with a host key present in the configured +# "ssh_hosts_key_file". When set to "False" the host key will be saved upon +# first connection and used for subsequent connections. Default=False (boolean +# value) +#strict_ssh_host_key_policy = false + +# File containing SSH host keys for the systems with which Cinder needs to +# communicate. OPTIONAL: Default=$state_path/ssh_known_hosts (string value) +#ssh_hosts_key_file = $state_path/ssh_known_hosts + +# The storage family type used on the storage system; valid values are +# ontap_7mode for using Data ONTAP operating in 7-Mode, ontap_cluster for using +# clustered Data ONTAP, or eseries for using E-Series. (string value) +# Allowed values: ontap_7mode, ontap_cluster, eseries +#netapp_storage_family = ontap_cluster + +# The storage protocol to be used on the data path with the storage system. +# (string value) +# Allowed values: iscsi, fc, nfs +#netapp_storage_protocol = <None> + +# The hostname (or IP address) for the storage system or proxy server. (string +# value) +#netapp_server_hostname = <None> + +# The TCP port to use for communication with the storage system or proxy +# server. If not specified, Data ONTAP drivers will use 80 for HTTP and 443 for +# HTTPS; E-Series will use 8080 for HTTP and 8443 for HTTPS. (integer value) +#netapp_server_port = <None> + +# The transport protocol used when communicating with the storage system or +# proxy server. (string value) +# Allowed values: http, https +#netapp_transport_type = http + +# Administrative user account name used to access the storage system or proxy +# server. (string value) +#netapp_login = <None> + +# Password for the administrative user account specified in the netapp_login +# option. (string value) +#netapp_password = <None> + +# This option specifies the virtual storage server (Vserver) name on the +# storage cluster on which provisioning of block storage volumes should occur. +# (string value) +#netapp_vserver = <None> + +# The vFiler unit on which provisioning of block storage volumes will be done. +# This option is only used by the driver when connecting to an instance with a +# storage family of Data ONTAP operating in 7-Mode. Only use this option when +# utilizing the MultiStore feature on the NetApp storage system. 
(string value) +#netapp_vfiler = <None> + +# The name of the config.conf stanza for a Data ONTAP (7-mode) HA partner. +# This option is only used by the driver when connecting to an instance with a +# storage family of Data ONTAP operating in 7-Mode, and it is required if the +# storage protocol selected is FC. (string value) +#netapp_partner_backend_name = <None> + +# The quantity to be multiplied by the requested volume size to ensure enough +# space is available on the virtual storage server (Vserver) to fulfill the +# volume creation request. Note: this option is deprecated and will be removed +# in favor of "reserved_percentage" in the Mitaka release. (floating point +# value) +#netapp_size_multiplier = 1.2 + +# This option determines if storage space is reserved for LUN allocation. If +# enabled, LUNs are thick provisioned. If space reservation is disabled, +# storage space is allocated on demand. (string value) +# Allowed values: enabled, disabled +#netapp_lun_space_reservation = enabled + +# If the percentage of available space for an NFS share has dropped below the +# value specified by this option, the NFS image cache will be cleaned. (integer +# value) +#thres_avl_size_perc_start = 20 + +# When the percentage of available space on an NFS share has reached the +# percentage specified by this option, the driver will stop clearing files from +# the NFS image cache that have not been accessed in the last M minutes, where +# M is the value of the expiry_thres_minutes configuration option. (integer +# value) +#thres_avl_size_perc_stop = 60 + +# This option specifies the threshold for last access time for images in the +# NFS image cache. When a cache cleaning cycle begins, images in the cache that +# have not been accessed in the last M minutes, where M is the value of this +# parameter, will be deleted from the cache to create free space on the NFS +# share. (integer value) +#expiry_thres_minutes = 720 + +# This option is used to specify the path to the E-Series proxy application on +# a proxy server. The value is combined with the value of the +# netapp_transport_type, netapp_server_hostname, and netapp_server_port options +# to create the URL used by the driver to connect to the proxy application. +# (string value) +#netapp_webservice_path = /devmgr/v2 + +# This option is only utilized when the storage family is configured to +# eseries. This option is used to restrict provisioning to the specified +# controllers. Specify the value of this option to be a comma separated list of +# controller hostnames or IP addresses to be used for provisioning. (string +# value) +#netapp_controller_ips = <None> + +# Password for the NetApp E-Series storage array. (string value) +#netapp_sa_password = <None> + +# This option specifies whether the driver should allow operations that require +# multiple attachments to a volume. An example would be live migration of +# servers that have volumes attached. When enabled, this backend is limited to +# 256 total volumes in order to guarantee volumes can be accessed by more than +# one host. (boolean value) +#netapp_enable_multiattach = false + +# This option specifies the path of the NetApp copy offload tool binary. Ensure +# that the binary has execute permissions set which allow the effective user of +# the cinder-volume process to execute the file. 
(string value) +#netapp_copyoffload_tool_path = <None> + +# This option defines the type of operating system that will access a LUN +# exported from Data ONTAP; it is assigned to the LUN at the time it is +# created. (string value) +#netapp_lun_ostype = <None> + +# This option defines the type of operating system for all initiators that can +# access a LUN. This information is used when mapping LUNs to individual hosts +# or groups of hosts. (string value) +# Deprecated group/name - [DEFAULT]/netapp_eseries_host_type +#netapp_host_type = <None> + +# This option is used to restrict provisioning to the specified pools. Specify +# the value of this option to be a regular expression which will be applied to +# the names of objects from the storage backend which represent pools in +# Cinder. This option is only utilized when the storage protocol is configured +# to use iSCSI or FC. (string value) +# Deprecated group/name - [DEFAULT]/netapp_volume_list +# Deprecated group/name - [DEFAULT]/netapp_storage_pools +#netapp_pool_name_search_pattern = (.+) + +# Base dir containing mount point for gluster share. (string value) +#glusterfs_backup_mount_point = $state_path/backup_mount + +# GlusterFS share in <hostname|ipv4addr|ipv6addr>:<gluster_vol_name> format. +# Eg: 1.2.3.4:backup_vol (string value) +#glusterfs_backup_share = <None> + +# Volume prefix for the backup id when backing up to TSM (string value) +#backup_tsm_volume_prefix = backup + +# TSM password for the running username (string value) +#backup_tsm_password = password + +# Enable or Disable compression for backups (boolean value) +#backup_tsm_compression = true + +# Request for FC Zone creating host group (boolean value) +#hpxp_zoning_request = false + +# Type of storage command line interface (string value) +#hpxp_storage_cli = <None> + +# ID of storage system (string value) +#hpxp_storage_id = <None> + +# Pool of storage system (string value) +#hpxp_pool = <None> + +# Thin pool of storage system (string value) +#hpxp_thin_pool = <None> + +# Logical device range of storage system (string value) +#hpxp_ldev_range = <None> + +# Default copy method of storage system. There are two valid values: "FULL" +# specifies that a full copy; "THIN" specifies that a thin copy. 
Default value +# is "FULL" (string value) +#hpxp_default_copy_method = FULL + +# Copy speed of storage system (integer value) +#hpxp_copy_speed = 3 + +# Interval to check copy (integer value) +#hpxp_copy_check_interval = 3 + +# Interval to check copy asynchronously (integer value) +#hpxp_async_copy_check_interval = 10 + +# Target port names for host group or iSCSI target (list value) +#hpxp_target_ports = <None> + +# Target port names of compute node for host group or iSCSI target (list value) +#hpxp_compute_target_ports = <None> + +# Request for creating host group or iSCSI target (boolean value) +#hpxp_group_request = false + +# Instance numbers for HORCM (list value) +#hpxp_horcm_numbers = 200,201 + +# Username of storage system for HORCM (string value) +#hpxp_horcm_user = <None> + +# Add to HORCM configuration (boolean value) +#hpxp_horcm_add_conf = true + +# Resource group name of storage system for HORCM (string value) +#hpxp_horcm_resource_name = meta_resource + +# Only discover a specific name of host group or iSCSI target (boolean value) +#hpxp_horcm_name_only_discovery = false + +# Storage system storage pool for volumes (string value) +#storwize_svc_volpool_name = volpool + +# Storage system space-efficiency parameter for volumes (percentage) (integer +# value) +# Minimum value: -1 +# Maximum value: 100 +#storwize_svc_vol_rsize = 2 + +# Storage system threshold for volume capacity warnings (percentage) (integer +# value) +# Minimum value: -1 +# Maximum value: 100 +#storwize_svc_vol_warning = 0 + +# Storage system autoexpand parameter for volumes (True/False) (boolean value) +#storwize_svc_vol_autoexpand = true + +# Storage system grain size parameter for volumes (32/64/128/256) (integer +# value) +#storwize_svc_vol_grainsize = 256 + +# Storage system compression option for volumes (boolean value) +#storwize_svc_vol_compression = false + +# Enable Easy Tier for volumes (boolean value) +#storwize_svc_vol_easytier = true + +# The I/O group in which to allocate volumes (integer value) +#storwize_svc_vol_iogrp = 0 + +# Maximum number of seconds to wait for FlashCopy to be prepared. (integer +# value) +# Minimum value: 1 +# Maximum value: 600 +#storwize_svc_flashcopy_timeout = 120 + +# Connection protocol (iSCSI/FC) (string value) +#storwize_svc_connection_protocol = iSCSI + +# Configure CHAP authentication for iSCSI connections (Default: Enabled) +# (boolean value) +#storwize_svc_iscsi_chap_enabled = true + +# Connect with multipath (FC only; iSCSI multipath is controlled by Nova) +# (boolean value) +#storwize_svc_multipath_enabled = false + +# Allows vdisk to multi host mapping (boolean value) +#storwize_svc_multihostmap_enabled = true + +# Indicate whether svc driver is compatible for NPIV setup. If it is +# compatible, it will allow no wwpns being returned on get_conn_fc_wwpns during +# initialize_connection. It should always be set to True. It will be deprecated +# and removed in M release. (boolean value) +#storwize_svc_npiv_compatibility_mode = true + +# Allow tenants to specify QOS on create (boolean value) +#storwize_svc_allow_tenant_qos = false + +# If operating in stretched cluster mode, specify the name of the pool in which +# mirrored copies are stored.Example: "pool2" (string value) +#storwize_svc_stretched_cluster_partner = <None> + +# Driver to use for backups. (string value) +#backup_driver = cinder.backup.drivers.swift +backup_driver = cinder.backup.drivers.ceph + +# Offload pending backup delete during backup service startup. 
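# With backup_driver pointed at cinder.backup.drivers.ceph as above, backups
# are written as RBD images into the pool named by backup_ceph_pool. A hedged
# smoke test using the standard cinder and rbd clients (the volume name 'vol1'
# is only a placeholder):
#
#   cinder backup-create --name vol1-backup vol1
#   cinder backup-list
#   rbd -p backups ls --id cinder-backup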
(boolean value) +#backup_service_inithost_offload = false + +# Make exception message format errors fatal. (boolean value) +#fatal_exception_format_errors = false + +# IP address of this host (string value) +#my_ip = 10.16.48.99 + +# Default glance host name or IP (string value) +#glance_host = $my_ip +glance_host = VARINET4ADDR + +# Default glance port (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#glance_port = 9292 + +# A list of the glance API servers available to cinder ([hostname|ip]:port) +# (list value) +#glance_api_servers = $glance_host:$glance_port + +# Version of the glance API to use (integer value) +#glance_api_version = 1 + +# Number retries when downloading an image from glance (integer value) +#glance_num_retries = 0 + +# Allow to perform insecure SSL (https) requests to glance (boolean value) +#glance_api_insecure = false + +# Enables or disables negotiation of SSL layer compression. In some cases +# disabling compression can improve data throughput, such as when high network +# bandwidth is available and you use compressed image formats like qcow2. +# (boolean value) +#glance_api_ssl_compression = false + +# Location of ca certificates file to use for glance client requests. (string +# value) +#glance_ca_certificates_file = <None> + +# http/https timeout value for glance operations. If no value (None) is +# supplied here, the glanceclient default value is used. (integer value) +#glance_request_timeout = <None> + +# The topic that scheduler nodes listen on (string value) +#scheduler_topic = cinder-scheduler + +# The topic that volume nodes listen on (string value) +#volume_topic = cinder-volume + +# The topic that volume backup nodes listen on (string value) +#backup_topic = cinder-backup + +# DEPRECATED: Deploy v1 of the Cinder API. (boolean value) +#enable_v1_api = true +enable_v1_api = True + +# Deploy v2 of the Cinder API. (boolean value) +#enable_v2_api = true +enable_v2_api = True + +# Enables or disables rate limit of the API. (boolean value) +#api_rate_limit = true + +# Specify list of extensions to load when using osapi_volume_extension option +# with cinder.api.contrib.select_extensions (list value) +#osapi_volume_ext_list = + +# osapi volume extension to load (multi valued) +#osapi_volume_extension = cinder.api.contrib.standard_extensions + +# Full class name for the Manager for volume (string value) +#volume_manager = cinder.volume.manager.VolumeManager + +# Full class name for the Manager for volume backup (string value) +#backup_manager = cinder.backup.manager.BackupManager + +# Full class name for the Manager for scheduler (string value) +#scheduler_manager = cinder.scheduler.manager.SchedulerManager + +# Name of this node. This can be an opaque identifier. It is not necessarily a +# host name, FQDN, or IP address. (string value) +#host = x86-024.build.eng.bos.redhat.com +host = VARHOSTNAME + +# Availability zone of this node (string value) +#storage_availability_zone = nova +storage_availability_zone = nova + +# Default availability zone for new volumes. If not set, the +# storage_availability_zone option value is used as the default for new +# volumes. (string value) +#default_availability_zone = <None> +default_availability_zone = nova + +# If the requested Cinder availability zone is unavailable, fall back to the +# value of default_availability_zone, then storage_availability_zone, instead +# of failing. 
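# Worked example of the substitution used above: with glance_host set to
# VARINET4ADDR and glance_port left at its default of 9292, the default
# glance_api_servers value of $glance_host:$glance_port resolves to
# VARINET4ADDR:9292. Listing more than one API server explicitly would look
# like the following (the second address is purely illustrative):
#
#   glance_api_servers = VARINET4ADDR:9292,10.0.0.2:9292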
(boolean value) +#allow_availability_zone_fallback = false + +# Default volume type to use (string value) +#default_volume_type = <None> + +# Time period for which to generate volume usages. The options are hour, day, +# month, or year. (string value) +#volume_usage_audit_period = month + +# Path to the rootwrap configuration file to use for running commands as root +# (string value) +#rootwrap_config = /etc/cinder/rootwrap.conf + +# Enable monkey patching (boolean value) +#monkey_patch = false + +# List of modules/decorators to monkey patch (list value) +#monkey_patch_modules = + +# Maximum time since last check-in for a service to be considered up (integer +# value) +#service_down_time = 60 + +# The full class name of the volume API class to use (string value) +#volume_api_class = cinder.volume.api.API + +# The full class name of the volume backup API class (string value) +#backup_api_class = cinder.backup.api.API + +# The strategy to use for auth. Supports noauth, keystone, and deprecated. +# (string value) +# Allowed values: noauth, keystone, deprecated +#auth_strategy = keystone +auth_strategy = keystone + +# A list of backend names to use. These backend names should be backed by a +# unique [CONFIG] group with its options (list value) +#enabled_backends = <None> +enabled_backends = ceph + +# Whether snapshots count against gigabyte quota (boolean value) +#no_snapshot_gb_quota = false + +# The full class name of the volume transfer API class (string value) +#transfer_api_class = cinder.transfer.api.API + +# The full class name of the volume replication API class (string value) +#replication_api_class = cinder.replication.api.API + +# The full class name of the consistencygroup API class (string value) +#consistencygroup_api_class = cinder.consistencygroup.api.API + +# OpenStack privileged account username. Used for requests to other services +# (such as Nova) that require an account with special rights. (string value) +#os_privileged_user_name = <None> + +# Password associated with the OpenStack privileged account. (string value) +#os_privileged_user_password = <None> + +# Tenant name associated with the OpenStack privileged account. (string value) +#os_privileged_user_tenant = <None> + +# Auth URL associated with the OpenStack privileged account. (string value) +#os_privileged_user_auth_url = <None> + +# Multiplier used for weighing volume capacity. Negative numbers mean to stack +# vs spread. (floating point value) +#capacity_weight_multiplier = 1.0 + +# Multiplier used for weighing volume capacity. Negative numbers mean to stack +# vs spread. (floating point value) +#allocated_capacity_weight_multiplier = -1.0 + +# IP address of sheep daemon. (string value) +#sheepdog_store_address = 127.0.0.1 + +# Port of sheep daemon. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#sheepdog_store_port = 7000 + +# Specifies the path of the GPFS directory where Block Storage volume and +# snapshot files are stored. (string value) +#gpfs_mount_point_base = <None> + +# Specifies the path of the Image service repository in GPFS. Leave undefined +# if not storing images in GPFS. (string value) +#gpfs_images_dir = <None> + +# Specifies the type of image copy to be used. Set this when the Image service +# repository also uses GPFS so that image files can be transferred efficiently +# from the Image service to the Block Storage service. 
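# The enabled_backends = ceph setting above expects a matching [ceph] group
# with its own options elsewhere in this file. A minimal sketch of such a
# stanza, assuming the RBD driver and placeholder values rather than the ones
# actually used in this deployment:
#
#   [ceph]
#   volume_driver = cinder.volume.drivers.rbd.RBDDriver
#   volume_backend_name = ceph
#   rbd_pool = volumes
#   rbd_user = cinder
#   rbd_ceph_conf = /etc/ceph/ceph.conf
#   rbd_secret_uuid = <libvirt secret uuid>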
There are two valid +# values: "copy" specifies that a full copy of the image is made; +# "copy_on_write" specifies that copy-on-write optimization strategy is used +# and unmodified blocks of the image file are shared efficiently. (string +# value) +# Allowed values: copy, copy_on_write, <None> +#gpfs_images_share_mode = <None> + +# Specifies an upper limit on the number of indirections required to reach a +# specific block due to snapshots or clones. A lengthy chain of copy-on-write +# snapshots or clones can have a negative impact on performance, but improves +# space utilization. 0 indicates unlimited clone depth. (integer value) +#gpfs_max_clone_depth = 0 + +# Specifies that volumes are created as sparse files which initially consume no +# space. If set to False, the volume is created as a fully allocated file, in +# which case, creation may take a significantly longer time. (boolean value) +#gpfs_sparse_volumes = true + +# Specifies the storage pool that volumes are assigned to. By default, the +# system storage pool is used. (string value) +#gpfs_storage_pool = system + +# Set 512 byte emulation on volume creation; (boolean value) +#sf_emulate_512 = true + +# Allow tenants to specify QOS on create (boolean value) +#sf_allow_tenant_qos = false + +# Create SolidFire accounts with this prefix. Any string can be used here, but +# the string "hostname" is special and will create a prefix using the cinder +# node hostname (previous default behavior). The default is NO prefix. (string +# value) +#sf_account_prefix = <None> + +# Account name on the SolidFire Cluster to use as owner of template/cache +# volumes (created if does not exist). (string value) +#sf_template_account_name = openstack-vtemplate + +# Create an internal cache of copy of images when a bootable volume is created +# to eliminate fetch from glance and qemu-conversion on subsequent calls. +# (boolean value) +#sf_allow_template_caching = true + +# Overrides default cluster SVIP with the one specified. This is required or +# deployments that have implemented the use of VLANs for iSCSI networks in +# their cloud. (string value) +#sf_svip = <None> + +# Create an internal mapping of volume IDs and account. Optimizes lookups and +# performance at the expense of memory, very large deployments may want to +# consider setting to False. (boolean value) +#sf_enable_volume_mapping = true + +# SolidFire API port. Useful if the device api is behind a proxy on a different +# port. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#sf_api_port = 443 + +# IBMNAS platform type to be used as backend storage; valid values are - v7ku : +# for using IBM Storwize V7000 Unified, sonas : for using IBM Scale Out NAS, +# gpfs-nas : for using NFS based IBM GPFS deployments. (string value) +# Allowed values: v7ku, sonas, gpfs-nas +#ibmnas_platform_type = v7ku + +# The URL of the Swift endpoint (string value) +#backup_swift_url = <None> +backup_swift_url = http://VARINET4ADDR:8080/v1/AUTH_ + +# Info to match when looking for swift in the service catalog. Format is: +# separated values of the form: <service_type>:<service_name>:<endpoint_type> - +# Only used if backup_swift_url is unset (string value) +#swift_catalog_info = object-store:swift:publicURL + +# Swift authentication mechanism (string value) +#backup_swift_auth = per_user + +# Swift authentication version. Specify "1" for auth 1.0, or "2" for auth 2.0 +# (string value) +#backup_swift_auth_version = 1 + +# Swift tenant/account name. 
Required when connecting to an auth 2.0 system +# (string value) +#backup_swift_tenant = <None> + +# Swift user name (string value) +#backup_swift_user = <None> + +# Swift key for authentication (string value) +#backup_swift_key = <None> + +# The default Swift container to use (string value) +#backup_swift_container = volumebackups +backup_swift_container = volumes_backup + +# The size in bytes of Swift backup objects (integer value) +#backup_swift_object_size = 52428800 + +# The size in bytes that changes are tracked for incremental backups. +# backup_swift_object_size has to be multiple of backup_swift_block_size. +# (integer value) +#backup_swift_block_size = 32768 + +# The number of retries to make for Swift operations (integer value) +#backup_swift_retry_attempts = 3 + +# The backoff time in seconds between Swift retries (integer value) +#backup_swift_retry_backoff = 2 + +# Enable or Disable the timer to send the periodic progress notifications to +# Ceilometer when backing up the volume to the Swift backend storage. The +# default value is True to enable the timer. (boolean value) +#backup_swift_enable_progress_timer = true + +# Location of the CA certificate file to use for swift client requests. (string +# value) +#backup_swift_ca_cert_file = <None> + +# These values will be used for CloudByte storage's addQos API call. (dict +# value) +#cb_add_qosgroup = graceallowed:false,iops:10,iopscontrol:true,latency:15,memlimit:0,networkspeed:0,throughput:0,tpcontrol:false + +# These values will be used for CloudByte storage's createVolume API call. +# (dict value) +#cb_create_volume = blocklength:512B,compression:off,deduplication:off,protocoltype:ISCSI,recordsize:16k,sync:always + +# Driver will use this API key to authenticate against the CloudByte storage's +# management interface. (string value) +#cb_apikey = <None> + +# CloudByte storage specific account name. This maps to a project name in +# OpenStack. (string value) +#cb_account_name = <None> + +# This corresponds to the name of Tenant Storage Machine (TSM) in CloudByte +# storage. A volume will be created in this TSM. (string value) +#cb_tsm_name = <None> + +# A retry value in seconds. Will be used by the driver to check if volume +# creation was successful in CloudByte storage. (integer value) +#cb_confirm_volume_create_retry_interval = 5 + +# Will confirm a successful volume creation in CloudByte storage by making this +# many number of attempts. (integer value) +#cb_confirm_volume_create_retries = 3 + +# A retry value in seconds. Will be used by the driver to check if volume +# deletion was successful in CloudByte storage. (integer value) +#cb_confirm_volume_delete_retry_interval = 5 + +# Will confirm a successful volume deletion in CloudByte storage by making this +# many number of attempts. (integer value) +#cb_confirm_volume_delete_retries = 3 + +# This corresponds to the discovery authentication group in CloudByte storage. +# Chap users are added to this group. Driver uses the first user found for this +# group. Default value is None. (string value) +#cb_auth_group = None + +# Interval, in seconds, between nodes reporting state to datastore (integer +# value) +#report_interval = 10 + +# Interval, in seconds, between running periodic tasks (integer value) +#periodic_interval = 60 + +# Range, in seconds, to randomly delay when starting the periodic task +# scheduler to reduce stampeding. 
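# Quick arithmetic check of the multiple-of constraint stated above: the
# defaults are self-consistent, since backup_swift_object_size /
# backup_swift_block_size = 52428800 / 32768 = 1600 exactly.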
(Disable by setting to 0) (integer value) +#periodic_fuzzy_delay = 60 + +# IP address on which OpenStack Volume API listens (string value) +#osapi_volume_listen = 0.0.0.0 +osapi_volume_listen = 0.0.0.0 + +# Port on which OpenStack Volume API listens (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#osapi_volume_listen_port = 8776 + +# Number of workers for OpenStack Volume API service. The default is equal to +# the number of CPUs available. (integer value) +#osapi_volume_workers = <None> +osapi_volume_workers = 12 + +# The full class name of the compute API class to use (string value) +#compute_api_class = cinder.compute.nova.API + +# Number of nodes that should replicate the data. (string value) +#drbdmanage_redundancy = 1 + +# Pool or Vdisk name to use for volume creation. (string value) +#dothill_backend_name = A + +# linear (for Vdisk) or virtual (for Pool). (string value) +# Allowed values: linear, virtual +#dothill_backend_type = virtual + +# DotHill API interface protocol. (string value) +# Allowed values: http, https +#dothill_api_protocol = https + +# Whether to verify DotHill array SSL certificate. (boolean value) +#dothill_verify_certificate = false + +# DotHill array SSL certificate path. (string value) +#dothill_verify_certificate_path = <None> + +# List of comma-separated target iSCSI IP addresses. (list value) +#dothill_iscsi_ips = + +# File with the list of available gluster shares (string value) +#glusterfs_shares_config = /etc/cinder/glusterfs_shares + +# Base dir containing mount points for gluster shares. (string value) +#glusterfs_mount_point_base = $state_path/mnt + +# REST API authorization token. (string value) +#pure_api_token = <None> + +# ID of the project which will be used as the Cinder internal tenant. (string +# value) +#cinder_internal_tenant_project_id = <None> + +# ID of the user to be used in volume operations as the Cinder internal tenant. +# (string value) +#cinder_internal_tenant_user_id = <None> + +# The scheduler host manager class to use (string value) +#scheduler_host_manager = cinder.scheduler.host_manager.HostManager + +# Maximum number of attempts to schedule an volume (integer value) +#scheduler_max_attempts = 3 + +# Path or URL to Scality SOFS configuration file (string value) +#scality_sofs_config = <None> + +# Base dir where Scality SOFS shall be mounted (string value) +#scality_sofs_mount_point = $state_path/scality + +# Path from Scality SOFS root to volume dir (string value) +#scality_sofs_volume_dir = cinder/volumes + +# VNX authentication scope type. (string value) +#storage_vnx_authentication_type = global + +# Directory path that contains the VNX security file. Make sure the security +# file is generated first. (string value) +#storage_vnx_security_file_dir = <None> + +# Naviseccli Path. (string value) +#naviseccli_path = + +# Comma-separated list of storage pool names to be used. (string value) +# Deprecated group/name - [DEFAULT]/storage_vnx_pool_name +#storage_vnx_pool_names = <None> + +# VNX secondary SP IP Address. (string value) +#san_secondary_ip = <None> + +# Default timeout for CLI operations in minutes. For example, LUN migration is +# a typical long running operation, which depends on the LUN size and the load +# of the array. An upper bound in the specific deployment can be set to avoid +# unnecessary long wait. By default, it is 365 days long. (integer value) +#default_timeout = 525600 + +# Default max number of LUNs in a storage group. By default, the value is 255. 
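# With the API bound to 0.0.0.0:8776 as configured above, a simple smoke test
# is to fetch the unauthenticated version document from the service root
# (assumes the API is reachable from this host):
#
#   curl -s http://VARINET4ADDR:8776/ | python -m json.tool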
+# (integer value) +#max_luns_per_storage_group = 255 + +# To destroy storage group when the last LUN is removed from it. By default, +# the value is False. (boolean value) +#destroy_empty_storage_group = false + +# Mapping between hostname and its iSCSI initiator IP addresses. (string value) +#iscsi_initiators = + +# Comma separated iSCSI or FC ports to be used in Nova or Cinder. (string +# value) +#io_port_list = * + +# Automatically register initiators. By default, the value is False. (boolean +# value) +#initiator_auto_registration = false + +# Automatically deregister initiators after the related storage group is +# destroyed. By default, the value is False. (boolean value) +#initiator_auto_deregistration = false + +# Report free_capacity_gb as 0 when the limit to maximum number of pool LUNs is +# reached. By default, the value is False. (boolean value) +#check_max_pool_luns_threshold = false + +# Delete a LUN even if it is in Storage Groups. (boolean value) +#force_delete_lun_in_storagegroup = false + +# Force LUN creation even if the full threshold of pool is reached. (boolean +# value) +#ignore_pool_full_threshold = false + +# IP address for connecting to VMware ESX/vCenter server. (string value) +#vmware_host_ip = <None> + +# Username for authenticating with VMware ESX/vCenter server. (string value) +#vmware_host_username = <None> + +# Password for authenticating with VMware ESX/vCenter server. (string value) +#vmware_host_password = <None> + +# Optional VIM service WSDL Location e.g http://<server>/vimService.wsdl. +# Optional over-ride to default location for bug work-arounds. (string value) +#vmware_wsdl_location = <None> + +# Number of times VMware ESX/vCenter server API must be retried upon connection +# related issues. (integer value) +#vmware_api_retry_count = 10 + +# The interval (in seconds) for polling remote tasks invoked on VMware +# ESX/vCenter server. (floating point value) +#vmware_task_poll_interval = 0.5 + +# Name of the vCenter inventory folder that will contain Cinder volumes. This +# folder will be created under "OpenStack/<project_folder>", where +# project_folder is of format "Project (<volume_project_id>)". (string value) +#vmware_volume_folder = Volumes + +# Timeout in seconds for VMDK volume transfer between Cinder and Glance. +# (integer value) +#vmware_image_transfer_timeout_secs = 7200 + +# Max number of objects to be retrieved per batch. Query results will be +# obtained in batches from the server and not in one shot. Server may still +# limit the count to something less than the configured value. (integer value) +#vmware_max_objects_retrieval = 100 + +# Optional string specifying the VMware vCenter server version. The driver +# attempts to retrieve the version from VMware vCenter server. Set this +# configuration only if you want to override the vCenter server version. +# (string value) +#vmware_host_version = <None> + +# Directory where virtual disks are stored during volume backup and restore. +# (string value) +#vmware_tmp_dir = /tmp + +# CA bundle file to use in verifying the vCenter server certificate. (string +# value) +#vmware_ca_file = <None> + +# If true, the vCenter server certificate is not verified. If false, then the +# default CA truststore is used for verification. This option is ignored if +# "vmware_ca_file" is set. (boolean value) +#vmware_insecure = false + +# Name of a vCenter compute cluster where volumes should be created. (multi +# valued) +#vmware_cluster_name = + +# Pool or Vdisk name to use for volume creation. 
(string value) +#lenovo_backend_name = A + +# linear (for VDisk) or virtual (for Pool). (string value) +# Allowed values: linear, virtual +#lenovo_backend_type = virtual + +# Lenovo api interface protocol. (string value) +# Allowed values: http, https +#lenovo_api_protocol = https + +# Whether to verify Lenovo array SSL certificate. (boolean value) +#lenovo_verify_certificate = false + +# Lenovo array SSL certificate path. (string value) +#lenovo_verify_certificate_path = <None> + +# List of comma-separated target iSCSI IP addresses. (list value) +#lenovo_iscsi_ips = + +# The maximum size in bytes of the files used to hold backups. If the volume +# being backed up exceeds this size, then it will be backed up into multiple +# files.backup_file_size must be a multiple of backup_sha_block_size_bytes. +# (integer value) +#backup_file_size = 1999994880 + +# The size in bytes that changes are tracked for incremental backups. +# backup_file_size has to be multiple of backup_sha_block_size_bytes. (integer +# value) +#backup_sha_block_size_bytes = 32768 + +# Enable or Disable the timer to send the periodic progress notifications to +# Ceilometer when backing up the volume to the backend storage. The default +# value is True to enable the timer. (boolean value) +#backup_enable_progress_timer = true + +# Path specifying where to store backups. (string value) +#backup_posix_path = $state_path/backup + +# Custom directory to use for backups. (string value) +#backup_container = <None> + +# REST server port. (string value) +#sio_rest_server_port = 443 + +# Whether to verify server certificate. (boolean value) +#sio_verify_server_certificate = false + +# Server certificate path. (string value) +#sio_server_certificate_path = <None> + +# Whether to round volume capacity. (boolean value) +#sio_round_volume_capacity = true + +# Whether to allow force delete. (boolean value) +#sio_force_delete = false + +# Whether to unmap volume before deletion. (boolean value) +#sio_unmap_volume_before_deletion = false + +# Protection domain id. (string value) +#sio_protection_domain_id = <None> + +# Protection domain name. (string value) +#sio_protection_domain_name = <None> + +# Storage pools. (string value) +#sio_storage_pools = <None> + +# Storage pool name. (string value) +#sio_storage_pool_name = <None> + +# Storage pool id. (string value) +#sio_storage_pool_id = <None> + +# Group name to use for creating volumes. Defaults to "group-0". (string value) +#eqlx_group_name = group-0 + +# Timeout for the Group Manager cli command execution. Default is 30. Note that +# this option is deprecated in favour of "ssh_conn_timeout" as specified in +# cinder/volume/drivers/san/san.py and will be removed in M release. (integer +# value) +#eqlx_cli_timeout = 30 + +# Maximum retry count for reconnection. Default is 5. (integer value) +#eqlx_cli_max_retries = 5 + +# Use CHAP authentication for targets. Note that this option is deprecated in +# favour of "use_chap_auth" as specified in cinder/volume/driver.py and will be +# removed in next release. (boolean value) +#eqlx_use_chap = false + +# Existing CHAP account name. Note that this option is deprecated in favour of +# "chap_username" as specified in cinder/volume/driver.py and will be removed +# in next release. (string value) +#eqlx_chap_login = admin + +# Password for specified CHAP account name. 
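# Quick arithmetic check of the constraint stated above: the default
# backup_file_size of 1999994880 bytes is an exact multiple of
# backup_sha_block_size_bytes (1999994880 = 61035 * 32768), so the pair of
# defaults is self-consistent.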
Note that this option is deprecated +# in favour of "chap_password" as specified in cinder/volume/driver.py and will +# be removed in the next release (string value) +#eqlx_chap_password = password + +# Pool in which volumes will be created. Defaults to "default". (string value) +#eqlx_pool = default + +# The number of characters in the salt. (integer value) +#volume_transfer_salt_length = 8 + +# The number of characters in the autogenerated auth key. (integer value) +#volume_transfer_key_length = 16 + +# Services to be added to the available pool on create (boolean value) +#enable_new_services = true + +# Template string to be used to generate volume names (string value) +#volume_name_template = volume-%s + +# Template string to be used to generate snapshot names (string value) +#snapshot_name_template = snapshot-%s + +# Template string to be used to generate backup names (string value) +#backup_name_template = backup-%s + +# Multiplier used for weighing volume number. Negative numbers mean to spread +# vs stack. (floating point value) +#volume_number_multiplier = -1.0 + +# Default storage pool for volumes. (integer value) +#ise_storage_pool = 1 + +# Raid level for ISE volumes. (integer value) +#ise_raid = 1 + +# Number of retries (per port) when establishing connection to ISE management +# port. (integer value) +#ise_connection_retries = 5 + +# Interval (secs) between retries. (integer value) +#ise_retry_interval = 1 + +# Number on retries to get completion status after issuing a command to ISE. +# (integer value) +#ise_completion_retries = 30 + +# Storage pool name. (string value) +#zfssa_pool = <None> + +# Project name. (string value) +#zfssa_project = <None> + +# Block size. (string value) +# Allowed values: 512, 1k, 2k, 4k, 8k, 16k, 32k, 64k, 128k +#zfssa_lun_volblocksize = 8k + +# Flag to enable sparse (thin-provisioned): True, False. (boolean value) +#zfssa_lun_sparse = false + +# Data compression. (string value) +# Allowed values: off, lzjb, gzip-2, gzip, gzip-9 +#zfssa_lun_compression = off + +# Synchronous write bias. (string value) +# Allowed values: latency, throughput +#zfssa_lun_logbias = latency + +# iSCSI initiator group. (string value) +#zfssa_initiator_group = + +# iSCSI initiator IQNs. (comma separated) (string value) +#zfssa_initiator = + +# iSCSI initiator CHAP user (name). (string value) +#zfssa_initiator_user = + +# Secret of the iSCSI initiator CHAP user. (string value) +#zfssa_initiator_password = + +# iSCSI initiators configuration. (string value) +#zfssa_initiator_config = + +# iSCSI target group name. (string value) +#zfssa_target_group = tgt-grp + +# iSCSI target CHAP user (name). (string value) +#zfssa_target_user = + +# Secret of the iSCSI target CHAP user. (string value) +#zfssa_target_password = + +# iSCSI target portal (Data-IP:Port, w.x.y.z:3260). (string value) +#zfssa_target_portal = <None> + +# Network interfaces of iSCSI targets. (comma separated) (string value) +#zfssa_target_interfaces = <None> + +# REST connection timeout. (seconds) (integer value) +#zfssa_rest_timeout = <None> + +# IP address used for replication data. (maybe the same as data ip) (string +# value) +#zfssa_replication_ip = + +# Flag to enable local caching: True, False. (boolean value) +#zfssa_enable_local_cache = true + +# Name of ZFSSA project where cache volumes are stored. (string value) +#zfssa_cache_project = os-cinder-cache + +# Sets the value of TCP_KEEPALIVE (True/False) for each server socket. 
(boolean +# value) +#tcp_keepalive = true + +# Sets the value of TCP_KEEPIDLE in seconds for each server socket. Not +# supported on OS X. (integer value) +#tcp_keepidle = 600 + +# Sets the value of TCP_KEEPINTVL in seconds for each server socket. Not +# supported on OS X. (integer value) +#tcp_keepalive_interval = <None> + +# Sets the value of TCP_KEEPCNT for each server socket. Not supported on OS X. +# (integer value) +#tcp_keepalive_count = <None> + +# CA certificate file to use to verify connecting clients (string value) +#ssl_ca_file = <None> + +# Certificate file to use when starting the server securely (string value) +#ssl_cert_file = <None> + +# Private key file to use when starting the server securely (string value) +#ssl_key_file = <None> + +# Maximum line size of message headers to be accepted. max_header_line may need +# to be increased when using large tokens (typically those generated by the +# Keystone v3 API with big service catalogs). (integer value) +#max_header_line = 16384 + +# Timeout for client connections' socket operations. If an incoming connection +# is idle for this number of seconds it will be closed. A value of '0' means +# wait forever. (integer value) +#client_socket_timeout = 900 + +# If False, closes the client socket connection explicitly. Setting it to True +# to maintain backward compatibility. Recommended setting is set it to False. +# (boolean value) +#wsgi_keep_alive = true + +# Number of times to attempt to run flakey shell commands (integer value) +#num_shell_tries = 3 + +# The percentage of backend capacity is reserved (integer value) +# Maximum value: 100 +#reserved_percentage = 0 + +# Prefix for iSCSI volumes (string value) +#iscsi_target_prefix = iqn.2010-10.org.openstack: + +# The IP address that the iSCSI daemon is listening on (string value) +#iscsi_ip_address = $my_ip + +# The list of secondary IP addresses of the iSCSI daemon (list value) +#iscsi_secondary_ip_addresses = + +# The port that the iSCSI daemon is listening on (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#iscsi_port = 3260 + +# The maximum number of times to rescan targets to find volume (integer value) +#num_volume_device_scan_tries = 3 + +# The backend name for a given driver implementation (string value) +#volume_backend_name = <None> + +# Do we attach/detach volumes in cinder using multipath for volume to image and +# image to volume transfers? (boolean value) +#use_multipath_for_image_xfer = false + +# If this is set to True, attachment of volumes for image transfer will be +# aborted when multipathd is not running. Otherwise, it will fallback to single +# path. (boolean value) +#enforce_multipath_for_image_xfer = false + +# Method used to wipe old volumes (string value) +# Allowed values: none, zero, shred +#volume_clear = zero + +# Size in MiB to wipe at start of old volumes. 0 => all (integer value) +#volume_clear_size = 0 + +# The flag to pass to ionice to alter the i/o priority of the process used to +# zero a volume after deletion, for example "-c3" for idle only priority. +# (string value) +#volume_clear_ionice = <None> + +# iSCSI target user-land tool to use. tgtadm is default, use lioadm for LIO +# iSCSI support, scstadmin for SCST target support, iseradm for the ISER +# protocol, ietadm for iSCSI Enterprise Target, iscsictl for Chelsio iSCSI +# Target or fake for testing. 
(string value) +# Allowed values: tgtadm, lioadm, scstadmin, iseradm, iscsictl, ietadm, fake +#iscsi_helper = tgtadm + +# Volume configuration file storage directory (string value) +#volumes_dir = $state_path/volumes + +# IET configuration file (string value) +#iet_conf = /etc/iet/ietd.conf + +# Chiscsi (CXT) global defaults configuration file (string value) +#chiscsi_conf = /etc/chelsio-iscsi/chiscsi.conf + +# Sets the behavior of the iSCSI target to either perform blockio or fileio +# optionally, auto can be set and Cinder will autodetect type of backing device +# (string value) +# Allowed values: blockio, fileio, auto +#iscsi_iotype = fileio + +# The default block size used when copying/clearing volumes (string value) +#volume_dd_blocksize = 1M + +# The blkio cgroup name to be used to limit bandwidth of volume copy (string +# value) +#volume_copy_blkio_cgroup_name = cinder-volume-copy + +# The upper limit of bandwidth of volume copy. 0 => unlimited (integer value) +#volume_copy_bps_limit = 0 + +# Sets the behavior of the iSCSI target to either perform write-back(on) or +# write-through(off). This parameter is valid if iscsi_helper is set to tgtadm +# or iseradm. (string value) +# Allowed values: on, off +#iscsi_write_cache = on + +# Sets the target-specific flags for the iSCSI target. Only used for tgtadm to +# specify backing device flags using bsoflags option. The specified string is +# passed as is to the underlying tool. (string value) +#iscsi_target_flags = + +# Determines the iSCSI protocol for new iSCSI volumes, created with tgtadm or +# lioadm target helpers. In order to enable RDMA, this parameter should be set +# with the value "iser". The supported iSCSI protocol values are "iscsi" and +# "iser". (string value) +# Allowed values: iscsi, iser +#iscsi_protocol = iscsi + +# The path to the client certificate key for verification, if the driver +# supports it. (string value) +#driver_client_cert_key = <None> + +# The path to the client certificate for verification, if the driver supports +# it. (string value) +#driver_client_cert = <None> + +# Tell driver to use SSL for connection to backend storage if the driver +# supports it. (boolean value) +#driver_use_ssl = false + +# Float representation of the over subscription ratio when thin provisioning is +# involved. Default ratio is 20.0, meaning provisioned capacity can be 20 times +# of the total physical capacity. If the ratio is 10.5, it means provisioned +# capacity can be 10.5 times of the total physical capacity. A ratio of 1.0 +# means provisioned capacity cannot exceed the total physical capacity. A ratio +# lower than 1.0 will be ignored and the default value will be used instead. +# (floating point value) +#max_over_subscription_ratio = 20.0 + +# Certain ISCSI targets have predefined target names, SCST target driver uses +# this name. (string value) +#scst_target_iqn_name = <None> + +# SCST target implementation can choose from multiple SCST target drivers. +# (string value) +#scst_target_driver = iscsi + +# Option to enable/disable CHAP authentication for targets. (boolean value) +# Deprecated group/name - [DEFAULT]/eqlx_use_chap +#use_chap_auth = false + +# CHAP user name. (string value) +# Deprecated group/name - [DEFAULT]/eqlx_chap_login +#chap_username = + +# Password for specified CHAP account name. (string value) +# Deprecated group/name - [DEFAULT]/eqlx_chap_password +#chap_password = + +# Namespace for driver private data values to be saved in. 
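# Worked example for the thin-provisioning ratio described above: with
# max_over_subscription_ratio = 20.0 and 500 GB of physical capacity on a
# backend, up to 20.0 * 500 = 10000 GB may be provisioned; with a ratio of
# 1.0, provisioned capacity may not exceed the 500 GB of physical capacity.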
(string value) +#driver_data_namespace = <None> + +# String representation for an equation that will be used to filter hosts. Only +# used when the driver filter is set to be used by the Cinder scheduler. +# (string value) +#filter_function = <None> + +# String representation for an equation that will be used to determine the +# goodness of a host. Only used when using the goodness weigher is set to be +# used by the Cinder scheduler. (string value) +#goodness_function = <None> + +# If set to True the http client will validate the SSL certificate of the +# backend endpoint. (boolean value) +#driver_ssl_cert_verify = false + +# List of options that control which trace info is written to the DEBUG log +# level to assist developers. Valid values are method and api. (list value) +#trace_flags = <None> + +# There are two types of target configurations managed (replicate to another +# configured backend) or unmanaged (replicate to a device not managed by +# Cinder). (boolean value) +#managed_replication_target = true + +# List of k/v pairs representing a replication target for this backend device. +# For unmanaged the format is: {'key-1'='val1' 'key-2'='val2'...},{...} and for +# managed devices its simply a list of valid configured backend_names that the +# driver supports replicating to: backend-a,bakcend-b... (list value) +#replication_devices = <None> + +# If set to True, upload-to-image in raw format will create a cloned volume and +# register its location to the image service, instead of uploading the volume +# content. The cinder backend and locations support must be enabled in the +# image service, and glance_api_version must be set to 2. (boolean value) +#image_upload_use_cinder_backend = false + +# If set to True, the image volume created by upload-to-image will be placed in +# the internal tenant. Otherwise, the image volume is created in the current +# context's tenant. (boolean value) +#image_upload_use_internal_tenant = false + +# Enable the image volume cache for this backend. (boolean value) +#image_volume_cache_enabled = false + +# Max size of the image volume cache for this backend in GB. 0 => unlimited. +# (integer value) +#image_volume_cache_max_size_gb = 0 + +# Max number of entries allowed in the image volume cache. 0 => unlimited. +# (integer value) +#image_volume_cache_max_count = 0 + +# The maximum number of times to rescan iSER targetto find volume (integer +# value) +#num_iser_scan_tries = 3 + +# Prefix for iSER volumes (string value) +#iser_target_prefix = iqn.2010-10.org.openstack: + +# The IP address that the iSER daemon is listening on (string value) +#iser_ip_address = $my_ip + +# The port that the iSER daemon is listening on (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#iser_port = 3260 + +# The name of the iSER target user-land tool to use (string value) +#iser_helper = tgtadm + +# Public url to use for versions endpoint. The default is None, which will use +# the request's host_url attribute to populate the URL base. If Cinder is +# operating behind a proxy, you will want to change this to represent the +# proxy's URL. (string value) +#public_endpoint = <None> + +# Nimble Controller pool name (string value) +#nimble_pool_name = default + +# Nimble Subnet Label (string value) +#nimble_subnet_label = * + +# Path to store VHD backed volumes (string value) +#windows_iscsi_lun_path = C:\iSCSIVirtualDisks + +# Pool or Vdisk name to use for volume creation. (string value) +#hpmsa_backend_name = A + +# linear (for Vdisk) or virtual (for Pool). 
(string value) +# Allowed values: linear, virtual +#hpmsa_backend_type = virtual + +# HPMSA API interface protocol. (string value) +# Allowed values: http, https +#hpmsa_api_protocol = https + +# Whether to verify HPMSA array SSL certificate. (boolean value) +#hpmsa_verify_certificate = false + +# HPMSA array SSL certificate path. (string value) +#hpmsa_verify_certificate_path = <None> + +# List of comma-separated target iSCSI IP addresses. (list value) +#hpmsa_iscsi_ips = + +# A list of url schemes that can be downloaded directly via the direct_url. +# Currently supported schemes: [file]. (list value) +#allowed_direct_url_schemes = + +# Default core properties of image (list value) +#glance_core_properties = checksum,container_format,disk_format,image_name,image_id,min_disk,min_ram,name,size + +# Name for the VG that will contain exported volumes (string value) +#volume_group = cinder-volumes + +# If >0, create LVs with multiple mirrors. Note that this requires lvm_mirrors +# + 2 PVs with available space (integer value) +#lvm_mirrors = 0 + +# Type of LVM volumes to deploy; (default, thin, or auto). Auto defaults to +# thin if thin is supported. (string value) +# Allowed values: default, thin, auto +#lvm_type = default + +# LVM conf file to use for the LVM driver in Cinder; this setting is ignored if +# the specified file does not exist (You can also specify 'None' to not use a +# conf file even if one exists). (string value) +#lvm_conf_file = /etc/cinder/lvm.conf + +# use this file for cinder emc plugin config data (string value) +#cinder_emc_config_file = /etc/cinder/cinder_emc_config.xml + +# IP address or Hostname of NAS system. (string value) +#nas_ip = + +# User name to connect to NAS system. (string value) +#nas_login = admin + +# Password to connect to NAS system. (string value) +#nas_password = + +# SSH port to use to connect to NAS system. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#nas_ssh_port = 22 + +# Filename of private key to use for SSH authentication. (string value) +#nas_private_key = + +# Allow network-attached storage systems to operate in a secure environment +# where root level access is not permitted. If set to False, access is as the +# root user and insecure. If set to True, access is not as root. If set to +# auto, a check is done to determine if this is a new installation: True is +# used if so, otherwise False. Default is auto. (string value) +#nas_secure_file_operations = auto + +# Set more secure file permissions on network-attached storage volume files to +# restrict broad other/world access. If set to False, volumes are created with +# open permissions. If set to True, volumes are created with permissions for +# the cinder user and group (660). If set to auto, a check is done to determine +# if this is a new installation: True is used if so, otherwise False. Default +# is auto. (string value) +#nas_secure_file_permissions = auto + +# Path to the share to use for storing Cinder volumes. For example: +# "/srv/export1" for an NFS server export available at 10.0.5.10:/srv/export1 . +# (string value) +#nas_share_path = + +# Options used to mount the storage backend file system where Cinder volumes +# are stored. (string value) +#nas_mount_options = <None> + +# Provisioning type that will be used when creating volumes. 
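# The LVM options above expect the volume group named by volume_group to exist
# before the volume service starts. A minimal sketch, assuming a spare block
# device /dev/sdb (purely a placeholder):
#
#   pvcreate /dev/sdb
#   vgcreate cinder-volumes /dev/sdb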
(string value) +# Allowed values: thin, thick +# Deprecated group/name - [DEFAULT]/glusterfs_sparsed_volumes +# Deprecated group/name - [DEFAULT]/glusterfs_qcow2_volumes +#nas_volume_prov_type = thin + +# IP address or hostname of mg-a (string value) +#gateway_mga = <None> + +# IP address or hostname of mg-b (string value) +#gateway_mgb = <None> + +# Use igroups to manage targets and initiators (boolean value) +#use_igroups = false + +# Global backend request timeout, in seconds (integer value) +#request_timeout = 300 + +# Comma-separated list of REST servers IP to connect to. (eg +# http://IP1/,http://IP2:81/path (string value) +#srb_base_urls = <None> + +# XMS cluster id in multi-cluster environment (string value) +#xtremio_cluster_name = + +# Number of retries in case array is busy (integer value) +#xtremio_array_busy_retry_count = 5 + +# Interval between retries in case array is busy (integer value) +#xtremio_array_busy_retry_interval = 5 + +# Serial number of storage system (string value) +#hitachi_serial_number = <None> + +# Name of an array unit (string value) +#hitachi_unit_name = <None> + +# Pool ID of storage system (integer value) +#hitachi_pool_id = <None> + +# Thin pool ID of storage system (integer value) +#hitachi_thin_pool_id = <None> + +# Range of logical device of storage system (string value) +#hitachi_ldev_range = <None> + +# Default copy method of storage system (string value) +#hitachi_default_copy_method = FULL + +# Copy speed of storage system (integer value) +#hitachi_copy_speed = 3 + +# Interval to check copy (integer value) +#hitachi_copy_check_interval = 3 + +# Interval to check copy asynchronously (integer value) +#hitachi_async_copy_check_interval = 10 + +# Control port names for HostGroup or iSCSI Target (string value) +#hitachi_target_ports = <None> + +# Range of group number (string value) +#hitachi_group_range = <None> + +# Request for creating HostGroup or iSCSI Target (boolean value) +#hitachi_group_request = false + +# Infortrend raid pool name list. It is separated with comma. (string value) +#infortrend_pools_name = + +# The Infortrend CLI absolute path. By default, it is at +# /opt/bin/Infortrend/raidcmd_ESDS10.jar (string value) +#infortrend_cli_path = /opt/bin/Infortrend/raidcmd_ESDS10.jar + +# Maximum retry time for cli. Default is 5. (integer value) +#infortrend_cli_max_retries = 5 + +# Default timeout for CLI copy operations in minutes. Support: migrate volume, +# create cloned volume and create volume from snapshot. By Default, it is 30 +# minutes. (integer value) +#infortrend_cli_timeout = 30 + +# Infortrend raid channel ID list on Slot A for OpenStack usage. It is +# separated with comma. By default, it is the channel 0~7. (string value) +#infortrend_slots_a_channels_id = 0,1,2,3,4,5,6,7 + +# Infortrend raid channel ID list on Slot B for OpenStack usage. It is +# separated with comma. By default, it is the channel 0~7. (string value) +#infortrend_slots_b_channels_id = 0,1,2,3,4,5,6,7 + +# Let the volume use specific provisioning. By default, it is the full +# provisioning. The supported options are full or thin. (string value) +#infortrend_provisioning = full + +# Let the volume use specific tiering level. By default, it is the level 0. The +# supported levels are 0,2,3,4. 
(string value) +#infortrend_tiering = 0 + +# Configuration file for HDS iSCSI cinder plugin (string value) +#hds_hnas_iscsi_config_file = /opt/hds/hnas/cinder_iscsi_conf.xml + +# The name of ceph cluster (string value) +#rbd_cluster_name = ceph + +# The RADOS pool where rbd volumes are stored (string value) +#rbd_pool = rbd + +# The RADOS client name for accessing rbd volumes - only set when using cephx +# authentication (string value) +#rbd_user = <None> + +# Path to the ceph configuration file (string value) +#rbd_ceph_conf = + +# Flatten volumes created from snapshots to remove dependency from volume to +# snapshot (boolean value) +#rbd_flatten_volume_from_snapshot = false + +# The libvirt uuid of the secret for the rbd_user volumes (string value) +#rbd_secret_uuid = <None> + +# Directory where temporary image files are stored when the volume driver does +# not write them directly to the volume. Warning: this option is now +# deprecated, please use image_conversion_dir instead. (string value) +#volume_tmp_dir = <None> + +# Maximum number of nested volume clones that are taken before a flatten +# occurs. Set to 0 to disable cloning. (integer value) +#rbd_max_clone_depth = 5 + +# Volumes will be chunked into objects of this size (in megabytes). (integer +# value) +#rbd_store_chunk_size = 4 + +# Timeout value (in seconds) used when connecting to ceph cluster. If value < +# 0, no timeout is set and default librados value is used. (integer value) +#rados_connect_timeout = -1 + +# Number of retries if connection to ceph cluster failed. (integer value) +#rados_connection_retries = 3 + +# Interval value (in seconds) between connection retries to ceph cluster. +# (integer value) +#rados_connection_interval = 5 + +# The hostname (or IP address) for the storage system (string value) +#tintri_server_hostname = <None> + +# User name for the storage system (string value) +#tintri_server_username = <None> + +# Password for the storage system (string value) +#tintri_server_password = <None> + +# API version for the storage system (string value) +#tintri_api_version = v310 + +# Instance numbers for HORCM (string value) +#hitachi_horcm_numbers = 200,201 + +# Username of storage system for HORCM (string value) +#hitachi_horcm_user = <None> + +# Password of storage system for HORCM (string value) +#hitachi_horcm_password = <None> + +# Add to HORCM configuration (boolean value) +#hitachi_horcm_add_conf = true + +# Timeout until a resource lock is released, in seconds. The value must be +# between 0 and 7200. (integer value) +#hitachi_horcm_resource_lock_timeout = 600 + +# HP LeftHand WSAPI Server Url like https://<LeftHand ip>:8081/lhos (string +# value) +#hplefthand_api_url = <None> + +# HP LeftHand Super user username (string value) +#hplefthand_username = <None> + +# HP LeftHand Super user password (string value) +#hplefthand_password = <None> + +# HP LeftHand cluster name (string value) +#hplefthand_clustername = <None> + +# Configure CHAP authentication for iSCSI connections (Default: Disabled) +# (boolean value) +#hplefthand_iscsi_chap_enabled = false + +# Enable HTTP debugging to LeftHand (boolean value) +#hplefthand_debug = false + +# Administrative user account name used to access the storage system or proxy +# server. (string value) +#netapp_login = <None> + +# Password for the administrative user account specified in the netapp_login +# option. (string value) +#netapp_password = <None> + +# The hostname (or IP address) for the storage system or proxy server. 
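# A minimal sketch of how rbd_user and rbd_secret_uuid above are typically
# wired up; the 'cinder' user, the pool names in the capability string and the
# secret.xml file are assumptions following the usual Ceph/libvirt integration
# steps, not values taken from this file:
#
#   ceph auth get-or-create client.cinder \
#       mon 'allow r' \
#       osd 'allow class-read object_prefix rbd_children, allow rwx pool=volumes, allow rx pool=images'
#   ceph auth get-key client.cinder > client.cinder.key
#   virsh secret-define --file secret.xml      # secret.xml carries the uuid used as rbd_secret_uuid
#   virsh secret-set-value --secret <uuid> --base64 $(cat client.cinder.key)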
(string +# value) +#netapp_server_hostname = <None> + +# The TCP port to use for communication with the storage system or proxy +# server. If not specified, Data ONTAP drivers will use 80 for HTTP and 443 for +# HTTPS; E-Series will use 8080 for HTTP and 8443 for HTTPS. (integer value) +#netapp_server_port = <None> + +# This option is used to specify the path to the E-Series proxy application on +# a proxy server. The value is combined with the value of the +# netapp_transport_type, netapp_server_hostname, and netapp_server_port options +# to create the URL used by the driver to connect to the proxy application. +# (string value) +#netapp_webservice_path = /devmgr/v2 + +# This option is only utilized when the storage family is configured to +# eseries. This option is used to restrict provisioning to the specified +# controllers. Specify the value of this option to be a comma separated list of +# controller hostnames or IP addresses to be used for provisioning. (string +# value) +#netapp_controller_ips = <None> + +# Password for the NetApp E-Series storage array. (string value) +#netapp_sa_password = <None> + +# This option specifies whether the driver should allow operations that require +# multiple attachments to a volume. An example would be live migration of +# servers that have volumes attached. When enabled, this backend is limited to +# 256 total volumes in order to guarantee volumes can be accessed by more than +# one host. (boolean value) +#netapp_enable_multiattach = false + +# The transport protocol used when communicating with the storage system or +# proxy server. (string value) +# Allowed values: http, https +#netapp_transport_type = http + +# This option defines the type of operating system that will access a LUN +# exported from Data ONTAP; it is assigned to the LUN at the time it is +# created. (string value) +#netapp_lun_ostype = <None> + +# This option defines the type of operating system for all initiators that can +# access a LUN. This information is used when mapping LUNs to individual hosts +# or groups of hosts. (string value) +# Deprecated group/name - [DEFAULT]/netapp_eseries_host_type +#netapp_host_type = <None> + +# This option is used to restrict provisioning to the specified pools. Specify +# the value of this option to be a regular expression which will be applied to +# the names of objects from the storage backend which represent pools in +# Cinder. This option is only utilized when the storage protocol is configured +# to use iSCSI or FC. 
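+# For example, a value such as ^cinder_.*$ would limit provisioning to pools
+# whose names begin with "cinder_" (the prefix here is purely illustrative;
+# this template keeps the permissive default shown below).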
(string value) +# Deprecated group/name - [DEFAULT]/netapp_volume_list +# Deprecated group/name - [DEFAULT]/netapp_storage_pools +#netapp_pool_name_search_pattern = (.+) + +# Request for FC Zone creating HostGroup (boolean value) +#hitachi_zoning_request = false + +# Number of volumes allowed per project (integer value) +#quota_volumes = 10 + +# Number of volume snapshots allowed per project (integer value) +#quota_snapshots = 10 + +# Number of consistencygroups allowed per project (integer value) +#quota_consistencygroups = 10 + +# Total amount of storage, in gigabytes, allowed for volumes and snapshots per +# project (integer value) +#quota_gigabytes = 1000 + +# Number of volume backups allowed per project (integer value) +#quota_backups = 10 + +# Total amount of storage, in gigabytes, allowed for backups per project +# (integer value) +#quota_backup_gigabytes = 1000 + +# Number of seconds until a reservation expires (integer value) +#reservation_expire = 86400 + +# Count of reservations until usage is refreshed (integer value) +#until_refresh = 0 + +# Number of seconds between subsequent usage refreshes (integer value) +#max_age = 0 + +# Default driver to use for quota checks (string value) +#quota_driver = cinder.quota.DbQuotaDriver + +# Enables or disables use of default quota class with default quota. (boolean +# value) +#use_default_quota_class = true + +# Max size allowed per volume, in gigabytes (integer value) +#per_volume_size_limit = -1 + +# The configuration file for the Cinder Huawei driver. (string value) +#cinder_huawei_conf_file = /etc/cinder/cinder_huawei_conf.xml + +# Storage Center System Serial Number (integer value) +#dell_sc_ssn = 64702 + +# Dell API port (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#dell_sc_api_port = 3033 + +# Name of the server folder to use on the Storage Center (string value) +#dell_sc_server_folder = openstack + +# Name of the volume folder to use on the Storage Center (string value) +#dell_sc_volume_folder = openstack + +# Enable HTTPS SC certificate verification. (boolean value) +#dell_sc_verify_cert = false + +# Which filter class names to use for filtering hosts when not specified in the +# request. (list value) +#scheduler_default_filters = AvailabilityZoneFilter,CapacityFilter,CapabilitiesFilter + +# Which weigher class names to use for weighing hosts. (list value) +#scheduler_default_weighers = CapacityWeigher + +# Base dir containing mount point for NFS share. (string value) +#backup_mount_point_base = $state_path/backup_mount + +# NFS share in hostname:path, ipv4addr:path, or "[ipv6addr]:path" format. +# (string value) +#backup_share = <None> + +# Mount options passed to the NFS client. See NFS man page for details. (string +# value) +#backup_mount_options = <None> + +# IP address/hostname of Blockbridge API. (string value) +#blockbridge_api_host = <None> + +# Override HTTPS port to connect to Blockbridge API server. 
(integer value) +#blockbridge_api_port = <None> + +# Blockbridge API authentication scheme (token or password) (string value) +# Allowed values: token, password +#blockbridge_auth_scheme = token + +# Blockbridge API token (for auth scheme 'token') (string value) +#blockbridge_auth_token = <None> + +# Blockbridge API user (for auth scheme 'password') (string value) +#blockbridge_auth_user = <None> + +# Blockbridge API password (for auth scheme 'password') (string value) +#blockbridge_auth_password = <None> + +# Defines the set of exposed pools and their associated backend query strings +# (dict value) +#blockbridge_pools = OpenStack:+openstack + +# Default pool name if unspecified. (string value) +#blockbridge_default_pool = <None> + +# Data path IP address (string value) +#zfssa_data_ip = <None> + +# HTTPS port number (string value) +#zfssa_https_port = 443 + +# Options to be passed while mounting share over nfs (string value) +#zfssa_nfs_mount_options = + +# Storage pool name. (string value) +#zfssa_nfs_pool = + +# Project name. (string value) +#zfssa_nfs_project = NFSProject + +# Share name. (string value) +#zfssa_nfs_share = nfs_share + +# Data compression. (string value) +# Allowed values: off, lzjb, gzip-2, gzip, gzip-9 +#zfssa_nfs_share_compression = off + +# Synchronous write bias-latency, throughput. (string value) +# Allowed values: latency, throughput +#zfssa_nfs_share_logbias = latency + +# REST connection timeout. (seconds) (integer value) +#zfssa_rest_timeout = <None> + +# Flag to enable local caching: True, False. (boolean value) +#zfssa_enable_local_cache = true + +# Name of directory inside zfssa_nfs_share where cache volumes are stored. +# (string value) +#zfssa_cache_directory = os-cinder-cache + +# Space network name to use for data transfer (string value) +#hgst_net = Net 1 (IPv4) + +# Comma separated list of Space storage servers:devices. ex: +# os1_stor:gbd0,os2_stor:gbd0 (string value) +#hgst_storage_servers = os:gbd0 + +# Should spaces be redundantly stored (1/0) (string value) +#hgst_redundancy = 0 + +# User to own created spaces (string value) +#hgst_space_user = root + +# Group to own created spaces (string value) +#hgst_space_group = disk + +# UNIX mode for created spaces (string value) +#hgst_space_mode = 0600 + +# Directory used for temporary storage during image conversion (string value) +#image_conversion_dir = $state_path/conversion + +# Match this value when searching for nova in the service catalog. Format is: +# separated values of the form: <service_type>:<service_name>:<endpoint_type> +# (string value) +#nova_catalog_info = compute:Compute Service:publicURL +nova_catalog_info = compute:nova:publicURL + +# Same as nova_catalog_info, but for admin endpoint. (string value) +#nova_catalog_admin_info = compute:Compute Service:adminURL +nova_catalog_admin_info = compute:nova:adminURL + +# Override service catalog lookup with template for nova endpoint e.g. +# http://localhost:8774/v2/%(project_id)s (string value) +#nova_endpoint_template = <None> + +# Same as nova_endpoint_template, but for admin endpoint. (string value) +#nova_endpoint_admin_template = <None> + +# Region name of this node (string value) +#os_region_name = <None> + +# Location of ca certificates file to use for nova client requests. (string +# value) +#nova_ca_certificates_file = <None> + +# Allow to perform insecure SSL requests to nova (boolean value) +#nova_api_insecure = false + +# Connect with multipath (FC only).(Default is false.) 
(boolean value) +#flashsystem_multipath_enabled = false + +# DPL pool uuid in which DPL volumes are stored. (string value) +#dpl_pool = + +# DPL port number. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#dpl_port = 8357 + +# Add CHAP user (boolean value) +#hitachi_add_chap_user = false + +# iSCSI authentication method (string value) +#hitachi_auth_method = <None> + +# iSCSI authentication username (string value) +#hitachi_auth_user = HBSD-CHAP-user + +# iSCSI authentication password (string value) +#hitachi_auth_password = HBSD-CHAP-password + +# Driver to use for volume creation (string value) +#volume_driver = cinder.volume.drivers.lvm.LVMVolumeDriver + +# Timeout for creating the volume to migrate to when performing volume +# migration (seconds) (integer value) +#migration_create_volume_timeout_secs = 300 + +# Offload pending volume delete during volume service startup (boolean value) +#volume_service_inithost_offload = false + +# FC Zoning mode configured (string value) +#zoning_mode = none + +# User defined capabilities, a JSON formatted string specifying key/value +# pairs. The key/value pairs can be used by the CapabilitiesFilter to select +# between backends when requests specify volume types. For example, specifying +# a service level or the geographical location of a backend, then creating a +# volume type to allow the user to select by these different properties. +# (string value) +#extra_capabilities = {} + +# Default iSCSI Port ID of FlashSystem. (Default port is 0.) (integer value) +#flashsystem_iscsi_portid = 0 + +# Connection protocol should be FC. (Default is FC.) (string value) +#flashsystem_connection_protocol = FC + +# Allows vdisk to multi host mapping. (Default is True) (boolean value) +#flashsystem_multihostmap_enabled = true + +# 3PAR WSAPI Server Url like https://<3par ip>:8080/api/v1 (string value) +#hp3par_api_url = + +# 3PAR username with the 'edit' role (string value) +#hp3par_username = + +# 3PAR password for the user specified in hp3par_username (string value) +#hp3par_password = + +# List of the CPG(s) to use for volume creation (list value) +#hp3par_cpg = OpenStack + +# The CPG to use for Snapshots for volumes. If empty the userCPG will be used. +# (string value) +#hp3par_cpg_snap = + +# The time in hours to retain a snapshot. You can't delete it before this +# expires. (string value) +#hp3par_snapshot_retention = + +# The time in hours when a snapshot expires and is deleted. This must be +# larger than expiration (string value) +#hp3par_snapshot_expiration = + +# Enable HTTP debugging to 3PAR (boolean value) +#hp3par_debug = false + +# List of target iSCSI addresses to use. (list value) +#hp3par_iscsi_ips = + +# Enable CHAP authentication for iSCSI connections. (boolean value) +#hp3par_iscsi_chap_enabled = false + +# Proxy driver that connects to the IBM Storage Array (string value) +#xiv_ds8k_proxy = xiv_ds8k_openstack.nova_proxy.XIVDS8KNovaProxy + +# Connection type to the IBM Storage Array (string value) +# Allowed values: fibre_channel, iscsi +#xiv_ds8k_connection_type = iscsi + +# CHAP authentication mode, effective only for iscsi (disabled|enabled) (string +# value) +# Allowed values: disabled, enabled +#xiv_chap = disabled + +# List of Management IP addresses (separated by commas) (string value) +#management_ips = + +# DEPRECATED: This will be removed in the Liberty release. Use san_login and +# san_password instead. This directly sets the Datera API token. (string value) +#datera_api_token = <None> + +# Datera API port. 
(string value) +#datera_api_port = 7717 + +# Datera API version. (string value) +#datera_api_version = 1 + +# Number of replicas to create of an inode. (string value) +#datera_num_replicas = 3 + +# List of all available devices (list value) +#available_devices = + +# URL to the Quobyte volume e.g., quobyte://<DIR host>/<volume name> (string +# value) +#quobyte_volume_url = <None> + +# Path to a Quobyte Client configuration file. (string value) +#quobyte_client_cfg = <None> + +# Create volumes as sparse files which take no space. If set to False, volume +# is created as regular file.In such case volume creation takes a lot of time. +# (boolean value) +#quobyte_sparsed_volumes = true + +# Create volumes as QCOW2 files rather than raw files. (boolean value) +#quobyte_qcow2_volumes = true + +# Base dir containing the mount point for the Quobyte volume. (string value) +#quobyte_mount_point_base = $state_path/mnt + +# File with the list of available vzstorage shares. (string value) +#vzstorage_shares_config = /etc/cinder/vzstorage_shares + +# Create volumes as sparsed files which take no space rather than regular files +# when using raw format, in which case volume creation takes lot of time. +# (boolean value) +#vzstorage_sparsed_volumes = true + +# Percent of ACTUAL usage of the underlying volume before no new volumes can be +# allocated to the volume destination. (floating point value) +#vzstorage_used_ratio = 0.95 + +# Base dir containing mount points for vzstorage shares. (string value) +#vzstorage_mount_point_base = $state_path/mnt + +# Mount options passed to the vzstorage client. See section of the pstorage- +# mount man page for details. (list value) +#vzstorage_mount_options = <None> + +# File with the list of available nfs shares (string value) +#nfs_shares_config = /etc/cinder/nfs_shares + +# Create volumes as sparsed files which take no space.If set to False volume is +# created as regular file.In such case volume creation takes a lot of time. +# (boolean value) +#nfs_sparsed_volumes = true + +# Percent of ACTUAL usage of the underlying volume before no new volumes can be +# allocated to the volume destination. Note that this option is deprecated in +# favor of "reserved_percentage" and will be removed in the Mitaka release. +# (floating point value) +#nfs_used_ratio = 0.95 + +# This will compare the allocated to available space on the volume destination. +# If the ratio exceeds this number, the destination will no longer be valid. +# Note that this option is deprecated in favor of "max_oversubscription_ratio" +# and will be removed in the Mitaka release. (floating point value) +#nfs_oversub_ratio = 1.0 + +# Base dir containing mount points for nfs shares. (string value) +#nfs_mount_point_base = $state_path/mnt + +# Mount options passed to the nfs client. See section of the nfs man page for +# details. (string value) +#nfs_mount_options = <None> + +# The number of attempts to mount nfs shares before raising an error. At least +# one attempt will be made to mount an nfs share, regardless of the value +# specified. (integer value) +#nfs_mount_attempts = 3 + +# +# From oslo.log +# + +# Print debugging output (set logging level to DEBUG instead of default INFO +# level). (boolean value) +#debug = false +debug = True + +# If set to false, will disable INFO logging level, making WARNING the default. +# (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#verbose = true +verbose = True + +# The name of a logging configuration file. 
This file is appended to any +# existing logging configuration files. For details about logging configuration +# files, see the Python logging module documentation. (string value) +# Deprecated group/name - [DEFAULT]/log_config +#log_config_append = <None> + +# DEPRECATED. A logging.Formatter log message format string which may use any +# of the available logging.LogRecord attributes. This option is deprecated. +# Please use logging_context_format_string and logging_default_format_string +# instead. (string value) +#log_format = <None> + +# Format string for %%(asctime)s in log records. Default: %(default)s . (string +# value) +#log_date_format = %Y-%m-%d %H:%M:%S + +# (Optional) Name of log file to output to. If no default is set, logging will +# go to stdout. (string value) +# Deprecated group/name - [DEFAULT]/logfile +#log_file = <None> + +# (Optional) The base directory used for relative --log-file paths. (string +# value) +# Deprecated group/name - [DEFAULT]/logdir +#log_dir = <None> +log_dir = /var/log/cinder + +# Use syslog for logging. Existing syslog format is DEPRECATED and will be +# changed later to honor RFC5424. (boolean value) +#use_syslog = false + +# (Optional) Enables or disables syslog rfc5424 format for logging. If enabled, +# prefixes the MSG part of the syslog message with APP-NAME (RFC5424). The +# format without the APP-NAME is deprecated in Kilo, and will be removed in +# Mitaka, along with this option. (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#use_syslog_rfc_format = true + +# Syslog facility to receive log lines. (string value) +#syslog_log_facility = LOG_USER + +# Log output to standard error. (boolean value) +#use_stderr = true + +# Format string to use for log messages with context. (string value) +#logging_context_format_string = %(asctime)s.%(msecs)03d %(process)d %(levelname)s %(name)s [%(request_id)s %(user_identity)s] %(instance)s%(message)s + +# Format string to use for log messages without context. (string value) +#logging_default_format_string = %(asctime)s.%(msecs)03d %(process)d %(levelname)s %(name)s [-] %(instance)s%(message)s + +# Data to append to log format when level is DEBUG. (string value) +#logging_debug_format_suffix = %(funcName)s %(pathname)s:%(lineno)d + +# Prefix each line of exception output with this format. (string value) +#logging_exception_prefix = %(asctime)s.%(msecs)03d %(process)d ERROR %(name)s %(instance)s + +# List of logger=LEVEL pairs. (list value) +#default_log_levels = amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN + +# Enables or disables publication of error events. (boolean value) +#publish_errors = false + +# The format for an instance that is passed with the log message. (string +# value) +#instance_format = "[instance: %(uuid)s] " + +# The format for an instance UUID that is passed with the log message. (string +# value) +#instance_uuid_format = "[instance: %(uuid)s] " + +# Enables or disables fatal status of deprecations. (boolean value) +#fatal_deprecations = false + +# +# From oslo.messaging +# + +# Size of RPC connection pool. (integer value) +# Deprecated group/name - [DEFAULT]/rpc_conn_pool_size +#rpc_conn_pool_size = 30 + +# ZeroMQ bind address. 
Should be a wildcard (*), an ethernet interface, or IP. +# The "host" option should point or resolve to this address. (string value) +#rpc_zmq_bind_address = * + +# MatchMaker driver. (string value) +#rpc_zmq_matchmaker = local + +# ZeroMQ receiver listening port. (integer value) +#rpc_zmq_port = 9501 + +# Number of ZeroMQ contexts, defaults to 1. (integer value) +#rpc_zmq_contexts = 1 + +# Maximum number of ingress messages to locally buffer per topic. Default is +# unlimited. (integer value) +#rpc_zmq_topic_backlog = <None> + +# Directory for holding IPC sockets. (string value) +#rpc_zmq_ipc_dir = /var/run/openstack + +# Name of this node. Must be a valid hostname, FQDN, or IP address. Must match +# "host" option, if running Nova. (string value) +#rpc_zmq_host = localhost + +# Seconds to wait before a cast expires (TTL). Only supported by impl_zmq. +# (integer value) +#rpc_cast_timeout = 30 + +# Heartbeat frequency. (integer value) +#matchmaker_heartbeat_freq = 300 + +# Heartbeat time-to-live. (integer value) +#matchmaker_heartbeat_ttl = 600 + +# Size of executor thread pool. (integer value) +# Deprecated group/name - [DEFAULT]/rpc_thread_pool_size +#executor_thread_pool_size = 64 + +# The Drivers(s) to handle sending notifications. Possible values are +# messaging, messagingv2, routing, log, test, noop (multi valued) +#notification_driver = +notification_driver =messagingv2 + +# AMQP topic used for OpenStack notifications. (list value) +# Deprecated group/name - [rpc_notifier2]/topics +#notification_topics = notifications + +# Seconds to wait for a response from a call. (integer value) +#rpc_response_timeout = 60 + +# A URL representing the messaging driver to use and its full configuration. If +# not set, we fall back to the rpc_backend option and driver specific +# configuration. (string value) +#transport_url = <None> + +# The messaging driver to use, defaults to rabbit. Other drivers include qpid +# and zmq. (string value) +#rpc_backend = rabbit +rpc_backend = rabbit + +# The default exchange under which topics are scoped. May be overridden by an +# exchange name specified in the transport_url option. (string value) +#control_exchange = openstack +control_exchange = openstack + +# +# From oslo.messaging +# + +# Size of RPC connection pool. (integer value) +# Deprecated group/name - [DEFAULT]/rpc_conn_pool_size +#rpc_conn_pool_size = 30 + +# ZeroMQ bind address. Should be a wildcard (*), an ethernet interface, or IP. +# The "host" option should point or resolve to this address. (string value) +#rpc_zmq_bind_address = * + +# MatchMaker driver. (string value) +#rpc_zmq_matchmaker = local + +# ZeroMQ receiver listening port. (integer value) +#rpc_zmq_port = 9501 + +# Number of ZeroMQ contexts, defaults to 1. (integer value) +#rpc_zmq_contexts = 1 + +# Maximum number of ingress messages to locally buffer per topic. Default is +# unlimited. (integer value) +#rpc_zmq_topic_backlog = <None> + +# Directory for holding IPC sockets. (string value) +#rpc_zmq_ipc_dir = /var/run/openstack + +# Name of this node. Must be a valid hostname, FQDN, or IP address. Must match +# "host" option, if running Nova. (string value) +#rpc_zmq_host = localhost + +# Seconds to wait before a cast expires (TTL). Only supported by impl_zmq. +# (integer value) +#rpc_cast_timeout = 30 + +# Heartbeat frequency. (integer value) +#matchmaker_heartbeat_freq = 300 + +# Heartbeat time-to-live. (integer value) +#matchmaker_heartbeat_ttl = 600 + +# Size of executor thread pool. 
(integer value) +# Deprecated group/name - [DEFAULT]/rpc_thread_pool_size +#executor_thread_pool_size = 64 + +# The Drivers(s) to handle sending notifications. Possible values are +# messaging, messagingv2, routing, log, test, noop (multi valued) +#notification_driver = + +# AMQP topic used for OpenStack notifications. (list value) +# Deprecated group/name - [rpc_notifier2]/topics +#notification_topics = notifications + +# Seconds to wait for a response from a call. (integer value) +#rpc_response_timeout = 60 + +# A URL representing the messaging driver to use and its full configuration. If +# not set, we fall back to the rpc_backend option and driver specific +# configuration. (string value) +#transport_url = <None> + +# The messaging driver to use, defaults to rabbit. Other drivers include qpid +# and zmq. (string value) +#rpc_backend = rabbit + +# The default exchange under which topics are scoped. May be overridden by an +# exchange name specified in the transport_url option. (string value) +#control_exchange = openstack +api_paste_config=/etc/cinder/api-paste.ini + + +[BRCD_FABRIC_EXAMPLE] + +# +# From cinder +# + +# Management IP of fabric (string value) +#fc_fabric_address = + +# Fabric user ID (string value) +#fc_fabric_user = + +# Password for user (string value) +#fc_fabric_password = + +# Connecting port (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#fc_fabric_port = 22 + +# overridden zoning policy (string value) +#zoning_policy = initiator-target + +# overridden zoning activation state (boolean value) +#zone_activate = true + +# overridden zone name prefix (string value) +#zone_name_prefix = <None> + +# Principal switch WWN of the fabric (string value) +#principal_switch_wwn = <None> + + +[CISCO_FABRIC_EXAMPLE] + +# +# From cinder +# + +# Management IP of fabric (string value) +#cisco_fc_fabric_address = + +# Fabric user ID (string value) +#cisco_fc_fabric_user = + +# Password for user (string value) +#cisco_fc_fabric_password = + +# Connecting port (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#cisco_fc_fabric_port = 22 + +# overridden zoning policy (string value) +#cisco_zoning_policy = initiator-target + +# overridden zoning activation state (boolean value) +#cisco_zone_activate = true + +# overridden zone name prefix (string value) +#cisco_zone_name_prefix = <None> + +# VSAN of the Fabric (string value) +#cisco_zoning_vsan = <None> + + +[cors] + +# +# From oslo.middleware +# + +# Indicate whether this resource may be shared with the domain received in the +# requests "origin" header. (string value) +#allowed_origin = <None> + +# Indicate that the actual request can include user credentials (boolean value) +#allow_credentials = true + +# Indicate which headers are safe to expose to the API. Defaults to HTTP Simple +# Headers. (list value) +#expose_headers = Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + +# Maximum cache age of CORS preflight requests. (integer value) +#max_age = 3600 + +# Indicate which methods can be used during the actual request. (list value) +#allow_methods = GET,POST,PUT,DELETE,OPTIONS + +# Indicate which header field names may be used during the actual request. +# (list value) +#allow_headers = Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + + +[cors.subdomain] + +# +# From oslo.middleware +# + +# Indicate whether this resource may be shared with the domain received in the +# requests "origin" header. 
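+# In practice this is set to the origin URL of the client that should be
+# allowed, for example allowed_origin = http://horizon.example.com (the URL
+# is illustrative only; this template leaves the option unset).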
(string value) +#allowed_origin = <None> + +# Indicate that the actual request can include user credentials (boolean value) +#allow_credentials = true + +# Indicate which headers are safe to expose to the API. Defaults to HTTP Simple +# Headers. (list value) +#expose_headers = Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + +# Maximum cache age of CORS preflight requests. (integer value) +#max_age = 3600 + +# Indicate which methods can be used during the actual request. (list value) +#allow_methods = GET,POST,PUT,DELETE,OPTIONS + +# Indicate which header field names may be used during the actual request. +# (list value) +#allow_headers = Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + + +[database] + +# +# From oslo.db +# + +# The file name to use with SQLite. (string value) +# Deprecated group/name - [DEFAULT]/sqlite_db +#sqlite_db = oslo.sqlite + +# If True, SQLite uses synchronous mode. (boolean value) +# Deprecated group/name - [DEFAULT]/sqlite_synchronous +#sqlite_synchronous = true + +# The back end to use for the database. (string value) +# Deprecated group/name - [DEFAULT]/db_backend +#backend = sqlalchemy + +# The SQLAlchemy connection string to use to connect to the database. (string +# value) +# Deprecated group/name - [DEFAULT]/sql_connection +# Deprecated group/name - [DATABASE]/sql_connection +# Deprecated group/name - [sql]/connection +#connection = <None> +connection = mysql+pymysql://cinder:qum5net@VARINET4ADDR/cinder + +# The SQLAlchemy connection string to use to connect to the slave database. +# (string value) +#slave_connection = <None> + +# The SQL mode to be used for MySQL sessions. This option, including the +# default, overrides any server-set SQL mode. To use whatever SQL mode is set +# by the server configuration, set this to no value. Example: mysql_sql_mode= +# (string value) +#mysql_sql_mode = TRADITIONAL + +# Timeout before idle SQL connections are reaped. (integer value) +# Deprecated group/name - [DEFAULT]/sql_idle_timeout +# Deprecated group/name - [DATABASE]/sql_idle_timeout +# Deprecated group/name - [sql]/idle_timeout +#idle_timeout = 3600 + +# Minimum number of SQL connections to keep open in a pool. (integer value) +# Deprecated group/name - [DEFAULT]/sql_min_pool_size +# Deprecated group/name - [DATABASE]/sql_min_pool_size +#min_pool_size = 1 + +# Maximum number of SQL connections to keep open in a pool. (integer value) +# Deprecated group/name - [DEFAULT]/sql_max_pool_size +# Deprecated group/name - [DATABASE]/sql_max_pool_size +#max_pool_size = <None> + +# Maximum number of database connection retries during startup. Set to -1 to +# specify an infinite retry count. (integer value) +# Deprecated group/name - [DEFAULT]/sql_max_retries +# Deprecated group/name - [DATABASE]/sql_max_retries +#max_retries = 10 + +# Interval between retries of opening a SQL connection. (integer value) +# Deprecated group/name - [DEFAULT]/sql_retry_interval +# Deprecated group/name - [DATABASE]/reconnect_interval +#retry_interval = 10 + +# If set, use this value for max_overflow with SQLAlchemy. (integer value) +# Deprecated group/name - [DEFAULT]/sql_max_overflow +# Deprecated group/name - [DATABASE]/sqlalchemy_max_overflow +#max_overflow = <None> + +# Verbosity of SQL debugging information: 0=None, 100=Everything. (integer +# value) +# Deprecated group/name - [DEFAULT]/sql_connection_debug +#connection_debug = 0 + +# Add Python stack traces to SQL as comment strings. 
(boolean value) +# Deprecated group/name - [DEFAULT]/sql_connection_trace +#connection_trace = false + +# If set, use this value for pool_timeout with SQLAlchemy. (integer value) +# Deprecated group/name - [DATABASE]/sqlalchemy_pool_timeout +#pool_timeout = <None> + +# Enable the experimental use of database reconnect on connection lost. +# (boolean value) +#use_db_reconnect = false + +# Seconds between retries of a database transaction. (integer value) +#db_retry_interval = 1 + +# If True, increases the interval between retries of a database operation up to +# db_max_retry_interval. (boolean value) +#db_inc_retry_interval = true + +# If db_inc_retry_interval is set, the maximum seconds between retries of a +# database operation. (integer value) +#db_max_retry_interval = 10 + +# Maximum retries in case of connection error or deadlock error before error is +# raised. Set to -1 to specify an infinite retry count. (integer value) +#db_max_retries = 20 + + +[fc-zone-manager] + +# +# From cinder +# + +# FC Zone Driver responsible for zone management (string value) +#zone_driver = cinder.zonemanager.drivers.brocade.brcd_fc_zone_driver.BrcdFCZoneDriver + +# Zoning policy configured by user; valid values include "initiator-target" or +# "initiator" (string value) +#zoning_policy = initiator-target + +# Comma separated list of Fibre Channel fabric names. This list of names is +# used to retrieve other SAN credentials for connecting to each SAN fabric +# (string value) +#fc_fabric_names = <None> + +# FC SAN Lookup Service (string value) +#fc_san_lookup_service = cinder.zonemanager.drivers.brocade.brcd_fc_san_lookup_service.BrcdFCSanLookupService + +# Southbound connector for zoning operation (string value) +#brcd_sb_connector = cinder.zonemanager.drivers.brocade.brcd_fc_zone_client_cli.BrcdFCZoneClientCLI + +# Southbound connector for zoning operation (string value) +#cisco_sb_connector = cinder.zonemanager.drivers.cisco.cisco_fc_zone_client_cli.CiscoFCZoneClientCLI + + +[keymgr] + +# +# From cinder +# + +# Authentication url for encryption service. (string value) +#encryption_auth_url = http://localhost:5000/v3 + +# Url for encryption service. (string value) +#encryption_api_url = http://localhost:9311/v1 + +# The full class name of the key manager API class (string value) +#api_class = cinder.keymgr.conf_key_mgr.ConfKeyManager + +# Fixed key returned by key manager, specified in hex (string value) +#fixed_key = <None> + + +[keystone_authtoken] + +# +# From keystonemiddleware.auth_token +# + +# Complete public Identity API endpoint. (string value) +#auth_uri = <None> +auth_uri = http://VARINET4ADDR:5000/v2.0 + +# API version of the admin Identity API endpoint. (string value) +#auth_version = <None> + +# Do not handle authorization requests within the middleware, but delegate the +# authorization decision to downstream WSGI components. (boolean value) +#delay_auth_decision = false + +# Request timeout value for communicating with Identity API server. (integer +# value) +#http_connect_timeout = <None> + +# How many times are we trying to reconnect when communicating with Identity +# API Server. (integer value) +#http_request_max_retries = 3 + +# Env key for the swift cache. (string value) +#cache = <None> + +# Required if identity server requires client certificate (string value) +#certfile = <None> + +# Required if identity server requires client certificate (string value) +#keyfile = <None> + +# A PEM encoded Certificate Authority to use when verifying HTTPs connections. +# Defaults to system CAs. 
(string value) +#cafile = <None> + +# Verify HTTPS connections. (boolean value) +#insecure = false + +# The region in which the identity server can be found. (string value) +#region_name = <None> + +# Directory used to cache files related to PKI tokens. (string value) +#signing_dir = <None> + +# Optionally specify a list of memcached server(s) to use for caching. If left +# undefined, tokens will instead be cached in-process. (list value) +# Deprecated group/name - [DEFAULT]/memcache_servers +#memcached_servers = <None> + +# In order to prevent excessive effort spent validating tokens, the middleware +# caches previously-seen tokens for a configurable duration (in seconds). Set +# to -1 to disable caching completely. (integer value) +#token_cache_time = 300 + +# Determines the frequency at which the list of revoked tokens is retrieved +# from the Identity service (in seconds). A high number of revocation events +# combined with a low cache duration may significantly reduce performance. +# (integer value) +#revocation_cache_time = 10 + +# (Optional) If defined, indicate whether token data should be authenticated or +# authenticated and encrypted. Acceptable values are MAC or ENCRYPT. If MAC, +# token data is authenticated (with HMAC) in the cache. If ENCRYPT, token data +# is encrypted and authenticated in the cache. If the value is not one of these +# options or empty, auth_token will raise an exception on initialization. +# (string value) +#memcache_security_strategy = <None> + +# (Optional, mandatory if memcache_security_strategy is defined) This string is +# used for key derivation. (string value) +#memcache_secret_key = <None> + +# (Optional) Number of seconds memcached server is considered dead before it is +# tried again. (integer value) +#memcache_pool_dead_retry = 300 + +# (Optional) Maximum total number of open connections to every memcached +# server. (integer value) +#memcache_pool_maxsize = 10 + +# (Optional) Socket timeout in seconds for communicating with a memcached +# server. (integer value) +#memcache_pool_socket_timeout = 3 + +# (Optional) Number of seconds a connection to memcached is held unused in the +# pool before it is closed. (integer value) +#memcache_pool_unused_timeout = 60 + +# (Optional) Number of seconds that an operation will wait to get a memcached +# client connection from the pool. (integer value) +#memcache_pool_conn_get_timeout = 10 + +# (Optional) Use the advanced (eventlet safe) memcached client pool. The +# advanced pool will only work under python 2.x. (boolean value) +#memcache_use_advanced_pool = false + +# (Optional) Indicate whether to set the X-Service-Catalog header. If False, +# middleware will not ask for service catalog on token validation and will not +# set the X-Service-Catalog header. (boolean value) +#include_service_catalog = true + +# Used to control the use and type of token binding. Can be set to: "disabled" +# to not check token binding. "permissive" (default) to validate binding +# information if the bind type is of a form known to the server and ignore it +# if not. "strict" like "permissive" but if the bind type is unknown the token +# will be rejected. "required" any form of token binding is needed to be +# allowed. Finally the name of a binding method that must be present in tokens. +# (string value) +#enforce_token_bind = permissive + +# If true, the revocation list will be checked for cached tokens. This requires +# that PKI tokens are configured on the identity server. 
(boolean value) +#check_revocations_for_cached = false + +# Hash algorithms to use for hashing PKI tokens. This may be a single algorithm +# or multiple. The algorithms are those supported by Python standard +# hashlib.new(). The hashes will be tried in the order given, so put the +# preferred one first for performance. The result of the first hash will be +# stored in the cache. This will typically be set to multiple values only while +# migrating from a less secure algorithm to a more secure one. Once all the old +# tokens are expired this option should be set to a single value for better +# performance. (list value) +#hash_algorithms = md5 + +# Prefix to prepend at the beginning of the path. Deprecated, use identity_uri. +# (string value) +#auth_admin_prefix = + +# Host providing the admin Identity API endpoint. Deprecated, use identity_uri. +# (string value) +#auth_host = 127.0.0.1 + +# Port of the admin Identity API endpoint. Deprecated, use identity_uri. +# (integer value) +#auth_port = 35357 + +# Protocol of the admin Identity API endpoint (http or https). Deprecated, use +# identity_uri. (string value) +#auth_protocol = https + +# Complete admin Identity API endpoint. This should specify the unversioned +# root endpoint e.g. https://localhost:35357/ (string value) +#identity_uri = <None> +identity_uri = http://VARINET4ADDR:35357 + +# This option is deprecated and may be removed in a future release. Single +# shared secret with the Keystone configuration used for bootstrapping a +# Keystone installation, or otherwise bypassing the normal authentication +# process. This option should not be used, use `admin_user` and +# `admin_password` instead. (string value) +#admin_token = <None> + +# Service username. (string value) +#admin_user = <None> +admin_user = cinder + +# Service user password. (string value) +#admin_password = <None> +admin_password = qum5net + +# Service tenant name. (string value) +#admin_tenant_name = admin +admin_tenant_name = services + + +[matchmaker_redis] + +# +# From oslo.messaging +# + +# Host to locate redis. (string value) +#host = 127.0.0.1 + +# Use this port to connect to redis host. (integer value) +#port = 6379 + +# Password for Redis server (optional). (string value) +#password = <None> + +# +# From oslo.messaging +# + +# Host to locate redis. (string value) +#host = 127.0.0.1 + +# Use this port to connect to redis host. (integer value) +#port = 6379 + +# Password for Redis server (optional). (string value) +#password = <None> + + +[matchmaker_ring] + +# +# From oslo.messaging +# + +# Matchmaker ring file (JSON). (string value) +# Deprecated group/name - [DEFAULT]/matchmaker_ringfile +#ringfile = /etc/oslo/matchmaker_ring.json + +# +# From oslo.messaging +# + +# Matchmaker ring file (JSON). (string value) +# Deprecated group/name - [DEFAULT]/matchmaker_ringfile +#ringfile = /etc/oslo/matchmaker_ring.json + + +[oslo_concurrency] + +# +# From oslo.concurrency +# + +# Enables or disables inter-process locks. (boolean value) +# Deprecated group/name - [DEFAULT]/disable_process_locking +#disable_process_locking = false + +# Directory to use for lock files. For security, the specified directory +# should only be writable by the user running the processes that need locking. +# Defaults to environment variable OSLO_LOCK_PATH. If external locks are used, +# a lock path must be set. 
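+# Packaged OpenStack deployments commonly point this at a service-owned state
+# directory, for example lock_path = /var/lib/cinder/tmp (illustrative only;
+# this template leaves the option at its default).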
(string value) +# Deprecated group/name - [DEFAULT]/lock_path +#lock_path = <None> + + +[oslo_messaging_amqp] + +# +# From oslo.messaging +# + +# address prefix used when sending to a specific server (string value) +# Deprecated group/name - [amqp1]/server_request_prefix +#server_request_prefix = exclusive + +# address prefix used when broadcasting to all servers (string value) +# Deprecated group/name - [amqp1]/broadcast_prefix +#broadcast_prefix = broadcast + +# address prefix when sending to any server in group (string value) +# Deprecated group/name - [amqp1]/group_request_prefix +#group_request_prefix = unicast + +# Name for the AMQP container (string value) +# Deprecated group/name - [amqp1]/container_name +#container_name = <None> + +# Timeout for inactive connections (in seconds) (integer value) +# Deprecated group/name - [amqp1]/idle_timeout +#idle_timeout = 0 + +# Debug: dump AMQP frames to stdout (boolean value) +# Deprecated group/name - [amqp1]/trace +#trace = false + +# CA certificate PEM file to verify server certificate (string value) +# Deprecated group/name - [amqp1]/ssl_ca_file +#ssl_ca_file = + +# Identifying certificate PEM file to present to clients (string value) +# Deprecated group/name - [amqp1]/ssl_cert_file +#ssl_cert_file = + +# Private key PEM file used to sign cert_file certificate (string value) +# Deprecated group/name - [amqp1]/ssl_key_file +#ssl_key_file = + +# Password for decrypting ssl_key_file (if encrypted) (string value) +# Deprecated group/name - [amqp1]/ssl_key_password +#ssl_key_password = <None> + +# Accept clients using either SSL or plain TCP (boolean value) +# Deprecated group/name - [amqp1]/allow_insecure_clients +#allow_insecure_clients = false + +# +# From oslo.messaging +# + +# address prefix used when sending to a specific server (string value) +# Deprecated group/name - [amqp1]/server_request_prefix +#server_request_prefix = exclusive + +# address prefix used when broadcasting to all servers (string value) +# Deprecated group/name - [amqp1]/broadcast_prefix +#broadcast_prefix = broadcast + +# address prefix when sending to any server in group (string value) +# Deprecated group/name - [amqp1]/group_request_prefix +#group_request_prefix = unicast + +# Name for the AMQP container (string value) +# Deprecated group/name - [amqp1]/container_name +#container_name = <None> + +# Timeout for inactive connections (in seconds) (integer value) +# Deprecated group/name - [amqp1]/idle_timeout +#idle_timeout = 0 + +# Debug: dump AMQP frames to stdout (boolean value) +# Deprecated group/name - [amqp1]/trace +#trace = false + +# CA certificate PEM file to verify server certificate (string value) +# Deprecated group/name - [amqp1]/ssl_ca_file +#ssl_ca_file = + +# Identifying certificate PEM file to present to clients (string value) +# Deprecated group/name - [amqp1]/ssl_cert_file +#ssl_cert_file = + +# Private key PEM file used to sign cert_file certificate (string value) +# Deprecated group/name - [amqp1]/ssl_key_file +#ssl_key_file = + +# Password for decrypting ssl_key_file (if encrypted) (string value) +# Deprecated group/name - [amqp1]/ssl_key_password +#ssl_key_password = <None> + +# Accept clients using either SSL or plain TCP (boolean value) +# Deprecated group/name - [amqp1]/allow_insecure_clients +#allow_insecure_clients = false + + +[oslo_messaging_qpid] + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. 
(boolean value) +# Deprecated group/name - [DEFAULT]/amqp_durable_queues +# Deprecated group/name - [DEFAULT]/rabbit_durable_queues +#amqp_durable_queues = false + +# Auto-delete queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_auto_delete +#amqp_auto_delete = false + +# Send a single AMQP reply to call message. The current behaviour since oslo- +# incubator is to send two AMQP replies - first one with the payload, a second +# one to ensure the other have finish to send the payload. We are going to +# remove it in the N release, but we must keep backward compatible at the same +# time. This option provides such compatibility - it defaults to False in +# Liberty and can be turned on for early adopters with a new installations or +# for testing. Please note, that this option will be removed in the Mitaka +# release. (boolean value) +#send_single_reply = false + +# Qpid broker hostname. (string value) +# Deprecated group/name - [DEFAULT]/qpid_hostname +#qpid_hostname = localhost + +# Qpid broker port. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_port +#qpid_port = 5672 + +# Qpid HA cluster host:port pairs. (list value) +# Deprecated group/name - [DEFAULT]/qpid_hosts +#qpid_hosts = $qpid_hostname:$qpid_port + +# Username for Qpid connection. (string value) +# Deprecated group/name - [DEFAULT]/qpid_username +#qpid_username = + +# Password for Qpid connection. (string value) +# Deprecated group/name - [DEFAULT]/qpid_password +#qpid_password = + +# Space separated list of SASL mechanisms to use for auth. (string value) +# Deprecated group/name - [DEFAULT]/qpid_sasl_mechanisms +#qpid_sasl_mechanisms = + +# Seconds between connection keepalive heartbeats. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_heartbeat +#qpid_heartbeat = 60 + +# Transport to use, either 'tcp' or 'ssl'. (string value) +# Deprecated group/name - [DEFAULT]/qpid_protocol +#qpid_protocol = tcp + +# Whether to disable the Nagle algorithm. (boolean value) +# Deprecated group/name - [DEFAULT]/qpid_tcp_nodelay +#qpid_tcp_nodelay = true + +# The number of prefetched messages held by receiver. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_receiver_capacity +#qpid_receiver_capacity = 1 + +# The qpid topology version to use. Version 1 is what was originally used by +# impl_qpid. Version 2 includes some backwards-incompatible changes that allow +# broker federation to work. Users should update to version 2 when they are +# able to take everything down, as it requires a clean break. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_topology_version +#qpid_topology_version = 1 + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_durable_queues +# Deprecated group/name - [DEFAULT]/rabbit_durable_queues +#amqp_durable_queues = false + +# Auto-delete queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_auto_delete +#amqp_auto_delete = false + +# Send a single AMQP reply to call message. The current behaviour since oslo- +# incubator is to send two AMQP replies - first one with the payload, a second +# one to ensure the other have finish to send the payload. We are going to +# remove it in the N release, but we must keep backward compatible at the same +# time. This option provides such compatibility - it defaults to False in +# Liberty and can be turned on for early adopters with a new installations or +# for testing. 
Please note, that this option will be removed in the Mitaka +# release. (boolean value) +#send_single_reply = false + +# Qpid broker hostname. (string value) +# Deprecated group/name - [DEFAULT]/qpid_hostname +#qpid_hostname = localhost + +# Qpid broker port. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_port +#qpid_port = 5672 + +# Qpid HA cluster host:port pairs. (list value) +# Deprecated group/name - [DEFAULT]/qpid_hosts +#qpid_hosts = $qpid_hostname:$qpid_port + +# Username for Qpid connection. (string value) +# Deprecated group/name - [DEFAULT]/qpid_username +#qpid_username = + +# Password for Qpid connection. (string value) +# Deprecated group/name - [DEFAULT]/qpid_password +#qpid_password = + +# Space separated list of SASL mechanisms to use for auth. (string value) +# Deprecated group/name - [DEFAULT]/qpid_sasl_mechanisms +#qpid_sasl_mechanisms = + +# Seconds between connection keepalive heartbeats. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_heartbeat +#qpid_heartbeat = 60 + +# Transport to use, either 'tcp' or 'ssl'. (string value) +# Deprecated group/name - [DEFAULT]/qpid_protocol +#qpid_protocol = tcp + +# Whether to disable the Nagle algorithm. (boolean value) +# Deprecated group/name - [DEFAULT]/qpid_tcp_nodelay +#qpid_tcp_nodelay = true + +# The number of prefetched messages held by receiver. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_receiver_capacity +#qpid_receiver_capacity = 1 + +# The qpid topology version to use. Version 1 is what was originally used by +# impl_qpid. Version 2 includes some backwards-incompatible changes that allow +# broker federation to work. Users should update to version 2 when they are +# able to take everything down, as it requires a clean break. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_topology_version +#qpid_topology_version = 1 + + +[oslo_messaging_rabbit] + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_durable_queues +# Deprecated group/name - [DEFAULT]/rabbit_durable_queues +#amqp_durable_queues = false +amqp_durable_queues = False + +# Auto-delete queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_auto_delete +#amqp_auto_delete = false + +# Send a single AMQP reply to call message. The current behaviour since oslo- +# incubator is to send two AMQP replies - first one with the payload, a second +# one to ensure the other have finish to send the payload. We are going to +# remove it in the N release, but we must keep backward compatible at the same +# time. This option provides such compatibility - it defaults to False in +# Liberty and can be turned on for early adopters with a new installations or +# for testing. Please note, that this option will be removed in the Mitaka +# release. (boolean value) +#send_single_reply = false + +# SSL version to use (valid only if SSL enabled). Valid values are TLSv1 and +# SSLv23. SSLv2, SSLv3, TLSv1_1, and TLSv1_2 may be available on some +# distributions. (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_version +#kombu_ssl_version = + +# SSL key file (valid only if SSL enabled). (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_keyfile +#kombu_ssl_keyfile = +kombu_ssl_keyfile = + +# SSL cert file (valid only if SSL enabled). (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_certfile +#kombu_ssl_certfile = +kombu_ssl_certfile = + +# SSL certification authority file (valid only if SSL enabled). 
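+# When SSL toward RabbitMQ is enabled, the kombu_ssl_* options in this section
+# are normally set together with rabbit_use_ssl, for example (paths are
+# illustrative only; this template connects without SSL):
+#   rabbit_use_ssl = True
+#   kombu_ssl_ca_certs = /etc/pki/tls/certs/rabbitmq-ca.crt
+#   kombu_ssl_certfile = /etc/pki/tls/certs/cinder-client.crt
+#   kombu_ssl_keyfile = /etc/pki/tls/private/cinder-client.key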
(string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_ca_certs +#kombu_ssl_ca_certs = +kombu_ssl_ca_certs = + +# How long to wait before reconnecting in response to an AMQP consumer cancel +# notification. (floating point value) +# Deprecated group/name - [DEFAULT]/kombu_reconnect_delay +#kombu_reconnect_delay = 1.0 + +# How long to wait before considering a reconnect attempt to have failed. This +# value should not be longer than rpc_response_timeout. (integer value) +#kombu_reconnect_timeout = 60 + +# The RabbitMQ broker address where a single node is used. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_host +#rabbit_host = localhost +rabbit_host = VARINET4ADDR + +# The RabbitMQ broker port where a single node is used. (integer value) +# Deprecated group/name - [DEFAULT]/rabbit_port +#rabbit_port = 5672 +rabbit_port = 5672 + +# RabbitMQ HA cluster host:port pairs. (list value) +# Deprecated group/name - [DEFAULT]/rabbit_hosts +#rabbit_hosts = $rabbit_host:$rabbit_port +rabbit_hosts = VARINET4ADDR:5672 + +# Connect over SSL for RabbitMQ. (boolean value) +# Deprecated group/name - [DEFAULT]/rabbit_use_ssl +#rabbit_use_ssl = false +rabbit_use_ssl = False + +# The RabbitMQ userid. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_userid +#rabbit_userid = guest +rabbit_userid = guest + +# The RabbitMQ password. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_password +#rabbit_password = guest +rabbit_password = guest + +# The RabbitMQ login method. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_login_method +#rabbit_login_method = AMQPLAIN + +# The RabbitMQ virtual host. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_virtual_host +#rabbit_virtual_host = / +rabbit_virtual_host = / + +# How frequently to retry connecting with RabbitMQ. (integer value) +#rabbit_retry_interval = 1 + +# How long to backoff for between retries when connecting to RabbitMQ. (integer +# value) +# Deprecated group/name - [DEFAULT]/rabbit_retry_backoff +#rabbit_retry_backoff = 2 + +# Maximum number of RabbitMQ connection retries. Default is 0 (infinite retry +# count). (integer value) +# Deprecated group/name - [DEFAULT]/rabbit_max_retries +#rabbit_max_retries = 0 + +# Use HA queues in RabbitMQ (x-ha-policy: all). If you change this option, you +# must wipe the RabbitMQ database. (boolean value) +# Deprecated group/name - [DEFAULT]/rabbit_ha_queues +#rabbit_ha_queues = false +rabbit_ha_queues = False + +# Specifies the number of messages to prefetch. Setting to zero allows +# unlimited messages. (integer value) +#rabbit_qos_prefetch_count = 0 + +# Number of seconds after which the Rabbit broker is considered down if +# heartbeat's keep-alive fails (0 disable the heartbeat). EXPERIMENTAL (integer +# value) +#heartbeat_timeout_threshold = 60 +heartbeat_timeout_threshold = 0 + +# How often times during the heartbeat_timeout_threshold we check the +# heartbeat. (integer value) +#heartbeat_rate = 2 +heartbeat_rate = 2 + +# Deprecated, use rpc_backend=kombu+memory or rpc_backend=fake (boolean value) +# Deprecated group/name - [DEFAULT]/fake_rabbit +#fake_rabbit = false + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_durable_queues +# Deprecated group/name - [DEFAULT]/rabbit_durable_queues +#amqp_durable_queues = false + +# Auto-delete queues in AMQP. 
(boolean value) +# Deprecated group/name - [DEFAULT]/amqp_auto_delete +#amqp_auto_delete = false + +# Send a single AMQP reply to call message. The current behaviour since oslo- +# incubator is to send two AMQP replies - first one with the payload, a second +# one to ensure the other have finish to send the payload. We are going to +# remove it in the N release, but we must keep backward compatible at the same +# time. This option provides such compatibility - it defaults to False in +# Liberty and can be turned on for early adopters with a new installations or +# for testing. Please note, that this option will be removed in the Mitaka +# release. (boolean value) +#send_single_reply = false + +# SSL version to use (valid only if SSL enabled). Valid values are TLSv1 and +# SSLv23. SSLv2, SSLv3, TLSv1_1, and TLSv1_2 may be available on some +# distributions. (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_version +#kombu_ssl_version = + +# SSL key file (valid only if SSL enabled). (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_keyfile +#kombu_ssl_keyfile = + +# SSL cert file (valid only if SSL enabled). (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_certfile +#kombu_ssl_certfile = + +# SSL certification authority file (valid only if SSL enabled). (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_ca_certs +#kombu_ssl_ca_certs = + +# How long to wait before reconnecting in response to an AMQP consumer cancel +# notification. (floating point value) +# Deprecated group/name - [DEFAULT]/kombu_reconnect_delay +#kombu_reconnect_delay = 1.0 + +# How long to wait before considering a reconnect attempt to have failed. This +# value should not be longer than rpc_response_timeout. (integer value) +#kombu_reconnect_timeout = 60 + +# The RabbitMQ broker address where a single node is used. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_host +#rabbit_host = localhost + +# The RabbitMQ broker port where a single node is used. (integer value) +# Deprecated group/name - [DEFAULT]/rabbit_port +#rabbit_port = 5672 + +# RabbitMQ HA cluster host:port pairs. (list value) +# Deprecated group/name - [DEFAULT]/rabbit_hosts +#rabbit_hosts = $rabbit_host:$rabbit_port + +# Connect over SSL for RabbitMQ. (boolean value) +# Deprecated group/name - [DEFAULT]/rabbit_use_ssl +#rabbit_use_ssl = false + +# The RabbitMQ userid. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_userid +#rabbit_userid = guest + +# The RabbitMQ password. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_password +#rabbit_password = guest + +# The RabbitMQ login method. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_login_method +#rabbit_login_method = AMQPLAIN + +# The RabbitMQ virtual host. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_virtual_host +#rabbit_virtual_host = / + +# How frequently to retry connecting with RabbitMQ. (integer value) +#rabbit_retry_interval = 1 + +# How long to backoff for between retries when connecting to RabbitMQ. (integer +# value) +# Deprecated group/name - [DEFAULT]/rabbit_retry_backoff +#rabbit_retry_backoff = 2 + +# Maximum number of RabbitMQ connection retries. Default is 0 (infinite retry +# count). (integer value) +# Deprecated group/name - [DEFAULT]/rabbit_max_retries +#rabbit_max_retries = 0 + +# Use HA queues in RabbitMQ (x-ha-policy: all). If you change this option, you +# must wipe the RabbitMQ database. 
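+# In a clustered RabbitMQ deployment this is typically enabled together with a
+# multi-node rabbit_hosts list, for example rabbit_ha_queues = true and
+# rabbit_hosts = rabbit1:5672,rabbit2:5672 (host names are illustrative; this
+# template targets a single broker and keeps HA queues disabled).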
(boolean value) +# Deprecated group/name - [DEFAULT]/rabbit_ha_queues +#rabbit_ha_queues = false + +# Specifies the number of messages to prefetch. Setting to zero allows +# unlimited messages. (integer value) +#rabbit_qos_prefetch_count = 0 + +# Number of seconds after which the Rabbit broker is considered down if +# heartbeat's keep-alive fails (0 disable the heartbeat). EXPERIMENTAL (integer +# value) +#heartbeat_timeout_threshold = 60 + +# How often times during the heartbeat_timeout_threshold we check the +# heartbeat. (integer value) +#heartbeat_rate = 2 + +# Deprecated, use rpc_backend=kombu+memory or rpc_backend=fake (boolean value) +# Deprecated group/name - [DEFAULT]/fake_rabbit +#fake_rabbit = false + + +[oslo_middleware] + +# +# From oslo.middleware +# + +# The maximum body size for each request, in bytes. (integer value) +# Deprecated group/name - [DEFAULT]/osapi_max_request_body_size +# Deprecated group/name - [DEFAULT]/max_request_body_size +#max_request_body_size = 114688 + +# +# From oslo.middleware +# + +# The HTTP Header that will be used to determine what the original request +# protocol scheme was, even if it was hidden by an SSL termination proxy. +# (string value) +#secure_proxy_ssl_header = X-Forwarded-Proto + + +[oslo_policy] + +# +# From oslo.policy +# + +# The JSON file that defines policies. (string value) +# Deprecated group/name - [DEFAULT]/policy_file +#policy_file = policy.json + +# Default rule. Enforced when a requested rule is not found. (string value) +# Deprecated group/name - [DEFAULT]/policy_default_rule +#policy_default_rule = default + +# Directories where policy configuration files are stored. They can be relative +# to any directory in the search path defined by the config_dir option, or +# absolute paths. The file defined by policy_file must exist for these +# directories to be searched. Missing or empty directories are ignored. (multi +# valued) +# Deprecated group/name - [DEFAULT]/policy_dirs +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#policy_dirs = policy.d + + +[oslo_reports] + +# +# From oslo.reports +# + +# Path to a log directory where to create a file (string value) +#log_dir = <None> + + +[profiler] + +# +# From cinder +# + +# If False fully disable profiling feature. (boolean value) +#profiler_enabled = false + +# If False doesn't trace SQL requests. (boolean value) +#trace_sqlalchemy = false + +[lvm] +iscsi_helper=lioadm +volume_group=cinder-volumes +iscsi_ip_address=VARINET4ADDR +volume_driver=cinder.volume.drivers.lvm.LVMVolumeDriver +volumes_dir=/var/lib/cinder/volumes +iscsi_protocol=iscsi +volume_backend_name=lvm + +[ceph] +volume_driver = cinder.volume.drivers.rbd.RBDDriver +rbd_pool = volumes +rbd_ceph_conf = /etc/ceph/ceph.conf +rbd_flatten_volume_from_snapshot = false +rbd_max_clone_depth = 5 +rbd_store_chunk_size = 4 +rados_connect_timeout = -1 +glance_api_version = 2 +rbd_user=cinder +rbd_secret_uuid=RBDSECRET diff --git a/qa/qa_scripts/openstack/files/glance-api.template.conf b/qa/qa_scripts/openstack/files/glance-api.template.conf new file mode 100644 index 000000000..956fb1bf2 --- /dev/null +++ b/qa/qa_scripts/openstack/files/glance-api.template.conf @@ -0,0 +1,1590 @@ +[DEFAULT] + +# +# From glance.api +# + +# When true, this option sets the owner of an image to be the tenant. +# Otherwise, the owner of the image will be the authenticated user +# issuing the request. 
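The [ceph] backend at the end of the cinder template above assumes a 'volumes' pool and a client.cinder key already exist on the cluster. A minimal check of that assumption with python-rados/python-rbd (where the keyring lives is left to the deployment; this is a sketch, not what the driver itself runs):

    import rados
    import rbd

    # Connect the way cinder's RBDDriver is configured to: rbd_user=cinder,
    # rbd_ceph_conf=/etc/ceph/ceph.conf, rbd_pool=volumes.
    cluster = rados.Rados(conffile='/etc/ceph/ceph.conf', name='client.cinder')
    cluster.connect()
    ioctx = cluster.open_ioctx('volumes')
    print(rbd.RBD().list(ioctx))   # images visible to client.cinder
    ioctx.close()
    cluster.shutdown()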
(boolean value) +#owner_is_tenant=true + +# Role used to identify an authenticated user as administrator. +# (string value) +#admin_role=admin + +# Allow unauthenticated users to access the API with read-only +# privileges. This only applies when using ContextMiddleware. (boolean +# value) +#allow_anonymous_access=false + +# Limits request ID length. (integer value) +#max_request_id_length=64 + +# Public url to use for versions endpoint. The default is None, which +# will use the request's host_url attribute to populate the URL base. +# If Glance is operating behind a proxy, you will want to change this +# to represent the proxy's URL. (string value) +#public_endpoint=<None> + +# Whether to allow users to specify image properties beyond what the +# image schema provides (boolean value) +#allow_additional_image_properties=true + +# Maximum number of image members per image. Negative values evaluate +# to unlimited. (integer value) +#image_member_quota=128 + +# Maximum number of properties allowed on an image. Negative values +# evaluate to unlimited. (integer value) +#image_property_quota=128 + +# Maximum number of tags allowed on an image. Negative values evaluate +# to unlimited. (integer value) +#image_tag_quota=128 + +# Maximum number of locations allowed on an image. Negative values +# evaluate to unlimited. (integer value) +#image_location_quota=10 + +# Python module path of data access API (string value) +#data_api=glance.db.sqlalchemy.api + +# Default value for the number of items returned by a request if not +# specified explicitly in the request (integer value) +#limit_param_default=25 + +# Maximum permissible number of items that could be returned by a +# request (integer value) +#api_limit_max=1000 + +# Whether to include the backend image storage location in image +# properties. Revealing storage location can be a security risk, so +# use this setting with caution! (boolean value) +#show_image_direct_url=false +show_image_direct_url=True + +# Whether to include the backend image locations in image properties. +# For example, if using the file system store a URL of +# "file:///path/to/image" will be returned to the user in the +# 'direct_url' meta-data field. Revealing storage location can be a +# security risk, so use this setting with caution! The overrides +# show_image_direct_url. (boolean value) +#show_multiple_locations=false + +# Maximum size of image a user can upload in bytes. Defaults to +# 1099511627776 bytes (1 TB).WARNING: this value should only be +# increased after careful consideration and must be set to a value +# under 8 EB (9223372036854775808). (integer value) +# Maximum value: 9223372036854775808 +#image_size_cap=1099511627776 + +# Set a system wide quota for every user. This value is the total +# capacity that a user can use across all storage systems. A value of +# 0 means unlimited.Optional unit can be specified for the value. +# Accepted units are B, KB, MB, GB and TB representing Bytes, +# KiloBytes, MegaBytes, GigaBytes and TeraBytes respectively. If no +# unit is specified then Bytes is assumed. Note that there should not +# be any space between value and unit and units are case sensitive. +# (string value) +#user_storage_quota=0 + +# Deploy the v1 OpenStack Images API. (boolean value) +#enable_v1_api=true + +# Deploy the v2 OpenStack Images API. (boolean value) +#enable_v2_api=true + +# Deploy the v3 OpenStack Objects API. (boolean value) +#enable_v3_api=false + +# Deploy the v1 OpenStack Registry API. 
(boolean value) +#enable_v1_registry=true + +# Deploy the v2 OpenStack Registry API. (boolean value) +#enable_v2_registry=true + +# The hostname/IP of the pydev process listening for debug connections +# (string value) +#pydev_worker_debug_host=<None> + +# The port on which a pydev process is listening for connections. +# (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#pydev_worker_debug_port=5678 + +# AES key for encrypting store 'location' metadata. This includes, if +# used, Swift or S3 credentials. Should be set to a random string of +# length 16, 24 or 32 bytes (string value) +#metadata_encryption_key=<None> + +# Digest algorithm which will be used for digital signature. Use the +# command "openssl list-message-digest-algorithms" to get the +# available algorithmssupported by the version of OpenSSL on the +# platform. Examples are "sha1", "sha256", "sha512", etc. (string +# value) +#digest_algorithm=sha256 + +# This value sets what strategy will be used to determine the image +# location order. Currently two strategies are packaged with Glance +# 'location_order' and 'store_type'. (string value) +# Allowed values: location_order, store_type +#location_strategy=location_order + +# The location of the property protection file.This file contains the +# rules for property protections and the roles/policies associated +# with it. If this config value is not specified, by default, property +# protections won't be enforced. If a value is specified and the file +# is not found, then the glance-api service will not start. (string +# value) +#property_protection_file=<None> + +# This config value indicates whether "roles" or "policies" are used +# in the property protection file. (string value) +# Allowed values: roles, policies +#property_protection_rule_format=roles + +# Modules of exceptions that are permitted to be recreated upon +# receiving exception data from an rpc call. (list value) +#allowed_rpc_exception_modules=glance.common.exception,exceptions + +# Address to bind the server. Useful when selecting a particular +# network interface. (string value) +#bind_host=0.0.0.0 +bind_host=0.0.0.0 + +# The port on which the server will listen. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#bind_port=<None> +bind_port=9292 + +# The number of child process workers that will be created to service +# requests. The default will be equal to the number of CPUs available. +# (integer value) +#workers=4 +workers=12 + +# Maximum line size of message headers to be accepted. max_header_line +# may need to be increased when using large tokens (typically those +# generated by the Keystone v3 API with big service catalogs (integer +# value) +#max_header_line=16384 + +# If False, server will return the header "Connection: close", If +# True, server will return "Connection: Keep-Alive" in its responses. +# In order to close the client socket connection explicitly after the +# response is sent and read successfully by the client, you simply +# have to set this option to False when you create a wsgi server. +# (boolean value) +#http_keepalive=true + +# Timeout for client connections' socket operations. If an incoming +# connection is idle for this number of seconds it will be closed. A +# value of '0' means wait forever. (integer value) +#client_socket_timeout=900 + +# The backlog value that will be used when creating the TCP listener +# socket. (integer value) +#backlog=4096 +backlog=4096 + +# The value for the socket option TCP_KEEPIDLE. 
This is the time in +# seconds that the connection must be idle before TCP starts sending +# keepalive probes. (integer value) +#tcp_keepidle=600 + +# CA certificate file to use to verify connecting clients. (string +# value) +#ca_file=<None> + +# Certificate file to use when starting API server securely. (string +# value) +#cert_file=<None> + +# Private key file to use when starting API server securely. (string +# value) +#key_file=<None> + +# If False fully disable profiling feature. (boolean value) +#enabled=false + +# If False doesn't trace SQL requests. (boolean value) +#trace_sqlalchemy=false + +# The path to the sqlite file database that will be used for image +# cache management. (string value) +#image_cache_sqlite_db=cache.db + +# The driver to use for image cache management. (string value) +#image_cache_driver=sqlite + +# The upper limit (the maximum size of accumulated cache in bytes) +# beyond which pruner, if running, starts cleaning the images cache. +# (integer value) +#image_cache_max_size=10737418240 + +# The amount of time to let an image remain in the cache without being +# accessed. (integer value) +#image_cache_stall_time=86400 + +# Base directory that the Image Cache uses. (string value) +#image_cache_dir=/var/lib/glance/image-cache/ +image_cache_dir=/var/lib/glance/image-cache + +# Default publisher_id for outgoing notifications. (string value) +#default_publisher_id=image.localhost + +# List of disabled notifications. A notification can be given either +# as a notification type to disable a single event, or as a +# notification group prefix to disable all events within a group. +# Example: if this config option is set to ["image.create", +# "metadef_namespace"], then "image.create" notification will not be +# sent after image is created and none of the notifications for +# metadefinition namespaces will be sent. (list value) +#disabled_notifications = + +# Address to find the registry server. (string value) +#registry_host=0.0.0.0 +registry_host=0.0.0.0 + +# Port the registry server is listening on. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#registry_port=9191 +registry_port=9191 + +# Whether to pass through the user token when making requests to the +# registry. To prevent failures with token expiration during big files +# upload, it is recommended to set this parameter to False.If +# "use_user_token" is not in effect, then admin credentials can be +# specified. (boolean value) +#use_user_token=true + +# The administrators user name. If "use_user_token" is not in effect, +# then admin credentials can be specified. (string value) +#admin_user=%SERVICE_USER% + +# The administrators password. If "use_user_token" is not in effect, +# then admin credentials can be specified. (string value) +#admin_password=%SERVICE_PASSWORD% + +# The tenant name of the administrative user. If "use_user_token" is +# not in effect, then admin tenant name can be specified. (string +# value) +#admin_tenant_name=%SERVICE_TENANT_NAME% + +# The URL to the keystone service. If "use_user_token" is not in +# effect and using keystone auth, then URL of keystone can be +# specified. (string value) +#auth_url=<None> + +# The strategy to use for authentication. If "use_user_token" is not +# in effect, then auth strategy can be specified. (string value) +#auth_strategy=noauth + +# The region for the authentication service. If "use_user_token" is +# not in effect and using keystone auth, then region name can be +# specified. 
(string value) +#auth_region=<None> + +# The protocol to use for communication with the registry server. +# Either http or https. (string value) +#registry_client_protocol=http +registry_client_protocol=http + +# The path to the key file to use in SSL connections to the registry +# server, if any. Alternately, you may set the GLANCE_CLIENT_KEY_FILE +# environment variable to a filepath of the key file (string value) +#registry_client_key_file=<None> + +# The path to the cert file to use in SSL connections to the registry +# server, if any. Alternately, you may set the GLANCE_CLIENT_CERT_FILE +# environment variable to a filepath of the CA cert file (string +# value) +#registry_client_cert_file=<None> + +# The path to the certifying authority cert file to use in SSL +# connections to the registry server, if any. Alternately, you may set +# the GLANCE_CLIENT_CA_FILE environment variable to a filepath of the +# CA cert file. (string value) +#registry_client_ca_file=<None> + +# When using SSL in connections to the registry server, do not require +# validation via a certifying authority. This is the registry's +# equivalent of specifying --insecure on the command line using +# glanceclient for the API. (boolean value) +#registry_client_insecure=false + +# The period of time, in seconds, that the API server will wait for a +# registry request to complete. A value of 0 implies no timeout. +# (integer value) +#registry_client_timeout=600 + +# Whether to pass through headers containing user and tenant +# information when making requests to the registry. This allows the +# registry to use the context middleware without keystonemiddleware's +# auth_token middleware, removing calls to the keystone auth service. +# It is recommended that when using this option, secure communication +# between glance api and glance registry is ensured by means other +# than auth_token middleware. (boolean value) +#send_identity_headers=false + +# The amount of time in seconds to delay before performing a delete. +# (integer value) +#scrub_time=0 + +# The size of thread pool to be used for scrubbing images. The default +# is one, which signifies serial scrubbing. Any value above one +# indicates the max number of images that may be scrubbed in parallel. +# (integer value) +#scrub_pool_size=1 + +# Turn on/off delayed delete. (boolean value) +#delayed_delete=false + +# Role used to identify an authenticated user as administrator. +# (string value) +#admin_role=admin + +# Whether to pass through headers containing user and tenant +# information when making requests to the registry. This allows the +# registry to use the context middleware without keystonemiddleware's +# auth_token middleware, removing calls to the keystone auth service. +# It is recommended that when using this option, secure communication +# between glance api and glance registry is ensured by means other +# than auth_token middleware. (boolean value) +#send_identity_headers=false + +# +# From oslo.log +# + +# Print debugging output (set logging level to DEBUG instead of +# default INFO level). (boolean value) +#debug=False +debug=True + +# If set to false, will disable INFO logging level, making WARNING the +# default. (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#verbose=True +verbose=True + +# The name of a logging configuration file. This file is appended to +# any existing logging configuration files. 
For details about logging +# configuration files, see the Python logging module documentation. +# (string value) +# Deprecated group/name - [DEFAULT]/log_config +#log_config_append=<None> + +# DEPRECATED. A logging.Formatter log message format string which may +# use any of the available logging.LogRecord attributes. This option +# is deprecated. Please use logging_context_format_string and +# logging_default_format_string instead. (string value) +#log_format=<None> + +# Format string for %%(asctime)s in log records. Default: %(default)s +# . (string value) +#log_date_format=%Y-%m-%d %H:%M:%S + +# (Optional) Name of log file to output to. If no default is set, +# logging will go to stdout. (string value) +# Deprecated group/name - [DEFAULT]/logfile +#log_file=/var/log/glance/api.log +log_file=/var/log/glance/api.log + +# (Optional) The base directory used for relative --log-file paths. +# (string value) +# Deprecated group/name - [DEFAULT]/logdir +#log_dir=<None> +log_dir=/var/log/glance + +# Use syslog for logging. Existing syslog format is DEPRECATED and +# will be changed later to honor RFC5424. (boolean value) +#use_syslog=false +use_syslog=False + +# (Optional) Enables or disables syslog rfc5424 format for logging. If +# enabled, prefixes the MSG part of the syslog message with APP-NAME +# (RFC5424). The format without the APP-NAME is deprecated in Kilo, +# and will be removed in Mitaka, along with this option. (boolean +# value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#use_syslog_rfc_format=true + +# Syslog facility to receive log lines. (string value) +#syslog_log_facility=LOG_USER +syslog_log_facility=LOG_USER + +# Log output to standard error. (boolean value) +#use_stderr=False +use_stderr=True + +# Format string to use for log messages with context. (string value) +#logging_context_format_string=%(asctime)s.%(msecs)03d %(process)d %(levelname)s %(name)s [%(request_id)s %(user_identity)s] %(instance)s%(message)s + +# Format string to use for log messages without context. (string +# value) +#logging_default_format_string=%(asctime)s.%(msecs)03d %(process)d %(levelname)s %(name)s [-] %(instance)s%(message)s + +# Data to append to log format when level is DEBUG. (string value) +#logging_debug_format_suffix=%(funcName)s %(pathname)s:%(lineno)d + +# Prefix each line of exception output with this format. (string +# value) +#logging_exception_prefix=%(asctime)s.%(msecs)03d %(process)d ERROR %(name)s %(instance)s + +# List of logger=LEVEL pairs. (list value) +#default_log_levels=amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN + +# Enables or disables publication of error events. (boolean value) +#publish_errors=false + +# The format for an instance that is passed with the log message. +# (string value) +#instance_format="[instance: %(uuid)s] " + +# The format for an instance UUID that is passed with the log message. +# (string value) +#instance_uuid_format="[instance: %(uuid)s] " + +# Enables or disables fatal status of deprecations. (boolean value) +#fatal_deprecations=false + +# +# From oslo.messaging +# + +# Size of RPC connection pool. 
(integer value) +# Deprecated group/name - [DEFAULT]/rpc_conn_pool_size +#rpc_conn_pool_size=30 + +# ZeroMQ bind address. Should be a wildcard (*), an ethernet +# interface, or IP. The "host" option should point or resolve to this +# address. (string value) +#rpc_zmq_bind_address=* + +# MatchMaker driver. (string value) +#rpc_zmq_matchmaker=local + +# ZeroMQ receiver listening port. (integer value) +#rpc_zmq_port=9501 + +# Number of ZeroMQ contexts, defaults to 1. (integer value) +#rpc_zmq_contexts=1 + +# Maximum number of ingress messages to locally buffer per topic. +# Default is unlimited. (integer value) +#rpc_zmq_topic_backlog=<None> + +# Directory for holding IPC sockets. (string value) +#rpc_zmq_ipc_dir=/var/run/openstack + +# Name of this node. Must be a valid hostname, FQDN, or IP address. +# Must match "host" option, if running Nova. (string value) +#rpc_zmq_host=localhost + +# Seconds to wait before a cast expires (TTL). Only supported by +# impl_zmq. (integer value) +#rpc_cast_timeout=30 + +# Heartbeat frequency. (integer value) +#matchmaker_heartbeat_freq=300 + +# Heartbeat time-to-live. (integer value) +#matchmaker_heartbeat_ttl=600 + +# Size of executor thread pool. (integer value) +# Deprecated group/name - [DEFAULT]/rpc_thread_pool_size +#executor_thread_pool_size=64 + +# The Drivers(s) to handle sending notifications. Possible values are +# messaging, messagingv2, routing, log, test, noop (multi valued) +#notification_driver = +notification_driver =messaging + +# AMQP topic used for OpenStack notifications. (list value) +# Deprecated group/name - [rpc_notifier2]/topics +#notification_topics=notifications + +# Seconds to wait for a response from a call. (integer value) +#rpc_response_timeout=60 + +# A URL representing the messaging driver to use and its full +# configuration. If not set, we fall back to the rpc_backend option +# and driver specific configuration. (string value) +#transport_url=<None> + +# The messaging driver to use, defaults to rabbit. Other drivers +# include qpid and zmq. (string value) +#rpc_backend=rabbit + +# The default exchange under which topics are scoped. May be +# overridden by an exchange name specified in the transport_url +# option. (string value) +#control_exchange=openstack +hw_scsi_model=virtio-scsi +hw_disk_bus=scsi +hw_qemu_guest_agent=yes +os_require_quiesce=yes + +[database] + +# +# From oslo.db +# + +# The file name to use with SQLite. (string value) +# Deprecated group/name - [DEFAULT]/sqlite_db +#sqlite_db=oslo.sqlite + +# If True, SQLite uses synchronous mode. (boolean value) +# Deprecated group/name - [DEFAULT]/sqlite_synchronous +#sqlite_synchronous=true + +# The back end to use for the database. (string value) +# Deprecated group/name - [DEFAULT]/db_backend +#backend=sqlalchemy + +# The SQLAlchemy connection string to use to connect to the database. +# (string value) +# Deprecated group/name - [DEFAULT]/sql_connection +# Deprecated group/name - [DATABASE]/sql_connection +# Deprecated group/name - [sql]/connection +#connection=mysql://glance:glance@localhost/glance +connection=mysql+pymysql://glance:qum5net@VARINET4ADDR/glance + +# The SQLAlchemy connection string to use to connect to the slave +# database. (string value) +#slave_connection=<None> + +# The SQL mode to be used for MySQL sessions. This option, including +# the default, overrides any server-set SQL mode. To use whatever SQL +# mode is set by the server configuration, set this to no value. 
+# Example: mysql_sql_mode= (string value) +#mysql_sql_mode=TRADITIONAL + +# Timeout before idle SQL connections are reaped. (integer value) +# Deprecated group/name - [DEFAULT]/sql_idle_timeout +# Deprecated group/name - [DATABASE]/sql_idle_timeout +# Deprecated group/name - [sql]/idle_timeout +#idle_timeout=3600 +idle_timeout=3600 + +# Minimum number of SQL connections to keep open in a pool. (integer +# value) +# Deprecated group/name - [DEFAULT]/sql_min_pool_size +# Deprecated group/name - [DATABASE]/sql_min_pool_size +#min_pool_size=1 + +# Maximum number of SQL connections to keep open in a pool. (integer +# value) +# Deprecated group/name - [DEFAULT]/sql_max_pool_size +# Deprecated group/name - [DATABASE]/sql_max_pool_size +#max_pool_size=<None> + +# Maximum number of database connection retries during startup. Set to +# -1 to specify an infinite retry count. (integer value) +# Deprecated group/name - [DEFAULT]/sql_max_retries +# Deprecated group/name - [DATABASE]/sql_max_retries +#max_retries=10 + +# Interval between retries of opening a SQL connection. (integer +# value) +# Deprecated group/name - [DEFAULT]/sql_retry_interval +# Deprecated group/name - [DATABASE]/reconnect_interval +#retry_interval=10 + +# If set, use this value for max_overflow with SQLAlchemy. (integer +# value) +# Deprecated group/name - [DEFAULT]/sql_max_overflow +# Deprecated group/name - [DATABASE]/sqlalchemy_max_overflow +#max_overflow=<None> + +# Verbosity of SQL debugging information: 0=None, 100=Everything. +# (integer value) +# Deprecated group/name - [DEFAULT]/sql_connection_debug +#connection_debug=0 + +# Add Python stack traces to SQL as comment strings. (boolean value) +# Deprecated group/name - [DEFAULT]/sql_connection_trace +#connection_trace=false + +# If set, use this value for pool_timeout with SQLAlchemy. (integer +# value) +# Deprecated group/name - [DATABASE]/sqlalchemy_pool_timeout +#pool_timeout=<None> + +# Enable the experimental use of database reconnect on connection +# lost. (boolean value) +#use_db_reconnect=false + +# Seconds between retries of a database transaction. (integer value) +#db_retry_interval=1 + +# If True, increases the interval between retries of a database +# operation up to db_max_retry_interval. (boolean value) +#db_inc_retry_interval=true + +# If db_inc_retry_interval is set, the maximum seconds between retries +# of a database operation. (integer value) +#db_max_retry_interval=10 + +# Maximum retries in case of connection error or deadlock error before +# error is raised. Set to -1 to specify an infinite retry count. +# (integer value) +#db_max_retries=20 + +# +# From oslo.db.concurrency +# + +# Enable the experimental use of thread pooling for all DB API calls +# (boolean value) +# Deprecated group/name - [DEFAULT]/dbapi_use_tpool +#use_tpool=false + + +[glance_store] + +# +# From glance.store +# + +# List of stores enabled (list value) +#stores=file,http +stores=rbd +default_store=rbd + +# Default scheme to use to store image data. The scheme must be +# registered by one of the stores defined by the 'stores' config +# option. (string value) +#default_store=file + +# Minimum interval seconds to execute updating dynamic storage +# capabilities based on backend status then. It's not a periodic +# routine, the update logic will be executed only when interval +# seconds elapsed and an operation of store has triggered. The feature +# will be enabled only when the option value greater then zero. 
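The [database] connection string above uses the pymysql driver. A minimal sanity check of it, assuming SQLAlchemy and PyMySQL are installed and VARINET4ADDR has already been substituted:

    from sqlalchemy import create_engine, text

    # Same URL as the 'connection' override; pool_recycle mirrors idle_timeout.
    engine = create_engine(
        'mysql+pymysql://glance:qum5net@VARINET4ADDR/glance',
        pool_recycle=3600)
    with engine.connect() as conn:
        print(conn.execute(text('SELECT VERSION()')).scalar())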
+# (integer value) +#store_capabilities_update_min_interval=0 + +# +# From glance.store +# + +# Hostname or IP address of the instance to connect to, or a mongodb +# URI, or a list of hostnames / mongodb URIs. If host is an IPv6 +# literal it must be enclosed in '[' and ']' characters following the +# RFC2732 URL syntax (e.g. '[::1]' for localhost) (string value) +#mongodb_store_uri=<None> + +# Database to use (string value) +#mongodb_store_db=<None> + +# Images will be chunked into objects of this size (in megabytes). For +# best performance, this should be a power of two. (integer value) +#sheepdog_store_chunk_size=64 + +# Port of sheep daemon. (integer value) +#sheepdog_store_port=7000 + +# IP address of sheep daemon. (string value) +#sheepdog_store_address=localhost + +# RADOS images will be chunked into objects of this size (in +# megabytes). For best performance, this should be a power of two. +# (integer value) +rbd_store_chunk_size=8 + +# RADOS pool in which images are stored. (string value) +#rbd_store_pool=images +rbd_store_pool=images + +# RADOS user to authenticate as (only applicable if using Cephx. If +# <None>, a default will be chosen based on the client. section in +# rbd_store_ceph_conf) (string value) +rbd_store_user=glance + +# Ceph configuration file path. If <None>, librados will locate the +# default config. If using cephx authentication, this file should +# include a reference to the right keyring in a client.<USER> section +# (string value) +#rbd_store_ceph_conf=/etc/ceph/ceph.conf +rbd_store_ceph_conf=/etc/ceph/ceph.conf + +# Timeout value (in seconds) used when connecting to ceph cluster. If +# value <= 0, no timeout is set and default librados value is used. +# (integer value) +#rados_connect_timeout=0 + +# Directory to which the Filesystem backend store writes images. +# (string value) +#filesystem_store_datadir=/var/lib/glance/images/ + +# List of directories and its priorities to which the Filesystem +# backend store writes images. (multi valued) +#filesystem_store_datadirs = + +# The path to a file which contains the metadata to be returned with +# any location associated with this store. The file must contain a +# valid JSON object. The object should contain the keys 'id' and +# 'mountpoint'. The value for both keys should be 'string'. (string +# value) +#filesystem_store_metadata_file=<None> + +# The required permission for created image file. In this way the user +# other service used, e.g. Nova, who consumes the image could be the +# exclusive member of the group that owns the files created. Assigning +# it less then or equal to zero means don't change the default +# permission of the file. This value will be decoded as an octal +# digit. (integer value) +#filesystem_store_file_perm=0 + +# If True, swiftclient won't check for a valid SSL certificate when +# authenticating. (boolean value) +#swift_store_auth_insecure=false + +# A string giving the CA certificate file to use in SSL connections +# for verifying certs. (string value) +#swift_store_cacert=<None> + +# The region of the swift endpoint to be used for single tenant. This +# setting is only necessary if the tenant has multiple swift +# endpoints. (string value) +#swift_store_region=<None> + +# If set, the configured endpoint will be used. If None, the storage +# url from the auth response will be used. (string value) +#swift_store_endpoint=<None> + +# A string giving the endpoint type of the swift service to use +# (publicURL, adminURL or internalURL). 
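rbd_store_chunk_size above is given in megabytes and should stay a power of two: glance_store derives the RBD object "order" (log2 of the object size in bytes) from it when creating images. A quick check of what the value 8 works out to (plain arithmetic, no glance API involved):

    import math

    chunk_mb = 8                                   # rbd_store_chunk_size
    order = int(math.log(chunk_mb * 1024 * 1024, 2))
    print(order)                                   # 23 -> 8 MiB RADOS objects
    assert 2 ** order == chunk_mb * 1024 * 1024    # exact only for powers of two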
This setting is only used if +# swift_store_auth_version is 2. (string value) +#swift_store_endpoint_type=publicURL + +# A string giving the service type of the swift service to use. This +# setting is only used if swift_store_auth_version is 2. (string +# value) +#swift_store_service_type=object-store + +# Container within the account that the account should use for storing +# images in Swift when using single container mode. In multiple +# container mode, this will be the prefix for all containers. (string +# value) +#swift_store_container=glance + +# The size, in MB, that Glance will start chunking image files and do +# a large object manifest in Swift. (integer value) +#swift_store_large_object_size=5120 + +# The amount of data written to a temporary disk buffer during the +# process of chunking the image file. (integer value) +#swift_store_large_object_chunk_size=200 + +# A boolean value that determines if we create the container if it +# does not exist. (boolean value) +#swift_store_create_container_on_put=false + +# If set to True, enables multi-tenant storage mode which causes +# Glance images to be stored in tenant specific Swift accounts. +# (boolean value) +#swift_store_multi_tenant=false + +# When set to 0, a single-tenant store will only use one container to +# store all images. When set to an integer value between 1 and 32, a +# single-tenant store will use multiple containers to store images, +# and this value will determine how many containers are created.Used +# only when swift_store_multi_tenant is disabled. The total number of +# containers that will be used is equal to 16^N, so if this config +# option is set to 2, then 16^2=256 containers will be used to store +# images. (integer value) +#swift_store_multiple_containers_seed=0 + +# A list of tenants that will be granted read/write access on all +# Swift containers created by Glance in multi-tenant mode. (list +# value) +#swift_store_admin_tenants = + +# If set to False, disables SSL layer compression of https swift +# requests. Setting to False may improve performance for images which +# are already in a compressed format, eg qcow2. (boolean value) +#swift_store_ssl_compression=true + +# The number of times a Swift download will be retried before the +# request fails. (integer value) +#swift_store_retry_get_count=0 + +# The reference to the default swift account/backing store parameters +# to use for adding new images. (string value) +#default_swift_reference=ref1 + +# Version of the authentication service to use. Valid versions are 2 +# and 3 for keystone and 1 (deprecated) for swauth and rackspace. +# (deprecated - use "auth_version" in swift_store_config_file) (string +# value) +#swift_store_auth_version=2 + +# The address where the Swift authentication service is listening. +# (deprecated - use "auth_address" in swift_store_config_file) (string +# value) +#swift_store_auth_address=<None> + +# The user to authenticate against the Swift authentication service +# (deprecated - use "user" in swift_store_config_file) (string value) +#swift_store_user=<None> + +# Auth key for the user authenticating against the Swift +# authentication service. (deprecated - use "key" in +# swift_store_config_file) (string value) +#swift_store_key=<None> + +# The config file that has the swift account(s)configs. (string value) +#swift_store_config_file=<None> + +# ESX/ESXi or vCenter Server target system. The server value can be an +# IP address or a DNS name. 
(string value) +#vmware_server_host=<None> + +# Username for authenticating with VMware ESX/VC server. (string +# value) +#vmware_server_username=<None> + +# Password for authenticating with VMware ESX/VC server. (string +# value) +#vmware_server_password=<None> + +# DEPRECATED. Inventory path to a datacenter. If the +# vmware_server_host specified is an ESX/ESXi, the +# vmware_datacenter_path is optional. If specified, it should be "ha- +# datacenter". This option is deprecated in favor of vmware_datastores +# and will be removed in the Liberty release. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#vmware_datacenter_path=ha-datacenter + +# DEPRECATED. Datastore associated with the datacenter. This option is +# deprecated in favor of vmware_datastores and will be removed in the +# Liberty release. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#vmware_datastore_name=<None> + +# Number of times VMware ESX/VC server API must be retried upon +# connection related issues. (integer value) +#vmware_api_retry_count=10 + +# The interval used for polling remote tasks invoked on VMware ESX/VC +# server. (integer value) +#vmware_task_poll_interval=5 + +# The name of the directory where the glance images will be stored in +# the VMware datastore. (string value) +#vmware_store_image_dir=/openstack_glance + +# Allow to perform insecure SSL requests to ESX/VC. (boolean value) +#vmware_api_insecure=false + +# A list of datastores where the image can be stored. This option may +# be specified multiple times for specifying multiple datastores. +# Either one of vmware_datastore_name or vmware_datastores is +# required. The datastore name should be specified after its +# datacenter path, separated by ":". An optional weight may be given +# after the datastore name, separated again by ":". Thus, the required +# format becomes <datacenter_path>:<datastore_name>:<optional_weight>. +# When adding an image, the datastore with highest weight will be +# selected, unless there is not enough free space available in cases +# where the image size is already known. If no weight is given, it is +# assumed to be zero and the directory will be considered for +# selection last. If multiple datastores have the same weight, then +# the one with the most free space available is selected. (multi +# valued) +#vmware_datastores = + +# The host where the S3 server is listening. (string value) +#s3_store_host=<None> + +# The S3 query token access key. (string value) +#s3_store_access_key=<None> + +# The S3 query token secret key. (string value) +#s3_store_secret_key=<None> + +# The S3 bucket to be used to store the Glance data. (string value) +#s3_store_bucket=<None> + +# The local directory where uploads will be staged before they are +# transferred into S3. (string value) +#s3_store_object_buffer_dir=<None> + +# A boolean to determine if the S3 bucket should be created on upload +# if it does not exist or if an error should be returned to the user. +# (boolean value) +#s3_store_create_bucket_on_put=false + +# The S3 calling format used to determine the bucket. Either subdomain +# or path can be used. (string value) +#s3_store_bucket_url_format=subdomain + +# What size, in MB, should S3 start chunking image files and do a +# multipart upload in S3. (integer value) +#s3_store_large_object_size=100 + +# What multipart upload part size, in MB, should S3 use when uploading +# parts. 
The size must be greater than or equal to 5M. (integer value) +#s3_store_large_object_chunk_size=10 + +# The number of thread pools to perform a multipart upload in S3. +# (integer value) +#s3_store_thread_pools=10 + +# Enable the use of a proxy. (boolean value) +#s3_store_enable_proxy=false + +# Address or hostname for the proxy server. (string value) +#s3_store_proxy_host=<None> + +# The port to use when connecting over a proxy. (integer value) +#s3_store_proxy_port=8080 + +# The username to connect to the proxy. (string value) +#s3_store_proxy_user=<None> + +# The password to use when connecting over a proxy. (string value) +#s3_store_proxy_password=<None> + +# Info to match when looking for cinder in the service catalog. Format +# is : separated values of the form: +# <service_type>:<service_name>:<endpoint_type> (string value) +#cinder_catalog_info=volume:cinder:publicURL + +# Override service catalog lookup with template for cinder endpoint +# e.g. http://localhost:8776/v1/%(project_id)s (string value) +#cinder_endpoint_template=<None> + +# Region name of this node (string value) +#os_region_name=<None> +os_region_name=RegionOne + +# Location of ca certificates file to use for cinder client requests. +# (string value) +#cinder_ca_certificates_file=<None> + +# Number of cinderclient retries on failed http calls (integer value) +#cinder_http_retries=3 + +# Allow to perform insecure SSL requests to cinder (boolean value) +#cinder_api_insecure=false + + +[image_format] + +# +# From glance.api +# + +# Supported values for the 'container_format' image attribute (list +# value) +# Deprecated group/name - [DEFAULT]/container_formats +#container_formats=ami,ari,aki,bare,ovf,ova + +# Supported values for the 'disk_format' image attribute (list value) +# Deprecated group/name - [DEFAULT]/disk_formats +#disk_formats=ami,ari,aki,vhd,vmdk,raw,qcow2,vdi,iso + + +[keystone_authtoken] + +# +# From keystonemiddleware.auth_token +# + +# Complete public Identity API endpoint. (string value) +#auth_uri=<None> +auth_uri=http://VARINET4ADDR:5000/v2.0 + +# API version of the admin Identity API endpoint. (string value) +#auth_version=<None> + +# Do not handle authorization requests within the middleware, but +# delegate the authorization decision to downstream WSGI components. +# (boolean value) +#delay_auth_decision=false + +# Request timeout value for communicating with Identity API server. +# (integer value) +#http_connect_timeout=<None> + +# How many times are we trying to reconnect when communicating with +# Identity API Server. (integer value) +#http_request_max_retries=3 + +# Env key for the swift cache. (string value) +#cache=<None> + +# Required if identity server requires client certificate (string +# value) +#certfile=<None> + +# Required if identity server requires client certificate (string +# value) +#keyfile=<None> + +# A PEM encoded Certificate Authority to use when verifying HTTPs +# connections. Defaults to system CAs. (string value) +#cafile=<None> + +# Verify HTTPS connections. (boolean value) +#insecure=false + +# The region in which the identity server can be found. (string value) +#region_name=<None> + +# Directory used to cache files related to PKI tokens. (string value) +#signing_dir=<None> + +# Optionally specify a list of memcached server(s) to use for caching. +# If left undefined, tokens will instead be cached in-process. 
(list +# value) +# Deprecated group/name - [DEFAULT]/memcache_servers +#memcached_servers=<None> + +# In order to prevent excessive effort spent validating tokens, the +# middleware caches previously-seen tokens for a configurable duration +# (in seconds). Set to -1 to disable caching completely. (integer +# value) +#token_cache_time=300 + +# Determines the frequency at which the list of revoked tokens is +# retrieved from the Identity service (in seconds). A high number of +# revocation events combined with a low cache duration may +# significantly reduce performance. (integer value) +#revocation_cache_time=10 + +# (Optional) If defined, indicate whether token data should be +# authenticated or authenticated and encrypted. Acceptable values are +# MAC or ENCRYPT. If MAC, token data is authenticated (with HMAC) in +# the cache. If ENCRYPT, token data is encrypted and authenticated in +# the cache. If the value is not one of these options or empty, +# auth_token will raise an exception on initialization. (string value) +#memcache_security_strategy=<None> + +# (Optional, mandatory if memcache_security_strategy is defined) This +# string is used for key derivation. (string value) +#memcache_secret_key=<None> + +# (Optional) Number of seconds memcached server is considered dead +# before it is tried again. (integer value) +#memcache_pool_dead_retry=300 + +# (Optional) Maximum total number of open connections to every +# memcached server. (integer value) +#memcache_pool_maxsize=10 + +# (Optional) Socket timeout in seconds for communicating with a +# memcached server. (integer value) +#memcache_pool_socket_timeout=3 + +# (Optional) Number of seconds a connection to memcached is held +# unused in the pool before it is closed. (integer value) +#memcache_pool_unused_timeout=60 + +# (Optional) Number of seconds that an operation will wait to get a +# memcached client connection from the pool. (integer value) +#memcache_pool_conn_get_timeout=10 + +# (Optional) Use the advanced (eventlet safe) memcached client pool. +# The advanced pool will only work under python 2.x. (boolean value) +#memcache_use_advanced_pool=false + +# (Optional) Indicate whether to set the X-Service-Catalog header. If +# False, middleware will not ask for service catalog on token +# validation and will not set the X-Service-Catalog header. (boolean +# value) +#include_service_catalog=true + +# Used to control the use and type of token binding. Can be set to: +# "disabled" to not check token binding. "permissive" (default) to +# validate binding information if the bind type is of a form known to +# the server and ignore it if not. "strict" like "permissive" but if +# the bind type is unknown the token will be rejected. "required" any +# form of token binding is needed to be allowed. Finally the name of a +# binding method that must be present in tokens. (string value) +#enforce_token_bind=permissive + +# If true, the revocation list will be checked for cached tokens. This +# requires that PKI tokens are configured on the identity server. +# (boolean value) +#check_revocations_for_cached=false + +# Hash algorithms to use for hashing PKI tokens. This may be a single +# algorithm or multiple. The algorithms are those supported by Python +# standard hashlib.new(). The hashes will be tried in the order given, +# so put the preferred one first for performance. The result of the +# first hash will be stored in the cache. 
This will typically be set +# to multiple values only while migrating from a less secure algorithm +# to a more secure one. Once all the old tokens are expired this +# option should be set to a single value for better performance. (list +# value) +#hash_algorithms=md5 + +# Prefix to prepend at the beginning of the path. Deprecated, use +# identity_uri. (string value) +#auth_admin_prefix = + +# Host providing the admin Identity API endpoint. Deprecated, use +# identity_uri. (string value) +#auth_host=127.0.0.1 + +# Port of the admin Identity API endpoint. Deprecated, use +# identity_uri. (integer value) +#auth_port=35357 + +# Protocol of the admin Identity API endpoint (http or https). +# Deprecated, use identity_uri. (string value) +#auth_protocol=http + +# Complete admin Identity API endpoint. This should specify the +# unversioned root endpoint e.g. https://localhost:35357/ (string +# value) +#identity_uri=<None> +identity_uri=http://VARINET4ADDR:35357 + +# This option is deprecated and may be removed in a future release. +# Single shared secret with the Keystone configuration used for +# bootstrapping a Keystone installation, or otherwise bypassing the +# normal authentication process. This option should not be used, use +# `admin_user` and `admin_password` instead. (string value) +#admin_token=<None> + +# Service username. (string value) +#admin_user=<None> +admin_user=glance + +# Service user password. (string value) +#admin_password=<None> +admin_password=qum5net + +# Service tenant name. (string value) +#admin_tenant_name=admin +admin_tenant_name=services + + +[matchmaker_redis] + +# +# From oslo.messaging +# + +# Host to locate redis. (string value) +#host=127.0.0.1 + +# Use this port to connect to redis host. (integer value) +#port=6379 + +# Password for Redis server (optional). (string value) +#password=<None> + + +[matchmaker_ring] + +# +# From oslo.messaging +# + +# Matchmaker ring file (JSON). (string value) +# Deprecated group/name - [DEFAULT]/matchmaker_ringfile +#ringfile=/etc/oslo/matchmaker_ring.json + + +[oslo_concurrency] + +# +# From oslo.concurrency +# + +# Enables or disables inter-process locks. (boolean value) +# Deprecated group/name - [DEFAULT]/disable_process_locking +#disable_process_locking=false + +# Directory to use for lock files. For security, the specified +# directory should only be writable by the user running the processes +# that need locking. Defaults to environment variable OSLO_LOCK_PATH. +# If external locks are used, a lock path must be set. 
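The [keystone_authtoken] service credentials above (auth_uri on port 5000, identity_uri on 35357) can be exercised directly against the Keystone v2.0 tokens API. A minimal sketch with the requests library, assuming the endpoints are reachable after substitution:

    import requests

    # Same username/password/tenant as the [keystone_authtoken] overrides.
    body = {'auth': {'tenantName': 'services',
                     'passwordCredentials': {'username': 'glance',
                                             'password': 'qum5net'}}}
    resp = requests.post('http://VARINET4ADDR:5000/v2.0/tokens', json=body)
    resp.raise_for_status()
    print('got token:', 'token' in resp.json().get('access', {}))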
(string value) +# Deprecated group/name - [DEFAULT]/lock_path +#lock_path=<None> + + +[oslo_messaging_amqp] + +# +# From oslo.messaging +# + +# address prefix used when sending to a specific server (string value) +# Deprecated group/name - [amqp1]/server_request_prefix +#server_request_prefix=exclusive + +# address prefix used when broadcasting to all servers (string value) +# Deprecated group/name - [amqp1]/broadcast_prefix +#broadcast_prefix=broadcast + +# address prefix when sending to any server in group (string value) +# Deprecated group/name - [amqp1]/group_request_prefix +#group_request_prefix=unicast + +# Name for the AMQP container (string value) +# Deprecated group/name - [amqp1]/container_name +#container_name=<None> + +# Timeout for inactive connections (in seconds) (integer value) +# Deprecated group/name - [amqp1]/idle_timeout +#idle_timeout=0 + +# Debug: dump AMQP frames to stdout (boolean value) +# Deprecated group/name - [amqp1]/trace +#trace=false + +# CA certificate PEM file to verify server certificate (string value) +# Deprecated group/name - [amqp1]/ssl_ca_file +#ssl_ca_file = + +# Identifying certificate PEM file to present to clients (string +# value) +# Deprecated group/name - [amqp1]/ssl_cert_file +#ssl_cert_file = + +# Private key PEM file used to sign cert_file certificate (string +# value) +# Deprecated group/name - [amqp1]/ssl_key_file +#ssl_key_file = + +# Password for decrypting ssl_key_file (if encrypted) (string value) +# Deprecated group/name - [amqp1]/ssl_key_password +#ssl_key_password=<None> + +# Accept clients using either SSL or plain TCP (boolean value) +# Deprecated group/name - [amqp1]/allow_insecure_clients +#allow_insecure_clients=false + + +[oslo_messaging_qpid] + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_durable_queues +# Deprecated group/name - [DEFAULT]/rabbit_durable_queues +#amqp_durable_queues=false + +# Auto-delete queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_auto_delete +#amqp_auto_delete=false + +# Send a single AMQP reply to call message. The current behaviour +# since oslo-incubator is to send two AMQP replies - first one with +# the payload, a second one to ensure the other have finish to send +# the payload. We are going to remove it in the N release, but we must +# keep backward compatible at the same time. This option provides such +# compatibility - it defaults to False in Liberty and can be turned on +# for early adopters with a new installations or for testing. Please +# note, that this option will be removed in the Mitaka release. +# (boolean value) +#send_single_reply=false + +# Qpid broker hostname. (string value) +# Deprecated group/name - [DEFAULT]/qpid_hostname +#qpid_hostname=localhost + +# Qpid broker port. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_port +#qpid_port=5672 + +# Qpid HA cluster host:port pairs. (list value) +# Deprecated group/name - [DEFAULT]/qpid_hosts +#qpid_hosts=$qpid_hostname:$qpid_port + +# Username for Qpid connection. (string value) +# Deprecated group/name - [DEFAULT]/qpid_username +#qpid_username = + +# Password for Qpid connection. (string value) +# Deprecated group/name - [DEFAULT]/qpid_password +#qpid_password = + +# Space separated list of SASL mechanisms to use for auth. (string +# value) +# Deprecated group/name - [DEFAULT]/qpid_sasl_mechanisms +#qpid_sasl_mechanisms = + +# Seconds between connection keepalive heartbeats. 
(integer value) +# Deprecated group/name - [DEFAULT]/qpid_heartbeat +#qpid_heartbeat=60 + +# Transport to use, either 'tcp' or 'ssl'. (string value) +# Deprecated group/name - [DEFAULT]/qpid_protocol +#qpid_protocol=tcp + +# Whether to disable the Nagle algorithm. (boolean value) +# Deprecated group/name - [DEFAULT]/qpid_tcp_nodelay +#qpid_tcp_nodelay=true + +# The number of prefetched messages held by receiver. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_receiver_capacity +#qpid_receiver_capacity=1 + +# The qpid topology version to use. Version 1 is what was originally +# used by impl_qpid. Version 2 includes some backwards-incompatible +# changes that allow broker federation to work. Users should update +# to version 2 when they are able to take everything down, as it +# requires a clean break. (integer value) +# Deprecated group/name - [DEFAULT]/qpid_topology_version +#qpid_topology_version=1 + + +[oslo_messaging_rabbit] + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_durable_queues +# Deprecated group/name - [DEFAULT]/rabbit_durable_queues +#amqp_durable_queues=false +amqp_durable_queues=False + +# Auto-delete queues in AMQP. (boolean value) +# Deprecated group/name - [DEFAULT]/amqp_auto_delete +#amqp_auto_delete=false + +# Send a single AMQP reply to call message. The current behaviour +# since oslo-incubator is to send two AMQP replies - first one with +# the payload, a second one to ensure the other have finish to send +# the payload. We are going to remove it in the N release, but we must +# keep backward compatible at the same time. This option provides such +# compatibility - it defaults to False in Liberty and can be turned on +# for early adopters with a new installations or for testing. Please +# note, that this option will be removed in the Mitaka release. +# (boolean value) +#send_single_reply=false + +# SSL version to use (valid only if SSL enabled). Valid values are +# TLSv1 and SSLv23. SSLv2, SSLv3, TLSv1_1, and TLSv1_2 may be +# available on some distributions. (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_version +#kombu_ssl_version = + +# SSL key file (valid only if SSL enabled). (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_keyfile +#kombu_ssl_keyfile = + +# SSL cert file (valid only if SSL enabled). (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_certfile +#kombu_ssl_certfile = + +# SSL certification authority file (valid only if SSL enabled). +# (string value) +# Deprecated group/name - [DEFAULT]/kombu_ssl_ca_certs +#kombu_ssl_ca_certs = + +# How long to wait before reconnecting in response to an AMQP consumer +# cancel notification. (floating point value) +# Deprecated group/name - [DEFAULT]/kombu_reconnect_delay +#kombu_reconnect_delay=1.0 + +# How long to wait before considering a reconnect attempt to have +# failed. This value should not be longer than rpc_response_timeout. +# (integer value) +#kombu_reconnect_timeout=60 + +# The RabbitMQ broker address where a single node is used. (string +# value) +# Deprecated group/name - [DEFAULT]/rabbit_host +#rabbit_host=localhost +rabbit_host=VARINET4ADDR + +# The RabbitMQ broker port where a single node is used. (integer +# value) +# Deprecated group/name - [DEFAULT]/rabbit_port +#rabbit_port=5672 +rabbit_port=5672 + +# RabbitMQ HA cluster host:port pairs. 
(list value) +# Deprecated group/name - [DEFAULT]/rabbit_hosts +#rabbit_hosts=$rabbit_host:$rabbit_port +rabbit_hosts=VARINET4ADDR:5672 + +# Connect over SSL for RabbitMQ. (boolean value) +# Deprecated group/name - [DEFAULT]/rabbit_use_ssl +#rabbit_use_ssl=false +rabbit_use_ssl=False + +# The RabbitMQ userid. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_userid +#rabbit_userid=guest +rabbit_userid=guest + +# The RabbitMQ password. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_password +#rabbit_password=guest +rabbit_password=guest + +# The RabbitMQ login method. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_login_method +#rabbit_login_method=AMQPLAIN + +# The RabbitMQ virtual host. (string value) +# Deprecated group/name - [DEFAULT]/rabbit_virtual_host +#rabbit_virtual_host=/ +rabbit_virtual_host=/ + +# How frequently to retry connecting with RabbitMQ. (integer value) +#rabbit_retry_interval=1 + +# How long to backoff for between retries when connecting to RabbitMQ. +# (integer value) +# Deprecated group/name - [DEFAULT]/rabbit_retry_backoff +#rabbit_retry_backoff=2 + +# Maximum number of RabbitMQ connection retries. Default is 0 +# (infinite retry count). (integer value) +# Deprecated group/name - [DEFAULT]/rabbit_max_retries +#rabbit_max_retries=0 + +# Use HA queues in RabbitMQ (x-ha-policy: all). If you change this +# option, you must wipe the RabbitMQ database. (boolean value) +# Deprecated group/name - [DEFAULT]/rabbit_ha_queues +#rabbit_ha_queues=false +rabbit_ha_queues=False + +# Number of seconds after which the Rabbit broker is considered down +# if heartbeat's keep-alive fails (0 disable the heartbeat). +# EXPERIMENTAL (integer value) +#heartbeat_timeout_threshold=60 +heartbeat_timeout_threshold=0 + +# How often times during the heartbeat_timeout_threshold we check the +# heartbeat. (integer value) +#heartbeat_rate=2 +heartbeat_rate=2 + +# Deprecated, use rpc_backend=kombu+memory or rpc_backend=fake +# (boolean value) +# Deprecated group/name - [DEFAULT]/fake_rabbit +#fake_rabbit=false +rabbit_notification_exchange=glance +rabbit_notification_topic=notifications + + +[oslo_policy] + +# +# From oslo.policy +# + +# The JSON file that defines policies. (string value) +# Deprecated group/name - [DEFAULT]/policy_file +#policy_file=policy.json + +# Default rule. Enforced when a requested rule is not found. (string +# value) +# Deprecated group/name - [DEFAULT]/policy_default_rule +#policy_default_rule=default + +# Directories where policy configuration files are stored. They can be +# relative to any directory in the search path defined by the +# config_dir option, or absolute paths. The file defined by +# policy_file must exist for these directories to be searched. +# Missing or empty directories are ignored. (multi valued) +# Deprecated group/name - [DEFAULT]/policy_dirs +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#policy_dirs=policy.d + + +[paste_deploy] + +# +# From glance.api +# + +# Partial name of a pipeline in your paste configuration file with the +# service name removed. For example, if your paste section name is +# [pipeline:glance-api-keystone] use the value "keystone" (string +# value) +#flavor=<None> +flavor=keystone + +# Name of the paste configuration file. (string value) +#config_file=/usr/share/glance/glance-api-dist-paste.ini + + +[store_type_location_strategy] + +# +# From glance.api +# + +# The store names to use to get store preference order. 
The name must +# be registered by one of the stores defined by the 'stores' config +# option. This option will be applied when you using 'store_type' +# option as image location strategy defined by the 'location_strategy' +# config option. (list value) +#store_type_preference = + + +[task] + +# +# From glance.api +# + +# Time in hours for which a task lives after, either succeeding or +# failing (integer value) +# Deprecated group/name - [DEFAULT]/task_time_to_live +#task_time_to_live=48 + +# Specifies which task executor to be used to run the task scripts. +# (string value) +#task_executor=taskflow + +# Work dir for asynchronous task operations. The directory set here +# will be used to operate over images - normally before they are +# imported in the destination store. When providing work dir, make +# sure enough space is provided for concurrent tasks to run +# efficiently without running out of space. A rough estimation can be +# done by multiplying the number of `max_workers` - or the N of +# workers running - by an average image size (e.g 500MB). The image +# size estimation should be done based on the average size in your +# deployment. Note that depending on the tasks running you may need to +# multiply this number by some factor depending on what the task does. +# For example, you may want to double the available size if image +# conversion is enabled. All this being said, remember these are just +# estimations and you should do them based on the worst case scenario +# and be prepared to act in case they were wrong. (string value) +#work_dir=<None> + + +[taskflow_executor] + +# +# From glance.api +# + +# The mode in which the engine will run. Can be 'serial' or +# 'parallel'. (string value) +# Allowed values: serial, parallel +#engine_mode=parallel + +# The number of parallel activities executed at the same time by the +# engine. The value can be greater than one when the engine mode is +# 'parallel'. (integer value) +# Deprecated group/name - [task]/eventlet_executor_pool_size +#max_workers=10 diff --git a/qa/qa_scripts/openstack/files/kilo.template.conf b/qa/qa_scripts/openstack/files/kilo.template.conf new file mode 100644 index 000000000..35d359c89 --- /dev/null +++ b/qa/qa_scripts/openstack/files/kilo.template.conf @@ -0,0 +1,1077 @@ +[general] + +# Path to a public key to install on servers. If a usable key has not +# been installed on the remote servers, the user is prompted for a +# password and this key is installed so the password will not be +# required again. +CONFIG_SSH_KEY=/root/.ssh/id_rsa.pub + +# Default password to be used everywhere (overridden by passwords set +# for individual services or users). +CONFIG_DEFAULT_PASSWORD= + +# Specify 'y' to install MariaDB. ['y', 'n'] +CONFIG_MARIADB_INSTALL=y + +# Specify 'y' to install OpenStack Image Service (glance). ['y', 'n'] +CONFIG_GLANCE_INSTALL=y + +# Specify 'y' to install OpenStack Block Storage (cinder). ['y', 'n'] +CONFIG_CINDER_INSTALL=y + +# Specify 'y' to install OpenStack Compute (nova). ['y', 'n'] +CONFIG_NOVA_INSTALL=y + +# Specify 'y' to install OpenStack Networking (neutron); otherwise, +# Compute Networking (nova) will be used. ['y', 'n'] +CONFIG_NEUTRON_INSTALL=y + +# Specify 'y' to install OpenStack Dashboard (horizon). ['y', 'n'] +CONFIG_HORIZON_INSTALL=y + +# Specify 'y' to install OpenStack Object Storage (swift). ['y', 'n'] +CONFIG_SWIFT_INSTALL=y + +# Specify 'y' to install OpenStack Metering (ceilometer). 
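
The [task] section of the Glance template above suggests sizing work_dir as max_workers times an average image size, doubled when image conversion is enabled. A back-of-the-envelope helper using only the example numbers from those comments (10 workers, 500 MB, factor 2); none of them are fixed requirements.

    # Rough work_dir estimate per the [task]/[taskflow_executor] guidance above.
    def work_dir_estimate_mb(max_workers=10, avg_image_mb=500, conversion_factor=2):
        """10 workers * 500 MB * 2 (conversion enabled) ~= 10000 MB."""
        return max_workers * avg_image_mb * conversion_factor

    print(work_dir_estimate_mb())  # 10000
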
['y', 'n'] +CONFIG_CEILOMETER_INSTALL=y + +# Specify 'y' to install OpenStack Data Processing (sahara). In case +# of sahara installation packstack also installs heat.['y', 'n'] +CONFIG_SAHARA_INSTALL=n + +# Specify 'y' to install OpenStack Orchestration (heat). ['y', 'n'] +CONFIG_HEAT_INSTALL=n + +# Specify 'y' to install OpenStack Database (trove) ['y', 'n'] +CONFIG_TROVE_INSTALL=n + +# Specify 'y' to install OpenStack Bare Metal Provisioning (ironic). +# ['y', 'n'] +CONFIG_IRONIC_INSTALL=n + +# Specify 'y' to install the OpenStack Client packages (command-line +# tools). An admin "rc" file will also be installed. ['y', 'n'] +CONFIG_CLIENT_INSTALL=y + +# Comma-separated list of NTP servers. Leave plain if Packstack +# should not install ntpd on instances. +CONFIG_NTP_SERVERS=clock.redhat.com + +# Specify 'y' to install Nagios to monitor OpenStack hosts. Nagios +# provides additional tools for monitoring the OpenStack environment. +# ['n'] +CONFIG_NAGIOS_INSTALL=n + +# Comma-separated list of servers to be excluded from the +# installation. This is helpful if you are running Packstack a second +# time with the same answer file and do not want Packstack to +# overwrite these server's configurations. Leave empty if you do not +# need to exclude any servers. +EXCLUDE_SERVERS= + +# Specify 'y' if you want to run OpenStack services in debug mode; +# otherwise, specify 'n'. ['y', 'n'] +CONFIG_DEBUG_MODE=y + +# Server on which to install OpenStack services specific to the +# controller role (for example, API servers or dashboard). +CONFIG_CONTROLLER_HOST=VARINET4ADDR + +# List the servers on which to install the Compute service. +CONFIG_COMPUTE_HOSTS=VARINET4ADDR + +# List of servers on which to install the network service such as +# Compute networking (nova network) or OpenStack Networking (neutron). +CONFIG_NETWORK_HOSTS=VARINET4ADDR + +# Specify 'y' if you want to use VMware vCenter as hypervisor and +# storage; otherwise, specify 'n'. ['y', 'n'] +CONFIG_VMWARE_BACKEND=n + +# Specify 'y' if you want to use unsupported parameters. This should +# be used only if you know what you are doing. Issues caused by using +# unsupported options will not be fixed before the next major release. +# ['y', 'n'] +CONFIG_UNSUPPORTED=n + +# Specify 'y' if you want to use subnet addresses (in CIDR format) +# instead of interface names in following options: +# CONFIG_NOVA_COMPUTE_PRIVIF, CONFIG_NOVA_NETWORK_PRIVIF, +# CONFIG_NOVA_NETWORK_PUBIF, CONFIG_NEUTRON_OVS_BRIDGE_IFACES, +# CONFIG_NEUTRON_LB_INTERFACE_MAPPINGS, CONFIG_NEUTRON_OVS_TUNNEL_IF. +# This is useful for cases when interface names are not same on all +# installation hosts. +CONFIG_USE_SUBNETS=n + +# IP address of the VMware vCenter server. +CONFIG_VCENTER_HOST= + +# User name for VMware vCenter server authentication. +CONFIG_VCENTER_USER= + +# Password for VMware vCenter server authentication. +CONFIG_VCENTER_PASSWORD= + +# Comma separated list of names of the VMware vCenter clusters. Note: +# if multiple clusters are specified each one is mapped to one +# compute, otherwise all computes are mapped to same cluster. +CONFIG_VCENTER_CLUSTER_NAMES= + +# (Unsupported!) Server on which to install OpenStack services +# specific to storage servers such as Image or Block Storage services. +CONFIG_STORAGE_HOST=VARINET4ADDR + +# (Unsupported!) Server on which to install OpenStack services +# specific to OpenStack Data Processing (sahara). +CONFIG_SAHARA_HOST=VARINET4ADDR + +# Specify 'y' to enable the EPEL repository (Extra Packages for +# Enterprise Linux). 
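
The answer file relies on VARINET4ADDR (and, further down, VARHOSTNAME) as placeholders for the node under test. Below is a minimal rendering sketch, assuming a plain string substitution is all that is required; the render_template helper and the output file name are illustrative, not the scripts shipped alongside these templates.

    # Illustrative rendering of a packstack answer file from this template.
    import socket

    def render_template(src, dst, ip, hostname):
        with open(src) as f:
            text = f.read()
        text = text.replace("VARINET4ADDR", ip).replace("VARHOSTNAME", hostname)
        with open(dst, "w") as f:
            f.write(text)

    if __name__ == "__main__":
        fqdn = socket.getfqdn()
        render_template("kilo.template.conf", "packstack-answers.txt",
                        socket.gethostbyname(fqdn), fqdn)
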
['y', 'n'] +CONFIG_USE_EPEL=n + +# Comma-separated list of URLs for any additional yum repositories, +# to use for installation. +CONFIG_REPO= + +# Specify 'y' to enable the RDO testing repository. ['y', 'n'] +CONFIG_ENABLE_RDO_TESTING=n + +# To subscribe each server with Red Hat Subscription Manager, include +# this with CONFIG_RH_PW. +CONFIG_RH_USER= + +# To subscribe each server to receive updates from a Satellite +# server, provide the URL of the Satellite server. You must also +# provide a user name (CONFIG_SATELLITE_USERNAME) and password +# (CONFIG_SATELLITE_PASSWORD) or an access key (CONFIG_SATELLITE_AKEY) +# for authentication. +CONFIG_SATELLITE_URL= + +# To subscribe each server with Red Hat Subscription Manager, include +# this with CONFIG_RH_USER. +CONFIG_RH_PW= + +# Specify 'y' to enable RHEL optional repositories. ['y', 'n'] +CONFIG_RH_OPTIONAL=y + +# HTTP proxy to use with Red Hat Subscription Manager. +CONFIG_RH_PROXY= + +# Port to use for Red Hat Subscription Manager's HTTP proxy. +CONFIG_RH_PROXY_PORT= + +# User name to use for Red Hat Subscription Manager's HTTP proxy. +CONFIG_RH_PROXY_USER= + +# Password to use for Red Hat Subscription Manager's HTTP proxy. +CONFIG_RH_PROXY_PW= + +# User name to authenticate with the RHN Satellite server; if you +# intend to use an access key for Satellite authentication, leave this +# blank. +CONFIG_SATELLITE_USER= + +# Password to authenticate with the RHN Satellite server; if you +# intend to use an access key for Satellite authentication, leave this +# blank. +CONFIG_SATELLITE_PW= + +# Access key for the Satellite server; if you intend to use a user +# name and password for Satellite authentication, leave this blank. +CONFIG_SATELLITE_AKEY= + +# Certificate path or URL of the certificate authority to verify that +# the connection with the Satellite server is secure. If you are not +# using Satellite in your deployment, leave this blank. +CONFIG_SATELLITE_CACERT= + +# Profile name that should be used as an identifier for the system in +# RHN Satellite (if required). +CONFIG_SATELLITE_PROFILE= + +# Comma-separated list of flags passed to the rhnreg_ks command. +# Valid flags are: novirtinfo, norhnsd, nopackages ['novirtinfo', +# 'norhnsd', 'nopackages'] +CONFIG_SATELLITE_FLAGS= + +# HTTP proxy to use when connecting to the RHN Satellite server (if +# required). +CONFIG_SATELLITE_PROXY= + +# User name to authenticate with the Satellite-server HTTP proxy. +CONFIG_SATELLITE_PROXY_USER= + +# User password to authenticate with the Satellite-server HTTP proxy. +CONFIG_SATELLITE_PROXY_PW= + +# Specify filepath for CA cert file. If CONFIG_SSL_CACERT_SELFSIGN is +# set to 'n' it has to be preexisting file. +CONFIG_SSL_CACERT_FILE=/etc/pki/tls/certs/selfcert.crt + +# Specify filepath for CA cert key file. If +# CONFIG_SSL_CACERT_SELFSIGN is set to 'n' it has to be preexisting +# file. +CONFIG_SSL_CACERT_KEY_FILE=/etc/pki/tls/private/selfkey.key + +# Enter the path to use to store generated SSL certificates in. +CONFIG_SSL_CERT_DIR=~/packstackca/ + +# Specify 'y' if you want Packstack to pregenerate the CA +# Certificate. +CONFIG_SSL_CACERT_SELFSIGN=y + +# Enter the selfsigned CAcert subject country. +CONFIG_SELFSIGN_CACERT_SUBJECT_C=-- + +# Enter the selfsigned CAcert subject state. +CONFIG_SELFSIGN_CACERT_SUBJECT_ST=State + +# Enter the selfsigned CAcert subject location. +CONFIG_SELFSIGN_CACERT_SUBJECT_L=City + +# Enter the selfsigned CAcert subject organization. 
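
With CONFIG_SSL_CACERT_SELFSIGN=y packstack generates the CA itself, and the subject fields configured here only feed that certificate. For reference, an equivalent self-signed pair can be produced with the openssl CLI; the snippet below drives it from Python purely as an illustration (the CN value and output names are assumptions, and packstack's own generation may differ).

    # Illustration only: self-signed cert/key with the subject fields set above.
    import subprocess

    subject = "/C=--/ST=State/L=City/O=openstack/OU=packstack/CN=localhost"  # CN assumed
    subprocess.run(
        ["openssl", "req", "-x509", "-nodes", "-newkey", "rsa:2048",
         "-keyout", "selfkey.key", "-out", "selfcert.crt",
         "-days", "365", "-subj", subject],
        check=True,
    )
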
+CONFIG_SELFSIGN_CACERT_SUBJECT_O=openstack + +# Enter the selfsigned CAcert subject organizational unit. +CONFIG_SELFSIGN_CACERT_SUBJECT_OU=packstack + +# Enter the selfsigned CAcert subject common name. +CONFIG_SELFSIGN_CACERT_SUBJECT_CN=VARHOSTNAME + +CONFIG_SELFSIGN_CACERT_SUBJECT_MAIL=admin@VARHOSTNAME + +# Service to be used as the AMQP broker. Allowed values are: qpid, +# rabbitmq ['qpid', 'rabbitmq'] +CONFIG_AMQP_BACKEND=rabbitmq + +# IP address of the server on which to install the AMQP service. +CONFIG_AMQP_HOST=VARINET4ADDR + +# Specify 'y' to enable SSL for the AMQP service. ['y', 'n'] +CONFIG_AMQP_ENABLE_SSL=n + +# Specify 'y' to enable authentication for the AMQP service. ['y', +# 'n'] +CONFIG_AMQP_ENABLE_AUTH=n + +# Password for the NSS certificate database of the AMQP service. +CONFIG_AMQP_NSS_CERTDB_PW=PW_PLACEHOLDER + +# User for AMQP authentication. +CONFIG_AMQP_AUTH_USER=amqp_user + +# Password for AMQP authentication. +CONFIG_AMQP_AUTH_PASSWORD=PW_PLACEHOLDER + +# IP address of the server on which to install MariaDB. If a MariaDB +# installation was not specified in CONFIG_MARIADB_INSTALL, specify +# the IP address of an existing database server (a MariaDB cluster can +# also be specified). +CONFIG_MARIADB_HOST=VARINET4ADDR + +# User name for the MariaDB administrative user. +CONFIG_MARIADB_USER=root + +# Password for the MariaDB administrative user. +CONFIG_MARIADB_PW=qum5net + +# Password to use for the Identity service (keystone) to access the +# database. +CONFIG_KEYSTONE_DB_PW=qum5net + +# Enter y if cron job for removing soft deleted DB rows should be +# created. +CONFIG_KEYSTONE_DB_PURGE_ENABLE=True + +# Default region name to use when creating tenants in the Identity +# service. +CONFIG_KEYSTONE_REGION=RegionOne + +# Token to use for the Identity service API. +CONFIG_KEYSTONE_ADMIN_TOKEN=9390caff845749c3ac74453eb4f384e2 + +# Email address for the Identity service 'admin' user. Defaults to +CONFIG_KEYSTONE_ADMIN_EMAIL=root@localhost + +# User name for the Identity service 'admin' user. Defaults to +# 'admin'. +CONFIG_KEYSTONE_ADMIN_USERNAME=admin + +# Password to use for the Identity service 'admin' user. +CONFIG_KEYSTONE_ADMIN_PW=qum5net + +# Password to use for the Identity service 'demo' user. +CONFIG_KEYSTONE_DEMO_PW=qum5net + +# Identity service API version string. ['v2.0', 'v3'] +CONFIG_KEYSTONE_API_VERSION=v2.0 + +# Identity service token format (UUID or PKI). The recommended format +# for new deployments is UUID. ['UUID', 'PKI'] +CONFIG_KEYSTONE_TOKEN_FORMAT=UUID + +# Name of service to use to run the Identity service (keystone or +# httpd). ['keystone', 'httpd'] +CONFIG_KEYSTONE_SERVICE_NAME=httpd + +# Type of Identity service backend (sql or ldap). ['sql', 'ldap'] +CONFIG_KEYSTONE_IDENTITY_BACKEND=sql + +# URL for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_URL=ldap://VARINET4ADDR + +# User DN for the Identity service LDAP backend. Used to bind to the +# LDAP server if the LDAP server does not allow anonymous +# authentication. +CONFIG_KEYSTONE_LDAP_USER_DN= + +# User DN password for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_PASSWORD= + +# Base suffix for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_SUFFIX= + +# Query scope for the Identity service LDAP backend. Use 'one' for +# onelevel/singleLevel or 'sub' for subtree/wholeSubtree ('base' is +# not actually used by the Identity service and is therefore +# deprecated). 
['base', 'one', 'sub'] +CONFIG_KEYSTONE_LDAP_QUERY_SCOPE=one + +# Query page size for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_PAGE_SIZE=-1 + +# User subtree for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_SUBTREE= + +# User query filter for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_FILTER= + +# User object class for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_OBJECTCLASS= + +# User ID attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_ID_ATTRIBUTE= + +# User name attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_NAME_ATTRIBUTE= + +# User email address attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_MAIL_ATTRIBUTE= + +# User-enabled attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_USER_ENABLED_ATTRIBUTE= + +# Bit mask integer applied to user-enabled attribute for the Identity +# service LDAP backend. Indicate the bit that the enabled value is +# stored in if the LDAP server represents "enabled" as a bit on an +# integer rather than a boolean. A value of "0" indicates the mask is +# not used (default). If this is not set to "0", the typical value is +# "2", typically used when +# "CONFIG_KEYSTONE_LDAP_USER_ENABLED_ATTRIBUTE = userAccountControl". +CONFIG_KEYSTONE_LDAP_USER_ENABLED_MASK=-1 + +# Value of enabled attribute which indicates user is enabled for the +# Identity service LDAP backend. This should match an appropriate +# integer value if the LDAP server uses non-boolean (bitmask) values +# to indicate whether a user is enabled or disabled. If this is not +# set as 'y', the typical value is "512". This is typically used when +# "CONFIG_KEYSTONE_LDAP_USER_ENABLED_ATTRIBUTE = userAccountControl". +CONFIG_KEYSTONE_LDAP_USER_ENABLED_DEFAULT=TRUE + +# Specify 'y' if users are disabled (not enabled) in the Identity +# service LDAP backend (inverts boolean-enabled values). Some LDAP +# servers use a boolean lock attribute where "y" means an account is +# disabled. Setting this to 'y' allows these lock attributes to be +# used. This setting will have no effect if +# "CONFIG_KEYSTONE_LDAP_USER_ENABLED_MASK" is in use. ['n', 'y'] +CONFIG_KEYSTONE_LDAP_USER_ENABLED_INVERT=n + +# Comma-separated list of attributes stripped from LDAP user entry +# upon update. +CONFIG_KEYSTONE_LDAP_USER_ATTRIBUTE_IGNORE= + +# Identity service LDAP attribute mapped to default_project_id for +# users. +CONFIG_KEYSTONE_LDAP_USER_DEFAULT_PROJECT_ID_ATTRIBUTE= + +# Specify 'y' if you want to be able to create Identity service users +# through the Identity service interface; specify 'n' if you will +# create directly in the LDAP backend. ['n', 'y'] +CONFIG_KEYSTONE_LDAP_USER_ALLOW_CREATE=n + +# Specify 'y' if you want to be able to update Identity service users +# through the Identity service interface; specify 'n' if you will +# update directly in the LDAP backend. ['n', 'y'] +CONFIG_KEYSTONE_LDAP_USER_ALLOW_UPDATE=n + +# Specify 'y' if you want to be able to delete Identity service users +# through the Identity service interface; specify 'n' if you will +# delete directly in the LDAP backend. ['n', 'y'] +CONFIG_KEYSTONE_LDAP_USER_ALLOW_DELETE=n + +# Identity service LDAP attribute mapped to password. +CONFIG_KEYSTONE_LDAP_USER_PASS_ATTRIBUTE= + +# DN of the group entry to hold enabled LDAP users when using enabled +# emulation. 
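
The interplay of USER_ENABLED_MASK, USER_ENABLED_DEFAULT and USER_ENABLED_INVERT above is easy to misread. The sketch below is a simplified illustration of the usual interpretation (for example Active Directory's userAccountControl, where a set 0x2 bit means the account is disabled); it mirrors the option descriptions, not Keystone's exact code path.

    # Simplified illustration of the LDAP "enabled" decision; not Keystone's code.
    def ldap_user_enabled(attr_value, mask=0, enabled_default="TRUE", invert=False):
        if mask:  # e.g. mask=2 with userAccountControl: bit set => disabled
            return (int(attr_value) & mask) == 0
        enabled = str(attr_value).upper() == str(enabled_default).upper()
        return (not enabled) if invert else enabled

    print(ldap_user_enabled(512, mask=2))  # True: the 0x2 bit is clear
    print(ldap_user_enabled(514, mask=2))  # False: the disable bit is set
    print(ldap_user_enabled("TRUE"))       # True: plain boolean-style attribute
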
+CONFIG_KEYSTONE_LDAP_USER_ENABLED_EMULATION_DN= + +# List of additional LDAP attributes for mapping additional attribute +# mappings for users. The attribute-mapping format is +# <ldap_attr>:<user_attr>, where ldap_attr is the attribute in the +# LDAP entry and user_attr is the Identity API attribute. +CONFIG_KEYSTONE_LDAP_USER_ADDITIONAL_ATTRIBUTE_MAPPING= + +# Group subtree for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_GROUP_SUBTREE= + +# Group query filter for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_GROUP_FILTER= + +# Group object class for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_GROUP_OBJECTCLASS= + +# Group ID attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_GROUP_ID_ATTRIBUTE= + +# Group name attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_GROUP_NAME_ATTRIBUTE= + +# Group member attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_GROUP_MEMBER_ATTRIBUTE= + +# Group description attribute for the Identity service LDAP backend. +CONFIG_KEYSTONE_LDAP_GROUP_DESC_ATTRIBUTE= + +# Comma-separated list of attributes stripped from LDAP group entry +# upon update. +CONFIG_KEYSTONE_LDAP_GROUP_ATTRIBUTE_IGNORE= + +# Specify 'y' if you want to be able to create Identity service +# groups through the Identity service interface; specify 'n' if you +# will create directly in the LDAP backend. ['n', 'y'] +CONFIG_KEYSTONE_LDAP_GROUP_ALLOW_CREATE=n + +# Specify 'y' if you want to be able to update Identity service +# groups through the Identity service interface; specify 'n' if you +# will update directly in the LDAP backend. ['n', 'y'] +CONFIG_KEYSTONE_LDAP_GROUP_ALLOW_UPDATE=n + +# Specify 'y' if you want to be able to delete Identity service +# groups through the Identity service interface; specify 'n' if you +# will delete directly in the LDAP backend. ['n', 'y'] +CONFIG_KEYSTONE_LDAP_GROUP_ALLOW_DELETE=n + +# List of additional LDAP attributes used for mapping additional +# attribute mappings for groups. The attribute=mapping format is +# <ldap_attr>:<group_attr>, where ldap_attr is the attribute in the +# LDAP entry and group_attr is the Identity API attribute. +CONFIG_KEYSTONE_LDAP_GROUP_ADDITIONAL_ATTRIBUTE_MAPPING= + +# Specify 'y' if the Identity service LDAP backend should use TLS. +# ['n', 'y'] +CONFIG_KEYSTONE_LDAP_USE_TLS=n + +# CA certificate directory for Identity service LDAP backend (if TLS +# is used). +CONFIG_KEYSTONE_LDAP_TLS_CACERTDIR= + +# CA certificate file for Identity service LDAP backend (if TLS is +# used). +CONFIG_KEYSTONE_LDAP_TLS_CACERTFILE= + +# Certificate-checking strictness level for Identity service LDAP +# backend; valid options are: never, allow, demand. ['never', 'allow', +# 'demand'] +CONFIG_KEYSTONE_LDAP_TLS_REQ_CERT=demand + +# Password to use for the Image service (glance) to access the +# database. +CONFIG_GLANCE_DB_PW=qum5net + +# Password to use for the Image service to authenticate with the +# Identity service. +CONFIG_GLANCE_KS_PW=qum5net + +# Storage backend for the Image service (controls how the Image +# service stores disk images). Valid options are: file or swift +# (Object Storage). The Object Storage service must be enabled to use +# it as a working backend; otherwise, Packstack falls back to 'file'. +# ['file', 'swift'] +CONFIG_GLANCE_BACKEND=file + +# Password to use for the Block Storage service (cinder) to access +# the database. 
+CONFIG_CINDER_DB_PW=qum5net + +# Enter y if cron job for removing soft deleted DB rows should be +# created. +CONFIG_CINDER_DB_PURGE_ENABLE=True + +# Password to use for the Block Storage service to authenticate with +# the Identity service. +CONFIG_CINDER_KS_PW=qum5net + +# Storage backend to use for the Block Storage service; valid options +# are: lvm, gluster, nfs, vmdk, netapp. ['lvm', 'gluster', 'nfs', +# 'vmdk', 'netapp'] +CONFIG_CINDER_BACKEND=lvm + +# Specify 'y' to create the Block Storage volumes group. That is, +# Packstack creates a raw disk image in /var/lib/cinder, and mounts it +# using a loopback device. This should only be used for testing on a +# proof-of-concept installation of the Block Storage service (a file- +# backed volume group is not suitable for production usage). ['y', +# 'n'] +CONFIG_CINDER_VOLUMES_CREATE=y + +# Size of Block Storage volumes group. Actual volume size will be +# extended with 3% more space for VG metadata. Remember that the size +# of the volume group will restrict the amount of disk space that you +# can expose to Compute instances, and that the specified amount must +# be available on the device used for /var/lib/cinder. +CONFIG_CINDER_VOLUMES_SIZE=20G + +# A single or comma-separated list of Red Hat Storage (gluster) +# volume shares to mount. Example: 'ip-address:/vol-name', 'domain +# :/vol-name' +CONFIG_CINDER_GLUSTER_MOUNTS= + +# A single or comma-separated list of NFS exports to mount. Example: +# 'ip-address:/export-name' +CONFIG_CINDER_NFS_MOUNTS= + +# Administrative user account name used to access the NetApp storage +# system or proxy server. +CONFIG_CINDER_NETAPP_LOGIN= + +# Password for the NetApp administrative user account specified in +# the CONFIG_CINDER_NETAPP_LOGIN parameter. +CONFIG_CINDER_NETAPP_PASSWORD= + +# Hostname (or IP address) for the NetApp storage system or proxy +# server. +CONFIG_CINDER_NETAPP_HOSTNAME= + +# The TCP port to use for communication with the storage system or +# proxy. If not specified, Data ONTAP drivers will use 80 for HTTP and +# 443 for HTTPS; E-Series will use 8080 for HTTP and 8443 for HTTPS. +# Defaults to 80. +CONFIG_CINDER_NETAPP_SERVER_PORT=80 + +# Storage family type used on the NetApp storage system; valid +# options are ontap_7mode for using Data ONTAP operating in 7-Mode, +# ontap_cluster for using clustered Data ONTAP, or E-Series for NetApp +# E-Series. Defaults to ontap_cluster. ['ontap_7mode', +# 'ontap_cluster', 'eseries'] +CONFIG_CINDER_NETAPP_STORAGE_FAMILY=ontap_cluster + +# The transport protocol used when communicating with the NetApp +# storage system or proxy server. Valid values are http or https. +# Defaults to 'http'. ['http', 'https'] +CONFIG_CINDER_NETAPP_TRANSPORT_TYPE=http + +# Storage protocol to be used on the data path with the NetApp +# storage system; valid options are iscsi, fc, nfs. Defaults to nfs. +# ['iscsi', 'fc', 'nfs'] +CONFIG_CINDER_NETAPP_STORAGE_PROTOCOL=nfs + +# Quantity to be multiplied by the requested volume size to ensure +# enough space is available on the virtual storage server (Vserver) to +# fulfill the volume creation request. Defaults to 1.0. +CONFIG_CINDER_NETAPP_SIZE_MULTIPLIER=1.0 + +# Time period (in minutes) that is allowed to elapse after the image +# is last accessed, before it is deleted from the NFS image cache. 
+# When a cache-cleaning cycle begins, images in the cache that have +# not been accessed in the last M minutes, where M is the value of +# this parameter, are deleted from the cache to create free space on +# the NFS share. Defaults to 720. +CONFIG_CINDER_NETAPP_EXPIRY_THRES_MINUTES=720 + +# If the percentage of available space for an NFS share has dropped +# below the value specified by this parameter, the NFS image cache is +# cleaned. Defaults to 20. +CONFIG_CINDER_NETAPP_THRES_AVL_SIZE_PERC_START=20 + +# When the percentage of available space on an NFS share has reached +# the percentage specified by this parameter, the driver stops +# clearing files from the NFS image cache that have not been accessed +# in the last M minutes, where M is the value of the +# CONFIG_CINDER_NETAPP_EXPIRY_THRES_MINUTES parameter. Defaults to 60. +CONFIG_CINDER_NETAPP_THRES_AVL_SIZE_PERC_STOP=60 + +# Single or comma-separated list of NetApp NFS shares for Block +# Storage to use. Format: ip-address:/export-name. Defaults to ''. +CONFIG_CINDER_NETAPP_NFS_SHARES= + +# File with the list of available NFS shares. Defaults to +# '/etc/cinder/shares.conf'. +CONFIG_CINDER_NETAPP_NFS_SHARES_CONFIG=/etc/cinder/shares.conf + +# This parameter is only utilized when the storage protocol is +# configured to use iSCSI or FC. This parameter is used to restrict +# provisioning to the specified controller volumes. Specify the value +# of this parameter to be a comma separated list of NetApp controller +# volume names to be used for provisioning. Defaults to ''. +CONFIG_CINDER_NETAPP_VOLUME_LIST= + +# The vFiler unit on which provisioning of block storage volumes will +# be done. This parameter is only used by the driver when connecting +# to an instance with a storage family of Data ONTAP operating in +# 7-Mode Only use this parameter when utilizing the MultiStore feature +# on the NetApp storage system. Defaults to ''. +CONFIG_CINDER_NETAPP_VFILER= + +# The name of the config.conf stanza for a Data ONTAP (7-mode) HA +# partner. This option is only used by the driver when connecting to +# an instance with a storage family of Data ONTAP operating in 7-Mode, +# and it is required if the storage protocol selected is FC. Defaults +# to ''. +CONFIG_CINDER_NETAPP_PARTNER_BACKEND_NAME= + +# This option specifies the virtual storage server (Vserver) name on +# the storage cluster on which provisioning of block storage volumes +# should occur. Defaults to ''. +CONFIG_CINDER_NETAPP_VSERVER= + +# Restricts provisioning to the specified controllers. Value must be +# a comma-separated list of controller hostnames or IP addresses to be +# used for provisioning. This option is only utilized when the storage +# family is configured to use E-Series. Defaults to ''. +CONFIG_CINDER_NETAPP_CONTROLLER_IPS= + +# Password for the NetApp E-Series storage array. Defaults to ''. +CONFIG_CINDER_NETAPP_SA_PASSWORD= + +# This option is used to define how the controllers in the E-Series +# storage array will work with the particular operating system on the +# hosts that are connected to it. Defaults to 'linux_dm_mp' +CONFIG_CINDER_NETAPP_ESERIES_HOST_TYPE=linux_dm_mp + +# Path to the NetApp E-Series proxy application on a proxy server. +# The value is combined with the value of the +# CONFIG_CINDER_NETAPP_TRANSPORT_TYPE, CONFIG_CINDER_NETAPP_HOSTNAME, +# and CONFIG_CINDER_NETAPP_HOSTNAME options to create the URL used by +# the driver to connect to the proxy application. Defaults to +# '/devmgr/v2'. 
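
The three NetApp cache options above cooperate: entries older than the expiry threshold are candidates for removal, cleaning begins once free space on the NFS share drops below the START percentage, and it stops again when free space climbs back to the STOP percentage. A small decision helper restating that behaviour, for illustration only:

    # Illustration of how the NetApp NFS image-cache thresholds interact.
    def should_clean_cache(free_pct, cleaning, start_pct=20, stop_pct=60):
        """Start below start_pct free space; keep cleaning until stop_pct is reached."""
        if cleaning:
            return free_pct < stop_pct
        return free_pct < start_pct

    print(should_clean_cache(15, cleaning=False))  # True  -> begin a cleaning cycle
    print(should_clean_cache(40, cleaning=True))   # True  -> still below the stop threshold
    print(should_clean_cache(65, cleaning=True))   # False -> enough space reclaimed
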
+CONFIG_CINDER_NETAPP_WEBSERVICE_PATH=/devmgr/v2 + +# Restricts provisioning to the specified storage pools. Only dynamic +# disk pools are currently supported. The value must be a comma- +# separated list of disk pool names to be used for provisioning. +# Defaults to ''. +CONFIG_CINDER_NETAPP_STORAGE_POOLS= + +# Password to use for OpenStack Bare Metal Provisioning (ironic) to +# access the database. +CONFIG_IRONIC_DB_PW=PW_PLACEHOLDER + +# Password to use for OpenStack Bare Metal Provisioning to +# authenticate with the Identity service. +CONFIG_IRONIC_KS_PW=PW_PLACEHOLDER + +# Enter y if cron job for removing soft deleted DB rows should be +# created. +CONFIG_NOVA_DB_PURGE_ENABLE=True + +# Password to use for the Compute service (nova) to access the +# database. +CONFIG_NOVA_DB_PW=qum5net + +# Password to use for the Compute service to authenticate with the +# Identity service. +CONFIG_NOVA_KS_PW=qum5net + +# Overcommitment ratio for virtual to physical CPUs. Specify 1.0 to +# disable CPU overcommitment. +CONFIG_NOVA_SCHED_CPU_ALLOC_RATIO=16.0 + +# Overcommitment ratio for virtual to physical RAM. Specify 1.0 to +# disable RAM overcommitment. +CONFIG_NOVA_SCHED_RAM_ALLOC_RATIO=1.5 + +# Protocol used for instance migration. Valid options are: tcp and +# ssh. Note that by default, the Compute user is created with the +# /sbin/nologin shell so that the SSH protocol will not work. To make +# the SSH protocol work, you must configure the Compute user on +# compute hosts manually. ['tcp', 'ssh'] +CONFIG_NOVA_COMPUTE_MIGRATE_PROTOCOL=tcp + +# Manager that runs the Compute service. +CONFIG_NOVA_COMPUTE_MANAGER=nova.compute.manager.ComputeManager + +# PEM encoded certificate to be used for ssl on the https server, +# leave blank if one should be generated, this certificate should not +# require a passphrase. If CONFIG_HORIZON_SSL is set to 'n' this +# parameter is ignored. +CONFIG_VNC_SSL_CERT= + +# SSL keyfile corresponding to the certificate if one was entered. If +# CONFIG_HORIZON_SSL is set to 'n' this parameter is ignored. +CONFIG_VNC_SSL_KEY= + +# Enter the PCI passthrough array of hash in JSON style for +# controller eg. [{"vendor_id":"1234", "product_id":"5678", +# "name":"default"}, {...}] +CONFIG_NOVA_PCI_ALIAS= + +# Enter the PCI passthrough whitelist array of hash in JSON style for +# controller eg. [{"vendor_id":"1234", "product_id":"5678", +# "name':"default"}, {...}] +CONFIG_NOVA_PCI_PASSTHROUGH_WHITELIST= + +# Private interface for flat DHCP on the Compute servers. +CONFIG_NOVA_COMPUTE_PRIVIF= + +# Compute Network Manager. ['^nova\.network\.manager\.\w+Manager$'] +CONFIG_NOVA_NETWORK_MANAGER=nova.network.manager.FlatDHCPManager + +# Public interface on the Compute network server. +CONFIG_NOVA_NETWORK_PUBIF=eth0 + +# Private interface for flat DHCP on the Compute network server. +CONFIG_NOVA_NETWORK_PRIVIF= + +# IP Range for flat DHCP. ['^[\:\.\da-fA-f]+(\/\d+){0,1}$'] +CONFIG_NOVA_NETWORK_FIXEDRANGE=192.168.32.0/22 + +# IP Range for floating IP addresses. ['^[\:\.\da- +# fA-f]+(\/\d+){0,1}$'] +CONFIG_NOVA_NETWORK_FLOATRANGE=10.3.4.0/22 + +# Specify 'y' to automatically assign a floating IP to new instances. +# ['y', 'n'] +CONFIG_NOVA_NETWORK_AUTOASSIGNFLOATINGIP=n + +# First VLAN for private networks (Compute networking). +CONFIG_NOVA_NETWORK_VLAN_START=100 + +# Number of networks to support (Compute networking). +CONFIG_NOVA_NETWORK_NUMBER=1 + +# Number of addresses in each private subnet (Compute networking). 
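
The FlatDHCP ranges above (192.168.32.0/22 fixed, 10.3.4.0/22 floating) can be sanity-checked with the standard ipaddress module; a quick, illustrative check:

    # Sanity check of the FlatDHCP ranges configured above.
    import ipaddress

    fixed = ipaddress.ip_network("192.168.32.0/22")
    floating = ipaddress.ip_network("10.3.4.0/22")

    print(fixed.num_addresses)       # 1024 addresses in the fixed range
    print(floating.num_addresses)    # 1024 addresses in the floating range
    print(fixed.overlaps(floating))  # False: the two pools should not overlap
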
+CONFIG_NOVA_NETWORK_SIZE=255 + +# Password to use for OpenStack Networking (neutron) to authenticate +# with the Identity service. +CONFIG_NEUTRON_KS_PW=qum5net + +# The password to use for OpenStack Networking to access the +# database. +CONFIG_NEUTRON_DB_PW=qum5net + +# The name of the Open vSwitch bridge (or empty for linuxbridge) for +# the OpenStack Networking L3 agent to use for external traffic. +# Specify 'provider' if you intend to use a provider network to handle +# external traffic. +CONFIG_NEUTRON_L3_EXT_BRIDGE=br-ex + +# Password for the OpenStack Networking metadata agent. +CONFIG_NEUTRON_METADATA_PW=qum5net + +# Specify 'y' to install OpenStack Networking's Load-Balancing- +# as-a-Service (LBaaS). ['y', 'n'] +CONFIG_LBAAS_INSTALL=n + +# Specify 'y' to install OpenStack Networking's L3 Metering agent +# ['y', 'n'] +CONFIG_NEUTRON_METERING_AGENT_INSTALL=n + +# Specify 'y' to configure OpenStack Networking's Firewall- +# as-a-Service (FWaaS). ['y', 'n'] +CONFIG_NEUTRON_FWAAS=n + +# Specify 'y' to configure OpenStack Networking's VPN-as-a-Service +# (VPNaaS). ['y', 'n'] +CONFIG_NEUTRON_VPNAAS=n + +# Comma-separated list of network-type driver entry points to be +# loaded from the neutron.ml2.type_drivers namespace. ['local', +# 'flat', 'vlan', 'gre', 'vxlan'] +CONFIG_NEUTRON_ML2_TYPE_DRIVERS=vxlan + +# Comma-separated, ordered list of network types to allocate as +# tenant networks. The 'local' value is only useful for single-box +# testing and provides no connectivity between hosts. ['local', +# 'vlan', 'gre', 'vxlan'] +CONFIG_NEUTRON_ML2_TENANT_NETWORK_TYPES=vxlan + +# Comma-separated ordered list of networking mechanism driver entry +# points to be loaded from the neutron.ml2.mechanism_drivers +# namespace. ['logger', 'test', 'linuxbridge', 'openvswitch', +# 'hyperv', 'ncs', 'arista', 'cisco_nexus', 'mlnx', 'l2population', +# 'sriovnicswitch'] +CONFIG_NEUTRON_ML2_MECHANISM_DRIVERS=openvswitch + +# Comma-separated list of physical_network names with which flat +# networks can be created. Use * to allow flat networks with arbitrary +# physical_network names. +CONFIG_NEUTRON_ML2_FLAT_NETWORKS=* + +# Comma-separated list of <physical_network>:<vlan_min>:<vlan_max> or +# <physical_network> specifying physical_network names usable for VLAN +# provider and tenant networks, as well as ranges of VLAN tags on each +# available for allocation to tenant networks. +CONFIG_NEUTRON_ML2_VLAN_RANGES= + +# Comma-separated list of <tun_min>:<tun_max> tuples enumerating +# ranges of GRE tunnel IDs that are available for tenant-network +# allocation. A tuple must be an array with tun_max +1 - tun_min > +# 1000000. +CONFIG_NEUTRON_ML2_TUNNEL_ID_RANGES= + +# Comma-separated list of addresses for VXLAN multicast group. If +# left empty, disables VXLAN from sending allocate broadcast traffic +# (disables multicast VXLAN mode). Should be a Multicast IP (v4 or v6) +# address. +CONFIG_NEUTRON_ML2_VXLAN_GROUP= + +# Comma-separated list of <vni_min>:<vni_max> tuples enumerating +# ranges of VXLAN VNI IDs that are available for tenant network +# allocation. Minimum value is 0 and maximum value is 16777215. +CONFIG_NEUTRON_ML2_VNI_RANGES=10:100 + +# Name of the L2 agent to be used with OpenStack Networking. +# ['linuxbridge', 'openvswitch'] +CONFIG_NEUTRON_L2_AGENT=openvswitch + +# Comma separated list of supported PCI vendor devices defined by +# vendor_id:product_id according to the PCI ID Repository. 
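
CONFIG_NEUTRON_ML2_VNI_RANGES above uses the <vni_min>:<vni_max> list format with VNIs limited to 0..16777215. A small parser that mirrors that description, illustrative only (neutron performs its own validation):

    # Parse a "<vni_min>:<vni_max>[,...]" value such as "10:100".
    def parse_vni_ranges(value, lo=0, hi=16777215):
        ranges = []
        for chunk in value.split(","):
            vmin, vmax = (int(x) for x in chunk.split(":"))
            if not (lo <= vmin <= vmax <= hi):
                raise ValueError("invalid VNI range %r" % chunk)
            ranges.append((vmin, vmax))
        return ranges

    print(parse_vni_ranges("10:100"))  # [(10, 100)]
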
+CONFIG_NEUTRON_ML2_SUPPORTED_PCI_VENDOR_DEVS=['15b3:1004', '8086:10ca'] + +# Specify 'y' if the sriov agent is required +CONFIG_NEUTRON_ML2_SRIOV_AGENT_REQUIRED=n + +# Comma-separated list of interface mappings for the OpenStack +# Networking ML2 SRIOV agent. Each tuple in the list must be in the +# format <physical_network>:<net_interface>. Example: +# physnet1:eth1,physnet2:eth2,physnet3:eth3. +CONFIG_NEUTRON_ML2_SRIOV_INTERFACE_MAPPINGS= + +# Comma-separated list of interface mappings for the OpenStack +# Networking linuxbridge plugin. Each tuple in the list must be in the +# format <physical_network>:<net_interface>. Example: +# physnet1:eth1,physnet2:eth2,physnet3:eth3. +CONFIG_NEUTRON_LB_INTERFACE_MAPPINGS= + +# Comma-separated list of bridge mappings for the OpenStack +# Networking Open vSwitch plugin. Each tuple in the list must be in +# the format <physical_network>:<ovs_bridge>. Example: physnet1:br- +# eth1,physnet2:br-eth2,physnet3:br-eth3 +CONFIG_NEUTRON_OVS_BRIDGE_MAPPINGS= + +# Comma-separated list of colon-separated Open vSwitch +# <bridge>:<interface> pairs. The interface will be added to the +# associated bridge. If you desire the bridge to be persistent a value +# must be added to this directive, also +# CONFIG_NEUTRON_OVS_BRIDGE_MAPPINGS must be set in order to create +# the proper port. This can be achieved from the command line by +# issuing the following command: packstack --allinone --os-neutron- +# ovs-bridge-mappings=ext-net:br-ex --os-neutron-ovs-bridge-interfaces +# =br-ex:eth0 +CONFIG_NEUTRON_OVS_BRIDGE_IFACES= + +# Interface for the Open vSwitch tunnel. Packstack overrides the IP +# address used for tunnels on this hypervisor to the IP found on the +# specified interface (for example, eth1). +CONFIG_NEUTRON_OVS_TUNNEL_IF= + +# VXLAN UDP port. +CONFIG_NEUTRON_OVS_VXLAN_UDP_PORT=4789 + +# Specify 'y' to set up Horizon communication over https. ['y', 'n'] +CONFIG_HORIZON_SSL=n + +# Secret key to use for Horizon Secret Encryption Key. +CONFIG_HORIZON_SECRET_KEY=e2ba54f295f84d0c8d645de8e36fcc33 + +# PEM-encoded certificate to be used for SSL connections on the https +# server. To generate a certificate, leave blank. +CONFIG_HORIZON_SSL_CERT= + +# SSL keyfile corresponding to the certificate if one was specified. +# The certificate should not require a passphrase. +CONFIG_HORIZON_SSL_KEY= + +CONFIG_HORIZON_SSL_CACERT= + +# Password to use for the Object Storage service to authenticate with +# the Identity service. +CONFIG_SWIFT_KS_PW=qum5net + +# Comma-separated list of devices to use as storage device for Object +# Storage. Each entry must take the format /path/to/dev (for example, +# specifying /dev/vdb installs /dev/vdb as the Object Storage storage +# device; Packstack does not create the filesystem, you must do this +# first). If left empty, Packstack creates a loopback device for test +# setup. +CONFIG_SWIFT_STORAGES= + +# Number of Object Storage storage zones; this number MUST be no +# larger than the number of configured storage devices. +CONFIG_SWIFT_STORAGE_ZONES=1 + +# Number of Object Storage storage replicas; this number MUST be no +# larger than the number of configured storage zones. +CONFIG_SWIFT_STORAGE_REPLICAS=1 + +# File system type for storage nodes. ['xfs', 'ext4'] +CONFIG_SWIFT_STORAGE_FSTYPE=ext4 + +# Custom seed number to use for swift_hash_path_suffix in +# /etc/swift/swift.conf. If you do not provide a value, a seed number +# is automatically generated. 
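
The Object Storage settings above impose two ordering constraints (zones no larger than the number of devices, replicas no larger than the number of zones), and the hash suffix is a 16-hex-character seed when left to be generated. A short illustration of both, assuming an empty CONFIG_SWIFT_STORAGES means a single loopback device:

    # Mirror the swift layout constraints stated above and generate a seed of the
    # same shape as the example value (16 hex characters).
    import secrets

    def check_swift_layout(num_devices, zones, replicas):
        assert zones <= max(num_devices, 1), "more zones than storage devices"
        assert replicas <= zones, "more replicas than zones"

    check_swift_layout(num_devices=0, zones=1, replicas=1)  # all-in-one loopback case
    print(secrets.token_hex(8))                             # a fresh 16-char hex seed
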
+CONFIG_SWIFT_HASH=54760d6b88814b53 + +# Size of the Object Storage loopback file storage device. +CONFIG_SWIFT_STORAGE_SIZE=2G + +# Password used by Orchestration service user to authenticate against +# the database. +CONFIG_HEAT_DB_PW=PW_PLACEHOLDER + +# Encryption key to use for authentication in the Orchestration +# database (16, 24, or 32 chars). +CONFIG_HEAT_AUTH_ENC_KEY=2e06ca7c4aa3400c + +# Password to use for the Orchestration service to authenticate with +# the Identity service. +CONFIG_HEAT_KS_PW=PW_PLACEHOLDER + +# Specify 'y' to install the Orchestration CloudWatch API. ['y', 'n'] +CONFIG_HEAT_CLOUDWATCH_INSTALL=n + +# Specify 'y' to install the Orchestration CloudFormation API. ['y', +# 'n'] +CONFIG_HEAT_CFN_INSTALL=n + +# Name of the Identity domain for Orchestration. +CONFIG_HEAT_DOMAIN=heat + +# Name of the Identity domain administrative user for Orchestration. +CONFIG_HEAT_DOMAIN_ADMIN=heat_admin + +# Password for the Identity domain administrative user for +# Orchestration. +CONFIG_HEAT_DOMAIN_PASSWORD=PW_PLACEHOLDER + +# Specify 'y' to provision for demo usage and testing. ['y', 'n'] +CONFIG_PROVISION_DEMO=y + +# Specify 'y' to configure the OpenStack Integration Test Suite +# (tempest) for testing. The test suite requires OpenStack Networking +# to be installed. ['y', 'n'] +CONFIG_PROVISION_TEMPEST=n + +# CIDR network address for the floating IP subnet. +CONFIG_PROVISION_DEMO_FLOATRANGE=172.24.4.224/28 + +# The name to be assigned to the demo image in Glance (default +# "cirros"). +CONFIG_PROVISION_IMAGE_NAME=cirros + +# A URL or local file location for an image to download and provision +# in Glance (defaults to a URL for a recent "cirros" image). +CONFIG_PROVISION_IMAGE_URL=http://download.cirros-cloud.net/0.3.3/cirros-0.3.3-x86_64-disk.img + +# Format for the demo image (default "qcow2"). +CONFIG_PROVISION_IMAGE_FORMAT=qcow2 + +# User to use when connecting to instances booted from the demo +# image. +CONFIG_PROVISION_IMAGE_SSH_USER=cirros + +# Name of the Integration Test Suite provisioning user. If you do not +# provide a user name, Tempest is configured in a standalone mode. +CONFIG_PROVISION_TEMPEST_USER= + +# Password to use for the Integration Test Suite provisioning user. +CONFIG_PROVISION_TEMPEST_USER_PW=PW_PLACEHOLDER + +# CIDR network address for the floating IP subnet. +CONFIG_PROVISION_TEMPEST_FLOATRANGE=172.24.4.224/28 + +# URI of the Integration Test Suite git repository. +CONFIG_PROVISION_TEMPEST_REPO_URI=https://github.com/openstack/tempest.git + +# Revision (branch) of the Integration Test Suite git repository. +CONFIG_PROVISION_TEMPEST_REPO_REVISION=master + +# Specify 'y' to configure the Open vSwitch external bridge for an +# all-in-one deployment (the L3 external bridge acts as the gateway +# for virtual machines). ['y', 'n'] +CONFIG_PROVISION_OVS_BRIDGE=y + +# Password to use for OpenStack Data Processing (sahara) to access +# the database. +CONFIG_SAHARA_DB_PW=PW_PLACEHOLDER + +# Password to use for OpenStack Data Processing to authenticate with +# the Identity service. +CONFIG_SAHARA_KS_PW=PW_PLACEHOLDER + +# Secret key for signing Telemetry service (ceilometer) messages. +CONFIG_CEILOMETER_SECRET=d1cd21accf764049 + +# Password to use for Telemetry to authenticate with the Identity +# service. +CONFIG_CEILOMETER_KS_PW=qum5net + +# Backend driver for Telemetry's group membership coordination. +# ['redis', 'none'] +CONFIG_CEILOMETER_COORDINATION_BACKEND=redis + +# IP address of the server on which to install MongoDB. 
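
CONFIG_HEAT_AUTH_ENC_KEY above must be 16, 24 or 32 characters (the AES key sizes); the template ships a 16-character hex value. Generating a fresh key of the same shape is trivial with the stdlib, shown here only as an illustration:

    # Generate a Heat auth encryption key of a valid length (16/24/32 characters).
    import secrets

    key = secrets.token_hex(8)  # 16 hex characters, matching the length used above
    assert len(key) in (16, 24, 32)
    print(key)
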
+CONFIG_MONGODB_HOST=VARINET4ADDR + +# IP address of the server on which to install the Redis master +# server. +CONFIG_REDIS_MASTER_HOST=VARINET4ADDR + +# Port on which the Redis server(s) listens. +CONFIG_REDIS_PORT=6379 + +# Specify 'y' to have Redis try to use HA. ['y', 'n'] +CONFIG_REDIS_HA=n + +# Hosts on which to install Redis slaves. +CONFIG_REDIS_SLAVE_HOSTS= + +# Hosts on which to install Redis sentinel servers. +CONFIG_REDIS_SENTINEL_HOSTS= + +# Host to configure as the Redis coordination sentinel. +CONFIG_REDIS_SENTINEL_CONTACT_HOST= + +# Port on which Redis sentinel servers listen. +CONFIG_REDIS_SENTINEL_PORT=26379 + +# Quorum value for Redis sentinel servers. +CONFIG_REDIS_SENTINEL_QUORUM=2 + +# Name of the master server watched by the Redis sentinel. ['[a-z]+'] +CONFIG_REDIS_MASTER_NAME=mymaster + +# Password to use for OpenStack Database-as-a-Service (trove) to +# access the database. +CONFIG_TROVE_DB_PW=PW_PLACEHOLDER + +# Password to use for OpenStack Database-as-a-Service to authenticate +# with the Identity service. +CONFIG_TROVE_KS_PW=PW_PLACEHOLDER + +# User name to use when OpenStack Database-as-a-Service connects to +# the Compute service. +CONFIG_TROVE_NOVA_USER=trove + +# Tenant to use when OpenStack Database-as-a-Service connects to the +# Compute service. +CONFIG_TROVE_NOVA_TENANT=services + +# Password to use when OpenStack Database-as-a-Service connects to +# the Compute service. +CONFIG_TROVE_NOVA_PW=PW_PLACEHOLDER + +# Password of the nagiosadmin user on the Nagios server. +CONFIG_NAGIOS_PW=PW_PLACEHOLDER diff --git a/qa/qa_scripts/openstack/files/nova.template.conf b/qa/qa_scripts/openstack/files/nova.template.conf new file mode 100644 index 000000000..c63c8648f --- /dev/null +++ b/qa/qa_scripts/openstack/files/nova.template.conf @@ -0,0 +1,3698 @@ +[DEFAULT] + +# +# From nova +# + +# Number of times to retry live-migration before failing. If == -1, try until +# out of hosts. If == 0, only try once, no retries. 
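
The migrate_max_retries comment that opens this nova [DEFAULT] section uses the usual convention: -1 retries until the candidate hosts are exhausted, 0 makes a single attempt, and N allows N retries. A tiny illustration of that convention (the attempts_allowed helper is hypothetical, not nova code):

    # Illustrative reading of the migrate_max_retries convention described above.
    def attempts_allowed(max_retries, num_hosts):
        if max_retries == -1:      # try until out of hosts
            return num_hosts
        return 1 + max_retries     # 0 => exactly one attempt

    print(attempts_allowed(-1, num_hosts=5))  # 5
    print(attempts_allowed(0, num_hosts=5))   # 1
    print(attempts_allowed(3, num_hosts=5))   # 4
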
(integer value) +#migrate_max_retries=-1 + +# The topic console auth proxy nodes listen on (string value) +#consoleauth_topic=consoleauth + +# The driver to use for database access (string value) +#db_driver=nova.db + +# Backend to use for IPv6 generation (string value) +#ipv6_backend=rfc2462 + +# The driver for servicegroup service (valid options are: db, zk, mc) (string +# value) +#servicegroup_driver=db + +# The availability_zone to show internal services under (string value) +#internal_service_availability_zone=internal +internal_service_availability_zone=internal + +# Default compute node availability_zone (string value) +#default_availability_zone=nova +default_availability_zone=nova + +# The topic cert nodes listen on (string value) +#cert_topic=cert + +# Image ID used when starting up a cloudpipe vpn server (string value) +#vpn_image_id=0 + +# Flavor for vpn instances (string value) +#vpn_flavor=m1.tiny + +# Template for cloudpipe instance boot script (string value) +#boot_script_template=$pybasedir/nova/cloudpipe/bootscript.template + +# Network to push into openvpn config (string value) +#dmz_net=10.0.0.0 + +# Netmask to push into openvpn config (string value) +#dmz_mask=255.255.255.0 + +# Suffix to add to project name for vpn key and secgroups (string value) +#vpn_key_suffix=-vpn + +# Record sessions to FILE.[session_number] (boolean value) +#record=false + +# Become a daemon (background process) (boolean value) +#daemon=false + +# Disallow non-encrypted connections (boolean value) +#ssl_only=false + +# Source is ipv6 (boolean value) +#source_is_ipv6=false + +# SSL certificate file (string value) +#cert=self.pem + +# SSL key file (if separate from cert) (string value) +#key=<None> + +# Run webserver on same port. Serve files from DIR. (string value) +#web=/usr/share/spice-html5 + +# Host on which to listen for incoming requests (string value) +#novncproxy_host=0.0.0.0 +novncproxy_host=0.0.0.0 + +# Port on which to listen for incoming requests (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#novncproxy_port=6080 +novncproxy_port=6080 + +# Host on which to listen for incoming requests (string value) +#serialproxy_host=0.0.0.0 + +# Port on which to listen for incoming requests (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#serialproxy_port=6083 + +# Host on which to listen for incoming requests (string value) +#html5proxy_host=0.0.0.0 + +# Port on which to listen for incoming requests (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#html5proxy_port=6082 + +# Driver to use for the console proxy (string value) +#console_driver=nova.console.xvp.XVPConsoleProxy + +# Stub calls to compute worker for tests (boolean value) +#stub_compute=false + +# Publicly visible name for this console host (string value) +#console_public_hostname=x86-017.build.eng.bos.redhat.com + +# The topic console proxy nodes listen on (string value) +#console_topic=console + +# XVP conf template (string value) +#console_xvp_conf_template=$pybasedir/nova/console/xvp.conf.template + +# Generated XVP conf file (string value) +#console_xvp_conf=/etc/xvp.conf + +# XVP master process pid file (string value) +#console_xvp_pid=/var/run/xvp.pid + +# XVP log file (string value) +#console_xvp_log=/var/log/xvp.log + +# Port for XVP to multiplex VNC connections on (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#console_xvp_multiplex_port=5900 + +# How many seconds before deleting tokens (integer value) +#console_token_ttl=600 + +# Filename of root CA (string value) 
+#ca_file=cacert.pem + +# Filename of private key (string value) +#key_file=private/cakey.pem + +# Filename of root Certificate Revocation List (string value) +#crl_file=crl.pem + +# Where we keep our keys (string value) +#keys_path=$state_path/keys + +# Where we keep our root CA (string value) +#ca_path=$state_path/CA + +# Should we use a CA for each project? (boolean value) +#use_project_ca=false + +# Subject for certificate for users, %s for project, user, timestamp (string +# value) +#user_cert_subject=/C=US/ST=California/O=OpenStack/OU=NovaDev/CN=%.16s-%.16s-%s + +# Subject for certificate for projects, %s for project, timestamp (string +# value) +#project_cert_subject=/C=US/ST=California/O=OpenStack/OU=NovaDev/CN=project-ca-%.16s-%s + +# Services to be added to the available pool on create (boolean value) +#enable_new_services=true + +# Template string to be used to generate instance names (string value) +#instance_name_template=instance-%08x + +# Template string to be used to generate snapshot names (string value) +#snapshot_name_template=snapshot-%s + +# When set, compute API will consider duplicate hostnames invalid within the +# specified scope, regardless of case. Should be empty, "project" or "global". +# (string value) +#osapi_compute_unique_server_name_scope = + +# Make exception message format errors fatal (boolean value) +#fatal_exception_format_errors=false + +# Parent directory for tempdir used for image decryption (string value) +#image_decryption_dir=/tmp + +# Hostname or IP for OpenStack to use when accessing the S3 api (string value) +#s3_host=$my_ip + +# Port used when accessing the S3 api (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#s3_port=3333 + +# Access key to use for S3 server for images (string value) +#s3_access_key=notchecked + +# Secret key to use for S3 server for images (string value) +#s3_secret_key=notchecked + +# Whether to use SSL when talking to S3 (boolean value) +#s3_use_ssl=false + +# Whether to affix the tenant id to the access key when downloading from S3 +# (boolean value) +#s3_affix_tenant=false + +# IP address of this host (string value) +#my_ip=10.16.48.92 + +# Block storage IP address of this host (string value) +#my_block_storage_ip=$my_ip + +# Name of this node. This can be an opaque identifier. It is not necessarily +# a hostname, FQDN, or IP address. However, the node name must be valid within +# an AMQP key, and if using ZeroMQ, a valid hostname, FQDN, or IP address +# (string value) +#host=x86-017.build.eng.bos.redhat.com + +# Use IPv6 (boolean value) +#use_ipv6=false +use_ipv6=False + +# If set, send compute.instance.update notifications on instance state changes. +# Valid values are None for no notifications, "vm_state" for notifications on +# VM state changes, or "vm_and_task_state" for notifications on VM and task +# state changes. (string value) +#notify_on_state_change=<None> + +# If set, send api.fault notifications on caught exceptions in the API service. +# (boolean value) +#notify_api_faults=false +notify_api_faults=False + +# Default notification level for outgoing notifications (string value) +# Allowed values: DEBUG, INFO, WARN, ERROR, CRITICAL +#default_notification_level=INFO + +# Default publisher_id for outgoing notifications (string value) +#default_publisher_id=<None> + +# DEPRECATED: THIS VALUE SHOULD BE SET WHEN CREATING THE NETWORK. If True in +# multi_host mode, all compute hosts share the same dhcp address. 
The same IP +# address used for DHCP will be added on each nova-network node which is only +# visible to the vms on the same host. (boolean value) +#share_dhcp_address=false + +# DEPRECATED: THIS VALUE SHOULD BE SET WHEN CREATING THE NETWORK. MTU setting +# for network interface. (integer value) +#network_device_mtu=<None> + +# Path to S3 buckets (string value) +#buckets_path=$state_path/buckets + +# IP address for S3 API to listen (string value) +#s3_listen=0.0.0.0 + +# Port for S3 API to listen (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#s3_listen_port=3333 + +# Directory where the nova python module is installed (string value) +#pybasedir=/builddir/build/BUILD/nova-12.0.2 + +# Directory where nova binaries are installed (string value) +#bindir=/usr/local/bin + +# Top-level directory for maintaining nova's state (string value) +#state_path=/var/lib/nova +state_path=/var/lib/nova + +# An alias for a PCI passthrough device requirement. This allows users to +# specify the alias in the extra_spec for a flavor, without needing to repeat +# all the PCI property requirements. For example: pci_alias = { "name": +# "QuickAssist", "product_id": "0443", "vendor_id": "8086", +# "device_type": "ACCEL" } defines an alias for the Intel QuickAssist card. +# (multi valued) (multi valued) +#pci_alias = + +# White list of PCI devices available to VMs. For example: +# pci_passthrough_whitelist = [{"vendor_id": "8086", "product_id": "0443"}] +# (multi valued) +#pci_passthrough_whitelist = + +# Number of instances allowed per project (integer value) +#quota_instances=10 + +# Number of instance cores allowed per project (integer value) +#quota_cores=20 + +# Megabytes of instance RAM allowed per project (integer value) +#quota_ram=51200 + +# Number of floating IPs allowed per project (integer value) +#quota_floating_ips=10 + +# Number of fixed IPs allowed per project (this should be at least the number +# of instances allowed) (integer value) +#quota_fixed_ips=-1 + +# Number of metadata items allowed per instance (integer value) +#quota_metadata_items=128 + +# Number of injected files allowed (integer value) +#quota_injected_files=5 + +# Number of bytes allowed per injected file (integer value) +#quota_injected_file_content_bytes=10240 + +# Length of injected file path (integer value) +#quota_injected_file_path_length=255 + +# Number of security groups per project (integer value) +#quota_security_groups=10 + +# Number of security rules per security group (integer value) +#quota_security_group_rules=20 + +# Number of key pairs per user (integer value) +#quota_key_pairs=100 + +# Number of server groups per project (integer value) +#quota_server_groups=10 + +# Number of servers per server group (integer value) +#quota_server_group_members=10 + +# Number of seconds until a reservation expires (integer value) +#reservation_expire=86400 + +# Count of reservations until usage is refreshed. This defaults to 0(off) to +# avoid additional load but it is useful to turn on to help keep quota usage up +# to date and reduce the impact of out of sync usage issues. (integer value) +#until_refresh=0 + +# Number of seconds between subsequent usage refreshes. This defaults to 0(off) +# to avoid additional load but it is useful to turn on to help keep quota usage +# up to date and reduce the impact of out of sync usage issues. 
Note that +# quotas are not updated on a periodic task, they will update on a new +# reservation if max_age has passed since the last reservation (integer value) +#max_age=0 + +# Default driver to use for quota checks (string value) +#quota_driver=nova.quota.DbQuotaDriver + +# Seconds between nodes reporting state to datastore (integer value) +#report_interval=10 +report_interval=10 + +# Enable periodic tasks (boolean value) +#periodic_enable=true + +# Range of seconds to randomly delay when starting the periodic task scheduler +# to reduce stampeding. (Disable by setting to 0) (integer value) +#periodic_fuzzy_delay=60 + +# A list of APIs to enable by default (list value) +#enabled_apis=ec2,osapi_compute,metadata +enabled_apis=ec2,osapi_compute,metadata + +# A list of APIs with enabled SSL (list value) +#enabled_ssl_apis = + +# The IP address on which the EC2 API will listen. (string value) +#ec2_listen=0.0.0.0 +ec2_listen=0.0.0.0 + +# The port on which the EC2 API will listen. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#ec2_listen_port=8773 +ec2_listen_port=8773 + +# Number of workers for EC2 API service. The default will be equal to the +# number of CPUs available. (integer value) +#ec2_workers=<None> +ec2_workers=12 + +# The IP address on which the OpenStack API will listen. (string value) +#osapi_compute_listen=0.0.0.0 +osapi_compute_listen=0.0.0.0 + +# The port on which the OpenStack API will listen. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#osapi_compute_listen_port=8774 +osapi_compute_listen_port=8774 + +# Number of workers for OpenStack API service. The default will be the number +# of CPUs available. (integer value) +#osapi_compute_workers=<None> +osapi_compute_workers=12 + +# OpenStack metadata service manager (string value) +#metadata_manager=nova.api.manager.MetadataManager + +# The IP address on which the metadata API will listen. (string value) +#metadata_listen=0.0.0.0 +metadata_listen=0.0.0.0 + +# The port on which the metadata API will listen. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#metadata_listen_port=8775 +metadata_listen_port=8775 + +# Number of workers for metadata service. The default will be the number of +# CPUs available. 
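
The *_workers options above default to the host CPU count; this rendered template pins them to 12, and each API keeps its own listen port (8773/8774/8775). A brief illustration of both points:

    # Workers default to the CPU count; the template overrides them with 12.
    import multiprocessing

    print(multiprocessing.cpu_count())  # default worker count on this host

    ports = {"ec2": 8773, "osapi_compute": 8774, "metadata": 8775}
    assert len(set(ports.values())) == len(ports)  # each API listens on its own port
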
(integer value) +#metadata_workers=<None> +metadata_workers=12 + +# Full class name for the Manager for compute (string value) +#compute_manager=nova.compute.manager.ComputeManager +compute_manager=nova.compute.manager.ComputeManager + +# Full class name for the Manager for console proxy (string value) +#console_manager=nova.console.manager.ConsoleProxyManager + +# Manager for console auth (string value) +#consoleauth_manager=nova.consoleauth.manager.ConsoleAuthManager + +# Full class name for the Manager for cert (string value) +#cert_manager=nova.cert.manager.CertManager + +# Full class name for the Manager for network (string value) +#network_manager=nova.network.manager.FlatDHCPManager + +# Full class name for the Manager for scheduler (string value) +#scheduler_manager=nova.scheduler.manager.SchedulerManager + +# Maximum time since last check-in for up service (integer value) +#service_down_time=60 +service_down_time=60 + +# Whether to log monkey patching (boolean value) +#monkey_patch=false + +# List of modules/decorators to monkey patch (list value) +#monkey_patch_modules=nova.api.ec2.cloud:nova.notifications.notify_decorator,nova.compute.api:nova.notifications.notify_decorator + +# Length of generated instance admin passwords (integer value) +#password_length=12 + +# Time period to generate instance usages for. Time period must be hour, day, +# month or year (string value) +#instance_usage_audit_period=month + +# Start and use a daemon that can run the commands that need to be run with +# root privileges. This option is usually enabled on nodes that run nova +# compute processes (boolean value) +#use_rootwrap_daemon=false + +# Path to the rootwrap configuration file to use for running commands as root +# (string value) +#rootwrap_config=/etc/nova/rootwrap.conf +rootwrap_config=/etc/nova/rootwrap.conf + +# Explicitly specify the temporary working directory (string value) +#tempdir=<None> + +# Port that the XCP VNC proxy should bind to (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#xvpvncproxy_port=6081 + +# Address that the XCP VNC proxy should bind to (string value) +#xvpvncproxy_host=0.0.0.0 + +# The full class name of the volume API class to use (string value) +#volume_api_class=nova.volume.cinder.API +volume_api_class=nova.volume.cinder.API + +# File name for the paste.deploy config for nova-api (string value) +#api_paste_config=api-paste.ini +api_paste_config=api-paste.ini + +# A python format string that is used as the template to generate log lines. +# The following values can be formatted into it: client_ip, date_time, +# request_line, status_code, body_length, wall_seconds. (string value) +#wsgi_log_format=%(client_ip)s "%(request_line)s" status: %(status_code)s len: %(body_length)s time: %(wall_seconds).7f + +# The HTTP header used to determine the scheme for the original request, even +# if it was removed by an SSL terminating proxy. Typical value is +# "HTTP_X_FORWARDED_PROTO". (string value) +#secure_proxy_ssl_header=<None> + +# CA certificate file to use to verify connecting clients (string value) +#ssl_ca_file=<None> + +# SSL certificate of API server (string value) +#ssl_cert_file=<None> + +# SSL private key of API server (string value) +#ssl_key_file=<None> + +# Sets the value of TCP_KEEPIDLE in seconds for each server socket. Not +# supported on OS X. (integer value) +#tcp_keepidle=600 + +# Size of the pool of greenthreads used by wsgi (integer value) +#wsgi_default_pool_size=1000 + +# Maximum line size of message headers to be accepted. 
max_header_line may need +# to be increased when using large tokens (typically those generated by the +# Keystone v3 API with big service catalogs). (integer value) +#max_header_line=16384 + +# If False, closes the client socket connection explicitly. (boolean value) +#wsgi_keep_alive=true + +# Timeout for client connections' socket operations. If an incoming connection +# is idle for this number of seconds it will be closed. A value of '0' means +# wait forever. (integer value) +#client_socket_timeout=900 + +# +# From nova.api +# + +# File to load JSON formatted vendor data from (string value) +#vendordata_jsonfile_path=<None> + +# Permit instance snapshot operations. (boolean value) +#allow_instance_snapshots=true + +# Whether to use per-user rate limiting for the api. This option is only used +# by v2 api. Rate limiting is removed from v2.1 api. (boolean value) +#api_rate_limit=false + +# +# The strategy to use for auth: keystone or noauth2. noauth2 is designed for +# testing only, as it does no actual credential checking. noauth2 provides +# administrative credentials only if 'admin' is specified as the username. +# (string value) +#auth_strategy=keystone +auth_strategy=keystone + +# Treat X-Forwarded-For as the canonical remote address. Only enable this if +# you have a sanitizing proxy. (boolean value) +#use_forwarded_for=false +use_forwarded_for=False + +# The IP address of the EC2 API server (string value) +#ec2_host=$my_ip + +# The internal IP address of the EC2 API server (string value) +#ec2_dmz_host=$my_ip + +# The port of the EC2 API server (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#ec2_port=8773 + +# The protocol to use when connecting to the EC2 API server (string value) +# Allowed values: http, https +#ec2_scheme=http + +# The path prefix used to call the ec2 API server (string value) +#ec2_path=/ + +# List of region=fqdn pairs separated by commas (list value) +#region_list = + +# Number of failed auths before lockout. (integer value) +#lockout_attempts=5 + +# Number of minutes to lockout if triggered. (integer value) +#lockout_minutes=15 + +# Number of minutes for lockout window. (integer value) +#lockout_window=15 + +# URL to get token from ec2 request. (string value) +#keystone_ec2_url=http://localhost:5000/v2.0/ec2tokens + +# Return the IP address as private dns hostname in describe instances (boolean +# value) +#ec2_private_dns_show_ip=false + +# Validate security group names according to EC2 specification (boolean value) +#ec2_strict_validation=true + +# Time in seconds before ec2 timestamp expires (integer value) +#ec2_timestamp_expiry=300 + +# Disable SSL certificate verification. (boolean value) +#keystone_ec2_insecure=false + +# List of metadata versions to skip placing into the config drive (string +# value) +#config_drive_skip_versions=1.0 2007-01-19 2007-03-01 2007-08-29 2007-10-10 2007-12-15 2008-02-01 2008-09-01 + +# Driver to use for vendor data (string value) +#vendordata_driver=nova.api.metadata.vendordata_json.JsonFileVendorData + +# Time in seconds to cache metadata; 0 to disable metadata caching entirely +# (not recommended). Increasing this should improve response times of the +# metadata API when under heavy load. Higher values may increase memory usage +# and result in longer times for host metadata changes to take effect.
(integer +# value) +#metadata_cache_expiration=15 + +# The maximum number of items returned in a single response from a collection +# resource (integer value) +#osapi_max_limit=1000 + +# Base URL that will be presented to users in links to the OpenStack Compute +# API (string value) +#osapi_compute_link_prefix=<None> + +# Base URL that will be presented to users in links to glance resources (string +# value) +#osapi_glance_link_prefix=<None> + +# DEPRECATED: Specify list of extensions to load when using +# osapi_compute_extension option with +# nova.api.openstack.compute.legacy_v2.contrib.select_extensions This option +# will be removed in the near future. After that point you have to run all of +# the API. (list value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#osapi_compute_ext_list = + +# Full path to fping. (string value) +#fping_path=/usr/sbin/fping +fping_path=/usr/sbin/fping + +# Enables or disables quota checking for tenant networks (boolean value) +#enable_network_quota=false + +# Control for checking for default networks (string value) +#use_neutron_default_nets=False + +# Default tenant id when creating neutron networks (string value) +#neutron_default_tenant_id=default + +# Number of private networks allowed per project (integer value) +#quota_networks=3 + +# osapi compute extension to load. This option will be removed in the near +# future. After that point you have to run all of the API. (multi valued) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#osapi_compute_extension=nova.api.openstack.compute.legacy_v2.contrib.standard_extensions + +# List of instance states that should hide network info (list value) +#osapi_hide_server_address_states=building + +# Enables returning of the instance password by the relevant server API calls +# such as create, rebuild or rescue, If the hypervisor does not support +# password injection then the password returned will not be correct (boolean +# value) +#enable_instance_password=true + +# +# From nova.compute +# + +# Allow destination machine to match source for resize. Useful when testing in +# single-host environments. (boolean value) +#allow_resize_to_same_host=false +allow_resize_to_same_host=False + +# Availability zone to use when user doesn't specify one (string value) +#default_schedule_zone=<None> + +# These are image properties which a snapshot should not inherit from an +# instance (list value) +#non_inheritable_image_properties=cache_in_nova,bittorrent + +# Kernel image that indicates not to use a kernel, but to use a raw disk image +# instead (string value) +#null_kernel=nokernel + +# When creating multiple instances with a single request using the os-multiple- +# create API extension, this template will be used to build the display name +# for each instance. The benefit is that the instances end up with different +# hostnames. To restore legacy behavior of every instance having the same name, +# set this option to "%(name)s". Valid keys for the template are: name, uuid, +# count. (string value) +#multi_instance_display_name_template=%(name)s-%(count)d + +# Maximum number of devices that will result in a local image being created on +# the hypervisor node. A negative number means unlimited. Setting +# max_local_block_devices to 0 means that any request that attempts to create a +# local disk will fail. 
This option is meant to limit the number of local disks +# (so root local disk that is the result of --image being used, and any other +# ephemeral and swap disks). 0 does not mean that images will be automatically +# converted to volumes and boot instances from volumes - it just means that all +# requests that attempt to create a local disk will fail. (integer value) +#max_local_block_devices=3 + +# Default flavor to use for the EC2 API only. The Nova API does not support a +# default flavor. (string value) +#default_flavor=m1.small + +# Console proxy host to use to connect to instances on this host. (string +# value) +#console_host=x86-017.build.eng.bos.redhat.com + +# Name of network to use to set access IPs for instances (string value) +#default_access_ip_network_name=<None> + +# Whether to batch up the application of IPTables rules during a host restart +# and apply all at the end of the init phase (boolean value) +#defer_iptables_apply=false + +# Where instances are stored on disk (string value) +#instances_path=$state_path/instances + +# Generate periodic compute.instance.exists notifications (boolean value) +#instance_usage_audit=false + +# Number of 1 second retries needed in live_migration (integer value) +#live_migration_retry_count=30 + +# Whether to start guests that were running before the host rebooted (boolean +# value) +#resume_guests_state_on_host_boot=false + +# Number of times to retry network allocation on failures (integer value) +#network_allocate_retries=0 + +# Maximum number of instance builds to run concurrently (integer value) +#max_concurrent_builds=10 + +# Maximum number of live migrations to run concurrently. This limit is enforced +# to avoid outbound live migrations overwhelming the host/network and causing +# failures. It is not recommended that you change this unless you are very sure +# that doing so is safe and stable in your environment. (integer value) +#max_concurrent_live_migrations=1 + +# Number of times to retry block device allocation on failures (integer value) +#block_device_allocate_retries=60 + +# The number of times to attempt to reap an instance's files. (integer value) +#maximum_instance_delete_attempts=5 + +# Interval to pull network bandwidth usage info. Not supported on all +# hypervisors. Set to -1 to disable. Setting this to 0 will run at the default +# rate. (integer value) +#bandwidth_poll_interval=600 + +# Interval to sync power states between the database and the hypervisor. Set to +# -1 to disable. Setting this to 0 will run at the default rate. (integer +# value) +#sync_power_state_interval=600 + +# Number of seconds between instance network information cache updates (integer +# value) +#heal_instance_info_cache_interval=60 +heal_instance_info_cache_interval=60 + +# Interval in seconds for reclaiming deleted instances (integer value) +#reclaim_instance_interval=0 + +# Interval in seconds for gathering volume usages (integer value) +#volume_usage_poll_interval=0 + +# Interval in seconds for polling shelved instances to offload. Set to -1 to +# disable. Setting this to 0 will run at the default rate. (integer value) +#shelved_poll_interval=3600 + +# Time in seconds before a shelved instance is eligible for removing from a +# host. -1 never offload, 0 offload immediately when shelved (integer value) +#shelved_offload_time=0 + +# Interval in seconds for retrying failed instance file deletes. Set to -1 to +# disable. Setting this to 0 will run at the default rate.
(integer value) +#instance_delete_interval=300 + +# Waiting time interval (seconds) between block device allocation retries on +# failures (integer value) +#block_device_allocate_retries_interval=3 + +# Waiting time interval (seconds) between sending the scheduler a list of +# current instance UUIDs to verify that its view of instances is in sync with +# nova. If the CONF option `scheduler_tracks_instance_changes` is False, +# changing this option will have no effect. (integer value) +#scheduler_instance_sync_interval=120 + +# Interval in seconds for updating compute resources. A number less than 0 +# means to disable the task completely. Leaving this at the default of 0 will +# cause this to run at the default periodic interval. Setting it to any +# positive value will cause it to run at approximately that number of seconds. +# (integer value) +#update_resources_interval=0 + +# Action to take if a running deleted instance is detected. Set to 'noop' to +# take no action. (string value) +# Allowed values: noop, log, shutdown, reap +#running_deleted_instance_action=reap + +# Number of seconds to wait between runs of the cleanup task. (integer value) +#running_deleted_instance_poll_interval=1800 + +# Number of seconds after being deleted when a running instance should be +# considered eligible for cleanup. (integer value) +#running_deleted_instance_timeout=0 + +# Automatically hard reboot an instance if it has been stuck in a rebooting +# state longer than N seconds. Set to 0 to disable. (integer value) +#reboot_timeout=0 + +# Amount of time in seconds an instance can be in BUILD before going into ERROR +# status. Set to 0 to disable. (integer value) +#instance_build_timeout=0 + +# Automatically unrescue an instance after N seconds. Set to 0 to disable. +# (integer value) +#rescue_timeout=0 + +# Automatically confirm resizes after N seconds. Set to 0 to disable. (integer +# value) +#resize_confirm_window=0 + +# Total amount of time to wait in seconds for an instance to perform a clean +# shutdown. (integer value) +#shutdown_timeout=60 + +# Monitor classes available to the compute which may be specified more than +# once. This option is DEPRECATED and no longer used. Use setuptools entry +# points to list available monitor plugins. (multi valued) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#compute_available_monitors = + +# A list of monitors that can be used for getting compute metrics. You can use +# the alias/name from the setuptools entry points for nova.compute.monitors.* +# namespaces. If no namespace is supplied, the "cpu." namespace is assumed for +# backwards-compatibility. An example value that would enable both the CPU and +# NUMA memory bandwidth monitors that used the virt driver variant: +# ["cpu.virt_driver", "numa_mem_bw.virt_driver"] (list value) +#compute_monitors = + +# Amount of disk in MB to reserve for the host (integer value) +#reserved_host_disk_mb=0 + +# Amount of memory in MB to reserve for the host (integer value) +#reserved_host_memory_mb=512 +reserved_host_memory_mb=512 + +# Class that will manage stats for the local compute host (string value) +#compute_stats_class=nova.compute.stats.Stats + +# The names of the extra resources to track. (list value) +#compute_resources=vcpu + +# Virtual CPU to physical CPU allocation ratio which affects all CPU filters. +# This configuration specifies a global ratio for CoreFilter.
For +# AggregateCoreFilter, it will fall back to this configuration value if no per- +# aggregate setting found. NOTE: This can be set per-compute, or if set to 0.0, +# the value set on the scheduler node(s) will be used and defaulted to 16.0 +# (floating point value) +#cpu_allocation_ratio=0.0 +cpu_allocation_ratio=16.0 + +# Virtual ram to physical ram allocation ratio which affects all ram filters. +# This configuration specifies a global ratio for RamFilter. For +# AggregateRamFilter, it will fall back to this configuration value if no per- +# aggregate setting found. NOTE: This can be set per-compute, or if set to 0.0, +# the value set on the scheduler node(s) will be used and defaulted to 1.5 +# (floating point value) +#ram_allocation_ratio=0.0 +ram_allocation_ratio=1.5 + +# The topic compute nodes listen on (string value) +#compute_topic=compute + +# +# From nova.network +# + +# The full class name of the network API class to use (string value) +#network_api_class=nova.network.api.API +network_api_class=nova.network.neutronv2.api.API + +# Driver to use for network creation (string value) +#network_driver=nova.network.linux_net + +# Default pool for floating IPs (string value) +#default_floating_pool=nova +default_floating_pool=public + +# Autoassigning floating IP to VM (boolean value) +#auto_assign_floating_ip=false + +# Full class name for the DNS Manager for floating IPs (string value) +#floating_ip_dns_manager=nova.network.noop_dns_driver.NoopDNSDriver + +# Full class name for the DNS Manager for instance IPs (string value) +#instance_dns_manager=nova.network.noop_dns_driver.NoopDNSDriver + +# Full class name for the DNS Zone for instance IPs (string value) +#instance_dns_domain = + +# URL for LDAP server which will store DNS entries (string value) +#ldap_dns_url=ldap://ldap.example.com:389 + +# User for LDAP DNS (string value) +#ldap_dns_user=uid=admin,ou=people,dc=example,dc=org + +# Password for LDAP DNS (string value) +#ldap_dns_password=password + +# Hostmaster for LDAP DNS driver Statement of Authority (string value) +#ldap_dns_soa_hostmaster=hostmaster@example.org + +# DNS Servers for LDAP DNS driver (multi valued) +#ldap_dns_servers=dns.example.org + +# Base DN for DNS entries in LDAP (string value) +#ldap_dns_base_dn=ou=hosts,dc=example,dc=org + +# Refresh interval (in seconds) for LDAP DNS driver Statement of Authority +# (string value) +#ldap_dns_soa_refresh=1800 + +# Retry interval (in seconds) for LDAP DNS driver Statement of Authority +# (string value) +#ldap_dns_soa_retry=3600 + +# Expiry interval (in seconds) for LDAP DNS driver Statement of Authority +# (string value) +#ldap_dns_soa_expiry=86400 + +# Minimum interval (in seconds) for LDAP DNS driver Statement of Authority +# (string value) +#ldap_dns_soa_minimum=7200 + +# Location of flagfiles for dhcpbridge (multi valued) +#dhcpbridge_flagfile=/etc/nova/nova.conf + +# Location to keep network config files (string value) +#networks_path=$state_path/networks + +# Interface for public IP addresses (string value) +#public_interface=eth0 + +# Location of nova-dhcpbridge (string value) +#dhcpbridge=/usr/bin/nova-dhcpbridge + +# Public IP of network host (string value) +#routing_source_ip=$my_ip + +# Lifetime of a DHCP lease in seconds (integer value) +#dhcp_lease_time=86400 + +# If set, uses specific DNS server for dnsmasq. Can be specified multiple +# times. (multi valued) +#dns_server = + +# If set, uses the dns1 and dns2 from the network ref. as dns servers. 
(boolean +# value) +#use_network_dns_servers=false + +# A list of dmz ranges that should be accepted (list value) +#dmz_cidr = + +# Traffic to this range will always be snatted to the fallback ip, even if it +# would normally be bridged out of the node. Can be specified multiple times. +# (multi valued) +#force_snat_range = +force_snat_range =0.0.0.0/0 + +# Override the default dnsmasq settings with this file (string value) +#dnsmasq_config_file = + +# Driver used to create ethernet devices. (string value) +#linuxnet_interface_driver=nova.network.linux_net.LinuxBridgeInterfaceDriver + +# Name of Open vSwitch bridge used with linuxnet (string value) +#linuxnet_ovs_integration_bridge=br-int + +# Send gratuitous ARPs for HA setup (boolean value) +#send_arp_for_ha=false + +# Send this many gratuitous ARPs for HA setup (integer value) +#send_arp_for_ha_count=3 + +# Use single default gateway. Only first nic of vm will get default gateway +# from dhcp server (boolean value) +#use_single_default_gateway=false + +# An interface that bridges can forward to. If this is set to all then all +# traffic will be forwarded. Can be specified multiple times. (multi valued) +#forward_bridge_interface=all + +# The IP address for the metadata API server (string value) +#metadata_host=$my_ip +metadata_host=VARINET4ADDR + +# The port for the metadata API port (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#metadata_port=8775 + +# Regular expression to match the iptables rule that should always be on the +# top. (string value) +#iptables_top_regex = + +# Regular expression to match the iptables rule that should always be on the +# bottom. (string value) +#iptables_bottom_regex = + +# The table that iptables to jump to when a packet is to be dropped. (string +# value) +#iptables_drop_action=DROP + +# Amount of time, in seconds, that ovs_vsctl should wait for a response from +# the database. 0 is to wait forever. (integer value) +#ovs_vsctl_timeout=120 + +# If passed, use fake network devices and addresses (boolean value) +#fake_network=false + +# Number of times to retry ebtables commands on failure. (integer value) +#ebtables_exec_attempts=3 + +# Number of seconds to wait between ebtables retries. 
(floating point value) +#ebtables_retry_interval=1.0 + +# Bridge for simple network instances (string value) +#flat_network_bridge=<None> + +# DNS server for simple network (string value) +#flat_network_dns=8.8.4.4 + +# Whether to attempt to inject network setup into guest (boolean value) +#flat_injected=false + +# FlatDhcp will bridge into this interface if set (string value) +#flat_interface=<None> + +# First VLAN for private networks (integer value) +# Minimum value: 1 +# Maximum value: 4094 +#vlan_start=100 + +# VLANs will bridge into this interface if set (string value) +#vlan_interface=<None> + +# Number of networks to support (integer value) +#num_networks=1 + +# Public IP for the cloudpipe VPN servers (string value) +#vpn_ip=$my_ip + +# First Vpn port for private networks (integer value) +#vpn_start=1000 + +# Number of addresses in each private subnet (integer value) +#network_size=256 + +# Fixed IPv6 address block (string value) +#fixed_range_v6=fd00::/48 + +# Default IPv4 gateway (string value) +#gateway=<None> + +# Default IPv6 gateway (string value) +#gateway_v6=<None> + +# Number of addresses reserved for vpn clients (integer value) +#cnt_vpn_clients=0 + +# Seconds after which a deallocated IP is disassociated (integer value) +#fixed_ip_disassociate_timeout=600 + +# Number of attempts to create unique mac address (integer value) +#create_unique_mac_address_attempts=5 + +# If True, skip using the queue and make local calls (boolean value) +#fake_call=false + +# If True, unused gateway devices (VLAN and bridge) are deleted in VLAN network +# mode with multi hosted networks (boolean value) +#teardown_unused_network_gateway=false + +# If True, send a dhcp release on instance termination (boolean value) +#force_dhcp_release=True + +# If True, when a DNS entry must be updated, it sends a fanout cast to all +# network hosts to update their DNS entries in multi host mode (boolean value) +#update_dns_entries=false + +# Number of seconds to wait between runs of updates to DNS entries. (integer +# value) +#dns_update_periodic_interval=-1 + +# Domain to use for building the hostnames (string value) +#dhcp_domain=novalocal +dhcp_domain=novalocal + +# Indicates underlying L3 management library (string value) +#l3_lib=nova.network.l3.LinuxNetL3 + +# The topic network nodes listen on (string value) +#network_topic=network + +# Default value for multi_host in networks. Also, if set, some rpc network +# calls will be sent directly to host. (boolean value) +#multi_host=false + +# The full class name of the security API class (string value) +#security_group_api=nova +security_group_api=neutron + +# +# From nova.openstack.common.memorycache +# + +# Memcached servers or None for in process cache. (list value) +#memcached_servers=<None> + +# +# From nova.openstack.common.policy +# + +# The JSON file that defines policies. (string value) +#policy_file=policy.json + +# Default rule. Enforced when a requested rule is not found. (string value) +#policy_default_rule=default + +# Directories where policy configuration files are stored. They can be relative +# to any directory in the search path defined by the config_dir option, or +# absolute paths. The file defined by policy_file must exist for these +# directories to be searched. Missing or empty directories are ignored. 
(multi +# valued) +#policy_dirs=policy.d + +# +# From nova.scheduler +# + +# Virtual disk to physical disk allocation ratio (floating point value) +#disk_allocation_ratio=1.0 + +# Tells filters to ignore hosts that have this many or more instances currently +# in build, resize, snapshot, migrate, rescue or unshelve task states (integer +# value) +#max_io_ops_per_host=8 + +# Ignore hosts that have too many instances (integer value) +#max_instances_per_host=50 + +# Absolute path to scheduler configuration JSON file. (string value) +#scheduler_json_config_location = + +# The scheduler host manager class to use (string value) +#scheduler_host_manager=nova.scheduler.host_manager.HostManager + +# New instances will be scheduled on a host chosen randomly from a subset of +# the N best hosts. This property defines the subset size that a host is chosen +# from. A value of 1 chooses the first host returned by the weighing functions. +# This value must be at least 1. Any value less than 1 will be ignored, and 1 +# will be used instead (integer value) +#scheduler_host_subset_size=1 + +# Force the filter to consider only keys matching the given namespace. (string +# value) +#aggregate_image_properties_isolation_namespace=<None> + +# The separator used between the namespace and keys (string value) +#aggregate_image_properties_isolation_separator=. + +# Images to run on isolated host (list value) +#isolated_images = + +# Host reserved for specific images (list value) +#isolated_hosts = + +# Whether to force isolated hosts to run only isolated images (boolean value) +#restrict_isolated_hosts_to_isolated_images=true + +# Filter classes available to the scheduler which may be specified more than +# once. An entry of "nova.scheduler.filters.all_filters" maps to all filters +# included with nova. (multi valued) +#scheduler_available_filters=nova.scheduler.filters.all_filters + +# Which filter class names to use for filtering hosts when not specified in the +# request. (list value) +#scheduler_default_filters=RetryFilter,AvailabilityZoneFilter,RamFilter,DiskFilter,ComputeFilter,ComputeCapabilitiesFilter,ImagePropertiesFilter,ServerGroupAntiAffinityFilter,ServerGroupAffinityFilter +scheduler_default_filters=RetryFilter,AvailabilityZoneFilter,RamFilter,ComputeFilter,ComputeCapabilitiesFilter,ImagePropertiesFilter,CoreFilter + +# Which weight class names to use for weighing hosts (list value) +#scheduler_weight_classes=nova.scheduler.weights.all_weighers + +# Determines if the Scheduler tracks changes to instances to help with its +# filtering decisions. (boolean value) +#scheduler_tracks_instance_changes=true + +# Which filter class names to use for filtering baremetal hosts when not +# specified in the request. (list value) +#baremetal_scheduler_default_filters=RetryFilter,AvailabilityZoneFilter,ComputeFilter,ComputeCapabilitiesFilter,ImagePropertiesFilter,ExactRamFilter,ExactDiskFilter,ExactCoreFilter + +# Flag to decide whether to use baremetal_scheduler_default_filters or not. +# (boolean value) +#scheduler_use_baremetal_filters=false + +# Default driver to use for the scheduler (string value) +#scheduler_driver=nova.scheduler.filter_scheduler.FilterScheduler +scheduler_driver=nova.scheduler.filter_scheduler.FilterScheduler + +# How often (in seconds) to run periodic tasks in the scheduler driver of your +# choice. Please note this is likely to interact with the value of +# service_down_time, but exactly how they interact will depend on your choice +# of scheduler driver. 
(integer value) +#scheduler_driver_task_period=60 + +# The topic scheduler nodes listen on (string value) +#scheduler_topic=scheduler + +# Maximum number of attempts to schedule an instance (integer value) +#scheduler_max_attempts=3 + +# Multiplier used for weighing host io ops. Negative numbers mean a preference +# to choose light workload compute hosts. (floating point value) +#io_ops_weight_multiplier=-1.0 + +# Multiplier used for weighing ram. Negative numbers mean to stack vs spread. +# (floating point value) +#ram_weight_multiplier=1.0 + +# +# From nova.virt +# + +# Config drive format. (string value) +# Allowed values: iso9660, vfat +#config_drive_format=iso9660 + +# Set to "always" to force injection to take place on a config drive. NOTE: The +# "always" will be deprecated in the Liberty release cycle. (string value) +# Allowed values: always, True, False +#force_config_drive=<None> + +# Name and optionally path of the tool used for ISO image creation (string +# value) +#mkisofs_cmd=genisoimage + +# Name of the mkfs commands for ephemeral device. The format is <os_type>=<mkfs +# command> (multi valued) +#virt_mkfs = + +# Attempt to resize the filesystem by accessing the image over a block device. +# This is done by the host and may not be necessary if the image contains a +# recent version of cloud-init. Possible mechanisms require the nbd driver (for +# qcow and raw), or loop (for raw). (boolean value) +#resize_fs_using_block_device=false + +# Amount of time, in seconds, to wait for NBD device start up. (integer value) +#timeout_nbd=10 + +# Driver to use for controlling virtualization. Options include: +# libvirt.LibvirtDriver, xenapi.XenAPIDriver, fake.FakeDriver, +# ironic.IronicDriver, vmwareapi.VMwareVCDriver, hyperv.HyperVDriver (string +# value) +#compute_driver=libvirt.LibvirtDriver +compute_driver=libvirt.LibvirtDriver + +# The default format an ephemeral_volume will be formatted with on creation. +# (string value) +#default_ephemeral_format=<None> + +# VM image preallocation mode: "none" => no storage provisioning is done up +# front, "space" => storage is fully allocated at instance start (string value) +# Allowed values: none, space +#preallocate_images=none + +# Whether to use cow images (boolean value) +#use_cow_images=true + +# Fail instance boot if vif plugging fails (boolean value) +#vif_plugging_is_fatal=true +vif_plugging_is_fatal=True + +# Number of seconds to wait for neutron vif plugging events to arrive before +# continuing or failing (see vif_plugging_is_fatal). If this is set to zero and +# vif_plugging_is_fatal is False, events should not be expected to arrive at +# all. (integer value) +#vif_plugging_timeout=300 +vif_plugging_timeout=300 + +# Firewall driver (defaults to hypervisor specific iptables driver) (string +# value) +#firewall_driver=nova.virt.libvirt.firewall.IptablesFirewallDriver +firewall_driver=nova.virt.firewall.NoopFirewallDriver + +# Whether to allow network traffic from same network (boolean value) +#allow_same_net_traffic=true + +# Defines which pcpus that instance vcpus can use. For example, "4-12,^8,15" +# (string value) +#vcpu_pin_set=<None> + +# Number of seconds to wait between runs of the image cache manager. Set to -1 +# to disable. Setting this to 0 will run at the default rate. (integer value) +#image_cache_manager_interval=2400 + +# Where cached images are stored under $instances_path. This is NOT the full +# path - just a folder name. 
For per-compute-host cached images, set to +# _base_$my_ip (string value) +#image_cache_subdirectory_name=_base + +# Should unused base images be removed? (boolean value) +#remove_unused_base_images=true + +# Unused unresized base images younger than this will not be removed (integer +# value) +#remove_unused_original_minimum_age_seconds=86400 + +# Force backing images to raw format (boolean value) +#force_raw_images=true +force_raw_images=True + +# Template file for injected network (string value) +#injected_network_template=/usr/share/nova/interfaces.template + +# +# From oslo.log +# + +# Print debugging output (set logging level to DEBUG instead of default INFO +# level). (boolean value) +#debug=false +debug=True + +# If set to false, will disable INFO logging level, making WARNING the default. +# (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#verbose=true +verbose=True + +# The name of a logging configuration file. This file is appended to any +# existing logging configuration files. For details about logging configuration +# files, see the Python logging module documentation. (string value) +# Deprecated group;name - DEFAULT;log_config +#log_config_append=<None> + +# DEPRECATED. A logging.Formatter log message format string which may use any +# of the available logging.LogRecord attributes. This option is deprecated. +# Please use logging_context_format_string and logging_default_format_string +# instead. (string value) +#log_format=<None> + +# Format string for %%(asctime)s in log records. Default: %(default)s . (string +# value) +#log_date_format=%Y-%m-%d %H:%M:%S + +# (Optional) Name of log file to output to. If no default is set, logging will +# go to stdout. (string value) +# Deprecated group;name - DEFAULT;logfile +#log_file=<None> + +# (Optional) The base directory used for relative --log-file paths. (string +# value) +# Deprecated group;name - DEFAULT;logdir +#log_dir=/var/log/nova +log_dir=/var/log/nova + +# Use syslog for logging. Existing syslog format is DEPRECATED and will be +# changed later to honor RFC5424. (boolean value) +#use_syslog=false +use_syslog=False + +# (Optional) Enables or disables syslog rfc5424 format for logging. If enabled, +# prefixes the MSG part of the syslog message with APP-NAME (RFC5424). The +# format without the APP-NAME is deprecated in Kilo, and will be removed in +# Mitaka, along with this option. (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#use_syslog_rfc_format=true + +# Syslog facility to receive log lines. (string value) +#syslog_log_facility=LOG_USER +syslog_log_facility=LOG_USER + +# Log output to standard error. (boolean value) +#use_stderr=False +use_stderr=True + +# Format string to use for log messages with context. (string value) +#logging_context_format_string=%(asctime)s.%(msecs)03d %(process)d %(levelname)s %(name)s [%(request_id)s %(user_identity)s] %(instance)s%(message)s + +# Format string to use for log messages without context. (string value) +#logging_default_format_string=%(asctime)s.%(msecs)03d %(process)d %(levelname)s %(name)s [-] %(instance)s%(message)s + +# Data to append to log format when level is DEBUG. (string value) +#logging_debug_format_suffix=%(funcName)s %(pathname)s:%(lineno)d + +# Prefix each line of exception output with this format. (string value) +#logging_exception_prefix=%(asctime)s.%(msecs)03d %(process)d ERROR %(name)s %(instance)s + +# List of logger=LEVEL pairs. 
(list value) +#default_log_levels=amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=WARN,urllib3.connectionpool=WARN,websocket=WARN,requests.packages.urllib3.util.retry=WARN,urllib3.util.retry=WARN,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,taskflow=WARN + +# Enables or disables publication of error events. (boolean value) +#publish_errors=false + +# The format for an instance that is passed with the log message. (string +# value) +#instance_format="[instance: %(uuid)s] " + +# The format for an instance UUID that is passed with the log message. (string +# value) +#instance_uuid_format="[instance: %(uuid)s] " + +# Enables or disables fatal status of deprecations. (boolean value) +#fatal_deprecations=false + +# +# From oslo.messaging +# + +# Size of RPC connection pool. (integer value) +# Deprecated group;name - DEFAULT;rpc_conn_pool_size +#rpc_conn_pool_size=30 + +# ZeroMQ bind address. Should be a wildcard (*), an ethernet interface, or IP. +# The "host" option should point or resolve to this address. (string value) +#rpc_zmq_bind_address=* + +# MatchMaker driver. (string value) +#rpc_zmq_matchmaker=local + +# ZeroMQ receiver listening port. (integer value) +#rpc_zmq_port=9501 + +# Number of ZeroMQ contexts, defaults to 1. (integer value) +#rpc_zmq_contexts=1 + +# Maximum number of ingress messages to locally buffer per topic. Default is +# unlimited. (integer value) +#rpc_zmq_topic_backlog=<None> + +# Directory for holding IPC sockets. (string value) +#rpc_zmq_ipc_dir=/var/run/openstack + +# Name of this node. Must be a valid hostname, FQDN, or IP address. Must match +# "host" option, if running Nova. (string value) +#rpc_zmq_host=localhost + +# Seconds to wait before a cast expires (TTL). Only supported by impl_zmq. +# (integer value) +#rpc_cast_timeout=30 + +# Heartbeat frequency. (integer value) +#matchmaker_heartbeat_freq=300 + +# Heartbeat time-to-live. (integer value) +#matchmaker_heartbeat_ttl=600 + +# Size of executor thread pool. (integer value) +# Deprecated group;name - DEFAULT;rpc_thread_pool_size +#executor_thread_pool_size=64 + +# The Drivers(s) to handle sending notifications. Possible values are +# messaging, messagingv2, routing, log, test, noop (multi valued) +#notification_driver = +notification_driver =nova.openstack.common.notifier.rabbit_notifier,ceilometer.compute.nova_notifier + +# AMQP topic used for OpenStack notifications. (list value) +# Deprecated group;name - [rpc_notifier2]/topics +#notification_topics=notifications +notification_topics=notifications + +# Seconds to wait for a response from a call. (integer value) +#rpc_response_timeout=60 + +# A URL representing the messaging driver to use and its full configuration. If +# not set, we fall back to the rpc_backend option and driver specific +# configuration. (string value) +#transport_url=<None> + +# The messaging driver to use, defaults to rabbit. Other drivers include qpid +# and zmq. (string value) +#rpc_backend=rabbit +rpc_backend=rabbit + +# The default exchange under which topics are scoped. May be overridden by an +# exchange name specified in the transport_url option. (string value) +#control_exchange=openstack + +# +# From oslo.service.periodic_task +# + +# Some periodic tasks can be run in a separate process. Should we run them +# here? (boolean value) +#run_external_periodic_tasks=true + +# +# From oslo.service.service +# + +# Enable eventlet backdoor. 
Acceptable values are 0, <port>, and +# <start>:<end>, where 0 results in listening on a random tcp port number; +# <port> results in listening on the specified port number (and not enabling +# backdoor if that port is in use); and <start>:<end> results in listening on +# the smallest unused port number within the specified range of port numbers. +# The chosen port is displayed in the service's log file. (string value) +#backdoor_port=<None> + +# Enables or disables logging values of all registered options when starting a +# service (at DEBUG level). (boolean value) +#log_options=true +sql_connection=mysql+pymysql://nova:qum5net@VARINET4ADDR/nova +image_service=nova.image.glance.GlanceImageService +lock_path=/var/lib/nova/tmp +osapi_volume_listen=0.0.0.0 +vncserver_proxyclient_address=VARHOSTNAME.ceph.redhat.com +vnc_keymap=en-us +vnc_enabled=True +vncserver_listen=0.0.0.0 +novncproxy_base_url=http://VARINET4ADDR:6080/vnc_auto.html + +rbd_user = cinder +rbd_secret_uuid = RBDSECRET + +[api_database] + +# +# From nova +# + +# The SQLAlchemy connection string to use to connect to the Nova API database. +# (string value) +#connection=mysql://nova:nova@localhost/nova + +# If True, SQLite uses synchronous mode. (boolean value) +#sqlite_synchronous=true + +# The SQLAlchemy connection string to use to connect to the slave database. +# (string value) +#slave_connection=<None> + +# The SQL mode to be used for MySQL sessions. This option, including the +# default, overrides any server-set SQL mode. To use whatever SQL mode is set +# by the server configuration, set this to no value. Example: mysql_sql_mode= +# (string value) +#mysql_sql_mode=TRADITIONAL + +# Timeout before idle SQL connections are reaped. (integer value) +#idle_timeout=3600 + +# Maximum number of SQL connections to keep open in a pool. (integer value) +#max_pool_size=<None> + +# Maximum number of database connection retries during startup. Set to -1 to +# specify an infinite retry count. (integer value) +#max_retries=-1 + +# Interval between retries of opening a SQL connection. (integer value) +#retry_interval=10 + +# If set, use this value for max_overflow with SQLAlchemy. (integer value) +#max_overflow=<None> + +# Verbosity of SQL debugging information: 0=None, 100=Everything. (integer +# value) +#connection_debug=0 + +# Add Python stack traces to SQL as comment strings. (boolean value) +#connection_trace=false + +# If set, use this value for pool_timeout with SQLAlchemy. (integer value) +#pool_timeout=<None> + + +[barbican] + +# +# From nova +# + +# Info to match when looking for barbican in the service catalog. Format is: +# separated values of the form: <service_type>:<service_name>:<endpoint_type> +# (string value) +#catalog_info=key-manager:barbican:public + +# Override service catalog lookup with template for barbican endpoint e.g. +# http://localhost:9311/v1/%(project_id)s (string value) +#endpoint_template=<None> + +# Region name of this node (string value) +#os_region_name=<None> + + +[cells] + +# +# From nova.cells +# + +# Enable cell functionality (boolean value) +#enable=false + +# The topic cells nodes listen on (string value) +#topic=cells + +# Manager for cells (string value) +#manager=nova.cells.manager.CellsManager + +# Name of this cell (string value) +#name=nova + +# Key/Multi-value list with the capabilities of the cell (list value) +#capabilities=hypervisor=xenserver;kvm,os=linux;windows + +# Seconds to wait for response from a call to a cell. 
(integer value) +#call_timeout=60 + +# Percentage of cell capacity to hold in reserve. Affects both memory and disk +# utilization (floating point value) +#reserve_percent=10.0 + +# Type of cell (string value) +# Allowed values: api, compute +#cell_type=compute + +# Number of seconds after which a lack of capability and capacity updates +# signals the child cell is to be treated as a mute. (integer value) +#mute_child_interval=300 + +# Seconds between bandwidth updates for cells. (integer value) +#bandwidth_update_interval=600 + +# Cells communication driver to use (string value) +#driver=nova.cells.rpc_driver.CellsRPCDriver + +# Number of seconds after an instance was updated or deleted to continue to +# update cells (integer value) +#instance_updated_at_threshold=3600 + +# Number of instances to update per periodic task run (integer value) +#instance_update_num_instances=1 + +# Maximum number of hops for cells routing. (integer value) +#max_hop_count=10 + +# Cells scheduler to use (string value) +#scheduler=nova.cells.scheduler.CellsScheduler + +# Base queue name to use when communicating between cells. Various topics by +# message type will be appended to this. (string value) +#rpc_driver_queue_base=cells.intercell + +# Filter classes the cells scheduler should use. An entry of +# "nova.cells.filters.all_filters" maps to all cells filters included with +# nova. (list value) +#scheduler_filter_classes=nova.cells.filters.all_filters + +# Weigher classes the cells scheduler should use. An entry of +# "nova.cells.weights.all_weighers" maps to all cell weighers included with +# nova. (list value) +#scheduler_weight_classes=nova.cells.weights.all_weighers + +# How many retries when no cells are available. (integer value) +#scheduler_retries=10 + +# How often to retry in seconds when no cells are available. (integer value) +#scheduler_retry_delay=2 + +# Interval, in seconds, for getting fresh cell information from the database. +# (integer value) +#db_check_interval=60 + +# Configuration file from which to read cells configuration. If given, +# overrides reading cells from the database. (string value) +#cells_config=<None> + +# Multiplier used to weigh mute children. (The value should be negative.) +# (floating point value) +#mute_weight_multiplier=-10000.0 + +# Multiplier used for weighing ram. Negative numbers mean to stack vs spread. +# (floating point value) +#ram_weight_multiplier=10.0 + +# Multiplier used to weigh offset weigher. (floating point value) +#offset_weight_multiplier=1.0 + + +[cinder] + +# +# From nova +# + +# Info to match when looking for cinder in the service catalog. Format is: +# separated values of the form: <service_type>:<service_name>:<endpoint_type> +# (string value) +#catalog_info=volumev2:cinderv2:publicURL +catalog_info=volumev2:cinderv2:publicURL + +# Override service catalog lookup with template for cinder endpoint e.g. +# http://localhost:8776/v1/%(project_id)s (string value) +#endpoint_template=<None> + +# Region name of this node (string value) +#os_region_name=<None> + +# Number of cinderclient retries on failed http calls (integer value) +#http_retries=3 + +# Allow attach between instance and volume in different availability zones. 
+# (boolean value) +#cross_az_attach=true + + +[conductor] + +# +# From nova +# + +# Perform nova-conductor operations locally (boolean value) +#use_local=false +use_local=False + +# The topic on which conductor nodes listen (string value) +#topic=conductor + +# Full class name for the Manager for conductor (string value) +#manager=nova.conductor.manager.ConductorManager + +# Number of workers for OpenStack Conductor service. The default will be the +# number of CPUs available. (integer value) +#workers=<None> + + +[cors] + +# +# From oslo.middleware +# + +# Indicate whether this resource may be shared with the domain received in the +# requests "origin" header. (string value) +#allowed_origin=<None> + +# Indicate that the actual request can include user credentials (boolean value) +#allow_credentials=true + +# Indicate which headers are safe to expose to the API. Defaults to HTTP Simple +# Headers. (list value) +#expose_headers=Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + +# Maximum cache age of CORS preflight requests. (integer value) +#max_age=3600 + +# Indicate which methods can be used during the actual request. (list value) +#allow_methods=GET,POST,PUT,DELETE,OPTIONS + +# Indicate which header field names may be used during the actual request. +# (list value) +#allow_headers=Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + + +[cors.subdomain] + +# +# From oslo.middleware +# + +# Indicate whether this resource may be shared with the domain received in the +# requests "origin" header. (string value) +#allowed_origin=<None> + +# Indicate that the actual request can include user credentials (boolean value) +#allow_credentials=true + +# Indicate which headers are safe to expose to the API. Defaults to HTTP Simple +# Headers. (list value) +#expose_headers=Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + +# Maximum cache age of CORS preflight requests. (integer value) +#max_age=3600 + +# Indicate which methods can be used during the actual request. (list value) +#allow_methods=GET,POST,PUT,DELETE,OPTIONS + +# Indicate which header field names may be used during the actual request. +# (list value) +#allow_headers=Content-Type,Cache-Control,Content-Language,Expires,Last-Modified,Pragma + + +[database] + +# +# From nova +# + +# The file name to use with SQLite. (string value) +# Deprecated group;name - DEFAULT;sqlite_db +#sqlite_db=oslo.sqlite + +# If True, SQLite uses synchronous mode. (boolean value) +# Deprecated group;name - DEFAULT;sqlite_synchronous +#sqlite_synchronous=true + +# The back end to use for the database. (string value) +# Deprecated group;name - DEFAULT;db_backend +#backend=sqlalchemy + +# The SQLAlchemy connection string to use to connect to the database. (string +# value) +# Deprecated group;name - DEFAULT;sql_connection +# Deprecated group;name - [DATABASE]/sql_connection +# Deprecated group;name - [sql]/connection +#connection=<None> + +# The SQLAlchemy connection string to use to connect to the slave database. +# (string value) +#slave_connection=<None> + +# The SQL mode to be used for MySQL sessions. This option, including the +# default, overrides any server-set SQL mode. To use whatever SQL mode is set +# by the server configuration, set this to no value. Example: mysql_sql_mode= +# (string value) +#mysql_sql_mode=TRADITIONAL + +# Timeout before idle SQL connections are reaped. 
(integer value) +# Deprecated group;name - DEFAULT;sql_idle_timeout +# Deprecated group;name - [DATABASE]/sql_idle_timeout +# Deprecated group;name - [sql]/idle_timeout +#idle_timeout=3600 + +# Minimum number of SQL connections to keep open in a pool. (integer value) +# Deprecated group;name - DEFAULT;sql_min_pool_size +# Deprecated group;name - [DATABASE]/sql_min_pool_size +#min_pool_size=1 + +# Maximum number of SQL connections to keep open in a pool. (integer value) +# Deprecated group;name - DEFAULT;sql_max_pool_size +# Deprecated group;name - [DATABASE]/sql_max_pool_size +#max_pool_size=<None> + +# Maximum number of database connection retries during startup. Set to -1 to +# specify an infinite retry count. (integer value) +# Deprecated group;name - DEFAULT;sql_max_retries +# Deprecated group;name - [DATABASE]/sql_max_retries +#max_retries=10 + +# Interval between retries of opening a SQL connection. (integer value) +# Deprecated group;name - DEFAULT;sql_retry_interval +# Deprecated group;name - [DATABASE]/reconnect_interval +#retry_interval=10 + +# If set, use this value for max_overflow with SQLAlchemy. (integer value) +# Deprecated group;name - DEFAULT;sql_max_overflow +# Deprecated group;name - [DATABASE]/sqlalchemy_max_overflow +#max_overflow=<None> + +# Verbosity of SQL debugging information: 0=None, 100=Everything. (integer +# value) +# Deprecated group;name - DEFAULT;sql_connection_debug +#connection_debug=0 + +# Add Python stack traces to SQL as comment strings. (boolean value) +# Deprecated group;name - DEFAULT;sql_connection_trace +#connection_trace=false + +# If set, use this value for pool_timeout with SQLAlchemy. (integer value) +# Deprecated group;name - [DATABASE]/sqlalchemy_pool_timeout +#pool_timeout=<None> + +# Enable the experimental use of database reconnect on connection lost. +# (boolean value) +#use_db_reconnect=false + +# Seconds between retries of a database transaction. (integer value) +#db_retry_interval=1 + +# If True, increases the interval between retries of a database operation up to +# db_max_retry_interval. (boolean value) +#db_inc_retry_interval=true + +# If db_inc_retry_interval is set, the maximum seconds between retries of a +# database operation. (integer value) +#db_max_retry_interval=10 + +# Maximum retries in case of connection error or deadlock error before error is +# raised. Set to -1 to specify an infinite retry count. (integer value) +#db_max_retries=20 + +# +# From oslo.db +# + +# The file name to use with SQLite. (string value) +# Deprecated group;name - DEFAULT;sqlite_db +#sqlite_db=oslo.sqlite + +# If True, SQLite uses synchronous mode. (boolean value) +# Deprecated group;name - DEFAULT;sqlite_synchronous +#sqlite_synchronous=true + +# The back end to use for the database. (string value) +# Deprecated group;name - DEFAULT;db_backend +#backend=sqlalchemy + +# The SQLAlchemy connection string to use to connect to the database. (string +# value) +# Deprecated group;name - DEFAULT;sql_connection +# Deprecated group;name - [DATABASE]/sql_connection +# Deprecated group;name - [sql]/connection +#connection=<None> + +# The SQLAlchemy connection string to use to connect to the slave database. +# (string value) +#slave_connection=<None> + +# The SQL mode to be used for MySQL sessions. This option, including the +# default, overrides any server-set SQL mode. To use whatever SQL mode is set +# by the server configuration, set this to no value. 
Example: mysql_sql_mode= +# (string value) +#mysql_sql_mode=TRADITIONAL + +# Timeout before idle SQL connections are reaped. (integer value) +# Deprecated group;name - DEFAULT;sql_idle_timeout +# Deprecated group;name - [DATABASE]/sql_idle_timeout +# Deprecated group;name - [sql]/idle_timeout +#idle_timeout=3600 + +# Minimum number of SQL connections to keep open in a pool. (integer value) +# Deprecated group;name - DEFAULT;sql_min_pool_size +# Deprecated group;name - [DATABASE]/sql_min_pool_size +#min_pool_size=1 + +# Maximum number of SQL connections to keep open in a pool. (integer value) +# Deprecated group;name - DEFAULT;sql_max_pool_size +# Deprecated group;name - [DATABASE]/sql_max_pool_size +#max_pool_size=<None> + +# Maximum number of database connection retries during startup. Set to -1 to +# specify an infinite retry count. (integer value) +# Deprecated group;name - DEFAULT;sql_max_retries +# Deprecated group;name - [DATABASE]/sql_max_retries +#max_retries=10 + +# Interval between retries of opening a SQL connection. (integer value) +# Deprecated group;name - DEFAULT;sql_retry_interval +# Deprecated group;name - [DATABASE]/reconnect_interval +#retry_interval=10 + +# If set, use this value for max_overflow with SQLAlchemy. (integer value) +# Deprecated group;name - DEFAULT;sql_max_overflow +# Deprecated group;name - [DATABASE]/sqlalchemy_max_overflow +#max_overflow=<None> + +# Verbosity of SQL debugging information: 0=None, 100=Everything. (integer +# value) +# Deprecated group;name - DEFAULT;sql_connection_debug +#connection_debug=0 + +# Add Python stack traces to SQL as comment strings. (boolean value) +# Deprecated group;name - DEFAULT;sql_connection_trace +#connection_trace=false + +# If set, use this value for pool_timeout with SQLAlchemy. (integer value) +# Deprecated group;name - [DATABASE]/sqlalchemy_pool_timeout +#pool_timeout=<None> + +# Enable the experimental use of database reconnect on connection lost. +# (boolean value) +#use_db_reconnect=false + +# Seconds between retries of a database transaction. (integer value) +#db_retry_interval=1 + +# If True, increases the interval between retries of a database operation up to +# db_max_retry_interval. (boolean value) +#db_inc_retry_interval=true + +# If db_inc_retry_interval is set, the maximum seconds between retries of a +# database operation. (integer value) +#db_max_retry_interval=10 + +# Maximum retries in case of connection error or deadlock error before error is +# raised. Set to -1 to specify an infinite retry count. (integer value) +#db_max_retries=20 + + +[ephemeral_storage_encryption] + +# +# From nova.compute +# + +# Whether to encrypt ephemeral storage (boolean value) +#enabled=false + +# The cipher and mode to be used to encrypt ephemeral storage. Which ciphers +# are available ciphers depends on kernel support. See /proc/crypto for the +# list of available options. (string value) +#cipher=aes-xts-plain64 + +# The bit length of the encryption key to be used to encrypt ephemeral storage +# (in XTS mode only half of the bits are used for encryption key) (integer +# value) +#key_size=512 + + +[glance] + +# +# From nova +# + +# Default glance hostname or IP address (string value) +#host=$my_ip + +# Default glance port (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#port=9292 + +# Default protocol to use when connecting to glance. Set to https for SSL. +# (string value) +# Allowed values: http, https +#protocol=http + +# A list of the glance api servers available to nova. 
Prefix with https:// for +# ssl-based glance api servers. ([hostname|ip]:port) (list value) +#api_servers=<None> +api_servers=VARINET4ADDR:9292 + +# Allow to perform insecure SSL (https) requests to glance (boolean value) +#api_insecure=false + +# Number of retries when uploading / downloading an image to / from glance. +# (integer value) +#num_retries=0 + +# A list of url scheme that can be downloaded directly via the direct_url. +# Currently supported schemes: [file]. (list value) +#allowed_direct_url_schemes = + + +[guestfs] + +# +# From nova.virt +# + +# Enable guestfs debug (boolean value) +#debug=false + + +[hyperv] + +# +# From nova.virt +# + +# The name of a Windows share name mapped to the "instances_path" dir and used +# by the resize feature to copy files to the target host. If left blank, an +# administrative share will be used, looking for the same "instances_path" used +# locally (string value) +#instances_path_share = + +# Force V1 WMI utility classes (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#force_hyperv_utils_v1=false + +# Force V1 volume utility class (boolean value) +#force_volumeutils_v1=false + +# External virtual switch Name, if not provided, the first external virtual +# switch is used (string value) +#vswitch_name=<None> + +# Required for live migration among hosts with different CPU features (boolean +# value) +#limit_cpu_features=false + +# Sets the admin password in the config drive image (boolean value) +#config_drive_inject_password=false + +# Path of qemu-img command which is used to convert between different image +# types (string value) +#qemu_img_cmd=qemu-img.exe + +# Attaches the Config Drive image as a cdrom drive instead of a disk drive +# (boolean value) +#config_drive_cdrom=false + +# Enables metrics collections for an instance by using Hyper-V's metric APIs. +# Collected data can by retrieved by other apps and services, e.g.: Ceilometer. +# Requires Hyper-V / Windows Server 2012 and above (boolean value) +#enable_instance_metrics_collection=false + +# Enables dynamic memory allocation (ballooning) when set to a value greater +# than 1. The value expresses the ratio between the total RAM assigned to an +# instance and its startup RAM amount. For example a ratio of 2.0 for an +# instance with 1024MB of RAM implies 512MB of RAM allocated at startup +# (floating point value) +#dynamic_memory_ratio=1.0 + +# Number of seconds to wait for instance to shut down after soft reboot request +# is made. We fall back to hard reboot if instance does not shutdown within +# this window. (integer value) +#wait_soft_reboot_seconds=60 + +# The number of times to retry to attach a volume (integer value) +#volume_attach_retry_count=10 + +# Interval between volume attachment attempts, in seconds (integer value) +#volume_attach_retry_interval=5 + +# The number of times to retry checking for a disk mounted via iSCSI. (integer +# value) +#mounted_disk_query_retry_count=10 + +# Interval between checks for a mounted iSCSI disk, in seconds. (integer value) +#mounted_disk_query_retry_interval=5 + + +[image_file_url] + +# +# From nova +# + +# List of file systems that are configured in this file in the +# image_file_url:<list entry name> sections (list value) +#filesystems = + + +[ironic] + +# +# From nova.virt +# + +# Version of Ironic API service endpoint. (integer value) +#api_version=1 + +# URL for Ironic API endpoint. 
(string value) +#api_endpoint=<None> + +# Ironic keystone admin name (string value) +#admin_username=<None> + +# Ironic keystone admin password. (string value) +#admin_password=<None> + +# Ironic keystone auth token.DEPRECATED: use admin_username, admin_password, +# and admin_tenant_name instead (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#admin_auth_token=<None> + +# Keystone public API endpoint. (string value) +#admin_url=<None> + +# Log level override for ironicclient. Set this in order to override the global +# "default_log_levels", "verbose", and "debug" settings. DEPRECATED: use +# standard logging configuration. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#client_log_level=<None> + +# Ironic keystone tenant name. (string value) +#admin_tenant_name=<None> + +# How many retries when a request does conflict. If <= 0, only try once, no +# retries. (integer value) +#api_max_retries=60 + +# How often to retry in seconds when a request does conflict (integer value) +#api_retry_interval=2 + + +[keymgr] + +# +# From nova +# + +# Fixed key returned by key manager, specified in hex (string value) +#fixed_key=<None> + +# The full class name of the key manager API class (string value) +#api_class=nova.keymgr.conf_key_mgr.ConfKeyManager + + +[keystone_authtoken] + +# +# From keystonemiddleware.auth_token +# + +# Complete public Identity API endpoint. (string value) +#auth_uri=<None> +auth_uri=http://VARINET4ADDR:5000/v2.0 + +# API version of the admin Identity API endpoint. (string value) +#auth_version=<None> + +# Do not handle authorization requests within the middleware, but delegate the +# authorization decision to downstream WSGI components. (boolean value) +#delay_auth_decision=false + +# Request timeout value for communicating with Identity API server. (integer +# value) +#http_connect_timeout=<None> + +# How many times are we trying to reconnect when communicating with Identity +# API Server. (integer value) +#http_request_max_retries=3 + +# Env key for the swift cache. (string value) +#cache=<None> + +# Required if identity server requires client certificate (string value) +#certfile=<None> + +# Required if identity server requires client certificate (string value) +#keyfile=<None> + +# A PEM encoded Certificate Authority to use when verifying HTTPs connections. +# Defaults to system CAs. (string value) +#cafile=<None> + +# Verify HTTPS connections. (boolean value) +#insecure=false + +# The region in which the identity server can be found. (string value) +#region_name=<None> + +# Directory used to cache files related to PKI tokens. (string value) +#signing_dir=<None> + +# Optionally specify a list of memcached server(s) to use for caching. If left +# undefined, tokens will instead be cached in-process. (list value) +# Deprecated group;name - DEFAULT;memcache_servers +#memcached_servers=<None> + +# In order to prevent excessive effort spent validating tokens, the middleware +# caches previously-seen tokens for a configurable duration (in seconds). Set +# to -1 to disable caching completely. (integer value) +#token_cache_time=300 + +# Determines the frequency at which the list of revoked tokens is retrieved +# from the Identity service (in seconds). A high number of revocation events +# combined with a low cache duration may significantly reduce performance. 
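+# As an illustration of the caching options above (hostnames are placeholders),
+# tokens can be cached in an external memcached pool instead of in-process:
+#   memcached_servers=192.0.2.21:11211,192.0.2.22:11211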
+# (integer value) +#revocation_cache_time=10 + +# (Optional) If defined, indicate whether token data should be authenticated or +# authenticated and encrypted. Acceptable values are MAC or ENCRYPT. If MAC, +# token data is authenticated (with HMAC) in the cache. If ENCRYPT, token data +# is encrypted and authenticated in the cache. If the value is not one of these +# options or empty, auth_token will raise an exception on initialization. +# (string value) +#memcache_security_strategy=<None> + +# (Optional, mandatory if memcache_security_strategy is defined) This string is +# used for key derivation. (string value) +#memcache_secret_key=<None> + +# (Optional) Number of seconds memcached server is considered dead before it is +# tried again. (integer value) +#memcache_pool_dead_retry=300 + +# (Optional) Maximum total number of open connections to every memcached +# server. (integer value) +#memcache_pool_maxsize=10 + +# (Optional) Socket timeout in seconds for communicating with a memcached +# server. (integer value) +#memcache_pool_socket_timeout=3 + +# (Optional) Number of seconds a connection to memcached is held unused in the +# pool before it is closed. (integer value) +#memcache_pool_unused_timeout=60 + +# (Optional) Number of seconds that an operation will wait to get a memcached +# client connection from the pool. (integer value) +#memcache_pool_conn_get_timeout=10 + +# (Optional) Use the advanced (eventlet safe) memcached client pool. The +# advanced pool will only work under python 2.x. (boolean value) +#memcache_use_advanced_pool=false + +# (Optional) Indicate whether to set the X-Service-Catalog header. If False, +# middleware will not ask for service catalog on token validation and will not +# set the X-Service-Catalog header. (boolean value) +#include_service_catalog=true + +# Used to control the use and type of token binding. Can be set to: "disabled" +# to not check token binding. "permissive" (default) to validate binding +# information if the bind type is of a form known to the server and ignore it +# if not. "strict" like "permissive" but if the bind type is unknown the token +# will be rejected. "required" any form of token binding is needed to be +# allowed. Finally the name of a binding method that must be present in tokens. +# (string value) +#enforce_token_bind=permissive + +# If true, the revocation list will be checked for cached tokens. This requires +# that PKI tokens are configured on the identity server. (boolean value) +#check_revocations_for_cached=false + +# Hash algorithms to use for hashing PKI tokens. This may be a single algorithm +# or multiple. The algorithms are those supported by Python standard +# hashlib.new(). The hashes will be tried in the order given, so put the +# preferred one first for performance. The result of the first hash will be +# stored in the cache. This will typically be set to multiple values only while +# migrating from a less secure algorithm to a more secure one. Once all the old +# tokens are expired this option should be set to a single value for better +# performance. (list value) +#hash_algorithms=md5 + +# Prefix to prepend at the beginning of the path. Deprecated, use identity_uri. +# (string value) +#auth_admin_prefix = + +# Host providing the admin Identity API endpoint. Deprecated, use identity_uri. +# (string value) +#auth_host=127.0.0.1 + +# Port of the admin Identity API endpoint. Deprecated, use identity_uri. +# (integer value) +#auth_port=35357 + +# Protocol of the admin Identity API endpoint (http or https). 
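+# For example (the address is a placeholder), the deprecated trio
+#   auth_protocol=http, auth_host=192.0.2.10, auth_port=35357
+# is expressed with the single identity_uri option as
+#   identity_uri=http://192.0.2.10:35357/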
Deprecated, use +# identity_uri. (string value) +#auth_protocol=http + +# Complete admin Identity API endpoint. This should specify the unversioned +# root endpoint e.g. https://localhost:35357/ (string value) +#identity_uri=<None> +identity_uri=http://VARINET4ADDR:35357 + +# This option is deprecated and may be removed in a future release. Single +# shared secret with the Keystone configuration used for bootstrapping a +# Keystone installation, or otherwise bypassing the normal authentication +# process. This option should not be used, use `admin_user` and +# `admin_password` instead. (string value) +#admin_token=<None> + +# Service username. (string value) +#admin_user=<None> +admin_user=nova + +# Service user password. (string value) +#admin_password=<None> +admin_password=qum5net + +# Service tenant name. (string value) +#admin_tenant_name=admin +admin_tenant_name=services + + +[libvirt] + +# +# From nova.virt +# + +# Rescue ami image. This will not be used if an image id is provided by the +# user. (string value) +#rescue_image_id=<None> + +# Rescue aki image (string value) +#rescue_kernel_id=<None> + +# Rescue ari image (string value) +#rescue_ramdisk_id=<None> + +# Libvirt domain type (string value) +# Allowed values: kvm, lxc, qemu, uml, xen, parallels +#virt_type=kvm +virt_type=kvm + +# Override the default libvirt URI (which is dependent on virt_type) (string +# value) +#connection_uri = + +# Inject the admin password at boot time, without an agent. (boolean value) +#inject_password=false +inject_password=False + +# Inject the ssh public key at boot time (boolean value) +#inject_key=false +inject_key=False + +# The partition to inject to : -2 => disable, -1 => inspect (libguestfs only), +# 0 => not partitioned, >0 => partition number (integer value) +#inject_partition=-2 +inject_partition=-2 + +# Sync virtual and real mouse cursors in Windows VMs (boolean value) +#use_usb_tablet=true + +# Migration target URI (any included "%s" is replaced with the migration target +# hostname) (string value) +#live_migration_uri=qemu+tcp://%s/system +live_migration_uri=qemu+tcp://nova@%s/system + +# Migration flags to be set for live migration (string value) +#live_migration_flag=VIR_MIGRATE_UNDEFINE_SOURCE, VIR_MIGRATE_PEER2PEER, VIR_MIGRATE_LIVE, VIR_MIGRATE_TUNNELLED +live_migration_flag="VIR_MIGRATE_UNDEFINE_SOURCE, VIR_MIGRATE_PEER2PEER, VIR_MIGRATE_LIVE, VIR_MIGRATE_PERSIST_DEST, VIR_MIGRATE_TUNNELLED" + +# Migration flags to be set for block migration (string value) +#block_migration_flag=VIR_MIGRATE_UNDEFINE_SOURCE, VIR_MIGRATE_PEER2PEER, VIR_MIGRATE_LIVE, VIR_MIGRATE_TUNNELLED, VIR_MIGRATE_NON_SHARED_INC + +# Maximum bandwidth(in MiB/s) to be used during migration. If set to 0, will +# choose a suitable default. Some hypervisors do not support this feature and +# will return an error if bandwidth is not 0. Please refer to the libvirt +# documentation for further details (integer value) +#live_migration_bandwidth=0 + +# Maximum permitted downtime, in milliseconds, for live migration switchover. +# Will be rounded up to a minimum of 100ms. Use a large value if guest liveness +# is unimportant. (integer value) +#live_migration_downtime=500 + +# Number of incremental steps to reach max downtime value. Will be rounded up +# to a minimum of 3 steps (integer value) +#live_migration_downtime_steps=10 + +# Time to wait, in seconds, between each step increase of the migration +# downtime. Minimum delay is 10 seconds. 
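+# Rough worked example (figures are illustrative and the sizing rule is only
+# sketched): with live_migration_downtime=500 and live_migration_downtime_steps=10
+# the permitted pause grows in ten increments toward 500 ms, and for a guest with
+# 4 GiB of RAM plus an 8 GiB disk the per-step delay described below works out to
+# roughly (4 + 8) * 75 = 900 seconds.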
Value is per GiB of guest RAM + disk +# to be transferred, with lower bound of a minimum of 2 GiB per device (integer +# value) +#live_migration_downtime_delay=75 + +# Time to wait, in seconds, for migration to successfully complete transferring +# data before aborting the operation. Value is per GiB of guest RAM + disk to +# be transferred, with lower bound of a minimum of 2 GiB. Should usually be +# larger than downtime delay * downtime steps. Set to 0 to disable timeouts. +# (integer value) +#live_migration_completion_timeout=800 + +# Time to wait, in seconds, for migration to make forward progress in +# transferring data before aborting the operation. Set to 0 to disable +# timeouts. (integer value) +#live_migration_progress_timeout=150 + +# Snapshot image format. Defaults to same as source image (string value) +# Allowed values: raw, qcow2, vmdk, vdi +#snapshot_image_format=<None> + +# Override the default disk prefix for the devices attached to a server, which +# is dependent on virt_type. (valid options are: sd, xvd, uvd, vd) (string +# value) +#disk_prefix=<None> + +# Number of seconds to wait for instance to shut down after soft reboot request +# is made. We fall back to hard reboot if instance does not shutdown within +# this window. (integer value) +#wait_soft_reboot_seconds=120 + +# Set to "host-model" to clone the host CPU feature flags; to "host- +# passthrough" to use the host CPU model exactly; to "custom" to use a named +# CPU model; to "none" to not set any CPU model. If virt_type="kvm|qemu", it +# will default to "host-model", otherwise it will default to "none" (string +# value) +# Allowed values: host-model, host-passthrough, custom, none +#cpu_mode=<None> +cpu_mode=host-model + +# Set to a named libvirt CPU model (see names listed in +# /usr/share/libvirt/cpu_map.xml). Only has effect if cpu_mode="custom" and +# virt_type="kvm|qemu" (string value) +#cpu_model=<None> + +# Location where libvirt driver will store snapshots before uploading them to +# image service (string value) +#snapshots_directory=$instances_path/snapshots + +# Location where the Xen hvmloader is kept (string value) +#xen_hvmloader_path=/usr/lib/xen/boot/hvmloader + +# Specific cachemodes to use for different disk types e.g: +# file=directsync,block=none (list value) +#disk_cachemodes = +disk_cachemodes="network=writeback" + +# A path to a device that will be used as source of entropy on the host. +# Permitted options are: /dev/random or /dev/hwrng (string value) +#rng_dev_path=<None> + +# For qemu or KVM guests, set this option to specify a default machine type per +# host architecture. You can find a list of supported machine types in your +# environment by checking the output of the "virsh capabilities"command. The +# format of the value for this config option is host-arch=machine-type. For +# example: x86_64=machinetype1,armv7l=machinetype2 (list value) +#hw_machine_type=<None> + +# The data source used to the populate the host "serial" UUID exposed to guest +# in the virtual BIOS. (string value) +# Allowed values: none, os, hardware, auto +#sysinfo_serial=auto + +# A number of seconds to memory usage statistics period. Zero or negative value +# mean to disable memory usage statistics. (integer value) +#mem_stats_period_seconds=10 + +# List of uid targets and ranges.Syntax is guest-uid:host-uid:countMaximum of 5 +# allowed. (list value) +#uid_maps = + +# List of guid targets and ranges.Syntax is guest-gid:host-gid:countMaximum of +# 5 allowed. 
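+# Example using the guest:host:count syntax described above (the ids are
+# arbitrary placeholders):
+#   uid_maps=0:1000:1,1:10001:999
+#   gid_maps=0:1000:1,1:10001:999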
(list value) +#gid_maps = + +# In a realtime host context vCPUs for guest will run in that scheduling +# priority. Priority depends on the host kernel (usually 1-99) (integer value) +#realtime_scheduler_priority=1 + +# VM Images format. If default is specified, then use_cow_images flag is used +# instead of this one. (string value) +# Allowed values: raw, qcow2, lvm, rbd, ploop, default +#images_type=default +images_type=rbd + +# LVM Volume Group that is used for VM images, when you specify +# images_type=lvm. (string value) +#images_volume_group=<None> + +# Create sparse logical volumes (with virtualsize) if this flag is set to True. +# (boolean value) +#sparse_logical_volumes=false + +# The RADOS pool in which rbd volumes are stored (string value) +#images_rbd_pool=rbd +images_rbd_pool=vms + +# Path to the ceph configuration file to use (string value) +#images_rbd_ceph_conf = +images_rbd_ceph_conf = /etc/ceph/ceph.conf +rbd_user = cinder +rbd_secret_uuid = RBDSECRET + +# Discard option for nova managed disks. Need Libvirt(1.0.6) Qemu1.5 (raw +# format) Qemu1.6(qcow2 format) (string value) +# Allowed values: ignore, unmap +#hw_disk_discard=<None> +hw_disk_discard=unmap + +# Allows image information files to be stored in non-standard locations (string +# value) +#image_info_filename_pattern=$instances_path/$image_cache_subdirectory_name/%(image)s.info + +# DEPRECATED: Should unused kernel images be removed? This is only safe to +# enable if all compute nodes have been updated to support this option (running +# Grizzly or newer level compute). This will be the default behavior in the +# 13.0.0 release. (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#remove_unused_kernels=true + +# Unused resized base images younger than this will not be removed (integer +# value) +#remove_unused_resized_minimum_age_seconds=3600 + +# Write a checksum for files in _base to disk (boolean value) +#checksum_base_images=false + +# How frequently to checksum base images (integer value) +#checksum_interval_seconds=3600 + +# Method used to wipe old volumes. (string value) +# Allowed values: none, zero, shred +#volume_clear=zero + +# Size in MiB to wipe at start of old volumes. 0 => all (integer value) +#volume_clear_size=0 + +# Compress snapshot images when possible. This currently applies exclusively to +# qcow2 images (boolean value) +#snapshot_compression=false + +# Use virtio for bridge interfaces with KVM/QEMU (boolean value) +#use_virtio_for_bridges=true + +# Protocols listed here will be accessed directly from QEMU. Currently +# supported protocols: [gluster] (list value) +#qemu_allowed_storage_drivers = +vif_driver=nova.virt.libvirt.vif.LibvirtGenericVIFDriver + + +[matchmaker_redis] + +# +# From oslo.messaging +# + +# Host to locate redis. (string value) +#host=127.0.0.1 + +# Use this port to connect to redis host. (integer value) +#port=6379 + +# Password for Redis server (optional). (string value) +#password=<None> + + +[matchmaker_ring] + +# +# From oslo.messaging +# + +# Matchmaker ring file (JSON). (string value) +# Deprecated group;name - DEFAULT;matchmaker_ringfile +#ringfile=/etc/oslo/matchmaker_ring.json + + +[metrics] + +# +# From nova.scheduler +# + +# Multiplier used for weighing metrics. (floating point value) +#weight_multiplier=1.0 + +# How the metrics are going to be weighed. 
This should be in the form of +# "<name1>=<ratio1>, <name2>=<ratio2>, ...", where <nameX> is one of the +# metrics to be weighed, and <ratioX> is the corresponding ratio. So for +# "name1=1.0, name2=-1.0" The final weight would be name1.value * 1.0 + +# name2.value * -1.0. (list value) +#weight_setting = + +# How to treat the unavailable metrics. When a metric is NOT available for a +# host, if it is set to be True, it would raise an exception, so it is +# recommended to use the scheduler filter MetricFilter to filter out those +# hosts. If it is set to be False, the unavailable metric would be treated as a +# negative factor in weighing process, the returned value would be set by the +# option weight_of_unavailable. (boolean value) +#required=true + +# The final weight value to be returned if required is set to False and any one +# of the metrics set by weight_setting is unavailable. (floating point value) +#weight_of_unavailable=-10000.0 + + +[neutron] + +# +# From nova.api +# + +# Set flag to indicate Neutron will proxy metadata requests and resolve +# instance ids. (boolean value) +#service_metadata_proxy=false +service_metadata_proxy=True + +# Shared secret to validate proxies Neutron metadata requests (string value) +#metadata_proxy_shared_secret = +metadata_proxy_shared_secret =qum5net + +# +# From nova.network +# + +# URL for connecting to neutron (string value) +#url=http://127.0.0.1:9696 +url=http://VARINET4ADDR:9696 + +# User id for connecting to neutron in admin context. DEPRECATED: specify an +# auth_plugin and appropriate credentials instead. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#admin_user_id=<None> + +# Username for connecting to neutron in admin context DEPRECATED: specify an +# auth_plugin and appropriate credentials instead. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#admin_username=<None> +admin_username=neutron + +# Password for connecting to neutron in admin context DEPRECATED: specify an +# auth_plugin and appropriate credentials instead. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#admin_password=<None> +admin_password=qum5net + +# Tenant id for connecting to neutron in admin context DEPRECATED: specify an +# auth_plugin and appropriate credentials instead. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#admin_tenant_id=<None> + +# Tenant name for connecting to neutron in admin context. This option will be +# ignored if neutron_admin_tenant_id is set. Note that with Keystone V3 tenant +# names are only unique within a domain. DEPRECATED: specify an auth_plugin and +# appropriate credentials instead. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#admin_tenant_name=<None> +admin_tenant_name=services + +# Region name for connecting to neutron in admin context (string value) +#region_name=<None> +region_name=RegionOne + +# Authorization URL for connecting to neutron in admin context. DEPRECATED: +# specify an auth_plugin and appropriate credentials instead. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#admin_auth_url=http://localhost:5000/v2.0 +admin_auth_url=http://VARINET4ADDR:5000/v2.0 + +# Authorization strategy for connecting to neutron in admin context. 
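+# The non-deprecated style replaces the admin_* settings above with an auth
+# plugin; a sketch with placeholder values (the plugin name assumes the generic
+# keystone password plugin) might look like
+#   auth_plugin=password
+#   auth_url=http://192.0.2.10:5000/v2.0
+#   username=neutron
+#   password=secret
+#   tenant_name=services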
+# DEPRECATED: specify an auth_plugin and appropriate credentials instead. If an +# auth_plugin is specified strategy will be ignored. (string value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#auth_strategy=keystone +auth_strategy=keystone + +# Name of Integration Bridge used by Open vSwitch (string value) +#ovs_bridge=br-int +ovs_bridge=br-int + +# Number of seconds before querying neutron for extensions (integer value) +#extension_sync_interval=600 +extension_sync_interval=600 + +# +# From nova.network.neutronv2 +# + +# Authentication URL (string value) +#auth_url=<None> + +# Name of the plugin to load (string value) +#auth_plugin=<None> + +# PEM encoded Certificate Authority to use when verifying HTTPs connections. +# (string value) +# Deprecated group;name - [neutron]/ca_certificates_file +#cafile=<None> + +# PEM encoded client certificate cert file (string value) +#certfile=<None> + +# Domain ID to scope to (string value) +#domain_id=<None> + +# Domain name to scope to (string value) +#domain_name=<None> + +# Verify HTTPS connections. (boolean value) +# Deprecated group;name - [neutron]/api_insecure +#insecure=false + +# PEM encoded client certificate key file (string value) +#keyfile=<None> + +# User's password (string value) +#password=<None> + +# Domain ID containing project (string value) +#project_domain_id=<None> + +# Domain name containing project (string value) +#project_domain_name=<None> + +# Project ID to scope to (string value) +#project_id=<None> + +# Project name to scope to (string value) +#project_name=<None> + +# Tenant ID to scope to (string value) +#tenant_id=<None> + +# Tenant name to scope to (string value) +#tenant_name=<None> + +# Timeout value for http requests (integer value) +# Deprecated group;name - [neutron]/url_timeout +#timeout=<None> +timeout=30 + +# Trust ID (string value) +#trust_id=<None> + +# User's domain id (string value) +#user_domain_id=<None> + +# User's domain name (string value) +#user_domain_name=<None> + +# User id (string value) +#user_id=<None> + +# Username (string value) +# Deprecated group;name - DEFAULT;username +#username=<None> +default_tenant_id=default + + +[osapi_v21] + +# +# From nova.api +# + +# DEPRECATED: Whether the V2.1 API is enabled or not. This option will be +# removed in the near future. (boolean value) +# Deprecated group;name - [osapi_v21]/enabled +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#enabled=true + +# DEPRECATED: A list of v2.1 API extensions to never load. Specify the +# extension aliases here. This option will be removed in the near future. After +# that point you have to run all of the API. (list value) +# Deprecated group;name - [osapi_v21]/extensions_blacklist +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#extensions_blacklist = + +# DEPRECATED: If the list is not empty then a v2.1 API extension will only be +# loaded if it exists in this list. Specify the extension aliases here. This +# option will be removed in the near future. After that point you have to run +# all of the API. (list value) +# Deprecated group;name - [osapi_v21]/extensions_whitelist +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#extensions_whitelist = + + +[oslo_concurrency] + +# +# From oslo.concurrency +# + +# Enables or disables inter-process locks. 
(boolean value) +# Deprecated group;name - DEFAULT;disable_process_locking +#disable_process_locking=false + +# Directory to use for lock files. For security, the specified directory +# should only be writable by the user running the processes that need locking. +# Defaults to environment variable OSLO_LOCK_PATH. If external locks are used, +# a lock path must be set. (string value) +# Deprecated group;name - DEFAULT;lock_path +#lock_path=/var/lib/nova/tmp + + +[oslo_messaging_amqp] + +# +# From oslo.messaging +# + +# address prefix used when sending to a specific server (string value) +# Deprecated group;name - [amqp1]/server_request_prefix +#server_request_prefix=exclusive + +# address prefix used when broadcasting to all servers (string value) +# Deprecated group;name - [amqp1]/broadcast_prefix +#broadcast_prefix=broadcast + +# address prefix when sending to any server in group (string value) +# Deprecated group;name - [amqp1]/group_request_prefix +#group_request_prefix=unicast + +# Name for the AMQP container (string value) +# Deprecated group;name - [amqp1]/container_name +#container_name=<None> + +# Timeout for inactive connections (in seconds) (integer value) +# Deprecated group;name - [amqp1]/idle_timeout +#idle_timeout=0 + +# Debug: dump AMQP frames to stdout (boolean value) +# Deprecated group;name - [amqp1]/trace +#trace=false + +# CA certificate PEM file to verify server certificate (string value) +# Deprecated group;name - [amqp1]/ssl_ca_file +#ssl_ca_file = + +# Identifying certificate PEM file to present to clients (string value) +# Deprecated group;name - [amqp1]/ssl_cert_file +#ssl_cert_file = + +# Private key PEM file used to sign cert_file certificate (string value) +# Deprecated group;name - [amqp1]/ssl_key_file +#ssl_key_file = + +# Password for decrypting ssl_key_file (if encrypted) (string value) +# Deprecated group;name - [amqp1]/ssl_key_password +#ssl_key_password=<None> + +# Accept clients using either SSL or plain TCP (boolean value) +# Deprecated group;name - [amqp1]/allow_insecure_clients +#allow_insecure_clients=false + + +[oslo_messaging_qpid] + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. (boolean value) +# Deprecated group;name - DEFAULT;amqp_durable_queues +# Deprecated group;name - DEFAULT;rabbit_durable_queues +#amqp_durable_queues=false + +# Auto-delete queues in AMQP. (boolean value) +# Deprecated group;name - DEFAULT;amqp_auto_delete +#amqp_auto_delete=false + +# Send a single AMQP reply to call message. The current behaviour since oslo- +# incubator is to send two AMQP replies - first one with the payload, a second +# one to ensure the other have finish to send the payload. We are going to +# remove it in the N release, but we must keep backward compatible at the same +# time. This option provides such compatibility - it defaults to False in +# Liberty and can be turned on for early adopters with a new installations or +# for testing. Please note, that this option will be removed in the Mitaka +# release. (boolean value) +#send_single_reply=false + +# Qpid broker hostname. (string value) +# Deprecated group;name - DEFAULT;qpid_hostname +#qpid_hostname=localhost + +# Qpid broker port. (integer value) +# Deprecated group;name - DEFAULT;qpid_port +#qpid_port=5672 + +# Qpid HA cluster host:port pairs. (list value) +# Deprecated group;name - DEFAULT;qpid_hosts +#qpid_hosts=$qpid_hostname:$qpid_port + +# Username for Qpid connection. 
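+# Example of a two-broker HA pair expressed with the qpid_hosts list above
+# (hostnames are placeholders):
+#   qpid_hosts=qpid-a.example.com:5672,qpid-b.example.com:5672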
(string value) +# Deprecated group;name - DEFAULT;qpid_username +#qpid_username = + +# Password for Qpid connection. (string value) +# Deprecated group;name - DEFAULT;qpid_password +#qpid_password = + +# Space separated list of SASL mechanisms to use for auth. (string value) +# Deprecated group;name - DEFAULT;qpid_sasl_mechanisms +#qpid_sasl_mechanisms = + +# Seconds between connection keepalive heartbeats. (integer value) +# Deprecated group;name - DEFAULT;qpid_heartbeat +#qpid_heartbeat=60 + +# Transport to use, either 'tcp' or 'ssl'. (string value) +# Deprecated group;name - DEFAULT;qpid_protocol +#qpid_protocol=tcp + +# Whether to disable the Nagle algorithm. (boolean value) +# Deprecated group;name - DEFAULT;qpid_tcp_nodelay +#qpid_tcp_nodelay=true + +# The number of prefetched messages held by receiver. (integer value) +# Deprecated group;name - DEFAULT;qpid_receiver_capacity +#qpid_receiver_capacity=1 + +# The qpid topology version to use. Version 1 is what was originally used by +# impl_qpid. Version 2 includes some backwards-incompatible changes that allow +# broker federation to work. Users should update to version 2 when they are +# able to take everything down, as it requires a clean break. (integer value) +# Deprecated group;name - DEFAULT;qpid_topology_version +#qpid_topology_version=1 + + +[oslo_messaging_rabbit] + +# +# From oslo.messaging +# + +# Use durable queues in AMQP. (boolean value) +# Deprecated group;name - DEFAULT;amqp_durable_queues +# Deprecated group;name - DEFAULT;rabbit_durable_queues +#amqp_durable_queues=false +amqp_durable_queues=False + +# Auto-delete queues in AMQP. (boolean value) +# Deprecated group;name - DEFAULT;amqp_auto_delete +#amqp_auto_delete=false + +# Send a single AMQP reply to call message. The current behaviour since oslo- +# incubator is to send two AMQP replies - first one with the payload, a second +# one to ensure the other have finish to send the payload. We are going to +# remove it in the N release, but we must keep backward compatible at the same +# time. This option provides such compatibility - it defaults to False in +# Liberty and can be turned on for early adopters with a new installations or +# for testing. Please note, that this option will be removed in the Mitaka +# release. (boolean value) +#send_single_reply=false + +# SSL version to use (valid only if SSL enabled). Valid values are TLSv1 and +# SSLv23. SSLv2, SSLv3, TLSv1_1, and TLSv1_2 may be available on some +# distributions. (string value) +# Deprecated group;name - DEFAULT;kombu_ssl_version +#kombu_ssl_version = + +# SSL key file (valid only if SSL enabled). (string value) +# Deprecated group;name - DEFAULT;kombu_ssl_keyfile +#kombu_ssl_keyfile = + +# SSL cert file (valid only if SSL enabled). (string value) +# Deprecated group;name - DEFAULT;kombu_ssl_certfile +#kombu_ssl_certfile = + +# SSL certification authority file (valid only if SSL enabled). (string value) +# Deprecated group;name - DEFAULT;kombu_ssl_ca_certs +#kombu_ssl_ca_certs = + +# How long to wait before reconnecting in response to an AMQP consumer cancel +# notification. (floating point value) +# Deprecated group;name - DEFAULT;kombu_reconnect_delay +#kombu_reconnect_delay=1.0 +kombu_reconnect_delay=1.0 + +# How long to wait before considering a reconnect attempt to have failed. This +# value should not be longer than rpc_response_timeout. 
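+# Sketch of an SSL-enabled broker connection using the options above (paths are
+# illustrative placeholders):
+#   rabbit_use_ssl=True
+#   kombu_ssl_version=TLSv1
+#   kombu_ssl_keyfile=/etc/pki/tls/private/rabbit.key
+#   kombu_ssl_certfile=/etc/pki/tls/certs/rabbit.crt
+#   kombu_ssl_ca_certs=/etc/pki/tls/certs/ca.crt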
(integer value) +#kombu_reconnect_timeout=60 + +# Determines how the next RabbitMQ node is chosen in case the one we are +# currently connected to becomes unavailable. Takes effect only if more than +# one RabbitMQ node is provided in config. (string value) +# Allowed values: round-robin, shuffle +#kombu_failover_strategy=round-robin + +# The RabbitMQ broker address where a single node is used. (string value) +# Deprecated group;name - DEFAULT;rabbit_host +#rabbit_host=localhost +rabbit_host=VARINET4ADDR + +# The RabbitMQ broker port where a single node is used. (integer value) +# Deprecated group;name - DEFAULT;rabbit_port +#rabbit_port=5672 +rabbit_port=5672 + +# RabbitMQ HA cluster host:port pairs. (list value) +# Deprecated group;name - DEFAULT;rabbit_hosts +#rabbit_hosts=$rabbit_host:$rabbit_port +rabbit_hosts=VARINET4ADDR:5672 + +# Connect over SSL for RabbitMQ. (boolean value) +# Deprecated group;name - DEFAULT;rabbit_use_ssl +#rabbit_use_ssl=false +rabbit_use_ssl=False + +# The RabbitMQ userid. (string value) +# Deprecated group;name - DEFAULT;rabbit_userid +#rabbit_userid=guest +rabbit_userid=guest + +# The RabbitMQ password. (string value) +# Deprecated group;name - DEFAULT;rabbit_password +#rabbit_password=guest +rabbit_password=guest + +# The RabbitMQ login method. (string value) +# Deprecated group;name - DEFAULT;rabbit_login_method +#rabbit_login_method=AMQPLAIN + +# The RabbitMQ virtual host. (string value) +# Deprecated group;name - DEFAULT;rabbit_virtual_host +#rabbit_virtual_host=/ +rabbit_virtual_host=/ + +# How frequently to retry connecting with RabbitMQ. (integer value) +#rabbit_retry_interval=1 + +# How long to backoff for between retries when connecting to RabbitMQ. (integer +# value) +# Deprecated group;name - DEFAULT;rabbit_retry_backoff +#rabbit_retry_backoff=2 + +# Maximum number of RabbitMQ connection retries. Default is 0 (infinite retry +# count). (integer value) +# Deprecated group;name - DEFAULT;rabbit_max_retries +#rabbit_max_retries=0 + +# Use HA queues in RabbitMQ (x-ha-policy: all). If you change this option, you +# must wipe the RabbitMQ database. (boolean value) +# Deprecated group;name - DEFAULT;rabbit_ha_queues +#rabbit_ha_queues=false +rabbit_ha_queues=False + +# Specifies the number of messages to prefetch. Setting to zero allows +# unlimited messages. (integer value) +#rabbit_qos_prefetch_count=0 + +# Number of seconds after which the Rabbit broker is considered down if +# heartbeat's keep-alive fails (0 disable the heartbeat). EXPERIMENTAL (integer +# value) +#heartbeat_timeout_threshold=60 +heartbeat_timeout_threshold=0 + +# How often times during the heartbeat_timeout_threshold we check the +# heartbeat. (integer value) +#heartbeat_rate=2 +heartbeat_rate=2 + +# Deprecated, use rpc_backend=kombu+memory or rpc_backend=fake (boolean value) +# Deprecated group;name - DEFAULT;fake_rabbit +#fake_rabbit=false + + +[oslo_middleware] + +# +# From oslo.middleware +# + +# The maximum body size for each request, in bytes. (integer value) +# Deprecated group;name - DEFAULT;osapi_max_request_body_size +# Deprecated group;name - DEFAULT;max_request_body_size +#max_request_body_size=114688 + +# +# From oslo.middleware +# + +# The HTTP Header that will be used to determine what the original request +# protocol scheme was, even if it was hidden by an SSL termination proxy. 
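+# For example, a TLS-terminating proxy in front of the API usually injects
+#   X-Forwarded-Proto: https
+# into each request, which is what the header named below is matched against.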
+# (string value) +#secure_proxy_ssl_header=X-Forwarded-Proto + + +[rdp] + +# +# From nova +# + +# Location of RDP html5 console proxy, in the form "http://127.0.0.1:6083/" +# (string value) +#html5_proxy_base_url=http://127.0.0.1:6083/ + +# Enable RDP related features (boolean value) +#enabled=false + + +[serial_console] + +# +# From nova +# + +# Host on which to listen for incoming requests (string value) +#serialproxy_host=0.0.0.0 + +# Port on which to listen for incoming requests (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#serialproxy_port=6083 + +# Enable serial console related features (boolean value) +#enabled=false + +# Range of TCP ports to use for serial ports on compute hosts (string value) +#port_range=10000:20000 + +# Location of serial console proxy. (string value) +#base_url=ws://127.0.0.1:6083/ + +# IP address on which instance serial console should listen (string value) +#listen=127.0.0.1 + +# The address to which proxy clients (like nova-serialproxy) should connect +# (string value) +#proxyclient_address=127.0.0.1 + + +[spice] + +# +# From nova +# + +# Host on which to listen for incoming requests (string value) +#html5proxy_host=0.0.0.0 + +# Port on which to listen for incoming requests (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#html5proxy_port=6082 + +# Location of spice HTML5 console proxy, in the form +# "http://127.0.0.1:6082/spice_auto.html" (string value) +#html5proxy_base_url=http://127.0.0.1:6082/spice_auto.html + +# IP address on which instance spice server should listen (string value) +#server_listen=127.0.0.1 + +# The address to which proxy clients (like nova-spicehtml5proxy) should connect +# (string value) +#server_proxyclient_address=127.0.0.1 + +# Enable spice related features (boolean value) +#enabled=false + +# Enable spice guest agent support (boolean value) +#agent_enabled=true + +# Keymap for spice (string value) +#keymap=en-us + + +[ssl] + +# +# From oslo.service.sslutils +# + +# CA certificate file to use to verify connecting clients. (string value) +#ca_file=<None> + +# Certificate file to use when starting the server securely. (string value) +#cert_file=<None> + +# Private key file to use when starting the server securely. 
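+# Illustrative layout for serving over TLS (the paths are placeholders):
+#   cert_file=/etc/pki/tls/certs/nova.crt
+#   key_file=/etc/pki/tls/private/nova.key
+#   ca_file=/etc/pki/tls/certs/clients-ca.crt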
(string value) +#key_file=<None> + + +[trusted_computing] + +# +# From nova.scheduler +# + +# Attestation server HTTP (string value) +#attestation_server=<None> + +# Attestation server Cert file for Identity verification (string value) +#attestation_server_ca_file=<None> + +# Attestation server port (string value) +#attestation_port=8443 + +# Attestation web API URL (string value) +#attestation_api_url=/OpenAttestationWebServices/V1.0 + +# Attestation authorization blob - must change (string value) +#attestation_auth_blob=<None> + +# Attestation status cache valid period length (integer value) +#attestation_auth_timeout=60 + +# Disable SSL cert verification for Attestation service (boolean value) +#attestation_insecure_ssl=false + + +[upgrade_levels] + +# +# From nova +# + +# Set a version cap for messages sent to the base api in any service (string +# value) +#baseapi=<None> + +# Set a version cap for messages sent to cert services (string value) +#cert=<None> + +# Set a version cap for messages sent to conductor services (string value) +#conductor=<None> + +# Set a version cap for messages sent to console services (string value) +#console=<None> + +# Set a version cap for messages sent to consoleauth services (string value) +#consoleauth=<None> + +# +# From nova.cells +# + +# Set a version cap for messages sent between cells services (string value) +#intercell=<None> + +# Set a version cap for messages sent to local cells services (string value) +#cells=<None> + +# +# From nova.compute +# + +# Set a version cap for messages sent to compute services. If you plan to do a +# live upgrade from an old version to a newer version, you should set this +# option to the old version before beginning the live upgrade procedure. Only +# upgrading to the next version is supported, so you cannot skip a release for +# the live upgrade procedure. (string value) +#compute=<None> + +# +# From nova.network +# + +# Set a version cap for messages sent to network services (string value) +#network=<None> + +# +# From nova.scheduler +# + +# Set a version cap for messages sent to scheduler services (string value) +#scheduler=<None> + + +[vmware] + +# +# From nova.virt +# + +# The maximum number of ObjectContent data objects that should be returned in a +# single result. A positive value will cause the operation to suspend the +# retrieval when the count of objects reaches the specified maximum. The server +# may still limit the count to something less than the configured value. Any +# remaining objects may be retrieved with additional requests. (integer value) +#maximum_objects=100 + +# The PBM status. (boolean value) +#pbm_enabled=false + +# PBM service WSDL file location URL. e.g. +# file:///opt/SDK/spbm/wsdl/pbmService.wsdl Not setting this will disable +# storage policy based placement of instances. (string value) +#pbm_wsdl_location=<None> + +# The PBM default policy. If pbm_wsdl_location is set and there is no defined +# storage policy for the specific request then this policy will be used. +# (string value) +#pbm_default_policy=<None> + +# Hostname or IP address for connection to VMware vCenter host. (string value) +#host_ip=<None> + +# Port for connection to VMware vCenter host. (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#host_port=443 + +# Username for connection to VMware vCenter host. (string value) +#host_username=<None> + +# Password for connection to VMware vCenter host. 
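+# Example vCenter connection (all values are placeholders):
+#   host_ip=192.0.2.40
+#   host_username=administrator@vsphere.local
+#   host_password=secret
+#   cluster_name=Cluster1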
(string value) +#host_password=<None> + +# Specify a CA bundle file to use in verifying the vCenter server certificate. +# (string value) +#ca_file=<None> + +# If true, the vCenter server certificate is not verified. If false, then the +# default CA truststore is used for verification. This option is ignored if +# "ca_file" is set. (boolean value) +#insecure=false + +# Name of a VMware Cluster ComputeResource. (string value) +#cluster_name=<None> + +# Regex to match the name of a datastore. (string value) +#datastore_regex=<None> + +# The interval used for polling of remote tasks. (floating point value) +#task_poll_interval=0.5 + +# The number of times we retry on failures, e.g., socket error, etc. (integer +# value) +#api_retry_count=10 + +# VNC starting port (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#vnc_port=5900 + +# Total number of VNC ports (integer value) +#vnc_port_total=10000 + +# Whether to use linked clone (boolean value) +#use_linked_clone=true + +# Optional VIM Service WSDL Location e.g http://<server>/vimService.wsdl. +# Optional over-ride to default location for bug work-arounds (string value) +#wsdl_location=<None> + +# Physical ethernet adapter name for vlan networking (string value) +#vlan_interface=vmnic0 + +# Name of Integration Bridge (string value) +#integration_bridge=br-int + +# Set this value if affected by an increased network latency causing repeated +# characters when typing in a remote console. (integer value) +#console_delay_seconds=<None> + +# Identifies the remote system that serial port traffic will be sent to. If +# this is not set, no serial ports will be added to the created VMs. (string +# value) +#serial_port_service_uri=<None> + +# Identifies a proxy service that provides network access to the +# serial_port_service_uri. This option is ignored if serial_port_service_uri is +# not specified. (string value) +#serial_port_proxy_uri=<None> + +# The prefix for where cached images are stored. This is NOT the full path - +# just a folder prefix. This should only be used when a datastore cache should +# be shared between compute nodes. Note: this should only be used when the +# compute nodes have a shared file system. (string value) +#cache_prefix=<None> + + +[vnc] + +# +# From nova +# + +# Location of VNC console proxy, in the form +# "http://127.0.0.1:6080/vnc_auto.html" (string value) +# Deprecated group;name - DEFAULT;novncproxy_base_url +#novncproxy_base_url=http://127.0.0.1:6080/vnc_auto.html + +# Location of nova xvp VNC console proxy, in the form +# "http://127.0.0.1:6081/console" (string value) +# Deprecated group;name - DEFAULT;xvpvncproxy_base_url +#xvpvncproxy_base_url=http://127.0.0.1:6081/console + +# IP address on which instance vncservers should listen (string value) +# Deprecated group;name - DEFAULT;vncserver_listen +#vncserver_listen=127.0.0.1 + +# The address to which proxy clients (like nova-xvpvncproxy) should connect +# (string value) +# Deprecated group;name - DEFAULT;vncserver_proxyclient_address +#vncserver_proxyclient_address=127.0.0.1 + +# Enable VNC related features (boolean value) +# Deprecated group;name - DEFAULT;vnc_enabled +#enabled=true + +# Keymap for VNC (string value) +# Deprecated group;name - DEFAULT;vnc_keymap +#keymap=en-us + + +[workarounds] + +# +# From nova +# + +# This option allows a fallback to sudo for performance reasons. 
For example +# see https://bugs.launchpad.net/nova/+bug/1415106 (boolean value) +#disable_rootwrap=false + +# When using libvirt 1.2.2 live snapshots fail intermittently under load. This +# config option provides a mechanism to enable live snapshot while this is +# resolved. See https://bugs.launchpad.net/nova/+bug/1334398 (boolean value) +#disable_libvirt_livesnapshot=true + +# DEPRECATED: Whether to destroy instances on startup when we suspect they have +# previously been evacuated. This can result in data loss if undesired. See +# https://launchpad.net/bugs/1419785 (boolean value) +# This option is deprecated for removal. +# Its value may be silently ignored in the future. +#destroy_after_evacuate=true + +# Whether or not to handle events raised from the compute driver's 'emit_event' +# method. These are lifecycle events raised from compute drivers that implement +# the method. An example of a lifecycle event is an instance starting or +# stopping. If the instance is going through task state changes due to an API +# operation, like resize, the events are ignored. However, this is an advanced +# feature which allows the hypervisor to signal to the compute service that an +# unexpected state change has occurred in an instance and the instance can be +# shutdown automatically - which can inherently race in reboot operations or +# when the compute service or host is rebooted, either planned or due to an +# unexpected outage. Care should be taken when using this and +# sync_power_state_interval is negative since then if any instances are out of +# sync between the hypervisor and the Nova database they will have to be +# synchronized manually. See https://bugs.launchpad.net/bugs/1444630 (boolean +# value) +#handle_virt_lifecycle_events=true + + +[xenserver] + +# +# From nova.virt +# + +# Name of Integration Bridge used by Open vSwitch (string value) +#ovs_integration_bridge=xapi1 + +# Number of seconds to wait for agent reply (integer value) +#agent_timeout=30 + +# Number of seconds to wait for agent to be fully operational (integer value) +#agent_version_timeout=300 + +# Number of seconds to wait for agent reply to resetnetwork request (integer +# value) +#agent_resetnetwork_timeout=60 + +# Specifies the path in which the XenAPI guest agent should be located. If the +# agent is present, network configuration is not injected into the image. Used +# if compute_driver=xenapi.XenAPIDriver and flat_injected=True (string value) +#agent_path=usr/sbin/xe-update-networking + +# Disables the use of the XenAPI agent in any image regardless of what image +# properties are present. (boolean value) +#disable_agent=false + +# Determines if the XenAPI agent should be used when the image used does not +# contain a hint to declare if the agent is present or not. The hint is a +# glance property "xenapi_use_agent" that has the value "True" or "False". Note +# that waiting for the agent when it is not present will significantly increase +# server boot times. (boolean value) +#use_agent_default=false + +# Timeout in seconds for XenAPI login. (integer value) +#login_timeout=10 + +# Maximum number of concurrent XenAPI connections. Used only if +# compute_driver=xenapi.XenAPIDriver (integer value) +#connection_concurrent=5 + +# URL for connection to XenServer/Xen Cloud Platform. A special value of +# unix://local can be used to connect to the local unix socket. Required if +# compute_driver=xenapi.XenAPIDriver (string value) +#connection_url=<None> + +# Username for connection to XenServer/Xen Cloud Platform. 
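+# Example connection settings (the host and password are placeholders):
+#   connection_url=http://xenserver1.example.com
+#   connection_username=root
+#   connection_password=secret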
Used only if +# compute_driver=xenapi.XenAPIDriver (string value) +#connection_username=root + +# Password for connection to XenServer/Xen Cloud Platform. Used only if +# compute_driver=xenapi.XenAPIDriver (string value) +#connection_password=<None> + +# The interval used for polling of coalescing vhds. Used only if +# compute_driver=xenapi.XenAPIDriver (floating point value) +#vhd_coalesce_poll_interval=5.0 + +# Ensure compute service is running on host XenAPI connects to. (boolean value) +#check_host=true + +# Max number of times to poll for VHD to coalesce. Used only if +# compute_driver=xenapi.XenAPIDriver (integer value) +#vhd_coalesce_max_attempts=20 + +# Base path to the storage repository (string value) +#sr_base_path=/var/run/sr-mount + +# The iSCSI Target Host (string value) +#target_host=<None> + +# The iSCSI Target Port, default is port 3260 (string value) +#target_port=3260 + +# IQN Prefix (string value) +#iqn_prefix=iqn.2010-10.org.openstack + +# Used to enable the remapping of VBD dev (Works around an issue in Ubuntu +# Maverick) (boolean value) +#remap_vbd_dev=false + +# Specify prefix to remap VBD dev to (ex. /dev/xvdb -> /dev/sdb) (string value) +#remap_vbd_dev_prefix=sd + +# Base URL for torrent files; must contain a slash character (see RFC 1808, +# step 6) (string value) +#torrent_base_url=<None> + +# Probability that peer will become a seeder. (1.0 = 100%) (floating point +# value) +#torrent_seed_chance=1.0 + +# Number of seconds after downloading an image via BitTorrent that it should be +# seeded for other peers. (integer value) +#torrent_seed_duration=3600 + +# Cached torrent files not accessed within this number of seconds can be reaped +# (integer value) +#torrent_max_last_accessed=86400 + +# Beginning of port range to listen on (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#torrent_listen_port_start=6881 + +# End of port range to listen on (integer value) +# Minimum value: 1 +# Maximum value: 65535 +#torrent_listen_port_end=6891 + +# Number of seconds a download can remain at the same progress percentage w/o +# being considered a stall (integer value) +#torrent_download_stall_cutoff=600 + +# Maximum number of seeder processes to run concurrently within a given dom0. +# (-1 = no limit) (integer value) +#torrent_max_seeder_processes_per_host=1 + +# To use for hosts with different CPUs (boolean value) +#use_join_force=true + +# Cache glance images locally. `all` will cache all images, `some` will only +# cache images that have the image_property `cache_in_nova=True`, and `none` +# turns off caching entirely (string value) +# Allowed values: all, some, none +#cache_images=all + +# Compression level for images, e.g., 9 for gzip -9. Range is 1-9, 9 being most +# compressed but most CPU intensive on dom0. (integer value) +# Minimum value: 1 +# Maximum value: 9 +#image_compression_level=<None> + +# Default OS type (string value) +#default_os_type=linux + +# Time to wait for a block device to be created (integer value) +#block_device_creation_timeout=10 + +# Maximum size in bytes of kernel or ramdisk images (integer value) +#max_kernel_ramdisk_size=16777216 + +# Filter for finding the SR to be used to install guest instances on. To use +# the Local Storage in default XenServer/XCP installations set this flag to +# other-config:i18n-key=local-storage. To select an SR with a different +# matching criteria, you could set it to other-config:my_favorite_sr=true. 
On +# the other hand, to fall back on the Default SR, as displayed by XenCenter, +# set this flag to: default-sr:true (string value) +#sr_matching_filter=default-sr:true + +# Whether to use sparse_copy for copying data on a resize down (False will use +# standard dd). This speeds up resizes down considerably since large runs of +# zeros won't have to be rsynced (boolean value) +#sparse_copy=true + +# Maximum number of retries to unplug VBD. if <=0, should try once and no retry +# (integer value) +#num_vbd_unplug_retries=10 + +# Whether or not to download images via Bit Torrent. (string value) +# Allowed values: all, some, none +#torrent_images=none + +# Name of network to use for booting iPXE ISOs (string value) +#ipxe_network_name=<None> + +# URL to the iPXE boot menu (string value) +#ipxe_boot_menu_url=<None> + +# Name and optionally path of the tool used for ISO image creation (string +# value) +#ipxe_mkisofs_cmd=mkisofs + +# Number of seconds to wait for instance to go to running state (integer value) +#running_timeout=60 + +# The XenAPI VIF driver using XenServer Network APIs. (string value) +#vif_driver=nova.virt.xenapi.vif.XenAPIBridgeDriver + +# Dom0 plugin driver used to handle image uploads. (string value) +#image_upload_handler=nova.virt.xenapi.image.glance.GlanceStore + +# Number of seconds to wait for an SR to settle if the VDI does not exist when +# first introduced (integer value) +#introduce_vdi_retry_wait=20 + + +[zookeeper] + +# +# From nova +# + +# The ZooKeeper addresses for servicegroup service in the format of +# host1:port,host2:port,host3:port (string value) +#address=<None> + +# The recv_timeout parameter for the zk session (integer value) +#recv_timeout=4000 + +# The prefix used in ZooKeeper to store ephemeral nodes (string value) +#sg_prefix=/servicegroups + +# Number of seconds to wait until retrying to join the session (integer value) +#sg_retry_interval=5 + +[osapi_v3] +enabled=False diff --git a/qa/qa_scripts/openstack/fix_conf_file.sh b/qa/qa_scripts/openstack/fix_conf_file.sh new file mode 100755 index 000000000..8ccd27249 --- /dev/null +++ b/qa/qa_scripts/openstack/fix_conf_file.sh @@ -0,0 +1,28 @@ +source ./copy_func.sh +# +# Take a templated file, modify a local copy, and write it to the +# remote site. +# +# Usage: fix_conf_file <remote-site> <file-name> <remote-location> [<rbd-secret>] +# <remote-site> -- site where we want this modified file stored. +# <file-name> -- name of the remote file. +# <remote-location> -- directory where the file will be stored +# <rbd-secret> -- (optional) rbd_secret used by libvirt +# +function fix_conf_file() { + if [[ $# < 3 ]]; then + echo 'fix_conf_file: Too few parameters' + exit 1 + fi + openstack_node_local=${1} + cp files/${2}.template.conf ${2}.conf + hostname=`ssh $openstack_node_local hostname` + inet4addr=`ssh $openstack_node_local hostname -i` + sed -i s/VARHOSTNAME/$hostname/g ${2}.conf + sed -i s/VARINET4ADDR/$inet4addr/g ${2}.conf + if [[ $# == 4 ]]; then + sed -i s/RBDSECRET/${4}/g ${2}.conf + fi + copy_file ${2}.conf $openstack_node_local ${3} 0644 "root:root" + rm ${2}.conf +} diff --git a/qa/qa_scripts/openstack/image_create.sh b/qa/qa_scripts/openstack/image_create.sh new file mode 100755 index 000000000..ee7f61f3b --- /dev/null +++ b/qa/qa_scripts/openstack/image_create.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# +# Set up a vm on packstack. 
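+# A typical invocation (hostnames are placeholders), with the boot ISO already
+# at its default location:
+#   ./image_create.sh openstack1 cephmon1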
Use the iso in RHEL_ISO (defaults to home dir) +# +set -fv +source ./copy_func.sh +source ./fix_conf_file.sh +openstack_node=${1} +ceph_node=${2} + +RHEL_ISO=${RHEL_ISO:-~/rhel-server-7.2-x86_64-boot.iso} +copy_file ${RHEL_ISO} $openstack_node . +copy_file execs/run_openstack.sh $openstack_node . 0755 +filler=`date +%s` +ssh $openstack_node ./run_openstack.sh "${openstack_node}X${filler}" rhel-server-7.2-x86_64-boot.iso +ssh $ceph_node sudo ceph df diff --git a/qa/qa_scripts/openstack/openstack.sh b/qa/qa_scripts/openstack/openstack.sh new file mode 100755 index 000000000..1c1e6c00f --- /dev/null +++ b/qa/qa_scripts/openstack/openstack.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# +# Install Openstack. +# Usage: openstack <openstack-site> <ceph-monitor> +# +# This script installs Openstack on one node, and connects it to a ceph +# cluster on another set of nodes. It is intended to run from a third +# node. +# +# Assumes a single node Openstack cluster and a single monitor ceph +# cluster. +# +# The execs directory contains scripts to be run on remote sites. +# The files directory contains files to be copied to remote sites. +# + +set -fv +source ./copy_func.sh +source ./fix_conf_file.sh +openstack_node=${1} +ceph_node=${2} +./packstack.sh $openstack_node $ceph_node +echo 'done running packstack' +sleep 60 +./connectceph.sh $openstack_node $ceph_node +echo 'done connecting' +sleep 60 +./image_create.sh $openstack_node $ceph_node diff --git a/qa/qa_scripts/openstack/packstack.sh b/qa/qa_scripts/openstack/packstack.sh new file mode 100755 index 000000000..3f891f98c --- /dev/null +++ b/qa/qa_scripts/openstack/packstack.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# +# Install openstack by running packstack. +# +# Implements the operations in: +# https://docs.google.com/document/d/1us18KR3LuLyINgGk2rmI-SVj9UksCE7y4C2D_68Aa8o/edit?ts=56a78fcb +# +# The directory named files contains a template for the kilo.conf file used by packstack. +# +set -fv +source ./copy_func.sh +source ./fix_conf_file.sh +openstack_node=${1} +ceph_node=${2} + +copy_file execs/openstack-preinstall.sh $openstack_node . 0777 +fix_conf_file $openstack_node kilo . +ssh $openstack_node sudo ./openstack-preinstall.sh +sleep 240 +ssh $openstack_node sudo packstack --answer-file kilo.conf diff --git a/qa/rbd/common.sh b/qa/rbd/common.sh new file mode 100644 index 000000000..232cf45ad --- /dev/null +++ b/qa/rbd/common.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash + +die() { + echo "$*" + exit 1 +} + +cleanup() { + rm -rf $TDIR + TDIR="" +} + +set_variables() { + # defaults + [ -z "$bindir" ] && bindir=$PWD # location of init-ceph + if [ -z "$conf" ]; then + conf="$basedir/ceph.conf" + [ -e $conf ] || conf="/etc/ceph/ceph.conf" + fi + [ -e $conf ] || die "conf file not found" + + CCONF="ceph-conf -c $conf" + + [ -z "$mnt" ] && mnt="/c" + if [ -z "$monhost" ]; then + $CCONF -t mon -i 0 'mon addr' > $TDIR/cconf_mon + if [ $? -ne 0 ]; then + $CCONF -t mon.a -i 0 'mon addr' > $TDIR/cconf_mon + [ $? 
-ne 0 ] && die "can't figure out \$monhost" + fi + read monhost < $TDIR/cconf_mon + fi + + [ -z "$imgsize" ] && imgsize=1024 + [ -z "$user" ] && user=admin + [ -z "$keyring" ] && keyring="`$CCONF keyring`" + [ -z "$secret" ] && secret="`ceph-authtool $keyring -n client.$user -p`" + + monip="`echo $monhost | sed 's/:/ /g' | awk '{print $1}'`" + monport="`echo $monhost | sed 's/:/ /g' | awk '{print $2}'`" + + [ -z "$monip" ] && die "bad mon address" + + [ -z "$monport" ] && monport=6789 + + set -e + + mydir=`hostname`_`echo $0 | sed 's/\//_/g'` + + img_name=test.`hostname`.$$ +} + +rbd_load() { + modprobe rbd +} + +rbd_create_image() { + id=$1 + rbd create $img_name.$id --size=$imgsize +} + +rbd_add() { + id=$1 + echo "$monip:$monport name=$user,secret=$secret rbd $img_name.$id" \ + > /sys/bus/rbd/add + + pushd /sys/bus/rbd/devices &> /dev/null + [ $? -eq 0 ] || die "failed to cd" + devid="" + rm -f "$TDIR/rbd_devs" + for f in *; do echo $f >> "$TDIR/rbd_devs"; done + sort -nr "$TDIR/rbd_devs" > "$TDIR/rev_rbd_devs" + while read f < "$TDIR/rev_rbd_devs"; do + read d_img_name < "$f/name" + if [ "x$d_img_name" == "x$img_name.$id" ]; then + devid=$f + break + fi + done + popd &> /dev/null + + [ "x$devid" == "x" ] && die "failed to find $img_name.$id" + + export rbd$id=$devid + while [ ! -e /dev/rbd$devid ]; do sleep 1; done +} + +rbd_test_init() { + rbd_load +} + +rbd_remove() { + echo $1 > /sys/bus/rbd/remove +} + +rbd_rm_image() { + id=$1 + rbd rm $imgname.$id +} + +TDIR=`mktemp -d` +trap cleanup INT TERM EXIT +set_variables diff --git a/qa/rbd/conf/+ b/qa/rbd/conf/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/rbd/conf/+ diff --git a/qa/rbd/conf/disable-pool-app.yaml b/qa/rbd/conf/disable-pool-app.yaml new file mode 100644 index 000000000..099532f57 --- /dev/null +++ b/qa/rbd/conf/disable-pool-app.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false diff --git a/qa/rbd/data-pool/ec.yaml b/qa/rbd/data-pool/ec.yaml new file mode 100644 index 000000000..f39a5bb4c --- /dev/null +++ b/qa/rbd/data-pool/ec.yaml @@ -0,0 +1,24 @@ +tasks: +- exec: + client.0: + - sudo ceph osd erasure-code-profile set teuthologyprofile crush-failure-domain=osd m=1 k=2 + - sudo ceph osd pool create datapool 4 4 erasure teuthologyprofile + - sudo ceph osd pool set datapool allow_ec_overwrites true + - rbd pool init datapool + +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + client: + rbd default data pool: datapool + osd: # force bluestore since it's required for ec overwrites + osd objectstore: bluestore + bluestore block size: 96636764160 + enable experimental unrecoverable data corrupting features: "*" + osd debug randomize hobject sort order: false +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/rbd/data-pool/none.yaml b/qa/rbd/data-pool/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/rbd/data-pool/none.yaml diff --git a/qa/rbd/data-pool/replicated.yaml b/qa/rbd/data-pool/replicated.yaml new file mode 100644 index 000000000..c5647dba1 --- /dev/null +++ b/qa/rbd/data-pool/replicated.yaml @@ -0,0 +1,11 @@ +tasks: +- exec: + client.0: + - sudo ceph osd pool create datapool 4 + - rbd pool init datapool + +overrides: + ceph: + conf: + client: + rbd default data pool: datapool diff --git a/qa/rbd/krbd_blkroset.t b/qa/rbd/krbd_blkroset.t new file mode 100644 index 
000000000..428636de0 --- /dev/null +++ b/qa/rbd/krbd_blkroset.t @@ -0,0 +1,352 @@ + +Setup +===== + + $ RO_KEY=$(ceph auth get-or-create-key client.ro mon 'profile rbd' mgr 'profile rbd' osd 'profile rbd-read-only') + $ rbd create --size 10 img + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd create --size 1 imgpart + $ DEV=$(sudo rbd map imgpart) + $ cat <<EOF | sudo sfdisk $DEV >/dev/null 2>&1 + > unit: sectors + > /dev/rbd0p1 : start= 512, size= 512, Id=83 + > /dev/rbd0p2 : start= 1024, size= 512, Id=83 + > EOF + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress imgpart@snap + + +Image HEAD +========== + +R/W, unpartitioned: + + $ DEV=$(sudo rbd map img) + $ blockdev --getro $DEV + 0 + $ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none + $ blkdiscard $DEV + $ blockdev --setro $DEV + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setro $DEV + $ blockdev --getro $DEV + 1 + $ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?': Operation not permitted (glob) + [1] + $ blkdiscard $DEV + blkdiscard: /dev/rbd?: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ blockdev --setrw $DEV + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw $DEV + $ blockdev --getro $DEV + 0 + $ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none + $ blkdiscard $DEV + $ sudo rbd unmap $DEV + +R/W, partitioned: + + $ DEV=$(sudo rbd map imgpart) + $ udevadm settle + $ blockdev --getro ${DEV}p1 + 0 + $ blockdev --getro ${DEV}p2 + 0 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p1 + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p2 + $ blockdev --setro ${DEV}p1 + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setro ${DEV}p1 + $ blockdev --getro ${DEV}p1 + 1 + $ blockdev --getro ${DEV}p2 + 0 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p1': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p1 + blkdiscard: /dev/rbd?p1: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p2 + $ blockdev --setrw ${DEV}p1 + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw ${DEV}p1 + $ blockdev --getro ${DEV}p1 + 0 + $ blockdev --getro ${DEV}p2 + 0 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p1 + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p2 + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map imgpart) + $ udevadm settle + $ blockdev --getro ${DEV}p1 + 0 + $ blockdev --getro ${DEV}p2 + 0 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p1 + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p2 + $ blockdev --setro ${DEV}p2 + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setro ${DEV}p2 + $ blockdev --getro ${DEV}p1 + 0 + $ blockdev --getro ${DEV}p2 + 1 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p1 + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p2': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p2 + blkdiscard: /dev/rbd?p2: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ blockdev --setrw ${DEV}p2 + 
.*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw ${DEV}p2 + $ blockdev --getro ${DEV}p1 + 0 + $ blockdev --getro ${DEV}p2 + 0 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p1 + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + $ blkdiscard ${DEV}p2 + $ sudo rbd unmap $DEV + +R/O, unpartitioned: + + $ DEV=$(sudo rbd map --read-only img) + $ blockdev --getro $DEV + 1 + $ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?': Operation not permitted (glob) + [1] + $ blkdiscard $DEV + blkdiscard: /dev/rbd?: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ blockdev --setrw $DEV + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw $DEV # succeeds but effectively ignored + $ blockdev --getro $DEV + 1 + $ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?': Operation not permitted (glob) + [1] + $ blkdiscard $DEV + blkdiscard: /dev/rbd?: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ sudo rbd unmap $DEV + +R/O, partitioned: + + $ DEV=$(sudo rbd map --read-only imgpart) + $ udevadm settle + $ blockdev --getro ${DEV}p1 + 1 + $ blockdev --getro ${DEV}p2 + 1 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p1': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p1 + blkdiscard: /dev/rbd?p1: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p2': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p2 + blkdiscard: /dev/rbd?p2: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ blockdev --setrw ${DEV}p1 + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw ${DEV}p1 # succeeds but effectively ignored + $ blockdev --setrw ${DEV}p2 + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw ${DEV}p2 # succeeds but effectively ignored + $ blockdev --getro ${DEV}p1 + 1 + $ blockdev --getro ${DEV}p2 + 1 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p1': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p1 + blkdiscard: /dev/rbd?p1: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p2': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p2 + blkdiscard: /dev/rbd?p2: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ sudo rbd unmap $DEV + + +Image snapshot +============== + +Unpartitioned: + + $ DEV=$(sudo rbd map img@snap) + $ blockdev --getro $DEV + 1 + $ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?': Operation not permitted (glob) + [1] + $ blkdiscard $DEV + blkdiscard: /dev/rbd?: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ blockdev --setrw $DEV + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw $DEV # succeeds but effectively ignored + $ blockdev --getro $DEV + 1 + $ dd if=/dev/urandom of=$DEV bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?': Operation not permitted (glob) + [1] + $ blkdiscard $DEV + blkdiscard: /dev/rbd?: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ sudo rbd unmap $DEV + +Partitioned: + + $ DEV=$(sudo rbd map imgpart@snap) + $ udevadm settle + $ blockdev --getro 
${DEV}p1 + 1 + $ blockdev --getro ${DEV}p2 + 1 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p1': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p1 + blkdiscard: /dev/rbd?p1: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p2': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p2 + blkdiscard: /dev/rbd?p2: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ blockdev --setrw ${DEV}p1 + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw ${DEV}p1 # succeeds but effectively ignored + $ blockdev --setrw ${DEV}p2 + .*BLKROSET: Permission denied (re) + [1] + $ sudo blockdev --setrw ${DEV}p2 # succeeds but effectively ignored + $ blockdev --getro ${DEV}p1 + 1 + $ blockdev --getro ${DEV}p2 + 1 + $ dd if=/dev/urandom of=${DEV}p1 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p1': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p1 + blkdiscard: /dev/rbd?p1: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ dd if=/dev/urandom of=${DEV}p2 bs=1k seek=1 count=1 status=none + dd: error writing '/dev/rbd?p2': Operation not permitted (glob) + [1] + $ blkdiscard ${DEV}p2 + blkdiscard: /dev/rbd?p2: BLKDISCARD ioctl failed: Operation not permitted (glob) + [1] + $ sudo rbd unmap $DEV + + +read-only OSD caps +================== + +R/W: + + $ DEV=$(sudo rbd map --id ro --key $(echo $RO_KEY) img) + rbd: sysfs write failed + rbd: map failed: (1) Operation not permitted + [1] + +R/O: + + $ DEV=$(sudo rbd map --id ro --key $(echo $RO_KEY) --read-only img) + $ blockdev --getro $DEV + 1 + $ sudo rbd unmap $DEV + +Snapshot: + + $ DEV=$(sudo rbd map --id ro --key $(echo $RO_KEY) img@snap) + $ blockdev --getro $DEV + 1 + $ sudo rbd unmap $DEV + +R/W, clone: + + $ DEV=$(sudo rbd map --id ro --key $(echo $RO_KEY) cloneimg) + rbd: sysfs write failed + rbd: map failed: (1) Operation not permitted + [1] + +R/O, clone: + + $ DEV=$(sudo rbd map --id ro --key $(echo $RO_KEY) --read-only cloneimg) + $ blockdev --getro $DEV + 1 + $ sudo rbd unmap $DEV + + +rw -> ro with open_count > 0 +============================ + + $ DEV=$(sudo rbd map img) + $ { sleep 10; sudo blockdev --setro $DEV; } & + $ dd if=/dev/urandom of=$DEV bs=1k oflag=direct status=noxfer + dd: error writing '/dev/rbd?': Operation not permitted (glob) + [1-9]\d*\+0 records in (re) + [1-9]\d*\+0 records out (re) + [1] + $ sudo rbd unmap $DEV + + +"-o rw --read-only" should result in read-only mapping +====================================================== + + $ DEV=$(sudo rbd map -o rw --read-only img) + $ blockdev --getro $DEV + 1 + $ sudo rbd unmap $DEV + + +Teardown +======== + + $ rbd snap purge imgpart >/dev/null 2>&1 + $ rbd rm imgpart >/dev/null 2>&1 + $ rbd rm cloneimg >/dev/null 2>&1 + $ rbd snap unprotect img@snap + $ rbd snap purge img >/dev/null 2>&1 + $ rbd rm img >/dev/null 2>&1 + diff --git a/qa/rbd/krbd_deep_flatten.t b/qa/rbd/krbd_deep_flatten.t new file mode 100644 index 000000000..486b966d9 --- /dev/null +++ b/qa/rbd/krbd_deep_flatten.t @@ -0,0 +1,329 @@ + +Write: + + $ rbd create --size 12M --image-feature layering,deep-flatten img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite -w 0 12M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd snap create --no-progress cloneimg@snap + $ DEV=$(sudo 
rbd map cloneimg) + $ xfs_io -c 'pwrite -S 0xab -w 6M 1k' $DEV >/dev/null + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0600000 abab abab abab abab abab abab abab abab + * + 0600400 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd flatten --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0600000 abab abab abab abab abab abab abab abab + * + 0600400 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd snap rm --no-progress cloneimg@snap + $ rbd rm --no-progress cloneimg + +Write, whole object: + + $ rbd create --size 12M --image-feature layering,deep-flatten img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite -w 0 12M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd snap create --no-progress cloneimg@snap + $ DEV=$(sudo rbd map cloneimg) + $ xfs_io -d -c 'pwrite -b 4M -S 0xab 4M 4M' $DEV >/dev/null + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 abab abab abab abab abab abab abab abab + * + 0800000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd flatten --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 abab abab abab abab abab abab abab abab + * + 0800000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd snap rm --no-progress cloneimg@snap + $ rbd rm --no-progress cloneimg + +Zeroout: + + $ rbd create --size 12M --image-feature layering,deep-flatten img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite -w 0 12M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd snap create --no-progress cloneimg@snap + $ DEV=$(sudo rbd map cloneimg) + $ fallocate -z -o 6M -l 1k $DEV + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0600000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0600400 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd flatten --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img + + $ DEV=$(sudo rbd map 
cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0600000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0600400 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd snap rm --no-progress cloneimg@snap + $ rbd rm --no-progress cloneimg + +Zeroout, whole object: + + $ rbd create --size 12M --image-feature layering,deep-flatten img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite -w 0 12M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd snap create --no-progress cloneimg@snap + $ DEV=$(sudo rbd map cloneimg) + $ fallocate -z -o 4M -l 4M $DEV + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0800000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd flatten --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0800000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd snap rm --no-progress cloneimg@snap + $ rbd rm --no-progress cloneimg + +Discard, whole object, empty clone: + + $ rbd create --size 12M --image-feature layering,deep-flatten img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite -w 0 12M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd snap create --no-progress cloneimg@snap + $ DEV=$(sudo rbd map cloneimg) + $ blkdiscard -o 4M -l 4M $DEV + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd flatten --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd snap rm --no-progress cloneimg@snap + $ rbd rm --no-progress cloneimg + +Discard, whole object, full clone: + + $ rbd create --size 12M --image-feature layering,deep-flatten img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite -w 0 12M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd snap create --no-progress cloneimg@snap + $ DEV=$(sudo rbd map cloneimg) + $ xfs_io -c 'pwrite -S 0xab -w 
0 12M' $DEV >/dev/null + $ blkdiscard -o 4M -l 4M $DEV + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0800000 abab abab abab abab abab abab abab abab + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd flatten --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img + + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0800000 abab abab abab abab abab abab abab abab + * + 0c00000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg@snap) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0c00000 + $ sudo rbd unmap $DEV + + $ rbd snap rm --no-progress cloneimg@snap + $ rbd rm --no-progress cloneimg diff --git a/qa/rbd/krbd_default_map_options.t b/qa/rbd/krbd_default_map_options.t new file mode 100644 index 000000000..5dac5d6ae --- /dev/null +++ b/qa/rbd/krbd_default_map_options.t @@ -0,0 +1,64 @@ +Set up + + $ ceph osd pool create rbda + pool 'rbda' created + $ rbd pool init rbda + $ rbd create rbda/image1 --size 1000 + +Test at map options level + + $ OPTIONS="alloc_size=65536,lock_on_read" + $ EXPECTED="${OPTIONS}" + $ DEV=$(sudo rbd map rbda/image1 --options ${OPTIONS}) + $ sudo grep -q ${EXPECTED} /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap rbda/image1 + +Test at global level + + $ OPTIONS="alloc_size=4096,crc" + $ EXPECTED="${OPTIONS}" + $ rbd config global set global rbd_default_map_options ${OPTIONS} + $ DEV=$(sudo rbd map rbda/image1) + $ sudo grep -q ${EXPECTED} /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap rbda/image1 + + $ OPTIONS="alloc_size=65536,lock_on_read" + $ EXPECTED="alloc_size=65536,crc,lock_on_read" + $ DEV=$(sudo rbd map rbda/image1 --options ${OPTIONS}) + $ sudo grep -q ${EXPECTED} /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap rbda/image1 + +Test at pool level + + $ OPTIONS="alloc_size=8192,share" + $ EXPECTED="${OPTIONS}" + $ rbd config pool set rbda rbd_default_map_options ${OPTIONS} + $ DEV=$(sudo rbd map rbda/image1) + $ sudo grep -q ${EXPECTED} /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap rbda/image1 + + $ OPTIONS="lock_on_read,alloc_size=65536" + $ EXPECTED="alloc_size=65536,lock_on_read,share" + $ DEV=$(sudo rbd map rbda/image1 --options ${OPTIONS}) + $ sudo grep -q ${EXPECTED} /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap rbda/image1 + +Test at image level + + $ OPTIONS="alloc_size=16384,tcp_nodelay" + $ EXPECTED="${OPTIONS}" + $ rbd config image set rbda/image1 rbd_default_map_options ${OPTIONS} + $ DEV=$(sudo rbd map rbda/image1) + $ sudo grep -q ${EXPECTED} /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap rbda/image1 + + $ OPTIONS="lock_on_read,alloc_size=65536" + $ EXPECTED="alloc_size=65536,lock_on_read,tcp_nodelay" + $ DEV=$(sudo rbd map rbda/image1 --options ${OPTIONS}) + $ sudo grep -q ${EXPECTED} /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap rbda/image1 + +Teardown + + $ ceph osd pool rm rbda rbda --yes-i-really-really-mean-it + pool 'rbda' removed diff --git a/qa/rbd/krbd_discard.t b/qa/rbd/krbd_discard.t new file mode 100644 
index 000000000..528e1dc3d --- /dev/null +++ b/qa/rbd/krbd_discard.t @@ -0,0 +1,398 @@ + + $ rbd create --size 4M img + $ DEV=$(sudo rbd map img) + +Zero, < 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 156672 -l 512 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 64512 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 65024 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 65024 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 65536 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 66048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 66048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 66560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, < 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 163840 -l 65536 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 130048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 130560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0030000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 130560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 131072 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd 
cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 131584 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 131584 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 132096 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 37 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 589824 -l 2424832 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0090000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02e0000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 589312 -l 2424832 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0090000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02d0000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 590336 -l 2424832 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 00a0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02e0000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Truncate: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4193792 -l 512 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4129280 -l 65024 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4128768 -l 65536 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4128256 -l 66048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4063744 -l 130560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4063232 -l 131072 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io 
-c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4062720 -l 131584 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 512 -l 4193792 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0010000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Delete: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 0 -l 4194304 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Empty clone: + + $ xfs_io -c 'pwrite -S 0xab -w 0 4M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + + $ rbd clone img@snap cloneimg1 + $ DEV=$(sudo rbd map cloneimg1) + $ blkdiscard -o 720896 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg2 + $ DEV=$(sudo rbd map cloneimg2) + $ blkdiscard -o 1474560 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg3 + $ DEV=$(sudo rbd map cloneimg3) + $ blkdiscard -o 0 -l 4194304 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + +Full clone: + + $ rbd clone img@snap cloneimg4 + $ DEV=$(sudo rbd map cloneimg4) + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 720896 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 00b0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0340000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 1474560 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0170000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 0 -l 4194304 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ sudo rbd unmap $DEV + +Multiple object requests: + + $ rbd create --size 50M --stripe-unit 16K --stripe-count 5 fancyimg + $ DEV=$(sudo rbd map fancyimg) + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 143360 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 286720 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0008000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0014000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 001c000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0028000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 003c000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0044000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ 
blkdiscard -o 0 -l 573440 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0050000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ sudo rbd unmap $DEV + + $ rbd rm --no-progress fancyimg + $ rbd rm --no-progress cloneimg4 + $ rbd rm --no-progress cloneimg3 + $ rbd rm --no-progress cloneimg2 + $ rbd rm --no-progress cloneimg1 + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_discard_4M.t b/qa/rbd/krbd_discard_4M.t new file mode 100644 index 000000000..7ed744c11 --- /dev/null +++ b/qa/rbd/krbd_discard_4M.t @@ -0,0 +1,330 @@ + + $ rbd create --size 4M img + $ DEV=$(sudo rbd map -o alloc_size=4194304 img) + +Zero, < 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 156672 -l 512 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 64512 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 65024 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 65024 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 65536 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 66048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 66048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 66560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, < 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 163840 -l 65536 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 130048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 130560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 130560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 131072 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ 
xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 131584 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 131584 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 132096 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 37 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 589824 -l 2424832 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 589312 -l 2424832 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 590336 -l 2424832 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Truncate: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4193792 -l 512 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03ffe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4129280 -l 65024 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4128768 -l 65536 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4128256 -l 66048 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03efe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4063744 -l 130560 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4063232 -l 131072 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4062720 -l 131584 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03dfe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 512 -l 4193792 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0000200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Delete: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 0 -l 4194304 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 0000 
0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Empty clone: + + $ xfs_io -c 'pwrite -S 0xab -w 0 4M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + + $ rbd clone img@snap cloneimg1 + $ DEV=$(sudo rbd map -o alloc_size=4194304 cloneimg1) + $ blkdiscard -o 720896 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg2 + $ DEV=$(sudo rbd map -o alloc_size=4194304 cloneimg2) + $ blkdiscard -o 1474560 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg3 + $ DEV=$(sudo rbd map -o alloc_size=4194304 cloneimg3) + $ blkdiscard -o 0 -l 4194304 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + +Full clone: + + $ rbd clone img@snap cloneimg4 + $ DEV=$(sudo rbd map -o alloc_size=4194304 cloneimg4) + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 720896 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 1474560 -l 2719744 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0168000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 0 -l 4194304 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ sudo rbd unmap $DEV + +Multiple object requests: + + $ rbd create --size 50M --stripe-unit 16K --stripe-count 5 fancyimg + $ DEV=$(sudo rbd map -o alloc_size=4194304 fancyimg) + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 143360 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 286720 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 573440 $DEV + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ sudo rbd unmap $DEV + + $ rbd rm --no-progress fancyimg + $ rbd rm --no-progress cloneimg4 + $ rbd rm --no-progress cloneimg3 + $ rbd rm --no-progress cloneimg2 + $ rbd rm --no-progress cloneimg1 + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_discard_512b.t b/qa/rbd/krbd_discard_512b.t new file mode 100644 index 000000000..6669ca8fc --- /dev/null +++ b/qa/rbd/krbd_discard_512b.t @@ -0,0 +1,416 @@ + + $ rbd create --size 4M img + $ DEV=$(sudo rbd map -o alloc_size=512 img) + +Zero, < 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 156672 -l 512 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0026400 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0026600 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ 
blkdiscard -o 131584 -l 64512 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 002fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 65024 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 65024 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 002fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 65536 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 66048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 66048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 66560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, < 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 163840 -l 65536 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0028000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0038000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 130048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 003fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131584 -l 130560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 130560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 003fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 131072 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 131072 -l 131584 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 
0040200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 131584 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 130560 -l 132096 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 37 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 589824 -l 2424832 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0090000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02e0000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 589312 -l 2424832 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 008fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02dfe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 590336 -l 2424832 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0090200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02e0200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Truncate: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4193792 -l 512 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03ffe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4129280 -l 65024 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4128768 -l 65536 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4128256 -l 66048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03efe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4063744 -l 130560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4063232 -l 131072 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 4062720 -l 131584 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03dfe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 512 -l 4193792 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0000200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Delete: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 0 -l 4194304 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Empty clone: + + $ xfs_io -c 'pwrite -S 0xab -w 0 4M' $DEV >/dev/null + $ sudo rbd unmap $DEV + 
$ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + + $ rbd clone img@snap cloneimg1 + $ DEV=$(sudo rbd map -o alloc_size=512 cloneimg1) + $ blkdiscard -o 720896 -l 2719744 $DEV + $ hexdump $DEV + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg2 + $ DEV=$(sudo rbd map -o alloc_size=512 cloneimg2) + $ blkdiscard -o 1474560 -l 2719744 $DEV + $ hexdump $DEV + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg3 + $ DEV=$(sudo rbd map -o alloc_size=512 cloneimg3) + $ blkdiscard -o 0 -l 4194304 $DEV + $ hexdump $DEV + 0000000 abab abab abab abab abab abab abab abab + * + 0400000 + $ sudo rbd unmap $DEV + +Full clone: + + $ rbd clone img@snap cloneimg4 + $ DEV=$(sudo rbd map -o alloc_size=512 cloneimg4) + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 720896 -l 2719744 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 00b0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0348000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 1474560 -l 2719744 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0168000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ blkdiscard -o 0 -l 4194304 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ sudo rbd unmap $DEV + +Multiple object requests: + + $ rbd create --size 50M --stripe-unit 16K --stripe-count 5 fancyimg + $ DEV=$(sudo rbd map -o alloc_size=512 fancyimg) + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 143360 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0023000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 286720 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0046000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ blkdiscard -o 0 -l 573440 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 008c000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ sudo rbd unmap $DEV + + $ rbd rm --no-progress fancyimg + $ rbd rm --no-progress cloneimg4 + $ rbd rm --no-progress cloneimg3 + $ rbd rm --no-progress cloneimg2 + $ rbd rm --no-progress cloneimg1 + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_discard_granularity.t b/qa/rbd/krbd_discard_granularity.t new file mode 100644 index 000000000..844643bae --- /dev/null +++ b/qa/rbd/krbd_discard_granularity.t @@ -0,0 +1,40 @@ + + $ rbd create --size 20M img + + $ DEV=$(sudo rbd map img) + $ blockdev --getiomin $DEV + 65536 + $ blockdev --getioopt $DEV + 65536 + $ cat /sys/block/${DEV#/dev/}/queue/discard_granularity + 65536 + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map -o alloc_size=512 img) + $ blockdev --getiomin $DEV + 512 + $ blockdev --getioopt $DEV + 512 + $ cat /sys/block/${DEV#/dev/}/queue/discard_granularity + 512 + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map -o alloc_size=4194304 img) + $ blockdev --getiomin $DEV + 4194304 + $ blockdev --getioopt $DEV + 4194304 + $ cat /sys/block/${DEV#/dev/}/queue/discard_granularity + 4194304 + $ sudo rbd unmap $DEV + 
+ $ DEV=$(sudo rbd map -o alloc_size=8388608 img) + $ blockdev --getiomin $DEV + 4194304 + $ blockdev --getioopt $DEV + 4194304 + $ cat /sys/block/${DEV#/dev/}/queue/discard_granularity + 4194304 + $ sudo rbd unmap $DEV + + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_get_features.t b/qa/rbd/krbd_get_features.t new file mode 100644 index 000000000..0c26e9c33 --- /dev/null +++ b/qa/rbd/krbd_get_features.t @@ -0,0 +1,31 @@ + +journaling makes the image only unwritable, rather than both unreadable +and unwritable: + + $ rbd create --size 1 --image-feature layering,exclusive-lock,journaling img + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone --image-feature layering,exclusive-lock,journaling img@snap cloneimg + + $ DEV=$(sudo rbd map img) + rbd: sysfs write failed + rbd: map failed: (6) No such device or address + [6] + $ DEV=$(sudo rbd map --read-only img) + $ blockdev --getro $DEV + 1 + $ sudo rbd unmap $DEV + + $ DEV=$(sudo rbd map cloneimg) + rbd: sysfs write failed + rbd: map failed: (6) No such device or address + [6] + $ DEV=$(sudo rbd map --read-only cloneimg) + $ blockdev --getro $DEV + 1 + $ sudo rbd unmap $DEV + + $ rbd rm --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_huge_image.t b/qa/rbd/krbd_huge_image.t new file mode 100644 index 000000000..e0ce6a954 --- /dev/null +++ b/qa/rbd/krbd_huge_image.t @@ -0,0 +1,41 @@ + + $ get_field() { + > rbd info --format=json $1 | python3 -c "import sys, json; print(json.load(sys.stdin)['$2'])" + > } + +Write to first and last sectors and make sure we hit the right objects: + + $ ceph osd pool create hugeimg 12 >/dev/null 2>&1 + $ rbd pool init hugeimg + $ rbd create --size 4E --object-size 4K --image-feature layering hugeimg/img + $ DEV=$(sudo rbd map hugeimg/img) + $ xfs_io -c 'pwrite 0 512' $DEV >/dev/null # first sector + $ xfs_io -c 'pwrite 4611686018427387392 512' $DEV >/dev/null # last sector + $ sudo rbd unmap $DEV + + $ get_field hugeimg/img size + 4611686018427387904 + $ get_field hugeimg/img objects + 1125899906842624 + $ rados -p hugeimg ls | grep $(get_field hugeimg/img block_name_prefix) | sort + .*\.0000000000000000 (re) + .*\.0003ffffffffffff (re) + +Dump first and last megabytes: + + $ DEV=$(sudo rbd map hugeimg/img) + $ dd if=$DEV bs=1M count=1 status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0000200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0100000 + $ dd if=$DEV bs=1M skip=4398046511103 status=none | hexdump + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 00ffe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0100000 + $ sudo rbd unmap $DEV + + $ ceph osd pool delete hugeimg hugeimg --yes-i-really-really-mean-it >/dev/null 2>&1 diff --git a/qa/rbd/krbd_modprobe.t b/qa/rbd/krbd_modprobe.t new file mode 100644 index 000000000..a0e04d394 --- /dev/null +++ b/qa/rbd/krbd_modprobe.t @@ -0,0 +1,10 @@ + + $ sudo modprobe -r rbd + $ sudo modprobe -r libceph + $ lsmod | grep libceph + [1] + $ rbd create --size 1 img + $ DEV=$(sudo rbd map img) + $ sudo grep -q ',key=' /sys/bus/rbd/devices/${DEV#/dev/rbd}/config_info + $ sudo rbd unmap $DEV + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_msgr_segments.t b/qa/rbd/krbd_msgr_segments.t new file mode 100644 index 000000000..b89a921a1 --- /dev/null +++ b/qa/rbd/krbd_msgr_segments.t @@ -0,0 +1,85 @@ + + $ get_block_name_prefix() { + > rbd info --format=json $1 | python3 -c "import sys, json; 
print(json.load(sys.stdin)['block_name_prefix'])" + > } + +Short segments: + + $ rbd create --size 12M img + $ DEV=$(sudo rbd map img) + $ xfs_io -d -c 'pwrite 5120 512' $DEV >/dev/null + $ xfs_io -d -c 'pwrite 12577280 512' $DEV >/dev/null + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0001400 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0001600 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0bfea00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0bfec00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0c00000 + $ sudo rbd unmap $DEV + $ rbd rm --no-progress img + +Short segment, ceph_msg_data_bio_cursor_init(): + + $ rbd create --size 12M img + $ DEV=$(sudo rbd map img) + $ xfs_io -d -c 'pwrite 0 512' $DEV >/dev/null + $ rados -p rbd stat $(get_block_name_prefix img).0000000000000000 + .* size 512 (re) + $ xfs_io -d -c 'pread -b 2M 0 2M' $DEV >/dev/null + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0000200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0c00000 + $ sudo rbd unmap $DEV + $ rbd rm --no-progress img + +Short segment, ceph_msg_data_bio_advance(): + + $ rbd create --size 12M img + $ DEV=$(sudo rbd map img) + $ xfs_io -d -c 'pwrite 0 1049088' $DEV >/dev/null + $ rados -p rbd stat $(get_block_name_prefix img).0000000000000000 + .* size 1049088 (re) + $ xfs_io -d -c 'pread -b 2M 0 2M' $DEV >/dev/null + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0100200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0c00000 + $ sudo rbd unmap $DEV + $ rbd rm --no-progress img + +Cloned bios (dm-snapshot.ko, based on generic/081): + + $ rbd create --size 300M img + $ DEV=$(sudo rbd map img) + $ sudo vgcreate vg_img $DEV + Physical volume "/dev/rbd?" successfully created* (glob) + Volume group "vg_img" successfully created + $ sudo lvcreate -L 256M -n lv_img vg_img + Logical volume "lv_img" created. + $ udevadm settle + $ sudo mkfs.ext4 -q /dev/mapper/vg_img-lv_img + $ sudo lvcreate -L 4M --snapshot -n lv_snap vg_img/lv_img | grep created + Logical volume "lv_snap" created. + $ udevadm settle + $ sudo mount /dev/mapper/vg_img-lv_snap /mnt + $ sudo xfs_io -f -c 'pwrite 0 5M' /mnt/file1 >/dev/null + $ sudo umount /mnt + $ sudo vgremove -f vg_img + Logical volume "lv_snap" successfully removed + Logical volume "lv_img" successfully removed + Volume group "vg_img" successfully removed + $ sudo pvremove $DEV + Labels on physical volume "/dev/rbd?" 
successfully wiped* (glob) + $ sudo rbd unmap $DEV + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_parent_overlap.t b/qa/rbd/krbd_parent_overlap.t new file mode 100644 index 000000000..3489d83db --- /dev/null +++ b/qa/rbd/krbd_parent_overlap.t @@ -0,0 +1,69 @@ + +For reads, only the object extent needs to be reverse mapped: + + $ rbd create --size 5M img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite 0 5M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd resize --no-progress --size 12M cloneimg + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0500000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0c00000 + $ dd if=$DEV iflag=direct bs=4M status=none | hexdump + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0500000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0c00000 + $ sudo rbd unmap $DEV + $ rbd rm --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img + +For writes, the entire object needs to be reverse mapped: + + $ rbd create --size 2M img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite 0 1M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + $ rbd clone img@snap cloneimg + $ rbd resize --no-progress --size 8M cloneimg + $ DEV=$(sudo rbd map cloneimg) + $ xfs_io -c 'pwrite -S 0xef 3M 1M' $DEV >/dev/null + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0100000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0300000 efef efef efef efef efef efef efef efef + * + 0400000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0800000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0100000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0300000 efef efef efef efef efef efef efef efef + * + 0400000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0800000 + $ sudo rbd unmap $DEV + $ rbd rm --no-progress cloneimg + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_whole_object_zeroout.t b/qa/rbd/krbd_whole_object_zeroout.t new file mode 100644 index 000000000..850c8c968 --- /dev/null +++ b/qa/rbd/krbd_whole_object_zeroout.t @@ -0,0 +1,143 @@ + + $ get_block_name_prefix() { + > rbd info --format=json $1 | python3 -c "import sys, json; print(json.load(sys.stdin)['block_name_prefix'])" + > } + + $ rbd create --size 200M img + $ DEV=$(sudo rbd map img) + $ xfs_io -c 'pwrite -b 4M 0 200M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + +cloneimg1: +1 object in an object set, 4M +25 full object sets +25 objects in total + + $ rbd clone img@snap cloneimg1 + $ DEV=$(sudo rbd map cloneimg1) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ fallocate -z -l 100M $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg1) + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + +cloneimg2: +7 objects in an object set, 28M +3 full object sets +min((100M % 28M) / 512K, 7) = 7 objects in the last object 
set +28 objects in total + + $ rbd clone --stripe-unit 512K --stripe-count 7 img@snap cloneimg2 + $ DEV=$(sudo rbd map cloneimg2) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ fallocate -z -l 100M $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg2) + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + +cloneimg3: +23 objects in an object set, 92M +1 full object set +min((100M % 92M) / 512K, 23) = 16 objects in the last object set +39 objects in total + + $ rbd clone --stripe-unit 512K --stripe-count 23 img@snap cloneimg3 + $ DEV=$(sudo rbd map cloneimg3) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ fallocate -z -l 100M $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg3) + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + +cloneimg4: +65 objects in an object set, 260M +0 full object sets +min((100M % 260M) / 512K, 65) = 65 objects in the last object set +65 objects in total + + $ rbd clone --stripe-unit 512K --stripe-count 65 img@snap cloneimg4 + $ DEV=$(sudo rbd map cloneimg4) + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ fallocate -z -l 100M $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + $ DEV=$(sudo rbd map cloneimg4) + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 6400000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + c800000 + $ sudo rbd unmap $DEV + + $ rados -p rbd ls | grep -c $(get_block_name_prefix cloneimg1) + 25 + $ rados -p rbd ls | grep -c $(get_block_name_prefix cloneimg2) + 28 + $ rados -p rbd ls | grep -c $(get_block_name_prefix cloneimg3) + 39 + $ rados -p rbd ls | grep -c $(get_block_name_prefix cloneimg4) + 65 + + $ rbd rm --no-progress cloneimg4 + $ rbd rm --no-progress cloneimg3 + $ rbd rm --no-progress cloneimg2 + $ rbd rm --no-progress cloneimg1 + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img diff --git a/qa/rbd/krbd_zeroout.t b/qa/rbd/krbd_zeroout.t new file mode 100644 index 000000000..10bb230e8 --- /dev/null +++ b/qa/rbd/krbd_zeroout.t @@ -0,0 +1,422 @@ + + $ rbd create --size 4M img + $ DEV=$(sudo rbd map img) + +Zero, < 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 156672 -l 512 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0026400 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0026600 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131584 -l 64512 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 002fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131584 -l 65024 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 
0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131072 -l 65024 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 002fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 1 block: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131072 -l 65536 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131072 -l 66048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 130560 -l 66048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 130560 -l 66560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0030200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, < 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 163840 -l 65536 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0028000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0038000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131584 -l 130048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 003fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131584 -l 130560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131072 -l 130560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 003fe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 2 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131072 -l 131072 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 131072 -l 131584 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0020000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 130560 -l 131584 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null 
+ $ fallocate -z -o 130560 -l 132096 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 001fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0040200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Zero, 37 blocks: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 589824 -l 2424832 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0090000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02e0000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 589312 -l 2424832 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 008fe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02dfe00 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 590336 -l 2424832 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0090200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 02e0200 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + +Truncate: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 4193792 -l 512 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03ffe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 4129280 -l 65024 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 4128768 -l 65536 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03f0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 4128256 -l 66048 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03efe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 4063744 -l 130560 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 4063232 -l 131072 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03e0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 4062720 -l 131584 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 03dfe00 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 512 -l 4193792 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0000200 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Delete: + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 0 -l 4194304 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + +Empty clone: + + $ xfs_io -c 'pwrite -S 0xab -w 0 4M' $DEV >/dev/null + $ sudo rbd unmap $DEV + $ rbd snap create --no-progress img@snap + $ rbd snap protect img@snap + + $ rbd clone img@snap cloneimg1 + $ DEV=$(sudo rbd map cloneimg1) + $ fallocate -z -o 720896 -l 2719744 $DEV + $ hexdump $DEV + 0000000 abab abab abab abab abab abab abab abab + * + 00b0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0348000 abab abab abab abab abab abab abab abab + * 
+ 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg2 + $ DEV=$(sudo rbd map cloneimg2) + $ fallocate -z -o 1474560 -l 2719744 $DEV + $ hexdump $DEV + 0000000 abab abab abab abab abab abab abab abab + * + 0168000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + $ sudo rbd unmap $DEV + + $ rbd clone img@snap cloneimg3 + $ DEV=$(sudo rbd map cloneimg3) + $ fallocate -z -o 0 -l 4194304 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + $ sudo rbd unmap $DEV + +Full clone: + + $ rbd clone img@snap cloneimg4 + $ DEV=$(sudo rbd map cloneimg4) + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 720896 -l 2719744 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 00b0000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0348000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 1474560 -l 2719744 $DEV + $ hexdump $DEV + 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 0168000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ xfs_io -c 'pwrite -w 0 4M' $DEV >/dev/null + $ fallocate -z -o 0 -l 4194304 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0400000 + + $ sudo rbd unmap $DEV + +Multiple object requests: + + $ rbd create --size 50M --stripe-unit 16K --stripe-count 5 fancyimg + $ DEV=$(sudo rbd map fancyimg) + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ fallocate -z -o 0 -l 143360 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0023000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ fallocate -z -o 0 -l 286720 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 0046000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ xfs_io -c 'pwrite -b 4M -w 0 50M' $DEV >/dev/null + $ fallocate -z -o 0 -l 573440 $DEV + $ hexdump $DEV + 0000000 0000 0000 0000 0000 0000 0000 0000 0000 + * + 008c000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd + * + 3200000 + + $ sudo rbd unmap $DEV + + $ rbd rm --no-progress fancyimg + $ rbd rm --no-progress cloneimg4 + $ rbd rm --no-progress cloneimg3 + $ rbd rm --no-progress cloneimg2 + $ rbd rm --no-progress cloneimg1 + $ rbd snap unprotect img@snap + $ rbd snap rm --no-progress img@snap + $ rbd rm --no-progress img diff --git a/qa/rbd/rbd.sh b/qa/rbd/rbd.sh new file mode 100755 index 000000000..2b7ce8ee2 --- /dev/null +++ b/qa/rbd/rbd.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +set -x + +basedir=`echo $0 | sed 's/[^/]*$//g'`. +. 
$basedir/common.sh + +rbd_test_init + + +create_multiple() { + for i in `seq 1 10`; do + rbd_create_image $i + done + + for i in `seq 1 10`; do + rbd_add $i + done + for i in `seq 1 10`; do + devname=/dev/rbd`eval echo \\$rbd$i` + echo $devname + done + for i in `seq 1 10`; do + devid=`eval echo \\$rbd$i` + rbd_remove $devid + done + for i in `seq 1 10`; do + rbd_rm_image $i + done +} + +test_dbench() { + rbd_create_image 0 + rbd_add 0 + + devname=/dev/rbd$rbd0 + + mkfs -t ext3 $devname + mount -t ext3 $devname $mnt + + dbench -D $mnt -t 30 5 + sync + + umount $mnt + rbd_remove $rbd0 + rbd_rm_image 0 +} + +create_multiple +test_dbench + diff --git a/qa/releases/infernalis.yaml b/qa/releases/infernalis.yaml new file mode 100644 index 000000000..f21e7fe8a --- /dev/null +++ b/qa/releases/infernalis.yaml @@ -0,0 +1,5 @@ +tasks: +- exec: + osd.0: + - ceph osd set sortbitwise + - for p in `ceph osd pool ls` ; do ceph osd pool set $p use_gmt_hitset true ; done diff --git a/qa/releases/jewel.yaml b/qa/releases/jewel.yaml new file mode 100644 index 000000000..ab09c083e --- /dev/null +++ b/qa/releases/jewel.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + osd.0: + - ceph osd set sortbitwise + - ceph osd set require_jewel_osds + - for p in `ceph osd pool ls` ; do ceph osd pool set $p use_gmt_hitset true ; done diff --git a/qa/releases/kraken.yaml b/qa/releases/kraken.yaml new file mode 100644 index 000000000..57342057a --- /dev/null +++ b/qa/releases/kraken.yaml @@ -0,0 +1,4 @@ +tasks: +- exec: + osd.0: + - ceph osd set require_kraken_osds diff --git a/qa/releases/luminous-with-mgr.yaml b/qa/releases/luminous-with-mgr.yaml new file mode 100644 index 000000000..ea3130768 --- /dev/null +++ b/qa/releases/luminous-with-mgr.yaml @@ -0,0 +1,11 @@ +tasks: +- exec: + osd.0: + - ceph osd require-osd-release luminous + - ceph osd set-require-min-compat-client luminous +- ceph.healthy: +overrides: + ceph: + conf: + mon: + mon warn on osd down out interval zero: false diff --git a/qa/releases/luminous.yaml b/qa/releases/luminous.yaml new file mode 100644 index 000000000..768861c21 --- /dev/null +++ b/qa/releases/luminous.yaml @@ -0,0 +1,21 @@ +tasks: +- exec: + mgr.x: + - mkdir -p /var/lib/ceph/mgr/ceph-x + - ceph auth get-or-create-key mgr.x mon 'allow profile mgr' + - ceph auth export mgr.x > /var/lib/ceph/mgr/ceph-x/keyring +- ceph.restart: + daemons: [mgr.x] + wait-for-healthy: false +- exec: + osd.0: + - ceph osd require-osd-release luminous + - ceph osd set-require-min-compat-client luminous +- ceph.healthy: +overrides: + ceph: + conf: + mon: + mon warn on osd down out interval zero: false + log-ignorelist: + - no active mgr diff --git a/qa/releases/mimic.yaml b/qa/releases/mimic.yaml new file mode 100644 index 000000000..f901e7eda --- /dev/null +++ b/qa/releases/mimic.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + osd.0: + - ceph osd require-osd-release mimic + - ceph osd set-require-min-compat-client mimic +- ceph.healthy: diff --git a/qa/releases/nautilus.yaml b/qa/releases/nautilus.yaml new file mode 100644 index 000000000..dd650f173 --- /dev/null +++ b/qa/releases/nautilus.yaml @@ -0,0 +1,7 @@ +tasks: +- exec: + osd.0: + - ceph osd require-osd-release nautilus + - ceph osd set-require-min-compat-client nautilus + - for p in `ceph osd pool ls`; do ceph osd pool set $p pg_autoscale_mode off; done +- ceph.healthy: diff --git a/qa/releases/octopus.yaml b/qa/releases/octopus.yaml new file mode 100644 index 000000000..935603395 --- /dev/null +++ b/qa/releases/octopus.yaml @@ -0,0 +1,7 @@ +tasks: +- exec: + osd.0: + - ceph 
osd require-osd-release octopus + - ceph osd set-require-min-compat-client octopus + - for f in `ceph osd pool ls` ; do ceph osd pool set $f pg_autoscale_mode off ; done +- ceph.healthy: diff --git a/qa/releases/pacific-from-o.yaml b/qa/releases/pacific-from-o.yaml new file mode 100644 index 000000000..a5eabff35 --- /dev/null +++ b/qa/releases/pacific-from-o.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + osd.0: + - ceph osd require-osd-release pacific + - ceph osd set-require-min-compat-client pacific +- ceph.healthy: diff --git a/qa/releases/pacific.yaml b/qa/releases/pacific.yaml new file mode 100644 index 000000000..fb5c39a08 --- /dev/null +++ b/qa/releases/pacific.yaml @@ -0,0 +1,7 @@ +tasks: +- exec: + osd.0: + - ceph osd require-osd-release pacific + - ceph osd set-require-min-compat-client pacific + - for f in `ceph osd pool ls` ; do ceph osd pool set $f pg_autoscale_mode off ; done +- ceph.healthy: diff --git a/qa/releases/quincy.yaml b/qa/releases/quincy.yaml new file mode 100644 index 000000000..731c6bead --- /dev/null +++ b/qa/releases/quincy.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + osd.0: + - ceph osd require-osd-release quincy + - ceph osd set-require-min-compat-client quincy +- ceph.healthy: diff --git a/qa/releases/reef.yaml b/qa/releases/reef.yaml new file mode 100644 index 000000000..a64b2fb9f --- /dev/null +++ b/qa/releases/reef.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + osd.0: + - ceph osd require-osd-release reef + - ceph osd set-require-min-compat-client reef +- ceph.healthy: diff --git a/qa/rgw/ignore-pg-availability.yaml b/qa/rgw/ignore-pg-availability.yaml new file mode 100644 index 000000000..732e40306 --- /dev/null +++ b/qa/rgw/ignore-pg-availability.yaml @@ -0,0 +1,11 @@ +# https://tracker.ceph.com/issues/45802 +# https://tracker.ceph.com/issues/51282 +# https://tracker.ceph.com/issues/61168 +# https://tracker.ceph.com/issues/62504 +overrides: + ceph: + log-ignorelist: + - \(PG_AVAILABILITY\) + - \(PG_DEGRADED\) + - \(POOL_APP_NOT_ENABLED\) + - not have an application enabled diff --git a/qa/rgw/s3tests-branch.yaml b/qa/rgw/s3tests-branch.yaml new file mode 100644 index 000000000..10a5e9374 --- /dev/null +++ b/qa/rgw/s3tests-branch.yaml @@ -0,0 +1,4 @@ +overrides: + s3tests: + force-branch: ceph-reef + # git_remote: https://github.com/ceph/ diff --git a/qa/rgw_bucket_sharding/default.yaml b/qa/rgw_bucket_sharding/default.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/rgw_bucket_sharding/default.yaml diff --git a/qa/rgw_bucket_sharding/single.yaml b/qa/rgw_bucket_sharding/single.yaml new file mode 100644 index 000000000..c6b85f7ce --- /dev/null +++ b/qa/rgw_bucket_sharding/single.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + rgw override bucket index max shards: 1 diff --git a/qa/rgw_frontend/beast.yaml b/qa/rgw_frontend/beast.yaml new file mode 100644 index 000000000..369e65f7c --- /dev/null +++ b/qa/rgw_frontend/beast.yaml @@ -0,0 +1,3 @@ +overrides: + rgw: + frontend: beast diff --git a/qa/rgw_pool_type/ec-profile.yaml b/qa/rgw_pool_type/ec-profile.yaml new file mode 100644 index 000000000..05384cb53 --- /dev/null +++ b/qa/rgw_pool_type/ec-profile.yaml @@ -0,0 +1,10 @@ +overrides: + rgw: + ec-data-pool: true + erasure_code_profile: + name: testprofile + k: 3 + m: 1 + crush-failure-domain: osd + s3tests: + slow_backend: true diff --git a/qa/rgw_pool_type/ec.yaml b/qa/rgw_pool_type/ec.yaml new file mode 100644 index 000000000..7c99b7f85 --- /dev/null +++ b/qa/rgw_pool_type/ec.yaml @@ -0,0 +1,5 @@ +overrides: + rgw: + ec-data-pool: 
true + s3tests: + slow_backend: true diff --git a/qa/rgw_pool_type/replicated.yaml b/qa/rgw_pool_type/replicated.yaml new file mode 100644 index 000000000..c91709eaa --- /dev/null +++ b/qa/rgw_pool_type/replicated.yaml @@ -0,0 +1,3 @@ +overrides: + rgw: + ec-data-pool: false diff --git a/qa/run-standalone.sh b/qa/run-standalone.sh new file mode 100755 index 000000000..9daaabfa1 --- /dev/null +++ b/qa/run-standalone.sh @@ -0,0 +1,141 @@ +#!/usr/bin/env bash +set -e + +if [ ! -e CMakeCache.txt -o ! -d bin ]; then + echo 'run this from the build dir' + exit 1 +fi + +function get_cmake_variable() { + local variable=$1 + grep "$variable" CMakeCache.txt | cut -d "=" -f 2 +} + +function get_python_path() { + python_common=$(realpath ../src/python-common) + echo $(realpath ../src/pybind):$(pwd)/lib/cython_modules/lib.3:$python_common +} + +if [ `uname` = FreeBSD ]; then + # otherwise module prettytable will not be found + export PYTHONPATH=$(get_python_path):/usr/local/lib/python3.6/site-packages + exec_mode=+111 + KERNCORE="kern.corefile" + COREPATTERN="core.%N.%P" +else + export PYTHONPATH=$(get_python_path) + exec_mode=/111 + KERNCORE="kernel.core_pattern" + COREPATTERN="core.%e.%p.%t" +fi + +function cleanup() { + if [ -n "$precore" ]; then + sudo sysctl -w "${KERNCORE}=${precore}" + fi +} + +function finish() { + cleanup + exit 0 +} + +trap finish TERM HUP INT + +PATH=$(pwd)/bin:$PATH + +# add /sbin and /usr/sbin to PATH to find sysctl in those cases where the +# user's PATH does not get these directories by default (e.g., tumbleweed) +PATH=$PATH:/sbin:/usr/sbin + +export LD_LIBRARY_PATH="$(pwd)/lib" + +# TODO: Use getops +dryrun=false +if [[ "$1" = "--dry-run" ]]; then + dryrun=true + shift +fi + +all=false +if [ "$1" = "" ]; then + all=true +fi + +select=("$@") + +location="../qa/standalone" + +count=0 +errors=0 +userargs="" +precore="$(sysctl -n $KERNCORE)" +# If corepattern already set, avoid having to use sudo +if [ "$precore" = "$COREPATTERN" ]; then + precore="" +else + sudo sysctl -w "${KERNCORE}=${COREPATTERN}" +fi +# Clean out any cores in core target directory (currently .) +if ls $(dirname $(sysctl -n $KERNCORE)) | grep -q '^core\|core$' ; then + mkdir found.cores.$$ 2> /dev/null || true + for i in $(ls $(dirname $(sysctl -n $KERNCORE)) | grep '^core\|core$'); do + mv $i found.cores.$$ + done + echo "Stray cores put in $(pwd)/found.cores.$$" +fi + +ulimit -c unlimited +for f in $(cd $location ; find . -mindepth 2 -perm $exec_mode -type f) +do + f=$(echo $f | sed 's/\.\///') + if [[ "$all" = "false" ]]; then + found=false + for c in "${!select[@]}" + do + # Get command and any arguments of subset of tests to run + allargs="${select[$c]}" + arg1=$(echo "$allargs" | cut --delimiter " " --field 1) + # Get user args for this selection for use below + userargs="$(echo $allargs | cut -s --delimiter " " --field 2-)" + if [[ "$arg1" = $(basename $f) ]] || [[ "$arg1" = $(dirname $f) ]]; then + found=true + break + fi + if [[ "$arg1" = "$f" ]]; then + found=true + break + fi + done + if [[ "$found" = "false" ]]; then + continue + fi + fi + # Don't run test-failure.sh unless explicitly specified + if [ "$all" = "true" -a "$f" = "special/test-failure.sh" ]; then + continue + fi + + cmd="$location/$f $userargs" + count=$(expr $count + 1) + echo "--- $cmd ---" + if [[ "$dryrun" != "true" ]]; then + if ! PATH=$PATH:bin \ + CEPH_ROOT=.. \ + CEPH_LIB=lib \ + LOCALRUN=yes \ + time -f "Elapsed %E (%e seconds)" $cmd ; then + echo "$f .............. 
FAILED" + errors=$(expr $errors + 1) + fi + fi +done +cleanup + +if [ "$errors" != "0" ]; then + echo "$errors TESTS FAILED, $count TOTAL TESTS" + exit 1 +fi + +echo "ALL $count TESTS PASSED" +exit 0 diff --git a/qa/run_xfstests-obsolete.sh b/qa/run_xfstests-obsolete.sh new file mode 100644 index 000000000..48afff879 --- /dev/null +++ b/qa/run_xfstests-obsolete.sh @@ -0,0 +1,458 @@ +#!/usr/bin/env bash + +# Copyright (C) 2012 Dreamhost, LLC +# +# This is free software; see the source for copying conditions. +# There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# This is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as +# published by the Free Software Foundation version 2. + +# Usage: +# run_xfs_tests -t /dev/<testdev> -s /dev/<scratchdev> -f <fstype> <tests> +# - test device and scratch device will both get trashed +# - fstypes can be xfs, ext4, or btrfs (xfs default) +# - tests can be listed individually or in ranges: 1 3-5 8 +# tests can also be specified by group: -g quick +# +# Exit status: +# 0: success +# 1: usage error +# 2: other runtime error +# 99: argument count error (programming error) +# 100: getopt error (internal error) + +# Alex Elder <elder@dreamhost.com> +# April 13, 2012 + +set -e + +PROGNAME=$(basename $0) + +# xfstests is downloaded from this git repository and then built. +# XFSTESTS_REPO="git://oss.sgi.com/xfs/cmds/xfstests.git" +XFSTESTS_REPO="git://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git" + +# Default command line option values +COUNT="1" +FS_TYPE="xfs" +SCRATCH_DEV="" # MUST BE SPECIFIED +TEST_DEV="" # MUST BE SPECIFIED +TESTS="-g auto" # The "auto" group is supposed to be "known good" + +# rbd presents geometry information that causes mkfs.xfs to +# issue a warning. This option avoids this class of problems. +XFS_MKFS_OPTIONS="-l su=32k" + +# Override the default test list with a list of tests known to pass +# until we can work through getting them all passing reliably. +TESTS="1-7 9 11-15 17 19-21 26-29 31-34 41 46-48 50-54 56 61 63-67 69-70 74-76" +TESTS="${TESTS} 78 79 84-89 91-92 100 103 105 108 110 116-121 124 126" +TESTS="${TESTS} 129-135 137-141 164-167 182 184 187-190 192 194" +TESTS="${TESTS} 196 199 201 203 214-216 220-227 234 236-238 241 243-249" +TESTS="${TESTS} 253 257-259 261 262 269 273 275 277 278 280 285 286" +# 275 was the highest available test as of 4/10/12. +# 289 was the highest available test as of 11/15/12. + +###### +# Some explanation of why tests have been excluded above: +# +# Test 008 was pulled because it contained a race condition leading to +# spurious failures. +# +# Test 049 was pulled because it caused a kernel fault. 
+# http://tracker.newdream.net/issues/2260 +# Test 232 was pulled because it caused an XFS error +# http://tracker.newdream.net/issues/2302 +# +# This test passes but takes a LONG time (1+ hours): 127 +# +# These were not run for one (anticipated) reason or another: +# 010 016 030 035 040 044 057 058-060 072 077 090 093-095 097-099 104 +# 112 113 122 123 125 128 142 147-163 168 175-178 180 185 191 193 +# 195 197 198 207-213 217 228 230-233 235 239 240 252 254 255 264-266 +# 270-272 276 278-279 281-284 288 289 +# +# These tests all failed (produced output different from golden): +# 042 073 083 096 109 169 170 200 202 204-206 218 229 240 242 250 +# 263 276 277 279 287 +# +# The rest were not part of the "auto" group: +# 018 022 023 024 025 036 037 038 039 043 055 071 080 081 082 101 +# 102 106 107 111 114 115 136 171 172 173 251 267 268 +###### + +# print an error message and quit with non-zero status +function err() { + if [ $# -gt 0 ]; then + echo "" >&2 + echo "${PROGNAME}: ${FUNCNAME[1]}: $@" >&2 + fi + exit 2 +} + +# routine used to validate argument counts to all shell functions +function arg_count() { + local func + local want + local got + + if [ $# -eq 2 ]; then + func="${FUNCNAME[1]}" # calling function + want=$1 + got=$2 + else + func="${FUNCNAME[0]}" # i.e., arg_count + want=2 + got=$# + fi + [ "${want}" -eq "${got}" ] && return 0 + echo "${PROGNAME}: ${func}: arg count bad (want ${want} got ${got})" >&2 + exit 99 +} + +# validation function for repeat count argument +function count_valid() { + arg_count 1 $# + + test "$1" -gt 0 # 0 is pointless; negative is wrong +} + +# validation function for filesystem type argument +function fs_type_valid() { + arg_count 1 $# + + case "$1" in + xfs|ext4|btrfs) return 0 ;; + *) return 1 ;; + esac +} + +# validation function for device arguments +function device_valid() { + arg_count 1 $# + + # Very simple testing--really should try to be more careful... 
+ test -b "$1" +} + +# print a usage message and quit +# +# if a message is supplied, print that first, and then exit +# with non-zero status +function usage() { + if [ $# -gt 0 ]; then + echo "" >&2 + echo "$@" >&2 + fi + + echo "" >&2 + echo "Usage: ${PROGNAME} <options> <tests>" >&2 + echo "" >&2 + echo " options:" >&2 + echo " -h or --help" >&2 + echo " show this message" >&2 + echo " -c or --count" >&2 + echo " iteration count (1 or more)" >&2 + echo " -f or --fs-type" >&2 + echo " one of: xfs, ext4, btrfs" >&2 + echo " (default fs-type: xfs)" >&2 + echo " -s or --scratch-dev (REQUIRED)" >&2 + echo " name of device used for scratch filesystem" >&2 + echo " -t or --test-dev (REQUIRED)" >&2 + echo " name of device used for test filesystem" >&2 + echo " tests:" >&2 + echo " list of test numbers or ranges, e.g.:" >&2 + echo " 1-9 11-15 17 19-21 26-28 31-34 41" >&2 + echo " or possibly an xfstests test group, e.g.:" >&2 + echo " -g quick" >&2 + echo " (default tests: -g auto)" >&2 + echo "" >&2 + + [ $# -gt 0 ] && exit 1 + + exit 0 # This is used for a --help +} + +# parse command line arguments +function parseargs() { + # Short option flags + SHORT_OPTS="" + SHORT_OPTS="${SHORT_OPTS},h" + SHORT_OPTS="${SHORT_OPTS},c:" + SHORT_OPTS="${SHORT_OPTS},f:" + SHORT_OPTS="${SHORT_OPTS},s:" + SHORT_OPTS="${SHORT_OPTS},t:" + + # Short option flags + LONG_OPTS="" + LONG_OPTS="${LONG_OPTS},help" + LONG_OPTS="${LONG_OPTS},count:" + LONG_OPTS="${LONG_OPTS},fs-type:" + LONG_OPTS="${LONG_OPTS},scratch-dev:" + LONG_OPTS="${LONG_OPTS},test-dev:" + + TEMP=$(getopt --name "${PROGNAME}" \ + --options "${SHORT_OPTS}" \ + --longoptions "${LONG_OPTS}" \ + -- "$@") + eval set -- "$TEMP" + + while [ "$1" != "--" ]; do + case "$1" in + -h|--help) + usage + ;; + -c|--count) + count_valid "$2" || + usage "invalid count '$2'" + COUNT="$2" + shift + ;; + -f|--fs-type) + fs_type_valid "$2" || + usage "invalid fs_type '$2'" + FS_TYPE="$2" + shift + ;; + -s|--scratch-dev) + device_valid "$2" || + usage "invalid scratch-dev '$2'" + SCRATCH_DEV="$2" + shift + ;; + -t|--test-dev) + device_valid "$2" || + usage "invalid test-dev '$2'" + TEST_DEV="$2" + shift + ;; + *) + exit 100 # Internal error + ;; + esac + shift + done + shift + + [ -n "${TEST_DEV}" ] || usage "test-dev must be supplied" + [ -n "${SCRATCH_DEV}" ] || usage "scratch-dev must be supplied" + + [ $# -eq 0 ] || TESTS="$@" +} + +################################################################ + +[ -z "$TESTDIR" ] && export TESTDIR="/tmp/cephtest" + +# Set up some environment for normal teuthology test setup. +# This really should not be necessary but I found it was. 
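# CEPH_ARGS is read by the ceph and rados command-line tools and treated as
# extra default arguments, so with the exports below every CLI call picks up
# the test cluster's conf, keyring and client name without passing them
# explicitly.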
+export CEPH_ARGS="--conf ${TESTDIR}/ceph.conf" +export CEPH_ARGS="${CEPH_ARGS} --keyring ${TESTDIR}/data/client.0.keyring" +export CEPH_ARGS="${CEPH_ARGS} --name client.0" + +export LD_LIBRARY_PATH="${TESTDIR}/binary/usr/local/lib:${LD_LIBRARY_PATH}" +export PATH="${TESTDIR}/binary/usr/local/bin:${PATH}" +export PATH="${TESTDIR}/binary/usr/local/sbin:${PATH}" + +################################################################ + +# Filesystem-specific mkfs options--set if not supplied +export XFS_MKFS_OPTIONS="${XFS_MKFS_OPTIONS:--f -l su=65536}" +export EXT4_MKFS_OPTIONS="${EXT4_MKFS_OPTIONS:--F}" +export BTRFS_MKFS_OPTION # No defaults + +XFSTESTS_DIR="/var/lib/xfstests" # Where the tests live + +# download, build, and install xfstests +function install_xfstests() { + arg_count 0 $# + + local multiple="" + local ncpu + + pushd "${TESTDIR}" + + git clone "${XFSTESTS_REPO}" + + cd xfstests-dev + + # FIXME: use an older version before the tests were rearranged! + git reset --hard e5f1a13792f20cfac097fef98007610b422f2cac + + ncpu=$(getconf _NPROCESSORS_ONLN 2>&1) + [ -n "${ncpu}" -a "${ncpu}" -gt 1 ] && multiple="-j ${ncpu}" + + make realclean + make ${multiple} + make -k install + + popd +} + +# remove previously-installed xfstests files +function remove_xfstests() { + arg_count 0 $# + + rm -rf "${TESTDIR}/xfstests-dev" + rm -rf "${XFSTESTS_DIR}" +} + +# create a host options file that uses the specified devices +function setup_host_options() { + arg_count 0 $# + + # Create mount points for the test and scratch filesystems + local test_dir="$(mktemp -d ${TESTDIR}/test_dir.XXXXXXXXXX)" + local scratch_dir="$(mktemp -d ${TESTDIR}/scratch_mnt.XXXXXXXXXX)" + + # Write a host options file that uses these devices. + # xfstests uses the file defined by HOST_OPTIONS as the + # place to get configuration variables for its run, and + # all (or most) of the variables set here are required. + export HOST_OPTIONS="$(mktemp ${TESTDIR}/host_options.XXXXXXXXXX)" + cat > "${HOST_OPTIONS}" <<-! + # Created by ${PROGNAME} on $(date) + # HOST_OPTIONS="${HOST_OPTIONS}" + TEST_DEV="${TEST_DEV}" + SCRATCH_DEV="${SCRATCH_DEV}" + TEST_DIR="${test_dir}" + SCRATCH_MNT="${scratch_dir}" + FSTYP="${FS_TYPE}" + export TEST_DEV SCRATCH_DEV TEST_DIR SCRATCH_MNT FSTYP + # + export XFS_MKFS_OPTIONS="${XFS_MKFS_OPTIONS}" + ! + + # Now ensure we are using the same values + . "${HOST_OPTIONS}" +} + +# remove the host options file, plus the directories it refers to +function cleanup_host_options() { + arg_count 0 $# + + rm -rf "${TEST_DIR}" "${SCRATCH_MNT}" + rm -f "${HOST_OPTIONS}" +} + +# run mkfs on the given device using the specified filesystem type +function do_mkfs() { + arg_count 1 $# + + local dev="${1}" + local options + + case "${FSTYP}" in + xfs) options="${XFS_MKFS_OPTIONS}" ;; + ext4) options="${EXT4_MKFS_OPTIONS}" ;; + btrfs) options="${BTRFS_MKFS_OPTIONS}" ;; + esac + + "mkfs.${FSTYP}" ${options} "${dev}" || + err "unable to make ${FSTYP} file system on device \"${dev}\"" +} + +# mount the given device on the given mount point +function do_mount() { + arg_count 2 $# + + local dev="${1}" + local dir="${2}" + + mount "${dev}" "${dir}" || + err "unable to mount file system \"${dev}\" on \"${dir}\"" +} + +# unmount a previously-mounted device +function do_umount() { + arg_count 1 $# + + local dev="${1}" + + if mount | grep "${dev}" > /dev/null; then + if ! 
umount "${dev}"; then + err "unable to unmount device \"${dev}\"" + fi + else + # Report it but don't error out + echo "device \"${dev}\" was not mounted" >&2 + fi +} + +# do basic xfstests setup--make and mount the test and scratch filesystems +function setup_xfstests() { + arg_count 0 $# + + # TEST_DEV can persist across test runs, but for now we + # don't bother. I believe xfstests prefers its devices to + # have been already been formatted for the desired + # filesystem type--it uses blkid to identify things or + # something. So we mkfs both here for a fresh start. + do_mkfs "${TEST_DEV}" + do_mkfs "${SCRATCH_DEV}" + + # I believe the test device is expected to be mounted; the + # scratch doesn't need to be (but it doesn't hurt). + do_mount "${TEST_DEV}" "${TEST_DIR}" + do_mount "${SCRATCH_DEV}" "${SCRATCH_MNT}" +} + +# clean up changes made by setup_xfstests +function cleanup_xfstests() { + arg_count 0 $# + + # Unmount these in case a test left them mounted (plus + # the corresponding setup function mounted them...) + do_umount "${TEST_DEV}" + do_umount "${SCRATCH_DEV}" +} + +# top-level setup routine +function setup() { + arg_count 0 $# + + setup_host_options + install_xfstests + setup_xfstests +} + +# top-level (final) cleanup routine +function cleanup() { + arg_count 0 $# + + cd / + cleanup_xfstests + remove_xfstests + cleanup_host_options +} +trap cleanup EXIT ERR HUP INT QUIT + +# ################################################################ + +start_date="$(date)" + +parseargs "$@" + +setup + +pushd "${XFSTESTS_DIR}" +for (( i = 1 ; i <= "${COUNT}" ; i++ )); do + [ "${COUNT}" -gt 1 ] && echo "=== Iteration "$i" starting at: $(date)" + + ./check ${TESTS} # Here we actually run the tests + status=$? + + [ "${COUNT}" -gt 1 ] && echo "=== Iteration "$i" complete at: $(date)" +done +popd + +# cleanup is called via the trap call, above + +echo "This xfstests run started at: ${start_date}" +echo "xfstests run completed at: $(date)" +[ "${COUNT}" -gt 1 ] && echo "xfstests run consisted of ${COUNT} iterations" + +exit "${status}" diff --git a/qa/run_xfstests.sh b/qa/run_xfstests.sh new file mode 100755 index 000000000..70e494331 --- /dev/null +++ b/qa/run_xfstests.sh @@ -0,0 +1,323 @@ +#!/usr/bin/env bash + +# Copyright (C) 2012 Dreamhost, LLC +# +# This is free software; see the source for copying conditions. +# There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# This is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as +# published by the Free Software Foundation version 2. 
+ +# Usage: +# run_xfstests -t /dev/<testdev> -s /dev/<scratchdev> [-f <fstype>] -- <tests> +# - test device and scratch device will both get trashed +# - fstypes can be xfs, ext4, or btrfs (xfs default) +# - tests can be listed individually: generic/001 xfs/008 xfs/009 +# tests can also be specified by group: -g quick +# +# Exit status: +# 0: success +# 1: usage error +# 2: other runtime error +# 99: argument count error (programming error) +# 100: getopt error (internal error) + +# Alex Elder <elder@dreamhost.com> +# April 13, 2012 + +set -e + +PROGNAME=$(basename $0) + +# Default command line option values +COUNT="1" +EXPUNGE_FILE="" +DO_RANDOMIZE="" # false +FSTYP="xfs" +SCRATCH_DEV="" # MUST BE SPECIFIED +TEST_DEV="" # MUST BE SPECIFIED +TESTS="-g auto" # The "auto" group is supposed to be "known good" + +# print an error message and quit with non-zero status +function err() { + if [ $# -gt 0 ]; then + echo "" >&2 + echo "${PROGNAME}: ${FUNCNAME[1]}: $@" >&2 + fi + exit 2 +} + +# routine used to validate argument counts to all shell functions +function arg_count() { + local func + local want + local got + + if [ $# -eq 2 ]; then + func="${FUNCNAME[1]}" # calling function + want=$1 + got=$2 + else + func="${FUNCNAME[0]}" # i.e., arg_count + want=2 + got=$# + fi + [ "${want}" -eq "${got}" ] && return 0 + echo "${PROGNAME}: ${func}: arg count bad (want ${want} got ${got})" >&2 + exit 99 +} + +# validation function for repeat count argument +function count_valid() { + arg_count 1 $# + + test "$1" -gt 0 # 0 is pointless; negative is wrong +} + +# validation function for filesystem type argument +function fs_type_valid() { + arg_count 1 $# + + case "$1" in + xfs|ext4|btrfs) return 0 ;; + *) return 1 ;; + esac +} + +# validation function for device arguments +function device_valid() { + arg_count 1 $# + + # Very simple testing--really should try to be more careful... 
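# (test -b only confirms the argument exists and is a block special file;
# it does not check that the device is idle or safe to reformat.)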
+ test -b "$1" +} + +# validation function for expunge file argument +function expunge_file_valid() { + arg_count 1 $# + + test -s "$1" +} + +# print a usage message and quit +# +# if a message is supplied, print that first, and then exit +# with non-zero status +function usage() { + if [ $# -gt 0 ]; then + echo "" >&2 + echo "$@" >&2 + fi + + echo "" >&2 + echo "Usage: ${PROGNAME} <options> -- <tests>" >&2 + echo "" >&2 + echo " options:" >&2 + echo " -h or --help" >&2 + echo " show this message" >&2 + echo " -c or --count" >&2 + echo " iteration count (1 or more)" >&2 + echo " -f or --fs-type" >&2 + echo " one of: xfs, ext4, btrfs" >&2 + echo " (default fs-type: xfs)" >&2 + echo " -r or --randomize" >&2 + echo " randomize test order" >&2 + echo " -s or --scratch-dev (REQUIRED)" >&2 + echo " name of device used for scratch filesystem" >&2 + echo " -t or --test-dev (REQUIRED)" >&2 + echo " name of device used for test filesystem" >&2 + echo " -x or --expunge-file" >&2 + echo " name of file with list of tests to skip" >&2 + echo " tests:" >&2 + echo " list of test numbers, e.g.:" >&2 + echo " generic/001 xfs/008 shared/032 btrfs/009" >&2 + echo " or possibly an xfstests test group, e.g.:" >&2 + echo " -g quick" >&2 + echo " (default tests: -g auto)" >&2 + echo "" >&2 + + [ $# -gt 0 ] && exit 1 + + exit 0 # This is used for a --help +} + +# parse command line arguments +function parseargs() { + # Short option flags + SHORT_OPTS="" + SHORT_OPTS="${SHORT_OPTS},h" + SHORT_OPTS="${SHORT_OPTS},c:" + SHORT_OPTS="${SHORT_OPTS},f:" + SHORT_OPTS="${SHORT_OPTS},r" + SHORT_OPTS="${SHORT_OPTS},s:" + SHORT_OPTS="${SHORT_OPTS},t:" + SHORT_OPTS="${SHORT_OPTS},x:" + + # Long option flags + LONG_OPTS="" + LONG_OPTS="${LONG_OPTS},help" + LONG_OPTS="${LONG_OPTS},count:" + LONG_OPTS="${LONG_OPTS},fs-type:" + LONG_OPTS="${LONG_OPTS},randomize" + LONG_OPTS="${LONG_OPTS},scratch-dev:" + LONG_OPTS="${LONG_OPTS},test-dev:" + LONG_OPTS="${LONG_OPTS},expunge-file:" + + TEMP=$(getopt --name "${PROGNAME}" \ + --options "${SHORT_OPTS}" \ + --longoptions "${LONG_OPTS}" \ + -- "$@") + eval set -- "$TEMP" + + while [ "$1" != "--" ]; do + case "$1" in + -h|--help) + usage + ;; + -c|--count) + count_valid "$2" || + usage "invalid count '$2'" + COUNT="$2" + shift + ;; + -f|--fs-type) + fs_type_valid "$2" || + usage "invalid fs_type '$2'" + FSTYP="$2" + shift + ;; + -r|--randomize) + DO_RANDOMIZE="t" + ;; + -s|--scratch-dev) + device_valid "$2" || + usage "invalid scratch-dev '$2'" + SCRATCH_DEV="$2" + shift + ;; + -t|--test-dev) + device_valid "$2" || + usage "invalid test-dev '$2'" + TEST_DEV="$2" + shift + ;; + -x|--expunge-file) + expunge_file_valid "$2" || + usage "invalid expunge-file '$2'" + EXPUNGE_FILE="$2" + shift + ;; + *) + exit 100 # Internal error + ;; + esac + shift + done + shift + + [ -n "${TEST_DEV}" ] || usage "test-dev must be supplied" + [ -n "${SCRATCH_DEV}" ] || usage "scratch-dev must be supplied" + + [ $# -eq 0 ] || TESTS="$@" +} + +################################################################ + +# run mkfs on the given device using the specified filesystem type +function do_mkfs() { + arg_count 1 $# + + local dev="${1}" + local options + + case "${FSTYP}" in + xfs) options="-f" ;; + ext4) options="-F" ;; + btrfs) options="-f" ;; + esac + + "mkfs.${FSTYP}" ${options} "${dev}" || + err "unable to make ${FSTYP} file system on device \"${dev}\"" +} + +# top-level setup routine +function setup() { + arg_count 0 $# + + wget -P "${TESTDIR}" http://download.ceph.com/qa/xfstests.tar.gz + tar zxf 
"${TESTDIR}/xfstests.tar.gz" -C "$(dirname "${XFSTESTS_DIR}")" + mkdir "${TEST_DIR}" + mkdir "${SCRATCH_MNT}" + do_mkfs "${TEST_DEV}" +} + +# top-level (final) cleanup routine +function cleanup() { + arg_count 0 $# + + # ensure teuthology can clean up the logs + chmod -R a+rw "${TESTDIR}/archive" + + findmnt "${TEST_DEV}" && umount "${TEST_DEV}" + [ -d "${SCRATCH_MNT}" ] && rmdir "${SCRATCH_MNT}" + [ -d "${TEST_DIR}" ] && rmdir "${TEST_DIR}" + rm -rf "${XFSTESTS_DIR}" + rm -f "${TESTDIR}/xfstests.tar.gz" +} + +# ################################################################ + +start_date="$(date)" +parseargs "$@" +[ -n "${TESTDIR}" ] || usage "TESTDIR env variable must be set" +[ -d "${TESTDIR}/archive" ] || usage "\$TESTDIR/archive directory must exist" +TESTDIR="$(readlink -e "${TESTDIR}")" +[ -n "${EXPUNGE_FILE}" ] && EXPUNGE_FILE="$(readlink -e "${EXPUNGE_FILE}")" + +XFSTESTS_DIR="/var/lib/xfstests" # hardcoded into dbench binary +TEST_DIR="/mnt/test_dir" +SCRATCH_MNT="/mnt/scratch_mnt" +MKFS_OPTIONS="" +EXT_MOUNT_OPTIONS="-o block_validity,dioread_nolock" + +trap cleanup EXIT ERR HUP INT QUIT +setup + +export TEST_DEV +export TEST_DIR +export SCRATCH_DEV +export SCRATCH_MNT +export FSTYP +export MKFS_OPTIONS +export EXT_MOUNT_OPTIONS + +pushd "${XFSTESTS_DIR}" +for (( i = 1 ; i <= "${COUNT}" ; i++ )); do + [ "${COUNT}" -gt 1 ] && echo "=== Iteration "$i" starting at: $(date)" + + RESULT_BASE="${TESTDIR}/archive/results-${i}" + mkdir "${RESULT_BASE}" + export RESULT_BASE + + EXPUNGE="" + [ -n "${EXPUNGE_FILE}" ] && EXPUNGE="-E ${EXPUNGE_FILE}" + + RANDOMIZE="" + [ -n "${DO_RANDOMIZE}" ] && RANDOMIZE="-r" + + # -T output timestamps + PATH="${PWD}/bin:${PATH}" ./check -T ${RANDOMIZE} ${EXPUNGE} ${TESTS} + findmnt "${TEST_DEV}" && umount "${TEST_DEV}" + + [ "${COUNT}" -gt 1 ] && echo "=== Iteration "$i" complete at: $(date)" +done +popd + +# cleanup is called via the trap call, above + +echo "This xfstests run started at: ${start_date}" +echo "xfstests run completed at: $(date)" +[ "${COUNT}" -gt 1 ] && echo "xfstests run consisted of ${COUNT} iterations" +echo OK diff --git a/qa/run_xfstests_qemu.sh b/qa/run_xfstests_qemu.sh new file mode 100644 index 000000000..651c75401 --- /dev/null +++ b/qa/run_xfstests_qemu.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# +# TODO switch to run_xfstests.sh (see run_xfstests_krbd.sh) + +set -x + +[ -n "${TESTDIR}" ] || export TESTDIR="/tmp/cephtest" +[ -d "${TESTDIR}" ] || mkdir "${TESTDIR}" + +URL_BASE="https://git.ceph.com/?p=ceph.git;a=blob_plain;f=qa" +SCRIPT="run_xfstests-obsolete.sh" + +cd "${TESTDIR}" + +curl -O "${URL_BASE}/${SCRIPT}" +# mark executable only if the file isn't empty since ./"${SCRIPT}" +# on an empty file would succeed +if [[ -s "${SCRIPT}" ]]; then + chmod +x "${SCRIPT}" +fi + +TEST_DEV="/dev/vdb" +if [[ ! -b "${TEST_DEV}" ]]; then + TEST_DEV="/dev/sdb" +fi +SCRATCH_DEV="/dev/vdc" +if [[ ! -b "${SCRATCH_DEV}" ]]; then + SCRATCH_DEV="/dev/sdc" +fi + +# tests excluded fail in the current testing vm regardless of whether +# rbd is used + +./"${SCRIPT}" -c 1 -f xfs -t "${TEST_DEV}" -s "${SCRATCH_DEV}" \ + 1-7 9-17 19-26 28-49 51-61 63 66-67 69-79 83 85-105 108-110 112-135 \ + 137-170 174-191 193-204 206-217 220-227 230-231 233 235-241 243-249 \ + 252-259 261-262 264-278 281-286 289 +STATUS=$? 
+ +rm -f "${SCRIPT}" + +exit "${STATUS}" diff --git a/qa/runallonce.sh b/qa/runallonce.sh new file mode 100755 index 000000000..bd809fef7 --- /dev/null +++ b/qa/runallonce.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +set -ex + +basedir=`echo $0 | sed 's/[^/]*$//g'`. +testdir="$1" +[ -n "$2" ] && logdir=$2 || logdir=$1 + +[ ${basedir:0:1} == "." ] && basedir=`pwd`/${basedir:1} + +PATH="$basedir/src:$PATH" + +[ -z "$testdir" ] || [ ! -d "$testdir" ] && echo "specify test dir" && exit 1 +cd $testdir + +for test in `cd $basedir/workunits && find . -executable -type f | $basedir/../src/script/permute` +do + echo "------ running test $test ------" + pwd + [ -d $test ] && rm -r $test + mkdir -p $test + mkdir -p `dirname $logdir/$test.log` + test -e $logdir/$test.log && rm $logdir/$test.log + sh -c "cd $test && $basedir/workunits/$test" 2>&1 | tee $logdir/$test.log +done diff --git a/qa/runoncfuse.sh b/qa/runoncfuse.sh new file mode 100755 index 000000000..7be545356 --- /dev/null +++ b/qa/runoncfuse.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -x + +mkdir -p testspace +ceph-fuse testspace -m $1 + +./runallonce.sh testspace +killall ceph-fuse diff --git a/qa/runonkclient.sh b/qa/runonkclient.sh new file mode 100755 index 000000000..f7e8605fa --- /dev/null +++ b/qa/runonkclient.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -x + +mkdir -p testspace +/bin/mount -t ceph $1 testspace + +./runallonce.sh testspace + +/bin/umount testspace diff --git a/qa/setup-chroot.sh b/qa/setup-chroot.sh new file mode 100755 index 000000000..a6e12f356 --- /dev/null +++ b/qa/setup-chroot.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash + +die() { + echo ${@} + exit 1 +} + +usage() +{ + cat << EOF +$0: sets up a chroot environment for building the ceph server +usage: +-h Show this message + +-r [install_dir] location of the root filesystem to install to + example: -r /images/sepia/ + +-s [src_dir] location of the directory with the source code + example: -s ./src/ceph +EOF +} + +cleanup() { + umount -l "${INSTALL_DIR}/mnt/tmp" + umount -l "${INSTALL_DIR}/proc" + umount -l "${INSTALL_DIR}/sys" +} + +INSTALL_DIR= +SRC_DIR= +while getopts “hr:s:†OPTION; do + case $OPTION in + h) usage; exit 1 ;; + r) INSTALL_DIR=$OPTARG ;; + s) SRC_DIR=$OPTARG ;; + ?) usage; exit + ;; + esac +done + +[ $EUID -eq 0 ] || die "This script uses chroot, which requires root permissions." + +[ -d "${INSTALL_DIR}" ] || die "No such directory as '${INSTALL_DIR}'. \ +You must specify an install directory with -r" + +[ -d "${SRC_DIR}" ] || die "no such directory as '${SRC_DIR}'. \ +You must specify a source directory with -s" + +readlink -f ${SRC_DIR} || die "readlink failed on ${SRC_DIR}" +ABS_SRC_DIR=`readlink -f ${SRC_DIR}` + +trap cleanup INT TERM EXIT + +mount --bind "${ABS_SRC_DIR}" "${INSTALL_DIR}/mnt/tmp" || die "bind mount failed" +mount -t proc none "${INSTALL_DIR}/proc" || die "mounting proc failed" +mount -t sysfs none "${INSTALL_DIR}/sys" || die "mounting sys failed" + +echo "$0: starting chroot." +echo "cd /mnt/tmp before building" +echo +chroot ${INSTALL_DIR} env HOME=/mnt/tmp /bin/bash + +echo "$0: exiting chroot." + +exit 0 diff --git a/qa/standalone/README b/qa/standalone/README new file mode 100644 index 000000000..3082442cb --- /dev/null +++ b/qa/standalone/README @@ -0,0 +1,23 @@ +qa/standalone +============= + +These scripts run standalone clusters, but not in a normal way. They make +use of functions ceph-helpers.sh to quickly start/stop daemons against +toy clusters in a single directory. 
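A minimal sketch of that pattern, using the helper names defined in ceph-helpers.sh and assuming the build's bin/ directory is on the PATH (see the example block at the top of ceph-helpers.sh for the full version):

    source ../qa/standalone/ceph-helpers.sh
    setup mydir          # wipe and recreate the test directory
    run_mon mydir a      # start one monitor
    run_osd mydir 0      # start one osd
    rados --pool rbd put GROUP /etc/group   # the toy cluster is usable here
    teardown mydir       # stop the daemons and remove mydir
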
+ +They are normally run via teuthology based on qa/suites/rados/standalone/*.yaml. + +You can run them in a git checkout + build directory as well: + + * The qa/run-standalone.sh will run all of them in sequence. This is slow + since there is no parallelism. + + * You can run individual script(s) by specifying the basename or path below + qa/standalone as arguments to qa/run-standalone.sh. + +../qa/run-standalone.sh misc.sh osd/osd-dup.sh + + * Add support for specifying arguments to selected tests by simply adding + list of tests to each argument. + +../qa/run-standalone.sh "test-ceph-helpers.sh test_get_last_scrub_stamp" diff --git a/qa/standalone/c2c/c2c.sh b/qa/standalone/c2c/c2c.sh new file mode 100755 index 000000000..a6969d555 --- /dev/null +++ b/qa/standalone/c2c/c2c.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash + +set -ex + +function run_perf_c2c() { + # First get some background system info + uname -a > uname.out + lscpu > lscpu.out + cat /proc/cmdline > cmdline.out + timeout -s INT 10 vmstat -w 1 > vmstat.out || true + sudo dmesg >& dmesg.out + cat /proc/cpuinfo > cpuinfo.out + ps axo psr,time,stat,ppid,pid,pcpu,comm > ps.1.out + ps -eafT > ps.2.out + sudo sysctl -a > sysctl.out + + nodecnt=`lscpu|grep "NUMA node(" |awk '{print $3}'` + for ((i=0; i<$nodecnt; i++)) + do + sudo cat /sys/devices/system/node/node${i}/meminfo > meminfo.$i.out + done + sudo more `sudo find /proc -name status` > proc_parent_child_status.out + sudo more /proc/*/numa_maps > numa_maps.out + + # + # Get separate kernel and user perf-c2c stats + # + sudo perf c2c record -a --ldlat=70 --all-user -o perf_c2c_a_all_user.data sleep 5 + sudo perf c2c report --stdio -i perf_c2c_a_all_user.data > perf_c2c_a_all_user.out 2>&1 + sudo perf c2c report --full-symbols --stdio -i perf_c2c_a_all_user.data > perf_c2c_full-sym_a_all_user.out 2>&1 + + sudo perf c2c record --call-graph dwarf -a --ldlat=70 --all-user -o perf_c2c_g_a_all_user.data sleep 5 + sudo perf c2c report -g --stdio -i perf_c2c_g_a_all_user.data > perf_c2c_g_a_all_user.out 2>&1 + + sudo perf c2c record -a --ldlat=70 --all-kernel -o perf_c2c_a_all_kernel.data sleep 4 + sudo perf c2c report --stdio -i perf_c2c_a_all_kernel.data > perf_c2c_a_all_kernel.out 2>&1 + + sudo perf c2c record --call-graph dwarf --ldlat=70 -a --all-kernel -o perf_c2c_g_a_all_kernel.data sleep 4 + + sudo perf c2c report -g --stdio -i perf_c2c_g_a_all_kernel.data > perf_c2c_g_a_all_kernel.out 2>&1 + + # + # Get combined kernel and user perf-c2c stats + # + sudo perf c2c record -a --ldlat=70 -o perf_c2c_a_both.data sleep 4 + sudo perf c2c report --stdio -i perf_c2c_a_both.data > perf_c2c_a_both.out 2>&1 + + sudo perf c2c record --call-graph dwarf --ldlat=70 -a --all-kernel -o perf_c2c_g_a_both.data sleep 4 + sudo perf c2c report -g --stdio -i perf_c2c_g_a_both.data > perf_c2c_g_a_both.out 2>&1 + + # + # Get all-user physical addr stats, in case multiple threads or processes are + # accessing shared memory with different vaddrs. + # + sudo perf c2c record --phys-data -a --ldlat=70 --all-user -o perf_c2c_a_all_user_phys_data.data sleep 5 + sudo perf c2c report --stdio -i perf_c2c_a_all_user_phys_data.data > perf_c2c_a_all_user_phys_data.out 2>&1 +} + +function run() { + local dir=$1 + shift + ( + rm -fr $dir + mkdir $dir + cd $dir + ceph_test_c2c --threads $(($(nproc) * 2)) "$@" & + sleep 30 # let it warm up + run_perf_c2c + kill $! 
|| { echo "ceph_test_c2c WAS NOT RUNNING" ; exit 1 ; } + ) || exit 1 +} + +function bench() { + optimized=$(timeout 30 ceph_test_c2c --threads $(($(nproc) * 2)) --sharding 2> /dev/null || true) + not_optimized=$(timeout 30 ceph_test_c2c --threads $(($(nproc) * 2)) 2> /dev/null || true) + if ! (( $optimized > ( $not_optimized * 2 ) )) ; then + echo "the optimization is expected to be at least x2 faster" + exit 1 + fi +} + +run with-sharding --sharding +run without-sharding +bench diff --git a/qa/standalone/ceph-helpers.sh b/qa/standalone/ceph-helpers.sh new file mode 100755 index 000000000..bf2c91bc0 --- /dev/null +++ b/qa/standalone/ceph-helpers.sh @@ -0,0 +1,2409 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014,2015 Red Hat <contact@redhat.com> +# Copyright (C) 2014 Federico Gimenez <fgimenez@coit.es> +# +# Author: Loic Dachary <loic@dachary.org> +# Author: Federico Gimenez <fgimenez@coit.es> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +TIMEOUT=300 +WAIT_FOR_CLEAN_TIMEOUT=90 +MAX_TIMEOUT=15 +PG_NUM=4 +TMPDIR=${TMPDIR:-/tmp} +CEPH_BUILD_VIRTUALENV=${TMPDIR} +TESTDIR=${TESTDIR:-${TMPDIR}} + +if type xmlstarlet > /dev/null 2>&1; then + XMLSTARLET=xmlstarlet +elif type xml > /dev/null 2>&1; then + XMLSTARLET=xml +else + echo "Missing xmlstarlet binary!" + exit 1 +fi + +if [ `uname` = FreeBSD ]; then + SED=gsed + AWK=gawk + DIFFCOLOPTS="" + KERNCORE="kern.corefile" +else + SED=sed + AWK=awk + termwidth=$(stty -a | head -1 | sed -e 's/.*columns \([0-9]*\).*/\1/') + if [ -n "$termwidth" -a "$termwidth" != "0" ]; then + termwidth="-W ${termwidth}" + fi + DIFFCOLOPTS="-y $termwidth" + KERNCORE="kernel.core_pattern" +fi + +EXTRA_OPTS="" + +#! @file ceph-helpers.sh +# @brief Toolbox to manage Ceph cluster dedicated to testing +# +# Example use case: +# +# ~~~~~~~~~~~~~~~~{.sh} +# source ceph-helpers.sh +# +# function mytest() { +# # cleanup leftovers and reset mydir +# setup mydir +# # create a cluster with one monitor and three osds +# run_mon mydir a +# run_osd mydir 0 +# run_osd mydir 2 +# run_osd mydir 3 +# # put and get an object +# rados --pool rbd put GROUP /etc/group +# rados --pool rbd get GROUP /tmp/GROUP +# # stop the cluster and cleanup the directory +# teardown mydir +# } +# ~~~~~~~~~~~~~~~~ +# +# The focus is on simplicity and efficiency, in the context of +# functional tests. The output is intentionally very verbose +# and functions return as soon as an error is found. The caller +# is also expected to abort on the first error so that debugging +# can be done by looking at the end of the output. +# +# Each function is documented, implemented and tested independently. +# When modifying a helper, the test and the documentation are +# expected to be updated and it is easier of they are collocated. A +# test for a given function can be run with +# +# ~~~~~~~~~~~~~~~~{.sh} +# ceph-helpers.sh TESTS test_get_osds +# ~~~~~~~~~~~~~~~~ +# +# and all the tests (i.e. 
all functions matching test_*) are run +# with: +# +# ~~~~~~~~~~~~~~~~{.sh} +# ceph-helpers.sh TESTS +# ~~~~~~~~~~~~~~~~ +# +# A test function takes a single argument : the directory dedicated +# to the tests. It is expected to not create any file outside of this +# directory and remove it entirely when it completes successfully. +# + + +function get_asok_dir() { + if [ -n "$CEPH_ASOK_DIR" ]; then + echo "$CEPH_ASOK_DIR" + else + echo ${TMPDIR:-/tmp}/ceph-asok.$$ + fi +} + +function get_asok_path() { + local name=$1 + if [ -n "$name" ]; then + echo $(get_asok_dir)/ceph-$name.asok + else + echo $(get_asok_dir)/\$cluster-\$name.asok + fi +} +## +# Cleanup any leftovers found in **dir** via **teardown** +# and reset **dir** as an empty environment. +# +# @param dir path name of the environment +# @return 0 on success, 1 on error +# +function setup() { + local dir=$1 + teardown $dir || return 1 + mkdir -p $dir + mkdir -p $(get_asok_dir) + if [ $(ulimit -n) -le 1024 ]; then + ulimit -n 4096 || return 1 + fi + if [ -z "$LOCALRUN" ]; then + trap "teardown $dir 1" TERM HUP INT + fi +} + +function test_setup() { + local dir=$dir + setup $dir || return 1 + test -d $dir || return 1 + setup $dir || return 1 + test -d $dir || return 1 + teardown $dir +} + +####################################################################### + +## +# Kill all daemons for which a .pid file exists in **dir** and remove +# **dir**. If the file system in which **dir** is btrfs, delete all +# subvolumes that relate to it. +# +# @param dir path name of the environment +# @param dumplogs pass "1" to dump logs otherwise it will only if cores found +# @return 0 on success, 1 on error +# +function teardown() { + local dir=$1 + local dumplogs=$2 + kill_daemons $dir KILL + if [ `uname` != FreeBSD ] \ + && [ $(stat -f -c '%T' .) == "btrfs" ]; then + __teardown_btrfs $dir + fi + local cores="no" + local pattern="$(sysctl -n $KERNCORE)" + # See if we have apport core handling + if [ "${pattern:0:1}" = "|" ]; then + # TODO: Where can we get the dumps? + # Not sure where the dumps really are so this will look in the CWD + pattern="" + fi + # Local we start with core and teuthology ends with core + if ls $(dirname "$pattern") | grep -q '^core\|core$' ; then + cores="yes" + if [ -n "$LOCALRUN" ]; then + mkdir /tmp/cores.$$ 2> /dev/null || true + for i in $(ls $(dirname $(sysctl -n $KERNCORE)) | grep '^core\|core$'); do + mv $i /tmp/cores.$$ + done + fi + fi + if [ "$cores" = "yes" -o "$dumplogs" = "1" ]; then + if [ -n "$LOCALRUN" ]; then + display_logs $dir + else + # Move logs to where Teuthology will archive it + mkdir -p $TESTDIR/archive/log + mv $dir/*.log $TESTDIR/archive/log + fi + fi + rm -fr $dir + rm -rf $(get_asok_dir) + if [ "$cores" = "yes" ]; then + echo "ERROR: Failure due to cores found" + if [ -n "$LOCALRUN" ]; then + echo "Find saved core files in /tmp/cores.$$" + fi + return 1 + fi + return 0 +} + +function __teardown_btrfs() { + local btrfs_base_dir=$1 + local btrfs_root=$(df -P . | tail -1 | $AWK '{print $NF}') + local btrfs_dirs=$(cd $btrfs_base_dir; sudo btrfs subvolume list -t . | $AWK '/^[0-9]/ {print $4}' | grep "$btrfs_base_dir/$btrfs_dir") + for subvolume in $btrfs_dirs; do + sudo btrfs subvolume delete $btrfs_root/$subvolume + done +} + +function test_teardown() { + local dir=$dir + setup $dir || return 1 + teardown $dir || return 1 + ! test -d $dir || return 1 +} + +####################################################################### + +## +# Sends a signal to a single daemon. 
+# This is a helper function for kill_daemons +# +# After the daemon is sent **signal**, its actual termination +# will be verified by sending it signal 0. If the daemon is +# still alive, kill_daemon will pause for a few seconds and +# try again. This will repeat for a fixed number of times +# before kill_daemon returns on failure. The list of +# sleep intervals can be specified as **delays** and defaults +# to: +# +# 0.1 0.2 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120 +# +# This sequence is designed to run first a very short sleep time (0.1) +# if the machine is fast enough and the daemon terminates in a fraction of a +# second. The increasing sleep numbers should give plenty of time for +# the daemon to die even on the slowest running machine. If a daemon +# takes more than a few minutes to stop (the sum of all sleep times), +# there probably is no point in waiting more and a number of things +# are likely to go wrong anyway: better give up and return on error. +# +# @param pid the process id to send a signal +# @param send_signal the signal to send +# @param delays sequence of sleep times before failure +# +function kill_daemon() { + local pid=$(cat $1) + local send_signal=$2 + local delays=${3:-0.1 0.2 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120} + local exit_code=1 + # In order to try after the last large sleep add 0 at the end so we check + # one last time before dropping out of the loop + for try in $delays 0 ; do + if kill -$send_signal $pid 2> /dev/null ; then + exit_code=1 + else + exit_code=0 + break + fi + send_signal=0 + sleep $try + done; + return $exit_code +} + +function test_kill_daemon() { + local dir=$1 + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + name_prefix=osd + for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do + # + # sending signal 0 won't kill the daemon + # waiting just for one second instead of the default schedule + # allows us to quickly verify what happens when kill fails + # to stop the daemon (i.e. it must return false) + # + ! kill_daemon $pidfile 0 1 || return 1 + # + # killing just the osd and verify the mon still is responsive + # + kill_daemon $pidfile TERM || return 1 + done + + name_prefix=mgr + for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do + # + # kill the mgr + # + kill_daemon $pidfile TERM || return 1 + done + + name_prefix=mon + for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do + # + # kill the mon and verify it cannot be reached + # + kill_daemon $pidfile TERM || return 1 + ! timeout 5 ceph status || return 1 + done + + teardown $dir || return 1 +} + +## +# Kill all daemons for which a .pid file exists in **dir**. Each +# daemon is sent a **signal** and kill_daemons waits for it to exit +# during a few minutes. By default all daemons are killed. If a +# **name_prefix** is provided, only the daemons for which a pid +# file is found matching the prefix are killed. See run_osd and +# run_mon for more information about the name conventions for +# the pid files. +# +# Send TERM to all daemons : kill_daemons $dir +# Send KILL to all daemons : kill_daemons $dir KILL +# Send KILL to all osds : kill_daemons $dir KILL osd +# Send KILL to osd 1 : kill_daemons $dir KILL osd.1 +# +# If a daemon is sent the TERM signal and does not terminate +# within a few minutes, it will still be running even after +# kill_daemons returns. 
+# +# If all daemons are kill successfully the function returns 0 +# if at least one daemon remains, this is treated as an +# error and the function return 1. +# +# @param dir path name of the environment +# @param signal name of the first signal (defaults to TERM) +# @param name_prefix only kill match daemons (defaults to all) +# @param delays sequence of sleep times before failure +# @return 0 on success, 1 on error +# +function kill_daemons() { + local trace=$(shopt -q -o xtrace && echo true || echo false) + $trace && shopt -u -o xtrace + local dir=$1 + local signal=${2:-TERM} + local name_prefix=$3 # optional, osd, mon, osd.1 + local delays=$4 #optional timing + local status=0 + local pids="" + + for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do + run_in_background pids kill_daemon $pidfile $signal $delays + done + + wait_background pids + status=$? + + $trace && shopt -s -o xtrace + return $status +} + +function test_kill_daemons() { + local dir=$1 + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + # + # sending signal 0 won't kill the daemon + # waiting just for one second instead of the default schedule + # allows us to quickly verify what happens when kill fails + # to stop the daemon (i.e. it must return false) + # + ! kill_daemons $dir 0 osd 1 || return 1 + # + # killing just the osd and verify the mon still is responsive + # + kill_daemons $dir TERM osd || return 1 + # + # kill the mgr + # + kill_daemons $dir TERM mgr || return 1 + # + # kill the mon and verify it cannot be reached + # + kill_daemons $dir TERM || return 1 + ! timeout 5 ceph status || return 1 + teardown $dir || return 1 +} + +# +# return a random TCP port which is not used yet +# +# please note, there could be racing if we use this function for +# a free port, and then try to bind on this port. +# +function get_unused_port() { + local ip=127.0.0.1 + python3 -c "import socket; s=socket.socket(); s.bind(('$ip', 0)); print(s.getsockname()[1]); s.close()" +} + +####################################################################### + +## +# Run a monitor by the name mon.**id** with data in **dir**/**id**. +# The logs can be found in **dir**/mon.**id**.log and the pid file +# is **dir**/mon.**id**.pid and the admin socket is +# **dir**/**id**/ceph-mon.**id**.asok. +# +# The remaining arguments are passed verbatim to ceph-mon --mkfs +# and the ceph-mon daemon. +# +# Two mandatory arguments must be provided: --fsid and --mon-host +# Instead of adding them to every call to run_mon, they can be +# set in the CEPH_ARGS environment variable to be read implicitly +# by every ceph command. +# +# The CEPH_CONF variable is expected to be set to /dev/null to +# only rely on arguments for configuration. +# +# Examples: +# +# CEPH_ARGS="--fsid=$(uuidgen) " +# CEPH_ARGS+="--mon-host=127.0.0.1:7018 " +# run_mon $dir a # spawn a mon and bind port 7018 +# run_mon $dir a --debug-filestore=20 # spawn with filestore debugging +# +# If mon_initial_members is not set, the default rbd pool is deleted +# and replaced with a replicated pool with less placement groups to +# speed up initialization. If mon_initial_members is set, no attempt +# is made to recreate the rbd pool because it would hang forever, +# waiting for other mons to join. +# +# A **dir**/ceph.conf file is created but not meant to be used by any +# function. 
It is convenient for debugging a failure with: +# +# ceph --conf **dir**/ceph.conf -s +# +# @param dir path name of the environment +# @param id mon identifier +# @param ... can be any option valid for ceph-mon +# @return 0 on success, 1 on error +# +function run_mon() { + local dir=$1 + shift + local id=$1 + shift + local data=$dir/$id + + ceph-mon \ + --id $id \ + --mkfs \ + --mon-data=$data \ + --run-dir=$dir \ + "$@" || return 1 + + ceph-mon \ + --id $id \ + --osd-failsafe-full-ratio=.99 \ + --mon-osd-full-ratio=.99 \ + --mon-data-avail-crit=1 \ + --mon-data-avail-warn=5 \ + --paxos-propose-interval=0.1 \ + --osd-crush-chooseleaf-type=0 \ + $EXTRA_OPTS \ + --debug-mon 20 \ + --debug-ms 20 \ + --debug-paxos 20 \ + --chdir= \ + --mon-data=$data \ + --log-file=$dir/\$name.log \ + --admin-socket=$(get_asok_path) \ + --mon-cluster-log-file=$dir/log \ + --run-dir=$dir \ + --pid-file=$dir/\$name.pid \ + --mon-allow-pool-delete \ + --mon-allow-pool-size-one \ + --osd-pool-default-pg-autoscale-mode off \ + --mon-osd-backfillfull-ratio .99 \ + --mon-warn-on-insecure-global-id-reclaim-allowed=false \ + "$@" || return 1 + + cat > $dir/ceph.conf <<EOF +[global] +fsid = $(get_config mon $id fsid) +mon host = $(get_config mon $id mon_host) +EOF +} + +function test_run_mon() { + local dir=$1 + + setup $dir || return 1 + + run_mon $dir a || return 1 + ceph mon dump | grep "mon.a" || return 1 + kill_daemons $dir || return 1 + + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + create_rbd_pool || return 1 + ceph osd dump | grep "pool 1 'rbd'" || return 1 + local size=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path mon.a) \ + config get osd_pool_default_size) + test "$size" = '{"osd_pool_default_size":"3"}' || return 1 + + ! 
CEPH_ARGS='' ceph status || return 1 + CEPH_ARGS='' ceph --conf $dir/ceph.conf status || return 1 + + kill_daemons $dir || return 1 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + local size=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path mon.a) \ + config get osd_pool_default_size) + test "$size" = '{"osd_pool_default_size":"1"}' || return 1 + kill_daemons $dir || return 1 + + CEPH_ARGS="$CEPH_ARGS --osd_pool_default_size=2" \ + run_mon $dir a || return 1 + local size=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path mon.a) \ + config get osd_pool_default_size) + test "$size" = '{"osd_pool_default_size":"2"}' || return 1 + kill_daemons $dir || return 1 + + teardown $dir || return 1 +} + +function create_rbd_pool() { + ceph osd pool delete rbd rbd --yes-i-really-really-mean-it || return 1 + create_pool rbd $PG_NUM || return 1 + rbd pool init rbd +} + +function create_pool() { + ceph osd pool create "$@" + sleep 1 +} + +function delete_pool() { + local poolname=$1 + ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it +} + +####################################################################### + +function run_mgr() { + local dir=$1 + shift + local id=$1 + shift + local data=$dir/$id + + ceph config set mgr mgr_pool false --force + ceph-mgr \ + --id $id \ + $EXTRA_OPTS \ + --osd-failsafe-full-ratio=.99 \ + --debug-mgr 20 \ + --debug-objecter 20 \ + --debug-ms 20 \ + --debug-paxos 20 \ + --chdir= \ + --mgr-data=$data \ + --log-file=$dir/\$name.log \ + --admin-socket=$(get_asok_path) \ + --run-dir=$dir \ + --pid-file=$dir/\$name.pid \ + --mgr-module-path=$(realpath ${CEPH_ROOT}/src/pybind/mgr) \ + "$@" || return 1 +} + +function run_mds() { + local dir=$1 + shift + local id=$1 + shift + local data=$dir/$id + + ceph-mds \ + --id $id \ + $EXTRA_OPTS \ + --debug-mds 20 \ + --debug-objecter 20 \ + --debug-ms 20 \ + --chdir= \ + --mds-data=$data \ + --log-file=$dir/\$name.log \ + --admin-socket=$(get_asok_path) \ + --run-dir=$dir \ + --pid-file=$dir/\$name.pid \ + "$@" || return 1 +} + +####################################################################### + +## +# Create (prepare) and run (activate) an osd by the name osd.**id** +# with data in **dir**/**id**. The logs can be found in +# **dir**/osd.**id**.log, the pid file is **dir**/osd.**id**.pid and +# the admin socket is **dir**/**id**/ceph-osd.**id**.asok. +# +# The remaining arguments are passed verbatim to ceph-osd. +# +# Two mandatory arguments must be provided: --fsid and --mon-host +# Instead of adding them to every call to run_osd, they can be +# set in the CEPH_ARGS environment variable to be read implicitly +# by every ceph command. +# +# The CEPH_CONF variable is expected to be set to /dev/null to +# only rely on arguments for configuration. +# +# The run_osd function creates the OSD data directory on the **dir**/**id** +# directory and relies on the activate_osd function to run the daemon. +# +# Examples: +# +# CEPH_ARGS="--fsid=$(uuidgen) " +# CEPH_ARGS+="--mon-host=127.0.0.1:7018 " +# run_osd $dir 0 # prepare and activate an osd using the monitor listening on 7018 +# +# @param dir path name of the environment +# @param id osd identifier +# @param ... 
can be any option valid for ceph-osd +# @return 0 on success, 1 on error +# +function run_osd() { + local dir=$1 + shift + local id=$1 + shift + local osd_data=$dir/$id + + local ceph_args="$CEPH_ARGS" + ceph_args+=" --osd-failsafe-full-ratio=.99" + ceph_args+=" --osd-journal-size=100" + ceph_args+=" --osd-scrub-load-threshold=2000" + ceph_args+=" --osd-data=$osd_data" + ceph_args+=" --osd-journal=${osd_data}/journal" + ceph_args+=" --chdir=" + ceph_args+=$EXTRA_OPTS + ceph_args+=" --run-dir=$dir" + ceph_args+=" --admin-socket=$(get_asok_path)" + ceph_args+=" --debug-osd=20" + ceph_args+=" --debug-ms=1" + ceph_args+=" --debug-monc=20" + ceph_args+=" --log-file=$dir/\$name.log" + ceph_args+=" --pid-file=$dir/\$name.pid" + ceph_args+=" --osd-max-object-name-len=460" + ceph_args+=" --osd-max-object-namespace-len=64" + ceph_args+=" --enable-experimental-unrecoverable-data-corrupting-features=*" + ceph_args+=" --osd-mclock-profile=high_recovery_ops" + ceph_args+=" " + ceph_args+="$@" + mkdir -p $osd_data + + local uuid=`uuidgen` + echo "add osd$id $uuid" + OSD_SECRET=$(ceph-authtool --gen-print-key) + echo "{\"cephx_secret\": \"$OSD_SECRET\"}" > $osd_data/new.json + ceph osd new $uuid -i $osd_data/new.json + rm $osd_data/new.json + ceph-osd -i $id $ceph_args --mkfs --key $OSD_SECRET --osd-uuid $uuid + + local key_fn=$osd_data/keyring + cat > $key_fn<<EOF +[osd.$id] +key = $OSD_SECRET +EOF + echo adding osd$id key to auth repository + ceph -i "$key_fn" auth add osd.$id osd "allow *" mon "allow profile osd" mgr "allow profile osd" + echo start osd.$id + ceph-osd -i $id $ceph_args & + + # If noup is set, then can't wait for this osd + if ceph osd dump --format=json | jq '.flags_set[]' | grep -q '"noup"' ; then + return 0 + fi + wait_for_osd up $id || return 1 + +} + +function run_osd_filestore() { + local dir=$1 + shift + local id=$1 + shift + local osd_data=$dir/$id + + local ceph_args="$CEPH_ARGS" + ceph_args+=" --osd-failsafe-full-ratio=.99" + ceph_args+=" --osd-journal-size=100" + ceph_args+=" --osd-scrub-load-threshold=2000" + ceph_args+=" --osd-data=$osd_data" + ceph_args+=" --osd-journal=${osd_data}/journal" + ceph_args+=" --chdir=" + ceph_args+=$EXTRA_OPTS + ceph_args+=" --run-dir=$dir" + ceph_args+=" --admin-socket=$(get_asok_path)" + ceph_args+=" --debug-osd=20" + ceph_args+=" --debug-ms=1" + ceph_args+=" --debug-monc=20" + ceph_args+=" --log-file=$dir/\$name.log" + ceph_args+=" --pid-file=$dir/\$name.pid" + ceph_args+=" --osd-max-object-name-len=460" + ceph_args+=" --osd-max-object-namespace-len=64" + ceph_args+=" --enable-experimental-unrecoverable-data-corrupting-features=*" + ceph_args+=" " + ceph_args+="$@" + mkdir -p $osd_data + + local uuid=`uuidgen` + echo "add osd$osd $uuid" + OSD_SECRET=$(ceph-authtool --gen-print-key) + echo "{\"cephx_secret\": \"$OSD_SECRET\"}" > $osd_data/new.json + ceph osd new $uuid -i $osd_data/new.json + rm $osd_data/new.json + ceph-osd -i $id $ceph_args --mkfs --key $OSD_SECRET --osd-uuid $uuid --osd-objectstore=filestore + + local key_fn=$osd_data/keyring + cat > $key_fn<<EOF +[osd.$osd] +key = $OSD_SECRET +EOF + echo adding osd$id key to auth repository + ceph -i "$key_fn" auth add osd.$id osd "allow *" mon "allow profile osd" mgr "allow profile osd" + echo start osd.$id + ceph-osd -i $id $ceph_args & + + # If noup is set, then can't wait for this osd + if ceph osd dump --format=json | jq '.flags_set[]' | grep -q '"noup"' ; then + return 0 + fi + wait_for_osd up $id || return 1 + + +} + +function test_run_osd() { + local dir=$1 + + setup $dir || 
return 1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills) + echo "$backfills" | grep --quiet 'osd_max_backfills' || return 1 + + run_osd $dir 1 --osd-max-backfills 20 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.1) \ + config get osd_max_backfills) + test "$backfills" = '{"osd_max_backfills":"20"}' || return 1 + + CEPH_ARGS="$CEPH_ARGS --osd-max-backfills 30" run_osd $dir 2 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.2) \ + config get osd_max_backfills) + test "$backfills" = '{"osd_max_backfills":"30"}' || return 1 + + teardown $dir || return 1 +} + +####################################################################### + +## +# Shutdown and remove all traces of the osd by the name osd.**id**. +# +# The OSD is shutdown with the TERM signal. It is then removed from +# the auth list, crush map, osd map etc and the files associated with +# it are also removed. +# +# @param dir path name of the environment +# @param id osd identifier +# @return 0 on success, 1 on error +# +function destroy_osd() { + local dir=$1 + local id=$2 + + ceph osd out osd.$id || return 1 + kill_daemons $dir TERM osd.$id || return 1 + ceph osd down osd.$id || return 1 + ceph osd purge osd.$id --yes-i-really-mean-it || return 1 + teardown $dir/$id || return 1 + rm -fr $dir/$id +} + +function test_destroy_osd() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + destroy_osd $dir 0 || return 1 + ! ceph osd dump | grep "osd.$id " || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Run (activate) an osd by the name osd.**id** with data in +# **dir**/**id**. The logs can be found in **dir**/osd.**id**.log, +# the pid file is **dir**/osd.**id**.pid and the admin socket is +# **dir**/**id**/ceph-osd.**id**.asok. +# +# The remaining arguments are passed verbatim to ceph-osd. +# +# Two mandatory arguments must be provided: --fsid and --mon-host +# Instead of adding them to every call to activate_osd, they can be +# set in the CEPH_ARGS environment variable to be read implicitly +# by every ceph command. +# +# The CEPH_CONF variable is expected to be set to /dev/null to +# only rely on arguments for configuration. +# +# The activate_osd function expects a valid OSD data directory +# in **dir**/**id**, either just created via run_osd or re-using +# one left by a previous run of ceph-osd. The ceph-osd daemon is +# run directly on the foreground +# +# The activate_osd function blocks until the monitor reports the osd +# up. If it fails to do so within $TIMEOUT seconds, activate_osd +# fails. +# +# Examples: +# +# CEPH_ARGS="--fsid=$(uuidgen) " +# CEPH_ARGS+="--mon-host=127.0.0.1:7018 " +# activate_osd $dir 0 # activate an osd using the monitor listening on 7018 +# +# @param dir path name of the environment +# @param id osd identifier +# @param ... 
can be any option valid for ceph-osd +# @return 0 on success, 1 on error +# +function activate_osd() { + local dir=$1 + shift + local id=$1 + shift + local osd_data=$dir/$id + + local ceph_args="$CEPH_ARGS" + ceph_args+=" --osd-failsafe-full-ratio=.99" + ceph_args+=" --osd-journal-size=100" + ceph_args+=" --osd-scrub-load-threshold=2000" + ceph_args+=" --osd-data=$osd_data" + ceph_args+=" --osd-journal=${osd_data}/journal" + ceph_args+=" --chdir=" + ceph_args+=$EXTRA_OPTS + ceph_args+=" --run-dir=$dir" + ceph_args+=" --admin-socket=$(get_asok_path)" + ceph_args+=" --debug-osd=20" + ceph_args+=" --log-file=$dir/\$name.log" + ceph_args+=" --pid-file=$dir/\$name.pid" + ceph_args+=" --osd-max-object-name-len=460" + ceph_args+=" --osd-max-object-namespace-len=64" + ceph_args+=" --enable-experimental-unrecoverable-data-corrupting-features=*" + ceph_args+=" --osd-mclock-profile=high_recovery_ops" + ceph_args+=" " + ceph_args+="$@" + mkdir -p $osd_data + + echo start osd.$id + ceph-osd -i $id $ceph_args & + + [ "$id" = "$(cat $osd_data/whoami)" ] || return 1 + + # If noup is set, then can't wait for this osd + if ceph osd dump --format=json | jq '.flags_set[]' | grep -q '"noup"' ; then + return 0 + fi + wait_for_osd up $id || return 1 +} + +function test_activate_osd() { + local dir=$1 + + setup $dir || return 1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills) + echo "$backfills" | grep --quiet 'osd_max_backfills' || return 1 + + kill_daemons $dir TERM osd || return 1 + + activate_osd $dir 0 --osd-max-backfills 20 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills) + test "$backfills" = '{"osd_max_backfills":"20"}' || return 1 + + teardown $dir || return 1 +} + +function test_activate_osd_after_mark_down() { + local dir=$1 + + setup $dir || return 1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills) + echo "$backfills" | grep --quiet 'osd_max_backfills' || return 1 + + kill_daemons $dir TERM osd || return 1 + ceph osd down 0 || return 1 + wait_for_osd down 0 || return 1 + + activate_osd $dir 0 --osd-max-backfills 20 || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills) + test "$backfills" = '{"osd_max_backfills":"20"}' || return 1 + + teardown $dir || return 1 +} + +function test_activate_osd_skip_benchmark() { + local dir=$1 + + setup $dir || return 1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + # Skip the osd benchmark during first osd bring-up. + run_osd $dir 0 --osd-op-queue=mclock_scheduler \ + --osd-mclock-skip-benchmark=true || return 1 + local max_iops_hdd_def=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_hdd) + local max_iops_ssd_def=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_ssd) + + kill_daemons $dir TERM osd || return 1 + ceph osd down 0 || return 1 + wait_for_osd down 0 || return 1 + + # Skip the osd benchmark during activation as well. Validate that + # the max osd capacities are left unchanged. 
+ activate_osd $dir 0 --osd-op-queue=mclock_scheduler \ + --osd-mclock-skip-benchmark=true || return 1 + local max_iops_hdd_after_boot=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_hdd) + local max_iops_ssd_after_boot=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_max_capacity_iops_ssd) + + test "$max_iops_hdd_def" = "$max_iops_hdd_after_boot" || return 1 + test "$max_iops_ssd_def" = "$max_iops_ssd_after_boot" || return 1 + + teardown $dir || return 1 +} +####################################################################### + +## +# Wait until the OSD **id** is either up or down, as specified by +# **state**. It fails after $TIMEOUT seconds. +# +# @param state either up or down +# @param id osd identifier +# @return 0 on success, 1 on error +# +function wait_for_osd() { + local state=$1 + local id=$2 + + status=1 + for ((i=0; i < $TIMEOUT; i++)); do + echo $i + if ! ceph osd dump | grep "osd.$id $state"; then + sleep 1 + else + status=0 + break + fi + done + return $status +} + +function test_wait_for_osd() { + local dir=$1 + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + wait_for_osd up 0 || return 1 + wait_for_osd up 1 || return 1 + kill_daemons $dir TERM osd.0 || return 1 + wait_for_osd down 0 || return 1 + ( TIMEOUT=1 ; ! wait_for_osd up 0 ) || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Display the list of OSD ids supporting the **objectname** stored in +# **poolname**, as reported by ceph osd map. +# +# @param poolname an existing pool +# @param objectname an objectname (may or may not exist) +# @param STDOUT white space separated list of OSD ids +# @return 0 on success, 1 on error +# +function get_osds() { + local poolname=$1 + local objectname=$2 + + local osds=$(ceph --format json osd map $poolname $objectname 2>/dev/null | \ + jq '.acting | .[]') + # get rid of the trailing space + echo $osds +} + +function test_get_osds() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + create_rbd_pool || return 1 + get_osds rbd GROUP | grep --quiet '^[0-1] [0-1]$' || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Wait for the monitor to form quorum (optionally, of size N) +# +# @param timeout duration (lower-bound) to wait for quorum to be formed +# @param quorumsize size of quorum to wait for +# @return 0 on success, 1 on error +# +function wait_for_quorum() { + local timeout=$1 + local quorumsize=$2 + + if [[ -z "$timeout" ]]; then + timeout=300 + fi + + if [[ -z "$quorumsize" ]]; then + timeout $timeout ceph quorum_status --format=json >&/dev/null || return 1 + return 0 + fi + + no_quorum=1 + wait_until=$((`date +%s` + $timeout)) + while [[ $(date +%s) -lt $wait_until ]]; do + jqfilter='.quorum | length == '$quorumsize + jqinput="$(timeout $timeout ceph quorum_status --format=json 2>/dev/null)" + res=$(echo $jqinput | jq "$jqfilter") + if [[ "$res" == "true" ]]; then + no_quorum=0 + break + fi + done + return $no_quorum +} + 
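##
# Illustrative sketch only (not part of the helper library): one way the
# helpers above can be combined, patterned on the test_* functions in this
# file. The pool/object names and the quorum size are assumptions made for
# the example.
#
# ~~~~~~~~~~~~~~~~{.sh}
# function example_quorum_and_osds() {
#     local dir=$1
#     setup $dir || return 1
#     run_mon $dir a --osd_pool_default_size=2 || return 1
#     run_mgr $dir x || return 1
#     run_osd $dir 0 || return 1
#     run_osd $dir 1 || return 1
#     wait_for_quorum 300 1 || return 1   # a single mon forms a quorum of 1
#     create_rbd_pool || return 1
#     wait_for_clean || return 1
#     get_osds rbd GROUP | grep --quiet '^[0-1] [0-1]$' || return 1
#     teardown $dir || return 1
# }
# ~~~~~~~~~~~~~~~~
#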
+####################################################################### + +## +# Return the PG of supporting the **objectname** stored in +# **poolname**, as reported by ceph osd map. +# +# @param poolname an existing pool +# @param objectname an objectname (may or may not exist) +# @param STDOUT a PG +# @return 0 on success, 1 on error +# +function get_pg() { + local poolname=$1 + local objectname=$2 + + ceph --format json osd map $poolname $objectname 2>/dev/null | jq -r '.pgid' +} + +function test_get_pg() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + get_pg rbd GROUP | grep --quiet '^[0-9]\.[0-9a-f][0-9a-f]*$' || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return the value of the **config**, obtained via the config get command +# of the admin socket of **daemon**.**id**. +# +# @param daemon mon or osd +# @param id mon or osd ID +# @param config the configuration variable name as found in config_opts.h +# @param STDOUT the config value +# @return 0 on success, 1 on error +# +function get_config() { + local daemon=$1 + local id=$2 + local config=$3 + + CEPH_ARGS='' \ + ceph --format json daemon $(get_asok_path $daemon.$id) \ + config get $config 2> /dev/null | \ + jq -r ".$config" +} + +function test_get_config() { + local dir=$1 + + # override the default config using command line arg and check it + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + test $(get_config mon a osd_pool_default_size) = 1 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 --osd_max_scrubs=3 || return 1 + test $(get_config osd 0 osd_max_scrubs) = 3 || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Set the **config** to specified **value**, via the config set command +# of the admin socket of **daemon**.**id** +# +# @param daemon mon or osd +# @param id mon or osd ID +# @param config the configuration variable name as found in config_opts.h +# @param value the config value +# @return 0 on success, 1 on error +# +function set_config() { + local daemon=$1 + local id=$2 + local config=$3 + local value=$4 + + test $(env CEPH_ARGS='' ceph --format json daemon $(get_asok_path $daemon.$id) \ + config set $config $value 2> /dev/null | \ + jq 'has("success")') == true +} + +function test_set_config() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + test $(get_config mon a ms_crc_header) = true || return 1 + set_config mon a ms_crc_header false || return 1 + test $(get_config mon a ms_crc_header) = false || return 1 + set_config mon a ms_crc_header true || return 1 + test $(get_config mon a ms_crc_header) = true || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return the OSD id of the primary OSD supporting the **objectname** +# stored in **poolname**, as reported by ceph osd map. 
+# +# @param poolname an existing pool +# @param objectname an objectname (may or may not exist) +# @param STDOUT the primary OSD id +# @return 0 on success, 1 on error +# +function get_primary() { + local poolname=$1 + local objectname=$2 + + ceph --format json osd map $poolname $objectname 2>/dev/null | \ + jq '.acting_primary' +} + +function test_get_primary() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + local osd=0 + run_mgr $dir x || return 1 + run_osd $dir $osd || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + test $(get_primary rbd GROUP) = $osd || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return the id of any OSD supporting the **objectname** stored in +# **poolname**, as reported by ceph osd map, except the primary. +# +# @param poolname an existing pool +# @param objectname an objectname (may or may not exist) +# @param STDOUT the OSD id +# @return 0 on success, 1 on error +# +function get_not_primary() { + local poolname=$1 + local objectname=$2 + + local primary=$(get_primary $poolname $objectname) + ceph --format json osd map $poolname $objectname 2>/dev/null | \ + jq ".acting | map(select (. != $primary)) | .[0]" +} + +function test_get_not_primary() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + local primary=$(get_primary rbd GROUP) + local not_primary=$(get_not_primary rbd GROUP) + test $not_primary != $primary || return 1 + test $not_primary = 0 -o $not_primary = 1 || return 1 + teardown $dir || return 1 +} + +####################################################################### + +function _objectstore_tool_nodown() { + local dir=$1 + shift + local id=$1 + shift + local osd_data=$dir/$id + + ceph-objectstore-tool \ + --data-path $osd_data \ + "$@" || return 1 +} + +function _objectstore_tool_nowait() { + local dir=$1 + shift + local id=$1 + shift + + kill_daemons $dir TERM osd.$id >&2 < /dev/null || return 1 + + _objectstore_tool_nodown $dir $id "$@" || return 1 + activate_osd $dir $id $ceph_osd_args >&2 || return 1 +} + +## +# Run ceph-objectstore-tool against the OSD **id** using the data path +# **dir**. The OSD is killed with TERM prior to running +# ceph-objectstore-tool because access to the data path is +# exclusive. The OSD is restarted after the command completes. The +# objectstore_tool returns after all PG are active+clean again. +# +# @param dir the data path of the OSD +# @param id the OSD id +# @param ... 
arguments to ceph-objectstore-tool +# @param STDIN the input of ceph-objectstore-tool +# @param STDOUT the output of ceph-objectstore-tool +# @return 0 on success, 1 on error +# +# The value of $ceph_osd_args will be passed to restarted osds +# +function objectstore_tool() { + local dir=$1 + shift + local id=$1 + shift + + _objectstore_tool_nowait $dir $id "$@" || return 1 + wait_for_clean >&2 +} + +function test_objectstore_tool() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + local osd=0 + run_mgr $dir x || return 1 + run_osd $dir $osd || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + rados --pool rbd put GROUP /etc/group || return 1 + objectstore_tool $dir $osd GROUP get-bytes | \ + diff - /etc/group + ! objectstore_tool $dir $osd NOTEXISTS get-bytes || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Predicate checking if there is an ongoing recovery in the +# cluster. If any of the recovering_{keys,bytes,objects}_per_sec +# counters are reported by ceph status, it means recovery is in +# progress. +# +# @return 0 if recovery in progress, 1 otherwise +# +function get_is_making_recovery_progress() { + local recovery_progress + recovery_progress+=".recovering_keys_per_sec + " + recovery_progress+=".recovering_bytes_per_sec + " + recovery_progress+=".recovering_objects_per_sec" + local progress=$(ceph --format json status 2>/dev/null | \ + jq -r ".pgmap | $recovery_progress") + test "$progress" != null +} + +function test_get_is_making_recovery_progress() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + ! get_is_making_recovery_progress || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return the number of active PGs in the cluster. A PG is active if +# ceph pg dump pgs reports it both **active** and **clean** and that +# not **stale**. +# +# @param STDOUT the number of active PGs +# @return 0 on success, 1 on error +# +function get_num_active_clean() { + local expression + expression+="select(contains(\"active\") and contains(\"clean\")) | " + expression+="select(contains(\"stale\") | not)" + ceph --format json pg dump pgs 2>/dev/null | \ + jq ".pg_stats | [.[] | .state | $expression] | length" +} + +function test_get_num_active_clean() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + local num_active_clean=$(get_num_active_clean) + test "$num_active_clean" = $PG_NUM || return 1 + teardown $dir || return 1 +} + +## +# Return the number of active or peered PGs in the cluster. A PG matches if +# ceph pg dump pgs reports it is either **active** or **peered** and that +# not **stale**. 
+# +# @param STDOUT the number of active PGs +# @return 0 on success, 1 on error +# +function get_num_active_or_peered() { + local expression + expression+="select(contains(\"active\") or contains(\"peered\")) | " + expression+="select(contains(\"stale\") | not)" + ceph --format json pg dump pgs 2>/dev/null | \ + jq ".pg_stats | [.[] | .state | $expression] | length" +} + +function test_get_num_active_or_peered() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + local num_peered=$(get_num_active_or_peered) + test "$num_peered" = $PG_NUM || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return the number of PGs in the cluster, according to +# ceph pg dump pgs. +# +# @param STDOUT the number of PGs +# @return 0 on success, 1 on error +# +function get_num_pgs() { + ceph --format json status 2>/dev/null | jq '.pgmap.num_pgs' +} + +function test_get_num_pgs() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + local num_pgs=$(get_num_pgs) + test "$num_pgs" -gt 0 || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return the OSD ids in use by at least one PG in the cluster (either +# in the up or the acting set), according to ceph pg dump pgs. Every +# OSD id shows as many times as they are used in up and acting sets. +# If an OSD id is in both the up and acting set of a given PG, it will +# show twice. +# +# @param STDOUT a sorted list of OSD ids +# @return 0 on success, 1 on error +# +function get_osd_id_used_by_pgs() { + ceph --format json pg dump pgs 2>/dev/null | jq '.pg_stats | .[] | .up[], .acting[]' | sort +} + +function test_get_osd_id_used_by_pgs() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + local osd_ids=$(get_osd_id_used_by_pgs | uniq) + test "$osd_ids" = "0" || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Wait until the OSD **id** shows **count** times in the +# PGs (see get_osd_id_used_by_pgs for more information about +# how OSD ids are counted). +# +# @param id the OSD id +# @param count the number of time it must show in the PGs +# @return 0 on success, 1 on error +# +function wait_osd_id_used_by_pgs() { + local id=$1 + local count=$2 + + status=1 + for ((i=0; i < $TIMEOUT / 5; i++)); do + echo $i + if ! test $(get_osd_id_used_by_pgs | grep -c $id) = $count ; then + sleep 5 + else + status=0 + break + fi + done + return $status +} + +function test_wait_osd_id_used_by_pgs() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + wait_osd_id_used_by_pgs 0 8 || return 1 + ! 
TIMEOUT=1 wait_osd_id_used_by_pgs 123 5 || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return the date and time of the last completed scrub for **pgid**, +# as reported by ceph pg dump pgs. Note that a repair also sets this +# date. +# +# @param pgid the id of the PG +# @param STDOUT the date and time of the last scrub +# @return 0 on success, 1 on error +# +function get_last_scrub_stamp() { + local pgid=$1 + local sname=${2:-last_scrub_stamp} + ceph --format json pg dump pgs 2>/dev/null | \ + jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname" +} + +function test_get_last_scrub_stamp() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + stamp=$(get_last_scrub_stamp 1.0) + test -n "$stamp" || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Predicate checking if the cluster is clean, i.e. all of its PGs are +# in a clean state (see get_num_active_clean for a definition). +# +# @return 0 if the cluster is clean, 1 otherwise +# +function is_clean() { + num_pgs=$(get_num_pgs) + test $num_pgs != 0 || return 1 + test $(get_num_active_clean) = $num_pgs || return 1 +} + +function test_is_clean() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + is_clean || return 1 + teardown $dir || return 1 +} + +####################################################################### + +calc() { $AWK "BEGIN{print $*}"; } + +## +# Return a list of numbers that are increasingly larger and whose +# total is **timeout** seconds. It can be used to have short sleep +# delay while waiting for an event on a fast machine. But if running +# very slowly the larger delays avoid stressing the machine even +# further or spamming the logs. +# +# @param timeout sum of all delays, in seconds +# @return a list of sleep delays +# +function get_timeout_delays() { + local trace=$(shopt -q -o xtrace && echo true || echo false) + $trace && shopt -u -o xtrace + local timeout=$1 + local first_step=${2:-1} + local max_timeout=${3:-$MAX_TIMEOUT} + + local i + local total="0" + i=$first_step + while test "$(calc $total + $i \<= $timeout)" = "1"; do + echo -n "$(calc $i) " + total=$(calc $total + $i) + i=$(calc $i \* 2) + if [ $max_timeout -gt 0 ]; then + # Did we reach max timeout ? 
+ if [ ${i%.*} -eq ${max_timeout%.*} ] && [ ${i#*.} \> ${max_timeout#*.} ] || [ ${i%.*} -gt ${max_timeout%.*} ]; then + # Yes, so let's cap the max wait time to max + i=$max_timeout + fi + fi + done + if test "$(calc $total \< $timeout)" = "1"; then + echo -n "$(calc $timeout - $total) " + fi + $trace && shopt -s -o xtrace +} + +function test_get_timeout_delays() { + test "$(get_timeout_delays 1)" = "1 " || return 1 + test "$(get_timeout_delays 5)" = "1 2 2 " || return 1 + test "$(get_timeout_delays 6)" = "1 2 3 " || return 1 + test "$(get_timeout_delays 7)" = "1 2 4 " || return 1 + test "$(get_timeout_delays 8)" = "1 2 4 1 " || return 1 + test "$(get_timeout_delays 1 .1)" = "0.1 0.2 0.4 0.3 " || return 1 + test "$(get_timeout_delays 1.5 .1)" = "0.1 0.2 0.4 0.8 " || return 1 + test "$(get_timeout_delays 5 .1)" = "0.1 0.2 0.4 0.8 1.6 1.9 " || return 1 + test "$(get_timeout_delays 6 .1)" = "0.1 0.2 0.4 0.8 1.6 2.9 " || return 1 + test "$(get_timeout_delays 6.3 .1)" = "0.1 0.2 0.4 0.8 1.6 3.2 " || return 1 + test "$(get_timeout_delays 20 .1)" = "0.1 0.2 0.4 0.8 1.6 3.2 6.4 7.3 " || return 1 + test "$(get_timeout_delays 300 .1 0)" = "0.1 0.2 0.4 0.8 1.6 3.2 6.4 12.8 25.6 51.2 102.4 95.3 " || return 1 + test "$(get_timeout_delays 300 .1 10)" = "0.1 0.2 0.4 0.8 1.6 3.2 6.4 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 7.3 " || return 1 +} + +####################################################################### + +## +# Wait until the cluster becomes clean or if it does not make progress +# for $WAIT_FOR_CLEAN_TIMEOUT seconds. +# Progress is measured either via the **get_is_making_recovery_progress** +# predicate or if the number of clean PGs changes (as returned by get_num_active_clean) +# +# @return 0 if the cluster is clean, 1 otherwise +# +function wait_for_clean() { + local cmd=$1 + local num_active_clean=-1 + local cur_active_clean + local -a delays=($(get_timeout_delays $WAIT_FOR_CLEAN_TIMEOUT .1)) + local -i loop=0 + + flush_pg_stats || return 1 + while test $(get_num_pgs) == 0 ; do + sleep 1 + done + + while true ; do + # Comparing get_num_active_clean & get_num_pgs is used to determine + # if the cluster is clean. That's almost an inline of is_clean() to + # get more performance by avoiding multiple calls of get_num_active_clean. + cur_active_clean=$(get_num_active_clean) + test $cur_active_clean = $(get_num_pgs) && break + if test $cur_active_clean != $num_active_clean ; then + loop=0 + num_active_clean=$cur_active_clean + elif get_is_making_recovery_progress ; then + loop=0 + elif (( $loop >= ${#delays[*]} )) ; then + ceph report + return 1 + fi + # eval is a no-op if cmd is empty + eval $cmd + sleep ${delays[$loop]} + loop+=1 + done + return 0 +} + +function test_wait_for_clean() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_osd $dir 0 || return 1 + run_mgr $dir x || return 1 + create_rbd_pool || return 1 + ! WAIT_FOR_CLEAN_TIMEOUT=1 wait_for_clean || return 1 + run_osd $dir 1 || return 1 + wait_for_clean || return 1 + teardown $dir || return 1 +} + +## +# Wait until the cluster becomes peered or if it does not make progress +# for $WAIT_FOR_CLEAN_TIMEOUT seconds. 
+# Progress is measured either via the **get_is_making_recovery_progress** +# predicate or if the number of peered PGs changes (as returned by get_num_active_or_peered) +# +# @return 0 if the cluster is peered, 1 otherwise +# +function wait_for_peered() { + local cmd=$1 + local num_peered=-1 + local cur_peered + local -a delays=($(get_timeout_delays $WAIT_FOR_CLEAN_TIMEOUT .1)) + local -i loop=0 + + flush_pg_stats || return 1 + while test $(get_num_pgs) == 0 ; do + sleep 1 + done + + while true ; do + # Comparing get_num_active_or_peered & get_num_pgs is used to determine + # if the cluster is peered. That's almost an inline of is_clean() to + # get more performance by avoiding multiple calls of get_num_active_or_peered. + cur_peered=$(get_num_active_or_peered) + test $cur_peered = $(get_num_pgs) && break + if test $cur_peered != $num_peered ; then + loop=0 + num_peered=$cur_peered + elif get_is_making_recovery_progress ; then + loop=0 + elif (( $loop >= ${#delays[*]} )) ; then + ceph report + return 1 + fi + # eval is a no-op if cmd is empty + eval $cmd + sleep ${delays[$loop]} + loop+=1 + done + return 0 +} + +function test_wait_for_peered() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_osd $dir 0 || return 1 + run_mgr $dir x || return 1 + create_rbd_pool || return 1 + ! WAIT_FOR_CLEAN_TIMEOUT=1 wait_for_clean || return 1 + run_osd $dir 1 || return 1 + wait_for_peered || return 1 + teardown $dir || return 1 +} + + +####################################################################### + +## +# Wait until the given health condition has disappeared from the cluster. +# $TIMEOUT is used as the default timeout. +# +# @param string to grep for in health detail +# @return 0 if the health condition is gone, +# 1 if it still remains after $TIMEOUT seconds. +# +function wait_for_health_gone() { + local grepstr=$1 + local -a delays=($(get_timeout_delays $TIMEOUT .1)) + local -i loop=0 + + while ceph health detail | grep "$grepstr" ; do + if (( $loop >= ${#delays[*]} )) ; then + ceph health detail + return 1 + fi + sleep ${delays[$loop]} + loop+=1 + done +} + +## +# Wait until the cluster health matches the condition passed as argument, +# retrying for up to $TIMEOUT seconds. +# +# @param string to grep for in health detail +# @return 0 if the cluster health matches request, 1 otherwise +# +function wait_for_health() { + local grepstr=$1 + local -a delays=($(get_timeout_delays $TIMEOUT .1)) + local -i loop=0 + + while ! ceph health detail | grep "$grepstr" ; do + if (( $loop >= ${#delays[*]} )) ; then + ceph health detail + return 1 + fi + sleep ${delays[$loop]} + loop+=1 + done +} + +## +# Wait until the cluster becomes HEALTH_OK again, or fail if it does not +# within $TIMEOUT seconds. +# +# @return 0 if the cluster is HEALTHY, 1 otherwise +# +function wait_for_health_ok() { + wait_for_health "HEALTH_OK" || return 1 +} + +function test_wait_for_health_ok() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_failsafe_full_ratio=.99 --mon_pg_warn_min_per_osd=0 || return 1 + run_mgr $dir x --mon_pg_warn_min_per_osd=0 || return 1 + # start osd_pool_default_size OSDs + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + kill_daemons $dir TERM osd || return 1 + ceph osd down 0 || return 1 + # expect TOO_FEW_OSDS warning + ! 
TIMEOUT=1 wait_for_health_ok || return 1 + # resurrect all OSDs + activate_osd $dir 0 || return 1 + activate_osd $dir 1 || return 1 + activate_osd $dir 2 || return 1 + wait_for_health_ok || return 1 + teardown $dir || return 1 +} + + +####################################################################### + +## +# Run repair on **pgid** and wait until it completes. The repair +# function will fail if repair does not complete within $TIMEOUT +# seconds. +# +# @param pgid the id of the PG +# @return 0 on success, 1 on error +# +function repair() { + local pgid=$1 + local last_scrub=$(get_last_scrub_stamp $pgid) + ceph pg repair $pgid + wait_for_scrub $pgid "$last_scrub" +} + +function test_repair() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + repair 1.0 || return 1 + kill_daemons $dir KILL osd || return 1 + ! TIMEOUT=1 repair 1.0 || return 1 + teardown $dir || return 1 +} +####################################################################### + +## +# Run scrub on **pgid** and wait until it completes. The pg_scrub +# function will fail if repair does not complete within $TIMEOUT +# seconds. The pg_scrub is complete whenever the +# **get_last_scrub_stamp** function reports a timestamp different from +# the one stored before starting the scrub. +# +# @param pgid the id of the PG +# @return 0 on success, 1 on error +# +function pg_scrub() { + local pgid=$1 + local last_scrub=$(get_last_scrub_stamp $pgid) + ceph pg scrub $pgid + wait_for_scrub $pgid "$last_scrub" +} + +function pg_deep_scrub() { + local pgid=$1 + local last_scrub=$(get_last_scrub_stamp $pgid last_deep_scrub_stamp) + ceph pg deep-scrub $pgid + wait_for_scrub $pgid "$last_scrub" last_deep_scrub_stamp +} + +function test_pg_scrub() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + pg_scrub 1.0 || return 1 + kill_daemons $dir KILL osd || return 1 + ! TIMEOUT=1 pg_scrub 1.0 || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Run the *command* and expect it to fail (i.e. return a non zero status). +# The output (stderr and stdout) is stored in a temporary file in *dir* +# and is expected to contain the string *expected*. +# +# Return 0 if the command failed and the string was found. Otherwise +# return 1 and cat the full output of the command on stderr for debug. +# +# @param dir temporary directory to store the output +# @param expected string to look for in the output +# @param command ... the command and its arguments +# @return 0 on success, 1 on error +# + +function expect_failure() { + local dir=$1 + shift + local expected="$1" + shift + local success + + if "$@" > $dir/out 2>&1 ; then + success=true + else + success=false + fi + + if $success || ! grep --quiet "$expected" $dir/out ; then + cat $dir/out >&2 + return 1 + else + return 0 + fi +} + +function test_expect_failure() { + local dir=$1 + + setup $dir || return 1 + expect_failure $dir FAIL bash -c 'echo FAIL ; exit 1' || return 1 + # the command did not fail + ! 
expect_failure $dir FAIL bash -c 'echo FAIL ; exit 0' > $dir/out || return 1 + grep --quiet FAIL $dir/out || return 1 + # the command failed but the output does not contain the expected string + ! expect_failure $dir FAIL bash -c 'echo UNEXPECTED ; exit 1' > $dir/out || return 1 + ! grep --quiet FAIL $dir/out || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Given the *last_scrub*, wait for scrub to happen on **pgid**. It +# will fail if scrub does not complete within $TIMEOUT seconds. The +# repair is complete whenever the **get_last_scrub_stamp** function +# reports a timestamp different from the one given in argument. +# +# @param pgid the id of the PG +# @param last_scrub timestamp of the last scrub for *pgid* +# @return 0 on success, 1 on error +# +function wait_for_scrub() { + local pgid=$1 + local last_scrub="$2" + local sname=${3:-last_scrub_stamp} + + for ((i=0; i < $TIMEOUT; i++)); do + if test "$(get_last_scrub_stamp $pgid $sname)" '>' "$last_scrub" ; then + return 0 + fi + sleep 1 + done + return 1 +} + +function test_wait_for_scrub() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + local pgid=1.0 + ceph pg repair $pgid + local last_scrub=$(get_last_scrub_stamp $pgid) + wait_for_scrub $pgid "$last_scrub" || return 1 + kill_daemons $dir KILL osd || return 1 + last_scrub=$(get_last_scrub_stamp $pgid) + ! TIMEOUT=1 wait_for_scrub $pgid "$last_scrub" || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Return 0 if the erasure code *plugin* is available, 1 otherwise. +# +# @param plugin erasure code plugin +# @return 0 on success, 1 on error +# + +function erasure_code_plugin_exists() { + local plugin=$1 + local status + local grepstr + local s + case `uname` in + FreeBSD) grepstr="Cannot open.*$plugin" ;; + *) grepstr="$plugin.*No such file" ;; + esac + + s=$(ceph osd erasure-code-profile set TESTPROFILE plugin=$plugin 2>&1) + local status=$? + if [ $status -eq 0 ]; then + ceph osd erasure-code-profile rm TESTPROFILE + elif ! echo $s | grep --quiet "$grepstr" ; then + status=1 + # display why the string was rejected. + echo $s + fi + return $status +} + +function test_erasure_code_plugin_exists() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + erasure_code_plugin_exists jerasure || return 1 + ! erasure_code_plugin_exists FAKE || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Display all log files from **dir** on stdout. +# +# @param dir directory in which all data is stored +# + +function display_logs() { + local dir=$1 + + find $dir -maxdepth 1 -name '*.log' | \ + while read file ; do + echo "======================= $file" + cat $file + done +} + +function test_display_logs() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + kill_daemons $dir || return 1 + display_logs $dir > $dir/log.out + grep --quiet mon.a.log $dir/log.out || return 1 + teardown $dir || return 1 +} + +####################################################################### +## +# Spawn a command in background and save the pid in the variable name +# passed in argument. 
To make the output reading easier, the output is
+# prepended with the process id.
+#
+# Example:
+#   pids1=""
+#   run_in_background pids1 bash -c 'sleep 1; exit 1'
+#
+# @param pid_variable the variable name (not value) where the pids will be stored
+# @param ... the command to execute
+# @return nothing useful directly; the pids collected in pid_variable are meant
+#         to be passed to **wait_background**
+#
+function run_in_background() {
+    local pid_variable=$1
+    shift
+    # Execute the command and prepend the output with its pid.
+    # We make sure to return the exit status of the command and not the one of sed.
+    ("$@" |& sed 's/^/'$BASHPID': /'; return "${PIPESTATUS[0]}") >&2 &
+    eval "$pid_variable+=\" $!\""
+}
+
+function save_stdout {
+    local out="$1"
+    shift
+    "$@" > "$out"
+}
+
+function test_run_in_background() {
+    local pids
+    run_in_background pids sleep 1
+    run_in_background pids sleep 1
+    test $(echo $pids | wc -w) = 2 || return 1
+    wait $pids || return 1
+}
+
+#######################################################################
+##
+# Wait for pids running in background to complete.
+# This function is usually used after a **run_in_background** call.
+# Example:
+#   pids1=""
+#   run_in_background pids1 bash -c 'sleep 1; exit 1'
+#   wait_background pids1
+#
+# @param pids the variable name that contains the active PIDS; set to empty at the end of the function
+# @return 1 if at least one process exited in error, 0 otherwise
+#
+function wait_background() {
+    # We extract the PIDS from the variable name
+    pids=${!1}
+
+    return_code=0
+    for pid in $pids; do
+        if ! wait $pid; then
+            # If one process failed then return 1
+            return_code=1
+        fi
+    done
+
+    # We empty the variable, reporting that all processes ended
+    eval "$1=''"
+
+    return $return_code
+}
+
+
+function test_wait_background() {
+    local pids=""
+    run_in_background pids bash -c "sleep 1; exit 1"
+    run_in_background pids bash -c "sleep 2; exit 0"
+    wait_background pids
+    if [ $? -ne 1 ]; then return 1; fi
+
+    run_in_background pids bash -c "sleep 1; exit 0"
+    run_in_background pids bash -c "sleep 2; exit 0"
+    wait_background pids
+    if [ $? -ne 0 ]; then return 1; fi
+
+    if [ !
-z "$pids" ]; then return 1; fi +} + +function flush_pg_stats() +{ + local timeout=${1:-$TIMEOUT} + + ids=`ceph osd ls` + seqs='' + for osd in $ids; do + seq=`ceph tell osd.$osd flush_pg_stats` + if test -z "$seq" + then + continue + fi + seqs="$seqs $osd-$seq" + done + + for s in $seqs; do + osd=`echo $s | cut -d - -f 1` + seq=`echo $s | cut -d - -f 2` + echo "waiting osd.$osd seq $seq" + while test $(ceph osd last-stat-seq $osd) -lt $seq; do + sleep 1 + if [ $((timeout--)) -eq 0 ]; then + return 1 + fi + done + done +} + +function test_flush_pg_stats() +{ + local dir=$1 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + rados -p rbd put obj /etc/group + flush_pg_stats || return 1 + local jq_filter='.pools | .[] | select(.name == "rbd") | .stats' + stored=`ceph df detail --format=json | jq "$jq_filter.stored"` + stored_raw=`ceph df detail --format=json | jq "$jq_filter.stored_raw"` + test $stored -gt 0 || return 1 + test $stored == $stored_raw || return 1 + teardown $dir +} + +######################################################################## +## +# Get the current op scheduler enabled on an osd by reading the +# osd_op_queue config option +# +# Example: +# get_op_scheduler $osdid +# +# @param id the id of the OSD +# @return the name of the op scheduler enabled for the OSD +# +function get_op_scheduler() { + local id=$1 + + get_config osd $id osd_op_queue +} + +function test_get_op_scheduler() { + local dir=$1 + + setup $dir || return 1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 --osd_op_queue=wpq || return 1 + test $(get_op_scheduler 0) = "wpq" || return 1 + + run_osd $dir 1 --osd_op_queue=mclock_scheduler || return 1 + test $(get_op_scheduler 1) = "mclock_scheduler" || return 1 + teardown $dir || return 1 +} + +####################################################################### + +## +# Call the **run** function (which must be defined by the caller) with +# the **dir** argument followed by the caller argument list. +# +# If the **run** function returns on error, all logs found in **dir** +# are displayed for diagnostic purposes. +# +# **teardown** function is called when the **run** function returns +# (on success or on error), to cleanup leftovers. The CEPH_CONF is set +# to /dev/null and CEPH_ARGS is unset so that the tests are protected from +# external interferences. +# +# It is the responsibility of the **run** function to call the +# **setup** function to prepare the test environment (create a temporary +# directory etc.). +# +# The shell is required (via PS4) to display the function and line +# number whenever a statement is executed to help debugging. +# +# @param dir directory in which all data is stored +# @param ... 
arguments passed transparently to **run**
+# @return 0 on success, 1 on error
+#
+function main() {
+    local dir=td/$1
+    shift
+
+    shopt -s -o xtrace
+    PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: '
+
+    export PATH=.:$PATH # make sure program from sources are preferred
+    export PYTHONWARNINGS=ignore
+    export CEPH_CONF=/dev/null
+    unset CEPH_ARGS
+
+    local code
+    if run $dir "$@" ; then
+        code=0
+    else
+        code=1
+    fi
+    teardown $dir $code || return 1
+    return $code
+}
+
+#######################################################################
+
+function run_tests() {
+    shopt -s -o xtrace
+    PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: '
+
+    export PATH=.:$PATH # make sure program from sources are preferred
+
+    export CEPH_MON="127.0.0.1:7109" # git grep '\<7109\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+=" --fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON "
+    export CEPH_CONF=/dev/null
+
+    local funcs=${@:-$(set | sed -n -e 's/^\(test_[0-9a-z_]*\) .*/\1/p')}
+    local dir=td/ceph-helpers
+
+    for func in $funcs ; do
+        if ! $func $dir; then
+            teardown $dir 1
+            return 1
+        fi
+    done
+}
+
+if test "$1" = TESTS ; then
+    shift
+    run_tests "$@"
+    exit $?
+fi
+
+# NOTE:
+# jq only supports --exit-status|-e from version 1.4 onwards, which makes
+# returning on error waaaay prettier and straightforward.
+# However, the current automated upstream build is running with v1.3,
+# which has no idea what -e is. Hence the convoluted error checking we
+# need. Sad.
+# The next time someone changes this code, please check if v1.4 is now
+# a thing, and, if so, please change these to use -e. Thanks.
+
+# jq '.all.supported | select([.[] == "foo"] | any)'
+function jq_success() {
+    input="$1"
+    filter="$2"
+    expects="\"$3\""
+
+    in_escaped=$(printf %s "$input" | sed "s/'/'\\\\''/g")
+    filter_escaped=$(printf %s "$filter" | sed "s/'/'\\\\''/g")
+
+    ret=$(echo "$in_escaped" | jq "$filter_escaped")
+    if [[ "$ret" == "true" ]]; then
+        return 0
+    elif [[ -n "$expects" ]]; then
+        if [[ "$ret" == "$expects" ]]; then
+            return 0
+        fi
+    fi
+    return 1
+}
+
+function inject_eio() {
+    local pooltype=$1
+    shift
+    local which=$1
+    shift
+    local poolname=$1
+    shift
+    local objname=$1
+    shift
+    local dir=$1
+    shift
+    local shard_id=$1
+    shift
+
+    local -a initial_osds=($(get_osds $poolname $objname))
+    local osd_id=${initial_osds[$shard_id]}
+    if [ "$pooltype" != "ec" ]; then
+        shard_id=""
+    fi
+    type=$(cat $dir/$osd_id/type)
+    set_config osd $osd_id ${type}_debug_inject_read_err true || return 1
+    local loop=0
+    while ( CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.$osd_id) \
+             inject${which}err $poolname $objname $shard_id | grep -q Invalid ); do
+        loop=$(expr $loop + 1)
+        if [ $loop = "10" ]; then
+            return 1
+        fi
+        sleep 1
+    done
+}
+
+function multidiff() {
+    if !
diff $@ ; then
+        if [ "$DIFFCOLOPTS" = "" ]; then
+            return 1
+        fi
+        diff $DIFFCOLOPTS $@
+    fi
+}
+
+function create_ec_pool() {
+    local pool_name=$1
+    shift
+    local allow_overwrites=$1
+    shift
+
+    ceph osd erasure-code-profile set myprofile crush-failure-domain=osd "$@" || return 1
+
+    create_pool "$pool_name" 1 1 erasure myprofile || return 1
+
+    if [ "$allow_overwrites" = "true" ]; then
+        ceph osd pool set "$pool_name" allow_ec_overwrites true || return 1
+    fi
+
+    wait_for_clean || return 1
+    return 0
+}
+
+# Local Variables:
+# compile-command: "cd ../../src ; make -j4 && ../qa/standalone/ceph-helpers.sh TESTS # test_get_config"
+# End:
diff --git a/qa/standalone/crush/crush-choose-args.sh b/qa/standalone/crush/crush-choose-args.sh
new file mode 100755
index 000000000..ee548db12
--- /dev/null
+++ b/qa/standalone/crush/crush-choose-args.sh
@@ -0,0 +1,243 @@
+#!/usr/bin/env bash
+#
+# Copyright (C) 2017 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:7131" # git grep '\<7131\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON "
+    CEPH_ARGS+="--crush-location=root=default,host=HOST "
+    CEPH_ARGS+="--osd-crush-initial-weight=3 "
+    #
+    # Disable the device auto class feature for now.
+    # The device class is non-deterministic and will
+    # crash the crushmap comparison below.
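+    # Note (added for clarity): --osd-crush-initial-weight=3 above gives every
+    # new OSD a fixed CRUSH weight of 3, which is what the expected crushmaps
+    # (crush-choose-args-expected-one-more-3.txt) and the weight sums checked
+    # by the tests below assume.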
+ # + CEPH_ARGS+="--osd-class-update-on-start=false " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_choose_args_update() { + # + # adding a weighted OSD updates the weight up to the top + # + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + ceph osd set-require-min-compat-client luminous + ceph osd getcrushmap > $dir/map || return 1 + crushtool -d $dir/map -o $dir/map.txt || return 1 + sed -i -e '/end crush map/d' $dir/map.txt + cat >> $dir/map.txt <<EOF +# choose_args +choose_args 0 { + { + bucket_id -1 + weight_set [ + [ 2.00000 ] + [ 2.00000 ] + ] + ids [ -10 ] + } + { + bucket_id -2 + weight_set [ + [ 2.00000 ] + [ 2.00000 ] + ] + ids [ -20 ] + } +} + +# end crush map +EOF + crushtool -c $dir/map.txt -o $dir/map-new || return 1 + ceph osd setcrushmap -i $dir/map-new || return 1 + ceph osd crush tree + + run_osd $dir 1 || return 1 + ceph osd crush tree + ceph osd getcrushmap > $dir/map-one-more || return 1 + crushtool -d $dir/map-one-more -o $dir/map-one-more.txt || return 1 + cat $dir/map-one-more.txt + diff -u $dir/map-one-more.txt $CEPH_ROOT/src/test/crush/crush-choose-args-expected-one-more-3.txt || return 1 + + destroy_osd $dir 1 || return 1 + ceph osd crush tree + ceph osd getcrushmap > $dir/map-one-less || return 1 + crushtool -d $dir/map-one-less -o $dir/map-one-less.txt || return 1 + diff -u $dir/map-one-less.txt $dir/map.txt || return 1 +} + +function TEST_no_update_weight_set() { + # + # adding a zero weight OSD does not update the weight set at all + # + local dir=$1 + + ORIG_CEPH_ARGS="$CEPH_ARGS" + CEPH_ARGS+="--osd-crush-update-weight-set=false " + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + ceph osd set-require-min-compat-client luminous + ceph osd crush tree + ceph osd getcrushmap > $dir/map || return 1 + crushtool -d $dir/map -o $dir/map.txt || return 1 + sed -i -e '/end crush map/d' $dir/map.txt + cat >> $dir/map.txt <<EOF +# choose_args +choose_args 0 { + { + bucket_id -1 + weight_set [ + [ 2.00000 ] + [ 1.00000 ] + ] + ids [ -10 ] + } + { + bucket_id -2 + weight_set [ + [ 2.00000 ] + [ 1.00000 ] + ] + ids [ -20 ] + } +} + +# end crush map +EOF + crushtool -c $dir/map.txt -o $dir/map-new || return 1 + ceph osd setcrushmap -i $dir/map-new || return 1 + ceph osd crush tree + + + run_osd $dir 1 || return 1 + ceph osd crush tree + ceph osd getcrushmap > $dir/map-one-more || return 1 + crushtool -d $dir/map-one-more -o $dir/map-one-more.txt || return 1 + cat $dir/map-one-more.txt + diff -u $dir/map-one-more.txt $CEPH_ROOT/src/test/crush/crush-choose-args-expected-one-more-0.txt || return 1 + + destroy_osd $dir 1 || return 1 + ceph osd crush tree + ceph osd getcrushmap > $dir/map-one-less || return 1 + crushtool -d $dir/map-one-less -o $dir/map-one-less.txt || return 1 + diff -u $dir/map-one-less.txt $dir/map.txt || return 1 + + CEPH_ARGS="$ORIG_CEPH_ARGS" +} + +function TEST_reweight() { + # reweight and reweight-compat behave appropriately + local dir=$1 + + ORIG_CEPH_ARGS="$CEPH_ARGS" + CEPH_ARGS+="--osd-crush-update-weight-set=false " + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + + ceph osd crush weight-set create-compat || return 1 + ceph osd crush tree + + ceph osd crush weight-set reweight-compat osd.0 2 || return 1 + ceph osd crush 
tree + ceph osd crush tree | grep host | grep '6.00000 5.00000' || return 1 + + run_osd $dir 2 || return 1 + ceph osd crush tree + ceph osd crush tree | grep host | grep '9.00000 5.00000' || return 1 + + ceph osd crush reweight osd.2 4 + ceph osd crush tree + ceph osd crush tree | grep host | grep '10.00000 5.00000' || return 1 + + ceph osd crush weight-set reweight-compat osd.2 4 + ceph osd crush tree + ceph osd crush tree | grep host | grep '10.00000 9.00000' || return 1 +} + +function TEST_move_bucket() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + + ceph osd crush weight-set create-compat || return 1 + ceph osd crush weight-set reweight-compat osd.0 2 || return 1 + ceph osd crush weight-set reweight-compat osd.1 2 || return 1 + ceph osd crush tree + ceph osd crush tree | grep HOST | grep '6.00000 4.00000' || return 1 + + # moving a bucket adjusts the weights + ceph osd crush add-bucket RACK rack root=default || return 1 + ceph osd crush move HOST rack=RACK || return 1 + ceph osd crush tree + ceph osd crush tree | grep HOST | grep '6.00000 4.00000' || return 1 + ceph osd crush tree | grep RACK | grep '6.00000 4.00000' || return 1 + + # weight-set reweight adjusts containing buckets + ceph osd crush weight-set reweight-compat osd.0 1 || return 1 + ceph osd crush tree + ceph osd crush tree | grep HOST | grep '6.00000 3.00000' || return 1 + ceph osd crush tree | grep RACK | grep '6.00000 3.00000' || return 1 + + # moving a leaf resets its weight-set to the canonical weight... + ceph config set mon osd_crush_update_weight_set true || return 1 + ceph osd crush add-bucket FOO host root=default || return 1 + ceph osd crush move osd.0 host=FOO || return 1 + ceph osd crush tree + ceph osd crush tree | grep osd.0 | grep '3.00000 3.00000' || return 1 + ceph osd crush tree | grep HOST | grep '3.00000 2.00000' || return 1 + ceph osd crush tree | grep RACK | grep '3.00000 2.00000' || return 1 + + # ...or to zero. + ceph config set mon osd_crush_update_weight_set false || return 1 + ceph osd crush move osd.1 host=FOO || return 1 + ceph osd crush tree + ceph osd crush tree | grep osd.0 | grep '3.00000 3.00000' || return 1 + ceph osd crush tree | grep osd.1 | grep '3.00000 0' || return 1 + ceph osd crush tree | grep FOO | grep '6.00000 3.00000' || return 1 +} + +main crush-choose-args "$@" + +# Local Variables: +# compile-command: "cd ../../../build ; ln -sf ../src/ceph-disk/ceph_disk/main.py bin/ceph-disk && make -j4 && ../src/test/crush/crush-choose-args.sh" +# End: diff --git a/qa/standalone/crush/crush-classes.sh b/qa/standalone/crush/crush-classes.sh new file mode 100755 index 000000000..558aabe6d --- /dev/null +++ b/qa/standalone/crush/crush-classes.sh @@ -0,0 +1,265 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7130" # git grep '\<7130\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + # + # Disable auto-class, so we can inject device class manually below + # + CEPH_ARGS+="--osd-class-update-on-start=false " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function add_something() { + local dir=$1 + local obj=${2:-SOMETHING} + + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + rados --pool rbd put $obj $dir/ORIGINAL || return 1 +} + +function get_osds_up() { + local poolname=$1 + local objectname=$2 + + local osds=$(ceph --format xml osd map $poolname $objectname 2>/dev/null | \ + $XMLSTARLET sel -t -m "//up/osd" -v . -o ' ') + # get rid of the trailing space + echo $osds +} + +function TEST_reweight_vs_classes() { + local dir=$1 + + # CrushWrapper::update_item (and ceph osd crush set) must rebuild the shadow + # tree too. https://tracker.ceph.com/issues/48065 + + run_mon $dir a || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd crush set-device-class ssd osd.0 || return 1 + ceph osd crush class ls-osd ssd | grep 0 || return 1 + ceph osd crush set-device-class ssd osd.1 || return 1 + ceph osd crush class ls-osd ssd | grep 1 || return 1 + + ceph osd crush reweight osd.0 1 + + h=`hostname -s` + ceph osd crush dump | jq ".buckets[] | select(.name==\"$h\") | .items[0].weight" | grep 65536 + ceph osd crush dump | jq ".buckets[] | select(.name==\"$h~ssd\") | .items[0].weight" | grep 65536 + + ceph osd crush set 0 2 host=$h + + ceph osd crush dump | jq ".buckets[] | select(.name==\"$h\") | .items[0].weight" | grep 131072 + ceph osd crush dump | jq ".buckets[] | select(.name==\"$h~ssd\") | .items[0].weight" | grep 131072 +} + +function TEST_classes() { + local dir=$1 + + run_mon $dir a || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + create_rbd_pool || return 1 + + test "$(get_osds_up rbd SOMETHING)" == "1 2 0" || return 1 + add_something $dir SOMETHING || return 1 + + # + # osd.0 has class ssd and the rule is modified + # to only take ssd devices. + # + ceph osd getcrushmap > $dir/map || return 1 + crushtool -d $dir/map -o $dir/map.txt || return 1 + ${SED} -i \ + -e '/device 0 osd.0/s/$/ class ssd/' \ + -e '/step take default/s/$/ class ssd/' \ + $dir/map.txt || return 1 + crushtool -c $dir/map.txt -o $dir/map-new || return 1 + ceph osd setcrushmap -i $dir/map-new || return 1 + + # + # There can only be one mapping since there only is + # one device with ssd class. + # + ok=false + for delay in 2 4 8 16 32 64 128 256 ; do + if test "$(get_osds_up rbd SOMETHING_ELSE)" == "0" ; then + ok=true + break + fi + sleep $delay + ceph osd dump # for debugging purposes + ceph pg dump # for debugging purposes + done + $ok || return 1 + # + # Writing keeps working because the pool is min_size 1 by + # default. + # + add_something $dir SOMETHING_ELSE || return 1 + + # + # Sanity check that the rule indeed has ssd + # generated bucket with a name including ~ssd. 
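+    # Note (added for clarity): device classes are implemented with shadow
+    # CRUSH trees; for each class the buckets are duplicated under names of
+    # the form <bucket>~<class> (e.g. default~ssd), which is what the grep
+    # below relies on.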
+ # + ceph osd crush dump | grep -q '~ssd' || return 1 +} + +function TEST_set_device_class() { + local dir=$1 + + TEST_classes $dir || return 1 + + ceph osd crush set-device-class ssd osd.0 || return 1 + ceph osd crush class ls-osd ssd | grep 0 || return 1 + ceph osd crush set-device-class ssd osd.1 || return 1 + ceph osd crush class ls-osd ssd | grep 1 || return 1 + ceph osd crush set-device-class ssd 0 1 || return 1 # should be idempotent + + ok=false + for delay in 2 4 8 16 32 64 128 256 ; do + if test "$(get_osds_up rbd SOMETHING_ELSE)" == "0 1" ; then + ok=true + break + fi + sleep $delay + ceph osd crush dump + ceph osd dump # for debugging purposes + ceph pg dump # for debugging purposes + done + $ok || return 1 +} + +function TEST_mon_classes() { + local dir=$1 + + run_mon $dir a || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + create_rbd_pool || return 1 + + test "$(get_osds_up rbd SOMETHING)" == "1 2 0" || return 1 + add_something $dir SOMETHING || return 1 + + # test create and remove class + ceph osd crush class create CLASS || return 1 + ceph osd crush class create CLASS || return 1 # idempotent + ceph osd crush class ls | grep CLASS || return 1 + ceph osd crush class rename CLASS TEMP || return 1 + ceph osd crush class ls | grep TEMP || return 1 + ceph osd crush class rename TEMP CLASS || return 1 + ceph osd crush class ls | grep CLASS || return 1 + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd crush-device-class=CLASS || return 1 + expect_failure $dir EBUSY ceph osd crush class rm CLASS || return 1 + ceph osd erasure-code-profile rm myprofile || return 1 + ceph osd crush class rm CLASS || return 1 + ceph osd crush class rm CLASS || return 1 # test idempotence + + # test rm-device-class + ceph osd crush set-device-class aaa osd.0 || return 1 + ceph osd tree | grep -q 'aaa' || return 1 + ceph osd crush dump | grep -q '~aaa' || return 1 + ceph osd crush tree --show-shadow | grep -q '~aaa' || return 1 + ceph osd crush set-device-class bbb osd.1 || return 1 + ceph osd tree | grep -q 'bbb' || return 1 + ceph osd crush dump | grep -q '~bbb' || return 1 + ceph osd crush tree --show-shadow | grep -q '~bbb' || return 1 + ceph osd crush set-device-class ccc osd.2 || return 1 + ceph osd tree | grep -q 'ccc' || return 1 + ceph osd crush dump | grep -q '~ccc' || return 1 + ceph osd crush tree --show-shadow | grep -q '~ccc' || return 1 + ceph osd crush rm-device-class 0 || return 1 + ceph osd tree | grep -q 'aaa' && return 1 + ceph osd crush class ls | grep -q 'aaa' && return 1 # class 'aaa' should gone + ceph osd crush rm-device-class 1 || return 1 + ceph osd tree | grep -q 'bbb' && return 1 + ceph osd crush class ls | grep -q 'bbb' && return 1 # class 'bbb' should gone + ceph osd crush rm-device-class 2 || return 1 + ceph osd tree | grep -q 'ccc' && return 1 + ceph osd crush class ls | grep -q 'ccc' && return 1 # class 'ccc' should gone + ceph osd crush set-device-class asdf all || return 1 + ceph osd tree | grep -q 'asdf' || return 1 + ceph osd crush dump | grep -q '~asdf' || return 1 + ceph osd crush tree --show-shadow | grep -q '~asdf' || return 1 + ceph osd crush rule create-replicated asdf-rule default host asdf || return 1 + ceph osd crush rm-device-class all || return 1 + ceph osd tree | grep -q 'asdf' && return 1 + ceph osd crush class ls | grep -q 'asdf' || return 1 # still referenced by asdf-rule + + ceph osd crush set-device-class abc osd.2 || return 1 + 
ceph osd crush move osd.2 root=foo rack=foo-rack host=foo-host || return 1 + out=`ceph osd tree |awk '$1 == 2 && $2 == "abc" {print $0}'` + if [ "$out" == "" ]; then + return 1 + fi + + # verify 'crush move' too + ceph osd crush dump | grep -q 'foo~abc' || return 1 + ceph osd crush tree --show-shadow | grep -q 'foo~abc' || return 1 + ceph osd crush dump | grep -q 'foo-rack~abc' || return 1 + ceph osd crush tree --show-shadow | grep -q 'foo-rack~abc' || return 1 + ceph osd crush dump | grep -q 'foo-host~abc' || return 1 + ceph osd crush tree --show-shadow | grep -q 'foo-host~abc' || return 1 + ceph osd crush rm-device-class osd.2 || return 1 + # restore class, so we can continue to test create-replicated + ceph osd crush set-device-class abc osd.2 || return 1 + + ceph osd crush rule create-replicated foo-rule foo host abc || return 1 + + # test set-device-class implicitly change class + ceph osd crush set-device-class hdd osd.0 || return 1 + expect_failure $dir EBUSY ceph osd crush set-device-class nvme osd.0 || return 1 + + # test class rename + ceph osd crush rm-device-class all || return 1 + ceph osd crush set-device-class class_1 all || return 1 + ceph osd crush class ls | grep 'class_1' || return 1 + ceph osd crush tree --show-shadow | grep 'class_1' || return 1 + ceph osd crush rule create-replicated class_1_rule default host class_1 || return 1 + ceph osd crush class rename class_1 class_2 + ceph osd crush class rename class_1 class_2 # idempotent + ceph osd crush class ls | grep 'class_1' && return 1 + ceph osd crush tree --show-shadow | grep 'class_1' && return 1 + ceph osd crush class ls | grep 'class_2' || return 1 + ceph osd crush tree --show-shadow | grep 'class_2' || return 1 +} + +main crush-classes "$@" + +# Local Variables: +# compile-command: "cd ../../../build ; ln -sf ../src/ceph-disk/ceph_disk/main.py bin/ceph-disk && make -j4 && ../src/test/crush/crush-classes.sh" +# End: diff --git a/qa/standalone/erasure-code/test-erasure-code-plugins.sh b/qa/standalone/erasure-code/test-erasure-code-plugins.sh new file mode 100755 index 000000000..b5648d472 --- /dev/null +++ b/qa/standalone/erasure-code/test-erasure-code-plugins.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +set -x + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +arch=$(uname -m) + +case $arch in + i[[3456]]86*|x86_64*|amd64*) + legacy_jerasure_plugins=(jerasure_generic jerasure_sse3 jerasure_sse4) + legacy_shec_plugins=(shec_generic shec_sse3 shec_sse4) + plugins=(jerasure shec lrc isa) + ;; + aarch64*|arm*) + legacy_jerasure_plugins=(jerasure_generic jerasure_neon) + legacy_shec_plugins=(shec_generic shec_neon) + plugins=(jerasure shec lrc) + ;; + *) + echo "unsupported platform ${arch}." 
+ return 1 + ;; +esac + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:17110" # git grep '\<17110\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done +} + +function TEST_preload_warning() { + local dir=$1 + + for plugin in ${legacy_jerasure_plugins[*]} ${legacy_shec_plugins[*]}; do + setup $dir || return 1 + run_mon $dir a --osd_erasure_code_plugins="${plugin}" || return 1 + run_mgr $dir x || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + run_osd $dir 0 --osd_erasure_code_plugins="${plugin}" || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep "WARNING: osd_erasure_code_plugins contains plugin ${plugin}" $dir/mon.a.log || return 1 + grep "WARNING: osd_erasure_code_plugins contains plugin ${plugin}" $dir/osd.0.log || return 1 + teardown $dir || return 1 + done + return 0 +} + +function TEST_preload_no_warning() { + local dir=$1 + + for plugin in ${plugins[*]}; do + setup $dir || return 1 + run_mon $dir a --osd_erasure_code_plugins="${plugin}" || return 1 + run_mgr $dir x || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + run_osd $dir 0 --osd_erasure_code_plugins="${plugin}" || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + ! grep "WARNING: osd_erasure_code_plugins contains plugin" $dir/mon.a.log || return 1 + ! grep "WARNING: osd_erasure_code_plugins contains plugin" $dir/osd.0.log || return 1 + teardown $dir || return 1 + done + + return 0 +} + +function TEST_preload_no_warning_default() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + ! grep "WARNING: osd_erasure_code_plugins" $dir/mon.a.log || return 1 + ! 
grep "WARNING: osd_erasure_code_plugins" $dir/osd.0.log || return 1 + teardown $dir || return 1 + + return 0 +} + +function TEST_ec_profile_warning() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for id in $(seq 0 2) ; do + run_osd $dir $id || return 1 + done + create_rbd_pool || return 1 + wait_for_clean || return 1 + + for plugin in ${legacy_jerasure_plugins[*]}; do + ceph osd erasure-code-profile set prof-${plugin} crush-failure-domain=osd technique=reed_sol_van plugin=${plugin} || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep "WARNING: erasure coding profile prof-${plugin} uses plugin ${plugin}" $dir/mon.a.log || return 1 + done + + for plugin in ${legacy_shec_plugins[*]}; do + ceph osd erasure-code-profile set prof-${plugin} crush-failure-domain=osd plugin=${plugin} || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep "WARNING: erasure coding profile prof-${plugin} uses plugin ${plugin}" $dir/mon.a.log || return 1 + done + + teardown $dir || return 1 +} + +main test-erasure-code-plugins "$@" diff --git a/qa/standalone/erasure-code/test-erasure-code.sh b/qa/standalone/erasure-code/test-erasure-code.sh new file mode 100755 index 000000000..b93151233 --- /dev/null +++ b/qa/standalone/erasure-code/test-erasure-code.sh @@ -0,0 +1,337 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7101" # git grep '\<7101\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --mon-osd-prime-pg-temp=false" + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + # check that erasure code plugins are preloaded + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1 + for id in $(seq 0 10) ; do + run_osd $dir $id || return 1 + done + create_rbd_pool || return 1 + wait_for_clean || return 1 + # check that erasure code plugins are preloaded + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1 + create_erasure_coded_pool ecpool || return 1 + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done + + delete_pool ecpool || return 1 + teardown $dir || return 1 +} + +function create_erasure_coded_pool() { + local poolname=$1 + + ceph osd erasure-code-profile set myprofile \ + crush-failure-domain=osd || return 1 + create_pool $poolname 12 12 erasure myprofile \ + || return 1 + wait_for_clean || return 1 +} + +function rados_put_get() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + + + for marker in AAA BBB CCCC DDDD ; do + printf "%*s" 1024 $marker + done > $dir/ORIGINAL + + # + # get and put an object, compare they are equal + # + rados --pool $poolname put $objname $dir/ORIGINAL || return 1 + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY + + # + # take out an OSD used to store the object and + # check the object can still be retrieved, which implies + # recovery + # + local -a initial_osds=($(get_osds $poolname $objname)) + local last=$((${#initial_osds[@]} - 1)) + ceph osd out ${initial_osds[$last]} || return 1 + + # give the osdmap up to 5 seconds to refresh + sleep 5 + ! get_osds $poolname $objname | grep '\<'${initial_osds[$last]}'\>' || return 1 + + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + ceph osd in ${initial_osds[$last]} || return 1 + + rm $dir/ORIGINAL +} + +function rados_osds_out_in() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + + + for marker in FFFF GGGG HHHH IIII ; do + printf "%*s" 1024 $marker + done > $dir/ORIGINAL + + # + # get and put an object, compare they are equal + # + rados --pool $poolname put $objname $dir/ORIGINAL || return 1 + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY + + # + # take out two OSDs used to store the object, wait for the cluster + # to be clean (i.e. all PG are clean and active) again which + # implies the PG have been moved to use the remaining OSDs. Check + # the object can still be retrieved. + # + wait_for_clean || return 1 + local osds_list=$(get_osds $poolname $objname) + local -a osds=($osds_list) + for osd in 0 1 ; do + ceph osd out ${osds[$osd]} || return 1 + done + wait_for_clean || return 1 + # + # verify the object is no longer mapped to the osds that are out + # + for osd in 0 1 ; do + ! 
get_osds $poolname $objname | grep '\<'${osds[$osd]}'\>' || return 1 + done + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + # + # bring the osds back in, , wait for the cluster + # to be clean (i.e. all PG are clean and active) again which + # implies the PG go back to using the same osds as before + # + for osd in 0 1 ; do + ceph osd in ${osds[$osd]} || return 1 + done + wait_for_clean || return 1 + test "$osds_list" = "$(get_osds $poolname $objname)" || return 1 + rm $dir/ORIGINAL +} + +function TEST_rados_put_get_lrc_advanced() { + local dir=$1 + local poolname=pool-lrc-a + local profile=profile-lrc-a + + ceph osd erasure-code-profile set $profile \ + plugin=lrc \ + mapping=DD_ \ + crush-steps='[ [ "chooseleaf", "osd", 0 ] ]' \ + layers='[ [ "DDc", "" ] ]' || return 1 + create_pool $poolname 12 12 erasure $profile \ + || return 1 + + rados_put_get $dir $poolname || return 1 + + delete_pool $poolname + ceph osd erasure-code-profile rm $profile +} + +function TEST_rados_put_get_lrc_kml() { + local dir=$1 + local poolname=pool-lrc + local profile=profile-lrc + + ceph osd erasure-code-profile set $profile \ + plugin=lrc \ + k=4 m=2 l=3 \ + crush-failure-domain=osd || return 1 + create_pool $poolname 12 12 erasure $profile \ + || return 1 + + rados_put_get $dir $poolname || return 1 + + delete_pool $poolname + ceph osd erasure-code-profile rm $profile +} + +function TEST_rados_put_get_isa() { + if ! erasure_code_plugin_exists isa ; then + echo "SKIP because plugin isa has not been built" + return 0 + fi + local dir=$1 + local poolname=pool-isa + + ceph osd erasure-code-profile set profile-isa \ + plugin=isa \ + crush-failure-domain=osd || return 1 + create_pool $poolname 1 1 erasure profile-isa \ + || return 1 + + rados_put_get $dir $poolname || return 1 + + delete_pool $poolname +} + +function TEST_rados_put_get_jerasure() { + local dir=$1 + + rados_put_get $dir ecpool || return 1 + + local poolname=pool-jerasure + local profile=profile-jerasure + + ceph osd erasure-code-profile set $profile \ + plugin=jerasure \ + k=4 m=2 \ + crush-failure-domain=osd || return 1 + create_pool $poolname 12 12 erasure $profile \ + || return 1 + + rados_put_get $dir $poolname || return 1 + rados_osds_out_in $dir $poolname || return 1 + + delete_pool $poolname + ceph osd erasure-code-profile rm $profile +} + +function TEST_rados_put_get_shec() { + local dir=$1 + + local poolname=pool-shec + local profile=profile-shec + + ceph osd erasure-code-profile set $profile \ + plugin=shec \ + k=2 m=1 c=1 \ + crush-failure-domain=osd || return 1 + create_pool $poolname 12 12 erasure $profile \ + || return 1 + + rados_put_get $dir $poolname || return 1 + + delete_pool $poolname + ceph osd erasure-code-profile rm $profile +} + +function TEST_alignment_constraints() { + local payload=ABC + echo "$payload" > $dir/ORIGINAL + # + # Verify that the rados command enforces alignment constraints + # imposed by the stripe width + # See http://tracker.ceph.com/issues/8622 + # + local stripe_unit=$(ceph-conf --show-config-value osd_pool_erasure_code_stripe_unit) + eval local $(ceph osd erasure-code-profile get myprofile | grep k=) + local block_size=$((stripe_unit * k - 1)) + dd if=/dev/zero of=$dir/ORIGINAL bs=$block_size count=2 + rados --block-size=$block_size \ + --pool ecpool put UNALIGNED $dir/ORIGINAL || return 1 + rm $dir/ORIGINAL +} + +function chunk_size() { + echo $(ceph-conf --show-config-value osd_pool_erasure_code_stripe_unit) +} + +# +# By default an object will 
be split in two (k=2) with the first part +# of the object in the first OSD of the up set and the second part in +# the next OSD in the up set. This layout is defined by the mapping +# parameter and this function helps verify that the first and second +# part of the object are located in the OSD where they should be. +# +function verify_chunk_mapping() { + local dir=$1 + local poolname=$2 + local first=$3 + local second=$4 + + local payload=$(printf '%*s' $(chunk_size) FIRST$poolname ; printf '%*s' $(chunk_size) SECOND$poolname) + echo -n "$payload" > $dir/ORIGINAL + + rados --pool $poolname put SOMETHING$poolname $dir/ORIGINAL || return 1 + rados --pool $poolname get SOMETHING$poolname $dir/COPY || return 1 + local -a osds=($(get_osds $poolname SOMETHING$poolname)) + for (( i = 0; i < ${#osds[@]}; i++ )) ; do + ceph daemon osd.${osds[$i]} flush_journal + done + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY + + local -a osds=($(get_osds $poolname SOMETHING$poolname)) + objectstore_tool $dir ${osds[$first]} SOMETHING$poolname get-bytes | grep --quiet FIRST$poolname || return 1 + objectstore_tool $dir ${osds[$second]} SOMETHING$poolname get-bytes | grep --quiet SECOND$poolname || return 1 +} + +function TEST_chunk_mapping() { + local dir=$1 + + # + # mapping=DD_ is the default: + # first OSD (i.e. 0) in the up set has the first part of the object + # second OSD (i.e. 1) in the up set has the second part of the object + # + verify_chunk_mapping $dir ecpool 0 1 || return 1 + + ceph osd erasure-code-profile set remap-profile \ + plugin=lrc \ + layers='[ [ "cDD", "" ] ]' \ + mapping='_DD' \ + crush-steps='[ [ "choose", "osd", 0 ] ]' || return 1 + ceph osd erasure-code-profile get remap-profile + create_pool remap-pool 12 12 erasure remap-profile \ + || return 1 + + # + # mapping=_DD + # second OSD (i.e. 1) in the up set has the first part of the object + # third OSD (i.e. 2) in the up set has the second part of the object + # + verify_chunk_mapping $dir remap-pool 1 2 || return 1 + + delete_pool remap-pool + ceph osd erasure-code-profile rm remap-profile +} + +main test-erasure-code "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/erasure-code/test-erasure-code.sh" +# End: diff --git a/qa/standalone/erasure-code/test-erasure-eio.sh b/qa/standalone/erasure-code/test-erasure-eio.sh new file mode 100755 index 000000000..42c538eb9 --- /dev/null +++ b/qa/standalone/erasure-code/test-erasure-eio.sh @@ -0,0 +1,700 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Red Hat <contact@redhat.com> +# +# +# Author: Kefu Chai <kchai@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7112" # git grep '\<7112\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_mclock_override_recovery_settings=true " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + create_pool rbd 4 || return 1 + + # check that erasure code plugins are preloaded + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep 'load: jerasure.*lrc' $dir/mon.a.log || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function setup_osds() { + local count=$1 + shift + + for id in $(seq 0 $(expr $count - 1)) ; do + run_osd $dir $id || return 1 + done + + # check that erasure code plugins are preloaded + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep 'load: jerasure.*lrc' $dir/osd.0.log || return 1 +} + +function get_state() { + local pgid=$1 + local sname=state + ceph --format json pg dump pgs 2>/dev/null | \ + jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname" +} + +function create_erasure_coded_pool() { + local poolname=$1 + shift + local k=$1 + shift + local m=$1 + shift + + ceph osd erasure-code-profile set myprofile \ + plugin=jerasure \ + k=$k m=$m \ + crush-failure-domain=osd || return 1 + create_pool $poolname 1 1 erasure myprofile \ + || return 1 + wait_for_clean || return 1 +} + +function delete_erasure_coded_pool() { + local poolname=$1 + ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it + ceph osd erasure-code-profile rm myprofile +} + +function rados_put() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + + for marker in AAA BBB CCCC DDDD ; do + printf "%*s" 1024 $marker + done > $dir/ORIGINAL + # + # get and put an object, compare they are equal + # + rados --pool $poolname put $objname $dir/ORIGINAL || return 1 +} + +function rados_get() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + local expect=${4:-ok} + + # + # Expect a failure to get object + # + if [ $expect = "fail" ]; + then + ! 
rados --pool $poolname get $objname $dir/COPY + return + fi + # + # get an object, compare with $dir/ORIGINAL + # + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY +} + + +function inject_remove() { + local pooltype=$1 + shift + local which=$1 + shift + local poolname=$1 + shift + local objname=$1 + shift + local dir=$1 + shift + local shard_id=$1 + shift + + local -a initial_osds=($(get_osds $poolname $objname)) + local osd_id=${initial_osds[$shard_id]} + objectstore_tool $dir $osd_id $objname remove || return 1 +} + +# Test with an inject error +function rados_put_get_data() { + local inject=$1 + shift + local dir=$1 + shift + local shard_id=$1 + shift + local arg=$1 + + # inject eio to speificied shard + # + local poolname=pool-jerasure + local objname=obj-$inject-$$-$shard_id + rados_put $dir $poolname $objname || return 1 + inject_$inject ec data $poolname $objname $dir $shard_id || return 1 + rados_get $dir $poolname $objname || return 1 + + if [ "$arg" = "recovery" ]; + then + # + # take out the last OSD used to store the object, + # bring it back, and check for clean PGs which means + # recovery didn't crash the primary. + # + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + # Kill OSD + kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1 + ceph osd out ${last_osd} || return 1 + ! get_osds $poolname $objname | grep '\<'${last_osd}'\>' || return 1 + ceph osd in ${last_osd} || return 1 + activate_osd $dir ${last_osd} || return 1 + wait_for_clean || return 1 + # Won't check for eio on get here -- recovery above might have fixed it + else + shard_id=$(expr $shard_id + 1) + inject_$inject ec data $poolname $objname $dir $shard_id || return 1 + rados_get $dir $poolname $objname fail || return 1 + rm $dir/ORIGINAL + fi + +} + +# Change the size of speificied shard +# +function set_size() { + local objname=$1 + shift + local dir=$1 + shift + local shard_id=$1 + shift + local bytes=$1 + shift + local mode=${1} + + local poolname=pool-jerasure + local -a initial_osds=($(get_osds $poolname $objname)) + local osd_id=${initial_osds[$shard_id]} + ceph osd set noout + if [ "$mode" = "add" ]; + then + objectstore_tool $dir $osd_id $objname get-bytes $dir/CORRUPT || return 1 + dd if=/dev/urandom bs=$bytes count=1 >> $dir/CORRUPT + elif [ "$bytes" = "0" ]; + then + touch $dir/CORRUPT + else + dd if=/dev/urandom bs=$bytes count=1 of=$dir/CORRUPT + fi + objectstore_tool $dir $osd_id $objname set-bytes $dir/CORRUPT || return 1 + rm -f $dir/CORRUPT + ceph osd unset noout +} + +function rados_get_data_bad_size() { + local dir=$1 + shift + local shard_id=$1 + shift + local bytes=$1 + shift + local mode=${1:-set} + + local poolname=pool-jerasure + local objname=obj-size-$$-$shard_id-$bytes + rados_put $dir $poolname $objname || return 1 + + # Change the size of speificied shard + # + set_size $objname $dir $shard_id $bytes $mode || return 1 + + rados_get $dir $poolname $objname || return 1 + + # Leave objname and modify another shard + shard_id=$(expr $shard_id + 1) + set_size $objname $dir $shard_id $bytes $mode || return 1 + rados_get $dir $poolname $objname fail || return 1 + rm $dir/ORIGINAL +} + +# +# These two test cases try to validate the following behavior: +# For object on EC pool, if there is one shard having read error ( +# either primary or replica), client can still read object. +# +# If 2 shards have read errors the client will get an error. 
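+#
+# As a concrete illustration (assuming the k=2 m=1 profile created by the
+# tests below): the object is striped across 2 data shards plus 1 coding
+# shard, and any k=2 of those 3 shards are enough to serve a read. One
+# injected EIO therefore still leaves k readable shards, while two injected
+# EIOs on the same object leave only one, and the client read fails.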
+# +function TEST_rados_get_subread_eio_shard_0() { + local dir=$1 + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # inject eio on primary OSD (0) and replica OSD (1) + local shard_id=0 + rados_put_get_data eio $dir $shard_id || return 1 + delete_erasure_coded_pool $poolname +} + +function TEST_rados_get_subread_eio_shard_1() { + local dir=$1 + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # inject eio into replicas OSD (1) and OSD (2) + local shard_id=1 + rados_put_get_data eio $dir $shard_id || return 1 + delete_erasure_coded_pool $poolname +} + +# We don't remove the object from the primary because +# that just causes it to appear to be missing + +function TEST_rados_get_subread_missing() { + local dir=$1 + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # inject remove into replicas OSD (1) and OSD (2) + local shard_id=1 + rados_put_get_data remove $dir $shard_id || return 1 + delete_erasure_coded_pool $poolname +} + +# +# +# These two test cases try to validate that following behavior: +# For object on EC pool, if there is one shard which an incorrect +# size this will cause an internal read error, client can still read object. +# +# If 2 shards have incorrect size the client will get an error. +# +function TEST_rados_get_bad_size_shard_0() { + local dir=$1 + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # Set incorrect size into primary OSD (0) and replica OSD (1) + local shard_id=0 + rados_get_data_bad_size $dir $shard_id 10 || return 1 + rados_get_data_bad_size $dir $shard_id 0 || return 1 + rados_get_data_bad_size $dir $shard_id 256 add || return 1 + delete_erasure_coded_pool $poolname +} + +function TEST_rados_get_bad_size_shard_1() { + local dir=$1 + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # Set incorrect size into replicas OSD (1) and OSD (2) + local shard_id=1 + rados_get_data_bad_size $dir $shard_id 10 || return 1 + rados_get_data_bad_size $dir $shard_id 0 || return 1 + rados_get_data_bad_size $dir $shard_id 256 add || return 1 + delete_erasure_coded_pool $poolname +} + +function TEST_rados_get_with_subreadall_eio_shard_0() { + local dir=$1 + local shard_id=0 + + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # inject eio on primary OSD (0) + rados_put_get_data eio $dir $shard_id recovery || return 1 + + delete_erasure_coded_pool $poolname +} + +function TEST_rados_get_with_subreadall_eio_shard_1() { + local dir=$1 + local shard_id=1 + + setup_osds 4 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 2 1 || return 1 + # inject eio on replica OSD (1) + rados_put_get_data eio $dir $shard_id recovery || return 1 + + delete_erasure_coded_pool $poolname +} + +# Test recovery the object attr read error +function TEST_ec_object_attr_read_error() { + local dir=$1 + local objname=myobject + + setup_osds 7 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + local primary_osd=$(get_primary $poolname $objname) + # Kill primary OSD + kill_daemons $dir TERM osd.${primary_osd} >&2 < /dev/null || return 1 + + # Write data + rados_put $dir $poolname $objname || return 1 + + # Inject eio, shard 1 is the one 
read attr + inject_eio ec mdata $poolname $objname $dir 1 || return 1 + + # Restart OSD + activate_osd $dir ${primary_osd} || return 1 + + # Cluster should recover this object + wait_for_clean || return 1 + + rados_get $dir $poolname myobject || return 1 + + delete_erasure_coded_pool $poolname +} + +# Test recovery the first k copies aren't all available +function TEST_ec_single_recovery_error() { + local dir=$1 + local objname=myobject + + setup_osds 7 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + rados_put $dir $poolname $objname || return 1 + inject_eio ec data $poolname $objname $dir 0 || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + # Kill OSD + kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + # Cluster should recover this object + wait_for_clean || return 1 + + rados_get $dir $poolname myobject || return 1 + + delete_erasure_coded_pool $poolname +} + +# Test recovery when repeated reads are needed due to EIO +function TEST_ec_recovery_multiple_errors() { + local dir=$1 + local objname=myobject + + setup_osds 9 || return 1 + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 4 4 || return 1 + + rados_put $dir $poolname $objname || return 1 + inject_eio ec data $poolname $objname $dir 0 || return 1 + # first read will try shards 0,1,2 when 0 gets EIO, shard 3 gets + # tried as well. Make that fail to test multiple-EIO handling. + inject_eio ec data $poolname $objname $dir 3 || return 1 + inject_eio ec data $poolname $objname $dir 4 || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + # Kill OSD + kill_daemons $dir TERM osd.${last_osd} >&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + # Cluster should recover this object + wait_for_clean || return 1 + + rados_get $dir $poolname myobject || return 1 + + delete_erasure_coded_pool $poolname +} + +# Test recovery when there's only one shard to recover, but multiple +# objects recovering in one RecoveryOp +function TEST_ec_recovery_multiple_objects() { + local dir=$1 + local objname=myobject + + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 ' + setup_osds 7 || return 1 + CEPH_ARGS=$ORIG_ARGS + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + rados_put $dir $poolname test1 + rados_put $dir $poolname test2 + rados_put $dir $poolname test3 + + ceph osd out 0 || return 1 + + # Cluster should recover these objects all at once + wait_for_clean || return 1 + + rados_get $dir $poolname test1 + rados_get $dir $poolname test2 + rados_get $dir $poolname test3 + + delete_erasure_coded_pool $poolname +} + +# test multi-object recovery when the one missing shard gets EIO +function TEST_ec_recovery_multiple_objects_eio() { + local dir=$1 + local objname=myobject + + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 ' + setup_osds 7 || return 1 + CEPH_ARGS=$ORIG_ARGS + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + rados_put $dir $poolname test1 + rados_put $dir $poolname test2 + rados_put $dir $poolname test3 + + # can't read from this shard anymore + inject_eio ec data $poolname $objname $dir 0 || return 1 + ceph osd out 0 || 
return 1 + + # Cluster should recover these objects all at once + wait_for_clean || return 1 + + rados_get $dir $poolname test1 + rados_get $dir $poolname test2 + rados_get $dir $poolname test3 + + delete_erasure_coded_pool $poolname +} + +# Test backfill with unfound object +function TEST_ec_backfill_unfound() { + local dir=$1 + local objname=myobject + local lastobj=300 + # Must be between 1 and $lastobj + local testobj=obj250 + + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10' + setup_osds 5 || return 1 + CEPH_ARGS=$ORIG_ARGS + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + ceph pg dump pgs + + rados_put $dir $poolname $objname || return 1 + local primary=$(get_primary $poolname $objname) + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4 + for i in $(seq 1 $lastobj) + do + rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1 + done + + inject_eio ec data $poolname $testobj $dir 0 || return 1 + inject_eio ec data $poolname $testobj $dir 1 || return 1 + + activate_osd $dir ${last_osd} || return 1 + ceph osd in ${last_osd} || return 1 + + sleep 15 + + for tmp in $(seq 1 240); do + state=$(get_state 2.0) + echo $state | grep backfill_unfound + if [ "$?" = "0" ]; then + break + fi + echo $state + sleep 1 + done + + ceph pg dump pgs + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + sleep 5 + + ceph pg dump pgs + ceph pg 2.0 list_unfound + ceph pg 2.0 query + + ceph pg 2.0 list_unfound | grep -q $testobj || return 1 + + check=$(ceph pg 2.0 list_unfound | jq ".available_might_have_unfound") + test "$check" == "true" || return 1 + + eval check=$(ceph pg 2.0 list_unfound | jq .might_have_unfound[0].status) + test "$check" == "osd is down" || return 1 + + eval check=$(ceph pg 2.0 list_unfound | jq .might_have_unfound[0].osd) + test "$check" == "2(4)" || return 1 + + activate_osd $dir ${last_osd} || return 1 + + # Command should hang because object is unfound + timeout 5 rados -p $poolname get $testobj $dir/CHECK + test $? = "124" || return 1 + + ceph pg 2.0 mark_unfound_lost delete + + wait_for_clean || return 1 + + for i in $(seq 1 $lastobj) + do + if [ obj${i} = "$testobj" ]; then + # Doesn't exist anymore + ! 
rados -p $poolname get $testobj $dir/CHECK || return 1 + else + rados --pool $poolname get obj${i} $dir/CHECK || return 1 + diff -q $dir/ORIGINAL $dir/CHECK || return 1 + fi + done + + rm -f ${dir}/ORIGINAL ${dir}/CHECK + + delete_erasure_coded_pool $poolname +} + +# Test recovery with unfound object +function TEST_ec_recovery_unfound() { + local dir=$1 + local objname=myobject + local lastobj=100 + # Must be between 1 and $lastobj + local testobj=obj75 + + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=' --osd-recovery-max-single-start 3 --osd-recovery-max-active 3 ' + CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10' + setup_osds 5 || return 1 + CEPH_ARGS=$ORIG_ARGS + + local poolname=pool-jerasure + create_erasure_coded_pool $poolname 3 2 || return 1 + + ceph pg dump pgs + + rados_put $dir $poolname $objname || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4 + for i in $(seq 1 $lastobj) + do + rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1 + done + + inject_eio ec data $poolname $testobj $dir 0 || return 1 + inject_eio ec data $poolname $testobj $dir 1 || return 1 + + activate_osd $dir ${last_osd} || return 1 + ceph osd in ${last_osd} || return 1 + + sleep 15 + + for tmp in $(seq 1 100); do + state=$(get_state 2.0) + echo $state | grep recovery_unfound + if [ "$?" = "0" ]; then + break + fi + echo "$state " + sleep 1 + done + + ceph pg dump pgs + ceph pg 2.0 list_unfound + ceph pg 2.0 query + + ceph pg 2.0 list_unfound | grep -q $testobj || return 1 + + check=$(ceph pg 2.0 list_unfound | jq ".available_might_have_unfound") + test "$check" == "true" || return 1 + + check=$(ceph pg 2.0 list_unfound | jq ".might_have_unfound | length") + test $check == 0 || return 1 + + # Command should hang because object is unfound + timeout 5 rados -p $poolname get $testobj $dir/CHECK + test $? = "124" || return 1 + + ceph pg 2.0 mark_unfound_lost delete + + wait_for_clean || return 1 + + for i in $(seq 1 $lastobj) + do + if [ obj${i} = "$testobj" ]; then + # Doesn't exist anymore + ! rados -p $poolname get $testobj $dir/CHECK || return 1 + else + rados --pool $poolname get obj${i} $dir/CHECK || return 1 + diff -q $dir/ORIGINAL $dir/CHECK || return 1 + fi + done + + rm -f ${dir}/ORIGINAL ${dir}/CHECK + + delete_erasure_coded_pool $poolname +} + +main test-erasure-eio "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/erasure-code/test-erasure-eio.sh" +# End: diff --git a/qa/standalone/mgr/balancer.sh b/qa/standalone/mgr/balancer.sh new file mode 100755 index 000000000..2d7b2f35d --- /dev/null +++ b/qa/standalone/mgr/balancer.sh @@ -0,0 +1,223 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7102" # git grep '\<7102\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done +} + +TEST_POOL1=test1 +TEST_POOL2=test2 + +function TEST_balancer() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + create_pool $TEST_POOL1 8 + create_pool $TEST_POOL2 8 + + wait_for_clean || return 1 + + ceph pg dump pgs + ceph balancer status || return 1 + eval MODE=$(ceph balancer status | jq '.mode') + test $MODE = "upmap" || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "true" || return 1 + + ceph balancer ls || return 1 + PLANS=$(ceph balancer ls) + test "$PLANS" = "[]" || return 1 + ceph balancer eval || return 1 + EVAL="$(ceph balancer eval)" + test "$EVAL" = "current cluster score 0.000000 (lower is better)" + ceph balancer eval-verbose || return 1 + + ceph balancer pool add $TEST_POOL1 || return 1 + ceph balancer pool add $TEST_POOL2 || return 1 + ceph balancer pool ls || return 1 + eval POOL=$(ceph balancer pool ls | jq 'sort | .[0]') + test "$POOL" = "$TEST_POOL1" || return 1 + eval POOL=$(ceph balancer pool ls | jq 'sort | .[1]') + test "$POOL" = "$TEST_POOL2" || return 1 + ceph balancer pool rm $TEST_POOL1 || return 1 + ceph balancer pool rm $TEST_POOL2 || return 1 + ceph balancer pool ls || return 1 + ceph balancer pool add $TEST_POOL1 || return 1 + + ceph balancer mode crush-compat || return 1 + ceph balancer status || return 1 + eval MODE=$(ceph balancer status | jq '.mode') + test $MODE = "crush-compat" || return 1 + ceph balancer off || return 1 + ! ceph balancer optimize plan_crush $TEST_POOL1 || return 1 + ceph balancer status || return 1 + eval RESULT=$(ceph balancer status | jq '.optimize_result') + test "$RESULT" = "Distribution is already perfect" || return 1 + + ceph balancer on || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "true" || return 1 + sleep 2 + ceph balancer status || return 1 + ceph balancer off || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "false" || return 1 + sleep 2 + + ceph balancer reset || return 1 + + ceph balancer mode upmap || return 1 + ceph balancer status || return 1 + eval MODE=$(ceph balancer status | jq '.mode') + test $MODE = "upmap" || return 1 + ! 
ceph balancer optimize plan_upmap $TEST_POOL || return 1 + ceph balancer status || return 1 + eval RESULT=$(ceph balancer status | jq '.optimize_result') + test "$RESULT" = "Unable to find further optimization, or pool(s) pg_num is decreasing, or distribution is already perfect" || return 1 + + ceph balancer on || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "true" || return 1 + sleep 2 + ceph balancer status || return 1 + ceph balancer off || return 1 + ACTIVE=$(ceph balancer status | jq '.active') + test $ACTIVE = "false" || return 1 + + teardown $dir || return 1 +} + +function TEST_balancer2() { + local dir=$1 + TEST_PGS1=118 + TEST_PGS2=132 + TOTAL_PGS=$(expr $TEST_PGS1 + $TEST_PGS2) + OSDS=5 + DEFAULT_REPLICAS=3 + # Integer average of PGS per OSD (70.8), so each OSD >= this + FINAL_PER_OSD1=$(expr \( $TEST_PGS1 \* $DEFAULT_REPLICAS \) / $OSDS) + # Integer average of PGS per OSD (150) + FINAL_PER_OSD2=$(expr \( \( $TEST_PGS1 + $TEST_PGS2 \) \* $DEFAULT_REPLICAS \) / $OSDS) + + CEPH_ARGS+="--osd_pool_default_pg_autoscale_mode=off " + CEPH_ARGS+="--debug_osd=20 " + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $i || return 1 + done + + ceph osd set-require-min-compat-client luminous + ceph config set mgr mgr/balancer/upmap_max_deviation 1 + ceph balancer mode upmap || return 1 + ceph balancer on || return 1 + ceph config set mgr mgr/balancer/sleep_interval 5 + + create_pool $TEST_POOL1 $TEST_PGS1 + + wait_for_clean || return 1 + + # Wait up to 2 minutes + OK=no + for i in $(seq 1 25) + do + sleep 5 + if grep -q "Optimization plan is almost perfect" $dir/mgr.x.log + then + OK=yes + break + fi + done + test $OK = "yes" || return 1 + # Plan is found, but PGs still need to move + sleep 10 + wait_for_clean || return 1 + ceph osd df + + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[0].pgs') + test $PGS -ge $FINAL_PER_OSD1 || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[1].pgs') + test $PGS -ge $FINAL_PER_OSD1 || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[2].pgs') + test $PGS -ge $FINAL_PER_OSD1 || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[3].pgs') + test $PGS -ge $FINAL_PER_OSD1 || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[4].pgs') + test $PGS -ge $FINAL_PER_OSD1 || return 1 + + create_pool $TEST_POOL2 $TEST_PGS2 + + # Wait up to 2 minutes + OK=no + for i in $(seq 1 25) + do + sleep 5 + COUNT=$(grep "Optimization plan is almost perfect" $dir/mgr.x.log | wc -l) + if test $COUNT = "2" + then + OK=yes + break + fi + done + test $OK = "yes" || return 1 + # Plan is found, but PGs still need to move + sleep 10 + wait_for_clean || return 1 + ceph osd df + + # We should be with plus or minus 2 of FINAL_PER_OSD2 + # This is because here each pool is balanced independently + MIN=$(expr $FINAL_PER_OSD2 - 2) + MAX=$(expr $FINAL_PER_OSD2 + 2) + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[0].pgs') + test $PGS -ge $MIN -a $PGS -le $MAX || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[1].pgs') + test $PGS -ge $MIN -a $PGS -le $MAX || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[2].pgs') + test $PGS -ge $MIN -a $PGS -le $MAX || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[3].pgs') + test $PGS -ge $MIN -a $PGS -le $MAX || return 1 + PGS=$(ceph osd df --format=json-pretty | jq '.nodes[4].pgs') + test $PGS -ge $MIN -a $PGS -le $MAX || return 1 + + 
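# Illustrative sketch (not part of the original test): the five per-OSD
# range checks above could equivalently be written as a loop over the OSD
# indices, using the same commands already used in this test:
#
#   for n in $(seq 0 $(expr $OSDS - 1)); do
#       PGS=$(ceph osd df --format=json-pretty | jq ".nodes[$n].pgs")
#       test $PGS -ge $MIN -a $PGS -le $MAX || return 1
#   done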
teardown $dir || return 1 +} + +main balancer "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh balancer.sh" +# End: diff --git a/qa/standalone/misc/mclock-config.sh b/qa/standalone/misc/mclock-config.sh new file mode 100755 index 000000000..59f002584 --- /dev/null +++ b/qa/standalone/misc/mclock-config.sh @@ -0,0 +1,467 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2022 Red Hat <contact@redhat.com> +# +# Author: Sridhar Seshasayee <sseshasa@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--debug-mclock 20 " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_profile_builtin_to_custom() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1 + + # Verify the default mclock profile on the OSD + local mclock_profile=$(ceph config get osd.0 osd_mclock_profile) + test "$mclock_profile" = "balanced" || return 1 + + # Verify the running mClock profile + mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_profile |\ + jq .osd_mclock_profile) + mclock_profile=$(eval echo $mclock_profile) + test "$mclock_profile" = "high_recovery_ops" || return 1 + + # Change the mclock profile to 'custom' + ceph tell osd.0 config set osd_mclock_profile custom || return 1 + + # Verify that the mclock profile is set to 'custom' on the OSDs + mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get osd_mclock_profile | jq .osd_mclock_profile) + mclock_profile=$(eval echo $mclock_profile) + test "$mclock_profile" = "custom" || return 1 + + # Change a mclock config param and confirm the change + local client_res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get osd_mclock_scheduler_client_res | \ + jq .osd_mclock_scheduler_client_res | bc) + echo "client_res = $client_res" + local client_res_new=$(echo "$client_res + 0.1" | bc -l) + echo "client_res_new = $client_res_new" + ceph config set osd.0 osd_mclock_scheduler_client_res \ + $client_res_new || return 1 + + # Check value in config monitor db + local res=$(ceph config get osd.0 \ + osd_mclock_scheduler_client_res) || return 1 + if (( $(echo "$res != $client_res_new" | bc -l) )); then + return 1 + fi + # Check value in the in-memory 'values' map + res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get osd_mclock_scheduler_client_res | \ + jq .osd_mclock_scheduler_client_res | bc) + if (( $(echo "$res != $client_res_new" | bc -l) )); then + return 1 + fi + + teardown $dir || return 1 +} + +function 
TEST_profile_custom_to_builtin() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1 + + # Verify the default mclock profile on the OSD + local def_mclock_profile + def_mclock_profile=$(ceph config get osd.0 osd_mclock_profile) + test "$def_mclock_profile" = "balanced" || return 1 + + # Verify the running mClock profile + local orig_mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_profile |\ + jq .osd_mclock_profile) + orig_mclock_profile=$(eval echo $orig_mclock_profile) + test $orig_mclock_profile = "high_recovery_ops" || return 1 + + # Change the mclock profile to 'custom' + ceph tell osd.0 config set osd_mclock_profile custom || return 1 + + # Verify that the mclock profile is set to 'custom' on the OSDs + local mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_profile | \ + jq .osd_mclock_profile) + mclock_profile=$(eval echo $mclock_profile) + test $mclock_profile = "custom" || return 1 + + # Save the original client reservations allocated to the OSDs + local client_res + client_res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get osd_mclock_scheduler_client_res | \ + jq .osd_mclock_scheduler_client_res | bc) + echo "Original client_res for osd.0 = $client_res" + + # Change a mclock config param and confirm the change + local client_res_new=$(echo "$client_res + 0.1" | bc -l) + echo "client_res_new = $client_res_new" + ceph config set osd osd_mclock_scheduler_client_res \ + $client_res_new || return 1 + # Check value in config monitor db + local res=$(ceph config get osd.0 \ + osd_mclock_scheduler_client_res) || return 1 + if (( $(echo "$res != $client_res_new" | bc -l) )); then + return 1 + fi + # Check value in the in-memory 'values' map + res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get osd_mclock_scheduler_client_res | \ + jq .osd_mclock_scheduler_client_res | bc) + if (( $(echo "$res != $client_res_new" | bc -l) )); then + return 1 + fi + + # Switch the mclock profile back to the original built-in profile. + # The config subsystem prevents the overwrite of the changed QoS config + # option above i.e. osd_mclock_scheduler_client_res. This fact is verified + # before proceeding to remove the entry from the config monitor db. After + # the config entry is removed, the original value for the config option is + # restored and is verified. 
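# Illustrative sketch (not part of the original test): the verification
# steps below all follow the same pattern of reading the in-memory value
# over the admin socket and comparing it numerically with bc, e.g.:
#
#   val=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
#             config get osd_mclock_scheduler_client_res | \
#             jq .osd_mclock_scheduler_client_res | bc)
#   if (( $(echo "$val != $expected" | bc -l) )); then return 1; fi
#
# where $expected stands for whichever value the particular step checks
# (client_res_new while the custom setting is pinned, client_res after the
# monitor db entry is removed).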
+ ceph tell osd.0 config set osd_mclock_profile $orig_mclock_profile || return 1 + # Verify that the mclock profile is set back to the original on the OSD + eval mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_profile | \ + jq .osd_mclock_profile) + #mclock_profile=$(ceph config get osd.0 osd_mclock_profile) + test "$mclock_profile" = "$orig_mclock_profile" || return 1 + + # Verify that the new value is still in effect + # Check value in config monitor db + local res=$(ceph config get osd.0 \ + osd_mclock_scheduler_client_res) || return 1 + if (( $(echo "$res != $client_res_new" | bc -l) )); then + return 1 + fi + # Check value in the in-memory 'values' map + res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get osd_mclock_scheduler_client_res | \ + jq .osd_mclock_scheduler_client_res | bc) + if (( $(echo "$res != $client_res_new" | bc -l) )); then + return 1 + fi + + # Remove the changed QoS config option from monitor db + ceph config rm osd osd_mclock_scheduler_client_res || return 1 + + sleep 5 # Allow time for change to take effect + + # Verify that the original values are now restored + # Check value in config monitor db + res=$(ceph config get osd.0 \ + osd_mclock_scheduler_client_res) || return 1 + if (( $(echo "$res != 0.0" | bc -l) )); then + return 1 + fi + + # Check value in the in-memory 'values' map + res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get osd_mclock_scheduler_client_res | \ + jq .osd_mclock_scheduler_client_res | bc) + if (( $(echo "$res != $client_res" | bc -l) )); then + return 1 + fi + + teardown $dir || return 1 +} + +function TEST_recovery_limit_adjustment_mclock() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1 + local recoveries=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_recovery_max_active) + # Get default value + echo "$recoveries" | grep --quiet 'osd_recovery_max_active' || return 1 + + # Change the recovery limit without setting + # osd_mclock_override_recovery_settings option. Verify that the recovery + # limit is retained at its default value. + ceph config set osd.0 osd_recovery_max_active 10 || return 1 + sleep 2 # Allow time for change to take effect + local max_recoveries=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_recovery_max_active) + test "$max_recoveries" = "$recoveries" || return 1 + + # Change recovery limit after setting osd_mclock_override_recovery_settings. + # Verify that the recovery limit is modified. 
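# For reference (an illustrative note, not part of the original test): with
# --format=json the admin socket "config get" returns a one-entry JSON
# object, which is why the verification below compares against a literal
# JSON string, e.g.:
#
#   CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
#       config get osd_recovery_max_active
#   # expected once the override takes effect:
#   # {"osd_recovery_max_active":"10"}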
+ ceph config set osd.0 osd_mclock_override_recovery_settings true || return 1 + ceph config set osd.0 osd_recovery_max_active 10 || return 1 + sleep 2 # Allow time for change to take effect + max_recoveries=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_recovery_max_active) + test "$max_recoveries" = '{"osd_recovery_max_active":"10"}' || return 1 + + teardown $dir || return 1 +} + +function TEST_backfill_limit_adjustment_mclock() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1 + local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills | jq .osd_max_backfills | bc) + # Get default value + echo "osd_max_backfills: $backfills" || return 1 + + # Change the backfill limit without setting + # osd_mclock_override_recovery_settings option. Verify that the backfill + # limit is retained at its default value. + ceph config set osd.0 osd_max_backfills 20 || return 1 + sleep 2 # Allow time for change to take effect + local max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills | jq .osd_max_backfills | bc) + test $max_backfills = $backfills || return 1 + + # Verify local and async reserver settings are not changed + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .local_reservations.max_allowed | bc) + test $max_backfills = $backfills || return 1 + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .remote_reservations.max_allowed | bc) + test $max_backfills = $backfills || return 1 + + # Change backfills limit after setting osd_mclock_override_recovery_settings. + # Verify that the backfills limit is modified. + ceph config set osd.0 osd_mclock_override_recovery_settings true || return 1 + ceph config set osd.0 osd_max_backfills 20 || return 1 + sleep 2 # Allow time for change to take effect + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills | jq .osd_max_backfills | bc) + test $max_backfills = 20 || return 1 + + # Verify local and async reserver settings are changed + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .local_reservations.max_allowed | bc) + test $max_backfills = 20 || return 1 + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .remote_reservations.max_allowed | bc) + test $max_backfills = 20 || return 1 + + # Kill osd and bring it back up. + # Confirm that the backfill settings are retained. 
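# Illustrative sketch (not part of the original test): the reserver limits
# verified above, and again after the restart below, come from the OSD
# admin socket; a hypothetical helper wrapping that query could look like:
#
#   reserver_limit() {        # hypothetical name, for illustration only
#       local which=$1        # local_reservations or remote_reservations
#       CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \
#           dump_recovery_reservations | jq .${which}.max_allowed | bc
#   }
#   test $(reserver_limit local_reservations) = 20 || return 1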
+ kill_daemons $dir TERM osd || return 1 + ceph osd down 0 || return 1 + wait_for_osd down 0 || return 1 + activate_osd $dir 0 --osd-op-queue=mclock_scheduler || return 1 + + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills | jq .osd_max_backfills | bc) + test $max_backfills = 20 || return 1 + + # Verify local and async reserver settings are changed + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .local_reservations.max_allowed | bc) + test $max_backfills = 20 || return 1 + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .remote_reservations.max_allowed | bc) + test $max_backfills = 20 || return 1 + + teardown $dir || return 1 +} + +function TEST_profile_disallow_builtin_params_modify() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1 + + # Verify that the default mclock profile is set on the OSD + local def_mclock_profile=$(ceph config get osd.0 osd_mclock_profile) + test "$def_mclock_profile" = "balanced" || return 1 + + # Verify the running mClock profile + local cur_mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_profile |\ + jq .osd_mclock_profile) + cur_mclock_profile=$(eval echo $cur_mclock_profile) + test $cur_mclock_profile = "high_recovery_ops" || return 1 + + declare -a options=("osd_mclock_scheduler_background_recovery_res" + "osd_mclock_scheduler_client_res") + + local retries=10 + local errors=0 + for opt in "${options[@]}" + do + # Try and change a mclock config param and confirm that no change occurred + local opt_val_orig=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get $opt | jq .$opt | bc) + local opt_val_new=$(echo "$opt_val_orig + 0.1" | bc -l) + ceph config set osd.0 $opt $opt_val_new || return 1 + + # Check configuration values + for count in $(seq 0 $(expr $retries - 1)) + do + errors=0 + sleep 2 # Allow time for changes to take effect + + echo "Check configuration values - Attempt#: $count" + # Check configuration value on Mon store (or the default) for the osd + local res=$(ceph config get osd.0 $opt) || return 1 + echo "Mon db (or default): osd.0 $opt = $res" + if (( $(echo "$res == $opt_val_new" | bc -l) )); then + errors=$(expr $errors + 1) + fi + + # Check running configuration value using "config show" cmd + res=$(ceph config show osd.0 | grep $opt |\ + awk '{ print $2 }' | bc ) || return 1 + echo "Running config: osd.0 $opt = $res" + if (( $(echo "$res == $opt_val_new" | bc -l) || \ + $(echo "$res != $opt_val_orig" | bc -l) )); then + errors=$(expr $errors + 1) + fi + + # Check value in the in-memory 'values' map is unmodified + res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get $opt | jq .$opt | bc) + echo "Values map: osd.0 $opt = $res" + if (( $(echo "$res == $opt_val_new" | bc -l) || \ + $(echo "$res != $opt_val_orig" | bc -l) )); then + errors=$(expr $errors + 1) + fi + + # Check if we succeeded or exhausted retry count + if [ $errors -eq 0 ] + then + break + elif [ $count -eq $(expr $retries - 1) ] + then + return 1 + fi + done + done + + teardown $dir || return 1 +} + +function TEST_profile_disallow_builtin_params_override() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + 
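# Illustrative note (not part of the original test): this test mirrors
# TEST_profile_disallow_builtin_params_modify above, reusing the same
# retry/verification loop of the form
#
#   for count in $(seq 0 $(expr $retries - 1)); do
#       sleep 2   # allow time for changes to take effect
#       ...       # re-check mon db, "config show" and the in-memory map
#   done
#
# the only difference being that the attempted change is made with
# "ceph tell osd.0 config set" (an override) instead of
# "ceph config set osd.0".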
run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1 + + # Verify that the default mclock profile is set on the OSD + local def_mclock_profile=$(ceph config get osd.0 osd_mclock_profile) + test "$def_mclock_profile" = "balanced" || return 1 + + # Verify the running mClock profile + local cur_mclock_profile=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get osd_mclock_profile |\ + jq .osd_mclock_profile) + cur_mclock_profile=$(eval echo $cur_mclock_profile) + test $cur_mclock_profile = "high_recovery_ops" || return 1 + + declare -a options=("osd_mclock_scheduler_background_recovery_res" + "osd_mclock_scheduler_client_res") + + local retries=10 + local errors=0 + for opt in "${options[@]}" + do + # Override a mclock config param and confirm that no change occurred + local opt_val_orig=$(CEPH_ARGS='' ceph --format=json daemon \ + $(get_asok_path osd.0) config get $opt | jq .$opt | bc) + local opt_val_new=$(echo "$opt_val_orig + 0.1" | bc -l) + ceph tell osd.0 config set $opt $opt_val_new || return 1 + + # Check configuration values + for count in $(seq 0 $(expr $retries - 1)) + do + errors=0 + sleep 2 # Allow time for changes to take effect + + echo "Check configuration values - Attempt#: $count" + # Check configuration value on Mon store (or the default) for the osd + local res=$(ceph config get osd.0 $opt) || return 1 + echo "Mon db (or default): osd.0 $opt = $res" + if (( $(echo "$res == $opt_val_new" | bc -l) )); then + errors=$(expr $errors + 1) + fi + + # Check running configuration value using "config show" cmd + res=$(ceph config show osd.0 | grep $opt |\ + awk '{ print $2 }' | bc ) || return 1 + echo "Running config: osd.0 $opt = $res" + if (( $(echo "$res == $opt_val_new" | bc -l) || \ + $(echo "$res != $opt_val_orig" | bc -l) )); then + errors=$(expr $errors + 1) + fi + + # Check value in the in-memory 'values' map is unmodified + res=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path \ + osd.0) config get $opt | jq .$opt | bc) + echo "Values map: osd.0 $opt = $res" + if (( $(echo "$res == $opt_val_new" | bc -l) || \ + $(echo "$res != $opt_val_orig" | bc -l) )); then + errors=$(expr $errors + 1) + fi + + # Check if we succeeded or exhausted retry count + if [ $errors -eq 0 ] + then + break + elif [ $count -eq $(expr $retries - 1) ] + then + return 1 + fi + done + done + + teardown $dir || return 1 +} + +main mclock-config "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh mclock-config.sh" +# End: diff --git a/qa/standalone/misc/network-ping.sh b/qa/standalone/misc/network-ping.sh new file mode 100755 index 000000000..4745108c5 --- /dev/null +++ b/qa/standalone/misc/network-ping.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--debug_disable_randomized_ping=true " + CEPH_ARGS+="--debug_heartbeat_testing_span=5 " + CEPH_ARGS+="--osd_heartbeat_interval=1 " + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_network_ping_test1() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || 
return 1 + run_osd $dir 2 || return 1 + + sleep 5 + + create_pool foo 16 + + # write some objects + timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1 + + # Get 1 cycle worth of ping data "1 minute" + sleep 10 + flush_pg_stats + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + # Wait another 4 cycles to get "5 minute interval" + sleep 20 + flush_pg_stats + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + + # Wait another 10 cycles to get "15 minute interval" + sleep 50 + flush_pg_stats + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "0" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "1000" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 0 | tee $dir/json + test "$(cat $dir/json | jq '.entries | length')" = "12" || return 1 + test "$(cat $dir/json | jq '.threshold')" = "0" || return 1 + + # Just check the threshold output matches the input + CEPH_ARGS='' ceph daemon $(get_asok_path mgr.x) dump_osd_network 99 | tee $dir/json + test "$(cat $dir/json | jq '.threshold')" = "99" || return 1 + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 98 | tee $dir/json + test "$(cat $dir/json | jq '.threshold')" = "98" || return 1 + + rm -f $dir/json +} + +# Test setting of mon_warn_on_slow_ping_time very low to +# get health warning +function TEST_network_ping_test2() { + local dir=$1 + + 
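# Illustrative note (not part of the original test): the raw ping data this
# warning is derived from can be dumped directly, as TEST_network_ping_test1
# does above, e.g.:
#
#   CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_osd_network 0 | tee $dir/json
#   test "$(cat $dir/json | jq '.entries | length')" = "4" || return 1
#
# Here the health warning is provoked instead by lowering the threshold via
# --mon_warn_on_slow_ping_time (see EXTRA_OPTS below) so that ordinary ping
# times exceed it.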
export CEPH_ARGS + export EXTRA_OPTS=" --mon_warn_on_slow_ping_time=0.001" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + sleep 5 + ceph osd crush add-bucket dc1 datacenter + ceph osd crush add-bucket dc2 datacenter + ceph osd crush add-bucket dc3 datacenter + ceph osd crush add-bucket rack1 rack + ceph osd crush add-bucket rack2 rack + ceph osd crush add-bucket rack3 rack + ceph osd crush add-bucket host1 host + ceph osd crush add-bucket host2 host + ceph osd crush add-bucket host3 host + ceph osd crush move dc1 root=default + ceph osd crush move dc2 root=default + ceph osd crush move dc3 root=default + ceph osd crush move rack1 datacenter=dc1 + ceph osd crush move rack2 datacenter=dc2 + ceph osd crush move rack3 datacenter=dc3 + ceph osd crush move host1 rack=rack1 + ceph osd crush move host2 rack=rack2 + ceph osd crush move host3 rack=rack3 + ceph osd crush set osd.0 1.0 host=host1 + ceph osd crush set osd.1 1.0 host=host2 + ceph osd crush set osd.2 1.0 host=host3 + ceph osd crush rule create-simple myrule default host firstn + + create_pool foo 16 16 replicated myrule + + # write some objects + timeout 20 rados bench -p foo 10 write -b 4096 --no-cleanup || return 1 + + # Get at least 1 cycle of ping data (this test runs with 5 second cycles of 1 second pings) + sleep 10 + flush_pg_stats + + ceph health | tee $dir/health + grep -q "Slow OSD heartbeats" $dir/health || return 1 + + ceph health detail | tee $dir/health + grep -q "OSD_SLOW_PING_TIME_BACK" $dir/health || return 1 + grep -q "OSD_SLOW_PING_TIME_FRONT" $dir/health || return 1 + grep -q "Slow OSD heartbeats on front from osd[.][0-2] [[]dc[1-3],rack[1-3][]] \ +to osd[.][0-2] [[]dc[1-3],rack[1-3][]]" $dir/health || return 1 + rm -f $dir/health +} + +main network-ping "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && ../qa/run-standalone.sh network-ping.sh" +# End: diff --git a/qa/standalone/misc/ok-to-stop.sh b/qa/standalone/misc/ok-to-stop.sh new file mode 100755 index 000000000..dc9e7422f --- /dev/null +++ b/qa/standalone/misc/ok-to-stop.sh @@ -0,0 +1,296 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON_A="127.0.0.1:7150" # git grep '\<7150\>' : there must be only one + export CEPH_MON_B="127.0.0.1:7151" # git grep '\<7151\>' : there must be only one + export CEPH_MON_C="127.0.0.1:7152" # git grep '\<7152\>' : there must be only one + export CEPH_MON_D="127.0.0.1:7153" # git grep '\<7153\>' : there must be only one + export CEPH_MON_E="127.0.0.1:7154" # git grep '\<7154\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + export ORIG_CEPH_ARGS="$CEPH_ARGS" + + local funcs=${@:-$(set | ${SED} -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + kill_daemons $dir KILL || return 1 + teardown $dir || return 1 + done +} + +function TEST_1_mon_checks() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + + ceph mon ok-to-stop dne || return 1 + ! ceph mon ok-to-stop a || return 1 + + ! ceph mon ok-to-add-offline || return 1 + + ! 
ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm dne || return 1 +} + +function TEST_2_mons_checks() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B || return 1 + + ceph mon ok-to-stop dne || return 1 + ! ceph mon ok-to-stop a || return 1 + ! ceph mon ok-to-stop b || return 1 + ! ceph mon ok-to-stop a b || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm dne || return 1 +} + +function TEST_3_mons_checks() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B || return 1 + run_mon $dir c --public-addr=$CEPH_MON_C || return 1 + wait_for_quorum 60 3 + + ceph mon ok-to-stop dne || return 1 + ceph mon ok-to-stop a || return 1 + ceph mon ok-to-stop b || return 1 + ceph mon ok-to-stop c || return 1 + ! ceph mon ok-to-stop a b || return 1 + ! ceph mon ok-to-stop b c || return 1 + ! ceph mon ok-to-stop a b c || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm c || return 1 + + kill_daemons $dir KILL mon.b + wait_for_quorum 60 2 + + ! ceph mon ok-to-stop a || return 1 + ceph mon ok-to-stop b || return 1 + ! ceph mon ok-to-stop c || return 1 + + ! ceph mon ok-to-add-offline || return 1 + + ! ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ! ceph mon ok-to-rm c || return 1 +} + +function TEST_4_mons_checks() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B || return 1 + run_mon $dir c --public-addr=$CEPH_MON_C || return 1 + run_mon $dir d --public-addr=$CEPH_MON_D || return 1 + wait_for_quorum 60 4 + + ceph mon ok-to-stop dne || return 1 + ceph mon ok-to-stop a || return 1 + ceph mon ok-to-stop b || return 1 + ceph mon ok-to-stop c || return 1 + ceph mon ok-to-stop d || return 1 + ! ceph mon ok-to-stop a b || return 1 + ! ceph mon ok-to-stop c d || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm c || return 1 + + kill_daemons $dir KILL mon.a + wait_for_quorum 60 3 + + ceph mon ok-to-stop a || return 1 + ! ceph mon ok-to-stop b || return 1 + ! ceph mon ok-to-stop c || return 1 + ! 
ceph mon ok-to-stop d || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm c || return 1 + ceph mon ok-to-rm d || return 1 +} + +function TEST_5_mons_checks() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D,$CEPH_MON_E " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B || return 1 + run_mon $dir c --public-addr=$CEPH_MON_C || return 1 + run_mon $dir d --public-addr=$CEPH_MON_D || return 1 + run_mon $dir e --public-addr=$CEPH_MON_E || return 1 + wait_for_quorum 60 5 + + ceph mon ok-to-stop dne || return 1 + ceph mon ok-to-stop a || return 1 + ceph mon ok-to-stop b || return 1 + ceph mon ok-to-stop c || return 1 + ceph mon ok-to-stop d || return 1 + ceph mon ok-to-stop e || return 1 + ceph mon ok-to-stop a b || return 1 + ceph mon ok-to-stop c d || return 1 + ! ceph mon ok-to-stop a b c || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm c || return 1 + ceph mon ok-to-rm d || return 1 + ceph mon ok-to-rm e || return 1 + + kill_daemons $dir KILL mon.a + wait_for_quorum 60 4 + + ceph mon ok-to-stop a || return 1 + ceph mon ok-to-stop b || return 1 + ceph mon ok-to-stop c || return 1 + ceph mon ok-to-stop d || return 1 + ceph mon ok-to-stop e || return 1 + + ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ceph mon ok-to-rm b || return 1 + ceph mon ok-to-rm c || return 1 + ceph mon ok-to-rm d || return 1 + ceph mon ok-to-rm e || return 1 + + kill_daemons $dir KILL mon.e + wait_for_quorum 60 3 + + ceph mon ok-to-stop a || return 1 + ! ceph mon ok-to-stop b || return 1 + ! ceph mon ok-to-stop c || return 1 + ! ceph mon ok-to-stop d || return 1 + ceph mon ok-to-stop e || return 1 + + ! ceph mon ok-to-add-offline || return 1 + + ceph mon ok-to-rm a || return 1 + ! ceph mon ok-to-rm b || return 1 + ! ceph mon ok-to-rm c || return 1 + ! ceph mon ok-to-rm d || return 1 + ceph mon ok-to-rm e || return 1 +} + +function TEST_0_mds() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_mds $dir a || return 1 + + ceph osd pool create meta 1 || return 1 + ceph osd pool create data 1 || return 1 + ceph fs new myfs meta data || return 1 + sleep 5 + + ! ceph mds ok-to-stop a || return 1 + ! ceph mds ok-to-stop a dne || return 1 + ceph mds ok-to-stop dne || return 1 + + run_mds $dir b || return 1 + sleep 5 + + ceph mds ok-to-stop a || return 1 + ceph mds ok-to-stop b || return 1 + ! ceph mds ok-to-stop a b || return 1 + ceph mds ok-to-stop a dne1 dne2 || return 1 + ceph mds ok-to-stop b dne || return 1 + ! 
ceph mds ok-to-stop a b dne || return 1 + ceph mds ok-to-stop dne1 dne2 || return 1 + + kill_daemons $dir KILL mds.a +} + +function TEST_0_osd() { + local dir=$1 + + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A " + + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + + ceph osd erasure-code-profile set ec-profile m=2 k=2 crush-failure-domain=osd || return 1 + ceph osd pool create ec erasure ec-profile || return 1 + + wait_for_clean || return 1 + + # with min_size 3, we can stop only 1 osd + ceph osd pool set ec min_size 3 || return 1 + wait_for_clean || return 1 + + ceph osd ok-to-stop 0 || return 1 + ceph osd ok-to-stop 1 || return 1 + ceph osd ok-to-stop 2 || return 1 + ceph osd ok-to-stop 3 || return 1 + ! ceph osd ok-to-stop 0 1 || return 1 + ! ceph osd ok-to-stop 2 3 || return 1 + ceph osd ok-to-stop 0 --max 2 | grep '[0]' || return 1 + ceph osd ok-to-stop 1 --max 2 | grep '[1]' || return 1 + + # with min_size 2 we can stop 1 osds + ceph osd pool set ec min_size 2 || return 1 + wait_for_clean || return 1 + + ceph osd ok-to-stop 0 1 || return 1 + ceph osd ok-to-stop 2 3 || return 1 + ! ceph osd ok-to-stop 0 1 2 || return 1 + ! ceph osd ok-to-stop 1 2 3 || return 1 + + ceph osd ok-to-stop 0 --max 2 | grep '[0,1]' || return 1 + ceph osd ok-to-stop 0 --max 20 | grep '[0,1]' || return 1 + ceph osd ok-to-stop 2 --max 2 | grep '[2,3]' || return 1 + ceph osd ok-to-stop 2 --max 20 | grep '[2,3]' || return 1 + + # we should get the same result with one of the osds already down + kill_daemons $dir TERM osd.0 || return 1 + ceph osd down 0 || return 1 + wait_for_peered || return 1 + + ceph osd ok-to-stop 0 || return 1 + ceph osd ok-to-stop 0 1 || return 1 + ! ceph osd ok-to-stop 0 1 2 || return 1 + ! ceph osd ok-to-stop 1 2 3 || return 1 +} + + +main ok-to-stop "$@" diff --git a/qa/standalone/misc/rados-striper.sh b/qa/standalone/misc/rados-striper.sh new file mode 100755 index 000000000..be6349b81 --- /dev/null +++ b/qa/standalone/misc/rados-striper.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Red Hat <contact@redhat.com> +# +# Author: Sebastien Ponce <sebastien.ponce@cern.ch> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7116" # git grep '\<7116\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + # setup + setup $dir || return 1 + + # create a cluster with one monitor and three osds + run_mon $dir a || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + create_rbd_pool || return 1 + + # create toyfile + dd if=/dev/urandom of=$dir/toyfile bs=1234 count=1 + + # put a striped object + rados --pool rbd --striper put toyfile $dir/toyfile || return 1 + + # stat it, with and without striping + rados --pool rbd --striper stat toyfile | cut -d ',' -f 2 > $dir/stripedStat || return 1 + rados --pool rbd stat toyfile.0000000000000000 | cut -d ',' -f 2 > $dir/stat || return 1 + echo ' size 1234' > $dir/refstat + diff -w $dir/stripedStat $dir/refstat || return 1 + diff -w $dir/stat $dir/refstat || return 1 + rados --pool rbd stat toyfile >& $dir/staterror + grep -q 'No such file or directory' $dir/staterror || return 1 + + # get the file back with and without striping + rados --pool rbd --striper get toyfile $dir/stripedGroup || return 1 + diff -w $dir/toyfile $dir/stripedGroup || return 1 + rados --pool rbd get toyfile.0000000000000000 $dir/nonSTripedGroup || return 1 + diff -w $dir/toyfile $dir/nonSTripedGroup || return 1 + + # test truncate + rados --pool rbd --striper truncate toyfile 12 + rados --pool rbd --striper stat toyfile | cut -d ',' -f 2 > $dir/stripedStat || return 1 + rados --pool rbd stat toyfile.0000000000000000 | cut -d ',' -f 2 > $dir/stat || return 1 + echo ' size 12' > $dir/reftrunc + diff -w $dir/stripedStat $dir/reftrunc || return 1 + diff -w $dir/stat $dir/reftrunc || return 1 + + # test xattrs + + rados --pool rbd --striper setxattr toyfile somexattr somevalue || return 1 + rados --pool rbd --striper getxattr toyfile somexattr > $dir/xattrvalue || return 1 + rados --pool rbd getxattr toyfile.0000000000000000 somexattr > $dir/xattrvalue2 || return 1 + echo 'somevalue' > $dir/refvalue + diff -w $dir/xattrvalue $dir/refvalue || return 1 + diff -w $dir/xattrvalue2 $dir/refvalue || return 1 + rados --pool rbd --striper listxattr toyfile > $dir/xattrlist || return 1 + echo 'somexattr' > $dir/reflist + diff -w $dir/xattrlist $dir/reflist || return 1 + rados --pool rbd listxattr toyfile.0000000000000000 | grep -v striper > $dir/xattrlist2 || return 1 + diff -w $dir/xattrlist2 $dir/reflist || return 1 + rados --pool rbd --striper rmxattr toyfile somexattr || return 1 + + local attr_not_found_str="No data available" + [ `uname` = FreeBSD ] && \ + attr_not_found_str="Attribute not found" + expect_failure $dir "$attr_not_found_str" \ + rados --pool rbd --striper getxattr toyfile somexattr || return 1 + expect_failure $dir "$attr_not_found_str" \ + rados --pool rbd getxattr toyfile.0000000000000000 somexattr || return 1 + + # test rm + rados --pool rbd --striper rm toyfile || return 1 + expect_failure $dir 'No such file or directory' \ + rados --pool rbd --striper stat toyfile || return 1 + expect_failure $dir 'No such file or directory' \ + rados --pool rbd stat toyfile.0000000000000000 || return 1 + + # cleanup + teardown $dir || return 1 +} + +main rados-striper "$@" diff --git a/qa/standalone/misc/test-ceph-helpers.sh b/qa/standalone/misc/test-ceph-helpers.sh new file mode 100755 index 000000000..e7805858a --- /dev/null +++ 
b/qa/standalone/misc/test-ceph-helpers.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014 Red Hat <contact@redhat.com> +# Copyright (C) 2014 Federico Gimenez <fgimenez@coit.es> +# +# Author: Loic Dachary <loic@dachary.org> +# Author: Federico Gimenez <fgimenez@coit.es> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +$CEPH_ROOT/qa/standalone/ceph-helpers.sh TESTS "$@" diff --git a/qa/standalone/misc/test-snaptrim-stats.sh b/qa/standalone/misc/test-snaptrim-stats.sh new file mode 100755 index 000000000..98b3e4fdd --- /dev/null +++ b/qa/standalone/misc/test-snaptrim-stats.sh @@ -0,0 +1,188 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2022 Red Hat <contact@redhat.com> +# +# Author: Sridhar Seshasayee <sseshasa@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--debug-bluestore 20 " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_snaptrim_stats() { + local dir=$1 + local poolname=test + local OSDS=3 + local PGNUM=8 + local PGPNUM=8 + local objects=10 + local WAIT_FOR_UPDATE=10 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off || return 1 + done + + # disable scrubs + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + + # Create a pool + create_pool $poolname $PGNUM $PGPNUM + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + # write a few objects + TESTDATA="testdata.1" + dd if=/dev/urandom of=$TESTDATA bs=4096 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + # create a snapshot, clones + SNAP=1 + rados -p $poolname mksnap snap${SNAP} + TESTDATA="testdata.2" + dd if=/dev/urandom of=$TESTDATA bs=4096 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + # remove the snapshot, should trigger snaptrim + rados -p $poolname rmsnap snap${SNAP} + + # check for snaptrim stats + wait_for_clean || return 1 + sleep 
$WAIT_FOR_UPDATE + local objects_trimmed=0 + local snaptrim_duration_total=0.0 + for i in $(seq 0 $(expr $PGNUM - 1)) + do + local pgid="${poolid}.${i}" + objects_trimmed=$(expr $objects_trimmed + $(ceph pg $pgid query | \ + jq '.info.stats.objects_trimmed')) + snaptrim_duration_total=`echo $snaptrim_duration_total + $(ceph pg \ + $pgid query | jq '.info.stats.snaptrim_duration') | bc` + done + test $objects_trimmed -eq $objects || return 1 + echo "$snaptrim_duration_total > 0.0" | bc || return 1 + + teardown $dir || return 1 +} + +function TEST_snaptrim_stats_multiple_snaps() { + local dir=$1 + local poolname=test + local OSDS=3 + local PGNUM=8 + local PGPNUM=8 + local objects=10 + local WAIT_FOR_UPDATE=10 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off || return 1 + done + + # disable scrubs + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + + # Create a pool + create_pool $poolname $PGNUM $PGPNUM + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + # write a few objects + local TESTDATA="testdata.0" + dd if=/dev/urandom of=$TESTDATA bs=4096 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + # create snapshots, clones + NUMSNAPS=2 + for i in `seq 1 $NUMSNAPS` + do + rados -p $poolname mksnap snap${i} + TESTDATA="testdata".${i} + dd if=/dev/urandom of=$TESTDATA bs=4096 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + done + + # remove the snapshots, should trigger snaptrim + local total_objects_trimmed=0 + for i in `seq 1 $NUMSNAPS` + do + rados -p $poolname rmsnap snap${i} + + # check for snaptrim stats + wait_for_clean || return 1 + sleep $WAIT_FOR_UPDATE + local objects_trimmed=0 + local snaptrim_duration_total=0.0 + for i in $(seq 0 $(expr $PGNUM - 1)) + do + local pgid="${poolid}.${i}" + objects_trimmed=$(expr $objects_trimmed + $(ceph pg $pgid query | \ + jq '.info.stats.objects_trimmed')) + snaptrim_duration_total=`echo $snaptrim_duration_total + $(ceph pg \ + $pgid query | jq '.info.stats.snaptrim_duration') | bc` + done + test $objects_trimmed -eq $objects || return 1 + echo "$snaptrim_duration_total > 0.0" | bc || return 1 + total_objects_trimmed=$(expr $total_objects_trimmed + $objects_trimmed) + done + + test $total_objects_trimmed -eq $((objects * NUMSNAPS)) || return 1 + + teardown $dir || return 1 +} +main test-snaptrim-stats "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh test-snaptrim-stats.sh" +# End: diff --git a/qa/standalone/misc/ver-health.sh b/qa/standalone/misc/ver-health.sh new file mode 100755 index 000000000..e03f8f4f5 --- /dev/null +++ b/qa/standalone/misc/ver-health.sh @@ -0,0 +1,231 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2020 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON_A="127.0.0.1:7165" # git grep '\<7165\>' : there must be only one + export CEPH_MON_B="127.0.0.1:7166" # git grep '\<7166\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--mon_health_to_clog_tick_interval=1.0 " + export ORIG_CEPH_ARGS="$CEPH_ARGS" + + local funcs=${@:-$(set | ${SED} -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function wait_for_health_string() { + local grep_string=$1 + local seconds=${2:-20} + + # Allow mon to notice version difference + set -o pipefail + PASSED="false" + for ((i=0; i < $seconds; i++)); do + if ceph health | grep -q "$grep_string" + then + PASSED="true" + break + fi + sleep 1 + done + set +o pipefail + + # Make sure health changed + if [ $PASSED = "false" ]; + then + return 1 + fi + return 0 +} + + + +# Test a single OSD with an old version and multiple OSDs with 2 different old versions +function TEST_check_version_health_1() { + local dir=$1 + + # Asssume MON_A is leader? + CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A " + # setup + setup $dir || return 1 + + # create a cluster with two monitors and three osds + run_mon $dir a --public-addr=$CEPH_MON_A --mon_warn_older_version_delay=0 || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B --mon_warn_older_version_delay=0 || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + sleep 5 + ceph health detail + # should not see this yet + ceph health detail | grep DAEMON_OLD_VERSION && return 1 + + kill_daemons $dir KILL osd.1 + ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 1 + + wait_for_health_string "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1 + + ceph health detail + # Should notice that osd.1 is a different version + ceph health detail | grep -q "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1 + ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There is a daemon running an older version of ceph" || return 1 + ceph health detail | grep -q "osd.1 is running an older version of ceph: 01.00.00-gversion-test" || return 1 + + kill_daemons $dir KILL osd.2 + ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 2 + kill_daemons $dir KILL osd.0 + ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd $dir 0 + + wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1 + + ceph health detail + ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1 + ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1 + ceph health detail | grep -q "osd.1 osd.2 are running an older version of ceph: 01.00.00-gversion-test" || return 1 + ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1 +} + +# Test with 1 MON and 1 MDS with an older version, and add 2 OSDs with different versions +function TEST_check_version_health_2() { + local dir=$1 + + # Asssume MON_A is leader? 
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A " + # setup + setup $dir || return 1 + + # create a cluster with all daemon types + run_mon $dir a --public-addr=$CEPH_MON_A --mon_warn_older_version_delay=0 || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B --mon_warn_older_version_delay=0 || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_mgr $dir x || return 1 + run_mgr $dir y || return 1 + run_mds $dir m || return 1 + run_mds $dir n || return 1 + + sleep 5 + ceph health detail + # should not see this yet + ceph health detail | grep DAEMON_OLD_VERSION && return 1 + + kill_daemons $dir KILL mon.b + ceph_debug_version_for_testing=01.00.00-gversion-test run_mon $dir b --mon_warn_older_version_delay=0 + # XXX: Manager doesn't seem to use the test specific config for version + #kill_daemons $dir KILL mgr.x + #ceph_debug_version_for_testing=02.00.00-gversion-test run_mgr $dir x + kill_daemons $dir KILL mds.m + ceph_debug_version_for_testing=01.00.00-gversion-test run_mds $dir m + + wait_for_health_string "HEALTH_WARN .*There are daemons running an older version of ceph" || return 1 + + ceph health detail + # Should notice that mon.b and mds.m is a different version + ceph health detail | grep -q "HEALTH_WARN .*There are daemons running an older version of ceph" || return 1 + ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There are daemons running an older version of ceph" || return 1 + ceph health detail | grep -q "mon.b mds.m are running an older version of ceph: 01.00.00-gversion-test" || return 1 + + kill_daemons $dir KILL osd.2 + ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 2 + kill_daemons $dir KILL osd.0 + ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd $dir 0 + + wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1 + + ceph health detail + ceph health | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1 + ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1 + ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1 + ceph health detail | grep -q "mon.b osd.2 mds.m are running an older version of ceph: 01.00.00-gversion-test" || return 1 + ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1 +} + +# Verify delay handling with same setup as test 1 +function TEST_check_version_health_3() { + local dir=$1 + + # Asssume MON_A is leader? 
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A " + # setup + setup $dir || return 1 + + # create a cluster with two monitors and three osds + run_mon $dir a --public-addr=$CEPH_MON_A || return 1 + run_mon $dir b --public-addr=$CEPH_MON_B || return 1 + + local start_osd_time=$SECONDS + # use memstore for faster bootup + EXTRA_OPTS=" --osd-objectstore=memstore" run_osd $dir 0 || return 1 + EXTRA_OPTS=" --osd-objectstore=memstore" run_osd $dir 1 || return 1 + EXTRA_OPTS=" --osd-objectstore=memstore" run_osd $dir 2 || return 1 + # take the time used for boot osds into consideration + local warn_older_version_delay=$(($SECONDS - $start_osd_time + 20)) + + sleep 5 + ceph health detail + # should not see this yet + ceph health detail | grep DAEMON_OLD_VERSION && return 1 + ceph tell 'mon.*' injectargs "--mon_warn_older_version_delay $warn_older_version_delay" + kill_daemons $dir KILL osd.1 + EXTRA_OPTS=" --osd-objectstore=memstore" \ + ceph_debug_version_for_testing=01.00.00-gversion-test \ + activate_osd $dir 1 + + # Wait 50% of 20 second delay config + sleep 10 + # should not see this yet + ceph health detail | grep DAEMON_OLD_VERSION && return 1 + + # Now make sure that at least 20 seconds have passed + wait_for_health_string "HEALTH_WARN .*There is a daemon running an older version of ceph" 20 || return 1 + + ceph health detail + # Should notice that osd.1 is a different version + ceph health detail | grep -q "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1 + ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There is a daemon running an older version of ceph" || return 1 + ceph health detail | grep -q "osd.1 is running an older version of ceph: 01.00.00-gversion-test" || return 1 + + kill_daemons $dir KILL osd.2 + ceph_debug_version_for_testing=01.00.00-gversion-test activate_osd $dir 2 + kill_daemons $dir KILL osd.0 + ceph_debug_version_for_testing=02.00.00-gversion-test activate_osd $dir 0 + + wait_for_health_string "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1 + + ceph health detail + ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1 + ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1 + ceph health detail | grep -q "osd.1 osd.2 are running an older version of ceph: 01.00.00-gversion-test" || return 1 + ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1 +} + +main ver-health "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && ../qa/run-standalone.sh ver-health.sh" +# End: diff --git a/qa/standalone/mon-stretch/mon-stretch-fail-recovery.sh b/qa/standalone/mon-stretch/mon-stretch-fail-recovery.sh new file mode 100755 index 000000000..276d26aab --- /dev/null +++ b/qa/standalone/mon-stretch/mon-stretch-fail-recovery.sh @@ -0,0 +1,148 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh +function run() { + local dir=$1 + shift + + export CEPH_MON_A="127.0.0.1:7139" # git grep '\<7139\>' : there must be only one + export CEPH_MON_B="127.0.0.1:7141" # git grep '\<7141\>' : there must be only one + export CEPH_MON_C="127.0.0.1:7142" # git grep '\<7142\>' : there must be only one + export CEPH_MON_D="127.0.0.1:7143" # git grep '\<7143\>' : there must be only one + export CEPH_MON_E="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + + export BASE_CEPH_ARGS=$CEPH_ARGS + CEPH_ARGS+="--mon-host=$CEPH_MON_A" + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} +TEST_stretched_cluster_failover_add_three_osds(){ + local dir=$1 + local OSDS=8 + setup $dir || return 1 + + run_mon $dir a --public-addr $CEPH_MON_A || return 1 + wait_for_quorum 300 1 || return 1 + + run_mon $dir b --public-addr $CEPH_MON_B || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B" + wait_for_quorum 300 2 || return 1 + + run_mon $dir c --public-addr $CEPH_MON_C || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C" + wait_for_quorum 300 3 || return 1 + + run_mon $dir d --public-addr $CEPH_MON_D || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D" + wait_for_quorum 300 4 || return 1 + + run_mon $dir e --public-addr $CEPH_MON_E || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D,$CEPH_MON_E" + wait_for_quorum 300 5 || return 1 + + ceph mon set election_strategy connectivity + ceph mon add disallowed_leader e + + run_mgr $dir x || return 1 + run_mgr $dir y || return 1 + run_mgr $dir z || return 1 + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for zone in iris pze + do + ceph osd crush add-bucket $zone zone + ceph osd crush move $zone root=default + done + + + ceph osd crush add-bucket node-2 host + ceph osd crush add-bucket node-3 host + ceph osd crush add-bucket node-4 host + ceph osd crush add-bucket node-5 host + + ceph osd crush move node-2 zone=iris + ceph osd crush move node-3 zone=iris + ceph osd crush move node-4 zone=pze + ceph osd crush move node-5 zone=pze + + ceph osd crush move osd.0 host=node-2 + ceph osd crush move osd.1 host=node-2 + ceph osd crush move osd.2 host=node-3 + ceph osd crush move osd.3 host=node-3 + ceph osd crush move osd.4 host=node-4 + ceph osd crush move osd.5 host=node-4 + ceph osd crush move osd.6 host=node-5 + ceph osd crush move osd.7 host=node-5 + + ceph mon set_location a zone=iris host=node-2 + ceph mon set_location b zone=iris host=node-3 + ceph mon set_location c zone=pze host=node-4 + ceph mon set_location d zone=pze host=node-5 + + hostname=$(hostname -s) + ceph osd crush remove $hostname || return 1 + ceph osd getcrushmap > crushmap || return 1 + crushtool --decompile crushmap > crushmap.txt || return 1 + sed 's/^# end crush map$//' crushmap.txt > 
crushmap_modified.txt || return 1 + cat >> crushmap_modified.txt << EOF +rule stretch_rule { + id 1 + type replicated + min_size 1 + max_size 10 + step take iris + step chooseleaf firstn 2 type host + step emit + step take pze + step chooseleaf firstn 2 type host + step emit +} + +# end crush map +EOF + + crushtool --compile crushmap_modified.txt -o crushmap.bin || return 1 + ceph osd setcrushmap -i crushmap.bin || return 1 + local stretched_poolname=stretched_rbdpool + ceph osd pool create $stretched_poolname 32 32 stretch_rule || return 1 + ceph osd pool set $stretched_poolname size 4 || return 1 + + sleep 3 + + ceph mon set_location e zone=arbiter host=node-1 + ceph mon enable_stretch_mode e stretch_rule zone + + kill_daemons $dir KILL mon.c || return 1 + kill_daemons $dir KILL mon.d || return 1 + + kill_daemons $dir KILL osd.4 || return 1 + kill_daemons $dir KILL osd.5 || return 1 + kill_daemons $dir KILL osd.6 || return 1 + kill_daemons $dir KILL osd.7 || return 1 + + ceph -s + + sleep 3 + + run_osd $dir 8 || return 1 + run_osd $dir 9 || return 1 + run_osd $dir 10 || return 1 + + ceph -s + + sleep 3 + + teardown $dir || return 1 +} +main mon-stretch-fail-recovery "$@"
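A minimal sketch, not part of the original test, of how the recovery could be asserted explicitly instead of only printing `ceph -s` between sleeps. It assumes the ceph-helpers wait_for_clean helper, reuses the node-2/node-3 host buckets defined above, and assumes the degraded stretch cluster can still reach active+clean with one zone down:

    # hypothetical follow-up: place the replacement OSDs under the surviving
    # zone's hosts, then wait for the stretched pool instead of sleeping
    ceph osd crush move osd.8 host=node-2 || return 1
    ceph osd crush move osd.9 host=node-3 || return 1
    ceph osd crush move osd.10 host=node-3 || return 1
    wait_for_clean || return 1
    ceph health detail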
\ No newline at end of file diff --git a/qa/standalone/mon-stretch/mon-stretch-uneven-crush-weights.sh b/qa/standalone/mon-stretch/mon-stretch-uneven-crush-weights.sh new file mode 100755 index 000000000..7e13f4076 --- /dev/null +++ b/qa/standalone/mon-stretch/mon-stretch-uneven-crush-weights.sh @@ -0,0 +1,145 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh +function run() { + local dir=$1 + shift + + export CEPH_MON_A="127.0.0.1:7139" # git grep '\<7139\>' : there must be only one + export CEPH_MON_B="127.0.0.1:7141" # git grep '\<7141\>' : there must be only one + export CEPH_MON_C="127.0.0.1:7142" # git grep '\<7142\>' : there must be only one + export CEPH_MON_D="127.0.0.1:7143" # git grep '\<7143\>' : there must be only one + export CEPH_MON_E="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + + export BASE_CEPH_ARGS=$CEPH_ARGS + CEPH_ARGS+="--mon-host=$CEPH_MON_A" + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} +TEST_stretched_cluster_uneven_weight() { + local dir=$1 + local OSDS=4 + local weight=0.09000 + setup $dir || return 1 + + run_mon $dir a --public-addr $CEPH_MON_A || return 1 + wait_for_quorum 300 1 || return 1 + + run_mon $dir b --public-addr $CEPH_MON_B || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B" + wait_for_quorum 300 2 || return 1 + + run_mon $dir c --public-addr $CEPH_MON_C || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C" + wait_for_quorum 300 3 || return 1 + + run_mon $dir d --public-addr $CEPH_MON_D || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D" + wait_for_quorum 300 4 || return 1 + + run_mon $dir e --public-addr $CEPH_MON_E || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B,$CEPH_MON_C,$CEPH_MON_D,$CEPH_MON_E" + wait_for_quorum 300 5 || return 1 + + ceph mon set election_strategy connectivity + ceph mon add disallowed_leader e + + run_mgr $dir x || return 1 + run_mgr $dir y || return 1 + run_mgr $dir z || return 1 + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for zone in iris pze + do + ceph osd crush add-bucket $zone zone + ceph osd crush move $zone root=default + done + + ceph osd crush add-bucket node-2 host + ceph osd crush add-bucket node-3 host + ceph osd crush add-bucket node-4 host + ceph osd crush add-bucket node-5 host + + ceph osd crush move node-2 zone=iris + ceph osd crush move node-3 zone=iris + ceph osd crush move node-4 zone=pze + ceph osd crush move node-5 zone=pze + + ceph osd crush move osd.0 host=node-2 + ceph osd crush move osd.1 host=node-3 + ceph osd crush move osd.2 host=node-4 + ceph osd crush move osd.3 host=node-5 + + ceph mon set_location a zone=iris host=node-2 + ceph mon set_location b zone=iris host=node-3 + ceph mon set_location c zone=pze host=node-4 + ceph mon set_location d zone=pze host=node-5 + + hostname=$(hostname -s) + ceph osd crush remove $hostname || return 1 + ceph osd getcrushmap > crushmap || return 1 + crushtool --decompile crushmap > crushmap.txt || return 1 + sed 's/^# end crush map$//' crushmap.txt > crushmap_modified.txt || return 1 + cat >> crushmap_modified.txt << EOF +rule stretch_rule { + id 1 + type replicated + min_size 1 + max_size 10 + step take iris + step 
chooseleaf firstn 2 type host + step emit + step take pze + step chooseleaf firstn 2 type host + step emit +} +# end crush map +EOF + + crushtool --compile crushmap_modified.txt -o crushmap.bin || return 1 + ceph osd setcrushmap -i crushmap.bin || return 1 + local stretched_poolname=stretched_rbdpool + ceph osd pool create $stretched_poolname 32 32 stretch_rule || return 1 + ceph osd pool set $stretched_poolname size 4 || return 1 + + ceph mon set_location e zone=arbiter host=node-1 || return 1 + ceph mon enable_stretch_mode e stretch_rule zone || return 1 # Enter strech mode + + # reweight to a more round decimal. + ceph osd crush reweight osd.0 $weight + ceph osd crush reweight osd.1 $weight + ceph osd crush reweight osd.2 $weight + ceph osd crush reweight osd.3 $weight + + # Firstly, we test for stretch mode buckets != 2 + ceph osd crush add-bucket sham zone || return 1 + ceph osd crush move sham root=default || return 1 + wait_for_health "INCORRECT_NUM_BUCKETS_STRETCH_MODE" || return 1 + + ceph osd crush rm sham # clear the health warn + wait_for_health_gone "INCORRECT_NUM_BUCKETS_STRETCH_MODE" || return 1 + + # Next, we test for uneven weights across buckets + + ceph osd crush reweight osd.0 0.07000 + + wait_for_health "UNEVEN_WEIGHTS_STRETCH_MODE" || return 1 + + ceph osd crush reweight osd.0 $weight # clear the health warn + + wait_for_health_gone "UNEVEN_WEIGHTS_STRETCH_MODE" || return 1 + + teardown $dir || return 1 +} +main mon-stretched-cluster-uneven-weight "$@"
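Both stretch-mode tests reweight OSDs with `ceph osd crush reweight` and then poll for the INCORRECT_NUM_BUCKETS_STRETCH_MODE and UNEVEN_WEIGHTS_STRETCH_MODE warnings via wait_for_health / wait_for_health_gone. A minimal sketch, not part of the original test, of how the reweight itself could be read back before waiting on health; it assumes the JSON emitted by `ceph osd crush tree` exposes a per-node crush_weight field:

    # read back osd.0's CRUSH weight after the reweight (field name assumed,
    # verify against the output of your Ceph release)
    actual=$(ceph osd crush tree --format=json | \
        jq '.nodes[] | select(.name == "osd.0") | .crush_weight')
    echo "osd.0 crush_weight after reweight: $actual"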
\ No newline at end of file diff --git a/qa/standalone/mon/health-mute.sh b/qa/standalone/mon/health-mute.sh new file mode 100755 index 000000000..d8e07ca06 --- /dev/null +++ b/qa/standalone/mon/health-mute.sh @@ -0,0 +1,124 @@ +#!/bin/bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7143" # git grep '\<714\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-pg-warn-min-per-osd 0 --mon-max-pg-per-osd 1000 " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_mute() { + local dir=$1 + setup $dir || return 1 + + set -o pipefail + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd pool create foo 8 + ceph osd pool application enable foo rbd --yes-i-really-mean-it + wait_for_clean || return 1 + + ceph -s + ceph health | grep HEALTH_OK || return 1 + # test warning on setting pool size=1 + ceph osd pool set foo size 1 --yes-i-really-mean-it + ceph -s + ceph health | grep HEALTH_WARN || return 1 + ceph health detail | grep POOL_NO_REDUNDANCY || return 1 + ceph health mute POOL_NO_REDUNDANCY + ceph -s + ceph health | grep HEALTH_OK | grep POOL_NO_REDUNDANCY || return 1 + ceph health unmute POOL_NO_REDUNDANCY + ceph -s + ceph health | grep HEALTH_WARN || return 1 + # restore pool size to default + ceph osd pool set foo size 3 + ceph -s + ceph health | grep HEALTH_OK || return 1 + ceph osd set noup + ceph -s + ceph health detail | grep OSDMAP_FLAGS || return 1 + ceph osd down 0 + ceph -s + ceph health detail | grep OSD_DOWN || return 1 + ceph health detail | grep HEALTH_WARN || return 1 + + ceph health mute OSD_DOWN + ceph health mute OSDMAP_FLAGS + ceph -s + ceph health | grep HEALTH_OK | grep OSD_DOWN | grep OSDMAP_FLAGS || return 1 + ceph health unmute OSD_DOWN + ceph -s + ceph health | grep HEALTH_WARN || return 1 + + # ttl + ceph health mute OSD_DOWN 10s + ceph -s + ceph health | grep HEALTH_OK || return 1 + sleep 15 + ceph -s + ceph health | grep HEALTH_WARN || return 1 + + # sticky + ceph health mute OSDMAP_FLAGS --sticky + ceph osd unset noup + sleep 5 + ceph -s + ceph health | grep OSDMAP_FLAGS || return 1 + ceph osd set noup + ceph -s + ceph health | grep HEALTH_OK || return 1 + + # rachet down on OSD_DOWN count + ceph osd down 0 1 + ceph -s + ceph health detail | grep OSD_DOWN || return 1 + + ceph health mute OSD_DOWN + kill_daemons $dir TERM osd.0 + ceph osd unset noup + sleep 10 + ceph -s + ceph health detail | grep OSD_DOWN || return 1 + ceph health detail | grep '1 osds down' || return 1 + ceph health | grep HEALTH_OK || return 1 + + sleep 10 # give time for mon tick to rachet the mute + ceph osd set noup + ceph health mute OSDMAP_FLAGS + ceph -s + ceph health detail + ceph health | grep HEALTH_OK || return 1 + + ceph osd down 1 + ceph -s + ceph health detail + ceph health detail | grep '2 osds down' || return 1 + + sleep 10 # give time for mute to clear + ceph -s + ceph health detail + ceph health | grep HEALTH_WARN || return 1 + ceph health detail | grep '2 osds down' || return 1 + + teardown $dir || return 1 +} + +main health-mute "$@" diff --git a/qa/standalone/mon/misc.sh b/qa/standalone/mon/misc.sh new file mode 100755 index 000000000..c7fc6d441 --- 
/dev/null +++ b/qa/standalone/mon/misc.sh @@ -0,0 +1,284 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7102" # git grep '\<7102\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done +} + +TEST_POOL=rbd + +function TEST_osd_pool_get_set() { + local dir=$1 + + setup $dir || return 1 + run_mon $dir a || return 1 + create_pool $TEST_POOL 8 + + local flag + for flag in nodelete nopgchange nosizechange write_fadvise_dontneed noscrub nodeep-scrub; do + ceph osd pool set $TEST_POOL $flag 0 || return 1 + ! ceph osd dump | grep 'pool ' | grep $flag || return 1 + ceph osd pool set $TEST_POOL $flag 1 || return 1 + ceph osd dump | grep 'pool ' | grep $flag || return 1 + ceph osd pool set $TEST_POOL $flag false || return 1 + ! ceph osd dump | grep 'pool ' | grep $flag || return 1 + ceph osd pool set $TEST_POOL $flag false || return 1 + # check that setting false twice does not toggle to true (bug) + ! ceph osd dump | grep 'pool ' | grep $flag || return 1 + ceph osd pool set $TEST_POOL $flag true || return 1 + ceph osd dump | grep 'pool ' | grep $flag || return 1 + # cleanup + ceph osd pool set $TEST_POOL $flag 0 || return 1 + done + + local size=$(ceph osd pool get $TEST_POOL size|awk '{print $2}') + local min_size=$(ceph osd pool get $TEST_POOL min_size|awk '{print $2}') + local expected_min_size=$(expr $size - $size / 2) + if [ $min_size -ne $expected_min_size ]; then + echo "default min_size is wrong: expected $expected_min_size, got $min_size" + return 1 + fi + + ceph osd pool set $TEST_POOL scrub_min_interval 123456 || return 1 + ceph osd dump | grep 'pool ' | grep 'scrub_min_interval 123456' || return 1 + ceph osd pool set $TEST_POOL scrub_min_interval 0 || return 1 + ceph osd dump | grep 'pool ' | grep 'scrub_min_interval' && return 1 + ceph osd pool set $TEST_POOL scrub_max_interval 123456 || return 1 + ceph osd dump | grep 'pool ' | grep 'scrub_max_interval 123456' || return 1 + ceph osd pool set $TEST_POOL scrub_max_interval 0 || return 1 + ceph osd dump | grep 'pool ' | grep 'scrub_max_interval' && return 1 + ceph osd pool set $TEST_POOL deep_scrub_interval 123456 || return 1 + ceph osd dump | grep 'pool ' | grep 'deep_scrub_interval 123456' || return 1 + ceph osd pool set $TEST_POOL deep_scrub_interval 0 || return 1 + ceph osd dump | grep 'pool ' | grep 'deep_scrub_interval' && return 1 + + #replicated pool size restrict in 1 and 10 + ! ceph osd pool set $TEST_POOL 11 || return 1 + #replicated pool min_size must be between in 1 and size + ! ceph osd pool set $TEST_POOL min_size $(expr $size + 1) || return 1 + ! 
ceph osd pool set $TEST_POOL min_size 0 || return 1 + + local ecpool=erasepool + create_pool $ecpool 12 12 erasure default || return 1 + #erasue pool size=k+m, min_size=k + local size=$(ceph osd pool get $ecpool size|awk '{print $2}') + local min_size=$(ceph osd pool get $ecpool min_size|awk '{print $2}') + local k=$(expr $min_size - 1) # default min_size=k+1 + #erasure pool size can't change + ! ceph osd pool set $ecpool size $(expr $size + 1) || return 1 + #erasure pool min_size must be between in k and size + ceph osd pool set $ecpool min_size $(expr $k + 1) || return 1 + ! ceph osd pool set $ecpool min_size $(expr $k - 1) || return 1 + ! ceph osd pool set $ecpool min_size $(expr $size + 1) || return 1 + + teardown $dir || return 1 +} + +function TEST_mon_add_to_single_mon() { + local dir=$1 + + fsid=$(uuidgen) + MONA=127.0.0.1:7117 # git grep '\<7117\>' : there must be only one + MONB=127.0.0.1:7118 # git grep '\<7118\>' : there must be only one + CEPH_ARGS_orig=$CEPH_ARGS + CEPH_ARGS="--fsid=$fsid --auth-supported=none " + CEPH_ARGS+="--mon-initial-members=a " + CEPH_ARGS+="--mon-host=$MONA " + + setup $dir || return 1 + run_mon $dir a --public-addr $MONA || return 1 + # wait for the quorum + timeout 120 ceph -s > /dev/null || return 1 + run_mon $dir b --public-addr $MONB || return 1 + teardown $dir || return 1 + + setup $dir || return 1 + run_mon $dir a --public-addr $MONA || return 1 + # without the fix of #5454, mon.a will assert failure at seeing the MMonJoin + # from mon.b + run_mon $dir b --public-addr $MONB || return 1 + # make sure mon.b get's it's join request in first, then + sleep 2 + # wait for the quorum + timeout 120 ceph -s > /dev/null || return 1 + ceph mon dump + ceph mon dump -f json-pretty + local num_mons + num_mons=$(ceph mon dump --format=json 2>/dev/null | jq ".mons | length") || return 1 + [ $num_mons == 2 ] || return 1 + # no reason to take more than 120 secs to get this submitted + timeout 120 ceph mon add b $MONB || return 1 + teardown $dir || return 1 +} + +function TEST_no_segfault_for_bad_keyring() { + local dir=$1 + setup $dir || return 1 + # create a client.admin key and add it to ceph.mon.keyring + ceph-authtool --create-keyring $dir/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *' + ceph-authtool --create-keyring $dir/ceph.client.admin.keyring --gen-key -n client.admin --cap mon 'allow *' + ceph-authtool $dir/ceph.mon.keyring --import-keyring $dir/ceph.client.admin.keyring + CEPH_ARGS_TMP="--fsid=$(uuidgen) --mon-host=127.0.0.1:7102 --auth-supported=cephx " + CEPH_ARGS_orig=$CEPH_ARGS + CEPH_ARGS="$CEPH_ARGS_TMP --keyring=$dir/ceph.mon.keyring " + run_mon $dir a + # create a bad keyring and make sure no segfault occurs when using the bad keyring + echo -e "[client.admin]\nkey = BQAUlgtWoFePIxAAQ9YLzJSVgJX5V1lh5gyctg==" > $dir/bad.keyring + CEPH_ARGS="$CEPH_ARGS_TMP --keyring=$dir/bad.keyring" + ceph osd dump 2> /dev/null + # 139(11|128) means segfault and core dumped + [ $? 
-eq 139 ] && return 1 + CEPH_ARGS=$CEPH_ARGS_orig + teardown $dir || return 1 +} + +function TEST_mon_features() { + local dir=$1 + setup $dir || return 1 + + fsid=$(uuidgen) + MONA=127.0.0.1:7127 # git grep '\<7127\>' ; there must be only one + MONB=127.0.0.1:7128 # git grep '\<7128\>' ; there must be only one + MONC=127.0.0.1:7129 # git grep '\<7129\>' ; there must be only one + CEPH_ARGS_orig=$CEPH_ARGS + CEPH_ARGS="--fsid=$fsid --auth-supported=none " + CEPH_ARGS+="--mon-host=$MONA,$MONB,$MONC " + CEPH_ARGS+="--mon-debug-no-initial-persistent-features " + CEPH_ARGS+="--mon-debug-no-require-reef " + + run_mon $dir a --public-addr $MONA || return 1 + run_mon $dir b --public-addr $MONB || return 1 + timeout 120 ceph -s > /dev/null || return 1 + + # expect monmap to contain 3 monitors (a, b, and c) + jqinput="$(ceph quorum_status --format=json 2>/dev/null)" + jq_success "$jqinput" '.monmap.mons | length == 3' || return 1 + # quorum contains two monitors + jq_success "$jqinput" '.quorum | length == 2' || return 1 + # quorum's monitor features contain kraken, luminous, mimic, nautilus, + # octopus, pacific, quincy + jqfilter='.features.quorum_mon[]|select(. == "kraken")' + jq_success "$jqinput" "$jqfilter" "kraken" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "luminous")' + jq_success "$jqinput" "$jqfilter" "luminous" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "mimic")' + jq_success "$jqinput" "$jqfilter" "mimic" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "nautilus")' + jq_success "$jqinput" "$jqfilter" "nautilus" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "octopus")' + jq_success "$jqinput" "$jqfilter" "octopus" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "pacific")' + jq_success "$jqinput" "$jqfilter" "pacific" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "quincy")' + jq_success "$jqinput" "$jqfilter" "quincy" || return 1 + jqfilter='.features.quorum_mon[]|select(. == "reef")' + jq_success "$jqinput" "$jqfilter" "reef" || return 1 + + # monmap must have no persistent features set, because we + # don't currently have a quorum made out of all the monitors + # in the monmap. + jqfilter='.monmap.features.persistent | length == 0' + jq_success "$jqinput" "$jqfilter" || return 1 + + # nor do we have any optional features, for that matter. + jqfilter='.monmap.features.optional | length == 0' + jq_success "$jqinput" "$jqfilter" || return 1 + + # validate 'mon feature ls' + + jqinput="$(ceph mon feature ls --format=json 2>/dev/null)" + # k l m n o p q are supported + jqfilter='.all.supported[] | select(. == "kraken")' + jq_success "$jqinput" "$jqfilter" "kraken" || return 1 + jqfilter='.all.supported[] | select(. == "luminous")' + jq_success "$jqinput" "$jqfilter" "luminous" || return 1 + jqfilter='.all.supported[] | select(. == "mimic")' + jq_success "$jqinput" "$jqfilter" "mimic" || return 1 + jqfilter='.all.supported[] | select(. == "nautilus")' + jq_success "$jqinput" "$jqfilter" "nautilus" || return 1 + jqfilter='.all.supported[] | select(. == "octopus")' + jq_success "$jqinput" "$jqfilter" "octopus" || return 1 + jqfilter='.all.supported[] | select(. == "pacific")' + jq_success "$jqinput" "$jqfilter" "pacific" || return 1 + jqfilter='.all.supported[] | select(. == "quincy")' + jq_success "$jqinput" "$jqfilter" "quincy" || return 1 + jqfilter='.all.supported[] | select(. 
== "reef")' + jq_success "$jqinput" "$jqfilter" "reef" || return 1 + + # start third monitor + run_mon $dir c --public-addr $MONC || return 1 + + wait_for_quorum 300 3 || return 1 + + timeout 300 ceph -s > /dev/null || return 1 + + jqinput="$(ceph quorum_status --format=json 2>/dev/null)" + # expect quorum to have all three monitors + jqfilter='.quorum | length == 3' + jq_success "$jqinput" "$jqfilter" || return 1 + + # quorum's monitor features should have p now too + jqfilter='.features.quorum_mon[]|select(. == "pacific")' + jq_success "$jqinput" "$jqfilter" "pacific" || return 1 + + # persistent too + jqfilter='.monmap.features.persistent[]|select(. == "kraken")' + jq_success "$jqinput" "$jqfilter" "kraken" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "luminous")' + jq_success "$jqinput" "$jqfilter" "luminous" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "mimic")' + jq_success "$jqinput" "$jqfilter" "mimic" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "osdmap-prune")' + jq_success "$jqinput" "$jqfilter" "osdmap-prune" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "nautilus")' + jq_success "$jqinput" "$jqfilter" "nautilus" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "octopus")' + jq_success "$jqinput" "$jqfilter" "octopus" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "pacific")' + jq_success "$jqinput" "$jqfilter" "pacific" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "elector-pinging")' + jq_success "$jqinput" "$jqfilter" "elector-pinging" || return 1 + jqfilter='.monmap.features.persistent | length == 10' + jq_success "$jqinput" "$jqfilter" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "quincy")' + jq_success "$jqinput" "$jqfilter" "quincy" || return 1 + jqfilter='.monmap.features.persistent[]|select(. == "reef")' + jq_success "$jqinput" "$jqfilter" "reef" || return 1 + + CEPH_ARGS=$CEPH_ARGS_orig + # that's all folks. thank you for tuning in. + teardown $dir || return 1 +} + +main misc "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/mon/misc.sh" +# End: diff --git a/qa/standalone/mon/mkfs.sh b/qa/standalone/mon/mkfs.sh new file mode 100755 index 000000000..6650bdb49 --- /dev/null +++ b/qa/standalone/mon/mkfs.sh @@ -0,0 +1,193 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# +set -xe +PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: ' + + +DIR=mkfs +export CEPH_CONF=/dev/null +unset CEPH_ARGS +MON_ID=a +MON_DIR=$DIR/$MON_ID +CEPH_MON=127.0.0.1:7110 # git grep '\<7110\>' : there must be only one +TIMEOUT=360 + +EXTRAOPTS="" + +function setup() { + teardown + mkdir $DIR +} + +function teardown() { + kill_daemons + rm -fr $DIR +} + +function mon_mkfs() { + local fsid=$(uuidgen) + + ceph-mon \ + --id $MON_ID \ + --fsid $fsid \ + $EXTRAOPTS \ + --mkfs \ + --mon-data=$MON_DIR \ + --mon-initial-members=$MON_ID \ + --mon-host=$CEPH_MON \ + "$@" +} + +function mon_run() { + ceph-mon \ + --id $MON_ID \ + --chdir= \ + --mon-osd-full-ratio=.99 \ + --mon-data-avail-crit=1 \ + $EXTRAOPTS \ + --mon-data=$MON_DIR \ + --log-file=$MON_DIR/log \ + --mon-cluster-log-file=$MON_DIR/log \ + --run-dir=$MON_DIR \ + --pid-file=$MON_DIR/pidfile \ + --public-addr $CEPH_MON \ + "$@" +} + +function kill_daemons() { + for pidfile in $(find $DIR -name pidfile) ; do + pid=$(cat $pidfile) + for try in 0 1 1 1 2 3 ; do + kill $pid || break + sleep $try + done + done +} + +function auth_none() { + mon_mkfs --auth-supported=none + + ceph-mon \ + --id $MON_ID \ + --mon-osd-full-ratio=.99 \ + --mon-data-avail-crit=1 \ + $EXTRAOPTS \ + --mon-data=$MON_DIR \ + --extract-monmap $MON_DIR/monmap + + [ -f $MON_DIR/monmap ] || return 1 + + [ ! -f $MON_DIR/keyring ] || return 1 + + mon_run --auth-supported=none + + timeout $TIMEOUT ceph --mon-host $CEPH_MON mon stat || return 1 +} + +function auth_cephx_keyring() { + cat > $DIR/keyring <<EOF +[mon.] + key = AQDUS79S0AF9FRAA2cgRLFscVce0gROn/s9WMg== + caps mon = "allow *" +EOF + + mon_mkfs --keyring=$DIR/keyring + + [ -f $MON_DIR/keyring ] || return 1 + + mon_run + + timeout $TIMEOUT ceph \ + --name mon. \ + --keyring $MON_DIR/keyring \ + --mon-host $CEPH_MON mon stat || return 1 +} + +function auth_cephx_key() { + if [ -f /etc/ceph/keyring ] ; then + echo "Please move /etc/ceph/keyring away for testing!" + return 1 + fi + + local key=$(ceph-authtool --gen-print-key) + + if mon_mkfs --key='corrupted key' ; then + return 1 + else + rm -fr $MON_DIR/store.db + rm -fr $MON_DIR/kv_backend + fi + + mon_mkfs --key=$key + + [ -f $MON_DIR/keyring ] || return 1 + grep $key $MON_DIR/keyring + + mon_run + + timeout $TIMEOUT ceph \ + --name mon. \ + --keyring $MON_DIR/keyring \ + --mon-host $CEPH_MON mon stat || return 1 +} + +function makedir() { + local toodeep=$MON_DIR/toodeep + + # fail if recursive directory creation is needed + ceph-mon \ + --id $MON_ID \ + --mon-osd-full-ratio=.99 \ + --mon-data-avail-crit=1 \ + $EXTRAOPTS \ + --mkfs \ + --mon-data=$toodeep 2>&1 | tee $DIR/makedir.log + grep 'toodeep.*No such file' $DIR/makedir.log > /dev/null + rm $DIR/makedir.log + + # an empty directory does not mean the mon exists + mkdir $MON_DIR + mon_mkfs --auth-supported=none 2>&1 | tee $DIR/makedir.log + ! grep "$MON_DIR already exists" $DIR/makedir.log || return 1 +} + +function idempotent() { + mon_mkfs --auth-supported=none + mon_mkfs --auth-supported=none 2>&1 | tee $DIR/makedir.log + grep "'$MON_DIR' already exists" $DIR/makedir.log > /dev/null || return 1 +} + +function run() { + local actions + actions+="makedir " + actions+="idempotent " + actions+="auth_cephx_key " + actions+="auth_cephx_keyring " + actions+="auth_none " + for action in $actions ; do + setup + $action || return 1 + teardown + done +} + +run + +# Local Variables: +# compile-command: "cd ../.. 
; make TESTS=test/mon/mkfs.sh check" +# End: diff --git a/qa/standalone/mon/mon-bind.sh b/qa/standalone/mon/mon-bind.sh new file mode 100755 index 000000000..41982b916 --- /dev/null +++ b/qa/standalone/mon/mon-bind.sh @@ -0,0 +1,143 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Quantum Corp. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +SOCAT_PIDS=() + +function port_forward() { + local source_port=$1 + local target_port=$2 + + socat TCP-LISTEN:${source_port},fork,reuseaddr TCP:localhost:${target_port} & + SOCAT_PIDS+=( $! ) +} + +function cleanup() { + for p in "${SOCAT_PIDS[@]}"; do + kill $p + done + SOCAT_PIDS=() +} + +trap cleanup SIGTERM SIGKILL SIGQUIT SIGINT + +function run() { + local dir=$1 + shift + + export MON_IP=127.0.0.1 + export MONA_PUBLIC=7132 # git grep '\<7132\>' ; there must be only one + export MONB_PUBLIC=7133 # git grep '\<7133\>' ; there must be only one + export MONC_PUBLIC=7134 # git grep '\<7134\>' ; there must be only one + export MONA_BIND=7135 # git grep '\<7135\>' ; there must be only one + export MONB_BIND=7136 # git grep '\<7136\>' ; there must be only one + export MONC_BIND=7137 # git grep '\<7137\>' ; there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir && cleanup || { cleanup; return 1; } + teardown $dir + done +} + +function TEST_mon_client_connect_fails() { + local dir=$1 + + # start the mon with a public-bind-addr that is different + # from the public-addr. + CEPH_ARGS+="--mon-host=${MON_IP}:${MONA_PUBLIC} " + run_mon $dir a --mon-host=${MON_IP}:${MONA_PUBLIC} --public-bind-addr=${MON_IP}:${MONA_BIND} || return 1 + + # now attempt to ping it that should fail. + timeout 3 ceph ping mon.a || return 0 + return 1 +} + +function TEST_mon_client_connect() { + local dir=$1 + + # start the mon with a public-bind-addr that is different + # from the public-addr. + CEPH_ARGS+="--mon-host=${MON_IP}:${MONA_PUBLIC} " + run_mon $dir a --mon-host=${MON_IP}:${MONA_PUBLIC} --public-bind-addr=${MON_IP}:${MONA_BIND} || return 1 + + # now forward the public port to the bind port. + port_forward ${MONA_PUBLIC} ${MONA_BIND} + + # attempt to connect. we expect that to work + ceph ping mon.a || return 1 +} + +function TEST_mon_quorum() { + local dir=$1 + + # start the mon with a public-bind-addr that is different + # from the public-addr. + CEPH_ARGS+="--mon-host=${MON_IP}:${MONA_PUBLIC},${MON_IP}:${MONB_PUBLIC},${MON_IP}:${MONC_PUBLIC} " + run_mon $dir a --public-addr=${MON_IP}:${MONA_PUBLIC} --public-bind-addr=${MON_IP}:${MONA_BIND} || return 1 + run_mon $dir b --public-addr=${MON_IP}:${MONB_PUBLIC} --public-bind-addr=${MON_IP}:${MONB_BIND} || return 1 + run_mon $dir c --public-addr=${MON_IP}:${MONC_PUBLIC} --public-bind-addr=${MON_IP}:${MONC_BIND} || return 1 + + # now forward the public port to the bind port. 
+ port_forward ${MONA_PUBLIC} ${MONA_BIND} + port_forward ${MONB_PUBLIC} ${MONB_BIND} + port_forward ${MONC_PUBLIC} ${MONC_BIND} + + # expect monmap to contain 3 monitors (a, b, and c) + jqinput="$(ceph quorum_status --format=json 2>/dev/null)" + jq_success "$jqinput" '.monmap.mons | length == 3' || return 1 + + # quorum should form + wait_for_quorum 300 3 || return 1 + # expect quorum to have all three monitors + jqfilter='.quorum | length == 3' + jq_success "$jqinput" "$jqfilter" || return 1 +} + +function TEST_put_get() { + local dir=$1 + + # start the mon with a public-bind-addr that is different + # from the public-addr. + CEPH_ARGS+="--mon-host=${MON_IP}:${MONA_PUBLIC},${MON_IP}:${MONB_PUBLIC},${MON_IP}:${MONC_PUBLIC} " + run_mon $dir a --public-addr=${MON_IP}:${MONA_PUBLIC} --public-bind-addr=${MON_IP}:${MONA_BIND} || return 1 + run_mon $dir b --public-addr=${MON_IP}:${MONB_PUBLIC} --public-bind-addr=${MON_IP}:${MONB_BIND} || return 1 + run_mon $dir c --public-addr=${MON_IP}:${MONC_PUBLIC} --public-bind-addr=${MON_IP}:${MONC_BIND} || return 1 + + # now forward the public port to the bind port. + port_forward ${MONA_PUBLIC} ${MONA_BIND} + port_forward ${MONB_PUBLIC} ${MONB_BIND} + port_forward ${MONC_PUBLIC} ${MONC_BIND} + + # quorum should form + wait_for_quorum 300 3 || return 1 + + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + create_pool hello 8 || return 1 + + echo "hello world" > $dir/hello + rados --pool hello put foo $dir/hello || return 1 + rados --pool hello get foo $dir/hello2 || return 1 + diff $dir/hello $dir/hello2 || return 1 +} + +main mon-bind "$@" diff --git a/qa/standalone/mon/mon-created-time.sh b/qa/standalone/mon/mon-created-time.sh new file mode 100755 index 000000000..4b8446059 --- /dev/null +++ b/qa/standalone/mon/mon-created-time.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 SUSE LINUX GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7125" # git grep '\<7125\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_mon_created_time() { + local dir=$1 + + run_mon $dir a || return 1 + + ceph mon dump || return 1 + + if test "$(ceph mon dump 2>/dev/null | sed -n '/created/p' | awk '{print $NF}')"x = ""x ; then + return 1 + fi + + if test "$(ceph mon dump 2>/dev/null | sed -n '/created/p' | awk '{print $NF}')"x = "0.000000"x ; then + return 1 + fi +} + +main mon-created-time "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && test/mon/mon-created-time.sh" +# End: diff --git a/qa/standalone/mon/mon-handle-forward.sh b/qa/standalone/mon/mon-handle-forward.sh new file mode 100755 index 000000000..01c8f130f --- /dev/null +++ b/qa/standalone/mon/mon-handle-forward.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014,2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + + setup $dir || return 1 + + MONA=127.0.0.1:7300 + MONB=127.0.0.1:7301 + ( + FSID=$(uuidgen) + export CEPH_ARGS + CEPH_ARGS+="--fsid=$FSID --auth-supported=none " + CEPH_ARGS+="--mon-host=$MONA,$MONB " + run_mon $dir a --public-addr $MONA || return 1 + run_mon $dir b --public-addr $MONB || return 1 + ) + + timeout 360 ceph --mon-host-override $MONA mon stat || return 1 + # check that MONB is indeed a peon + ceph --admin-daemon $(get_asok_path mon.b) mon_status | + grep '"peon"' || return 1 + # when the leader ( MONA ) is used, there is no message forwarding + ceph --mon-host-override $MONA osd pool create POOL1 12 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep 'mon_command(.*"POOL1"' $dir/mon.a.log || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.b) log flush || return 1 + grep 'mon_command(.*"POOL1"' $dir/mon.b.log && return 1 + # when the peon ( MONB ) is used, the message is forwarded to the leader + ceph --mon-host-override $MONB osd pool create POOL2 12 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.b) log flush || return 1 + grep 'forward_request.*mon_command(.*"POOL2"' $dir/mon.b.log || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep ' forward(mon_command(.*"POOL2"' $dir/mon.a.log || return 1 + # forwarded messages must retain features from the original connection + features=$(sed -n -e 's|.*127.0.0.1:0.*accept features \([0-9][0-9]*\)|\1|p' < \ + $dir/mon.b.log) + grep ' forward(mon_command(.*"POOL2".*con_features '$features $dir/mon.a.log || return 1 + + teardown $dir || return 1 +} + +main mon-handle-forward "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 TESTS=test/mon/mon-handle-forward.sh check" +# End: diff --git a/qa/standalone/mon/mon-last-epoch-clean.sh b/qa/standalone/mon/mon-last-epoch-clean.sh new file mode 100755 index 000000000..82243103e --- /dev/null +++ b/qa/standalone/mon/mon-last-epoch-clean.sh @@ -0,0 +1,307 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7302" # git grep '\<7105\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function check_lec_equals_pools() { + + local pool_id=$1 + + report=$(ceph report) + lec=$(echo $report | \ + jq '.osdmap_clean_epochs.min_last_epoch_clean') + + if [[ -z "$pool_id" ]]; then + pools=($(echo $report | \ + jq \ + ".osdmap_clean_epochs.last_epoch_clean.per_pool[] |" \ + " select(.floor == $lec) | .poolid")) + + [[ ${#pools[*]} -eq 2 ]] || ( echo $report ; return 1 ) + else + floor=($(echo $report | \ + jq \ + ".osdmap_clean_epochs.last_epoch_clean.per_pool[] |" \ + " select(.poolid == $pool_id) | .floor")) + + [[ $lec -eq $floor ]] || ( echo $report ; return 1 ) + fi + return 0 +} + +function check_lec_lower_than_pool() { + + local pool_id=$1 + [[ -z "$pool_id" ]] && ( echo "expected pool_id as parameter" ; exit 1 ) + + report=$(ceph report) + lec=$(echo $report | \ + jq '.osdmap_clean_epochs.min_last_epoch_clean') + + floor=($(echo $report | \ + jq \ + ".osdmap_clean_epochs.last_epoch_clean.per_pool[] |" \ + " select(.poolid == $pool_id) | .floor")) + + [[ $lec -lt $floor ]] || ( echo $report ; return 1 ) + return 0 +} + +function check_floor_pool_greater_than_pool() { + + local pool_a=$1 + local pool_b=$1 + [[ -z "$pool_a" ]] && ( echo "expected id as first parameter" ; exit 1 ) + [[ -z "$pool_b" ]] && ( echo "expected id as second parameter" ; exit 1 ) + + report=$(ceph report) + + floor_a=($(echo $report | \ + jq \ + ".osdmap_clean_epochs.last_epoch_clean.per_pool[] |" \ + " select(.poolid == $pool_a) | .floor")) + + floor_b=($(echo $report | \ + jq \ + ".osdmap_clean_epochs.last_epoch_clean.per_pool[] |" \ + " select(.poolid == $pool_b) | .floor")) + + [[ $floor_a -gt $floor_b ]] || ( echo $report ; return 1 ) + return 0 +} + +function check_lec_honours_osd() { + + local osd=$1 + + report=$(ceph report) + lec=$(echo $report | \ + jq '.osdmap_clean_epochs.min_last_epoch_clean') + + if [[ -z "$osd" ]]; then + osds=($(echo $report | \ + jq \ + ".osdmap_clean_epochs.osd_epochs[] |" \ + " select(.epoch >= $lec) | .id")) + + [[ ${#osds[*]} -eq 3 ]] || ( echo $report ; return 1 ) + else + epoch=($(echo $report | \ + jq \ + ".osdmap_clean_epochs.osd_epochs[] |" \ + " select(.id == $id) | .epoch")) + [[ ${#epoch[*]} -eq 1 ]] || ( echo $report ; return 1 ) + [[ ${epoch[0]} -ge $lec ]] || ( echo $report ; return 1 ) + fi + + return 0 +} + +function validate_fc() { + report=$(ceph report) + lec=$(echo $report | \ + jq '.osdmap_clean_epochs.min_last_epoch_clean') + osdm_fc=$(echo $report | \ + jq '.osdmap_first_committed') + + [[ $lec -eq $osdm_fc ]] || ( echo $report ; return 1 ) + return 0 +} + +function get_fc_lc_diff() { + report=$(ceph report) + osdm_fc=$(echo $report | \ + jq '.osdmap_first_committed') + osdm_lc=$(echo $report | \ + jq '.osdmap_last_committed') + + echo $((osdm_lc - osdm_fc)) +} + 
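The helpers above, and get_pool_id below, all shell out to `ceph report` and extract fields with jq. As a reading aid, here is a minimal sketch, not called by any test, that prints the exact fields those checks consume; the JSON paths are copied from the jq filters above, while the function name and output labels are illustrative only:

    # illustrative only: dump the osdmap epoch fields used by the lec checks
    function print_osdmap_epoch_fields() {
        local report
        report=$(ceph report)
        echo "min_last_epoch_clean: $(echo $report | jq '.osdmap_clean_epochs.min_last_epoch_clean')"
        echo "per-pool floors: $(echo $report | jq -c '.osdmap_clean_epochs.last_epoch_clean.per_pool')"
        echo "osd epochs: $(echo $report | jq -c '.osdmap_clean_epochs.osd_epochs')"
        echo "osdmap first/last committed: $(echo $report | jq -c '[.osdmap_first_committed, .osdmap_last_committed]')"
    }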
+function get_pool_id() { + + local pn=$1 + [[ -z "$pn" ]] && ( echo "expected pool name as argument" ; exit 1 ) + + report=$(ceph report) + pool_id=$(echo $report | \ + jq ".osdmap.pools[] | select(.pool_name == \"$pn\") | .pool") + + [[ $pool_id -ge 0 ]] || \ + ( echo "unexpected pool id for pool \'$pn\': $pool_id" ; return -1 ) + + echo $pool_id + return 0 +} + +function wait_for_total_num_maps() { + # rip wait_for_health, becaue it's easier than deduplicating the code + local -a delays=($(get_timeout_delays $TIMEOUT .1)) + local -i loop=0 + local -i v_diff=$1 + + while [[ $(get_fc_lc_diff) -gt $v_diff ]]; do + if (( $loop >= ${#delays[*]} )) ; then + echo "maps were not trimmed" + return 1 + fi + sleep ${delays[$loop]} + loop+=1 + done +} + +function TEST_mon_last_clean_epoch() { + + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x --mon-warn-on-pool-no-app=false || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + osd_pid=$(cat $dir/osd.2.pid) + + sleep 5 + + ceph tell 'osd.*' injectargs '--osd-beacon-report-interval 10' || exit 1 + ceph tell 'mon.*' injectargs \ + '--mon-min-osdmap-epochs 2 --paxos-service-trim-min 1' || exit 1 + + create_pool foo 32 + create_pool bar 32 + + foo_id=$(get_pool_id "foo") + bar_id=$(get_pool_id "bar") + + [[ $foo_id -lt 0 ]] && ( echo "couldn't find pool 'foo' id" ; exit 1 ) + [[ $bar_id -lt 0 ]] && ( echo "couldn't find pool 'bar' id" ; exit 1 ) + + # no real clue why we are getting these warnings, but let's make them go + # away so we can be happy. + + ceph osd set-full-ratio 0.97 + ceph osd set-backfillfull-ratio 0.97 + + wait_for_health_ok || exit 1 + + pre_map_diff=$(get_fc_lc_diff) + wait_for_total_num_maps 2 + post_map_diff=$(get_fc_lc_diff) + + [[ $post_map_diff -le $pre_map_diff ]] || exit 1 + + pre_map_diff=$post_map_diff + + ceph osd pool set foo size 3 + ceph osd pool set bar size 3 + + wait_for_health_ok || exit 1 + + check_lec_equals_pools || exit 1 + check_lec_honours_osd || exit 1 + validate_fc || exit 1 + + # down osd.2; expected result (because all pools' size equals 3): + # - number of committed maps increase over 2 + # - lec equals fc + # - lec equals osd.2's epoch + # - all pools have floor equal to lec + + while kill $osd_pid ; do sleep 1 ; done + ceph osd out 2 + sleep 5 # seriously, just to make sure things settle; we may not need this. + + # generate some maps + for ((i=0; i <= 10; ++i)); do + ceph osd set noup + sleep 1 + ceph osd unset noup + sleep 1 + done + + post_map_diff=$(get_fc_lc_diff) + [[ $post_map_diff -gt 2 ]] || exit 1 + + validate_fc || exit 1 + check_lec_equals_pools || exit 1 + check_lec_honours_osd 2 || exit 1 + + # adjust pool 'bar' size to 2; expect: + # - number of committed maps still over 2 + # - lec equals fc + # - lec equals pool 'foo' floor + # - pool 'bar' floor greater than pool 'foo' + + ceph osd pool set bar size 2 + + diff_ver=$(get_fc_lc_diff) + [[ $diff_ver -gt 2 ]] || exit 1 + + validate_fc || exit 1 + + check_lec_equals_pools $foo_id || exit 1 + check_lec_lower_than_pool $bar_id || exit 1 + + check_floor_pool_greater_than_pool $bar_id $foo_id || exit 1 + + # set pool 'foo' size to 2; expect: + # - health_ok + # - lec equals pools + # - number of committed maps decreases + # - lec equals fc + + pre_map_diff=$(get_fc_lc_diff) + + ceph osd pool set foo size 2 || exit 1 + wait_for_clean || exit 1 + + check_lec_equals_pools || exit 1 + validate_fc || exit 1 + + if ! 
wait_for_total_num_maps 2 ; then + post_map_diff=$(get_fc_lc_diff) + # number of maps is decreasing though, right? + [[ $post_map_diff -lt $pre_map_diff ]] || exit 1 + fi + + # bring back osd.2; expect: + # - health_ok + # - lec equals fc + # - number of committed maps equals 2 + # - all pools have floor equal to lec + + pre_map_diff=$(get_fc_lc_diff) + + activate_osd $dir 2 || exit 1 + wait_for_health_ok || exit 1 + validate_fc || exit 1 + check_lec_equals_pools || exit 1 + + if ! wait_for_total_num_maps 2 ; then + post_map_diff=$(get_fc_lc_diff) + # number of maps is decreasing though, right? + [[ $post_map_diff -lt $pre_map_diff ]] || exit 1 + fi + + return 0 +} + +main mon-last-clean-epoch "$@" diff --git a/qa/standalone/mon/mon-osdmap-prune.sh b/qa/standalone/mon/mon-osdmap-prune.sh new file mode 100755 index 000000000..f8f7876bb --- /dev/null +++ b/qa/standalone/mon/mon-osdmap-prune.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +base_test=$CEPH_ROOT/qa/workunits/mon/test_mon_osdmap_prune.sh + +function run() { + + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7115" + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none --mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_osdmap_prune() { + + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + sleep 5 + + # we are getting OSD_OUT_OF_ORDER_FULL health errors, and it's not clear + # why. so, to make the health checks happy, mask those errors. + ceph osd set-full-ratio 0.97 + ceph osd set-backfillfull-ratio 0.97 + + ceph config set osd osd_beacon_report_interval 10 || return 1 + ceph config set mon mon_debug_extra_checks true || return 1 + + ceph config set mon mon_min_osdmap_epochs 100 || return 1 + ceph config set mon mon_osdmap_full_prune_enabled true || return 1 + ceph config set mon mon_osdmap_full_prune_min 200 || return 1 + ceph config set mon mon_osdmap_full_prune_interval 10 || return 1 + ceph config set mon mon_osdmap_full_prune_txsize 100 || return 1 + + + bash -x $base_test || return 1 + + return 0 +} + +main mon-osdmap-prune "$@" + diff --git a/qa/standalone/mon/mon-ping.sh b/qa/standalone/mon/mon-ping.sh new file mode 100755 index 000000000..1f5096be1 --- /dev/null +++ b/qa/standalone/mon/mon-ping.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 SUSE LINUX GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7119" # git grep '\<7119\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_mon_ping() { + local dir=$1 + + run_mon $dir a || return 1 + + ceph ping mon.a || return 1 +} + +main mon-ping "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/mon/mon-ping.sh" +# End: diff --git a/qa/standalone/mon/mon-scrub.sh b/qa/standalone/mon/mon-scrub.sh new file mode 100755 index 000000000..158bd434c --- /dev/null +++ b/qa/standalone/mon/mon-scrub.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7120" # git grep '\<7120\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_mon_scrub() { + local dir=$1 + + run_mon $dir a || return 1 + + ceph mon scrub || return 1 +} + +main mon-scrub "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && test/mon/mon-scrub.sh" +# End: diff --git a/qa/standalone/mon/mon-seesaw.sh b/qa/standalone/mon/mon-seesaw.sh new file mode 100755 index 000000000..1c97847b9 --- /dev/null +++ b/qa/standalone/mon/mon-seesaw.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON_A="127.0.0.1:7139" # git grep '\<7139\>' : there must be only one + export CEPH_MON_B="127.0.0.1:7141" # git grep '\<7141\>' : there must be only one + export CEPH_MON_C="127.0.0.1:7142" # git grep '\<7142\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + + export BASE_CEPH_ARGS=$CEPH_ARGS + CEPH_ARGS+="--mon-host=$CEPH_MON_A " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_mon_seesaw() { + local dir=$1 + + setup $dir || return + + # start with 1 mon + run_mon $dir aa --public-addr $CEPH_MON_A || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + wait_for_quorum 300 1 || return 1 + + # add in a second + run_mon $dir bb --public-addr $CEPH_MON_B || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_A,$CEPH_MON_B" + wait_for_quorum 300 2 || return 1 + + # remove the first one + ceph mon rm aa || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_B" + sleep 5 + wait_for_quorum 300 1 || return 1 + + # do some stuff that requires the osds be able to communicate with the + # mons. (see http://tracker.ceph.com/issues/17558) + ceph osd pool create foo 8 + rados -p foo bench 1 write + wait_for_clean || return 1 + + # nuke monstore so that it will rejoin (otherwise we get + # "not in monmap and have been in a quorum before; must have been removed" + rm -rf $dir/aa + + # add a back in + # (use a different addr to avoid bind issues) + run_mon $dir aa --public-addr $CEPH_MON_C || return 1 + CEPH_ARGS="$BASE_CEPH_ARGS --mon-host=$CEPH_MON_C,$CEPH_MON_B" + wait_for_quorum 300 2 || return 1 +} + +main mon-seesaw "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/mon/mon-ping.sh" +# End: diff --git a/qa/standalone/mon/osd-crush.sh b/qa/standalone/mon/osd-crush.sh new file mode 100755 index 000000000..aa7cac694 --- /dev/null +++ b/qa/standalone/mon/osd-crush.sh @@ -0,0 +1,196 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7104" # git grep '\<7104\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | ${SED} -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_crush_rule_create_simple() { + local dir=$1 + + run_mon $dir a || return 1 + + ceph --format xml osd crush rule dump replicated_rule | \ + egrep '<op>take</op><item>[^<]+</item><item_name>default</item_name>' | \ + grep '<op>choose_firstn</op><num>0</num><type>osd</type>' || return 1 + local rule=rule0 + local root=host1 + ceph osd crush add-bucket $root host + local failure_domain=osd + ceph osd crush rule create-simple $rule $root $failure_domain || return 1 + ceph osd crush rule create-simple $rule $root $failure_domain 2>&1 | \ + grep "$rule already exists" || return 1 + ceph --format xml osd crush rule dump $rule | \ + egrep '<op>take</op><item>[^<]+</item><item_name>'$root'</item_name>' | \ + grep '<op>choose_firstn</op><num>0</num><type>'$failure_domain'</type>' || return 1 + ceph osd crush rule rm $rule || return 1 +} + +function TEST_crush_rule_dump() { + local dir=$1 + + run_mon $dir a || return 1 + + local rule=rule1 + ceph osd crush rule create-erasure $rule || return 1 + test $(ceph --format json osd crush rule dump $rule | \ + jq ".rule_name == \"$rule\"") == true || return 1 + test $(ceph --format json osd crush rule dump | \ + jq "map(select(.rule_name == \"$rule\")) | length == 1") == true || return 1 + ! ceph osd crush rule dump non_existent_rule || return 1 + ceph osd crush rule rm $rule || return 1 +} + +function TEST_crush_rule_rm() { + local rule=erasure2 + + run_mon $dir a || return 1 + + ceph osd crush rule create-erasure $rule default || return 1 + ceph osd crush rule ls | grep $rule || return 1 + ceph osd crush rule rm $rule || return 1 + ! ceph osd crush rule ls | grep $rule || return 1 +} + +function TEST_crush_rule_create_erasure() { + local dir=$1 + + run_mon $dir a || return 1 + # should have at least one OSD + run_osd $dir 0 || return 1 + + local rule=rule3 + # + # create a new rule with the default profile, implicitly + # + ceph osd crush rule create-erasure $rule || return 1 + ceph osd crush rule create-erasure $rule 2>&1 | \ + grep "$rule already exists" || return 1 + ceph --format xml osd crush rule dump $rule | \ + egrep '<op>take</op><item>[^<]+</item><item_name>default</item_name>' | \ + grep '<op>chooseleaf_indep</op><num>0</num><type>host</type>' || return 1 + ceph osd crush rule rm $rule || return 1 + ! ceph osd crush rule ls | grep $rule || return 1 + # + # create a new rule with the default profile, explicitly + # + ceph osd crush rule create-erasure $rule default || return 1 + ceph osd crush rule ls | grep $rule || return 1 + ceph osd crush rule rm $rule || return 1 + ! ceph osd crush rule ls | grep $rule || return 1 + # + # create a new rule and the default profile, implicitly + # + ceph osd erasure-code-profile rm default || return 1 + ! 
ceph osd erasure-code-profile ls | grep default || return 1 + ceph osd crush rule create-erasure $rule || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path mon.a) log flush || return 1 + grep 'profile set default' $dir/mon.a.log || return 1 + ceph osd erasure-code-profile ls | grep default || return 1 + ceph osd crush rule rm $rule || return 1 + ! ceph osd crush rule ls | grep $rule || return 1 +} + +function TEST_add_rule_failed() { + local dir=$1 + + run_mon $dir a || return 1 + + local root=host1 + + ceph osd crush add-bucket $root host + ceph osd crush rule create-simple test_rule1 $root osd firstn || return 1 + ceph osd crush rule create-simple test_rule2 $root osd firstn || return 1 + ceph osd getcrushmap > $dir/crushmap || return 1 + crushtool --decompile $dir/crushmap > $dir/crushmap.txt || return 1 + for i in $(seq 3 255) + do + cat <<EOF +rule test_rule$i { + id $i + type replicated + step take $root + step choose firstn 0 type osd + step emit +} +EOF + done >> $dir/crushmap.txt + crushtool --compile $dir/crushmap.txt -o $dir/crushmap || return 1 + ceph osd setcrushmap -i $dir/crushmap || return 1 + ceph osd crush rule create-simple test_rule_nospace $root osd firstn 2>&1 | grep "Error ENOSPC" || return 1 + +} + +function TEST_crush_rename_bucket() { + local dir=$1 + + run_mon $dir a || return 1 + + ceph osd crush add-bucket host1 host + ceph osd tree + ! ceph osd tree | grep host2 || return 1 + ceph osd crush rename-bucket host1 host2 || return 1 + ceph osd tree + ceph osd tree | grep host2 || return 1 + ceph osd crush rename-bucket host1 host2 || return 1 # idempotency + ceph osd crush rename-bucket nonexistent something 2>&1 | grep "Error ENOENT" || return 1 +} + +function TEST_crush_ls_node() { + local dir=$1 + run_mon $dir a || return 1 + ceph osd crush add-bucket default1 root + ceph osd crush add-bucket host1 host + ceph osd crush move host1 root=default1 + ceph osd crush ls default1 | grep host1 || return 1 + ceph osd crush ls default2 2>&1 | grep "Error ENOENT" || return 1 +} + +function TEST_crush_reject_empty() { + local dir=$1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + # should have at least one OSD + run_osd $dir 0 || return 1 + create_rbd_pool || return 1 + + local empty_map=$dir/empty_map + :> $empty_map.txt + crushtool -c $empty_map.txt -o $empty_map.map || return 1 + expect_failure $dir "Error EINVAL" \ + ceph osd setcrushmap -i $empty_map.map || return 1 +} + +main osd-crush "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && test/mon/osd-crush.sh" +# End: diff --git a/qa/standalone/mon/osd-df.sh b/qa/standalone/mon/osd-df.sh new file mode 100755 index 000000000..962909fdb --- /dev/null +++ b/qa/standalone/mon/osd-df.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7113" # git grep '\<7113\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_osd_df() { + local dir=$1 + setup $dir || return 1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + # normal case + ceph osd df --f json-pretty | grep osd.0 || return 1 + ceph osd df --f json-pretty | grep osd.1 || return 1 + ceph osd df --f json-pretty | grep osd.2 || return 1 + ceph osd df --f json-pretty | grep osd.3 || return 1 + ceph osd df --f json-pretty | grep osd.4 || return 1 + ceph osd df --f json-pretty | grep osd.5 || return 1 + + # filter by device class + osd_class=$(ceph osd crush get-device-class 0) + ceph osd df class $osd_class --f json-pretty | grep 'osd.0' || return 1 + # post-nautilus we require filter-type no more + ceph osd df $osd_class --f json-pretty | grep 'osd.0' || return 1 + ceph osd crush rm-device-class 0 || return 1 + ceph osd crush set-device-class aaa 0 || return 1 + ceph osd df aaa --f json-pretty | grep 'osd.0' || return 1 + ceph osd df aaa --f json-pretty | grep 'osd.1' && return 1 + # reset osd.1's device class + ceph osd crush rm-device-class 0 || return 1 + ceph osd crush set-device-class $osd_class 0 || return 1 + + # filter by crush node + ceph osd df osd.0 --f json-pretty | grep osd.0 || return 1 + ceph osd df osd.0 --f json-pretty | grep osd.1 && return 1 + ceph osd crush move osd.0 root=default host=foo || return 1 + ceph osd crush move osd.1 root=default host=foo || return 1 + ceph osd crush move osd.2 root=default host=foo || return 1 + ceph osd crush move osd.3 root=default host=bar || return 1 + ceph osd crush move osd.4 root=default host=bar || return 1 + ceph osd crush move osd.5 root=default host=bar || return 1 + ceph osd df tree foo --f json-pretty | grep foo || return 1 + ceph osd df tree foo --f json-pretty | grep bar && return 1 + ceph osd df foo --f json-pretty | grep osd.0 || return 1 + ceph osd df foo --f json-pretty | grep osd.1 || return 1 + ceph osd df foo --f json-pretty | grep osd.2 || return 1 + ceph osd df foo --f json-pretty | grep osd.3 && return 1 + ceph osd df foo --f json-pretty | grep osd.4 && return 1 + ceph osd df foo --f json-pretty | grep osd.5 && return 1 + ceph osd df tree bar --f json-pretty | grep bar || return 1 + ceph osd df tree bar --f json-pretty | grep foo && return 1 + ceph osd df bar --f json-pretty | grep osd.0 && return 1 + ceph osd df bar --f json-pretty | grep osd.1 && return 1 + ceph osd df bar --f json-pretty | grep osd.2 && return 1 + ceph osd df bar --f json-pretty | grep osd.3 || return 1 + ceph osd df bar --f json-pretty | grep osd.4 || return 1 + ceph osd df bar --f json-pretty | grep osd.5 || return 1 + + # filter by pool + ceph osd crush rm-device-class all || return 1 + ceph osd 
crush set-device-class nvme 0 1 3 4 || return 1 + ceph osd crush rule create-replicated nvme-rule default host nvme || return 1 + ceph osd pool create nvme-pool 12 12 nvme-rule || return 1 + ceph osd df nvme-pool --f json-pretty | grep osd.0 || return 1 + ceph osd df nvme-pool --f json-pretty | grep osd.1 || return 1 + ceph osd df nvme-pool --f json-pretty | grep osd.2 && return 1 + ceph osd df nvme-pool --f json-pretty | grep osd.3 || return 1 + ceph osd df nvme-pool --f json-pretty | grep osd.4 || return 1 + ceph osd df nvme-pool --f json-pretty | grep osd.5 && return 1 + + teardown $dir || return 1 +} + +main osd-df "$@" diff --git a/qa/standalone/mon/osd-erasure-code-profile.sh b/qa/standalone/mon/osd-erasure-code-profile.sh new file mode 100755 index 000000000..0afc5fc0b --- /dev/null +++ b/qa/standalone/mon/osd-erasure-code-profile.sh @@ -0,0 +1,240 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7220" # git grep '\<7220\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_set() { + local dir=$1 + local id=$2 + + run_mon $dir a || return 1 + + local profile=myprofile + # + # no key=value pairs : use the default configuration + # + ceph osd erasure-code-profile set $profile 2>&1 || return 1 + ceph osd erasure-code-profile get $profile | \ + grep plugin=jerasure || return 1 + ceph osd erasure-code-profile rm $profile + # + # key=value pairs override the default + # + ceph osd erasure-code-profile set $profile \ + key=value plugin=isa || return 1 + ceph osd erasure-code-profile get $profile | \ + grep -e key=value -e plugin=isa || return 1 + # + # --force is required to override an existing profile + # + ! ceph osd erasure-code-profile set $profile > $dir/out 2>&1 || return 1 + grep 'will not override' $dir/out || return 1 + ceph osd erasure-code-profile set $profile key=other --force || return 1 + ceph osd erasure-code-profile get $profile | \ + grep key=other || return 1 + + ceph osd erasure-code-profile rm $profile # cleanup +} + +function TEST_ls() { + local dir=$1 + local id=$2 + + run_mon $dir a || return 1 + + local profile=myprofile + ! 
ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph osd erasure-code-profile set $profile 2>&1 || return 1 + ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph --format xml osd erasure-code-profile ls | \ + grep "<profile>$profile</profile>" || return 1 + + ceph osd erasure-code-profile rm $profile # cleanup +} + +function TEST_rm() { + local dir=$1 + local id=$2 + + run_mon $dir a || return 1 + + local profile=myprofile + ceph osd erasure-code-profile set $profile 2>&1 || return 1 + ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph osd erasure-code-profile rm $profile || return 1 + ! ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph osd erasure-code-profile rm WRONG 2>&1 | \ + grep "WRONG does not exist" || return 1 + + ceph osd erasure-code-profile set $profile || return 1 + create_pool poolname 12 12 erasure $profile || return 1 + ! ceph osd erasure-code-profile rm $profile > $dir/out 2>&1 || return 1 + grep "poolname.*using.*$profile" $dir/out || return 1 + ceph osd pool delete poolname poolname --yes-i-really-really-mean-it || return 1 + ceph osd erasure-code-profile rm $profile || return 1 + + ceph osd erasure-code-profile rm $profile # cleanup +} + +function TEST_get() { + local dir=$1 + local id=$2 + + run_mon $dir a || return 1 + + local default_profile=default + ceph osd erasure-code-profile get $default_profile | \ + grep plugin=jerasure || return 1 + ceph --format xml osd erasure-code-profile get $default_profile | \ + grep '<plugin>jerasure</plugin>' || return 1 + ! ceph osd erasure-code-profile get WRONG > $dir/out 2>&1 || return 1 + grep -q "unknown erasure code profile 'WRONG'" $dir/out || return 1 +} + +function TEST_set_idempotent() { + local dir=$1 + local id=$2 + + run_mon $dir a || return 1 + # + # The default profile is set using a code path different from + # ceph osd erasure-code-profile set: verify that it is idempotent, + # as if it was using the same code path. + # + ceph osd erasure-code-profile set default k=2 m=2 2>&1 || return 1 + local profile + # + # Because plugin=jerasure is the default, it uses a slightly + # different code path where defaults (m=1 for instance) are added + # implicitly. + # + profile=profileidempotent1 + ! ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph osd erasure-code-profile set $profile k=2 crush-failure-domain=osd 2>&1 || return 1 + ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph osd erasure-code-profile set $profile k=2 crush-failure-domain=osd 2>&1 || return 1 + ceph osd erasure-code-profile rm $profile # cleanup + + # + # In the general case the profile is exactly what is on + # + profile=profileidempotent2 + ! ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph osd erasure-code-profile set $profile plugin=lrc k=4 m=2 l=3 crush-failure-domain=osd 2>&1 || return 1 + ceph osd erasure-code-profile ls | grep $profile || return 1 + ceph osd erasure-code-profile set $profile plugin=lrc k=4 m=2 l=3 crush-failure-domain=osd 2>&1 || return 1 + ceph osd erasure-code-profile rm $profile # cleanup +} + +function TEST_format_invalid() { + local dir=$1 + + local profile=profile + # osd_pool_default_erasure-code-profile is + # valid JSON but not of the expected type + run_mon $dir a \ + --osd_pool_default_erasure-code-profile 1 || return 1 + ! 
ceph osd erasure-code-profile set $profile > $dir/out 2>&1 || return 1 + cat $dir/out + grep 'must be a JSON object' $dir/out || return 1 +} + +function TEST_format_json() { + local dir=$1 + + # osd_pool_default_erasure-code-profile is JSON + expected='"plugin":"isa"' + run_mon $dir a \ + --osd_pool_default_erasure-code-profile "{$expected}" || return 1 + ceph --format json osd erasure-code-profile get default | \ + grep "$expected" || return 1 +} + +function TEST_format_plain() { + local dir=$1 + + # osd_pool_default_erasure-code-profile is plain text + expected='"plugin":"isa"' + run_mon $dir a \ + --osd_pool_default_erasure-code-profile "plugin=isa" || return 1 + ceph --format json osd erasure-code-profile get default | \ + grep "$expected" || return 1 +} + +function TEST_profile_k_sanity() { + local dir=$1 + local profile=profile-sanity + + run_mon $dir a || return 1 + + expect_failure $dir 'k must be a multiple of (k + m) / l' \ + ceph osd erasure-code-profile set $profile \ + plugin=lrc \ + l=1 \ + k=1 \ + m=1 || return 1 + + if erasure_code_plugin_exists isa ; then + expect_failure $dir 'k=1 must be >= 2' \ + ceph osd erasure-code-profile set $profile \ + plugin=isa \ + k=1 \ + m=1 || return 1 + else + echo "SKIP because plugin isa has not been built" + fi + + expect_failure $dir 'k=1 must be >= 2' \ + ceph osd erasure-code-profile set $profile \ + plugin=jerasure \ + k=1 \ + m=1 || return 1 +} + +function TEST_invalid_crush_failure_domain() { + local dir=$1 + + run_mon $dir a || return 1 + + local profile=ec_profile + local crush_failure_domain=invalid_failure_domain + + ! ceph osd erasure-code-profile set $profile k=4 m=2 crush-failure-domain=$crush_failure_domain 2>&1 || return 1 +} + +main osd-erasure-code-profile "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/mon/osd-erasure-code-profile.sh" +# End: diff --git a/qa/standalone/mon/osd-pool-create.sh b/qa/standalone/mon/osd-pool-create.sh new file mode 100755 index 000000000..6d2c5ad3e --- /dev/null +++ b/qa/standalone/mon/osd-pool-create.sh @@ -0,0 +1,307 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2013, 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7105" # git grep '\<7105\>' : there must be only one + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + export CEPH_ARGS + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +# Before http://tracker.ceph.com/issues/8307 the invalid profile was created +function TEST_erasure_invalid_profile() { + local dir=$1 + run_mon $dir a || return 1 + local poolname=pool_erasure + local notaprofile=not-a-valid-erasure-code-profile + ! 
ceph osd pool create $poolname 12 12 erasure $notaprofile || return 1 + ! ceph osd erasure-code-profile ls | grep $notaprofile || return 1 +} + +function TEST_erasure_crush_rule() { + local dir=$1 + run_mon $dir a || return 1 + # + # choose the crush rule used with an erasure coded pool + # + local crush_rule=myrule + ! ceph osd crush rule ls | grep $crush_rule || return 1 + ceph osd crush rule create-erasure $crush_rule + ceph osd crush rule ls | grep $crush_rule + local poolname + poolname=pool_erasure1 + ! ceph --format json osd dump | grep '"crush_rule":1' || return 1 + ceph osd pool create $poolname 12 12 erasure default $crush_rule + ceph --format json osd dump | grep '"crush_rule":1' || return 1 + # + # a crush rule by the name of the pool is implicitly created + # + poolname=pool_erasure2 + ceph osd erasure-code-profile set myprofile + ceph osd pool create $poolname 12 12 erasure myprofile + ceph osd crush rule ls | grep $poolname || return 1 + # + # a non-existent crush rule given as an argument is an error + # http://tracker.ceph.com/issues/9304 + # + poolname=pool_erasure3 + ! ceph osd pool create $poolname 12 12 erasure myprofile INVALIDRULE || return 1 +} + +function TEST_erasure_code_profile_default() { + local dir=$1 + local poolname=pool_default + run_mon $dir a || return 1 + ceph osd erasure-code-profile rm default || return 1 + ! ceph osd erasure-code-profile ls | grep default || return 1 + ceph osd pool create $poolname 12 12 erasure default + ceph osd erasure-code-profile ls | grep default || return 1 +} + +function TEST_erasure_crush_stripe_unit() { + local dir=$1 + # the default stripe unit is used to initialize the pool + run_mon $dir a --public-addr $CEPH_MON + stripe_unit=$(ceph-conf --show-config-value osd_pool_erasure_code_stripe_unit) + eval local $(ceph osd erasure-code-profile get default | grep k=) + stripe_width=$((stripe_unit * k)) + ceph osd pool create pool_erasure 12 12 erasure + ceph --format json osd dump | tee $dir/osd.json + grep '"stripe_width":'$stripe_width $dir/osd.json > /dev/null || return 1 +} + +function TEST_erasure_crush_stripe_unit_padded() { + local dir=$1 + # setting osd_pool_erasure_code_stripe_unit modifies the stripe_width + # and it is padded as required by the default plugin + profile+=" plugin=jerasure" + profile+=" technique=reed_sol_van" + k=4 + profile+=" k=$k" + profile+=" m=2" + actual_stripe_unit=2048 + desired_stripe_unit=$((actual_stripe_unit - 1)) + actual_stripe_width=$((actual_stripe_unit * k)) + run_mon $dir a \ + --osd_pool_erasure_code_stripe_unit $desired_stripe_unit \ + --osd_pool_default_erasure_code_profile "$profile" || return 1 + ceph osd pool create pool_erasure 12 12 erasure + ceph osd dump | tee $dir/osd.json + grep "stripe_width $actual_stripe_width" $dir/osd.json > /dev/null || return 1 +} + +function TEST_erasure_code_pool() { + local dir=$1 + run_mon $dir a || return 1 + ceph --format json osd dump > $dir/osd.json + local expected='"erasure_code_profile":"default"' + !
grep "$expected" $dir/osd.json || return 1 + ceph osd pool create erasurecodes 12 12 erasure + ceph --format json osd dump | tee $dir/osd.json + grep "$expected" $dir/osd.json > /dev/null || return 1 + + ceph osd pool create erasurecodes 12 12 erasure 2>&1 | \ + grep 'already exists' || return 1 + ceph osd pool create erasurecodes 12 12 2>&1 | \ + grep 'cannot change to type replicated' || return 1 +} + +function TEST_replicated_pool_with_rule() { + local dir=$1 + run_mon $dir a + local rule=rule0 + local root=host1 + ceph osd crush add-bucket $root host + local failure_domain=osd + local poolname=mypool + ceph osd crush rule create-simple $rule $root $failure_domain || return 1 + ceph osd crush rule ls | grep $rule + ceph osd pool create $poolname 12 12 replicated $rule || return 1 + rule_id=`ceph osd crush rule dump $rule | grep "rule_id" | awk -F[' ':,] '{print $4}'` + ceph osd pool get $poolname crush_rule 2>&1 | \ + grep "crush_rule: $rule_id" || return 1 + #non-existent crush rule + ceph osd pool create newpool 12 12 replicated non-existent 2>&1 | \ + grep "doesn't exist" || return 1 +} + +function TEST_erasure_code_pool_lrc() { + local dir=$1 + run_mon $dir a || return 1 + + ceph osd erasure-code-profile set LRCprofile \ + plugin=lrc \ + mapping=DD_ \ + layers='[ [ "DDc", "" ] ]' || return 1 + + ceph --format json osd dump > $dir/osd.json + local expected='"erasure_code_profile":"LRCprofile"' + local poolname=erasurecodes + ! grep "$expected" $dir/osd.json || return 1 + ceph osd pool create $poolname 12 12 erasure LRCprofile + ceph --format json osd dump | tee $dir/osd.json + grep "$expected" $dir/osd.json > /dev/null || return 1 + ceph osd crush rule ls | grep $poolname || return 1 +} + +function TEST_replicated_pool() { + local dir=$1 + run_mon $dir a || return 1 + ceph osd pool create replicated 12 12 replicated replicated_rule || return 1 + ceph osd pool create replicated 12 12 replicated replicated_rule 2>&1 | \ + grep 'already exists' || return 1 + # default is replicated + ceph osd pool create replicated1 12 12 || return 1 + # default is replicated, pgp_num = pg_num + ceph osd pool create replicated2 12 || return 1 + ceph osd pool create replicated 12 12 erasure 2>&1 | \ + grep 'cannot change to type erasure' || return 1 +} + +function TEST_no_pool_delete() { + local dir=$1 + run_mon $dir a || return 1 + ceph osd pool create foo 1 || return 1 + ceph tell mon.a injectargs -- --no-mon-allow-pool-delete || return 1 + ! ceph osd pool delete foo foo --yes-i-really-really-mean-it || return 1 + ceph tell mon.a injectargs -- --mon-allow-pool-delete || return 1 + ceph osd pool delete foo foo --yes-i-really-really-mean-it || return 1 +} + +function TEST_utf8_cli() { + local dir=$1 + run_mon $dir a || return 1 + # Hopefully it's safe to include literal UTF-8 characters to test + # the fix for http://tracker.ceph.com/issues/7387. 
If it turns out + # to not be OK (when is the default encoding *not* UTF-8?), maybe + # the character '黄' can be replaced with the escape $'\xe9\xbb\x84' + OLDLANG="$LANG" + export LANG=en_US.UTF-8 + ceph osd pool create 黄 16 || return 1 + ceph osd lspools 2>&1 | \ + grep "黄" || return 1 + ceph -f json-pretty osd dump | \ + python3 -c "import json; import sys; json.load(sys.stdin)" || return 1 + ceph osd pool delete 黄 黄 --yes-i-really-really-mean-it + export LANG="$OLDLANG" +} + +function check_pool_priority() { + local dir=$1 + shift + local pools=$1 + shift + local spread="$1" + shift + local results="$1" + + setup $dir || return 1 + + EXTRA_OPTS="--debug_allow_any_pool_priority=true" + export EXTRA_OPTS + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + # Add pool 0 too + for i in $(seq 0 $pools) + do + num=$(expr $i + 1) + ceph osd pool create test${num} 1 1 + done + + wait_for_clean || return 1 + for i in $(seq 0 $pools) + do + num=$(expr $i + 1) + ceph osd pool set test${num} recovery_priority $(expr $i \* $spread) + done + + #grep "recovery_priority.*pool set" out/mon.a.log + + bin/ceph osd dump + + # Restart everything so mon converts the priorities + kill_daemons + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + activate_osd $dir 0 || return 1 + activate_osd $dir 1 || return 1 + activate_osd $dir 2 || return 1 + sleep 5 + + grep convert $dir/mon.a.log + ceph osd dump + + pos=1 + for i in $(ceph osd dump | grep ^pool | sed 's/.*recovery_priority //' | awk '{ print $1 }') + do + result=$(echo $results | awk "{ print \$${pos} }") + # A value of 0 is an unset value so sed/awk gets "pool" + if test $result = "0" + then + result="pool" + fi + test "$result" = "$i" || return 1 + pos=$(expr $pos + 1) + done +} + +function TEST_pool_pos_only_prio() { + local dir=$1 + check_pool_priority $dir 20 5 "0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10" || return 1 +} + +function TEST_pool_neg_only_prio() { + local dir=$1 + check_pool_priority $dir 20 -5 "0 0 -1 -1 -2 -2 -3 -3 -4 -4 -5 -5 -6 -6 -7 -7 -8 -8 -9 -9 -10" || return 1 +} + +function TEST_pool_both_prio() { + local dir=$1 + check_pool_priority $dir 20 "5 - 50" "-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10" || return 1 +} + +function TEST_pool_both_prio_no_neg() { + local dir=$1 + check_pool_priority $dir 20 "2 - 4" "-4 -2 0 0 1 1 2 2 3 3 4 5 5 6 6 7 7 8 8 9 10" || return 1 +} + +function TEST_pool_both_prio_no_pos() { + local dir=$1 + check_pool_priority $dir 20 "2 - 36" "-10 -9 -8 -8 -7 -7 -6 -6 -5 -5 -4 -3 -3 -2 -2 -1 -1 0 0 2 4" || return 1 +} + + +main osd-pool-create "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/mon/osd-pool-create.sh" +# End: diff --git a/qa/standalone/mon/osd-pool-df.sh b/qa/standalone/mon/osd-pool-df.sh new file mode 100755 index 000000000..d2b80ec72 --- /dev/null +++ b/qa/standalone/mon/osd-pool-df.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Tencent <contact@tencent.com> +# +# Author: Chang Liu <liuchang0812@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7113" # git grep '\<7113\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_ceph_df() { + local dir=$1 + setup $dir || return 1 + + run_mon $dir a || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + run_mgr $dir x || return 1 + + profile+=" plugin=jerasure" + profile+=" technique=reed_sol_van" + profile+=" k=4" + profile+=" m=2" + profile+=" crush-failure-domain=osd" + + ceph osd erasure-code-profile set ec42profile ${profile} + + local rep_poolname=testcephdf_replicate + local ec_poolname=testcephdf_erasurecode + create_pool $rep_poolname 6 6 replicated + create_pool $ec_poolname 6 6 erasure ec42profile + flush_pg_stats + + local global_avail=`ceph df -f json | jq '.stats.total_avail_bytes'` + local rep_avail=`ceph df -f json | jq '.pools | map(select(.name == "'$rep_poolname'"))[0].stats.max_avail'` + local ec_avail=`ceph df -f json | jq '.pools | map(select(.name == "'$ec_poolname'"))[0].stats.max_avail'` + + echo "${global_avail} >= ${rep_avail}*3" | bc || return 1 + echo "${global_avail} >= ${ec_avail}*1.5" | bc || return 1 + + ceph osd pool delete $rep_poolname $rep_poolname --yes-i-really-really-mean-it + ceph osd pool delete $ec_poolname $ec_poolname --yes-i-really-really-mean-it + ceph osd erasure-code-profile rm ec42profile + teardown $dir || return 1 +} + +main osd-pool-df "$@" diff --git a/qa/standalone/mon/test_pool_quota.sh b/qa/standalone/mon/test_pool_quota.sh new file mode 100755 index 000000000..b87ec2232 --- /dev/null +++ b/qa/standalone/mon/test_pool_quota.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash + +# +# Generic pool quota test +# + +# Includes + + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:17108" # git grep '\<17108\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done +} + +function TEST_pool_quota() { + local dir=$1 + setup $dir || return 1 + + run_mon $dir a || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + local poolname=testquota + create_pool $poolname 20 + local objects=`ceph df detail | grep -w $poolname|awk '{print $3}'` + local bytes=`ceph df detail | grep -w $poolname|awk '{print $4}'` + + echo $objects + echo $bytes + if [ $objects != 'N/A' ] || [ $bytes != 'N/A' ] ; + then + return 1 + fi + + ceph osd pool set-quota $poolname max_objects 1000 + ceph osd pool set-quota $poolname max_bytes 1024 + + objects=`ceph df detail | grep -w $poolname|awk '{print $3}'` + bytes=`ceph df detail | grep -w $poolname|awk '{print $4}'` + + if [ $objects != '1000' ] || [ $bytes != '1K' ] ; + then + return 1 + fi + + ceph osd pool delete $poolname $poolname --yes-i-really-really-mean-it + teardown $dir || return 1 +} + +main 
testpoolquota diff --git a/qa/standalone/osd-backfill/osd-backfill-prio.sh b/qa/standalone/osd-backfill/osd-backfill-prio.sh new file mode 100755 index 000000000..9749ca34c --- /dev/null +++ b/qa/standalone/osd-backfill/osd-backfill-prio.sh @@ -0,0 +1,522 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 " + CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 " + # Set osd op queue = wpq for the tests. Backfill priority is not + # considered by mclock_scheduler leading to unexpected results. + CEPH_ARGS+="--osd-op-queue=wpq " + export objects=50 + export poolprefix=test + export FORCE_PRIO="254" # See OSD_BACKFILL_PRIORITY_FORCED + export DEGRADED_PRIO="150" # See OSD_BACKFILL_DEGRADED_PRIORITY_BASE + 10 + export NORMAL_PRIO="110" # See OSD_BACKFILL_PRIORITY_BASE + 10 + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function TEST_backfill_priority() { + local dir=$1 + local pools=10 + local OSDS=5 + # size 2 -> 1 means degraded by 1, so add 1 to base prio + local degraded_prio=$(expr $DEGRADED_PRIO + 1) + local max_tries=10 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 3 pools with a pg with the same primaries but second + # replica on another osd. 
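The assertions in this test repeatedly parse the OSD's dump_recovery_reservations admin-socket output with jq. A minimal standalone sketch of that query (the socket-path helper, the CEPH_ARGS='' prefix and the output shape are taken from how the test itself uses them; the OSD id here is illustrative only):

    asok=$(get_asok_path osd.0)   # same helper the test uses; any running OSD id works
    CEPH_ARGS='' ceph --admin-daemon "$asok" dump_recovery_reservations | jq '{
      in_progress: .local_reservations.in_progress[0],
      queued: [.local_reservations.queues[].items[] | {item, prio}]
    }'

The test then compares the reported prio values against the FORCE_PRIO, DEGRADED_PRIO and NORMAL_PRIO constants exported in run().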
+ local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2 + + local PG3 + local POOLNUM3 + local pool3 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2=$test_osd2 + elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ]; + then + PG3="${p}.0" + POOLNUM3=$p + pool3="${poolprefix}$p" + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" -o "$pool3" = "" ]; + then + echo "Failure to find appropriate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ]; + then + delete_pool ${poolprefix}$p + fi + done + + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + ceph osd pool set $pool3 size 1 --yes-i-really-mean-it + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 $pool3 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd set nobackfill + ceph osd set noout + + # Get a pg to want to backfill and quickly force it + # to be preempted. + ceph osd pool set $pool3 size 2 + sleep 2 + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + # 3. Item is in progress, adjust priority with no higher priority waiting + for i in $(seq 1 $max_tries) + do + if ! ceph pg force-backfill $PG3 2>&1 | grep -q "doesn't require backfilling"; then + break + fi + if [ "$i" = "$max_tries" ]; then + echo "ERROR: Didn't appear to be able to force-backfill" + ERRORS=$(expr $ERRORS + 1) + fi + sleep 2 + done + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + ceph osd out osd.$chk_osd1_2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + ceph pg dump pgs + + ceph osd pool set $pool2 size 2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + ceph pg dump pgs + + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting" + ERRORS=$(expr $ERRORS + 1) + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The force-backfill PG $PG3 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The force-backfill PG ${PG3} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # 1.
Item is queued, re-queue with new priority + for i in $(seq 1 $max_tries) + do + if ! ceph pg force-backfill $PG2 2>&1 | grep -q "doesn't require backfilling"; then + break + fi + if [ "$i" = "$max_tries" ]; then + echo "ERROR: Didn't appear to be able to force-backfill" + ERRORS=$(expr $ERRORS + 1) + fi + sleep 2 + done + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$FORCE_PRIO" ]; + then + echo "The second force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + flush_pg_stats || return 1 + + # 4. Item is in progress, if higher priority items waiting prempt item + ceph pg cancel-force-backfill $PG3 || return 1 + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio") + if [ "$PRIO" != "$degraded_prio" ]; + then + echo "After cancel-force-backfill PG ${PG3} doesn't have prio $degraded_prio" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The force-recovery PG $PG2 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The first force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph pg cancel-force-backfill $PG2 || return 1 + sleep 5 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + # 2. 
Item is queued, re-queue and preempt because new priority higher than an in progress item + flush_pg_stats || return 1 + ceph pg force-backfill $PG3 || return 1 + sleep 2 + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$degraded_prio" ]; + then + echo "After cancel-force-backfill PG ${PG2} doesn't have prio $degraded_prio" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The force-backfill PG $PG3 didn't get promoted to an in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The force-backfill PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph osd unset noout + ceph osd unset nobackfill + + wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1 + + ceph pg dump pgs + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + delete_pool $pool3 + kill_daemons $dir || return 1 + return $ERRORS +} + +# +# Show that pool recovery_priority is added to the backfill priority +# +# Create 2 pools with 2 OSDs with different primarys +# pool 1 with recovery_priority 1 +# pool 2 with recovery_priority 2 +# +# Start backfill by changing the pool sizes from 1 to 2 +# Use dump_recovery_reservations to verify priorities +function TEST_backfill_pool_priority() { + local dir=$1 + local pools=3 # Don't assume the first 2 pools are exact what we want + local OSDS=2 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 2 pools with different primaries which + # means the replica must be on another osd. 
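The priorities asserted below follow from the constants exported in run(); a quick worked example restating the comments there (a sketch only: DEGRADED_PRIO=150, and dropping a pool from size 2 to size 1 degrades it by one replica):

    degraded_prio=$(( 150 + 1 ))     # DEGRADED_PRIO + 1 for the one missing replica
    echo $(( degraded_prio + 1 ))    # 152: expected prio for the pool with recovery_priority 1
    echo $(( degraded_prio + 2 ))    # 153: expected prio for the pool with recovery_priority 2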
+ local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2_1 + local chk_osd2_2 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ $chk_osd1_1 != $test_osd1 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2_1=$test_osd1 + chk_osd2_2=$test_osd2 + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ]; + then + delete_pool ${poolprefix}$p + fi + done + + pool1_extra_prio=1 + pool2_extra_prio=2 + # size 2 -> 1 means degraded by 1, so add 1 to base prio + pool1_prio=$(expr $DEGRADED_PRIO + 1 + $pool1_extra_prio) + pool2_prio=$(expr $DEGRADED_PRIO + 1 + $pool2_extra_prio) + + ceph osd pool set $pool1 size 1 --yes-i-really-mean-it + ceph osd pool set $pool1 recovery_priority $pool1_extra_prio + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + ceph osd pool set $pool2 recovery_priority $pool2_extra_prio + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd pool set $pool1 size 2 + ceph osd pool set $pool2 size 2 + sleep 5 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out + echo osd.${chk_osd1_1} + cat $dir/dump.${chk_osd1_1}.out + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out + echo osd.${chk_osd1_2} + cat $dir/dump.${chk_osd1_2}.out + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG ${PG1} didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG ${PG1} didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG ${PG2} didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != 
$pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG $PG2 didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + wait_for_clean || return 1 + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + kill_daemons $dir || return 1 + return $ERRORS +} + +main osd-backfill-prio "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-prio.sh" +# End: diff --git a/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh b/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh new file mode 100755 index 000000000..f9a144932 --- /dev/null +++ b/qa/standalone/osd-backfill/osd-backfill-recovery-log.sh @@ -0,0 +1,139 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? 
+ export CEPH_MON="127.0.0.1:7129" # git grep '\<7129\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 " + CEPH_ARGS+="--osd_mclock_override_recovery_settings=true " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function _common_test() { + local dir=$1 + local extra_opts="$2" + local loglen="$3" + local dupslen="$4" + local objects="$5" + local moreobjects=${6:-0} + + local OSDS=6 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + export EXTRA_OPTS=" $extra_opts" + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + create_pool test 1 1 + + for j in $(seq 1 $objects) + do + rados -p test put obj-${j} /etc/passwd + done + + # Mark out all OSDs for this pool + ceph osd out $(ceph pg dump pgs --format=json | jq '.pg_stats[0].up[]') + if [ "$moreobjects" != "0" ]; then + for j in $(seq 1 $moreobjects) + do + rados -p test put obj-more-${j} /etc/passwd + done + fi + sleep 1 + wait_for_clean + + flush_pg_stats + + newprimary=$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary') + kill_daemons + + ERRORS=0 + _objectstore_tool_nodown $dir $newprimary --no-mon-config --pgid 1.0 --op log | tee $dir/result.log + LOGLEN=$(jq '.pg_log_t.log | length' $dir/result.log) + if [ $LOGLEN != "$loglen" ]; then + echo "FAILED: Wrong log length got $LOGLEN (expected $loglen)" + ERRORS=$(expr $ERRORS + 1) + fi + DUPSLEN=$(jq '.pg_log_t.dups | length' $dir/result.log) + if [ $DUPSLEN != "$dupslen" ]; then + echo "FAILED: Wrong dups length got $DUPSLEN (expected $dupslen)" + ERRORS=$(expr $ERRORS + 1) + fi + grep "copy_up_to\|copy_after" $dir/osd.*.log + rm -f $dir/result.log + if [ $ERRORS != "0" ]; then + echo TEST FAILED + return 1 + fi +} + + +# Cause copy_up_to() to only partially copy logs, copy additional dups, and trim dups +function TEST_backfill_log_1() { + local dir=$1 + + _common_test $dir "--osd_min_pg_log_entries=1 --osd_max_pg_log_entries=2 --osd_pg_log_dups_tracked=10" 2 8 150 +} + + +# Cause copy_up_to() to only partially copy logs, copy additional dups +function TEST_backfill_log_2() { + local dir=$1 + + _common_test $dir "--osd_min_pg_log_entries=1 --osd_max_pg_log_entries=2" 2 148 150 +} + + +# Cause copy_after() to only copy logs, no dups +function TEST_recovery_1() { + local dir=$1 + + _common_test $dir "--osd_min_pg_log_entries=50 --osd_max_pg_log_entries=50 --osd_pg_log_dups_tracked=60 --osd_pg_log_trim_min=10" 40 0 40 +} + + +# Cause copy_after() to copy logs with dups +function TEST_recovery_2() { + local dir=$1 + + _common_test $dir "--osd_min_pg_log_entries=150 --osd_max_pg_log_entries=150 --osd_pg_log_dups_tracked=3000 --osd_pg_log_trim_min=10" 151 10 141 20 +} + +main osd-backfill-recovery-log "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-recovery-log.sh" +# End: diff --git a/qa/standalone/osd-backfill/osd-backfill-space.sh b/qa/standalone/osd-backfill/osd-backfill-space.sh new file mode 100755 index 000000000..6a5c69412 --- /dev/null +++ b/qa/standalone/osd-backfill/osd-backfill-space.sh @@ -0,0 +1,1176 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2018 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it 
and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7180" # git grep '\<7180\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 " + CEPH_ARGS+="--fake_statfs_for_testing=3686400 " + CEPH_ARGS+="--osd_max_backfills=10 " + CEPH_ARGS+="--osd_mclock_override_recovery_settings=true " + export objects=600 + export poolprefix=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function get_num_in_state() { + local state=$1 + local expression + expression+="select(contains(\"${state}\"))" + ceph --format json pg dump pgs 2>/dev/null | \ + jq ".pg_stats | [.[] | .state | $expression] | length" +} + + +function wait_for_not_state() { + local state=$1 + local num_in_state=-1 + local cur_in_state + local -a delays=($(get_timeout_delays $2 5)) + local -i loop=0 + + flush_pg_stats || return 1 + while test $(get_num_pgs) == 0 ; do + sleep 1 + done + + while true ; do + cur_in_state=$(get_num_in_state ${state}) + test $cur_in_state = "0" && break + if test $cur_in_state != $num_in_state ; then + loop=0 + num_in_state=$cur_in_state + elif (( $loop >= ${#delays[*]} )) ; then + ceph pg dump pgs + return 1 + fi + sleep ${delays[$loop]} + loop+=1 + done + return 0 +} + + +function wait_for_not_backfilling() { + local timeout=$1 + wait_for_not_state backfilling $timeout +} + + +function wait_for_not_activating() { + local timeout=$1 + wait_for_not_state activating $timeout +} + +# All tests are created in an environment which has fake total space +# of 3600K (3686400) which can hold 600 6K replicated objects or +# 200 18K shards of erasure coded objects. For a k=3, m=2 EC pool +# we have a theoretical 54K object but with the chunk size of 4K +# and a rounding of 4K to account for the chunks is 36K max object +# which is ((36K / 3) + 4K) * 200 = 3200K which is 88% of +# 3600K for a shard. + +# Create 2 pools with size 1 +# Write enough data that only 1 pool pg can fit per osd +# Incresase the pool size to 2 +# On 3 OSDs this should result in 1 OSD with overlapping replicas, +# so both pools can't fit. We assume pgid 1.0 and 2.0 won't +# map to the same 2 OSDs. +# At least 1 pool shouldn't have room to backfill +# All other pools should go active+clean +function TEST_backfill_test_simple() { + local dir=$1 + local pools=2 + local OSDS=3 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it + done + + wait_for_clean || return 1 + + # This won't work is if the 2 pools primary and only osds + # are the same. 
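+    # Editorial sketch (illustrative only, not part of the original test logic): the
+    # space math behind "only 1 pool pg can fit per osd".  With
+    # fake_statfs_for_testing=3686400 (3600K) and each 4K object assumed to consume
+    # roughly 6K once replication overhead is counted, 600 objects per pg fill an OSD
+    # completely, so a second pg cannot also backfill there under the 0.85
+    # backfillfull ratio set above.  The echo below is a harmless sanity print.
+    echo "space sketch: 600 * 6144 = $((600 * 6144)) of 3686400 fake bytes"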
+ + dd if=/dev/urandom of=$dir/datafile bs=1024 count=4 + for o in $(seq 1 $objects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + done + + ceph pg dump pgs + + for p in $(seq 1 $pools) + do + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + expected="$(expr $pools - 1)" + if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ]; + then + echo "$expected didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 + ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + + +# Create 8 pools of size 1 on 20 OSDs +# Write 4K * 600 objects (only 1 pool pg can fit on any given osd) +# Increase pool size to 2 +# At least 1 pool shouldn't have room to backfill +# All other pools should go active+clean +function TEST_backfill_test_multi() { + local dir=$1 + local pools=8 + local OSDS=20 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it + done + + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=4 + for o in $(seq 1 $objects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + done + + ceph pg dump pgs + + for p in $(seq 1 $pools) + do + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ERRORS=0 + full="$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" + if [ "$full" -lt "1" ]; + then + echo "At least one pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + expected="$(expr $pools - $full)" + if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "$expected" ]; + then + echo "$expected didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + ceph status + + ceph status --format=json-pretty > $dir/stat.json + + eval SEV=$(jq '.health.checks.PG_BACKFILL_FULL.severity' $dir/stat.json) + if [ "$SEV" != "HEALTH_WARN" ]; then + echo "PG_BACKFILL_FULL severity $SEV not HEALTH_WARN" + ERRORS="$(expr $ERRORS + 1)" + fi + eval MSG=$(jq '.health.checks.PG_BACKFILL_FULL.summary.message' $dir/stat.json) + if [ "$MSG" != "Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull" ]; then + echo "PG_BACKFILL_FULL message '$MSG' mismatched" + ERRORS="$(expr $ERRORS + 1)" + fi + rm -f $dir/stat.json + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + # Work around for http://tracker.ceph.com/issues/38195 + kill_daemons $dir #|| return 1 + ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + + +# To make sure that when 2 pg try to backfill at the same time to +# the same target. 
This might be covered by the simple test above +# but this makes sure we get it. +# +# Create 10 pools of size 2 and identify 2 that have the same +# non-primary osd. +# Delete all other pools +# Set size to 1 and write 4K * 600 to each pool +# Set size back to 2 +# The 2 pools should race to backfill. +# One pool goes active+clean +# The other goes acitve+...+backfill_toofull +function TEST_backfill_test_sametarget() { + local dir=$1 + local pools=10 + local OSDS=5 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 2 pools with a pg that distinct primaries but second + # replica on the same osd. + local PG1 + local POOLNUM1 + local pool1 + local chk_osd1 + local chk_osd2 + + local PG2 + local POOLNUM2 + local pool2 + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ $p = "1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1=$test_osd1 + chk_osd2=$test_osd2 + elif [ $chk_osd1 != $test_osd1 -a $chk_osd2 = $test_osd2 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ]; + then + delete_pool ${poolprefix}$p + fi + done + + ceph osd pool set $pool1 size 1 --yes-i-really-mean-it + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=4 + for i in $(seq 1 $objects) + do + rados -p $pool1 put obj$i $dir/datafile + rados -p $pool2 put obj$i $dir/datafile + done + + ceph osd pool set $pool1 size 2 + ceph osd pool set $pool2 size 2 + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool $pool1 + delete_pool $pool2 + kill_daemons $dir || return 1 + ! 
grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + +# 2 pools can't both backfill to a target which has other data +# 1 of the pools has objects that increase from 1024 to 2611 bytes +# +# Write to fill pool which is size 1 +# Take fill pool osd down (other 2 pools must go to the remaining OSDs +# Save an export of data on fill OSD and restart it +# Write an intial 1K to pool1 which has pg 2.0 +# Export 2.0 from non-fillpool OSD don't wait for it to start-up +# Take down fillpool OSD +# Put 1K object version of 2.0 on fillpool OSD +# Put back fillpool data on fillpool OSD +# With fillpool down write 2611 byte objects +# Take down $osd and bring back $fillosd simultaneously +# Wait for backfilling +# One PG will be able to backfill its remaining data +# One PG must get backfill_toofull +function TEST_backfill_multi_partial() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=3 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + + wait_for_clean || return 1 + + # Partially fill an osd + # We have room for 600 6K replicated objects, if we create 2611 byte objects + # there is 3600K - (2611 * 600) = 2070K, so the fill pool and one + # replica from the other 2 is 85% of 3600K + + dd if=/dev/urandom of=$dir/datafile bs=2611 count=1 + for o in $(seq 1 $objects) + do + rados -p fillpool put obj-fill-${o} $dir/datafile + done + + local fillosd=$(get_primary fillpool obj-fill-1) + osd=$(expr $fillosd + 1) + if [ "$osd" = "$OSDS" ]; then + osd="0" + fi + + kill_daemon $dir/osd.$fillosd.pid TERM + ceph osd out osd.$fillosd + + _objectstore_tool_nodown $dir $fillosd --op export-remove --pgid 1.0 --file $dir/fillexport.out || return 1 + activate_osd $dir $fillosd || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=1 + for o in $(seq 1 $objects) + do + rados -p "${poolprefix}1" put obj-1-${o} $dir/datafile + done + + ceph pg dump pgs + # The $osd OSD is started, but we don't wait so we can kill $fillosd at the same time + _objectstore_tool_nowait $dir $osd --op export --pgid 2.0 --file $dir/export.out + kill_daemon $dir/osd.$fillosd.pid TERM + _objectstore_tool_nodown $dir $fillosd --force --op remove --pgid 2.0 + _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out || return 1 + _objectstore_tool_nodown $dir $fillosd --op import --pgid 1.0 --file $dir/fillexport.out || return 1 + ceph pg dump pgs + sleep 20 + ceph pg dump pgs + + # re-write everything + dd if=/dev/urandom of=$dir/datafile bs=2611 count=1 + for o in $(seq 1 $objects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj-${p}-${o} $dir/datafile + done + done + + kill_daemon $dir/osd.$osd.pid TERM + ceph osd out osd.$osd + + activate_osd $dir $fillosd || return 1 + ceph osd in osd.$fillosd + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + flush_pg_stats || return 1 + ceph pg dump pgs + + ERRORS=0 + if [ "$(get_num_in_state backfill_toofull)" != "1" ]; + then + echo "One PG should be in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(get_num_in_state 
active+clean)" != "2" ]; + then + echo "Two PGs should be active+clean after one PG completed backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 + ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + +# Make sure that the amount of bytes already on the replica doesn't +# cause an out of space condition +# +# Create 1 pool and write 4K * 600 objects +# Remove 25% (150) of the objects with one OSD down (noout set) +# Increase the size of the remaining 75% (450) of the objects to 6K +# Bring back down OSD +# The pool should go active+clean +function TEST_backfill_grow() { + local dir=$1 + local poolname="test" + local OSDS=3 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + sleep 5 + + wait_for_clean || return 1 + + dd if=/dev/urandom of=${dir}/4kdata bs=1k count=4 + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i $dir/4kdata + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set noout + kill_daemons $dir TERM $otherosd || return 1 + + rmobjects=$(expr $objects / 4) + for i in $(seq 1 $rmobjects) + do + rados -p $poolname rm obj$i + done + + dd if=/dev/urandom of=${dir}/6kdata bs=6k count=1 + for i in $(seq $(expr $rmobjects + 1) $objects) + do + rados -p $poolname put obj$i $dir/6kdata + done + + activate_osd $dir $otherosd || return 1 + + ceph tell osd.$primary debug kick_recovery_wq 0 + + sleep 2 + + wait_for_clean || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 + ! grep -q "num_bytes mismatch" $dir/osd.*.log || return 1 +} + +# Create a 5 shard EC pool on 6 OSD cluster +# Fill 1 OSD with 2600K of data take that osd down. +# Write the EC pool on 5 OSDs +# Take down 1 (must contain an EC shard) +# Bring up OSD with fill data +# Not enought room to backfill to partially full OSD +function TEST_ec_backfill_simple() { + local dir=$1 + local EC=$2 + local pools=1 + local OSDS=6 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + + # Partially fill an osd + # We have room for 200 18K replicated objects, if we create 13K objects + # there is only 3600K - (13K * 200) = 1000K which won't hold + # a k=3 shard below ((18K / 3) + 4K) * 200 = 2000K + # Actual usage per shard is 8K * 200 = 1600K because 18K/3 is 6K which + # rounds to 8K. The 2000K is the ceiling on the 18K * 200 = 3600K logical + # bytes in the pool. 
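+    # Editorial arithmetic check for the comment above (the echo is illustrative only
+    # and nothing in the test consumes it): filling with 200 x 13K objects leaves
+    # 3600K - 2600K = 1000K free, while a single k=3 shard needs between
+    # 8K * 200 = 1600K (actual, after rounding 6K chunks up to 8K) and
+    # ((18K / 3) + 4K) * 200 = 2000K (ceiling), so it cannot backfill here.
+    echo "ec sketch: free=$((3686400 - 13 * 1024 * 200)) shard_min=$((8 * 1024 * 200)) shard_max=$(( (18 * 1024 / 3 + 4096) * 200 ))"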
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=13 + for o in $(seq 1 $ecobjects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + osd=$(expr $fillosd + 1) + if [ "$osd" = "$OSDS" ]; then + osd="0" + fi + + sleep 5 + kill_daemon $dir/osd.$fillosd.pid TERM + ceph osd out osd.$fillosd + sleep 2 + ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 5 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=18 + for o in $(seq 1 $ecobjects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + done + + kill_daemon $dir/osd.$osd.pid TERM + ceph osd out osd.$osd + + activate_osd $dir $fillosd || return 1 + ceph osd in osd.$fillosd + sleep 30 + + ceph pg dump pgs + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ceph pg dump pgs + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +function osdlist() { + local OSDS=$1 + local excludeosd=$2 + + osds="" + for osd in $(seq 0 $(expr $OSDS - 1)) + do + if [ $osd = $excludeosd ]; + then + continue + fi + if [ -n "$osds" ]; then + osds="${osds} " + fi + osds="${osds}${osd}" + done + echo $osds +} + +# Create a pool with size 1 and fill with data so that only 1 EC shard can fit. +# Write data to 2 EC pools mapped to the same OSDs (excluding filled one) +# Remap the last OSD to partially full OSD on both pools +# The 2 pools should race to backfill. +# One pool goes active+clean +# The other goes acitve+...+backfill_toofull +function TEST_ec_backfill_multi() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=6 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # This test requires that shards from 2 different pools + # fit on a given OSD, but both will not fix. I'm using + # making the fillosd plus 1 shard use 75% of the space, + # leaving not enough to be under the 85% set here. + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + + # Partially fill an osd + # We have room for 200 18K replicated objects, if we create 9K objects + # there is only 3600K - (9K * 200) = 1800K which will only hold + # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K + # The actual data will be (12K / 3) * 200 = 800K because the extra + # is the reservation padding for chunking. 
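+    # Editorial arithmetic for the comment above (illustrative echo, unused by the
+    # test): 200 x 9K fill objects leave 3600K - 1800K = 1800K free, which is enough
+    # for one k=3 shard at ((12K / 3) + 4K) * 200 = 1600K but not for two; that is
+    # exactly the race this test relies on.
+    echo "ec multi sketch: free=$((3686400 - 9 * 1024 * 200)) one_shard=$(( (12 * 1024 / 3 + 4096) * 200 ))"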
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=9 + for o in $(seq 1 $ecobjects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + nonfillosds="$(osdlist $OSDS $fillosd)" + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + ceph osd pg-upmap "$(expr $p + 1).0" $nonfillosds + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 15 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=12 + for o in $(seq 1 $ecobjects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile + done + done + + ceph pg dump pgs + + for p in $(seq 1 $pools) + do + ceph osd pg-upmap $(expr $p + 1).0 ${nonfillosds% *} $fillosd + done + + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ceph pg dump pgs + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +# Similar to TEST_ec_backfill_multi but one of the ec pools +# already had some data on the target OSD + +# Create a pool with size 1 and fill with data so that only 1 EC shard can fit. +# Write a small amount of data to 1 EC pool that still includes the filled one +# Take down fillosd with noout set +# Write data to 2 EC pools mapped to the same OSDs (excluding filled one) +# Remap the last OSD to partially full OSD on both pools +# The 2 pools should race to backfill. +# One pool goes active+clean +# The other goes acitve+...+backfill_toofull +function SKIP_TEST_ec_backfill_multi_partial() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=5 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + local lastosd=$(expr $OSDS - 1) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # This test requires that shards from 2 different pools + # fit on a given OSD, but both will not fix. I'm using + # making the fillosd plus 1 shard use 75% of the space, + # leaving not enough to be under the 85% set here. + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + # last osd + ceph osd pg-upmap 1.0 $lastosd + + # Partially fill an osd + # We have room for 200 18K replicated objects, if we create 9K objects + # there is only 3600K - (9K * 200) = 1800K which will only hold + # one k=3 shard below ((12K / 3) + 4K) * 200 = 1600K + # The actual data will be (12K / 3) * 200 = 800K because the extra + # is the reservation padding for chunking. 
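+    # Editorial aside: the SKIP_ prefix keeps this test out of run()'s auto-discovery,
+    # which only picks up names matching TEST_*; it can still be invoked explicitly by
+    # passing the function name on the command line.  The harmless line below just
+    # prints the names auto-discovery would find, using the same pattern run() uses.
+    set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p'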
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=9 + for o in $(seq 1 $ecobjects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + nonfillosds="$(osdlist $OSDS $fillosd)" + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd) + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 15 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=1 + for o in $(seq 1 $ecobjects) + do + rados -p "${poolprefix}1" put obj$o-1 $dir/datafile + done + + for p in $(seq 1 $pools) + do + ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $(expr $lastosd - 1)) + done + ceph pg dump pgs + + #ceph osd set noout + #kill_daemons $dir TERM osd.$lastosd || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=12 + for o in $(seq 1 $ecobjects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o-$p $dir/datafile + done + done + + ceph pg dump pgs + + # Now backfill lastosd by adding back into the upmap + for p in $(seq 1 $pools) + do + ceph osd pg-upmap "$(expr $p + 1).0" $(seq 0 $lastosd) + done + #activate_osd $dir $lastosd || return 1 + #ceph tell osd.0 debug kick_recovery_wq 0 + + sleep 30 + ceph pg dump pgs + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ceph pg dump pgs + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +function SKIP_TEST_ec_backfill_multi_partial() { + local dir=$1 + local EC=$2 + local pools=2 + local OSDS=6 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # Below we need to fit 3200K in 3600K which is 88% + # so set to 90% + ceph osd set-backfillfull-ratio .90 + + ceph osd set-require-min-compat-client luminous + create_pool fillpool 1 1 + ceph osd pool set fillpool size 1 --yes-i-really-mean-it + + # Partially fill an osd + # We have room for 200 48K ec objects, if we create 4k replicated objects + # there is 3600K - (4K * 200) = 2800K which won't hold 2 k=3 shard + # of 200 12K objects which takes ((12K / 3) + 4K) * 200 = 1600K each. + # On the other OSDs 2 * 1600K = 3200K which is 88% of 3600K. 
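+    # Editorial arithmetic for the comment above (illustrative echo only): 200 x 4K
+    # replicated fill objects leave 3600K - 800K = 2800K, two k=3 shards need
+    # 2 * ((12K / 3) + 4K) * 200 = 3200K, and 3200K / 3600K is roughly 88%, which is
+    # why this test raises the backfillfull ratio to 0.90 instead of the usual 0.85.
+    echo "ec partial sketch: free=$((3686400 - 4 * 1024 * 200)) two_shards=$((2 * (12 * 1024 / 3 + 4096) * 200))"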
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=4 + for o in $(seq 1 $objects) + do + rados -p fillpool put obj$o $dir/datafile + done + + local fillosd=$(get_primary fillpool obj1) + osd=$(expr $fillosd + 1) + if [ "$osd" = "$OSDS" ]; then + osd="0" + fi + + sleep 5 + kill_daemon $dir/osd.$fillosd.pid TERM + ceph osd out osd.$fillosd + sleep 2 + ceph osd erasure-code-profile set ec-profile k=3 m=2 crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + + for p in $(seq 1 $pools) + do + ceph osd pool create "${poolprefix}$p" 1 1 erasure ec-profile + done + + # Can't wait for clean here because we created a stale pg + #wait_for_clean || return 1 + sleep 5 + + ceph pg dump pgs + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=12 + for o in $(seq 1 $objects) + do + for p in $(seq 1 $pools) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + done + + #ceph pg map 2.0 --format=json | jq '.' + kill_daemon $dir/osd.$osd.pid TERM + ceph osd out osd.$osd + + _objectstore_tool_nodown $dir $osd --op export --pgid 2.0 --file $dir/export.out + _objectstore_tool_nodown $dir $fillosd --op import --pgid 2.0 --file $dir/export.out + + activate_osd $dir $fillosd || return 1 + ceph osd in osd.$fillosd + sleep 30 + + wait_for_not_backfilling 1200 || return 1 + wait_for_not_activating 60 || return 1 + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep +backfill_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in backfill_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + if [ "$(ceph pg dump pgs | grep -v "^1.0" | grep active+clean | wc -l)" != "1" ]; + then + echo "One didn't finish backfill" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + delete_pool fillpool + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + +# Create 1 EC pool +# Write 200 12K objects ((12K / 3) + 4K) *200) = 1600K +# Take 1 shard's OSD down (with noout set) +# Remove 50 objects ((12K / 3) + 4k) * 50) = 400K +# Write 150 36K objects (grow 150 objects) 2400K +# But there is already 1600K usage so backfill +# would be too full if it didn't account for existing data +# Bring back down OSD so it must backfill +# It should go active+clean taking into account data already there +function TEST_ec_backfill_grow() { + local dir=$1 + local poolname="test" + local OSDS=6 + local k=3 + local m=2 + local ecobjects=$(expr $objects / $k) + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-backfillfull-ratio .85 + + ceph osd set-require-min-compat-client luminous + ceph osd erasure-code-profile set ec-profile k=$k m=$m crush-failure-domain=osd technique=reed_sol_van plugin=jerasure || return 1 + ceph osd pool create $poolname 1 1 erasure ec-profile + + wait_for_clean || return 1 + + dd if=/dev/urandom of=${dir}/12kdata bs=1k count=12 + for i in $(seq 1 $ecobjects) + do + rados -p $poolname put obj$i $dir/12kdata + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set noout + kill_daemons $dir TERM $otherosd || return 1 + + rmobjects=$(expr $ecobjects / 4) + for i in $(seq 1 $rmobjects) + do + rados -p $poolname rm obj$i + done + + dd if=/dev/urandom of=${dir}/36kdata bs=1k count=36 + for i in $(seq $(expr 
$rmobjects + 1) $ecobjects) + do + rados -p $poolname put obj$i $dir/36kdata + done + + activate_osd $dir $otherosd || return 1 + + ceph tell osd.$primary debug kick_recovery_wq 0 + + sleep 2 + + wait_for_clean || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +main osd-backfill-space "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-space.sh" +# End: diff --git a/qa/standalone/osd-backfill/osd-backfill-stats.sh b/qa/standalone/osd-backfill/osd-backfill-stats.sh new file mode 100755 index 000000000..21b42a4ce --- /dev/null +++ b/qa/standalone/osd-backfill/osd-backfill-stats.sh @@ -0,0 +1,761 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10 " + export margin=10 + export objects=200 + export poolname=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function below_margin() { + local -i check=$1 + shift + local -i target=$1 + + return $(( $check <= $target && $check >= $target - $margin ? 0 : 1 )) +} + +function above_margin() { + local -i check=$1 + shift + local -i target=$1 + + return $(( $check >= $target && $check <= $target + $margin ? 
0 : 1 )) +} + +FIND_UPACT='grep "pg[[]${PG}.*backfilling.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/"' +FIND_FIRST='grep "pg[[]${PG}.*backfilling.*update_calc_stats $which " $log | grep -F " ${UPACT}${addp}" | grep -v est | head -1 | sed "s/.* \([0-9]*\)$/\1/"' +FIND_LAST='grep "pg[[]${PG}.*backfilling.*update_calc_stats $which " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/"' + +function check() { + local dir=$1 + local PG=$2 + local primary=$3 + local type=$4 + local degraded_start=$5 + local degraded_end=$6 + local misplaced_start=$7 + local misplaced_end=$8 + local primary_start=${9:-} + local primary_end=${10:-} + local check_setup=${11:-true} + + local log=$(grep -l +backfilling $dir/osd.$primary.log) + if [ $check_setup = "true" ]; + then + local alllogs=$(grep -l +backfilling $dir/osd.*.log) + if [ "$(echo "$alllogs" | wc -w)" != "1" ]; + then + echo "Test setup failure, a single OSD should have performed backfill" + return 1 + fi + fi + + local addp=" " + if [ "$type" = "erasure" ]; + then + addp="p" + fi + + UPACT=$(eval $FIND_UPACT) + [ -n "$UPACT" ] || return 1 + + # Check 3rd line at start because of false recovery starts + local which="degraded" + FIRST=$(eval $FIND_FIRST) + [ -n "$FIRST" ] || return 1 + below_margin $FIRST $degraded_start || return 1 + LAST=$(eval $FIND_LAST) + [ -n "$LAST" ] || return 1 + above_margin $LAST $degraded_end || return 1 + + # Check 3rd line at start because of false recovery starts + which="misplaced" + FIRST=$(eval $FIND_FIRST) + [ -n "$FIRST" ] || return 1 + below_margin $FIRST $misplaced_start || return 1 + LAST=$(eval $FIND_LAST) + [ -n "$LAST" ] || return 1 + above_margin $LAST $misplaced_end || return 1 + + # This is the value of set into MISSING_ON_PRIMARY + if [ -n "$primary_start" ]; + then + which="shard $primary" + FIRST=$(eval $FIND_FIRST) + [ -n "$FIRST" ] || return 1 + below_margin $FIRST $primary_start || return 1 + LAST=$(eval $FIND_LAST) + [ -n "$LAST" ] || return 1 + above_margin $LAST $primary_end || return 1 + fi +} + +# [1] -> [1, 0, 2] +# degraded 1000 -> 0 +# state: active+undersized+degraded+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 1000 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:44:23.531466 22'500 26:617 [1,0,2] 1 [1] 1 0'0 2017-10-27 09:43:44.654882 0'0 2017-10-27 09:43:44.654882 +function TEST_backfill_sizeup() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 1 --yes-i-really-mean-it + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + ceph osd set nobackfill + ceph osd pool set $poolname size 3 + sleep 2 + ceph osd unset nobackfill + + wait_for_clean || return 1 + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + + local degraded=$(expr $objects \* 2) + check $dir $PG $primary replicated $degraded 0 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + + +# [1] -> [0, 2, 4] +# degraded 1000 -> 0 +# misplaced 500 -> 0 +# state: 
active+undersized+degraded+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 1000 500 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:48:53.326849 22'500 26:603 [0,2,4] 0 [1] 1 0'0 2017-10-27 09:48:13.236253 0'0 2017-10-27 09:48:13.236253 +function TEST_backfill_sizeup_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 1 --yes-i-really-mean-it + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.$primary + ceph osd pool set $poolname size 3 + sleep 2 + ceph osd unset nobackfill + + wait_for_clean || return 1 + + local degraded=$(expr $objects \* 2) + check $dir $PG $primary replicated $degraded 0 $objects 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1 0] -> [1,2]/[1,0] +# misplaced 500 -> 0 +# state: active+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 500 0 0 100 100 active+remapped+backfilling 2017-10-27 09:51:18.800517 22'500 25:570 [1,2] 1 [1,0] 1 0'0 2017-10-27 09:50:40.441274 0'0 2017-10-27 09:50:40.441274 +function TEST_backfill_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 2 + sleep 5 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.$(get_not_primary $poolname obj1) + sleep 2 + ceph osd unset nobackfill + + wait_for_clean || return 1 + + check $dir $PG $primary replicated 0 0 $objects 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [0, 1] -> [0, 2]/[0] +# osd 1 down/out +# degraded 500 -> 0 +# state: active+undersized+degraded+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 500 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:53:24.051091 22'500 27:719 [0,2] 0 [0] 0 0'0 2017-10-27 09:52:43.188368 0'0 2017-10-27 09:52:43.188368 +function TEST_backfill_down_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || 
return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 2 + sleep 5 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set nobackfill + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + ceph osd out osd.${otherosd} + sleep 2 + ceph osd unset nobackfill + + wait_for_clean || return 1 + + check $dir $PG $primary replicated $objects 0 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1, 0] -> [2, 3, 4] +# degraded 500 -> 0 +# misplaced 1000 -> 0 +# state: active+undersized+degraded+remapped+backfilling + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 500 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-27 09:55:50.375722 23'500 27:553 [2,4,3] 2 [1,0] 1 0'0 2017-10-27 09:55:10.230919 0'0 2017-10-27 09:55:10.230919 +function TEST_backfill_out2() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 2 + sleep 5 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd pool set $poolname size 3 + ceph osd out osd.${otherosd} + ceph osd out osd.${primary} + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local misplaced=$(expr $objects \* 2) + + check $dir $PG $primary replicated $objects 0 $misplaced 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [0,1] -> [2,4,3]/[0,1] +# degraded 1000 -> 0 +# misplaced 1000 -> 500 +# state ends at active+clean+remapped [2,4,3]/[2,4,3,0] +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 1000 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-10-30 18:21:45.995149 19'500 23:1817 [2,4,3] 2 [0,1] 0 0'0 2017-10-30 18:21:05.109904 0'0 2017-10-30 18:21:05.109904 +# ENDS: +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 500 0 0 5 5 active+clean+remapped 2017-10-30 18:22:42.293730 19'500 25:2557 [2,4,3] 2 [2,4,3,0] 2 0'0 2017-10-30 18:21:05.109904 0'0 2017-10-30 18:21:05.109904 +function TEST_backfill_sizeup4_allout() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 
+ run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 2 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.$otherosd + ceph osd out osd.$primary + ceph osd pool set $poolname size 4 + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local misdeg=$(expr $objects \* 2) + check $dir $PG $primary replicated $misdeg 0 $misdeg $objects || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1,2,0] -> [3]/[1,2] +# misplaced 1000 -> 500 +# state ends at active+clean+remapped [3]/[3,1] +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 1000 0 0 100 100 active+remapped+backfilling 2017-11-28 19:13:56.092439 21'500 31:790 [3] 3 [1,2] 1 0'0 2017-11-28 19:13:28.698661 0'0 2017-11-28 19:13:28.698661 +function TEST_backfill_remapped() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + sleep 5 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.${otherosd} + for i in $(get_osds $poolname obj1) + do + if [ $i = $primary -o $i = $otherosd ]; + then + continue + fi + ceph osd out osd.$i + break + done + ceph osd out osd.${primary} + ceph osd pool set $poolname size 2 + sleep 2 + + # primary may change due to invalidating the old pg_temp, which was [1,2,0], + # but up_primary (3) chooses [0,1] for acting. 
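+    # Editorial debugging aid (read-only query, safe no-op for the test): show the
+    # up/acting sets the mon reports at this point, which is where the re-read of the
+    # primary just below comes from.
+    ceph pg map $PG --format=json | jq '{up: .up, acting: .acting}' || true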
+ primary=$(get_primary $poolname obj1) + + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + + sleep 2 + + wait_for_clean || return 1 + + local misplaced=$(expr $objects \* 2) + + check $dir $PG $primary replicated 0 0 $misplaced $objects "" "" false || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# [1,0,2] -> [4,3,NONE]/[1,0,2] +# misplaced 1500 -> 500 +# state ends at active+clean+remapped [4,3,NONE]/[4,3,2] + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 1500 0 0 100 100 active+degraded+remapped+backfilling 2017-10-31 16:53:39.467126 19'500 23:615 [4,3,NONE] 4 [1,0,2] 1 0'0 2017-10-31 16:52:59.624429 0'0 2017-10-31 16:52:59.624429 + + +# ENDS: + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 500 0 0 5 5 active+clean+remapped 2017-10-31 16:48:34.414040 19'500 25:2049 [4,3,NONE] 4 [4,3,2] 4 0'0 2017-10-31 16:46:58.203440 0'0 2017-10-31 16:46:58.203440 +function TEST_backfill_ec_all_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 erasure myprofile + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + + ceph osd set nobackfill + for o in $(get_osds $poolname obj1) + do + ceph osd out osd.$o + done + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local misplaced=$(expr $objects \* 3) + check $dir $PG $primary erasure 0 0 $misplaced $objects || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1,0,2] -> [4, 0, 2] +# misplaced 500 -> 0 +# active+remapped+backfilling +# +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 0 500 0 0 100 100 active+remapped+backfilling 2017-11-08 18:05:39.036420 24'500 27:742 [4,0,2] 4 [1,0,2] 1 0'0 2017-11-08 18:04:58.697315 0'0 2017-11-08 18:04:58.697315 +function TEST_backfill_ec_prim_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 erasure myprofile + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local 
PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + + ceph osd set nobackfill + ceph osd out osd.$primary + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local misplaced=$(expr $objects \* 3) + check $dir $PG $primary erasure 0 0 $objects 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# [1,0] -> [1,2] +# degraded 500 -> 0 +# misplaced 1000 -> 0 +# +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 500 1000 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-11-06 14:02:29.439105 24'500 29:1020 [4,3,5] 4 [1,NONE,2] 1 0'0 2017-11-06 14:01:46.509963 0'0 2017-11-06 14:01:46.509963 +function TEST_backfill_ec_down_all_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 erasure myprofile + ceph osd pool set $poolname min_size 2 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + local allosds=$(get_osds $poolname obj1) + + ceph osd set nobackfill + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + for o in $allosds + do + ceph osd out osd.$o + done + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + flush_pg_stats + + # Wait for recovery to finish + # Can't use wait_for_clean() because state goes from active+undersized+degraded+remapped+backfilling + # to active+undersized+remapped + while(true) + do + if test "$(ceph --format json pg dump pgs | + jq '.pg_stats | [.[] | .state | select(. 
== "incomplete")] | length')" -ne "0" + then + sleep 2 + continue + fi + break + done + ceph pg dump pgs + for i in $(seq 1 240) + do + if ceph pg dump pgs | grep ^$PG | grep -qv backfilling + then + break + fi + if [ $i = "240" ]; + then + echo "Timeout waiting for recovery to finish" + return 1 + fi + sleep 1 + done + + ceph pg dump pgs + + local misplaced=$(expr $objects \* 2) + check $dir $PG $primary erasure $objects 0 $misplaced 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +# [1,0,2] -> [1,3,2] +# degraded 500 -> 0 +# active+backfilling+degraded +# +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 500 0 0 0 100 100 active+undersized+degraded+remapped+backfilling 2017-11-06 13:57:25.412322 22'500 28:794 [1,3,2] 1 [1,NONE,2] 1 0'0 2017-11-06 13:54:58.033906 0'0 2017-11-06 13:54:58.033906 +function TEST_backfill_ec_down_out() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 erasure myprofile + ceph osd pool set $poolname min_size 2 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + # Remember primary during the backfill + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set nobackfill + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + ceph osd out osd.${otherosd} + # Primary might change before backfill starts + sleep 2 + primary=$(get_primary $poolname obj1) + ceph osd unset nobackfill + ceph tell osd.$primary get_latest_osdmap + ceph tell osd.$primary debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local misplaced=$(expr $objects \* 2) + check $dir $PG $primary erasure $objects 0 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +main osd-backfill-stats "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-stats.sh" +# End: diff --git a/qa/standalone/osd/bad-inc-map.sh b/qa/standalone/osd/bad-inc-map.sh new file mode 100755 index 000000000..cc3cf27cc --- /dev/null +++ b/qa/standalone/osd/bad-inc-map.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +mon_port=$(get_unused_port) + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:$mon_port" + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + set -e + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_bad_inc_map() { + local dir=$1 + + run_mon $dir a + run_mgr $dir x + run_osd $dir 0 + run_osd $dir 1 + run_osd $dir 2 + + ceph config set osd.2 osd_inject_bad_map_crc_probability 1 + + # osd map churn + create_pool foo 8 + ceph osd pool set foo min_size 1 + ceph osd pool set foo min_size 2 + + sleep 5 + + # make sure all the OSDs are still up 
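+    # despite osd.2 having been fed incremental maps with deliberately bad CRCs.
+    # Editorial, read-only sanity query (illustrative, has no effect on the test):
+    # confirm the injection knob is still set before checking liveness.
+    ceph config get osd.2 osd_inject_bad_map_crc_probability || true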
+ TIMEOUT=10 wait_for_osd up 0 + TIMEOUT=10 wait_for_osd up 1 + TIMEOUT=10 wait_for_osd up 2 + + # check for the signature in the log + grep "injecting map crc failure" $dir/osd.2.log || return 1 + grep "bailing because last" $dir/osd.2.log || return 1 + + echo success + + delete_pool foo + kill_daemons $dir || return 1 +} + +main bad-inc-map "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh bad-inc-map.sh" +# End: diff --git a/qa/standalone/osd/divergent-priors.sh b/qa/standalone/osd/divergent-priors.sh new file mode 100755 index 000000000..40d72544d --- /dev/null +++ b/qa/standalone/osd/divergent-priors.sh @@ -0,0 +1,855 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # This should multiple of 6 + export loglen=12 + export divisor=3 + export trim=$(expr $loglen / 2) + export DIVERGENT_WRITE=$(expr $trim / $divisor) + export DIVERGENT_REMOVE=$(expr $trim / $divisor) + export DIVERGENT_CREATE=$(expr $trim / $divisor) + export poolname=test + export testobjects=100 + # Fix port???? + export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + # so we will not force auth_log_shard to be acting_primary + CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 " + CEPH_ARGS+="--osd_debug_pg_log_writeout=true " + CEPH_ARGS+="--osd_min_pg_log_entries=$loglen --osd_max_pg_log_entries=$loglen --osd_pg_log_trim_min=$trim " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +# Special case divergence test +# Test handling of divergent entries with prior_version +# prior to log_tail +# based on qa/tasks/divergent_prior.py +function TEST_divergent() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 2 + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i 
$dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + local case5=$testobjects + local case3=$(expr $testobjects - 1) + # Write some soon to be divergent + echo 'writing divergent object' + rados -p $poolname put existing_$case5 $dummyfile & + echo 'create missing divergent object' + inject_eio rep data $poolname existing_$case3 $dir 0 || return 1 + rados -p $poolname get existing_$case3 $dir/existing & + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + # ensure no recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + #_objectstore_tool_nodown $dir $divergent --op log --pgid $pgid + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + + sleep 20 + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #5 + if ! 
grep -q "_merge_object_divergent_entries.*cannot roll back, removing and adding to missing" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +function TEST_divergent_ec() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_ec_pool $poolname true k=2 m=1 || return 1 + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + # Write some soon to be divergent + echo 'writing divergent object' + rados -p $poolname put existing_$testobjects $dummyfile2 & + sleep 1 + rados -p $poolname put existing_$testobjects $dummyfile & + rados -p $poolname mksnap snap1 + rados -p $poolname put existing_$(expr $testobjects - 1) $dummyfile & + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + sleep 5 + #WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # Dump logs + for i in $non_divergent + do + kill_daemons $dir KILL osd.$i || return 1 + _objectstore_tool_nodown $dir $i --op log --pgid $pgid + activate_osd $dir $i || return 1 + done + _objectstore_tool_nodown $dir $divergent --op log --pgid $pgid + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # ensure no recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd 
unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + #_objectstore_tool_nodown $dir $divergent --op log --pgid $pgid + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + + sleep 20 + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #3 + # XXX: Not reproducing this case +# if ! grep -q "_merge_object_divergent_entries.* missing, .* adjusting" $(find $dir -name '*osd*log') +# then +# echo failure +# return 1 +# fi + # Check for _merge_object_divergent_entries for case #4 + if ! grep -q "_merge_object_divergent_entries.*rolled back" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# Special case divergence test with ceph-objectstore-tool export/remove/import +# Test handling of divergent entries with prior_version +# prior to log_tail and a ceph-objectstore-tool export/import +# based on qa/tasks/divergent_prior2.py +function TEST_divergent_2() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 2 + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + local divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + # Do some creates to hit case 2 + echo 'create new divergent objects' + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname create newobject_$i & + done + # Write some soon to be divergent + echo 'writing divergent objects' + for i in $(seq 1 $DIVERGENT_WRITE) + do + rados -p $poolname put existing_$i $dummyfile2 & + done + # Remove some soon to be divergent + echo 'remove divergent objects' + for i in $(seq 1 $DIVERGENT_REMOVE) + do + rmi=$(expr $i + $DIVERGENT_WRITE) + rados -p $poolname rm 
existing_$rmi & + done + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # ensure no recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + # At this point the divergent_priors should have been detected + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + + # export a pg + expfile=$dir/exp.$$.out + _objectstore_tool_nodown $dir $divergent --op export-remove --pgid $pgid --file $expfile + _objectstore_tool_nodown $dir $divergent --op import --file $expfile + + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + wait_for_osd up $divergent + + sleep 20 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) dump_ops_in_flight + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname get newobject_$i $dir/existing + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #1 + if ! grep -q "_merge_object_divergent_entries: more recent entry found:" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + # Check for _merge_object_divergent_entries for case #2 + if ! 
grep -q "_merge_object_divergent_entries.*prior_version or op type indicates creation" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + rm $dir/$expfile + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# this is the same as case _2 above, except we enable pg autoscaling in order +# to reproduce https://tracker.ceph.com/issues/41816 +function TEST_divergent_3() { + local dir=$1 + + # something that is always there + local dummyfile='/etc/fstab' + local dummyfile2='/etc/resolv.conf' + + local num_osds=3 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + ceph osd set noout + ceph osd set noin + ceph osd set nodown + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 2 + + # reproduce https://tracker.ceph.com/issues/41816 + ceph osd pool set $poolname pg_autoscale_mode on + + divergent=-1 + start_time=$(date +%s) + max_duration=300 + + while [ "$divergent" -le -1 ] + do + flush_pg_stats || return 1 + wait_for_clean || return 1 + + # determine primary + divergent="$(ceph pg dump pgs --format=json | jq '.pg_stats[0].up_primary')" + echo "primary and soon to be divergent is $divergent" + ceph pg dump pgs + + current_time=$(date +%s) + elapsed_time=$(expr $current_time - $start_time) + if [ "$elapsed_time" -gt "$max_duration" ]; then + echo "timed out waiting for divergent" + return 1 + fi + done + + local non_divergent="" + for i in $osds + do + if [ "$i" = "$divergent" ]; then + continue + fi + non_divergent="$non_divergent $i" + done + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + local pgid=$(get_pg $poolname existing_1) + + # blackhole non_divergent + echo "blackholing osds $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) config set objectstore_blackhole 1 + done + + # Do some creates to hit case 2 + echo 'create new divergent objects' + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname create newobject_$i & + done + # Write some soon to be divergent + echo 'writing divergent objects' + for i in $(seq 1 $DIVERGENT_WRITE) + do + rados -p $poolname put existing_$i $dummyfile2 & + done + # Remove some soon to be divergent + echo 'remove divergent objects' + for i in $(seq 1 $DIVERGENT_REMOVE) + do + rmi=$(expr $i + $DIVERGENT_WRITE) + rados -p $poolname rm existing_$rmi & + done + sleep 10 + killall -9 rados + + # kill all the osds but leave divergent in + echo 'killing all the osds' + ceph pg dump pgs + kill_daemons $dir KILL osd || return 1 + for i in $osds + do + ceph osd down osd.$i + done + for i in $non_divergent + do + ceph osd out osd.$i + done + + # bring up non-divergent + echo "bringing up non_divergent $non_divergent" + ceph pg dump pgs + for i in $non_divergent + do + activate_osd $dir $i || return 1 + done + for i in $non_divergent + do + ceph osd in osd.$i + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname="existing_$(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)" + echo "writing non-divergent object $objname" + ceph pg dump pgs + rados -p $poolname put $objname $dummyfile2 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # ensure no 
recovery of up osds first + echo 'delay recovery' + ceph pg dump pgs + for i in $non_divergent + do + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${i}) set_recovery_delay 100000 + done + + # bring in our divergent friend + echo "revive divergent $divergent" + ceph pg dump pgs + ceph osd set noup + activate_osd $dir $divergent + sleep 5 + + echo 'delay recovery divergent' + ceph pg dump pgs + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) set_recovery_delay 100000 + + ceph osd unset noup + + wait_for_osd up 0 + wait_for_osd up 1 + wait_for_osd up 2 + + ceph pg dump pgs + echo 'wait for peering' + ceph pg dump pgs + rados -p $poolname put foo $dummyfile + + # At this point the divergent_priors should have been detected + + echo "killing divergent $divergent" + ceph pg dump pgs + kill_daemons $dir KILL osd.$divergent + + # export a pg + expfile=$dir/exp.$$.out + _objectstore_tool_nodown $dir $divergent --op export-remove --pgid $pgid --file $expfile + _objectstore_tool_nodown $dir $divergent --op import --file $expfile + + echo "reviving divergent $divergent" + ceph pg dump pgs + activate_osd $dir $divergent + wait_for_osd up $divergent + + sleep 20 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${divergent}) dump_ops_in_flight + + echo "allowing recovery" + ceph pg dump pgs + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in $osds + do + ceph tell osd.$i debug kick_recovery_wq 0 + done + + echo 'reading divergent objects' + ceph pg dump pgs + for i in $(seq 1 $(expr $DIVERGENT_WRITE + $DIVERGENT_REMOVE)) + do + rados -p $poolname get existing_$i $dir/existing || return 1 + done + for i in $(seq 1 $DIVERGENT_CREATE) + do + rados -p $poolname get newobject_$i $dir/existing + done + rm -f $dir/existing + + grep _merge_object_divergent_entries $(find $dir -name '*osd*log') + # Check for _merge_object_divergent_entries for case #1 + if ! grep -q "_merge_object_divergent_entries: more recent entry found:" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + # Check for _merge_object_divergent_entries for case #2 + if ! grep -q "_merge_object_divergent_entries.*prior_version or op type indicates creation" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + rm $dir/$expfile + + delete_pool $poolname + kill_daemons $dir || return 1 +} + + +main divergent-priors "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh divergent-priors.sh" +# End: diff --git a/qa/standalone/osd/ec-error-rollforward.sh b/qa/standalone/osd/ec-error-rollforward.sh new file mode 100755 index 000000000..621e6b13f --- /dev/null +++ b/qa/standalone/osd/ec-error-rollforward.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? 
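+    # Every standalone suite pins its monitor to a distinct port; as the
+    # note below says, `git grep` the port number first so that no other
+    # script under qa/standalone reuses it.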
+ export CEPH_MON="127.0.0.1:7132" # git grep '\<7132\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + export margin=10 + export objects=200 + export poolname=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_ec_error_rollforward() { + local dir=$1 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + + ceph osd erasure-code-profile set ec-profile m=2 k=2 crush-failure-domain=osd + ceph osd pool create ec 1 1 erasure ec-profile + + rados -p ec put foo /etc/passwd + + kill -STOP $(cat $dir/osd.2.pid) + + rados -p ec rm foo & + pids="$!" + sleep 1 + rados -p ec rm a & + pids+=" $!" + rados -p ec rm b & + pids+=" $!" + rados -p ec rm c & + pids+=" $!" + sleep 1 + # Use SIGKILL so stopped osd.2 will terminate + # and kill_daemons waits for daemons to die + kill_daemons $dir KILL osd + kill $pids + wait + + activate_osd $dir 0 || return 1 + activate_osd $dir 1 || return 1 + activate_osd $dir 2 || return 1 + activate_osd $dir 3 || return 1 + + wait_for_clean || return 1 +} + +main ec-error-rollforward "$@" diff --git a/qa/standalone/osd/osd-bench.sh b/qa/standalone/osd/osd-bench.sh new file mode 100755 index 000000000..eb1a6a440 --- /dev/null +++ b/qa/standalone/osd/osd-bench.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7106" # git grep '\<7106\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--debug-bluestore 20 " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_bench() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + local osd_bench_small_size_max_iops=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_small_size_max_iops) + local osd_bench_large_size_max_throughput=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_large_size_max_throughput) + local osd_bench_max_block_size=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_max_block_size) + local osd_bench_duration=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_bench_duration) + + # + # block size too high + # + expect_failure $dir osd_bench_max_block_size \ + ceph tell osd.0 bench 1024 $((osd_bench_max_block_size + 1)) || return 1 + + # + # count too high for small (< 1MB) block sizes + # + local bsize=1024 + local max_count=$(($bsize * $osd_bench_duration * $osd_bench_small_size_max_iops)) + expect_failure $dir bench_small_size_max_iops \ + ceph tell osd.0 bench $(($max_count + 1)) $bsize || return 1 + + # + # count too high for large (>= 1MB) block sizes + # + local bsize=$((1024 * 1024 + 1)) + local max_count=$(($osd_bench_large_size_max_throughput * $osd_bench_duration)) + expect_failure $dir osd_bench_large_size_max_throughput \ + ceph tell osd.0 bench $(($max_count + 1)) $bsize || return 1 + + # + # default values should work + # + ceph tell osd.0 bench || return 1 + + # + # test object_size < block_size + ceph tell osd.0 bench 10 14456 4444 3 + # + + # + # test object_size < block_size & object_size = 0(default value) + # + ceph tell osd.0 bench 1 14456 +} + +main osd-bench "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-bench.sh" +# End: diff --git a/qa/standalone/osd/osd-bluefs-volume-ops.sh b/qa/standalone/osd/osd-bluefs-volume-ops.sh new file mode 100755 index 000000000..aedfbc9b5 --- /dev/null +++ b/qa/standalone/osd/osd-bluefs-volume-ops.sh @@ -0,0 +1,497 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +[ `uname` = FreeBSD ] && exit 0 + +function run() { + local dir=$1 + shift + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_bluestore() { + local dir=$1 + + local flimit=$(ulimit -n) + if [ $flimit -lt 1536 ]; then + echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens." 
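+        # (assumption) the four OSDs in this test each keep block, block.db
+        # and block.wal open on top of rocksdb and log files, which can
+        # exhaust the common 1024-fd default; 1536 leaves some headroom.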
+ fi + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--bluestore_block_size=2147483648 " + CEPH_ARGS+="--bluestore_block_db_create=true " + CEPH_ARGS+="--bluestore_block_db_size=1073741824 " + CEPH_ARGS+="--bluestore_block_wal_size=536870912 " + CEPH_ARGS+="--bluestore_block_wal_create=true " + CEPH_ARGS+="--bluestore_fsck_on_mount=true " + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + run_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + run_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + run_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + sleep 5 + + create_pool foo 16 + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + echo "after bench" + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + while kill $osd_pid1; do sleep 1 ; done + ceph osd down 1 + while kill $osd_pid2; do sleep 1 ; done + ceph osd down 2 + while kill $osd_pid3; do sleep 1 ; done + ceph osd down 3 + + # expand slow devices + ceph-bluestore-tool --path $dir/0 fsck || return 1 + ceph-bluestore-tool --path $dir/1 fsck || return 1 + ceph-bluestore-tool --path $dir/2 fsck || return 1 + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + truncate $dir/0/block -s 4294967296 # 4GB + ceph-bluestore-tool --path $dir/0 bluefs-bdev-expand || return 1 + truncate $dir/1/block -s 4311744512 # 4GB + 16MB + ceph-bluestore-tool --path $dir/1 bluefs-bdev-expand || return 1 + truncate $dir/2/block -s 4295099392 # 4GB + 129KB + ceph-bluestore-tool --path $dir/2 bluefs-bdev-expand || return 1 + truncate $dir/3/block -s 4293918720 # 4GB - 1MB + ceph-bluestore-tool --path $dir/3 bluefs-bdev-expand || return 1 + + # slow, DB, WAL -> slow, DB + ceph-bluestore-tool --path $dir/0 fsck || return 1 + ceph-bluestore-tool --path $dir/1 fsck || return 1 + ceph-bluestore-tool --path $dir/2 fsck || return 1 + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + ceph-bluestore-tool --path $dir/0 bluefs-bdev-sizes + + ceph-bluestore-tool --path $dir/0 \ + --devs-source $dir/0/block.wal \ + --dev-target $dir/0/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, DB, WAL -> slow, WAL + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block.db \ + --dev-target $dir/1/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + # slow, DB, WAL -> slow + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block.wal \ + --devs-source $dir/2/block.db \ + --dev-target $dir/2/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow, DB, WAL -> slow, WAL (negative case) + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block.db \ + --dev-target $dir/3/block.wal \ + --command bluefs-bdev-migrate + + # Migration to WAL is unsupported + if [ $? 
-eq 0 ]; then + return 1 + fi + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + # slow, DB, WAL -> slow, DB (WAL to slow then slow to DB) + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block.wal \ + --dev-target $dir/3/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block \ + --dev-target $dir/3/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + activate_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + activate_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + activate_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + wait_for_clean || return 1 + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + while kill $osd_pid1; do sleep 1 ; done + ceph osd down 1 + while kill $osd_pid2; do sleep 1 ; done + ceph osd down 2 + while kill $osd_pid3; do sleep 1 ; done + ceph osd down 3 + + # slow, DB -> slow, DB, WAL + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + dd if=/dev/zero of=$dir/0/wal count=512 bs=1M + ceph-bluestore-tool --path $dir/0 \ + --dev-target $dir/0/wal \ + --command bluefs-bdev-new-wal || return 1 + + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, WAL -> slow, DB, WAL + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + dd if=/dev/zero of=$dir/1/db count=1024 bs=1M + ceph-bluestore-tool --path $dir/1 \ + --dev-target $dir/1/db \ + --command bluefs-bdev-new-db || return 1 + + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block \ + --dev-target $dir/1/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + # slow -> slow, DB, WAL + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + ceph-bluestore-tool --path $dir/2 \ + --command bluefs-bdev-new-db || return 1 + + ceph-bluestore-tool --path $dir/2 \ + --command bluefs-bdev-new-wal || return 1 + + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block \ + --dev-target $dir/2/block.db \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow, DB -> slow, WAL + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + ceph-bluestore-tool --path $dir/3 \ + --command bluefs-bdev-new-wal || return 1 + + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block.db \ + --dev-target $dir/3/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + activate_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + activate_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + activate_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + while kill $osd_pid1; do sleep 1 ; done + ceph osd down 1 + while kill $osd_pid2; do sleep 1 ; done + ceph osd down 2 + while kill $osd_pid3; do sleep 1 ; done + ceph osd down 3 + + # slow, DB1, WAL -> slow, DB2, WAL + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + dd if=/dev/zero of=$dir/0/db2 count=1024 bs=1M + ceph-bluestore-tool 
--path $dir/0 \ + --devs-source $dir/0/block.db \ + --dev-target $dir/0/db2 \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, DB, WAL1 -> slow, DB, WAL2 + + dd if=/dev/zero of=$dir/0/wal2 count=512 bs=1M + ceph-bluestore-tool --path $dir/0 \ + --devs-source $dir/0/block.wal \ + --dev-target $dir/0/wal2 \ + --command bluefs-bdev-migrate || return 1 + rm -rf $dir/0/wal + + ceph-bluestore-tool --path $dir/0 fsck || return 1 + + # slow, DB + WAL -> slow, DB2 -> slow + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + dd if=/dev/zero of=$dir/1/db2 count=1024 bs=1M + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block.db \ + --devs-source $dir/1/block.wal \ + --dev-target $dir/1/db2 \ + --command bluefs-bdev-migrate || return 1 + + rm -rf $dir/1/db + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + ceph-bluestore-tool --path $dir/1 \ + --devs-source $dir/1/block.db \ + --dev-target $dir/1/block \ + --command bluefs-bdev-migrate || return 1 + + rm -rf $dir/1/db2 + + ceph-bluestore-tool --path $dir/1 fsck || return 1 + + # slow -> slow, DB (negative case) + ceph-objectstore-tool --type bluestore --data-path $dir/2 \ + --op fsck --no-mon-config || return 1 + + dd if=/dev/zero of=$dir/2/db2 count=1024 bs=1M + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block \ + --dev-target $dir/2/db2 \ + --command bluefs-bdev-migrate + + # Migration from slow-only to new device is unsupported + if [ $? -eq 0 ]; then + return 1 + fi + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow + DB + WAL -> slow, DB2 + dd if=/dev/zero of=$dir/2/db2 count=1024 bs=1M + + ceph-bluestore-tool --path $dir/2 \ + --devs-source $dir/2/block \ + --devs-source $dir/2/block.db \ + --devs-source $dir/2/block.wal \ + --dev-target $dir/2/db2 \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/2 fsck || return 1 + + # slow + WAL -> slow2, WAL2 + dd if=/dev/zero of=$dir/3/wal2 count=1024 bs=1M + + ceph-bluestore-tool --path $dir/3 \ + --devs-source $dir/3/block \ + --devs-source $dir/3/block.wal \ + --dev-target $dir/3/wal2 \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/3 fsck || return 1 + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + activate_osd $dir 1 || return 1 + osd_pid1=$(cat $dir/osd.1.pid) + activate_osd $dir 2 || return 1 + osd_pid2=$(cat $dir/osd.2.pid) + activate_osd $dir 3 || return 1 + osd_pid3=$(cat $dir/osd.3.pid) + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + + wait_for_clean || return 1 +} + +function TEST_bluestore2() { + local dir=$1 + + local flimit=$(ulimit -n) + if [ $flimit -lt 1536 ]; then + echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens." 
+ fi + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--bluestore_block_size=4294967296 " + CEPH_ARGS+="--bluestore_block_db_create=true " + CEPH_ARGS+="--bluestore_block_db_size=1073741824 " + CEPH_ARGS+="--bluestore_block_wal_create=false " + CEPH_ARGS+="--bluestore_fsck_on_mount=true " + CEPH_ARGS+="--osd_pool_default_size=1 " + CEPH_ARGS+="--osd_pool_default_min_size=1 " + CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd " + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + + sleep 5 + create_pool foo 16 + + retry = 0 + while [[ $retry -le 5 ]]; do + # write some objects + timeout 60 rados bench -p foo 10 write --write-omap --no-cleanup #|| return 1 + + #give RocksDB some time to cooldown and put files to slow level(s) + sleep 10 + + db_used=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.db_used_bytes" ) + spilled_over=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_used_bytes" ) + ((retry+=1)) + test $spilled_over -eq 0 || break + done + test $spilled_over -gt 0 || return 1 + + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + + ceph-bluestore-tool --path $dir/0 \ + --devs-source $dir/0/block.db \ + --dev-target $dir/0/block \ + --command bluefs-bdev-migrate || return 1 + + ceph-bluestore-tool --path $dir/0 \ + --command bluefs-bdev-sizes || return 1 + + ceph-bluestore-tool --path $dir/0 \ + --command fsck || return 1 + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + + wait_for_clean || return 1 +} + +function TEST_bluestore_expand() { + local dir=$1 + + local flimit=$(ulimit -n) + if [ $flimit -lt 1536 ]; then + echo "Low open file limit ($flimit), test may fail. Increase to 1536 or higher and retry if that happens." 
+ fi + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--bluestore_block_size=4294967296 " + CEPH_ARGS+="--bluestore_block_db_create=true " + CEPH_ARGS+="--bluestore_block_db_size=1073741824 " + CEPH_ARGS+="--bluestore_block_wal_create=false " + CEPH_ARGS+="--bluestore_fsck_on_mount=true " + CEPH_ARGS+="--osd_pool_default_size=1 " + CEPH_ARGS+="--osd_pool_default_min_size=1 " + CEPH_ARGS+="--bluestore_debug_enforce_settings=ssd " + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + + sleep 5 + create_pool foo 16 + + # write some objects + timeout 60 rados bench -p foo 30 write -b 4096 --no-cleanup #|| return 1 + sleep 5 + + total_space_before=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" ) + free_space_before=`ceph tell osd.0 bluestore bluefs device info | grep "BDEV_SLOW" -A 2 | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2` + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + + # destage allocation to file before expand (in case fast-shutdown skipped that step) + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 allocmap || return 1 + + # expand slow devices + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 fsck || return 1 + + requested_space=4294967296 # 4GB + truncate $dir/0/block -s $requested_space + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 bluefs-bdev-expand || return 1 + + # slow, DB, WAL -> slow, DB + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 fsck || return 1 + + # compare allocation-file with RocksDB state + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 qfsck || return 1 + + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 bluefs-bdev-sizes + + activate_osd $dir 0 || return 1 + osd_pid0=$(cat $dir/osd.0.pid) + + wait_for_clean || return 1 + + total_space_after=$( ceph tell osd.0 perf dump bluefs | jq ".bluefs.slow_total_bytes" ) + free_space_after=`ceph tell osd.0 bluestore bluefs device info | grep "BDEV_SLOW" -A 2 | grep free | cut -d':' -f 2 | cut -d"," -f 1 | cut -d' ' -f 2` + + if [$total_space_after != $requested_space]; then + echo "total_space_after = $total_space_after" + echo "requested_space = $requested_space" + return 1; + fi + + total_space_added=$((total_space_after - total_space_before)) + free_space_added=$((free_space_after - free_space_before)) + + let new_used_space=($total_space_added - $free_space_added) + echo $new_used_space + # allow upto 128KB to be consumed + if [ $new_used_space -gt 131072 ]; then + echo "total_space_added = $total_space_added" + echo "free_space_added = $free_space_added" + return 1; + fi + + # kill + while kill $osd_pid0; do sleep 1 ; done + ceph osd down 0 + + ceph-bluestore-tool --log-file $dir/bluestore_tool.log --path $dir/0 qfsck || return 1 +} + +main osd-bluefs-volume-ops "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && test/osd/osd-bluefs-volume-ops.sh" +# End: diff --git a/qa/standalone/osd/osd-config.sh b/qa/standalone/osd/osd-config.sh new file mode 100755 index 000000000..126c2f7de --- /dev/null +++ b/qa/standalone/osd/osd-config.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7100" # git grep '\<7100\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_config_init() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local stale=1000 + local cache=500 + run_osd $dir 0 \ + --osd-map-cache-size=$cache \ + --osd-pg-epoch-persisted-max-stale=$stale \ + || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1 +} + +function TEST_config_track() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + local osd_map_cache_size=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_map_cache_size) + local osd_pg_epoch_persisted_max_stale=$(CEPH_ARGS='' ceph-conf \ + --show-config-value osd_pg_epoch_persisted_max_stale) + + # + # increase the osd_pg_epoch_persisted_max_stale above the default cache_size + # + ! 
grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1 + local stale=$(($osd_map_cache_size * 2)) + ceph tell osd.0 injectargs "--osd-pg-epoch-persisted-max-stale $stale" || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log flush || return 1 + grep 'is not > osd_pg_epoch_persisted_max_stale' $dir/osd.0.log || return 1 + rm $dir/osd.0.log + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.0) log reopen || return 1 +} + +function TEST_default_adjustment() { + a=$(ceph-osd --no-mon-config --show-config-value rgw_torrent_origin) + b=$(ceph-osd --no-mon-config --show-config-value rgw_torrent_origin --default-rgw-torrent-origin default) + c=$(ceph-osd --no-mon-config --show-config-value rgw_torrent_origin --default-rgw-torrent-origin arg) + [ "$a" != "default" ] || return 1 + [ "$b" = "default" ] || return 1 + [ "$c" = "arg" ] || return 1 + + a=$(ceph-osd --no-mon-config --show-config-value log_to_file) + b=$(ceph-osd --no-mon-config --show-config-value log_to_file --default-log-to-file=false) + c=$(ceph-osd --no-mon-config --show-config-value log_to_file --default-log-to-file=false --log-to-file) + [ "$a" = "true" ] || return 1 + [ "$b" = "false" ] || return 1 + [ "$c" = "true" ] || return 1 +} + +main osd-config "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-config.sh" +# End: diff --git a/qa/standalone/osd/osd-copy-from.sh b/qa/standalone/osd/osd-copy-from.sh new file mode 100755 index 000000000..8ac0ab541 --- /dev/null +++ b/qa/standalone/osd/osd-copy-from.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# Author: Sage Weil <sage@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7111" # git grep '\<7111\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_copy_from() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + + # success + rados -p rbd put foo $(which rados) + rados -p rbd cp foo foo2 + rados -p rbd stat foo2 + + # failure + ceph tell osd.\* injectargs -- --osd-debug-inject-copyfrom-error + ! rados -p rbd cp foo foo3 + ! rados -p rbd stat foo3 + + # success again + ceph tell osd.\* injectargs -- --no-osd-debug-inject-copyfrom-error + ! rados -p rbd cp foo foo3 + rados -p rbd stat foo3 +} + +main osd-copy-from "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && test/osd/osd-bench.sh" +# End: diff --git a/qa/standalone/osd/osd-dup.sh b/qa/standalone/osd/osd-dup.sh new file mode 100755 index 000000000..ab442c538 --- /dev/null +++ b/qa/standalone/osd/osd-dup.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +[ `uname` = FreeBSD ] && exit 0 + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7146" # git grep '\<7146\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + # avoid running out of fds in rados bench + CEPH_ARGS+="--filestore_wbthrottle_xfs_ios_hard_limit=900 " + CEPH_ARGS+="--filestore_wbthrottle_btrfs_ios_hard_limit=900 " + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +main osd-dup "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-dup.sh" +# End: diff --git a/qa/standalone/osd/osd-fast-mark-down.sh b/qa/standalone/osd/osd-fast-mark-down.sh new file mode 100755 index 000000000..0ef9d8ce4 --- /dev/null +++ b/qa/standalone/osd/osd-fast-mark-down.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2016 Piotr Dałek <git@predictor.org.pl> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Piotr Dałek <git@predictor.org.pl> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh +MAX_PROPAGATION_TIME=30 + +function run() { + local dir=$1 + shift + rm -f $dir/*.pid + export CEPH_MON="127.0.0.1:7126" # git grep '\<7126\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + + OLD_ARGS=$CEPH_ARGS + CEPH_ARGS+="--osd-fast-fail-on-connection-refused=false " + echo "Ensuring old behavior is there..." + test_fast_kill $dir && (echo "OSDs died too early! Old behavior doesn't work." ; return 1) + + CEPH_ARGS=$OLD_ARGS"--osd-fast-fail-on-connection-refused=true " + OLD_ARGS=$CEPH_ARGS + + CEPH_ARGS=$OLD_ARGS"--ms_type=async --mon-host=$CEPH_MON" + echo "Testing async msgr..." + test_fast_kill $dir || return 1 + + return 0 + +} + +function test_fast_kill() { + # create cluster with 3 osds + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + for oi in {0..2}; do + run_osd $dir $oi || return 1 + pids[$oi]=$(cat $dir/osd.$oi.pid) + done + + create_rbd_pool || return 1 + + # make some objects so osds to ensure connectivity between osds + timeout 20 rados -p rbd bench 10 write -b 4096 --max-objects 128 --no-cleanup || return 1 + sleep 1 + + killid=0 + previd=0 + + # kill random osd and see if after max MAX_PROPAGATION_TIME, the osd count decreased. 
+ for i in {1..2}; do + while [ $killid -eq $previd ]; do + killid=${pids[$RANDOM%${#pids[@]}]} + done + previd=$killid + + kill -9 $killid + time_left=$MAX_PROPAGATION_TIME + down_osds=0 + + while [ $time_left -gt 0 ]; do + sleep 1 + time_left=$[$time_left - 1]; + + grep -m 1 -c -F "ms_handle_refused" $dir/osd.*.log > /dev/null + if [ $? -ne 0 ]; then + continue + fi + + down_osds=$(ceph osd tree | grep -c down) + if [ $down_osds -lt $i ]; then + # osds not marked down yet, try again in a second + continue + elif [ $down_osds -gt $i ]; then + echo Too many \($down_osds\) osds died! + return 1 + else + break + fi + done + + if [ $down_osds -lt $i ]; then + echo Killed the OSD, yet it is not marked down + ceph osd tree + return 1 + fi + done + pkill -SIGTERM rados + teardown $dir || return 1 +} + +main osd-fast-mark-down "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-fast-mark-down.sh" +# End: diff --git a/qa/standalone/osd/osd-force-create-pg.sh b/qa/standalone/osd/osd-force-create-pg.sh new file mode 100755 index 000000000..ca4b0239e --- /dev/null +++ b/qa/standalone/osd/osd-force-create-pg.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7145" # git grep '\<7145\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_reuse_id() { + local dir=$1 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd pool create foo 50 || return 1 + wait_for_clean || return 1 + + kill_daemons $dir TERM osd.0 + kill_daemons $dir TERM osd.1 + kill_daemons $dir TERM osd.2 + ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force + ceph-objectstore-tool --data-path $dir/1 --op remove --pgid 1.0 --force + ceph-objectstore-tool --data-path $dir/2 --op remove --pgid 1.0 --force + activate_osd $dir 0 || return 1 + activate_osd $dir 1 || return 1 + activate_osd $dir 2 || return 1 + sleep 10 + ceph pg ls | grep 1.0 | grep stale || return 1 + + ceph osd force-create-pg 1.0 --yes-i-really-mean-it || return 1 + wait_for_clean || return 1 +} + +main osd-force-create-pg "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-force-create-pg.sh" +# End: diff --git a/qa/standalone/osd/osd-markdown.sh b/qa/standalone/osd/osd-markdown.sh new file mode 100755 index 000000000..5c4a78440 --- /dev/null +++ b/qa/standalone/osd/osd-markdown.sh @@ -0,0 +1,149 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Intel <contact@intel.com.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Xiaoxi Chen <xiaoxi.chen@intel.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7108" # git grep '\<7108\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function markdown_N_impl() { + markdown_times=$1 + total_time=$2 + sleeptime=$3 + for i in `seq 1 $markdown_times` + do + # check the OSD is UP + ceph tell osd.0 get_latest_osdmap || return 1 + ceph osd tree + ceph osd tree | grep osd.0 |grep up || return 1 + # mark the OSD down. + # override any dup setting in the environment to ensure we do this + # exactly once (modulo messenger failures, at least; we can't *actually* + # provide exactly-once semantics for mon commands). + ( unset CEPH_CLI_TEST_DUP_COMMAND ; ceph osd down 0 ) + sleep $sleeptime + done +} + + +function TEST_markdown_exceed_maxdown_count() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + create_rbd_pool || return 1 + + # 3+1 times within 300s, osd should stay dead on the 4th time + local count=3 + local sleeptime=10 + local period=300 + ceph tell osd.0 injectargs '--osd_max_markdown_count '$count'' || return 1 + ceph tell osd.0 injectargs '--osd_max_markdown_period '$period'' || return 1 + + markdown_N_impl $(($count+1)) $period $sleeptime + # down N+1 times ,the osd.0 should die + ceph osd tree | grep down | grep osd.0 || return 1 +} + +function TEST_markdown_boot() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + create_rbd_pool || return 1 + + # 3 times within 120s, should stay up + local count=3 + local sleeptime=10 + local period=120 + ceph tell osd.0 injectargs '--osd_max_markdown_count '$count'' || return 1 + ceph tell osd.0 injectargs '--osd_max_markdown_period '$period'' || return 1 + + markdown_N_impl $count $period $sleeptime + #down N times, osd.0 should be up + sleep 15 # give osd plenty of time to notice and come back up + ceph tell osd.0 get_latest_osdmap || return 1 + ceph osd tree | grep up | grep osd.0 || return 1 +} + +function TEST_markdown_boot_exceed_time() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + create_rbd_pool || return 1 + + # 3+1 times, but over 40s, > 20s, so should stay up + local count=3 + local period=20 + local sleeptime=10 + ceph tell osd.0 injectargs '--osd_max_markdown_count '$count'' || return 1 + ceph tell osd.0 injectargs '--osd_max_markdown_period '$period'' || return 1 + + markdown_N_impl $(($count+1)) $period $sleeptime + sleep 15 # give osd plenty of time to notice and come back up + ceph tell osd.0 get_latest_osdmap || return 1 + ceph osd tree | grep up | grep osd.0 || return 1 +} + +function TEST_osd_stop() { + + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + osd_0_pid=$(cat $dir/osd.0.pid) + ps -p $osd_0_pid || return 1 + + ceph osd tree | grep osd.0 | grep up || return 1 + 
ceph osd stop osd.0 + sleep 15 # give osd plenty of time to notice and exit + ceph osd tree | grep down | grep osd.0 || return 1 + ! ps -p $osd_0_pid || return 1 +} + +main osd-markdown "$@" diff --git a/qa/standalone/osd/osd-reactivate.sh b/qa/standalone/osd/osd-reactivate.sh new file mode 100755 index 000000000..6d6438629 --- /dev/null +++ b/qa/standalone/osd/osd-reactivate.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# +# Author: Vicente Cheng <freeze.bilsted@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7122" # git grep '\<7122\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_reactivate() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + kill_daemons $dir TERM osd || return 1 + + ready_path=$dir"/0/ready" + activate_path=$dir"/0/active" + # trigger mkfs again + rm -rf $ready_path $activate_path + activate_osd $dir 0 || return 1 + +} + +main osd-reactivate "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-reactivate.sh" +# End: diff --git a/qa/standalone/osd/osd-recovery-prio.sh b/qa/standalone/osd/osd-recovery-prio.sh new file mode 100755 index 000000000..02b65f67a --- /dev/null +++ b/qa/standalone/osd/osd-recovery-prio.sh @@ -0,0 +1,542 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7114" # git grep '\<7114\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --osd_max_backfills=1 --debug_reserver=20 " + # Set osd op queue = wpq for the tests. Recovery priority is not + # considered by mclock_scheduler leading to unexpected results. 
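+    # The checks below assert on the exact prio values reported by
+    # dump_recovery_reservations (FORCE_PRIO vs NORMAL_PRIO), which only
+    # hold when the op queue honours recovery priorities.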
+ CEPH_ARGS+="--osd-op-queue=wpq " + export objects=200 + export poolprefix=test + export FORCE_PRIO="255" # See OSD_RECOVERY_PRIORITY_FORCED + export NORMAL_PRIO="190" # See OSD_RECOVERY_PRIORITY_BASE + 10 + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function TEST_recovery_priority() { + local dir=$1 + local pools=10 + local OSDS=5 + local max_tries=10 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 3 pools with a pg with the same primaries but second + # replica on another osd. + local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2 + + local PG3 + local POOLNUM3 + local pool3 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ -z "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2=$test_osd2 + elif [ -n "$PG2" -a $chk_osd1_1 = $test_osd1 -a $chk_osd1_2 != $test_osd2 -a "$chk_osd2" != $test_osd2 ]; + then + PG3="${p}.0" + POOLNUM3=$p + pool3="${poolprefix}$p" + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" -o "pool3" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 -a $p != $POOLNUM3 ]; + then + delete_pool ${poolprefix}$p + fi + done + + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + ceph osd pool set $pool3 size 1 --yes-i-really-mean-it + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 $pool3 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd set norecover + ceph osd set noout + + # Get a pg to want to recover and quickly force it + # to be preempted. + ceph osd pool set $pool3 size 2 + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + # 3. Item is in progress, adjust priority with no higher priority waiting + for i in $(seq 1 $max_tries) + do + if ! 
ceph pg force-recovery $PG3 2>&1 | grep -q "doesn't require recovery"; then + break + fi + if [ "$i" = "$max_tries" ]; then + echo "ERROR: Didn't appear to be able to force-recovery" + ERRORS=$(expr $ERRORS + 1) + fi + sleep 2 + done + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + ceph osd out osd.$chk_osd1_2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + ceph pg dump pgs + + ceph osd pool set $pool2 size 2 + sleep 2 + flush_pg_stats || return 1 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + ceph pg dump pgs + + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG1}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "The normal PG ${PG1} doesn't have prio $NORMAL_PRIO queued waiting" + ERRORS=$(expr $ERRORS + 1) + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The first force-recovery PG $PG3 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The first force-recovery PG ${PG3} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # 1. Item is queued, re-queue with new priority + for i in $(seq 1 $max_tries) + do + if ! ceph pg force-recovery $PG2 2>&1 | grep -q "doesn't require recovery"; then + break + fi + if [ "$i" = "$max_tries" ]; then + echo "ERROR: Didn't appear to be able to force-recovery" + ERRORS=$(expr $ERRORS + 1) + fi + sleep 2 + done + sleep 2 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$FORCE_PRIO" ]; + then + echo "The second force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + flush_pg_stats || return 1 + + # 4. Item is in progress, if higher priority items waiting prempt item + #ceph osd unset norecover + ceph pg cancel-force-recovery $PG3 || return 1 + sleep 2 + #ceph osd set norecover + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG3}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "After cancel-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The force-recovery PG $PG2 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The first force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph pg cancel-force-recovery $PG2 || return 1 + sleep 5 + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations || return 1 + + # 2. 
Item is queued, re-queue and preempt because new priority higher than an in progress item + flush_pg_stats || return 1 + ceph pg force-recovery $PG3 || return 1 + sleep 2 + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/out || return 1 + cat $dir/out + PRIO=$(cat $dir/out | jq "(.local_reservations.queues[].items[] | select(.item == \"${PG2}\")).prio") + if [ "$PRIO" != "$NORMAL_PRIO" ]; + then + echo "After cancel-force-recovery PG ${PG3} doesn't have prio $NORMAL_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + + eval ITEM=$(cat $dir/out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG3} ]; + then + echo "The force-recovery PG $PG3 didn't get promoted to an in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $FORCE_PRIO ]; + then + echo "The force-recovery PG ${PG2} doesn't have prio $FORCE_PRIO" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + ceph osd unset noout + ceph osd unset norecover + + wait_for_clean "CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations" || return 1 + + ceph pg dump pgs + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_pgstate_history + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + delete_pool $pool3 + kill_daemons $dir || return 1 + return $ERRORS +} + +# +# Show that pool recovery_priority is added to recovery priority +# +# Create 2 pools with 2 OSDs with different primarys +# pool 1 with recovery_priority 1 +# pool 2 with recovery_priority 2 +# +# Start recovery by changing the pool sizes from 1 to 2 +# Use dump_recovery_reservations to verify priorities +function TEST_recovery_pool_priority() { + local dir=$1 + local pools=3 # Don't assume the first 2 pools are exact what we want + local OSDS=2 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 2 + done + sleep 5 + + wait_for_clean || return 1 + + ceph pg dump pgs + + # Find 2 pools with different primaries which + # means the replica must be on another osd. 
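+ # The first pool found below fills PG1 / chk_osd1_1 / chk_osd1_2; the next
+ # pool whose primary differs fills PG2 / chk_osd2_1 / chk_osd2_2.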
+ local PG1 + local POOLNUM1 + local pool1 + local chk_osd1_1 + local chk_osd1_2 + + local PG2 + local POOLNUM2 + local pool2 + local chk_osd2_1 + local chk_osd2_2 + + for p in $(seq 1 $pools) + do + ceph pg map ${p}.0 --format=json | jq '.acting[]' > $dir/acting + local test_osd1=$(head -1 $dir/acting) + local test_osd2=$(tail -1 $dir/acting) + if [ -z "$PG1" ]; + then + PG1="${p}.0" + POOLNUM1=$p + pool1="${poolprefix}$p" + chk_osd1_1=$test_osd1 + chk_osd1_2=$test_osd2 + elif [ $chk_osd1_1 != $test_osd1 ]; + then + PG2="${p}.0" + POOLNUM2=$p + pool2="${poolprefix}$p" + chk_osd2_1=$test_osd1 + chk_osd2_2=$test_osd2 + break + fi + done + rm -f $dir/acting + + if [ "$pool2" = "" ]; + then + echo "Failure to find appropirate PGs" + return 1 + fi + + for p in $(seq 1 $pools) + do + if [ $p != $POOLNUM1 -a $p != $POOLNUM2 ]; + then + delete_pool ${poolprefix}$p + fi + done + + pool1_extra_prio=1 + pool2_extra_prio=2 + pool1_prio=$(expr $NORMAL_PRIO + $pool1_extra_prio) + pool2_prio=$(expr $NORMAL_PRIO + $pool2_extra_prio) + + ceph osd pool set $pool1 size 1 --yes-i-really-mean-it + ceph osd pool set $pool1 recovery_priority $pool1_extra_prio + ceph osd pool set $pool2 size 1 --yes-i-really-mean-it + ceph osd pool set $pool2 recovery_priority $pool2_extra_prio + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/data bs=1M count=10 + p=1 + for pname in $pool1 $pool2 + do + for i in $(seq 1 $objects) + do + rados -p ${pname} put obj${i}-p${p} $dir/data + done + p=$(expr $p + 1) + done + + local otherosd=$(get_not_primary $pool1 obj1-p1) + + ceph pg dump pgs + ERRORS=0 + + ceph osd pool set $pool1 size 2 + ceph osd pool set $pool2 size 2 + + # Wait for both PGs to be in recovering state + ceph pg dump pgs + + # Wait for recovery to start + set -o pipefail + count=0 + while(true) + do + if test $(ceph --format json pg dump pgs | + jq '.pg_stats | .[] | .state | contains("recovering")' | grep -c true) == "2" + then + break + fi + sleep 2 + if test "$count" -eq "10" + then + echo "Recovery never started on both PGs" + return 1 + fi + count=$(expr $count + 1) + done + set +o pipefail + ceph pg dump pgs + + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_1}) dump_recovery_reservations > $dir/dump.${chk_osd1_1}.out + echo osd.${chk_osd1_1} + cat $dir/dump.${chk_osd1_1}.out + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${chk_osd1_2}) dump_recovery_reservations > $dir/dump.${chk_osd1_2}.out + echo osd.${chk_osd1_2} + cat $dir/dump.${chk_osd1_2}.out + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG for $pool1 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG1} ]; + then + echo "The primary PG for $pool1 didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd1_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool1_prio ]; + then + echo "The primary PG ${PG1} doesn't have prio $pool1_prio on remote" + ERRORS=$(expr $ERRORS + 1) 
+ fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG for $pool2 didn't become the in progress item" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_1}.out | jq '.local_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + # Using eval will strip double-quotes from item + eval ITEM=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].item') + if [ "$ITEM" != ${PG2} ]; + then + echo "The primary PG $PG2 didn't become the in progress item on remote" + ERRORS=$(expr $ERRORS + 1) + else + PRIO=$(cat $dir/dump.${chk_osd2_2}.out | jq '.remote_reservations.in_progress[0].prio') + if [ "$PRIO" != $pool2_prio ]; + then + echo "The primary PG ${PG2} doesn't have prio $pool2_prio on remote" + ERRORS=$(expr $ERRORS + 1) + fi + fi + + wait_for_clean || return 1 + + if [ $ERRORS != "0" ]; + then + echo "$ERRORS error(s) found" + else + echo TEST PASSED + fi + + delete_pool $pool1 + delete_pool $pool2 + kill_daemons $dir || return 1 + return $ERRORS +} + +main osd-recovery-prio "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-prio.sh" +# End: diff --git a/qa/standalone/osd/osd-recovery-space.sh b/qa/standalone/osd/osd-recovery-space.sh new file mode 100755 index 000000000..3bafc5138 --- /dev/null +++ b/qa/standalone/osd/osd-recovery-space.sh @@ -0,0 +1,176 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2018 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7221" # git grep '\<7221\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_max_backfills=10 " + CEPH_ARGS+="--osd_mclock_override_recovery_settings=true " + export objects=600 + export poolprefix=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function get_num_in_state() { + local state=$1 + local expression + expression+="select(contains(\"${state}\"))" + ceph --format json pg dump pgs 2>/dev/null | \ + jq ".pg_stats | [.[] | .state | $expression] | length" +} + + +function wait_for_state() { + local state=$1 + local cur_in_state + local -a delays=($(get_timeout_delays $2 5)) + local -i loop=0 + + flush_pg_stats || return 1 + while test $(get_num_pgs) == 0 ; do + sleep 1 + done + + while true ; do + cur_in_state=$(get_num_in_state ${state}) + test $cur_in_state -gt 0 && break + if (( $loop >= ${#delays[*]} )) ; then + ceph pg dump pgs + return 1 + fi + sleep ${delays[$loop]} + loop+=1 + done + return 0 +} + + +function wait_for_recovery_toofull() { + local timeout=$1 + wait_for_state recovery_toofull $timeout +} + + +# Create 1 pools with size 1 +# set ful-ratio to 50% +# Write data 600 5K (3000K) +# Inject fake_statfs_for_testing to 3600K (83% full) +# Incresase the pool size to 2 +# The pool shouldn't have room to recovery +function TEST_recovery_test_simple() { + local dir=$1 + local pools=1 + local OSDS=2 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + export CEPH_ARGS + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + ceph osd set-nearfull-ratio .40 + ceph osd set-backfillfull-ratio .45 + ceph osd set-full-ratio .50 + + for p in $(seq 1 $pools) + do + create_pool "${poolprefix}$p" 1 1 + ceph osd pool set "${poolprefix}$p" size 1 --yes-i-really-mean-it + done + + wait_for_clean || return 1 + + dd if=/dev/urandom of=$dir/datafile bs=1024 count=5 + for o in $(seq 1 $objects) + do + rados -p "${poolprefix}$p" put obj$o $dir/datafile + done + + for o in $(seq 0 $(expr $OSDS - 1)) + do + ceph tell osd.$o injectargs '--fake_statfs_for_testing 3686400' || return 1 + done + sleep 5 + + ceph pg dump pgs + + for p in $(seq 1 $pools) + do + ceph osd pool set "${poolprefix}$p" size 2 + done + + # If this times out, we'll detected errors below + wait_for_recovery_toofull 30 + + ERRORS=0 + if [ "$(ceph pg dump pgs | grep +recovery_toofull | wc -l)" != "1" ]; + then + echo "One pool should have been in recovery_toofull" + ERRORS="$(expr $ERRORS + 1)" + fi + + ceph pg dump pgs + ceph status + ceph status --format=json-pretty > $dir/stat.json + + eval SEV=$(jq '.health.checks.PG_RECOVERY_FULL.severity' $dir/stat.json) + if [ "$SEV" != "HEALTH_ERR" ]; then + echo "PG_RECOVERY_FULL severity $SEV not HEALTH_ERR" + ERRORS="$(expr $ERRORS + 1)" + fi + eval MSG=$(jq '.health.checks.PG_RECOVERY_FULL.summary.message' $dir/stat.json) + if [ "$MSG" != "Full OSDs blocking recovery: 1 pg recovery_toofull" ]; then + echo "PG_RECOVERY_FULL message '$MSG' mismatched" + ERRORS="$(expr $ERRORS + 1)" + fi + rm -f $dir/stat.json + + if [ $ERRORS != "0" ]; + then + return 1 + fi + + for i in $(seq 1 $pools) + do + delete_pool "${poolprefix}$i" + done + kill_daemons $dir || return 1 +} + + 
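+# Rough arithmetic behind TEST_recovery_test_simple above (informational
+# sketch only, not executed by the test):
+#
+#   data_kib=$((600 * 5))             # 3000 KiB written to the size-1 pool
+#   statfs_kib=$((3686400 / 1024))    # 3600 KiB advertised by fake_statfs
+#   echo "$((data_kib * 100 / statfs_kib))% full"   # ~83%, past the 50% full ratio
+#
+# so once the pool size is raised to 2, recovery of the second replica has
+# no room and the PG must end up recovery_toofull.
+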
+main osd-recovery-space "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-space.sh" +# End: diff --git a/qa/standalone/osd/osd-recovery-stats.sh b/qa/standalone/osd/osd-recovery-stats.sh new file mode 100755 index 000000000..ad6f810d7 --- /dev/null +++ b/qa/standalone/osd/osd-recovery-stats.sh @@ -0,0 +1,512 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + # Fix port???? + export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + # so we will not force auth_log_shard to be acting_primary + CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 " + export margin=10 + export objects=200 + export poolname=test + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function below_margin() { + local -i check=$1 + shift + local -i target=$1 + + return $(( $check <= $target && $check >= $target - $margin ? 0 : 1 )) +} + +function above_margin() { + local -i check=$1 + shift + local -i target=$1 + + return $(( $check >= $target && $check <= $target + $margin ? 0 : 1 )) +} + +FIND_UPACT='grep "pg[[]${PG}.*recovering.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/"' +FIND_FIRST='grep "pg[[]${PG}.*recovering.*update_calc_stats $which " $log | grep -F " ${UPACT}${addp}" | grep -v est | head -1 | sed "s/.* \([0-9]*\)$/\1/"' +FIND_LAST='grep "pg[[]${PG}.*recovering.*update_calc_stats $which " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/"' + +function check() { + local dir=$1 + local PG=$2 + local primary=$3 + local type=$4 + local degraded_start=$5 + local degraded_end=$6 + local misplaced_start=$7 + local misplaced_end=$8 + local primary_start=${9:-} + local primary_end=${10:-} + + local log=$dir/osd.${primary}.log + + local addp=" " + if [ "$type" = "erasure" ]; + then + addp="p" + fi + + UPACT=$(eval $FIND_UPACT) + + # Check 3rd line at start because of false recovery starts + local which="degraded" + FIRST=$(eval $FIND_FIRST) + below_margin $FIRST $degraded_start || return 1 + LAST=$(eval $FIND_LAST) + above_margin $LAST $degraded_end || return 1 + + # Check 3rd line at start because of false recovery starts + which="misplaced" + FIRST=$(eval $FIND_FIRST) + below_margin $FIRST $misplaced_start || return 1 + LAST=$(eval $FIND_LAST) + above_margin $LAST $misplaced_end || return 1 + + # This is the value of set into MISSING_ON_PRIMARY + if [ -n "$primary_start" ]; + then + which="shard $primary" + FIRST=$(eval $FIND_FIRST) + below_margin $FIRST $primary_start || return 1 + LAST=$(eval $FIND_LAST) + above_margin $LAST $primary_end || return 1 + fi +} + +# [1,0,?] 
-> [1,2,4] +# degraded 500 -> 0 +# active+recovering+degraded + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 0 500 0 0 0 500 500 active+recovering+degraded 2017-11-17 19:27:36.493828 28'500 32:603 [1,2,4] 1 [1,2,4] 1 0'0 2017-11-17 19:27:05.915467 0'0 2017-11-17 19:27:05.915467 +function do_recovery_out1() { + local dir=$1 + shift + local type=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + if [ $type = "erasure" ]; + then + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 $type myprofile + else + create_pool $poolname 1 1 $type + fi + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + # Only 2 OSDs so only 1 not primary + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set norecover + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + ceph osd out osd.${otherosd} + ceph osd unset norecover + ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + check $dir $PG $primary $type $objects 0 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +function TEST_recovery_replicated_out1() { + local dir=$1 + + do_recovery_out1 $dir replicated || return 1 +} + +function TEST_recovery_erasure_out1() { + local dir=$1 + + do_recovery_out1 $dir erasure || return 1 +} + +# [0, 1] -> [2,3,4,5] +# degraded 1000 -> 0 +# misplaced 1000 -> 0 +# missing on primary 500 -> 0 + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 500 1000 1000 0 0 500 500 active+recovering+degraded 2017-10-27 09:38:37.453438 22'500 25:394 [2,4,3,5] 2 [2,4,3,5] 2 0'0 2017-10-27 09:37:58.046748 0'0 2017-10-27 09:37:58.046748 +function TEST_recovery_sizeup() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 2 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + # Only 2 OSDs so only 1 not primary + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set norecover + ceph osd out osd.$primary osd.$otherosd + ceph osd pool set test size 4 + ceph osd unset norecover + # Get new primary + primary=$(get_primary $poolname obj1) + + ceph tell osd.${primary} debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local degraded=$(expr $objects \* 2) + local misplaced=$(expr $objects \* 2) + local log=$dir/osd.${primary}.log + check $dir $PG $primary replicated $degraded 0 $misplaced 0 $objects 0 || return 1 + + delete_pool 
$poolname + kill_daemons $dir || return 1 +} + +# [0, 1, 2, 4] -> [3, 5] +# misplaced 1000 -> 0 +# missing on primary 500 -> 0 +# active+recovering+degraded + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 500 500 0 1000 0 0 500 500 active+recovering+degraded 2017-10-27 09:34:50.012261 22'500 27:118 [3,5] 3 [3,5] 3 0'0 2017-10-27 09:34:08.617248 0'0 2017-10-27 09:34:08.617248 +function TEST_recovery_sizedown() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + run_osd $dir 4 || return 1 + run_osd $dir 5 || return 1 + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 4 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + # Only 2 OSDs so only 1 not primary + local allosds=$(get_osds $poolname obj1) + + ceph osd set norecover + for osd in $allosds + do + ceph osd out osd.$osd + done + + ceph osd pool set test size 2 + ceph osd unset norecover + ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + # Get new primary + primary=$(get_primary $poolname obj1) + + local misplaced=$(expr $objects \* 2) + local log=$dir/osd.${primary}.log + check $dir $PG $primary replicated 0 0 $misplaced 0 || return 1 + + UPACT=$(grep "pg[[]${PG}.*recovering.*update_calc_stats " $log | tail -1 | sed "s/.*[)] \([[][^ p]*\).*$/\1/") + + # This is the value of set into MISSING_ON_PRIMARY + FIRST=$(grep "pg[[]${PG}.*recovering.*update_calc_stats shard $primary " $log | grep -F " $UPACT " | head -1 | sed "s/.* \([0-9]*\)$/\1/") + below_margin $FIRST $objects || return 1 + LAST=$(grep "pg[[]${PG}.*recovering.*update_calc_stats shard $primary " $log | tail -1 | sed "s/.* \([0-9]*\)$/\1/") + above_margin $LAST 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# [1] -> [1,2] +# degraded 300 -> 200 +# active+recovering+undersized+degraded + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 100 0 300 0 0 0 100 100 active+recovering+undersized+degraded 2017-11-17 17:16:15.302943 13'500 16:643 [1,2] 1 [1,2] 1 0'0 2017-11-17 17:15:34.985563 0'0 2017-11-17 17:15:34.985563 +function TEST_recovery_undersized() { + local dir=$1 + + local osds=3 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $(seq 0 $(expr $osds - 1)) + do + run_osd $dir $i || return 1 + done + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 1 --yes-i-really-mean-it + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + + ceph osd set norecover + # Mark any osd not the primary (only 1 replica so also has no replica) + for i in $(seq 0 $(expr $osds - 1)) + do + if [ $i = $primary ]; + then + continue + fi + ceph osd out osd.$i + break + done + ceph osd pool set test size 4 + ceph osd unset norecover + ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0 
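+ # kick the recovery work queue (zero delay) so the primary starts
+ # recovering immediately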
+ # Give extra sleep time because code below doesn't have the sophistication of wait_for_clean() + sleep 10 + flush_pg_stats || return 1 + + # Wait for recovery to finish + # Can't use wait_for_clean() because state goes from active+recovering+undersized+degraded + # to active+undersized+degraded + for i in $(seq 1 300) + do + if ceph pg dump pgs | grep ^$PG | grep -qv recovering + then + break + fi + if [ $i = "300" ]; + then + echo "Timeout waiting for recovery to finish" + return 1 + fi + sleep 1 + done + + # Get new primary + primary=$(get_primary $poolname obj1) + local log=$dir/osd.${primary}.log + + local first_degraded=$(expr $objects \* 3) + local last_degraded=$(expr $objects \* 2) + check $dir $PG $primary replicated $first_degraded $last_degraded 0 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +# [1,0,2] -> [1,3,NONE]/[1,3,2] +# degraded 100 -> 0 +# misplaced 100 -> 100 +# active+recovering+degraded+remapped + +# PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP LAST_DEEP_SCRUB DEEP_SCRUB_STAMP +# 1.0 100 0 100 100 0 0 100 100 active+recovering+degraded+remapped 2017-11-27 21:24:20.851243 18'500 23:618 [1,3,NONE] 1 [1,3,2] 1 0'0 2017-11-27 21:23:39.395242 0'0 2017-11-27 21:23:39.395242 +function TEST_recovery_erasure_remapped() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + run_osd $dir 3 || return 1 + + ceph osd erasure-code-profile set myprofile plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd + create_pool $poolname 1 1 erasure myprofile + ceph osd pool set $poolname min_size 2 + + wait_for_clean || return 1 + + for i in $(seq 1 $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local primary=$(get_primary $poolname obj1) + local PG=$(get_pg $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set norecover + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + ceph osd out osd.${otherosd} + + # Mark osd not the primary and not down/out osd as just out + for i in 0 1 2 3 + do + if [ $i = $primary ]; + then + continue + fi + if [ $i = $otherosd ]; + then + continue + fi + ceph osd out osd.$i + break + done + ceph osd unset norecover + ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + local log=$dir/osd.${primary}.log + check $dir $PG $primary erasure $objects 0 $objects $objects || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +function TEST_recovery_multi() { + local dir=$1 + + local osds=6 + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $(seq 0 $(expr $osds - 1)) + do + run_osd $dir $i || return 1 + done + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 1 + + wait_for_clean || return 1 + + rados -p $poolname put obj1 /dev/null + + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + + ceph osd set noout + ceph osd set norecover + kill $(cat $dir/osd.${otherosd}.pid) + ceph osd down osd.${otherosd} + + local half=$(expr $objects / 2) + for i in $(seq 2 $half) + do + rados -p $poolname put obj$i /dev/null + done + + kill $(cat $dir/osd.${primary}.pid) + ceph osd down osd.${primary} + activate_osd $dir ${otherosd} + 
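# give the revived osd.${otherosd} a moment to boot before writing the second half of the objects +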
sleep 3 + + for i in $(seq $(expr $half + 1) $objects) + do + rados -p $poolname put obj$i /dev/null + done + + local PG=$(get_pg $poolname obj1) + local otherosd=$(get_not_primary $poolname obj$objects) + + ceph osd unset noout + ceph osd out osd.$primary osd.$otherosd + activate_osd $dir ${primary} + sleep 3 + + ceph osd pool set test size 4 + ceph osd unset norecover + ceph tell osd.$(get_primary $poolname obj1) debug kick_recovery_wq 0 + sleep 2 + + wait_for_clean || return 1 + + # Get new primary + primary=$(get_primary $poolname obj1) + + local log=$dir/osd.${primary}.log + check $dir $PG $primary replicated 399 0 300 0 99 0 || return 1 + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +main osd-recovery-stats "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-stats.sh" +# End: diff --git a/qa/standalone/osd/osd-rep-recov-eio.sh b/qa/standalone/osd/osd-rep-recov-eio.sh new file mode 100755 index 000000000..6fea441b3 --- /dev/null +++ b/qa/standalone/osd/osd-rep-recov-eio.sh @@ -0,0 +1,422 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# +# Author: Kefu Chai <kchai@redhat.com> +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +warnings=10 + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7140" # git grep '\<7140\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + # set warning amount in case default changes + run_mon $dir a --mon_osd_warn_num_repaired=$warnings || return 1 + run_mgr $dir x || return 1 + ceph osd pool create foo 8 || return 1 + + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function setup_osds() { + local count=$1 + shift + local type=$1 + + for id in $(seq 0 $(expr $count - 1)) ; do + run_osd${type} $dir $id || return 1 + done + wait_for_clean || return 1 +} + +function get_state() { + local pgid=$1 + local sname=state + ceph --format json pg dump pgs 2>/dev/null | \ + jq -r ".pg_stats | .[] | select(.pgid==\"$pgid\") | .$sname" +} + +function rados_put() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + + for marker in AAA BBB CCCC DDDD ; do + printf "%*s" 1024 $marker + done > $dir/ORIGINAL + # + # get and put an object, compare they are equal + # + rados --pool $poolname put $objname $dir/ORIGINAL || return 1 +} + +function rados_get() { + local dir=$1 + local poolname=$2 + local objname=${3:-SOMETHING} + local expect=${4:-ok} + + # + # Expect a failure to get object + # + if [ $expect = "fail" ]; + then + ! rados --pool $poolname get $objname $dir/COPY + return + fi + # + # Expect hang trying to get object + # + if [ $expect = "hang" ]; + then + timeout 5 rados --pool $poolname get $objname $dir/COPY + test "$?" 
= "124" + return + fi + # + # get an object, compare with $dir/ORIGINAL + # + rados --pool $poolname get $objname $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + rm $dir/COPY +} + +function rados_get_data() { + local inject=$1 + shift + local dir=$1 + + local poolname=pool-rep + local objname=obj-$inject-$$ + local pgid=$(get_pg $poolname $objname) + + rados_put $dir $poolname $objname || return 1 + inject_$inject rep data $poolname $objname $dir 0 || return 1 + rados_get $dir $poolname $objname || return 1 + + wait_for_clean + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "1" || return 1 + flush_pg_stats + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "1" || return 1 + + local object_osds=($(get_osds $poolname $objname)) + local primary=${object_osds[0]} + local bad_peer=${object_osds[1]} + inject_$inject rep data $poolname $objname $dir 0 || return 1 + inject_$inject rep data $poolname $objname $dir 1 || return 1 + # Force primary to pull from the bad peer, so we can repair it too! + set_config osd $primary osd_debug_feed_pullee $bad_peer || return 1 + rados_get $dir $poolname $objname || return 1 + + # Wait until automatic repair of bad peer is done + wait_for_clean || return 1 + + inject_$inject rep data $poolname $objname $dir 0 || return 1 + inject_$inject rep data $poolname $objname $dir 2 || return 1 + rados_get $dir $poolname $objname || return 1 + + wait_for_clean + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "3" || return 1 + flush_pg_stats + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "4" || return 1 + + inject_$inject rep data $poolname $objname $dir 0 || return 1 + inject_$inject rep data $poolname $objname $dir 1 || return 1 + inject_$inject rep data $poolname $objname $dir 2 || return 1 + rados_get $dir $poolname $objname hang || return 1 + + wait_for_clean + # After hang another repair couldn't happen, so count stays the same + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "3" || return 1 + flush_pg_stats + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "4" || return 1 +} + +function TEST_rados_get_with_eio() { + local dir=$1 + + setup_osds 4 || return 1 + + local poolname=pool-rep + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + rados_get_data eio $dir || return 1 + + delete_pool $poolname +} + +function TEST_rados_repair_warning() { + local dir=$1 + local OBJS=$(expr $warnings + 1) + + setup_osds 4 || return 1 + + local poolname=pool-rep + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + local poolname=pool-rep + local objbase=obj-warn + local inject=eio + + for i in $(seq 1 $OBJS) + do + rados_put $dir $poolname ${objbase}-$i || return 1 + inject_$inject rep data $poolname ${objbase}-$i $dir 0 || return 1 + rados_get $dir $poolname ${objbase}-$i || return 1 + done + local pgid=$(get_pg $poolname ${objbase}-1) + + local object_osds=($(get_osds $poolname ${objbase}-1)) + local primary=${object_osds[0]} + local bad_peer=${object_osds[1]} + + wait_for_clean + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "$OBJS" || return 1 + flush_pg_stats + COUNT=$(ceph pg dump --format=json-pretty | jq 
".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "$OBJS" || return 1 + + ceph health | grep -q "Too many repaired reads on 1 OSDs" || return 1 + ceph health detail | grep -q "osd.$primary had $OBJS reads repaired" || return 1 + + ceph health mute OSD_TOO_MANY_REPAIRS + set -o pipefail + # Should mute this + ceph health | $(! grep -q "Too many repaired reads on 1 OSDs") || return 1 + set +o pipefail + + for i in $(seq 1 $OBJS) + do + inject_$inject rep data $poolname ${objbase}-$i $dir 0 || return 1 + inject_$inject rep data $poolname ${objbase}-$i $dir 1 || return 1 + # Force primary to pull from the bad peer, so we can repair it too! + set_config osd $primary osd_debug_feed_pullee $bad_peer || return 1 + rados_get $dir $poolname ${objbase}-$i || return 1 + done + + wait_for_clean + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "$(expr $OBJS \* 2)" || return 1 + flush_pg_stats + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "$(expr $OBJS \* 3)" || return 1 + + # Give mon a chance to notice additional OSD and unmute + # The default tick time is 5 seconds + CHECKTIME=10 + LOOPS=0 + while(true) + do + sleep 1 + if ceph health | grep -q "Too many repaired reads on 2 OSDs" + then + break + fi + LOOPS=$(expr $LOOPS + 1) + if test "$LOOPS" = "$CHECKTIME" + then + echo "Too many repaired reads not seen after $CHECKTIME seconds" + return 1 + fi + done + ceph health detail | grep -q "osd.$primary had $(expr $OBJS \* 2) reads repaired" || return 1 + ceph health detail | grep -q "osd.$bad_peer had $OBJS reads repaired" || return 1 + + delete_pool $poolname +} + +# Test backfill with unfound object +function TEST_rep_backfill_unfound() { + local dir=$1 + local objname=myobject + local lastobj=300 + # Must be between 1 and $lastobj + local testobj=obj250 + + export CEPH_ARGS + CEPH_ARGS+=' --osd_min_pg_log_entries=5 --osd_max_pg_log_entries=10' + setup_osds 3 || return 1 + + local poolname=test-pool + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + ceph pg dump pgs + + rados_put $dir $poolname $objname || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4 + for i in $(seq 1 $lastobj) + do + rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1 + done + + inject_eio rep data $poolname $testobj $dir 0 || return 1 + inject_eio rep data $poolname $testobj $dir 1 || return 1 + + activate_osd $dir ${last_osd} || return 1 + ceph osd in ${last_osd} || return 1 + + sleep 15 + + for tmp in $(seq 1 360); do + state=$(get_state 2.0) + echo $state | grep backfill_unfound + if [ "$?" = "0" ]; then + break + fi + echo "$state " + sleep 1 + done + + ceph pg dump pgs + ceph pg 2.0 list_unfound | grep -q $testobj || return 1 + + # Command should hang because object is unfound + timeout 5 rados -p $poolname get $testobj $dir/CHECK + test $? = "124" || return 1 + + ceph pg 2.0 mark_unfound_lost delete + + wait_for_clean || return 1 + + for i in $(seq 1 $lastobj) + do + if [ obj${i} = "$testobj" ]; then + # Doesn't exist anymore + ! 
rados -p $poolname get $testobj $dir/CHECK || return 1 + else + rados --pool $poolname get obj${i} $dir/CHECK || return 1 + diff -q $dir/ORIGINAL $dir/CHECK || return 1 + fi + done + + rm -f ${dir}/ORIGINAL ${dir}/CHECK + + delete_pool $poolname +} + +# Test recovery with unfound object +function TEST_rep_recovery_unfound() { + local dir=$1 + local objname=myobject + local lastobj=100 + # Must be between 1 and $lastobj + local testobj=obj75 + + setup_osds 3 || return 1 + + local poolname=test-pool + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + ceph pg dump pgs + + rados_put $dir $poolname $objname || return 1 + + local -a initial_osds=($(get_osds $poolname $objname)) + local last_osd=${initial_osds[-1]} + kill_daemons $dir TERM osd.${last_osd} 2>&2 < /dev/null || return 1 + ceph osd down ${last_osd} || return 1 + ceph osd out ${last_osd} || return 1 + + ceph pg dump pgs + + dd if=/dev/urandom of=${dir}/ORIGINAL bs=1024 count=4 + for i in $(seq 1 $lastobj) + do + rados --pool $poolname put obj${i} $dir/ORIGINAL || return 1 + done + + inject_eio rep data $poolname $testobj $dir 0 || return 1 + inject_eio rep data $poolname $testobj $dir 1 || return 1 + + activate_osd $dir ${last_osd} || return 1 + ceph osd in ${last_osd} || return 1 + + sleep 15 + + for tmp in $(seq 1 100); do + state=$(get_state 2.0) + echo $state | grep -v recovering + if [ "$?" = "0" ]; then + break + fi + echo "$state " + sleep 1 + done + + ceph pg dump pgs + ceph pg 2.0 list_unfound | grep -q $testobj || return 1 + + # Command should hang because object is unfound + timeout 5 rados -p $poolname get $testobj $dir/CHECK + test $? = "124" || return 1 + + ceph pg 2.0 mark_unfound_lost delete + + wait_for_clean || return 1 + + for i in $(seq 1 $lastobj) + do + if [ obj${i} = "$testobj" ]; then + # Doesn't exist anymore + ! rados -p $poolname get $testobj $dir/CHECK || return 1 + else + rados --pool $poolname get obj${i} $dir/CHECK || return 1 + diff -q $dir/ORIGINAL $dir/CHECK || return 1 + fi + done + + rm -f ${dir}/ORIGINAL ${dir}/CHECK + + delete_pool $poolname +} + +main osd-rep-recov-eio.sh "$@" + +# Local Variables: +# compile-command: "cd ../../../build ; make -j4 && ../qa/run-standalone.sh osd-rep-recov-eio.sh" +# End: diff --git a/qa/standalone/osd/osd-reuse-id.sh b/qa/standalone/osd/osd-reuse-id.sh new file mode 100755 index 000000000..b24b6f2eb --- /dev/null +++ b/qa/standalone/osd/osd-reuse-id.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7123" # git grep '\<7123\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_reuse_id() { + local dir=$1 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + destroy_osd $dir 1 || return 1 + run_osd $dir 1 || return 1 +} + +main osd-reuse-id "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-reuse-id.sh" +# End: diff --git a/qa/standalone/osd/pg-split-merge.sh b/qa/standalone/osd/pg-split-merge.sh new file mode 100755 index 000000000..7f2899b60 --- /dev/null +++ b/qa/standalone/osd/pg-split-merge.sh @@ -0,0 +1,203 @@ +#!/usr/bin/env bash +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7147" # git grep '\<7147\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON --mon_min_osdmap_epochs=50 --paxos_service_trim_min=10" + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_a_merge_empty() { + local dir=$1 + + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd pool create foo 2 || return 1 + ceph osd pool set foo pgp_num 1 || return 1 + + wait_for_clean || return 1 + + # note: we need 1.0 to have the same or more objects than 1.1 + # 1.1 + rados -p foo put foo1 /etc/passwd + rados -p foo put foo2 /etc/passwd + rados -p foo put foo3 /etc/passwd + rados -p foo put foo4 /etc/passwd + # 1.0 + rados -p foo put foo5 /etc/passwd + rados -p foo put foo6 /etc/passwd + rados -p foo put foo8 /etc/passwd + rados -p foo put foo10 /etc/passwd + rados -p foo put foo11 /etc/passwd + rados -p foo put foo12 /etc/passwd + rados -p foo put foo16 /etc/passwd + + wait_for_clean || return 1 + + ceph tell osd.1 config set osd_debug_no_purge_strays true + ceph osd pool set foo size 2 || return 1 + wait_for_clean || return 1 + + kill_daemons $dir TERM osd.2 || return 1 + ceph-objectstore-tool --data-path $dir/2 --op remove --pgid 1.1 --force || return 1 + activate_osd $dir 2 || return 1 + + wait_for_clean || return 1 + + # osd.2: now 1.0 is there but 1.1 is not + + # instantiate 1.1 on osd.2 with last_update=0'0 ('empty'), which is + # the problematic state... then let it merge with 1.0 + ceph tell osd.2 config set osd_debug_no_acting_change true + ceph osd out 0 1 + ceph osd pool set foo pg_num 1 + sleep 5 + ceph tell osd.2 config set osd_debug_no_acting_change false + + # go back to osd.1 being primary, and 3x so the osd.2 copy doesn't get + # removed + ceph osd in 0 1 + ceph osd pool set foo size 3 + + wait_for_clean || return 1 + + # scrub to ensure the osd.3 copy of 1.0 was incomplete (vs missing + # half of its objects). 
+ ceph pg scrub 1.0 + sleep 10 + ceph log last debug + ceph pg ls + ceph pg ls | grep ' active.clean ' || return 1 +} + +function TEST_import_after_merge_and_gap() { + local dir=$1 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + ceph osd pool create foo 2 || return 1 + wait_for_clean || return 1 + rados -p foo bench 3 write -b 1024 --no-cleanup || return 1 + + kill_daemons $dir TERM osd.0 || return 1 + ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.1 --file $dir/1.1 --force || return 1 + ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.0 --file $dir/1.0 --force || return 1 + activate_osd $dir 0 || return 1 + + ceph osd pool set foo pg_num 1 + sleep 5 + while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 2 ; do sleep 1 ; done + wait_for_clean || return 1 + + # + kill_daemons $dir TERM osd.0 || return 1 + ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1 + # this will import both halves the original pg + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1 + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1 + activate_osd $dir 0 || return 1 + + wait_for_clean || return 1 + + # make a map gap + for f in `seq 1 50` ; do + ceph osd set nodown + ceph osd unset nodown + done + + # poke and prod to ensure last_epech_clean is big, reported to mon, and + # the osd is able to trim old maps + rados -p foo bench 1 write -b 1024 --no-cleanup || return 1 + wait_for_clean || return 1 + ceph tell osd.0 send_beacon + sleep 5 + ceph osd set nodown + ceph osd unset nodown + sleep 5 + + kill_daemons $dir TERM osd.0 || return 1 + + # this should fail.. 1.1 still doesn't exist + ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1 + + ceph-objectstore-tool --data-path $dir/0 --op export-remove --pgid 1.0 --force --file $dir/1.0.later || return 1 + + # this should fail too because of the gap + ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 || return 1 + ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1 + + # we can force it... + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.1 --file $dir/1.1 --force || return 1 + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 --force || return 1 + + # ...but the osd won't start, so remove it again. 
+ ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1 + ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.1 --force || return 1 + + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0.later --force || return 1 + + + activate_osd $dir 0 || return 1 + + wait_for_clean || return 1 +} + +function TEST_import_after_split() { + local dir=$1 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + + ceph osd pool create foo 1 || return 1 + wait_for_clean || return 1 + rados -p foo bench 3 write -b 1024 --no-cleanup || return 1 + + kill_daemons $dir TERM osd.0 || return 1 + ceph-objectstore-tool --data-path $dir/0 --op export --pgid 1.0 --file $dir/1.0 --force || return 1 + activate_osd $dir 0 || return 1 + + ceph osd pool set foo pg_num 2 + sleep 5 + while ceph daemon osd.0 perf dump | jq '.osd.numpg' | grep 1 ; do sleep 1 ; done + wait_for_clean || return 1 + + kill_daemons $dir TERM osd.0 || return 1 + + ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.0 --force || return 1 + + # this should fail because 1.1 (split child) is there + ! ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1 + + ceph-objectstore-tool --data-path $dir/0 --op remove --pgid 1.1 --force || return 1 + # now it will work (1.1. is gone) + ceph-objectstore-tool --data-path $dir/0 --op import --pgid 1.0 --file $dir/1.0 || return 1 + + activate_osd $dir 0 || return 1 + + wait_for_clean || return 1 +} + + +main pg-split-merge "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/pg-split-merge.sh" +# End: diff --git a/qa/standalone/osd/repeer-on-acting-back.sh b/qa/standalone/osd/repeer-on-acting-back.sh new file mode 100755 index 000000000..af406ef92 --- /dev/null +++ b/qa/standalone/osd/repeer-on-acting-back.sh @@ -0,0 +1,129 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2020 ZTE Corporation <contact@zte.com.cn> +# +# Author: xie xingguo <xie.xingguo@zte.com.cn> +# Author: Yan Jun <yan.jun8@zte.com.cn> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export poolname=test + export testobjects=100 + export loglen=12 + export trim=$(expr $loglen / 2) + export CEPH_MON="127.0.0.1:7115" # git grep '\<7115\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + # so we will not force auth_log_shard to be acting_primary + CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 " + # use small pg_log settings, so we always do backfill instead of recovery + CEPH_ARGS+="--osd_min_pg_log_entries=$loglen --osd_max_pg_log_entries=$loglen --osd_pg_log_trim_min=$trim " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + + +function TEST_repeer_on_down_acting_member_coming_back() { + local dir=$1 + local dummyfile='/etc/fstab' + + local num_osds=6 + local osds="$(seq 0 $(expr $num_osds - 1))" + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for i in $osds + do + run_osd $dir $i || return 1 + done + + create_pool $poolname 1 1 + ceph osd pool set $poolname size 3 + ceph osd pool set $poolname min_size 2 + local poolid=$(ceph pg dump pools -f json | jq '.pool_stats' | jq '.[].poolid') + local pgid=$poolid.0 + + # enable required feature-bits for upmap + ceph osd set-require-min-compat-client luminous + # reset up to [1,2,3] + ceph osd pg-upmap $pgid 1 2 3 || return 1 + + flush_pg_stats || return 1 + wait_for_clean || return 1 + + echo "writing initial objects" + # write a bunch of objects + for i in $(seq 1 $testobjects) + do + rados -p $poolname put existing_$i $dummyfile + done + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + # reset up to [1,4,5] + ceph osd pg-upmap $pgid 1 4 5 || return 1 + + # wait for peering to complete + sleep 2 + + # make sure osd.2 belongs to current acting set + ceph pg $pgid query | jq '.acting' | grep 2 || return 1 + + # kill osd.2 + kill_daemons $dir KILL osd.2 || return 1 + ceph osd down osd.2 + + # again, wait for peering to complete + sleep 2 + + # osd.2 should have been moved out from acting set + ceph pg $pgid query | jq '.acting' | grep 2 && return 1 + + # bring up osd.2 + activate_osd $dir 2 || return 1 + wait_for_osd up 2 + + # again, wait for peering to complete + sleep 2 + + # primary should be able to re-add osd.2 into acting + ceph pg $pgid query | jq '.acting' | grep 2 || return 1 + + WAIT_FOR_CLEAN_TIMEOUT=20 wait_for_clean + + if ! 
grep -q "Active: got notify from previous acting member.*, requesting pg_temp change" $(find $dir -name '*osd*log') + then + echo failure + return 1 + fi + echo "success" + + delete_pool $poolname + kill_daemons $dir || return 1 +} + +main repeer-on-acting-back "$@" + +# Local Variables: +# compile-command: "make -j4 && ../qa/run-standalone.sh repeer-on-acting-back.sh" +# End: diff --git a/qa/standalone/osd/repro_long_log.sh b/qa/standalone/osd/repro_long_log.sh new file mode 100755 index 000000000..fa27d7017 --- /dev/null +++ b/qa/standalone/osd/repro_long_log.sh @@ -0,0 +1,197 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# Copyright (C) 2018 Red Hat <contact@redhat.com> +# +# Author: Josh Durgin <jdurgin@redhat.com> +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7100" # git grep '\<7100\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +PGID= + +function test_log_size() +{ + local PGID=$1 + local EXPECTED=$2 + local DUPS_EXPECTED=${3:-0} + ceph tell osd.\* flush_pg_stats + sleep 3 + ceph pg $PGID query | jq .info.stats.log_size + ceph pg $PGID query | jq .info.stats.log_size | grep "${EXPECTED}" + ceph pg $PGID query | jq .info.stats.log_dups_size + ceph pg $PGID query | jq .info.stats.log_dups_size | grep "${DUPS_EXPECTED}" +} + +function setup_log_test() { + local dir=$1 + local which=$2 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd pool create test 1 1 || true + POOL_ID=$(ceph osd dump --format json | jq '.pools[] | select(.pool_name == "test") | .pool') + PGID="${POOL_ID}.0" + + # With 1 PG setting entries per osd 20 results in a target log of 20 + ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 20 || return 1 + ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 20 || return 1 + ceph tell osd.\* injectargs -- --osd-max-pg-log-entries 30 || return 1 + ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 10 || return 1 + ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 20 || return 1 + + touch $dir/foo + for i in $(seq 1 20) + do + rados -p test put foo $dir/foo || return 1 + done + + test_log_size $PGID 20 || return 1 + + rados -p test rm foo || return 1 + + # generate error entries + for i in $(seq 1 20) + do + rados -p test rm foo + done + + # log should have been trimmed down to min_entries with one extra + test_log_size $PGID 21 || return 1 +} + +function TEST_repro_long_log1() +{ + local dir=$1 + + setup_log_test $dir || return 1 + # regular write should trim the log + rados -p test put foo $dir/foo || return 1 + 
test_log_size $PGID 22 || return 1 +} + +function TEST_repro_long_log2() +{ + local dir=$1 + + setup_log_test $dir || return 1 + local PRIMARY=$(ceph pg $PGID query | jq '.info.stats.up_primary') + kill_daemons $dir TERM osd.$PRIMARY || return 1 + CEPH_ARGS="--osd-max-pg-log-entries=2 --osd-pg-log-dups-tracked=3 --no-mon-config" ceph-objectstore-tool --data-path $dir/$PRIMARY --pgid $PGID --op trim-pg-log || return 1 + activate_osd $dir $PRIMARY || return 1 + wait_for_clean || return 1 + test_log_size $PGID 21 18 || return 1 +} + +function TEST_trim_max_entries() +{ + local dir=$1 + + setup_log_test $dir || return 1 + + ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1 + ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2 + ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2 + ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4 + ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 0 + + # adding log entries, should only trim 4 and add one each time + rados -p test rm foo + test_log_size $PGID 18 || return 1 + rados -p test rm foo + test_log_size $PGID 15 || return 1 + rados -p test rm foo + test_log_size $PGID 12 || return 1 + rados -p test rm foo + test_log_size $PGID 9 || return 1 + rados -p test rm foo + test_log_size $PGID 6 || return 1 + rados -p test rm foo + test_log_size $PGID 3 || return 1 + + # below trim_min + rados -p test rm foo + test_log_size $PGID 4 || return 1 + rados -p test rm foo + test_log_size $PGID 3 || return 1 + rados -p test rm foo + test_log_size $PGID 4 || return 1 + rados -p test rm foo + test_log_size $PGID 3 || return 1 +} + +function TEST_trim_max_entries_with_dups() +{ + local dir=$1 + + setup_log_test $dir || return 1 + + ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1 + ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2 + ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2 + ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4 + ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 20 || return 1 + + # adding log entries, should only trim 4 and add one each time + # dups should be trimmed to 1 + rados -p test rm foo + test_log_size $PGID 18 2 || return 1 + rados -p test rm foo + test_log_size $PGID 15 6 || return 1 + rados -p test rm foo + test_log_size $PGID 12 10 || return 1 + rados -p test rm foo + test_log_size $PGID 9 14 || return 1 + rados -p test rm foo + test_log_size $PGID 6 18 || return 1 + rados -p test rm foo + test_log_size $PGID 3 20 || return 1 + + # below trim_min + rados -p test rm foo + test_log_size $PGID 4 20 || return 1 + rados -p test rm foo + test_log_size $PGID 3 20 || return 1 + rados -p test rm foo + test_log_size $PGID 4 20 || return 1 + rados -p test rm foo + test_log_size $PGID 3 20 || return 1 +} + +main repro-long-log "$@" + +# Local Variables: +# compile-command: "cd ../.. 
; make -j4 && ../qa/run-standalone.sh repro_long_log.sh" +# End: diff --git a/qa/standalone/scrub/osd-mapper.sh b/qa/standalone/scrub/osd-mapper.sh new file mode 100755 index 000000000..ed18f94f1 --- /dev/null +++ b/qa/standalone/scrub/osd-mapper.sh @@ -0,0 +1,182 @@ +#!/usr/bin/env bash +# -*- mode:text; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +# vim: ts=8 sw=2 smarttab +# +# test the handling of a corrupted SnapMapper DB by Scrub + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh +source $CEPH_ROOT/qa/standalone/scrub/scrub-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +# one clone & multiple snaps (according to the number of parameters) +function make_a_clone() +{ + #turn off '-x' (but remember previous state) + local saved_echo_flag=${-//[^x]/} + set +x + local pool=$1 + local obj=$2 + echo $RANDOM | rados -p $pool put $obj - || return 1 + shift 2 + for snap in $@ ; do + rados -p $pool mksnap $snap || return 1 + done + if [[ -n "$saved_echo_flag" ]]; then set -x; fi +} + +function TEST_truncated_sna_record() { + local dir=$1 + local -A cluster_conf=( + ['osds_num']="3" + ['pgs_in_pool']="4" + ['pool_name']="test" + ) + + local extr_dbg=3 + (( extr_dbg > 1 )) && echo "Dir: $dir" + standard_scrub_cluster $dir cluster_conf + ceph tell osd.* config set osd_stats_update_period_not_scrubbing "1" + ceph tell osd.* config set osd_stats_update_period_scrubbing "1" + + local osdn=${cluster_conf['osds_num']} + local poolid=${cluster_conf['pool_id']} + local poolname=${cluster_conf['pool_name']} + local objname="objxxx" + + # create an object and clone it + make_a_clone $poolname $objname snap01 snap02 || return 1 + make_a_clone $poolname $objname snap13 || return 1 + make_a_clone $poolname $objname snap24 snap25 || return 1 + echo $RANDOM | rados -p $poolname put $objname - || return 1 + + #identify the PG and the primary OSD + local pgid=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.pgid'` + local osd=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.up[0]'` + echo "pgid is $pgid (primary: osd.$osd)" + # turn on the publishing of test data in the 'scrubber' section of 'pg query' output + set_query_debug $pgid + + # verify the existence of these clones + (( extr_dbg >= 1 )) && rados --format json-pretty -p $poolname listsnaps $objname + + # scrub the PG + ceph pg $pgid deep_scrub || return 1 + + # we aren't just waiting for the scrub to terminate, but also for the + # logs to be published + sleep 3 + ceph pg dump pgs + until grep -a -q -- "event: --^^^^---- ScrubFinished" $dir/osd.$osd.log ; do + sleep 0.2 + done + + ceph pg dump pgs + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + sleep 5 + grep -a -q -v "ERR" $dir/osd.$osd.log || return 1 + + # kill the OSDs + kill_daemons $dir TERM osd || return 1 + + (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $dir/0 dump "p" + (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $dir/2 dump "p" | grep -a SNA_ + (( extr_dbg >= 2 )) && grep -a SNA_ /tmp/oo2.dump + (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $dir/2 dump p 2> /dev/null + local 
num_sna_b4=`ceph-kvstore-tool bluestore-kv $dir/$osd dump p 2> /dev/null | grep -a -e 'SNA_[0-9]_000000000000000[0-9]_000000000000000' \ + | awk -e '{print $2;}' | wc -l` + + for sdn in $(seq 0 $(expr $osdn - 1)) + do + kvdir=$dir/$sdn + echo "corrupting the SnapMapper DB of osd.$sdn (db: $kvdir)" + (( extr_dbg >= 3 )) && ceph-kvstore-tool bluestore-kv $kvdir dump "p" + + # truncate the 'mapping' (SNA_) entry corresponding to the snap13 clone + KY=`ceph-kvstore-tool bluestore-kv $kvdir dump p 2> /dev/null | grep -a -e 'SNA_[0-9]_0000000000000003_000000000000000' \ + | awk -e '{print $2;}'` + (( extr_dbg >= 1 )) && echo "SNA key: $KY" | cat -v + + tmp_fn1=`mktemp -p /tmp --suffix="_the_val"` + (( extr_dbg >= 1 )) && echo "Value dumped in: $tmp_fn1" + ceph-kvstore-tool bluestore-kv $kvdir get p "$KY" out $tmp_fn1 2> /dev/null + (( extr_dbg >= 2 )) && od -xc $tmp_fn1 + + NKY=${KY:0:-30} + ceph-kvstore-tool bluestore-kv $kvdir rm "p" "$KY" 2> /dev/null + ceph-kvstore-tool bluestore-kv $kvdir set "p" "$NKY" in $tmp_fn1 2> /dev/null + + (( extr_dbg >= 1 )) || rm $tmp_fn1 + done + + orig_osd_args=" ${cluster_conf['osd_args']}" + orig_osd_args=" $(echo $orig_osd_args)" + (( extr_dbg >= 2 )) && echo "Copied OSD args: /$orig_osd_args/ /${orig_osd_args:1}/" + for sdn in $(seq 0 $(expr $osdn - 1)) + do + CEPH_ARGS="$CEPH_ARGS $orig_osd_args" activate_osd $dir $sdn + done + sleep 1 + + for sdn in $(seq 0 $(expr $osdn - 1)) + do + timeout 60 ceph tell osd.$sdn version + done + rados --format json-pretty -p $poolname listsnaps $objname + + # when scrubbing now - we expect the scrub to emit a cluster log ERR message regarding SnapMapper internal inconsistency + ceph osd unset nodeep-scrub || return 1 + ceph osd unset noscrub || return 1 + + # what is the primary now? + local cur_prim=`ceph --format=json-pretty osd map $poolname $objname | jq -r '.up[0]'` + ceph pg dump pgs + sleep 2 + ceph pg $pgid deep_scrub || return 1 + sleep 5 + ceph pg dump pgs + (( extr_dbg >= 1 )) && grep -a "ERR" $dir/osd.$cur_prim.log + grep -a -q "ERR" $dir/osd.$cur_prim.log || return 1 + + # but did we fix the snap issue? let's try scrubbing again + + local prev_err_cnt=`grep -a "ERR" $dir/osd.$cur_prim.log | wc -l` + echo "prev count: $prev_err_cnt" + + # scrub again. No errors expected this time + ceph pg $pgid deep_scrub || return 1 + sleep 5 + ceph pg dump pgs + (( extr_dbg >= 1 )) && grep -a "ERR" $dir/osd.$cur_prim.log + local current_err_cnt=`grep -a "ERR" $dir/osd.$cur_prim.log | wc -l` + (( extr_dbg >= 1 )) && echo "current count: $current_err_cnt" + (( current_err_cnt == prev_err_cnt )) || return 1 + kill_daemons $dir TERM osd || return 1 + kvdir=$dir/$cur_prim + (( extr_dbg >= 2 )) && ceph-kvstore-tool bluestore-kv $kvdir dump p 2> /dev/null | grep -a -e 'SNA_[0-9]_' \ + | awk -e '{print $2;}' + local num_sna_full=`ceph-kvstore-tool bluestore-kv $kvdir dump p 2> /dev/null | grep -a -e 'SNA_[0-9]_000000000000000[0-9]_000000000000000' \ + | awk -e '{print $2;}' | wc -l` + (( num_sna_full == num_sna_b4 )) || return 1 + return 0 +} + + +main osd-mapper "$@" diff --git a/qa/standalone/scrub/osd-recovery-scrub.sh b/qa/standalone/scrub/osd-recovery-scrub.sh new file mode 100755 index 000000000..9541852c7 --- /dev/null +++ b/qa/standalone/scrub/osd-recovery-scrub.sh @@ -0,0 +1,352 @@ +#! 
/usr/bin/env bash +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7124" # git grep '\<7124\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + $func $dir || return 1 + done +} + +# Simple test for "not scheduling scrubs due to active recovery" +# OSD::sched_scrub() called on all OSDs during ticks +function TEST_recovery_scrub_1() { + local dir=$1 + local poolname=test + + TESTDATA="testdata.$$" + OSDS=4 + PGS=1 + OBJECTS=100 + ERRORS=0 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true \ + --osd_scrub_interval_randomize_ratio=0.0 || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_scrub_during_recovery=false || return 1 + done + + # Create a pool with $PGS pgs + create_pool $poolname $PGS $PGS + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }') + + ceph pg dump pgs + + dd if=/dev/urandom of=$TESTDATA bs=1M count=50 + for i in $(seq 1 $OBJECTS) + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + ceph osd pool set $poolname size 4 + + # Wait for recovery to start + set -o pipefail + count=0 + while(true) + do + if ceph --format json pg dump pgs | + jq '.pg_stats | [.[] | .state | contains("recovering")]' | grep -q true + then + break + fi + sleep 2 + if test "$count" -eq "10" + then + echo "Recovery never started" + return 1 + fi + count=$(expr $count + 1) + done + set +o pipefail + ceph pg dump pgs + + sleep 10 + # Work around for http://tracker.ceph.com/issues/38195 + kill_daemons $dir #|| return 1 + + declare -a err_strings + err_strings[0]="not scheduling scrubs due to active recovery" + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + grep "not scheduling scrubs" $dir/osd.${osd}.log + done + for err_string in "${err_strings[@]}" + do + found=false + count=0 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + if grep -q "$err_string" $dir/osd.${osd}.log + then + found=true + count=$(expr $count + 1) + fi + done + if [ "$found" = "false" ]; then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + [ $count -eq $OSDS ] || return 1 + done + + teardown $dir || return 1 + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + echo "TEST PASSED" + return 0 +} + +## +# a modified version of wait_for_scrub(), which terminates if the Primary +# of the to-be-scrubbed PG changes +# +# Given the *last_scrub*, wait for scrub to happen on **pgid**. It +# will fail if scrub does not complete within $TIMEOUT seconds. 
The +# repair is complete whenever the **get_last_scrub_stamp** function +# reports a timestamp different from the one given in argument. +# +# @param pgid the id of the PG +# @param the primary OSD when started +# @param last_scrub timestamp of the last scrub for *pgid* +# @return 0 on success, 1 on error +# +function wait_for_scrub_mod() { + local pgid=$1 + local orig_primary=$2 + local last_scrub="$3" + local sname=${4:-last_scrub_stamp} + + for ((i=0; i < $TIMEOUT; i++)); do + sleep 0.2 + if test "$(get_last_scrub_stamp $pgid $sname)" '>' "$last_scrub" ; then + return 0 + fi + sleep 1 + # are we still the primary? + local current_primary=`bin/ceph pg $pgid query | jq '.acting[0]' ` + if [ $orig_primary != $current_primary ]; then + echo $orig_primary no longer primary for $pgid + return 0 + fi + done + return 1 +} + +## +# A modified version of pg_scrub() +# +# Run scrub on **pgid** and wait until it completes. The pg_scrub +# function will fail if repair does not complete within $TIMEOUT +# seconds. The pg_scrub is complete whenever the +# **get_last_scrub_stamp** function reports a timestamp different from +# the one stored before starting the scrub, or whenever the Primary +# changes. +# +# @param pgid the id of the PG +# @return 0 on success, 1 on error +# +function pg_scrub_mod() { + local pgid=$1 + local last_scrub=$(get_last_scrub_stamp $pgid) + # locate the primary + local my_primary=`bin/ceph pg $pgid query | jq '.acting[0]' ` + local recovery=false + ceph pg scrub $pgid + #ceph --format json pg dump pgs | jq ".pg_stats | .[] | select(.pgid == \"$pgid\") | .state" + if ceph --format json pg dump pgs | jq ".pg_stats | .[] | select(.pgid == \"$pgid\") | .state" | grep -q recovering + then + recovery=true + fi + wait_for_scrub_mod $pgid $my_primary "$last_scrub" || return 1 + if test $recovery = "true" + then + return 2 + fi +} + +# Same as wait_background() except that it checks for exit code 2 and bumps recov_scrub_count +function wait_background_check() { + # We extract the PIDS from the variable name + pids=${!1} + + return_code=0 + for pid in $pids; do + wait $pid + retcode=$? 
+ if test $retcode -eq 2 + then + recov_scrub_count=$(expr $recov_scrub_count + 1) + elif test $retcode -ne 0 + then + # If one process failed then return 1 + return_code=1 + fi + done + + # We empty the variable reporting that all process ended + eval "$1=''" + + return $return_code +} + +# osd_scrub_during_recovery=true make sure scrub happens +function TEST_recovery_scrub_2() { + local dir=$1 + local poolname=test + + TESTDATA="testdata.$$" + OSDS=8 + PGS=32 + OBJECTS=40 + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true \ + --osd_scrub_interval_randomize_ratio=0.0 || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_scrub_during_recovery=true --osd_recovery_sleep=10 || return 1 + done + + # Create a pool with $PGS pgs + create_pool $poolname $PGS $PGS + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1M count=50 + for i in $(seq 1 $OBJECTS) + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + ceph osd pool set $poolname size 3 + + ceph pg dump pgs + + # Wait for recovery to start + count=0 + while(true) + do + #ceph --format json pg dump pgs | jq '.pg_stats | [.[].state]' + if test $(ceph --format json pg dump pgs | + jq '.pg_stats | [.[].state]'| grep recovering | wc -l) -ge 2 + then + break + fi + sleep 2 + if test "$count" -eq "10" + then + echo "Not enough recovery started simultaneously" + return 1 + fi + count=$(expr $count + 1) + done + ceph pg dump pgs + + pids="" + recov_scrub_count=0 + for pg in $(seq 0 $(expr $PGS - 1)) + do + run_in_background pids pg_scrub_mod $poolid.$(printf "%x" $pg) + done + wait_background_check pids + return_code=$? + if [ $return_code -ne 0 ]; then return $return_code; fi + + ERRORS=0 + if test $recov_scrub_count -eq 0 + then + echo "No scrubs occurred while PG recovering" + ERRORS=$(expr $ERRORS + 1) + fi + + pidfile=$(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') + pid=$(cat $pidfile) + if ! 
kill -0 $pid + then + echo "OSD crash occurred" + #tail -100 $dir/osd.0.log + ERRORS=$(expr $ERRORS + 1) + fi + + # Work around for http://tracker.ceph.com/issues/38195 + kill_daemons $dir #|| return 1 + + declare -a err_strings + err_strings[0]="not scheduling scrubs due to active recovery" + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + grep "not scheduling scrubs" $dir/osd.${osd}.log + done + for err_string in "${err_strings[@]}" + do + found=false + for osd in $(seq 0 $(expr $OSDS - 1)) + do + if grep "$err_string" $dir/osd.${osd}.log > /dev/null; + then + found=true + fi + done + if [ "$found" = "true" ]; then + echo "Found log message not expected '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + teardown $dir || return 1 + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + echo "TEST PASSED" + return 0 +} + +main osd-recovery-scrub "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh osd-recovery-scrub.sh" +# End: diff --git a/qa/standalone/scrub/osd-scrub-dump.sh b/qa/standalone/scrub/osd-scrub-dump.sh new file mode 100755 index 000000000..f21ec7801 --- /dev/null +++ b/qa/standalone/scrub/osd-scrub-dump.sh @@ -0,0 +1,180 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +MAX_SCRUBS=4 +SCRUB_SLEEP=3 +POOL_SIZE=3 + +function run() { + local dir=$1 + shift + local CHUNK_MAX=5 + + export CEPH_MON="127.0.0.1:7184" # git grep '\<7184\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd_max_scrubs=$MAX_SCRUBS " + CEPH_ARGS+="--osd_shallow_scrub_chunk_max=$CHUNK_MAX " + CEPH_ARGS+="--osd_scrub_sleep=$SCRUB_SLEEP " + CEPH_ARGS+="--osd_pool_default_size=$POOL_SIZE " + # Set scheduler to "wpq" until there's a reliable way to query scrub states + # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the + # scrub sleep to 0 and as a result the checks in the test fail. 
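+    # (The small shallow-scrub chunk size and the non-zero scrub sleep set
+    # above likewise stretch each scrub out, so the reservation counts sampled
+    # by TEST_recover_unexpected below remain observable for several loop
+    # iterations.)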
+ CEPH_ARGS+="--osd_op_queue=wpq " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_recover_unexpected() { + local dir=$1 + shift + local OSDS=6 + local PGS=16 + local POOLS=3 + local OBJS=1000 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for o in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $o + done + + for i in $(seq 1 $POOLS) + do + create_pool test$i $PGS $PGS + done + + wait_for_clean || return 1 + + dd if=/dev/urandom of=datafile bs=4k count=2 + for i in $(seq 1 $POOLS) + do + for j in $(seq 1 $OBJS) + do + rados -p test$i put obj$j datafile + done + done + rm datafile + + ceph osd set noscrub + ceph osd set nodeep-scrub + + for qpg in $(ceph pg dump pgs --format=json-pretty | jq '.pg_stats[].pgid') + do + primary=$(ceph pg dump pgs --format=json | jq ".pg_stats[] | select(.pgid == $qpg) | .acting_primary") + eval pg=$qpg # strip quotes around qpg + ceph tell $pg scrub + done + + ceph pg dump pgs + + max=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_scrub_reservations | jq '.osd_max_scrubs') + if [ $max != $MAX_SCRUBS ]; then + echo "ERROR: Incorrect osd_max_scrubs from dump_scrub_reservations" + return 1 + fi + + ceph osd unset noscrub + + ok=false + for i in $(seq 0 300) + do + ceph pg dump pgs + if ceph pg dump pgs | grep '+scrubbing'; then + ok=true + break + fi + sleep 1 + done + if test $ok = "false"; then + echo "ERROR: Test set-up failed no scrubbing" + return 1 + fi + + local total=0 + local zerocount=0 + local maxzerocount=3 + while(true) + do + pass=0 + for o in $(seq 0 $(expr $OSDS - 1)) + do + CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations + scrubs=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations | jq '.scrubs_local + .scrubs_remote') + if [ $scrubs -gt $MAX_SCRUBS ]; then + echo "ERROR: More than $MAX_SCRUBS currently reserved" + return 1 + fi + pass=$(expr $pass + $scrubs) + done + if [ $pass = "0" ]; then + zerocount=$(expr $zerocount + 1) + fi + if [ $zerocount -gt $maxzerocount ]; then + break + fi + total=$(expr $total + $pass) + if [ $total -gt 0 ]; then + # already saw some reservations, so wait longer to avoid excessive over-counting. + # Note the loop itself takes about 2-3 seconds + sleep $(expr $SCRUB_SLEEP - 2) + else + sleep 0.5 + fi + done + + # Check that there are no more scrubs + for i in $(seq 0 5) + do + if ceph pg dump pgs | grep '+scrubbing'; then + echo "ERROR: Extra scrubs after test completion...not expected" + return 1 + fi + sleep $SCRUB_SLEEP + done + + echo $total total reservations seen + + # Sort of arbitraty number based on PGS * POOLS * POOL_SIZE as the number of total scrub + # reservations that must occur. However, the loop above might see the same reservation more + # than once. 
+ actual_reservations=$(expr $PGS \* $POOLS \* $POOL_SIZE) + if [ $total -lt $actual_reservations ]; then + echo "ERROR: Unexpectedly low amount of scrub reservations seen during test" + return 1 + fi + + return 0 +} + + +main osd-scrub-dump "$@" + +# Local Variables: +# compile-command: "cd build ; make check && \ +# ../qa/run-standalone.sh osd-scrub-dump.sh" +# End: diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh new file mode 100755 index 000000000..13b30360c --- /dev/null +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -0,0 +1,6255 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +set -x +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +if [ `uname` = FreeBSD ]; then + # erasure coding overwrites are only tested on Bluestore + # erasure coding on filestore is unsafe + # http://docs.ceph.com/en/latest/rados/operations/erasure-code/#erasure-coding-with-overwrites + use_ec_overwrite=false +else + use_ec_overwrite=true +fi + +# Test development and debugging +# Set to "yes" in order to ignore diff errors and save results to update test +getjson="no" + +# Filter out mtime and local_mtime dates, version, prior_version and last_reqid (client) from any object_info. +jqfilter='def walk(f): + . as $in + | if type == "object" then + reduce keys[] as $key + ( {}; . + { ($key): ($in[$key] | walk(f)) } ) | f + elif type == "array" then map( walk(f) ) | f + else f + end; +walk(if type == "object" then del(.mtime) else . end) +| walk(if type == "object" then del(.local_mtime) else . end) +| walk(if type == "object" then del(.last_reqid) else . end) +| walk(if type == "object" then del(.version) else . end) +| walk(if type == "object" then del(.prior_version) else . 
end)' + +sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print(json.dumps(ud, sort_keys=True, indent=2))' + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7107" # git grep '\<7107\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + CEPH_ARGS+="--osd-skip-data-digest=false " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function add_something() { + local dir=$1 + local poolname=$2 + local obj=${3:-SOMETHING} + local scrub=${4:-noscrub} + + if [ "$scrub" = "noscrub" ]; + then + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + else + ceph osd unset noscrub || return 1 + ceph osd unset nodeep-scrub || return 1 + fi + + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + rados --pool $poolname put $obj $dir/ORIGINAL || return 1 +} + +# +# Corrupt one copy of a replicated pool +# +function TEST_corrupt_and_repair_replicated() { + local dir=$1 + local poolname=rbd + + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + add_something $dir $poolname || return 1 + corrupt_and_repair_one $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1 + # Reproduces http://tracker.ceph.com/issues/8914 + corrupt_and_repair_one $dir $poolname $(get_primary $poolname SOMETHING) || return 1 +} + +# +# Allow repair to be scheduled when some recovering is still undergoing on the same OSD +# +function TEST_allow_repair_during_recovery() { + local dir=$1 + local poolname=rbd + + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + run_osd $dir 1 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + add_something $dir $poolname || return 1 + corrupt_and_repair_one $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1 +} + +# +# Skip non-repair scrub correctly during recovery +# +function TEST_skip_non_repair_during_recovery() { + local dir=$1 + local poolname=rbd + + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + run_osd $dir 1 --osd_scrub_during_recovery=false \ + --osd_repair_during_recovery=true \ + --osd_debug_pretend_recovery_active=true || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + add_something $dir $poolname || return 1 + scrub_and_not_schedule $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1 +} + +function scrub_and_not_schedule() { + local dir=$1 + local poolname=$2 + local osd=$3 + + # + # 1) start a non-repair scrub + # + local pg=$(get_pg $poolname SOMETHING) + local last_scrub=$(get_last_scrub_stamp $pg) + ceph pg scrub $pg + + # + # 2) Assure the scrub is not scheduled + # + for ((i=0; i < 3; i++)); do + if test "$(get_last_scrub_stamp $pg)" 
'>' "$last_scrub" ; then + return 1 + fi + sleep 1 + done + + # + # 3) Access to the file must OK + # + objectstore_tool $dir $osd SOMETHING list-attrs || return 1 + rados --pool $poolname get SOMETHING $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 +} + +function corrupt_and_repair_two() { + local dir=$1 + local poolname=$2 + local first=$3 + local second=$4 + + # + # 1) remove the corresponding file from the OSDs + # + pids="" + run_in_background pids objectstore_tool $dir $first SOMETHING remove + run_in_background pids objectstore_tool $dir $second SOMETHING remove + wait_background pids + return_code=$? + if [ $return_code -ne 0 ]; then return $return_code; fi + + # + # 2) repair the PG + # + local pg=$(get_pg $poolname SOMETHING) + repair $pg + # + # 3) The files must be back + # + pids="" + run_in_background pids objectstore_tool $dir $first SOMETHING list-attrs + run_in_background pids objectstore_tool $dir $second SOMETHING list-attrs + wait_background pids + return_code=$? + if [ $return_code -ne 0 ]; then return $return_code; fi + + rados --pool $poolname get SOMETHING $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 +} + +# +# 1) add an object +# 2) remove the corresponding file from a designated OSD +# 3) repair the PG +# 4) check that the file has been restored in the designated OSD +# +function corrupt_and_repair_one() { + local dir=$1 + local poolname=$2 + local osd=$3 + + # + # 1) remove the corresponding file from the OSD + # + objectstore_tool $dir $osd SOMETHING remove || return 1 + # + # 2) repair the PG + # + local pg=$(get_pg $poolname SOMETHING) + repair $pg + # + # 3) The file must be back + # + objectstore_tool $dir $osd SOMETHING list-attrs || return 1 + rados --pool $poolname get SOMETHING $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 +} + +function corrupt_and_repair_erasure_coded() { + local dir=$1 + local poolname=$2 + + add_something $dir $poolname || return 1 + + local primary=$(get_primary $poolname SOMETHING) + local -a osds=($(get_osds $poolname SOMETHING | sed -e "s/$primary//")) + local not_primary_first=${osds[0]} + local not_primary_second=${osds[1]} + + # Reproduces http://tracker.ceph.com/issues/10017 + corrupt_and_repair_one $dir $poolname $primary || return 1 + # Reproduces http://tracker.ceph.com/issues/10409 + corrupt_and_repair_one $dir $poolname $not_primary_first || return 1 + corrupt_and_repair_two $dir $poolname $not_primary_first $not_primary_second || return 1 + corrupt_and_repair_two $dir $poolname $primary $not_primary_first || return 1 + +} + +function auto_repair_erasure_coded() { + local dir=$1 + local allow_overwrites=$2 + local poolname=ecpool + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-auto-repair=true \ + --osd-deep-scrub-interval=5 \ + --osd-scrub-max-interval=5 \ + --osd-scrub-min-interval=5 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 2) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + create_rbd_pool || return 1 + wait_for_clean || return 1 + + # Create an EC pool + create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1 + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || 
return 1 + # Wait for auto repair + local pgid=$(get_pg $poolname SOMETHING) + wait_for_scrub $pgid "$(get_last_scrub_stamp $pgid)" + wait_for_clean || return 1 + # Verify - the file should be back + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1 + rados --pool $poolname get SOMETHING $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 +} + +function TEST_auto_repair_erasure_coded_appends() { + auto_repair_erasure_coded $1 false +} + +function TEST_auto_repair_erasure_coded_overwrites() { + if [ "$use_ec_overwrite" = "true" ]; then + auto_repair_erasure_coded $1 true + fi +} + +# initiate a scrub, then check for the (expected) 'scrubbing' and the +# (not expected until an error was identified) 'repair' +# Arguments: osd#, pg, sleep time +function initiate_and_fetch_state() { + local the_osd="osd.$1" + local pgid=$2 + local last_scrub=$(get_last_scrub_stamp $pgid) + + set_config "osd" "$1" "osd_scrub_sleep" "$3" + set_config "osd" "$1" "osd_scrub_auto_repair" "true" + + flush_pg_stats + date --rfc-3339=ns + + # note: must initiate a "regular" (periodic) deep scrub - not an operator-initiated one + env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) deep_scrub "$pgid" + env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) scrub "$pgid" + + # wait for 'scrubbing' to appear + for ((i=0; i < 80; i++)); do + + st=`ceph pg $pgid query --format json | jq '.state' ` + echo $i ") state now: " $st + + case "$st" in + *scrubbing*repair* ) echo "found scrub+repair"; return 1;; # PR #41258 should have prevented this + *scrubbing* ) echo "found scrub"; return 0;; + *inconsistent* ) echo "Got here too late. Scrub has already finished"; return 1;; + *recovery* ) echo "Got here too late. Scrub has already finished."; return 1;; + * ) echo $st;; + esac + + if [ $((i % 10)) == 4 ]; then + echo "loop --------> " $i + fi + sleep 0.3 + done + + echo "Timeout waiting for deep-scrub of " $pgid " on " $the_osd " to start" + return 1 +} + +function wait_end_of_scrub() { # osd# pg + local the_osd="osd.$1" + local pgid=$2 + + for ((i=0; i < 40; i++)); do + st=`ceph pg $pgid query --format json | jq '.state' ` + echo "wait-scrub-end state now: " $st + [[ $st =~ (.*scrubbing.*) ]] || break + if [ $((i % 5)) == 4 ] ; then + flush_pg_stats + fi + sleep 0.3 + done + + if [[ $st =~ (.*scrubbing.*) ]] + then + # a timeout + return 1 + fi + return 0 +} + + +function TEST_auto_repair_bluestore_tag() { + local dir=$1 + local poolname=testpool + + # Launch a cluster with 3 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + # Set scheduler to "wpq" until there's a reliable way to query scrub states + # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the + # scrub sleep to 0 and as a result the checks in the test fail. 
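+    # The args below enable scrub auto-repair and disable scrub-interval
+    # randomization, so the deep scrub triggered via initiate_and_fetch_state()
+    # behaves deterministically and repairs the replica removed further down.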
+ local ceph_osd_args="--osd-scrub-auto-repair=true \ + --osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0 \ + --osd-op-queue=wpq" + for id in $(seq 0 2) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1 + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1 + + local pgid=$(get_pg $poolname SOMETHING) + local primary=$(get_primary $poolname SOMETHING) + echo "Affected PG " $pgid " w/ primary " $primary + local last_scrub_stamp="$(get_last_scrub_stamp $pgid)" + initiate_and_fetch_state $primary $pgid "3.0" + r=$? + echo "initiate_and_fetch_state ret: " $r + set_config "osd" "$1" "osd_scrub_sleep" "0" + if [ $r -ne 0 ]; then + return 1 + fi + + wait_end_of_scrub "$primary" "$pgid" || return 1 + ceph pg dump pgs + + # Verify - the file should be back + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1 + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING get-bytes $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + grep scrub_finish $dir/osd.${primary}.log +} + + +function TEST_auto_repair_bluestore_basic() { + local dir=$1 + local poolname=testpool + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-auto-repair=true \ + --osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 2) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1 + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1 + + local pgid=$(get_pg $poolname SOMETHING) + local primary=$(get_primary $poolname SOMETHING) + local last_scrub_stamp="$(get_last_scrub_stamp $pgid)" + ceph tell $pgid deep_scrub + ceph tell $pgid scrub + + # Wait for auto repair + wait_for_scrub $pgid "$last_scrub_stamp" || return 1 + wait_for_clean || return 1 + ceph pg dump pgs + # Verify - the file should be back + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1 + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING get-bytes $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + grep scrub_finish $dir/osd.${primary}.log +} + +function TEST_auto_repair_bluestore_scrub() { + local dir=$1 + local poolname=testpool + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-auto-repair=true \ + --osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0 \ + --osd-scrub-backoff-ratio=0" + for id in $(seq 0 2) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || 
return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1 + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1 + + local pgid=$(get_pg $poolname SOMETHING) + local primary=$(get_primary $poolname SOMETHING) + local last_scrub_stamp="$(get_last_scrub_stamp $pgid)" + ceph tell $pgid scrub + + # Wait for scrub -> auto repair + wait_for_scrub $pgid "$last_scrub_stamp" || return 1 + ceph pg dump pgs + # Actually this causes 2 scrubs, so we better wait a little longer + sleep 5 + wait_for_clean || return 1 + ceph pg dump pgs + # Verify - the file should be back + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1 + rados --pool $poolname get SOMETHING $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + grep scrub_finish $dir/osd.${primary}.log + + # This should have caused 1 object to be repaired + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "1" || return 1 +} + +function TEST_auto_repair_bluestore_failed() { + local dir=$1 + local poolname=testpool + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-auto-repair=true \ + --osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 2) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + for i in $(seq 1 10) + do + rados --pool $poolname put obj$i $dir/ORIGINAL || return 1 + done + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj1 remove || return 1 + # obj2 can't be repaired + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj2 remove || return 1 + objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 rm-attr _ || return 1 + + local pgid=$(get_pg $poolname obj1) + local primary=$(get_primary $poolname obj1) + local last_scrub_stamp="$(get_last_scrub_stamp $pgid)" + ceph tell $pgid deep_scrub + ceph tell $pgid scrub + + # Wait for auto repair + wait_for_scrub $pgid "$last_scrub_stamp" || return 1 + wait_for_clean || return 1 + flush_pg_stats + grep scrub_finish $dir/osd.${primary}.log + grep -q "scrub_finish.*still present after re-scrub" $dir/osd.${primary}.log || return 1 + ceph pg dump pgs + ceph pg dump pgs | grep -q "^${pgid}.*+failed_repair" || return 1 + + # Verify - obj1 should be back + # Restarted osd get $ceph_osd_args passed + objectstore_tool $dir $(get_not_primary $poolname obj1) obj1 list-attrs || return 1 + rados --pool $poolname get obj1 $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 + grep scrub_finish $dir/osd.${primary}.log + + # Make it repairable + objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 remove || return 1 + repair $pgid + sleep 2 + + flush_pg_stats + ceph pg dump pgs + ceph pg dump pgs | grep -q -e "^${pgid}.* active+clean " -e "^${pgid}.* active+clean+wait " || return 1 + grep scrub_finish 
$dir/osd.${primary}.log +} + +function TEST_auto_repair_bluestore_failed_norecov() { + local dir=$1 + local poolname=testpool + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-auto-repair=true \ + --osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 2) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + for i in $(seq 1 10) + do + rados --pool $poolname put obj$i $dir/ORIGINAL || return 1 + done + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + # obj1 can't be repaired + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj1 remove || return 1 + objectstore_tool $dir $(get_primary $poolname SOMETHING) obj1 rm-attr _ || return 1 + # obj2 can't be repaired + objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj2 remove || return 1 + objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 rm-attr _ || return 1 + + local pgid=$(get_pg $poolname obj1) + local primary=$(get_primary $poolname obj1) + local last_scrub_stamp="$(get_last_scrub_stamp $pgid)" + ceph tell $pgid deep_scrub + ceph tell $pgid scrub + + # Wait for auto repair + wait_for_scrub $pgid "$last_scrub_stamp" || return 1 + wait_for_clean || return 1 + flush_pg_stats + grep -q "scrub_finish.*present with no repair possible" $dir/osd.${primary}.log || return 1 + ceph pg dump pgs + ceph pg dump pgs | grep -q "^${pgid}.*+failed_repair" || return 1 +} + +function TEST_repair_stats() { + local dir=$1 + local poolname=testpool + local OSDS=2 + local OBJS=30 + # This need to be an even number + local REPAIRS=20 + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 $(expr $OSDS - 1)) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + create_pool $poolname 1 1 || return 1 + ceph osd pool set $poolname size 2 + wait_for_clean || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + for i in $(seq 1 $OBJS) + do + rados --pool $poolname put obj$i $dir/ORIGINAL || return 1 + done + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + local other=$(get_not_primary $poolname obj1) + local pgid=$(get_pg $poolname obj1) + local primary=$(get_primary $poolname obj1) + + kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1 + kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1 + for i in $(seq 1 $REPAIRS) + do + # Remove from both osd.0 and osd.1 + OSD=$(expr $i % 2) + _objectstore_tool_nodown $dir $OSD obj$i remove || return 1 + done + activate_osd $dir $primary $ceph_osd_args || return 1 + activate_osd $dir $other $ceph_osd_args || return 1 + wait_for_clean || return 1 + + repair $pgid + wait_for_clean || return 1 + ceph pg dump pgs + flush_pg_stats + + # This should have caused 1 object to be repaired + ceph pg $pgid query | jq '.info.stats.stat_sum' + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "$REPAIRS" || return 1 + + ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary )" + 
COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary ).num_shards_repaired") + test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1 + + ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other )" + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other ).num_shards_repaired") + test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1 + + ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum" + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "$REPAIRS" || return 1 +} + +function TEST_repair_stats_ec() { + local dir=$1 + local poolname=testpool + local OSDS=3 + local OBJS=30 + # This need to be an even number + local REPAIRS=26 + local allow_overwrites=false + + # Launch a cluster with 5 seconds scrub interval + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \ + --osd-scrub-interval-randomize-ratio=0" + for id in $(seq 0 $(expr $OSDS - 1)) ; do + run_osd $dir $id $ceph_osd_args || return 1 + done + + # Create an EC pool + create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1 + + # Put an object + local payload=ABCDEF + echo $payload > $dir/ORIGINAL + for i in $(seq 1 $OBJS) + do + rados --pool $poolname put obj$i $dir/ORIGINAL || return 1 + done + + # Remove the object from one shard physically + # Restarted osd get $ceph_osd_args passed + local other=$(get_not_primary $poolname obj1) + local pgid=$(get_pg $poolname obj1) + local primary=$(get_primary $poolname obj1) + + kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1 + kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1 + for i in $(seq 1 $REPAIRS) + do + # Remove from both osd.0 and osd.1 + OSD=$(expr $i % 2) + _objectstore_tool_nodown $dir $OSD obj$i remove || return 1 + done + activate_osd $dir $primary $ceph_osd_args || return 1 + activate_osd $dir $other $ceph_osd_args || return 1 + wait_for_clean || return 1 + + repair $pgid + wait_for_clean || return 1 + ceph pg dump pgs + flush_pg_stats + + # This should have caused 1 object to be repaired + ceph pg $pgid query | jq '.info.stats.stat_sum' + COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired') + test "$COUNT" = "$REPAIRS" || return 1 + + for osd in $(seq 0 $(expr $OSDS - 1)) ; do + if [ $osd = $other -o $osd = $primary ]; then + repair=$(expr $REPAIRS / 2) + else + repair="0" + fi + + ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd )" + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd ).num_shards_repaired") + test "$COUNT" = "$repair" || return 1 + done + + ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum" + COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired") + test "$COUNT" = "$REPAIRS" || return 1 +} + +function corrupt_and_repair_jerasure() { + local dir=$1 + local allow_overwrites=$2 + local poolname=ecpool + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for id in $(seq 0 3) ; do + run_osd $dir $id || return 1 + done + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_ec_pool $poolname $allow_overwrites k=2 m=2 || return 1 + corrupt_and_repair_erasure_coded $dir $poolname || return 1 +} + +function TEST_corrupt_and_repair_jerasure_appends() { + corrupt_and_repair_jerasure $1 false +} + +function 
TEST_corrupt_and_repair_jerasure_overwrites() { + if [ "$use_ec_overwrite" = "true" ]; then + corrupt_and_repair_jerasure $1 true + fi +} + +function corrupt_and_repair_lrc() { + local dir=$1 + local allow_overwrites=$2 + local poolname=ecpool + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for id in $(seq 0 9) ; do + run_osd $dir $id || return 1 + done + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_ec_pool $poolname $allow_overwrites k=4 m=2 l=3 plugin=lrc || return 1 + corrupt_and_repair_erasure_coded $dir $poolname || return 1 +} + +function TEST_corrupt_and_repair_lrc_appends() { + corrupt_and_repair_lrc $1 false +} + +function TEST_corrupt_and_repair_lrc_overwrites() { + if [ "$use_ec_overwrite" = "true" ]; then + corrupt_and_repair_lrc $1 true + fi +} + +function unfound_erasure_coded() { + local dir=$1 + local allow_overwrites=$2 + local poolname=ecpool + local payload=ABCDEF + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for id in $(seq 0 3) ; do + run_osd $dir $id || return 1 + done + + create_ec_pool $poolname $allow_overwrites k=2 m=2 || return 1 + + add_something $dir $poolname || return 1 + + local primary=$(get_primary $poolname SOMETHING) + local -a osds=($(get_osds $poolname SOMETHING | sed -e "s/$primary//")) + local not_primary_first=${osds[0]} + local not_primary_second=${osds[1]} + local not_primary_third=${osds[2]} + + # + # 1) remove the corresponding file from the OSDs + # + pids="" + run_in_background pids objectstore_tool $dir $not_primary_first SOMETHING remove + run_in_background pids objectstore_tool $dir $not_primary_second SOMETHING remove + run_in_background pids objectstore_tool $dir $not_primary_third SOMETHING remove + wait_background pids + return_code=$? + if [ $return_code -ne 0 ]; then return $return_code; fi + + # + # 2) repair the PG + # + local pg=$(get_pg $poolname SOMETHING) + repair $pg + # + # 3) check pg state + # + # it may take a bit to appear due to mon/mgr asynchrony + for f in `seq 1 60`; do + ceph -s | grep "1/1 objects unfound" && break + sleep 1 + done + ceph -s|grep "4 up" || return 1 + ceph -s|grep "4 in" || return 1 + ceph -s|grep "1/1 objects unfound" || return 1 +} + +function TEST_unfound_erasure_coded_appends() { + unfound_erasure_coded $1 false +} + +function TEST_unfound_erasure_coded_overwrites() { + if [ "$use_ec_overwrite" = "true" ]; then + unfound_erasure_coded $1 true + fi +} + +# +# list_missing for EC pool +# +function list_missing_erasure_coded() { + local dir=$1 + local allow_overwrites=$2 + local poolname=ecpool + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for id in $(seq 0 2) ; do + run_osd $dir $id || return 1 + done + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1 + + # Put an object and remove the two shards (including primary) + add_something $dir $poolname MOBJ0 || return 1 + local -a osds0=($(get_osds $poolname MOBJ0)) + + # Put another object and remove two shards (excluding primary) + add_something $dir $poolname MOBJ1 || return 1 + local -a osds1=($(get_osds $poolname MOBJ1)) + + # Stop all osd daemons + for id in $(seq 0 2) ; do + kill_daemons $dir TERM osd.$id >&2 < /dev/null || return 1 + done + + id=${osds0[0]} + ceph-objectstore-tool --data-path $dir/$id \ + MOBJ0 remove || return 1 + id=${osds0[1]} + ceph-objectstore-tool --data-path $dir/$id \ + MOBJ0 remove || return 1 + + id=${osds1[1]} + ceph-objectstore-tool --data-path $dir/$id \ + MOBJ1 remove || 
return 1 + id=${osds1[2]} + ceph-objectstore-tool --data-path $dir/$id \ + MOBJ1 remove || return 1 + + for id in $(seq 0 2) ; do + activate_osd $dir $id >&2 || return 1 + done + create_rbd_pool || return 1 + wait_for_clean || return 1 + + # Get get - both objects should in the same PG + local pg=$(get_pg $poolname MOBJ0) + + # Repair the PG, which triggers the recovering, + # and should mark the object as unfound + repair $pg + + for i in $(seq 0 120) ; do + [ $i -lt 60 ] || return 1 + matches=$(ceph pg $pg list_unfound | egrep "MOBJ0|MOBJ1" | wc -l) + [ $matches -eq 2 ] && break + done +} + +function TEST_list_missing_erasure_coded_appends() { + list_missing_erasure_coded $1 false +} + +function TEST_list_missing_erasure_coded_overwrites() { + if [ "$use_ec_overwrite" = "true" ]; then + list_missing_erasure_coded $1 true + fi +} + +# +# Corrupt one copy of a replicated pool +# +function TEST_corrupt_scrub_replicated() { + local dir=$1 + local poolname=csr_pool + local total_objs=19 + + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_pool foo 1 || return 1 + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} + add_something $dir $poolname $objname || return 1 + + rados --pool $poolname setomapheader $objname hdr-$objname || return 1 + rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1 + done + + # Increase file 1 MB + 1KB + dd if=/dev/zero of=$dir/new.ROBJ19 bs=1024 count=1025 + rados --pool $poolname put $objname $dir/new.ROBJ19 || return 1 + rm -f $dir/new.ROBJ19 + + local pg=$(get_pg $poolname ROBJ0) + local primary=$(get_primary $poolname ROBJ0) + + # Compute an old omap digest and save oi + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) \ + config set osd_deep_scrub_update_digest_min_age 0 + CEPH_ARGS='' ceph daemon $(get_asok_path osd.1) \ + config set osd_deep_scrub_update_digest_min_age 0 + pg_deep_scrub $pg + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} + + # Alternate corruption between osd.0 and osd.1 + local osd=$(expr $i % 2) + + case $i in + 1) + # Size (deep scrub data_digest too) + local payload=UVWXYZZZ + echo $payload > $dir/CORRUPT + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 2) + # digest (deep scrub only) + local payload=UVWXYZ + echo $payload > $dir/CORRUPT + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 3) + # missing + objectstore_tool $dir $osd $objname remove || return 1 + ;; + + 4) + # Modify omap value (deep scrub only) + objectstore_tool $dir $osd $objname set-omap key-$objname $dir/CORRUPT || return 1 + ;; + + 5) + # Delete omap key (deep scrub only) + objectstore_tool $dir $osd $objname rm-omap key-$objname || return 1 + ;; + + 6) + # Add extra omap key (deep scrub only) + echo extra > $dir/extra-val + objectstore_tool $dir $osd $objname set-omap key2-$objname $dir/extra-val || return 1 + rm $dir/extra-val + ;; + + 7) + # Modify omap header (deep scrub only) + echo -n newheader > $dir/hdr + objectstore_tool $dir $osd $objname set-omaphdr $dir/hdr || return 1 + rm $dir/hdr + ;; + + 8) + rados --pool $poolname setxattr $objname key1-$objname val1-$objname || return 1 + rados --pool $poolname setxattr $objname key2-$objname val2-$objname || return 1 + + # Break xattrs + echo -n bad-val > $dir/bad-val + objectstore_tool 
$dir $osd $objname set-attr _key1-$objname $dir/bad-val || return 1 + objectstore_tool $dir $osd $objname rm-attr _key2-$objname || return 1 + echo -n val3-$objname > $dir/newval + objectstore_tool $dir $osd $objname set-attr _key3-$objname $dir/newval || return 1 + rm $dir/bad-val $dir/newval + ;; + + 9) + objectstore_tool $dir $osd $objname get-attr _ > $dir/robj9-oi + echo -n D > $dir/change + rados --pool $poolname put $objname $dir/change + objectstore_tool $dir $osd $objname set-attr _ $dir/robj9-oi + rm $dir/oi $dir/change + ;; + + # ROBJ10 must be handled after digests are re-computed by a deep scrub below + # ROBJ11 must be handled with config change before deep scrub + # ROBJ12 must be handled with config change before scrubs + # ROBJ13 must be handled before scrubs + + 14) + echo -n bad-val > $dir/bad-val + objectstore_tool $dir 0 $objname set-attr _ $dir/bad-val || return 1 + objectstore_tool $dir 1 $objname rm-attr _ || return 1 + rm $dir/bad-val + ;; + + 15) + objectstore_tool $dir $osd $objname rm-attr _ || return 1 + ;; + + 16) + objectstore_tool $dir 0 $objname rm-attr snapset || return 1 + echo -n bad-val > $dir/bad-val + objectstore_tool $dir 1 $objname set-attr snapset $dir/bad-val || return 1 + ;; + + 17) + # Deep-scrub only (all replicas are different from the object info) + local payload=ROBJ17 + echo $payload > $dir/new.ROBJ17 + objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ17 || return 1 + objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ17 || return 1 + ;; + + 18) + # Deep-scrub only (all replicas are different from the object info) + local payload=ROBJ18 + echo $payload > $dir/new.ROBJ18 + objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ18 || return 1 + objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ18 || return 1 + # Make one replica have a different object info, so a full repair must happen too + objectstore_tool $dir $osd $objname corrupt-info || return 1 + ;; + + 19) + # Set osd-max-object-size smaller than this object's size + + esac + done + + local pg=$(get_pg $poolname ROBJ0) + + ceph tell osd.\* injectargs -- --osd-max-object-size=1048576 + + inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + + pg_scrub $pg + + ERRORS=0 + declare -a err_strings + err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:30259878:::ROBJ15:head : candidate had a missing info key" + err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:33aca486:::ROBJ18:head : object info inconsistent " + err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:5c7b2c47:::ROBJ16:head : candidate had a corrupt snapset" + err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:5c7b2c47:::ROBJ16:head : candidate had a missing snapset key" + err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:5c7b2c47:::ROBJ16:head : failed to pick suitable object info" + err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:86586531:::ROBJ8:head : attr value mismatch '_key1-ROBJ8', attr name mismatch '_key3-ROBJ8', attr name mismatch '_key2-ROBJ8'" + err_strings[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:bc819597:::ROBJ12:head 
: candidate had a stat error" + err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:c0c86b1d:::ROBJ14:head : candidate had a missing info key" + err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:c0c86b1d:::ROBJ14:head : candidate had a corrupt info" + err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:c0c86b1d:::ROBJ14:head : failed to pick suitable object info" + err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : candidate size 9 info size 7 mismatch" + err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : size 9 != size 7 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from shard 0" + err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:d60617f9:::ROBJ13:head : candidate had a stat error" + err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 3:f2a5b2a4:::ROBJ3:head : missing" + err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ffdb2004:::ROBJ9:head : candidate size 1 info size 7 mismatch" + err_strings[15]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ffdb2004:::ROBJ9:head : object info inconsistent " + err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 3:c0c86b1d:::ROBJ14:head : no '_' attr" + err_strings[17]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 3:5c7b2c47:::ROBJ16:head : can't decode 'snapset' attr .* no longer understand old encoding version 3 < 97: Malformed input" + err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub : stat mismatch, got 19/19 objects, 0/0 clones, 18/19 dirty, 18/19 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 1049713/1049720 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes." + err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 1 missing, 8 inconsistent objects" + err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 18 errors" + err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:123a5f55:::ROBJ19:head : size 1049600 > 1048576 is too large" + + for err_string in "${err_strings[@]}" + do + if ! grep -q "$err_string" $dir/osd.${primary}.log + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + # Get epoch for repair-get requests + epoch=$(jq .epoch $dir/json) + + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "shards": [ + { + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "ROBJ1", + "key": "", + "snapid": -2, + "hash": 1454963827, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'58", + "prior_version": "21'3", + "last_reqid": "osd.1.0:57", + "user_version": 3, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf5fba2c6", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 9, + "errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ1", + "key": "", + "snapid": -2, + "hash": 1454963827, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'58", + "prior_version": "21'3", + "last_reqid": "osd.1.0:57", + "user_version": 3, + "size": 7, + "mtime": "2018-04-05 14:33:19.804040", + "local_mtime": "2018-04-05 14:33:19.804839", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf5fba2c6", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 3, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ1" + } + }, + { + "shards": [ + { + "errors": [ + "stat_error" + ], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ12", + "key": "", + "snapid": -2, + "hash": 3920199997, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'56", + "prior_version": "43'36", + "last_reqid": "osd.1.0:55", + "user_version": 36, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x067f306a", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "stat_error" + ], + "errors": [], + "object": { + "version": 36, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ12" + } + }, + { + "shards": [ + { + "errors": [ + "stat_error" + ], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ13", + "key": "", + "snapid": -2, + "hash": 2682806379, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'59", + "prior_version": "45'39", + "last_reqid": "osd.1.0:58", + "user_version": 39, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + 
"flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x6441854d", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "stat_error" + ], + "errors": [], + "object": { + "version": 39, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ13" + } + }, + { + "shards": [ + { + "object_info": "bad-val", + "size": 7, + "errors": [ + "info_corrupted" + ], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [ + "info_missing" + ], + "osd": 1, + "primary": true + } + ], + "union_shard_errors": [ + "info_missing", + "info_corrupted" + ], + "errors": [], + "object": { + "version": 0, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ14" + } + }, + { + "shards": [ + { + "object_info": { + "oid": { + "oid": "ROBJ15", + "key": "", + "snapid": -2, + "hash": 504996876, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'49", + "prior_version": "49'45", + "last_reqid": "osd.1.0:48", + "user_version": 45, + "size": 7, + "mtime": "2018-04-05 14:33:29.498969", + "local_mtime": "2018-04-05 14:33:29.499890", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [ + "info_missing" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ15", + "key": "", + "snapid": -2, + "hash": 504996876, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'49", + "prior_version": "49'45", + "last_reqid": "osd.1.0:48", + "user_version": 45, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "info_missing" + ], + "errors": [], + "object": { + "version": 45, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ15" + } + }, + { + "errors": [], + "object": { + "locator": "", + "name": "ROBJ16", + "nspace": "", + "snap": "head", + "version": 0 + }, + "shards": [ + { + "errors": [ + "snapset_missing" + ], + "osd": 0, + "primary": false, + "size": 7 + }, + { + "errors": [ + "snapset_corrupted" + ], + "osd": 1, + "primary": true, + "snapset": "bad-val", + "size": 7 + } + ], + "union_shard_errors": [ + "snapset_missing", + "snapset_corrupted" + ] + }, + { + "errors": [ + "object_info_inconsistency" + ], + "object": { + "locator": "", + "name": "ROBJ18", + "nspace": "", + "snap": "head" + }, + "selected_object_info": { + "alloc_hint_flags": 255, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": 
"0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "shards": [ + { + "errors": [], + "object_info": { + "alloc_hint_flags": 0, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "osd": 0, + "primary": false, + "size": 7 + }, + { + "errors": [], + "object_info": { + "alloc_hint_flags": 255, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [] + }, + { + "object": { + "name": "ROBJ19", + "nspace": "", + "locator": "", + "snap": "head", + "version": 58 + }, + "errors": [ + "size_too_large" + ], + "union_shard_errors": [], + "selected_object_info": { + "oid": { + "oid": "ROBJ19", + "key": "", + "snapid": -2, + "hash": 2868534344, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "63'59", + "prior_version": "63'58", + "last_reqid": "osd.1.0:58", + "user_version": 58, + "size": 1049600, + "mtime": "2019-08-09T23:33:58.340709+0000", + "local_mtime": "2019-08-09T23:33:58.345676+0000", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x3dde0ef3", + "omap_digest": "0xbffddd28", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "errors": [], + "size": 1049600 + }, + { + "osd": 1, + "primary": true, + "errors": [], + "size": 1049600 + } + ] + }, + { + "shards": [ + { + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "errors": [ + "missing" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ3", + "key": "", + "snapid": -2, + "hash": 625845583, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'61", + "prior_version": "25'9", + "last_reqid": "osd.1.0:60", + "user_version": 9, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x00b35dfd", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "missing" + ], + "errors": [], + "object": { + "version": 9, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ3" + } + }, + { + "shards": [ + { + "attrs": [ + { + "Base64": false, + "value": "bad-val", + "name": "key1-ROBJ8" + }, + { + "Base64": false, + "value": "val2-ROBJ8", + "name": "key2-ROBJ8" + } + ], + "size": 7, + "errors": [], + 
"osd": 0, + "primary": false + }, + { + "attrs": [ + { + "Base64": false, + "value": "val1-ROBJ8", + "name": "key1-ROBJ8" + }, + { + "Base64": false, + "value": "val3-ROBJ8", + "name": "key3-ROBJ8" + } + ], + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ8", + "key": "", + "snapid": -2, + "hash": 2359695969, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "79'66", + "prior_version": "79'65", + "last_reqid": "client.4554.0:1", + "user_version": 79, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xd6be81dc", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [], + "errors": [ + "attr_value_mismatch", + "attr_name_mismatch" + ], + "object": { + "version": 66, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ8" + } + }, + { + "shards": [ + { + "object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "95'67", + "prior_version": "51'64", + "last_reqid": "client.4649.0:1", + "user_version": 80, + "size": 1, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2b63260d", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 1, + "errors": [], + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'64", + "prior_version": "37'27", + "last_reqid": "osd.1.0:63", + "user_version": 27, + "size": 7, + "mtime": "2018-04-05 14:33:25.352485", + "local_mtime": "2018-04-05 14:33:25.353746", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 1, + "errors": [ + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "95'67", + "prior_version": "51'64", + "last_reqid": "client.4649.0:1", + "user_version": 80, + "size": 1, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2b63260d", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "obj_size_info_mismatch" + ], + "errors": [ + "object_info_inconsistency" + ], + "object": { + "version": 67, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ9" + } + } + ], + "epoch": 0 +} +EOF + + jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c 
"$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' $dir/json > save1.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + objname=ROBJ9 + # Change data and size again because digest was recomputed + echo -n ZZZ > $dir/change + rados --pool $poolname put $objname $dir/change + # Set one to an even older value + objectstore_tool $dir 0 $objname set-attr _ $dir/robj9-oi + rm $dir/oi $dir/change + + objname=ROBJ10 + objectstore_tool $dir 1 $objname get-attr _ > $dir/oi + rados --pool $poolname setomapval $objname key2-$objname val2-$objname + objectstore_tool $dir 0 $objname set-attr _ $dir/oi + objectstore_tool $dir 1 $objname set-attr _ $dir/oi + rm $dir/oi + + inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + + # ROBJ19 won't error this time + ceph tell osd.\* injectargs -- --osd-max-object-size=134217728 + + pg_deep_scrub $pg + + err_strings=() + err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:30259878:::ROBJ15:head : candidate had a missing info key" + err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:33aca486:::ROBJ18:head : data_digest 0xbd89c912 != data_digest 0x2ddbf8f5 from auth oi 3:33aca486:::ROBJ18:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 54 dd 2ddbf8f5 od ddc3680f alloc_hint [[]0 0 255[]][)], object info inconsistent " + err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:33aca486:::ROBJ18:head : data_digest 0xbd89c912 != data_digest 0x2ddbf8f5 from auth oi 3:33aca486:::ROBJ18:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 54 dd 2ddbf8f5 od ddc3680f alloc_hint [[]0 0 255[]][)]" + err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:33aca486:::ROBJ18:head : failed to pick suitable auth object" + err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:5c7b2c47:::ROBJ16:head : candidate had a corrupt snapset" + err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:5c7b2c47:::ROBJ16:head : candidate had a missing snapset key" + err_strings[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:5c7b2c47:::ROBJ16:head : failed to pick suitable object info" + err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:86586531:::ROBJ8:head : attr value mismatch '_key1-ROBJ8', attr name mismatch '_key3-ROBJ8', attr name mismatch '_key2-ROBJ8'" + err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:87abbf36:::ROBJ11:head : candidate had a read error" + err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:8aa5320e:::ROBJ17:head : data_digest 0x5af0c3ef != data_digest 0x2ddbf8f5 from auth oi 3:8aa5320e:::ROBJ17:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 51 dd 2ddbf8f5 od e9572720 alloc_hint [[]0 0 0[]][)]" + err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:8aa5320e:::ROBJ17:head : 
data_digest 0x5af0c3ef != data_digest 0x2ddbf8f5 from auth oi 3:8aa5320e:::ROBJ17:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 51 dd 2ddbf8f5 od e9572720 alloc_hint [[]0 0 0[]][)]" + err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:8aa5320e:::ROBJ17:head : failed to pick suitable auth object" + err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:8b55fa4b:::ROBJ7:head : omap_digest 0xefced57a != omap_digest 0x6a73cc07 from shard 1" + err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:8b55fa4b:::ROBJ7:head : omap_digest 0x6a73cc07 != omap_digest 0xefced57a from auth oi 3:8b55fa4b:::ROBJ7:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 21 dd 2ddbf8f5 od efced57a alloc_hint [[]0 0 0[]][)]" + err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:a53c12e8:::ROBJ6:head : omap_digest 0x689ee887 != omap_digest 0x179c919f from shard 1, omap_digest 0x689ee887 != omap_digest 0x179c919f from auth oi 3:a53c12e8:::ROBJ6:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 18 dd 2ddbf8f5 od 179c919f alloc_hint [[]0 0 0[]][)]" + err_strings[15]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:b1f19cbd:::ROBJ10:head : omap_digest 0xa8dd5adc != omap_digest 0xc2025a24 from auth oi 3:b1f19cbd:::ROBJ10:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [[]0 0 0[]][)]" + err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:b1f19cbd:::ROBJ10:head : omap_digest 0xa8dd5adc != omap_digest 0xc2025a24 from auth oi 3:b1f19cbd:::ROBJ10:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24 alloc_hint [[]0 0 0[]][)]" + err_strings[17]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:b1f19cbd:::ROBJ10:head : failed to pick suitable auth object" + err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:bc819597:::ROBJ12:head : candidate had a stat error" + err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:c0c86b1d:::ROBJ14:head : candidate had a missing info key" + err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:c0c86b1d:::ROBJ14:head : candidate had a corrupt info" + err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:c0c86b1d:::ROBJ14:head : failed to pick suitable object info" + err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : candidate size 9 info size 7 mismatch" + err_strings[23]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:ce3f1d6a:::ROBJ1:head : data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from shard 0, data_digest 0x2d4a11c2 != data_digest 0x2ddbf8f5 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from auth oi 3:ce3f1d6a:::ROBJ1:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6 alloc_hint [[]0 0 0[]][)], size 9 != size 7 from shard 0" + err_strings[24]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:d60617f9:::ROBJ13:head : candidate had a read error" + err_strings[25]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 
3:d60617f9:::ROBJ13:head : candidate had a stat error" + err_strings[26]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:d60617f9:::ROBJ13:head : failed to pick suitable object info" + err_strings[27]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:e97ce31e:::ROBJ2:head : data_digest 0x578a4830 != data_digest 0x2ddbf8f5 from shard 1, data_digest 0x578a4830 != data_digest 0x2ddbf8f5 from auth oi 3:e97ce31e:::ROBJ2:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 6 dd 2ddbf8f5 od f8e11918 alloc_hint [[]0 0 0[]][)]" + err_strings[28]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 3:f2a5b2a4:::ROBJ3:head : missing" + err_strings[29]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:f4981d31:::ROBJ4:head : omap_digest 0xd7178dfe != omap_digest 0xe2d46ea4 from shard 1, omap_digest 0xd7178dfe != omap_digest 0xe2d46ea4 from auth oi 3:f4981d31:::ROBJ4:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 12 dd 2ddbf8f5 od e2d46ea4 alloc_hint [[]0 0 0[]][)]" + err_strings[30]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid 3:f4bfd4d1:::ROBJ5:head : omap_digest 0x1a862a41 != omap_digest 0x6cac8f6 from shard 1" + err_strings[31]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 1 soid 3:f4bfd4d1:::ROBJ5:head : omap_digest 0x6cac8f6 != omap_digest 0x1a862a41 from auth oi 3:f4bfd4d1:::ROBJ5:head[(][0-9]*'[0-9]* osd.1.0:[0-9]* dirty|omap|data_digest|omap_digest s 7 uv 15 dd 2ddbf8f5 od 1a862a41 alloc_hint [[]0 0 0[]][)]" + err_strings[32]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:ffdb2004:::ROBJ9:head : candidate size 3 info size 7 mismatch" + err_strings[33]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard 0 soid 3:ffdb2004:::ROBJ9:head : object info inconsistent " + err_strings[34]="log_channel[(]cluster[)] log [[]ERR[]] : deep-scrub [0-9]*[.]0 3:c0c86b1d:::ROBJ14:head : no '_' attr" + err_strings[35]="log_channel[(]cluster[)] log [[]ERR[]] : deep-scrub [0-9]*[.]0 3:5c7b2c47:::ROBJ16:head : can't decode 'snapset' attr .* no longer understand old encoding version 3 < 97: Malformed input" + err_strings[36]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub : stat mismatch, got 19/19 objects, 0/0 clones, 18/19 dirty, 18/19 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 1049715/1049716 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes." + err_strings[37]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub 1 missing, 11 inconsistent objects" + err_strings[38]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 deep-scrub 35 errors" + + for err_string in "${err_strings[@]}" + do + if ! grep -q "$err_string" $dir/osd.${primary}.log + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + # Get epoch for repair-get requests + epoch=$(jq .epoch $dir/json) + + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf5fba2c6", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "ROBJ1", + "key": "", + "snapid": -2, + "hash": 1454963827, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'58", + "prior_version": "21'3", + "last_reqid": "osd.1.0:57", + "user_version": 3, + "size": 7, + "mtime": "2018-04-05 14:33:19.804040", + "local_mtime": "2018-04-05 14:33:19.804839", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf5fba2c6", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "data_digest": "0x2d4a11c2", + "omap_digest": "0xf5fba2c6", + "size": 9, + "errors": [ + "data_digest_mismatch_info", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ1", + "key": "", + "snapid": -2, + "hash": 1454963827, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'58", + "prior_version": "21'3", + "last_reqid": "osd.1.0:57", + "user_version": 3, + "size": 7, + "mtime": "2018-04-05 14:33:19.804040", + "local_mtime": "2018-04-05 14:33:19.804839", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf5fba2c6", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "data_digest_mismatch_info", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "data_digest_mismatch", + "size_mismatch" + ], + "object": { + "version": 3, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ1" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xa8dd5adc", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xa8dd5adc", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "alloc_hint_flags": 0, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 3174666125, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ10", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xc2025a24", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 30, + "watchers": {} + }, + "union_shard_errors": [ + "omap_digest_mismatch_info" + ], + "errors": [], + "object": { + "version": 30, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ10" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xa03cef03", + "size": 7, + "errors": [], + "osd": 0, + "primary": 
false + }, + { + "size": 7, + "errors": [ + "read_error" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ11", + "key": "", + "snapid": -2, + "hash": 1828574689, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'52", + "prior_version": "41'33", + "last_reqid": "osd.1.0:51", + "user_version": 33, + "size": 7, + "mtime": "2018-04-05 14:33:26.761286", + "local_mtime": "2018-04-05 14:33:26.762368", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xa03cef03", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "read_error" + ], + "errors": [], + "object": { + "version": 33, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ11" + } + }, + { + "shards": [ + { + "errors": [ + "stat_error" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x067f306a", + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ12", + "key": "", + "snapid": -2, + "hash": 3920199997, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'56", + "prior_version": "43'36", + "last_reqid": "osd.1.0:55", + "user_version": 36, + "size": 7, + "mtime": "2018-04-05 14:33:27.460958", + "local_mtime": "2018-04-05 14:33:27.462109", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x067f306a", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "stat_error" + ], + "errors": [], + "object": { + "version": 36, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ12" + } + }, + { + "shards": [ + { + "errors": [ + "stat_error" + ], + "osd": 0, + "primary": false + }, + { + "size": 7, + "errors": [ + "read_error" + ], + "osd": 1, + "primary": true + } + ], + "union_shard_errors": [ + "stat_error", + "read_error" + ], + "errors": [], + "object": { + "version": 0, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ13" + } + }, + { + "shards": [ + { + "object_info": "bad-val", + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x4f14f849", + "size": 7, + "errors": [ + "info_corrupted" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x4f14f849", + "size": 7, + "errors": [ + "info_missing" + ], + "osd": 1, + "primary": true + } + ], + "union_shard_errors": [ + "info_missing", + "info_corrupted" + ], + "errors": [], + "object": { + "version": 0, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ14" + } + }, + { + "shards": [ + { + "object_info": { + "oid": { + "oid": "ROBJ15", + "key": "", + "snapid": -2, + "hash": 504996876, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'49", + "prior_version": "49'45", + "last_reqid": "osd.1.0:48", + "user_version": 45, + "size": 7, + "mtime": "2018-04-05 14:33:29.498969", + "local_mtime": "2018-04-05 14:33:29.499890", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "expected_object_size": 0, + 
"expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "size": 7, + "errors": [ + "info_missing" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ15", + "key": "", + "snapid": -2, + "hash": 504996876, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'49", + "prior_version": "49'45", + "last_reqid": "osd.1.0:48", + "user_version": 45, + "size": 7, + "mtime": "2018-04-05 14:33:29.498969", + "local_mtime": "2018-04-05 14:33:29.499890", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2d2a4d6e", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "info_missing" + ], + "errors": [], + "object": { + "version": 45, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ15" + } + }, + { + "errors": [], + "object": { + "locator": "", + "name": "ROBJ16", + "nspace": "", + "snap": "head", + "version": 0 + }, + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "errors": [ + "snapset_missing" + ], + "omap_digest": "0x8b699207", + "osd": 0, + "primary": false, + "size": 7 + }, + { + "snapset": "bad-val", + "data_digest": "0x2ddbf8f5", + "errors": [ + "snapset_corrupted" + ], + "omap_digest": "0x8b699207", + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [ + "snapset_missing", + "snapset_corrupted" + ] + }, + { + "errors": [], + "object": { + "locator": "", + "name": "ROBJ17", + "nspace": "", + "snap": "head" + }, + "selected_object_info": { + "alloc_hint_flags": 0, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1884071249, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ17", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xe9572720", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 51, + "watchers": {} + }, + "shards": [ + { + "data_digest": "0x5af0c3ef", + "errors": [ + "data_digest_mismatch_info" + ], + "omap_digest": "0xe9572720", + "osd": 0, + "primary": false, + "size": 7 + }, + { + "data_digest": "0x5af0c3ef", + "errors": [ + "data_digest_mismatch_info" + ], + "omap_digest": "0xe9572720", + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [ + "data_digest_mismatch_info" + ] + }, + { + "errors": [ + "object_info_inconsistency" + ], + "object": { + "locator": "", + "name": "ROBJ18", + "nspace": "", + "snap": "head" + }, + "selected_object_info": { + "alloc_hint_flags": 255, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "shards": [ + { + "data_digest": "0xbd89c912", + 
"errors": [ + "data_digest_mismatch_info" + ], + "object_info": { + "alloc_hint_flags": 0, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "omap_digest": "0xddc3680f", + "osd": 0, + "primary": false, + "size": 7 + }, + { + "data_digest": "0xbd89c912", + "errors": [ + "data_digest_mismatch_info" + ], + "object_info": { + "alloc_hint_flags": 255, + "data_digest": "0x2ddbf8f5", + "expected_object_size": 0, + "expected_write_size": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "lost": 0, + "manifest": { + "type": 0 + }, + "oid": { + "hash": 1629828556, + "key": "", + "max": 0, + "namespace": "", + "oid": "ROBJ18", + "pool": 3, + "snapid": -2 + }, + "omap_digest": "0xddc3680f", + "size": 7, + "truncate_seq": 0, + "truncate_size": 0, + "user_version": 54, + "watchers": {} + }, + "omap_digest": "0xddc3680f", + "osd": 1, + "primary": true, + "size": 7 + } + ], + "union_shard_errors": [ + "data_digest_mismatch_info" + ] + }, + { + "shards": [ + { + "data_digest": "0x578a4830", + "omap_digest": "0xf8e11918", + "size": 7, + "errors": [ + "data_digest_mismatch_info" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf8e11918", + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ2", + "key": "", + "snapid": -2, + "hash": 2026323607, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'60", + "prior_version": "23'6", + "last_reqid": "osd.1.0:59", + "user_version": 6, + "size": 7, + "mtime": "2018-04-05 14:33:20.498756", + "local_mtime": "2018-04-05 14:33:20.499704", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xf8e11918", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "data_digest_mismatch_info" + ], + "errors": [ + "data_digest_mismatch" + ], + "object": { + "version": 6, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ2" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x00b35dfd", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "errors": [ + "missing" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ3", + "key": "", + "snapid": -2, + "hash": 625845583, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'61", + "prior_version": "25'9", + "last_reqid": "osd.1.0:60", + "user_version": 9, + "size": 7, + "mtime": "2018-04-05 14:33:21.189382", + "local_mtime": "2018-04-05 14:33:21.190446", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x00b35dfd", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "missing" + ], + "errors": 
[], + "object": { + "version": 9, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ3" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xd7178dfe", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xe2d46ea4", + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ4", + "key": "", + "snapid": -2, + "hash": 2360875311, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'62", + "prior_version": "27'12", + "last_reqid": "osd.1.0:61", + "user_version": 12, + "size": 7, + "mtime": "2018-04-05 14:33:21.862313", + "local_mtime": "2018-04-05 14:33:21.863261", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xe2d46ea4", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "omap_digest_mismatch_info" + ], + "errors": [ + "omap_digest_mismatch" + ], + "object": { + "version": 12, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ4" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x1a862a41", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x06cac8f6", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ5", + "key": "", + "snapid": -2, + "hash": 2334915887, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'63", + "prior_version": "29'15", + "last_reqid": "osd.1.0:62", + "user_version": 15, + "size": 7, + "mtime": "2018-04-05 14:33:22.589300", + "local_mtime": "2018-04-05 14:33:22.590376", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x1a862a41", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "omap_digest_mismatch_info" + ], + "errors": [ + "omap_digest_mismatch" + ], + "object": { + "version": 15, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ5" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x689ee887", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x179c919f", + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ6", + "key": "", + "snapid": -2, + "hash": 390610085, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'54", + "prior_version": "31'18", + "last_reqid": "osd.1.0:53", + "user_version": 18, + "size": 7, + "mtime": "2018-04-05 14:33:23.289188", + "local_mtime": "2018-04-05 14:33:23.290130", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x179c919f", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + 
"union_shard_errors": [ + "omap_digest_mismatch_info" + ], + "errors": [ + "omap_digest_mismatch" + ], + "object": { + "version": 18, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ6" + } + }, + { + "shards": [ + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xefced57a", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x6a73cc07", + "size": 7, + "errors": [ + "omap_digest_mismatch_info" + ], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ7", + "key": "", + "snapid": -2, + "hash": 3529485009, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'53", + "prior_version": "33'21", + "last_reqid": "osd.1.0:52", + "user_version": 21, + "size": 7, + "mtime": "2018-04-05 14:33:23.979658", + "local_mtime": "2018-04-05 14:33:23.980731", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xefced57a", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "omap_digest_mismatch_info" + ], + "errors": [ + "omap_digest_mismatch" + ], + "object": { + "version": 21, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ7" + } + }, + { + "shards": [ + { + "attrs": [ + { + "Base64": false, + "value": "bad-val", + "name": "key1-ROBJ8" + }, + { + "Base64": false, + "value": "val2-ROBJ8", + "name": "key2-ROBJ8" + } + ], + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xd6be81dc", + "size": 7, + "errors": [], + "osd": 0, + "primary": false + }, + { + "attrs": [ + { + "Base64": false, + "value": "val1-ROBJ8", + "name": "key1-ROBJ8" + }, + { + "Base64": false, + "value": "val3-ROBJ8", + "name": "key3-ROBJ8" + } + ], + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xd6be81dc", + "size": 7, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ8", + "key": "", + "snapid": -2, + "hash": 2359695969, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "79'66", + "prior_version": "79'65", + "last_reqid": "client.4554.0:1", + "user_version": 79, + "size": 7, + "mtime": "2018-04-05 14:34:05.598688", + "local_mtime": "2018-04-05 14:34:05.599698", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xd6be81dc", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [], + "errors": [ + "attr_value_mismatch", + "attr_name_mismatch" + ], + "object": { + "version": 66, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ8" + } + }, + { + "shards": [ + { + "object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "51'64", + "prior_version": "37'27", + "last_reqid": "osd.1.0:63", + "user_version": 27, + "size": 7, + "mtime": "2018-04-05 14:33:25.352485", + "local_mtime": "2018-04-05 14:33:25.353746", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + 
"expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "data_digest": "0x1f26fb26", + "omap_digest": "0x2eecc539", + "size": 3, + "errors": [ + "obj_size_info_mismatch" + ], + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "119'68", + "prior_version": "51'64", + "last_reqid": "client.4834.0:1", + "user_version": 81, + "size": 3, + "mtime": "2018-04-05 14:35:01.500659", + "local_mtime": "2018-04-05 14:35:01.502117", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x1f26fb26", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "data_digest": "0x1f26fb26", + "omap_digest": "0x2eecc539", + "size": 3, + "errors": [], + "osd": 1, + "primary": true + } + ], + "selected_object_info": { + "oid": { + "oid": "ROBJ9", + "key": "", + "snapid": -2, + "hash": 537189375, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "119'68", + "prior_version": "51'64", + "last_reqid": "client.4834.0:1", + "user_version": 81, + "size": 3, + "mtime": "2018-04-05 14:35:01.500659", + "local_mtime": "2018-04-05 14:35:01.502117", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest", + "omap_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x1f26fb26", + "omap_digest": "0x2eecc539", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "obj_size_info_mismatch" + ], + "errors": [ + "object_info_inconsistency" + ], + "object": { + "version": 68, + "snap": "head", + "locator": "", + "nspace": "", + "name": "ROBJ9" + } + } + ], + "epoch": 0 +} +EOF + + jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' 
$dir/json > save2.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + repair $pg + wait_for_clean + + # This hangs if the repair doesn't work + timeout 30 rados -p $poolname get ROBJ17 $dir/robj17.out || return 1 + timeout 30 rados -p $poolname get ROBJ18 $dir/robj18.out || return 1 + # Even though we couldn't repair all of the introduced errors, we can fix ROBJ17 + diff -q $dir/new.ROBJ17 $dir/robj17.out || return 1 + rm -f $dir/new.ROBJ17 $dir/robj17.out || return 1 + diff -q $dir/new.ROBJ18 $dir/robj18.out || return 1 + rm -f $dir/new.ROBJ18 $dir/robj18.out || return 1 + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it +} + + +# +# Test scrub errors for an erasure coded pool +# +function corrupt_scrub_erasure() { + local dir=$1 + local allow_overwrites=$2 + local poolname=ecpool + local total_objs=7 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + for id in $(seq 0 2) ; do + run_osd $dir $id || return 1 + done + create_rbd_pool || return 1 + create_pool foo 1 + + create_ec_pool $poolname $allow_overwrites k=2 m=1 stripe_unit=2K --force || return 1 + wait_for_clean || return 1 + + for i in $(seq 1 $total_objs) ; do + objname=EOBJ${i} + add_something $dir $poolname $objname || return 1 + + local osd=$(expr $i % 2) + + case $i in + 1) + # Size (deep scrub data_digest too) + local payload=UVWXYZZZ + echo $payload > $dir/CORRUPT + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 2) + # Corrupt EC shard + dd if=/dev/urandom of=$dir/CORRUPT bs=2048 count=1 + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 3) + # missing + objectstore_tool $dir $osd $objname remove || return 1 + ;; + + 4) + rados --pool $poolname setxattr $objname key1-$objname val1-$objname || return 1 + rados --pool $poolname setxattr $objname key2-$objname val2-$objname || return 1 + + # Break xattrs + echo -n bad-val > $dir/bad-val + objectstore_tool $dir $osd $objname set-attr _key1-$objname $dir/bad-val || return 1 + objectstore_tool $dir $osd $objname rm-attr _key2-$objname || return 1 + echo -n val3-$objname > $dir/newval + objectstore_tool $dir $osd $objname set-attr _key3-$objname $dir/newval || return 1 + rm $dir/bad-val $dir/newval + ;; + + 5) + # Corrupt EC shard + dd if=/dev/urandom of=$dir/CORRUPT bs=2048 count=2 + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 6) + objectstore_tool $dir 0 $objname rm-attr hinfo_key || return 1 + echo -n bad-val > $dir/bad-val + objectstore_tool $dir 1 $objname set-attr hinfo_key $dir/bad-val || return 1 + ;; + + 7) + local payload=MAKETHISDIFFERENTFROMOTHEROBJECTS + echo $payload > $dir/DIFFERENT + rados --pool $poolname put $objname $dir/DIFFERENT || return 1 + + # Get hinfo_key from EOBJ1 + objectstore_tool $dir 0 EOBJ1 get-attr hinfo_key > $dir/hinfo + objectstore_tool $dir 0 $objname set-attr hinfo_key $dir/hinfo || return 1 + rm -f $dir/hinfo + ;; + + esac + done + + local pg=$(get_pg $poolname EOBJ0) + + pg_scrub $pg + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + # Get epoch for repair-get requests + epoch=$(jq .epoch $dir/json) + + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "shards": [ + { + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4184.0:1", + "user_version": 1, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 9, + "shard": 0, + "errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + }, + { + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4184.0:1", + "user_version": 1, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 1, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ1" + } + }, + { + "shards": [ + { + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "shard": 0, + "errors": [ + "missing" + ], + "osd": 1, + "primary": true + }, + { + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ3", + "key": "", + "snapid": -2, + "hash": 3125668237, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "39'3", + "prior_version": "0'0", + "last_reqid": "client.4252.0:1", + "user_version": 3, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "missing" + ], + "errors": [], + "object": { + "version": 3, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ3" + } + }, + { + "shards": [ + { + "attrs": [ + { + "Base64": false, + "value": "bad-val", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ], + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [], + "size": 2048, + "attrs": [ + { + "Base64": false, + 
"value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ] + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val3-EOBJ4", + "name": "key3-EOBJ4" + } + ] + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ4", + "key": "", + "snapid": -2, + "hash": 1618759290, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "45'6", + "prior_version": "45'5", + "last_reqid": "client.4294.0:1", + "user_version": 6, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [], + "errors": [ + "attr_value_mismatch", + "attr_name_mismatch" + ], + "object": { + "version": 6, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ4" + } + }, + { + "shards": [ + { + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4382.0:1", + "user_version": 7, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 4096, + "shard": 0, + "errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + }, + { + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4382.0:1", + "user_version": 7, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 7, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ5" + } + }, + { + "errors": [], + "object": { + "locator": "", + "name": "EOBJ6", + "nspace": "", + "snap": "head", + "version": 8 + }, + "selected_object_info": { + "oid": { + "oid": "EOBJ6", + "key": "", + "snapid": -2, + "hash": 3050890866, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "65'8", + "prior_version": "0'0", + "last_reqid": "client.4418.0:1", + "user_version": 8, + "size": 7, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": 
"0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "errors": [ + "hinfo_missing" + ], + "osd": 0, + "primary": false, + "shard": 2, + "size": 2048 + }, + { + "errors": [ + "hinfo_corrupted" + ], + "osd": 1, + "primary": true, + "shard": 0, + "hashinfo": "bad-val", + "size": 2048 + }, + { + "errors": [], + "osd": 2, + "primary": false, + "shard": 1, + "size": 2048, + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + } + ], + "union_shard_errors": [ + "hinfo_missing", + "hinfo_corrupted" + ] + }, + { + "errors": [ + "hinfo_inconsistency" + ], + "object": { + "locator": "", + "name": "EOBJ7", + "nspace": "", + "snap": "head", + "version": 10 + }, + "selected_object_info": { + "oid": { + "oid": "EOBJ7", + "key": "", + "snapid": -2, + "hash": 3258066308, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "75'10", + "prior_version": "75'9", + "last_reqid": "client.4482.0:1", + "user_version": 10, + "size": 34, + "mtime": "", + "local_mtime": "", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x136e4e27", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + }, + "errors": [], + "osd": 0, + "primary": false, + "shard": 2, + "size": 2048 + }, + { + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + }, + "errors": [], + "osd": 1, + "primary": true, + "shard": 0, + "size": 2048 + }, + { + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + }, + "errors": [], + "osd": 2, + "primary": false, + "shard": 1, + "size": 2048 + } + ], + "union_shard_errors": [] + } + ], + "epoch": 0 +} +EOF + + jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' $dir/json > save3.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + pg_deep_scrub $pg + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + # Get epoch for repair-get requests + epoch=$(jq .epoch $dir/json) + + if [ "$allow_overwrites" = "true" ] + then + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "shards": [ + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4184.0:1", + "user_version": 1, + "size": 7, + "mtime": "2018-04-05 14:31:33.837147", + "local_mtime": "2018-04-05 14:31:33.840763", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 9, + "shard": 0, + "errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + }, + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4184.0:1", + "user_version": 1, + "size": 7, + "mtime": "2018-04-05 14:31:33.837147", + "local_mtime": "2018-04-05 14:31:33.840763", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 1, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ1" + } + }, + { + "shards": [ + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "shard": 0, + "errors": [ + "missing" + ], + "osd": 1, + "primary": true + }, + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ3", + "key": "", + "snapid": -2, + "hash": 3125668237, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "39'3", + "prior_version": "0'0", + "last_reqid": "client.4252.0:1", + "user_version": 3, + "size": 7, + "mtime": "2018-04-05 14:31:46.841145", + "local_mtime": "2018-04-05 14:31:46.844996", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "missing" + ], + "errors": [], + "object": { + 
"version": 3, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ3" + } + }, + { + "shards": [ + { + "attrs": [ + { + "Base64": false, + "value": "bad-val", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ], + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ], + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 0, + "osd": 1, + "primary": true + }, + { + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val3-EOBJ4", + "name": "key3-EOBJ4" + } + ], + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 1, + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ4", + "key": "", + "snapid": -2, + "hash": 1618759290, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "45'6", + "prior_version": "45'5", + "last_reqid": "client.4294.0:1", + "user_version": 6, + "size": 7, + "mtime": "2018-04-05 14:31:54.663622", + "local_mtime": "2018-04-05 14:31:54.664527", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [], + "errors": [ + "attr_value_mismatch", + "attr_name_mismatch" + ], + "object": { + "version": 6, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ4" + } + }, + { + "shards": [ + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4382.0:1", + "user_version": 7, + "size": 7, + "mtime": "2018-04-05 14:32:12.929161", + "local_mtime": "2018-04-05 14:32:12.934707", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 4096, + "errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "shard": 0, + "osd": 1, + "primary": true + }, + { + "data_digest": "0x00000000", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 1, + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4382.0:1", + "user_version": 7, + "size": 7, + "mtime": "2018-04-05 14:32:12.929161", + "local_mtime": "2018-04-05 14:32:12.934707", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + 
"omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 7, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ5" + } + }, + { + "object": { + "name": "EOBJ6", + "nspace": "", + "locator": "", + "snap": "head", + "version": 8 + }, + "errors": [], + "union_shard_errors": [ + "read_error", + "hinfo_missing", + "hinfo_corrupted" + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ6", + "key": "", + "snapid": -2, + "hash": 3050890866, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "65'8", + "prior_version": "0'0", + "last_reqid": "client.4418.0:1", + "user_version": 8, + "size": 7, + "mtime": "2018-04-05 14:32:20.634116", + "local_mtime": "2018-04-05 14:32:20.637999", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "shard": 2, + "errors": [ + "read_error", + "hinfo_missing" + ], + "size": 2048 + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [ + "read_error", + "hinfo_corrupted" + ], + "size": 2048, + "hashinfo": "bad-val" + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x00000000", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + } + ] + }, + { + "object": { + "name": "EOBJ7", + "nspace": "", + "locator": "", + "snap": "head", + "version": 10 + }, + "errors": [ + "hinfo_inconsistency" + ], + "union_shard_errors": [], + "selected_object_info": { + "oid": { + "oid": "EOBJ7", + "key": "", + "snapid": -2, + "hash": 3258066308, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "75'10", + "prior_version": "75'9", + "last_reqid": "client.4482.0:1", + "user_version": 10, + "size": 34, + "mtime": "2018-04-05 14:32:33.058782", + "local_mtime": "2018-04-05 14:32:33.059679", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x136e4e27", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "shard": 2, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x00000000", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x00000000", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + }, + { + "osd": 2, + "primary": false, + 
"shard": 1, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x00000000", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + } + ] + } + ], + "epoch": 0 +} +EOF + + else + + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "shards": [ + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4192.0:1", + "user_version": 1, + "size": 7, + "mtime": "2018-04-05 14:30:10.688009", + "local_mtime": "2018-04-05 14:30:10.691774", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 9, + "shard": 0, + "errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + }, + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ1", + "key": "", + "snapid": -2, + "hash": 560836233, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "27'1", + "prior_version": "0'0", + "last_reqid": "client.4192.0:1", + "user_version": 1, + "size": 7, + "mtime": "2018-04-05 14:30:10.688009", + "local_mtime": "2018-04-05 14:30:10.691774", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 1, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ1" + } + }, + { + "shards": [ + { + "size": 2048, + "errors": [ + "ec_hash_error" + ], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 0, + "osd": 1, + "primary": true + }, + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 1, + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ2", + "key": "", + "snapid": -2, + "hash": 562812377, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "33'2", + "prior_version": "0'0", + "last_reqid": "client.4224.0:1", + "user_version": 2, + "size": 7, + "mtime": "2018-04-05 14:30:14.152945", + "local_mtime": "2018-04-05 14:30:14.154014", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + 
"type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "ec_hash_error" + ], + "errors": [], + "object": { + "version": 2, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ2" + } + }, + { + "shards": [ + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [ + "missing" + ] + }, + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ3", + "key": "", + "snapid": -2, + "hash": 3125668237, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "39'3", + "prior_version": "0'0", + "last_reqid": "client.4258.0:1", + "user_version": 3, + "size": 7, + "mtime": "2018-04-05 14:30:18.875544", + "local_mtime": "2018-04-05 14:30:18.880153", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "missing" + ], + "errors": [], + "object": { + "version": 3, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ3" + } + }, + { + "shards": [ + { + "attrs": [ + { + "Base64": false, + "value": "bad-val", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ], + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x04cfa72f", + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val2-EOBJ4", + "name": "key2-EOBJ4" + } + ] + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x04cfa72f", + "attrs": [ + { + "Base64": false, + "value": "val1-EOBJ4", + "name": "key1-EOBJ4" + }, + { + "Base64": false, + "value": "val3-EOBJ4", + "name": "key3-EOBJ4" + } + ] + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ4", + "key": "", + "snapid": -2, + "hash": 1618759290, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "45'6", + "prior_version": "45'5", + "last_reqid": "client.4296.0:1", + "user_version": 6, + "size": 7, + "mtime": "2018-04-05 14:30:22.271983", + "local_mtime": "2018-04-05 14:30:22.272840", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [], + "errors": [ + "attr_value_mismatch", + "attr_name_mismatch" + ], + "object": { + "version": 6, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ4" + } + }, + { + "shards": [ + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "errors": [], + "shard": 2, + "osd": 0, + "primary": false + }, + { + "object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + 
"pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4384.0:1", + "user_version": 7, + "size": 7, + "mtime": "2018-04-05 14:30:35.162395", + "local_mtime": "2018-04-05 14:30:35.166390", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "size": 4096, + "shard": 0, + "errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "osd": 1, + "primary": true + }, + { + "data_digest": "0x04cfa72f", + "omap_digest": "0xffffffff", + "size": 2048, + "shard": 1, + "errors": [], + "osd": 2, + "primary": false + } + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ5", + "key": "", + "snapid": -2, + "hash": 2918945441, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "59'7", + "prior_version": "0'0", + "last_reqid": "client.4384.0:1", + "user_version": 7, + "size": 7, + "mtime": "2018-04-05 14:30:35.162395", + "local_mtime": "2018-04-05 14:30:35.166390", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "union_shard_errors": [ + "read_error", + "size_mismatch_info", + "obj_size_info_mismatch" + ], + "errors": [ + "size_mismatch" + ], + "object": { + "version": 7, + "snap": "head", + "locator": "", + "nspace": "", + "name": "EOBJ5" + } + }, + { + "object": { + "name": "EOBJ6", + "nspace": "", + "locator": "", + "snap": "head", + "version": 8 + }, + "errors": [], + "union_shard_errors": [ + "read_error", + "hinfo_missing", + "hinfo_corrupted" + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ6", + "key": "", + "snapid": -2, + "hash": 3050890866, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "65'8", + "prior_version": "0'0", + "last_reqid": "client.4420.0:1", + "user_version": 8, + "size": 7, + "mtime": "2018-04-05 14:30:40.914673", + "local_mtime": "2018-04-05 14:30:40.917705", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x2ddbf8f5", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "shard": 2, + "errors": [ + "read_error", + "hinfo_missing" + ], + "size": 2048 + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [ + "read_error", + "hinfo_corrupted" + ], + "size": 2048, + "hashinfo": "bad-val" + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x04cfa72f", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + } + ] + }, + { + "object": { + "name": "EOBJ7", + "nspace": "", + "locator": "", + "snap": "head", + "version": 10 + }, + "errors": [ + "hinfo_inconsistency" + ], + "union_shard_errors": [ + "ec_hash_error" + ], + "selected_object_info": { + "oid": { + "oid": "EOBJ7", + "key": "", + "snapid": -2, 
+ "hash": 3258066308, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "75'10", + "prior_version": "75'9", + "last_reqid": "client.4486.0:1", + "user_version": 10, + "size": 34, + "mtime": "2018-04-05 14:30:50.995009", + "local_mtime": "2018-04-05 14:30:50.996112", + "lost": 0, + "flags": [ + "dirty", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x136e4e27", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "shard": 2, + "errors": [ + "ec_hash_error" + ], + "size": 2048, + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 80717615, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 80717615, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + }, + { + "osd": 1, + "primary": true, + "shard": 0, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x5b7455a8", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + }, + { + "osd": 2, + "primary": false, + "shard": 1, + "errors": [], + "size": 2048, + "omap_digest": "0xffffffff", + "data_digest": "0x5b7455a8", + "hashinfo": { + "cumulative_shard_hashes": [ + { + "hash": 1534350760, + "shard": 0 + }, + { + "hash": 1534491824, + "shard": 1 + }, + { + "hash": 1534350760, + "shard": 2 + } + ], + "total_chunk_size": 2048 + } + } + ] + } + ], + "epoch": 0 +} +EOF + + fi + + jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + if [ "$allow_overwrites" = "true" ] + then + num=4 + else + num=5 + fi + jq '.' $dir/json > save${num}.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it +} + +function TEST_corrupt_scrub_erasure_appends() { + corrupt_scrub_erasure $1 false +} + +function TEST_corrupt_scrub_erasure_overwrites() { + if [ "$use_ec_overwrite" = "true" ]; then + corrupt_scrub_erasure $1 true + fi +} + +# +# Test to make sure that a periodic scrub won't cause deep-scrub info to be lost +# +function TEST_periodic_scrub_replicated() { + local dir=$1 + local poolname=psr_pool + local objname=POBJ + + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 " + ceph_osd_args+="--osd_scrub_backoff_ratio=0" + run_osd $dir 0 $ceph_osd_args || return 1 + run_osd $dir 1 $ceph_osd_args || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + local osd=0 + add_something $dir $poolname $objname scrub || return 1 + local primary=$(get_primary $poolname $objname) + local pg=$(get_pg $poolname $objname) + + # Add deep-scrub only error + local payload=UVWXYZ + echo $payload > $dir/CORRUPT + # Uses $ceph_osd_args for osd restart + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + + # No scrub information available, so expect failure + set -o pipefail + ! 
rados list-inconsistent-obj $pg | jq '.' || return 1 + set +o pipefail + + pg_deep_scrub $pg || return 1 + + # Make sure bad object found + rados list-inconsistent-obj $pg | jq '.' | grep -q $objname || return 1 + + flush_pg_stats + local last_scrub=$(get_last_scrub_stamp $pg) + # Fake a schedule scrub + ceph tell $pg scrub || return 1 + # Wait for schedule regular scrub + wait_for_scrub $pg "$last_scrub" + + # It needed to be upgraded + grep -q "Deep scrub errors, upgrading scrub to deep-scrub" $dir/osd.${primary}.log || return 1 + + # Bad object still known + rados list-inconsistent-obj $pg | jq '.' | grep -q $objname || return 1 + + # Can't upgrade with this set + ceph osd set nodeep-scrub + # Let map change propagate to OSDs + ceph tell osd.0 get_latest_osdmap + flush_pg_stats + sleep 5 + + # Fake a schedule scrub + ceph tell $pg scrub || return 1 + # Wait for schedule regular scrub + # to notice scrub and skip it + local found=false + for i in $(seq 14 -1 0) + do + sleep 1 + ! grep -q "Regular scrub skipped due to deep-scrub errors and nodeep-scrub set" $dir/osd.${primary}.log || { found=true ; break; } + echo Time left: $i seconds + done + test $found = "true" || return 1 + + # Bad object still known + rados list-inconsistent-obj $pg | jq '.' | grep -q $objname || return 1 + + flush_pg_stats + # Request a regular scrub and it will be done + pg_scrub $pg + grep -q "Regular scrub request, deep-scrub details will be lost" $dir/osd.${primary}.log || return 1 + + # deep-scrub error is no longer present + rados list-inconsistent-obj $pg | jq '.' | grep -qv $objname || return 1 +} + +function TEST_scrub_warning() { + local dir=$1 + local poolname=psr_pool + local objname=POBJ + local scrubs=5 + local deep_scrubs=5 + local i1_day=86400 + local i7_days=$(calc $i1_day \* 7) + local i14_days=$(calc $i1_day \* 14) + local overdue=0.5 + local conf_overdue_seconds=$(calc $i7_days + $i1_day + \( $i7_days \* $overdue \) ) + local pool_overdue_seconds=$(calc $i14_days + $i1_day + \( $i14_days \* $overdue \) ) + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x --mon_warn_pg_not_scrubbed_ratio=${overdue} --mon_warn_pg_not_deep_scrubbed_ratio=${overdue} || return 1 + run_osd $dir 0 $ceph_osd_args --osd_scrub_backoff_ratio=0 || return 1 + + for i in $(seq 1 $(expr $scrubs + $deep_scrubs)) + do + create_pool $poolname-$i 1 1 || return 1 + wait_for_clean || return 1 + if [ $i = "1" ]; + then + ceph osd pool set $poolname-$i scrub_max_interval $i14_days + fi + if [ $i = $(expr $scrubs + 1) ]; + then + ceph osd pool set $poolname-$i deep_scrub_interval $i14_days + fi + done + + # Only 1 osd + local primary=0 + + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + ceph config set global osd_scrub_interval_randomize_ratio 0 + ceph config set global osd_deep_scrub_randomize_ratio 0 + ceph config set global osd_scrub_max_interval ${i7_days} + ceph config set global osd_deep_scrub_interval ${i7_days} + + # Fake schedule scrubs + for i in $(seq 1 $scrubs) + do + if [ $i = "1" ]; + then + overdue_seconds=$pool_overdue_seconds + else + overdue_seconds=$conf_overdue_seconds + fi + ceph tell ${i}.0 scrub $(expr ${overdue_seconds} + ${i}00) || return 1 + done + # Fake schedule deep scrubs + for i in $(seq $(expr $scrubs + 1) $(expr $scrubs + $deep_scrubs)) + do + if [ $i = "$(expr $scrubs + 1)" ]; + then + overdue_seconds=$pool_overdue_seconds + else + overdue_seconds=$conf_overdue_seconds + fi + ceph tell ${i}.0 deep_scrub $(expr 
${overdue_seconds} + ${i}00) || return 1 + done + flush_pg_stats + + ceph health + ceph health detail + ceph health | grep -q " pgs not deep-scrubbed in time" || return 1 + ceph health | grep -q " pgs not scrubbed in time" || return 1 + + # note that the 'ceph tell pg deep_scrub' command now also sets the regular scrub + # time-stamp. I.e. - all 'late for deep scrubbing' pgs are also late for + # regular scrubbing. For now, we'll allow both responses. + COUNT=$(ceph health detail | grep "not scrubbed since" | wc -l) + + if (( $COUNT != $scrubs && $COUNT != $(expr $scrubs+$deep_scrubs) )); then + ceph health detail | grep "not scrubbed since" + return 1 + fi + COUNT=$(ceph health detail | grep "not deep-scrubbed since" | wc -l) + if [ "$COUNT" != $deep_scrubs ]; then + ceph health detail | grep "not deep-scrubbed since" + return 1 + fi +} + +# +# Corrupt snapset in replicated pool +# +function TEST_corrupt_snapset_scrub_rep() { + local dir=$1 + local poolname=csr_pool + local total_objs=2 + + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_pool foo 1 || return 1 + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} + add_something $dir $poolname $objname || return 1 + + rados --pool $poolname setomapheader $objname hdr-$objname || return 1 + rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1 + done + + local pg=$(get_pg $poolname ROBJ0) + local primary=$(get_primary $poolname ROBJ0) + + rados -p $poolname mksnap snap1 + echo -n head_of_snapshot_data > $dir/change + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} + + # Alternate corruption between osd.0 and osd.1 + local osd=$(expr $i % 2) + + case $i in + 1) + rados --pool $poolname put $objname $dir/change + objectstore_tool $dir $osd --head $objname clear-snapset corrupt || return 1 + ;; + + 2) + rados --pool $poolname put $objname $dir/change + objectstore_tool $dir $osd --head $objname clear-snapset corrupt || return 1 + ;; + + esac + done + rm $dir/change + + pg_scrub $pg + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pg || return 1 + + rados list-inconsistent-obj $pg > $dir/json || return 1 + + jq "$jqfilter" << EOF | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "epoch": 34, + "inconsistents": [ + { + "object": { + "name": "ROBJ1", + "nspace": "", + "locator": "", + "snap": "head", + "version": 8 + }, + "errors": [ + "snapset_inconsistency" + ], + "union_shard_errors": [], + "selected_object_info": { + "oid": { + "oid": "ROBJ1", + "key": "", + "snapid": -2, + "hash": 1454963827, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "24'8", + "prior_version": "21'3", + "last_reqid": "client.4195.0:1", + "user_version": 8, + "size": 21, + "mtime": "2018-04-05 14:35:43.286117", + "local_mtime": "2018-04-05 14:35:43.288990", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x53acb008", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "errors": [], + "size": 21, + "snapset": { + "clones": [ + { + "overlap": "[]", + "size": 7, + "snap": 1, + "snaps": [ + 1 + ] + } + ], + "seq": 1 + } + }, + { + "osd": 1, + "primary": true, + "errors": [], + "size": 21, + "snapset": { + "clones": [], + "seq": 0 + } + } + ] + }, + { + "object": { + "name": "ROBJ2", + "nspace": "", + "locator": "", + "snap": "head", + "version": 10 + }, + "errors": [ + "snapset_inconsistency" + ], + "union_shard_errors": [], + "selected_object_info": { + "oid": { + "oid": "ROBJ2", + "key": "", + "snapid": -2, + "hash": 2026323607, + "max": 0, + "pool": 3, + "namespace": "" + }, + "version": "28'10", + "prior_version": "23'6", + "last_reqid": "client.4223.0:1", + "user_version": 10, + "size": 21, + "mtime": "2018-04-05 14:35:48.326856", + "local_mtime": "2018-04-05 14:35:48.328097", + "lost": 0, + "flags": [ + "dirty", + "omap", + "data_digest" + ], + "truncate_seq": 0, + "truncate_size": 0, + "data_digest": "0x53acb008", + "omap_digest": "0xffffffff", + "expected_object_size": 0, + "expected_write_size": 0, + "alloc_hint_flags": 0, + "manifest": { + "type": 0 + }, + "watchers": {} + }, + "shards": [ + { + "osd": 0, + "primary": false, + "errors": [], + "size": 21, + "snapset": { + "clones": [], + "seq": 0 + } + }, + { + "osd": 1, + "primary": true, + "errors": [], + "size": 21, + "snapset": { + "clones": [ + { + "overlap": "[]", + "size": 7, + "snap": 1, + "snaps": [ + 1 + ] + } + ], + "seq": 1 + } + } + ] + } + ] +} +EOF + + jq "$jqfilter" $dir/json | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' 
$dir/json > save6.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 + fi + + ERRORS=0 + declare -a err_strings + err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid [0-9]*:.*:::ROBJ1:head : snapset inconsistent" + err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid [0-9]*:.*:::ROBJ2:head : snapset inconsistent" + err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 [0-9]*:.*:::ROBJ1:1 : is an unexpected clone" + err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub : stat mismatch, got 3/4 objects, 1/2 clones, 3/4 dirty, 3/4 omap, 0/0 pinned, 0/0 hit_set_archive, 0/0 whiteouts, 49/56 bytes, 0/0 manifest objects, 0/0 hit_set_archive bytes." + err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 0 missing, 2 inconsistent objects" + err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 4 errors" + + for err_string in "${err_strings[@]}" + do + if ! grep -q "$err_string" $dir/osd.${primary}.log + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it +} + +function TEST_request_scrub_priority() { + local dir=$1 + local poolname=psr_pool + local objname=POBJ + local OBJECTS=64 + local PGS=8 + + run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 + run_mgr $dir x || return 1 + local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 " + ceph_osd_args+="--osd_scrub_backoff_ratio=0" + run_osd $dir 0 $ceph_osd_args || return 1 + + create_pool $poolname $PGS $PGS || return 1 + wait_for_clean || return 1 + + local osd=0 + add_something $dir $poolname $objname noscrub || return 1 + local primary=$(get_primary $poolname $objname) + local pg=$(get_pg $poolname $objname) + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + local otherpgs + for i in $(seq 0 $(expr $PGS - 1)) + do + opg="${poolid}.${i}" + if [ "$opg" = "$pg" ]; then + continue + fi + otherpgs="${otherpgs}${opg} " + local other_last_scrub=$(get_last_scrub_stamp $pg) + # Fake a schedule scrub + ceph tell $opg scrub $opg || return 1 + done + + sleep 15 + flush_pg_stats + + # Request a regular scrub and it will be done + local last_scrub=$(get_last_scrub_stamp $pg) + ceph pg scrub $pg + + ceph osd unset noscrub || return 1 + ceph osd unset nodeep-scrub || return 1 + + wait_for_scrub $pg "$last_scrub" + + for opg in $otherpgs $pg + do + wait_for_scrub $opg "$other_last_scrub" + done + + # Verify that the requested scrub ran first + grep "log_channel.*scrub ok" $dir/osd.${primary}.log | grep -v purged_snaps | head -1 | sed 's/.*[[]DBG[]]//' | grep -q $pg || return 1 +} + + +main osd-scrub-repair "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh osd-scrub-repair.sh" +# End: diff --git a/qa/standalone/scrub/osd-scrub-snaps.sh b/qa/standalone/scrub/osd-scrub-snaps.sh new file mode 100755 index 000000000..c543b48a1 --- /dev/null +++ b/qa/standalone/scrub/osd-scrub-snaps.sh @@ -0,0 +1,1188 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can 
redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +# Test development and debugging +# Set to "yes" in order to ignore diff errors and save results to update test +getjson="no" + +jqfilter='.inconsistents' +sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print ( json.dumps(ud, sort_keys=True, indent=2) )' + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7121" # git grep '\<7121\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function create_scenario() { + local dir=$1 + local poolname=$2 + local TESTDATA=$3 + local osd=$4 + + SNAP=1 + rados -p $poolname mksnap snap${SNAP} + dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP} + rados -p $poolname put obj1 $TESTDATA + rados -p $poolname put obj5 $TESTDATA + rados -p $poolname put obj3 $TESTDATA + for i in `seq 6 14` + do rados -p $poolname put obj${i} $TESTDATA + done + + SNAP=2 + rados -p $poolname mksnap snap${SNAP} + dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP} + rados -p $poolname put obj5 $TESTDATA + + SNAP=3 + rados -p $poolname mksnap snap${SNAP} + dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP} + rados -p $poolname put obj3 $TESTDATA + + SNAP=4 + rados -p $poolname mksnap snap${SNAP} + dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP} + rados -p $poolname put obj5 $TESTDATA + rados -p $poolname put obj2 $TESTDATA + + SNAP=5 + rados -p $poolname mksnap snap${SNAP} + SNAP=6 + rados -p $poolname mksnap snap${SNAP} + dd if=/dev/urandom of=$TESTDATA bs=256 count=${SNAP} + rados -p $poolname put obj5 $TESTDATA + + SNAP=7 + rados -p $poolname mksnap snap${SNAP} + + rados -p $poolname rm obj4 + rados -p $poolname rm obj16 + rados -p $poolname rm obj2 + + kill_daemons $dir TERM osd || return 1 + + # Don't need to use ceph_objectstore_tool() function because osd stopped + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj1)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" --force remove || return 1 + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":2)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" remove || return 1 + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":1)" + OBJ5SAVE="$JSON" + # Starts with a snapmap + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + grep SNA_ $dir/drk.log + grep "^[pm].*SNA_.*[.]1[.]obj5[.][.]$" $dir/drk.log || return 1 + ceph-objectstore-tool --data-path $dir/${osd} --rmtype nosnapmap "$JSON" remove || return 1 + # Check that snapmap is stil there + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + grep SNA_ $dir/drk.log + grep "^[pm].*SNA_.*[.]1[.]obj5[.][.]$" $dir/drk.log || return 1 + rm -f 
$dir/drk.log + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj5 | grep \"snapid\":4)" + dd if=/dev/urandom of=$TESTDATA bs=256 count=18 + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1 + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj3)" + dd if=/dev/urandom of=$TESTDATA bs=256 count=15 + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1 + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj4 | grep \"snapid\":7)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" remove || return 1 + + # Starts with a snapmap + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + grep SNA_ $dir/drk.log + grep "^[pm].*SNA_.*[.]7[.]obj16[.][.]$" $dir/drk.log || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --op list obj16 | grep \"snapid\":7)" + ceph-objectstore-tool --data-path $dir/${osd} --rmtype snapmap "$JSON" remove || return 1 + # Check that snapmap is now removed + ceph-kvstore-tool bluestore-kv $dir/${osd} list 2> /dev/null > $dir/drk.log + grep SNA_ $dir/drk.log + ! grep "^[pm].*SNA_.*[.]7[.]obj16[.][.]$" $dir/drk.log || return 1 + rm -f $dir/drk.log + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj2)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" rm-attr snapset || return 1 + + # Create a clone which isn't in snapset and doesn't have object info + JSON="$(echo "$OBJ5SAVE" | sed s/snapid\":1/snapid\":7/)" + dd if=/dev/urandom of=$TESTDATA bs=256 count=7 + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-bytes $TESTDATA || return 1 + + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj6)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj7)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset corrupt || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj8)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset seq || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj9)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clone_size || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj10)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clone_overlap || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj11)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset clones || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj12)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset head || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj13)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset snaps || return 1 + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj14)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" clear-snapset size || return 1 + + echo "garbage" > $dir/bad + JSON="$(ceph-objectstore-tool --data-path $dir/${osd} --head --op list obj15)" + ceph-objectstore-tool --data-path $dir/${osd} "$JSON" set-attr snapset $dir/bad || return 1 + rm -f $dir/bad + return 0 +} + +function TEST_scrub_snaps() { + local dir=$1 + local poolname=test + local OBJS=16 + local OSDS=1 + + 
TESTDATA="testdata.$$" + + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # All scrubs done manually. Don't want any unexpected scheduled scrubs. + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $OBJS` + do + rados -p $poolname put obj${i} $TESTDATA + done + + local primary=$(get_primary $poolname obj1) + + create_scenario $dir $poolname $TESTDATA $primary || return 1 + + rm -f $TESTDATA + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + activate_osd $dir $osd || return 1 + done + ceph tell osd.* config set osd_shallow_scrub_chunk_max 25 + ceph tell osd.* config set osd_shallow_scrub_chunk_min 5 + ceph tell osd.* config set osd_pg_stat_report_interval_max 1 + + + wait_for_clean || return 1 + + ceph tell osd.* config get osd_shallow_scrub_chunk_max + ceph tell osd.* config get osd_shallow_scrub_chunk_min + ceph tell osd.* config get osd_pg_stat_report_interval_max + ceph tell osd.* config get osd_scrub_chunk_max + ceph tell osd.* config get osd_scrub_chunk_min + + local pgid="${poolid}.0" + if ! pg_scrub "$pgid" ; then + return 1 + fi + + test "$(grep "_scan_snaps start" $dir/osd.${primary}.log | wc -l)" = "2" || return 1 + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. | length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pgid || return 1 + + rados list-inconsistent-obj $pgid > $dir/json || return 1 + + # The injected snapshot errors with a single copy pool doesn't + # see object errors because all the issues are detected by + # comparing copies. 
+ jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "epoch": 17, + "inconsistents": [] +} +EOF + + jq "$jqfilter" $dir/json | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + + rados list-inconsistent-snapset $pgid > $dir/json || return 1 + + jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "inconsistents": [ + { + "errors": [ + "headless" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj1" + }, + { + "errors": [ + "size_mismatch" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj10" + }, + { + "errors": [ + "headless" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj11" + }, + { + "errors": [ + "size_mismatch" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj14" + }, + { + "errors": [ + "headless" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj6" + }, + { + "errors": [ + "headless" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj7" + }, + { + "errors": [ + "size_mismatch" + ], + "snap": 1, + "locator": "", + "nspace": "", + "name": "obj9" + }, + { + "errors": [ + "headless" + ], + "snap": 4, + "locator": "", + "nspace": "", + "name": "obj2" + }, + { + "errors": [ + "size_mismatch" + ], + "snap": 4, + "locator": "", + "nspace": "", + "name": "obj5" + }, + { + "errors": [ + "headless" + ], + "snap": 7, + "locator": "", + "nspace": "", + "name": "obj2" + }, + { + "errors": [ + "info_missing", + "headless" + ], + "snap": 7, + "locator": "", + "nspace": "", + "name": "obj5" + }, + { + "name": "obj10", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "????", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + }, + { + "extra clones": [ + 1 + ], + "errors": [ + "extra_clones" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj11", + "snapset": { + "seq": 1, + "clones": [] + } + }, + { + "name": "obj14", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": 1033, + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + }, + { + "errors": [ + "snapset_corrupted" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj15" + }, + { + "extra clones": [ + 7, + 4 + ], + "errors": [ + "snapset_missing", + "extra_clones" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj2" + }, + { + "errors": [ + "size_mismatch" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj3", + "snapset": { + "seq": 3, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "[]", + "snaps": [ + 1 + ] + }, + { + "snap": 3, + "size": 256, + "overlap": "[]", + "snaps": [ + 3, + 2 + ] + } + ] + } + }, + { + "missing": [ + 7 + ], + "errors": [ + "clone_missing" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj4", + "snapset": { + "seq": 7, + "clones": [ + { + "snap": 7, + "size": 1032, + "overlap": "[]", + "snaps": [ + 7, + 6, + 5, + 4, + 3, + 2, + 1 + ] + } + ] + } + }, + { + "missing": [ + 2, + 1 + ], + "extra clones": [ + 7 + ], + "errors": [ + "extra_clones", + "clone_missing" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj5", + "snapset": { + "seq": 6, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "[]", + "snaps": [ + 1 + ] + }, + { + "snap": 2, + "size": 256, + "overlap": "[]", + "snaps": [ + 2 + ] + }, + { + "snap": 4, + "size": 
512, + "overlap": "[]", + "snaps": [ + 4, + 3 + ] + }, + { + "snap": 6, + "size": 1024, + "overlap": "[]", + "snaps": [ + 6, + 5 + ] + } + ] + } + }, + { + "extra clones": [ + 1 + ], + "errors": [ + "extra_clones" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj6", + "snapset": { + "seq": 1, + "clones": [] + } + }, + { + "extra clones": [ + 1 + ], + "errors": [ + "extra_clones" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj7", + "snapset": { + "seq": 0, + "clones": [] + } + }, + { + "errors": [ + "snapset_error" + ], + "snap": "head", + "locator": "", + "nspace": "", + "name": "obj8", + "snapset": { + "seq": 0, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + } + }, + { + "name": "obj9", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": "????", + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + } + ], + "epoch": 20 +} +EOF + + jq "$jqfilter" $dir/json | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson = "yes" + then + jq '.' $dir/json > save1.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-snap.json || return 1 + fi + + pidfiles=$(find $dir 2>/dev/null | grep 'osd[^/]*\.pid') + pids="" + for pidfile in ${pidfiles} + do + pids+="$(cat $pidfile) " + done + + ERRORS=0 + + for i in `seq 1 7` + do + rados -p $poolname rmsnap snap$i + done + sleep 5 + local -i loop=0 + while ceph pg dump pgs | grep -q snaptrim; + do + if ceph pg dump pgs | grep -q snaptrim_error; + then + break + fi + sleep 2 + loop+=1 + if (( $loop >= 10 )) ; then + ERRORS=$(expr $ERRORS + 1) + break + fi + done + ceph pg dump pgs + + for pid in $pids + do + if ! 
kill -0 $pid + then + echo "OSD Crash occurred" + ERRORS=$(expr $ERRORS + 1) + fi + done + + kill_daemons $dir || return 1 + + declare -a err_strings + err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj10:.* : is missing in clone_overlap" + err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 : no '_' attr" + err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:7 : is an unexpected clone" + err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*::obj5:4 : on disk size [(]4608[)] does not match object info size [(]512[)] adjusted for ondisk to [(]512[)]" + err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head : expected clone .*:::obj5:2" + err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj5:head : expected clone .*:::obj5:1" + err_strings[6]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj5:head : 2 missing clone[(]s[)]" + err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj8:head : snaps.seq not set" + err_strings[8]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj7:1 : is an unexpected clone" + err_strings[9]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj3:head : on disk size [(]3840[)] does not match object info size [(]768[)] adjusted for ondisk to [(]768[)]" + err_strings[10]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj6:1 : is an unexpected clone" + err_strings[11]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:head : no 'snapset' attr" + err_strings[12]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:7 : clone ignored due to missing snapset" + err_strings[13]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj2:4 : clone ignored due to missing snapset" + err_strings[14]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj4:head : expected clone .*:::obj4:7" + err_strings[15]="log_channel[(]cluster[)] log [[]INF[]] : scrub [0-9]*[.]0 .*:::obj4:head : 1 missing clone[(]s[)]" + err_strings[16]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj1:1 : is an unexpected clone" + err_strings[17]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj9:1 : is missing in clone_size" + err_strings[18]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj11:1 : is an unexpected clone" + err_strings[19]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj14:1 : size 1032 != clone_size 1033" + err_strings[20]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub 20 errors" + err_strings[21]="log_channel[(]cluster[)] log [[]ERR[]] : scrub [0-9]*[.]0 .*:::obj15:head : can't decode 'snapset' attr " + err_strings[22]="log_channel[(]cluster[)] log [[]ERR[]] : osd[.][0-9]* found snap mapper error on pg 1.0 oid 1:461f8b5e:::obj16:7 snaps missing in mapper, should be: {1, 2, 3, 4, 5, 6, 7} ...repaired" + + for err_string in "${err_strings[@]}" + do + if ! 
grep "$err_string" $dir/osd.${primary}.log > /dev/null; + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + echo "TEST PASSED" + return 0 +} + +function _scrub_snaps_multi() { + local dir=$1 + local poolname=test + local OBJS=16 + local OSDS=2 + local which=$2 + + TESTDATA="testdata.$$" + + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # All scrubs done manually. Don't want any unexpected scheduled scrubs. + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $OBJS` + do + rados -p $poolname put obj${i} $TESTDATA + done + + local primary=$(get_primary $poolname obj1) + local replica=$(get_not_primary $poolname obj1) + + eval create_scenario $dir $poolname $TESTDATA \$$which || return 1 + + rm -f $TESTDATA + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + activate_osd $dir $osd || return 1 + done + + ceph tell osd.* config set osd_shallow_scrub_chunk_max 3 + ceph tell osd.* config set osd_shallow_scrub_chunk_min 3 + ceph tell osd.* config set osd_scrub_chunk_min 3 + ceph tell osd.* config set osd_pg_stat_report_interval_max 1 + wait_for_clean || return 1 + + local pgid="${poolid}.0" + if ! pg_scrub "$pgid" ; then + return 1 + fi + + test "$(grep "_scan_snaps start" $dir/osd.${primary}.log | wc -l)" -gt "3" || return 1 + test "$(grep "_scan_snaps start" $dir/osd.${replica}.log | wc -l)" -gt "3" || return 1 + + rados list-inconsistent-pg $poolname > $dir/json || return 1 + # Check pg count + test $(jq '. 
| length' $dir/json) = "1" || return 1 + # Check pgid + test $(jq -r '.[0]' $dir/json) = $pgid || return 1 + + rados list-inconsistent-obj $pgid --format=json-pretty + + rados list-inconsistent-snapset $pgid > $dir/json || return 1 + + # Since all of the snapshots on the primary is consistent there are no errors here + if [ $which = "replica" ]; + then + scruberrors="20" + jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "epoch": 23, + "inconsistents": [] +} +EOF + +else + scruberrors="30" + jq "$jqfilter" << EOF | python3 -c "$sortkeys" > $dir/checkcsjson +{ + "epoch": 23, + "inconsistents": [ + { + "name": "obj10", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "size_mismatch" + ] + }, + { + "name": "obj11", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "headless" + ] + }, + { + "name": "obj14", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "size_mismatch" + ] + }, + { + "name": "obj6", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "headless" + ] + }, + { + "name": "obj7", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "headless" + ] + }, + { + "name": "obj9", + "nspace": "", + "locator": "", + "snap": 1, + "errors": [ + "size_mismatch" + ] + }, + { + "name": "obj5", + "nspace": "", + "locator": "", + "snap": 7, + "errors": [ + "info_missing", + "headless" + ] + }, + { + "name": "obj10", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "????", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + }, + { + "name": "obj11", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [] + }, + "errors": [ + "extra_clones" + ], + "extra clones": [ + 1 + ] + }, + { + "name": "obj14", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": 1033, + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + }, + { + "name": "obj5", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 6, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "[]", + "snaps": [ + 1 + ] + }, + { + "snap": 2, + "size": 256, + "overlap": "[]", + "snaps": [ + 2 + ] + }, + { + "snap": 4, + "size": 512, + "overlap": "[]", + "snaps": [ + 4, + 3 + ] + }, + { + "snap": 6, + "size": 1024, + "overlap": "[]", + "snaps": [ + 6, + 5 + ] + } + ] + }, + "errors": [ + "extra_clones" + ], + "extra clones": [ + 7 + ] + }, + { + "name": "obj6", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [] + }, + "errors": [ + "extra_clones" + ], + "extra clones": [ + 1 + ] + }, + { + "name": "obj7", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 0, + "clones": [] + }, + "errors": [ + "extra_clones" + ], + "extra clones": [ + 1 + ] + }, + { + "name": "obj8", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 0, + "clones": [ + { + "snap": 1, + "size": 1032, + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [ + "snapset_error" + ] + }, + { + "name": "obj9", + "nspace": "", + "locator": "", + "snap": "head", + "snapset": { + "seq": 1, + "clones": [ + { + "snap": 1, + "size": "????", + "overlap": "[]", + "snaps": [ + 1 + ] + } + ] + }, + "errors": [] + } + ] +} +EOF +fi + + jq "$jqfilter" $dir/json | python3 -c "$sortkeys" > $dir/csjson + multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1 + if test $getjson 
= "yes" + then + jq '.' $dir/json > save1.json + fi + + if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null; + then + jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-snap.json || return 1 + fi + + pidfiles=$(find $dir 2>/dev/null | grep 'osd[^/]*\.pid') + pids="" + for pidfile in ${pidfiles} + do + pids+="$(cat $pidfile) " + done + + ERRORS=0 + + # When removing snapshots with a corrupt replica, it crashes. + # See http://tracker.ceph.com/issues/23875 + if [ $which = "primary" ]; + then + for i in `seq 1 7` + do + rados -p $poolname rmsnap snap$i + done + sleep 5 + local -i loop=0 + while ceph pg dump pgs | grep -q snaptrim; + do + if ceph pg dump pgs | grep -q snaptrim_error; + then + break + fi + sleep 2 + loop+=1 + if (( $loop >= 10 )) ; then + ERRORS=$(expr $ERRORS + 1) + break + fi + done + fi + ceph pg dump pgs + + for pid in $pids + do + if ! kill -0 $pid + then + echo "OSD Crash occurred" + ERRORS=$(expr $ERRORS + 1) + fi + done + + kill_daemons $dir || return 1 + + declare -a err_strings + err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj4:7 : missing" + err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] soid .*:::obj3:head : size 3840 != size 768 from auth oi" + err_strings[2]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj5:1 : missing" + err_strings[3]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj5:2 : missing" + err_strings[4]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] soid .*:::obj5:4 : size 4608 != size 512 from auth oi" + err_strings[5]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 soid .*:::obj5:7 : failed to pick suitable object info" + err_strings[6]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 shard [0-1] .*:::obj1:head : missing" + err_strings[7]="log_channel[(]cluster[)] log [[]ERR[]] : [0-9]*[.]0 scrub ${scruberrors} errors" + + for err_string in "${err_strings[@]}" + do + if ! grep "$err_string" $dir/osd.${primary}.log > /dev/null; + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + # Check replica specific messages + declare -a rep_err_strings + osd=$(eval echo \$$which) + rep_err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : osd[.][0-9]* found snap mapper error on pg 1.0 oid 1:461f8b5e:::obj16:7 snaps missing in mapper, should be: {1, 2, 3, 4, 5, 6, 7} ...repaired" + for err_string in "${rep_err_strings[@]}" + do + if ! grep "$err_string" $dir/osd.${osd}.log > /dev/null; + then + echo "Missing log message '$err_string'" + ERRORS=$(expr $ERRORS + 1) + fi + done + + if [ $ERRORS != "0" ]; + then + echo "TEST FAILED WITH $ERRORS ERRORS" + return 1 + fi + + echo "TEST PASSED" + return 0 +} + +function TEST_scrub_snaps_replica() { + local dir=$1 + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1" + _scrub_snaps_multi $dir replica + err=$? + CEPH_ARGS=$ORIG_ARGS + return $err +} + +function TEST_scrub_snaps_primary() { + local dir=$1 + ORIG_ARGS=$CEPH_ARGS + CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1" + _scrub_snaps_multi $dir primary + err=$? 
+ CEPH_ARGS=$ORIG_ARGS + return $err +} + +main osd-scrub-snaps "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh osd-scrub-snaps.sh" +# End: diff --git a/qa/standalone/scrub/osd-scrub-test.sh b/qa/standalone/scrub/osd-scrub-test.sh new file mode 100755 index 000000000..73f165380 --- /dev/null +++ b/qa/standalone/scrub/osd-scrub-test.sh @@ -0,0 +1,664 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2018 Red Hat <contact@redhat.com> +# +# Author: David Zafman <dzafman@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh +source $CEPH_ROOT/qa/standalone/scrub/scrub-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7138" # git grep '\<7138\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_scrub_test() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=15 + + TESTDATA="testdata.$$" + + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + local primary=$(get_primary $poolname obj1) + local otherosd=$(get_not_primary $poolname obj1) + if [ "$otherosd" = "2" ]; + then + local anotherosd="0" + else + local anotherosd="2" + fi + + objectstore_tool $dir $anotherosd obj1 set-bytes /etc/fstab + + local pgid="${poolid}.0" + pg_deep_scrub "$pgid" || return 1 + + ceph pg dump pgs | grep ^${pgid} | grep -q -- +inconsistent || return 1 + test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "2" || return 1 + + ceph osd out $primary + wait_for_clean || return 1 + + pg_deep_scrub "$pgid" || return 1 + + test "$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_scrub_errors')" = "2" || return 1 + test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "2" || return 1 + ceph pg dump pgs | grep ^${pgid} | grep -q -- +inconsistent || return 1 + + ceph osd in $primary + wait_for_clean || return 1 + + repair "$pgid" || return 1 + wait_for_clean || return 1 + + # This sets up the test after we've repaired with previous primary has old value + test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "2" || return 1 + ceph pg dump pgs | grep ^${pgid} | grep -vq -- +inconsistent || return 1 + + ceph osd out $primary + wait_for_clean || return 1 + + test "$(ceph pg $pgid query | jq 
'.info.stats.stat_sum.num_scrub_errors')" = "0" || return 1 + test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "0" || return 1 + test "$(ceph pg $pgid query | jq '.peer_info[1].stats.stat_sum.num_scrub_errors')" = "0" || return 1 + ceph pg dump pgs | grep ^${pgid} | grep -vq -- +inconsistent || return 1 +} + +# Grab year-month-day +DATESED="s/\([0-9]*-[0-9]*-[0-9]*\).*/\1/" +DATEFORMAT="%Y-%m-%d" + +function check_dump_scrubs() { + local primary=$1 + local sched_time_check="$2" + local deadline_check="$3" + + DS="$(CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) dump_scrubs)" + # use eval to drop double-quotes + eval SCHED_TIME=$(echo $DS | jq '.[0].sched_time') + test $(echo $SCHED_TIME | sed $DATESED) = $(date +${DATEFORMAT} -d "now + $sched_time_check") || return 1 + # use eval to drop double-quotes + eval DEADLINE=$(echo $DS | jq '.[0].deadline') + test $(echo $DEADLINE | sed $DATESED) = $(date +${DATEFORMAT} -d "now + $deadline_check") || return 1 +} + +function TEST_interval_changes() { + local poolname=test + local OSDS=2 + local objects=10 + # Don't assume how internal defaults are set + local day="$(expr 24 \* 60 \* 60)" + local week="$(expr $day \* 7)" + local min_interval=$day + local max_interval=$week + local WAIT_FOR_UPDATE=15 + + TESTDATA="testdata.$$" + + # This min scrub interval results in 30 seconds backoff time + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_scrub_min_interval=$min_interval --osd_scrub_max_interval=$max_interval --osd_scrub_interval_randomize_ratio=0 || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + local poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + local primary=$(get_primary $poolname obj1) + + # Check initial settings from above (min 1 day, min 1 week) + check_dump_scrubs $primary "1 day" "1 week" || return 1 + + # Change global osd_scrub_min_interval to 2 days + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_min_interval $(expr $day \* 2) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "2 days" "1 week" || return 1 + + # Change global osd_scrub_max_interval to 2 weeks + CEPH_ARGS='' ceph --admin-daemon $(get_asok_path osd.${primary}) config set osd_scrub_max_interval $(expr $week \* 2) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "2 days" "2 week" || return 1 + + # Change pool osd_scrub_min_interval to 3 days + ceph osd pool set $poolname scrub_min_interval $(expr $day \* 3) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "3 days" "2 week" || return 1 + + # Change pool osd_scrub_max_interval to 3 weeks + ceph osd pool set $poolname scrub_max_interval $(expr $week \* 3) + sleep $WAIT_FOR_UPDATE + check_dump_scrubs $primary "3 days" "3 week" || return 1 +} + +function TEST_scrub_extended_sleep() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=15 + + TESTDATA="testdata.$$" + + DAY=$(date +%w) + # Handle wrap + if [ "$DAY" -ge "4" ]; + then + DAY="0" + fi + # Start after 2 days in case we are near midnight + DAY_START=$(expr $DAY + 2) + DAY_END=$(expr $DAY + 3) + + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + for osd in 
$(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --osd_scrub_sleep=0 \ + --osd_scrub_extended_sleep=20 \ + --bluestore_cache_autotune=false \ + --osd_deep_scrub_randomize_ratio=0.0 \ + --osd_scrub_interval_randomize_ratio=0 \ + --osd_scrub_begin_week_day=$DAY_START \ + --osd_scrub_end_week_day=$DAY_END \ + || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + + # Trigger a scrub on a PG + local pgid=$(get_pg $poolname SOMETHING) + local primary=$(get_primary $poolname SOMETHING) + local last_scrub=$(get_last_scrub_stamp $pgid) + ceph tell $pgid scrub || return 1 + + # Allow scrub to start extended sleep + PASSED="false" + for ((i=0; i < 15; i++)); do + if grep -q "scrub state.*, sleeping" $dir/osd.${primary}.log + then + PASSED="true" + break + fi + sleep 1 + done + + # Check that extended sleep was triggered + if [ $PASSED = "false" ]; + then + return 1 + fi + + # release scrub to run after extended sleep finishes + ceph tell osd.$primary config set osd_scrub_begin_week_day 0 + ceph tell osd.$primary config set osd_scrub_end_week_day 0 + + # Due to extended sleep, the scrub should not be done within 20 seconds + # but test up to 10 seconds and make sure it happens by 25 seconds. + count=0 + PASSED="false" + for ((i=0; i < 25; i++)); do + count=$(expr $count + 1) + if test "$(get_last_scrub_stamp $pgid)" '>' "$last_scrub" ; then + # Did scrub run too soon? + if [ $count -lt "10" ]; + then + return 1 + fi + PASSED="true" + break + fi + sleep 1 + done + + # Make sure scrub eventually ran + if [ $PASSED = "false" ]; + then + return 1 + fi +} + +function _scrub_abort() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=1000 + local type=$2 + + TESTDATA="testdata.$$" + if test $type = "scrub"; + then + stopscrub="noscrub" + check="noscrub" + else + stopscrub="nodeep-scrub" + check="nodeep_scrub" + fi + + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + # Set scheduler to "wpq" until there's a reliable way to query scrub + # states with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" + # overrides the scrub sleep to 0 and as a result the checks in the + # test fail. 
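+ # Note: the relatively long osd_scrub_sleep (5s) set below keeps the scrub in progress long enough for the test to observe it and then abort it; the sleep is lowered to 0.1s later in the test so the restarted scrub can complete quickly.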
+ run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off \ + --osd_deep_scrub_randomize_ratio=0.0 \ + --osd_scrub_sleep=5.0 \ + --osd_scrub_interval_randomize_ratio=0 \ + --osd_op_queue=wpq || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + local primary=$(get_primary $poolname obj1) + local pgid="${poolid}.0" + + ceph tell $pgid $type || return 1 + # deep-scrub won't start without scrub noticing + if [ "$type" = "deep_scrub" ]; + then + ceph tell $pgid scrub || return 1 + fi + + # Wait for scrubbing to start + set -o pipefail + found="no" + for i in $(seq 0 200) + do + flush_pg_stats + if ceph pg dump pgs | grep ^$pgid| grep -q "scrubbing" + then + found="yes" + #ceph pg dump pgs + break + fi + done + set +o pipefail + + if test $found = "no"; + then + echo "Scrubbing never started" + return 1 + fi + + ceph osd set $stopscrub + if [ "$type" = "deep_scrub" ]; + then + ceph osd set noscrub + fi + + # Wait for scrubbing to end + set -o pipefail + for i in $(seq 0 200) + do + flush_pg_stats + if ceph pg dump pgs | grep ^$pgid | grep -q "scrubbing" + then + continue + fi + #ceph pg dump pgs + break + done + set +o pipefail + + sleep 5 + + if ! grep "$check set, aborting" $dir/osd.${primary}.log + then + echo "Abort not seen in log" + return 1 + fi + + local last_scrub=$(get_last_scrub_stamp $pgid) + ceph config set osd "osd_scrub_sleep" "0.1" + + ceph osd unset $stopscrub + if [ "$type" = "deep_scrub" ]; + then + ceph osd unset noscrub + fi + TIMEOUT=$(($objects / 2)) + wait_for_scrub $pgid "$last_scrub" || return 1 +} + +function TEST_scrub_abort() { + local dir=$1 + _scrub_abort $dir scrub +} + +function TEST_deep_scrub_abort() { + local dir=$1 + _scrub_abort $dir deep_scrub +} + +function TEST_scrub_permit_time() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=15 + + TESTDATA="testdata.$$" + + run_mon $dir a --osd_pool_default_size=3 || return 1 + run_mgr $dir x || return 1 + local scrub_begin_hour=$(date -d '2 hour ago' +"%H" | sed 's/^0//') + local scrub_end_hour=$(date -d '1 hour ago' +"%H" | sed 's/^0//') + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd --bluestore_cache_autotune=false \ + --osd_deep_scrub_randomize_ratio=0.0 \ + --osd_scrub_interval_randomize_ratio=0 \ + --osd_scrub_begin_hour=$scrub_begin_hour \ + --osd_scrub_end_hour=$scrub_end_hour || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + + # Trigger a scrub on a PG + local pgid=$(get_pg $poolname SOMETHING) + local primary=$(get_primary $poolname SOMETHING) + local last_scrub=$(get_last_scrub_stamp $pgid) + # If we don't specify an amount of time to subtract from + # current time to set last_scrub_stamp, it sets the deadline + # back by osd_max_interval which would cause the time permit checking + # to be skipped. Set back 1 day, the default scrub_min_interval. + ceph tell $pgid scrub $(( 24 * 60 * 60 )) || return 1 + + # Scrub should not run + for ((i=0; i < 30; i++)); do + if test "$(get_last_scrub_stamp $pgid)" '>' "$last_scrub" ; then + return 1 + fi + sleep 1 + done +} + +# a test to recreate the problem described in bug #52901 - setting 'noscrub' +# without explicitly preventing deep scrubs made the PG 'unscrubable'. 
+# Fixed by PR#43521 +function TEST_just_deep_scrubs() { + local dir=$1 + local -A cluster_conf=( + ['osds_num']="3" + ['pgs_in_pool']="4" + ['pool_name']="test" + ) + + standard_scrub_cluster $dir cluster_conf + local poolid=${cluster_conf['pool_id']} + local poolname=${cluster_conf['pool_name']} + echo "Pool: $poolname : $poolid" + + TESTDATA="testdata.$$" + local objects=15 + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + # set both 'no scrub' & 'no deep-scrub', then request a deep-scrub. + # we do not expect to see the scrub scheduled. + + ceph osd set noscrub || return 1 + ceph osd set nodeep-scrub || return 1 + sleep 6 # the 'noscrub' command takes a long time to reach the OSDs + local now_is=`date -I"ns"` + declare -A sched_data + local pgid="${poolid}.2" + + # turn on the publishing of test data in the 'scrubber' section of 'pg query' output + set_query_debug $pgid + + extract_published_sch $pgid $now_is $now_is sched_data + local saved_last_stamp=${sched_data['query_last_stamp']} + local dbg_counter_at_start=${sched_data['query_scrub_seq']} + echo "test counter @ start: $dbg_counter_at_start" + + ceph pg $pgid deep_scrub + + sleep 5 # 5s is the 'pg dump' interval + declare -A sc_data_2 + extract_published_sch $pgid $now_is $now_is sc_data_2 + echo "test counter @ should show no change: " ${sc_data_2['query_scrub_seq']} + (( ${sc_data_2['dmp_last_duration']} == 0)) || return 1 + (( ${sc_data_2['query_scrub_seq']} == $dbg_counter_at_start)) || return 1 + + # unset the 'no deep-scrub'. Deep scrubbing should start now. + ceph osd unset nodeep-scrub || return 1 + sleep 5 + declare -A expct_qry_duration=( ['query_last_duration']="0" ['query_last_duration_neg']="not0" ) + sc_data_2=() + echo "test counter @ should be higher than before the unset: " ${sc_data_2['query_scrub_seq']} + wait_any_cond $pgid 10 $saved_last_stamp expct_qry_duration "WaitingAfterScrub " sc_data_2 || return 1 +} + +function TEST_dump_scrub_schedule() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=15 + + TESTDATA="testdata.$$" + + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + + # Set scheduler to "wpq" until there's a reliable way to query scrub states + # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the + # scrub sleep to 0 and as a result the checks in the test fail. + local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \ + --osd_scrub_interval_randomize_ratio=0 \ + --osd_scrub_backoff_ratio=0.0 \ + --osd_op_queue=wpq \ + --osd_scrub_sleep=0.2" + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd $ceph_osd_args|| return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + local pgid="${poolid}.0" + local now_is=`date -I"ns"` + + # before the scrubbing starts + + # last scrub duration should be 0. The scheduling data should show + # a time in the future: + # e.g. 
'periodic scrub scheduled @ 2021-10-12T20:32:43.645168+0000' + + declare -A expct_starting=( ['query_active']="false" ['query_is_future']="true" ['query_schedule']="scrub scheduled" ) + declare -A sched_data + extract_published_sch $pgid $now_is "2019-10-12T20:32:43.645168+0000" sched_data + schedule_against_expected sched_data expct_starting "initial" + (( ${sched_data['dmp_last_duration']} == 0)) || return 1 + echo "last-scrub --- " ${sched_data['query_last_scrub']} + + # + # step 1: scrub once (mainly to ensure there is no urgency to scrub) + # + + saved_last_stamp=${sched_data['query_last_stamp']} + ceph tell osd.* config set osd_scrub_sleep "0" + ceph pg deep-scrub $pgid + ceph pg scrub $pgid + + # wait for the 'last duration' entries to change. Note that the 'dump' one will need + # up to 5 seconds to sync + + sleep 5 + sched_data=() + declare -A expct_qry_duration=( ['query_last_duration']="0" ['query_last_duration_neg']="not0" ) + wait_any_cond $pgid 10 $saved_last_stamp expct_qry_duration "WaitingAfterScrub " sched_data || return 1 + # verify that 'pg dump' also shows the change in last_scrub_duration + sched_data=() + declare -A expct_dmp_duration=( ['dmp_last_duration']="0" ['dmp_last_duration_neg']="not0" ) + wait_any_cond $pgid 10 $saved_last_stamp expct_dmp_duration "WaitingAfterScrub_dmp " sched_data || return 1 + + sleep 2 + + # + # step 2: set noscrub and request a "periodic scrub". Watch for the change in the 'is the scrub + # scheduled for the future' value + # + + ceph tell osd.* config set osd_scrub_chunk_max "3" || return 1 + ceph tell osd.* config set osd_scrub_sleep "1.0" || return 1 + ceph osd set noscrub || return 1 + sleep 2 + saved_last_stamp=${sched_data['query_last_stamp']} + + ceph pg $pgid scrub + sleep 1 + sched_data=() + declare -A expct_scrub_peri_sched=( ['query_is_future']="false" ) + wait_any_cond $pgid 10 $saved_last_stamp expct_scrub_peri_sched "waitingBeingScheduled" sched_data || return 1 + + # note: the induced change in 'last_scrub_stamp' that we've caused above, is by itself not a publish-stats + # trigger. Thus it might happen that the information in 'pg dump' will not get updated here. Do not expect + # 'dmp_is_future' to follow 'query_is_future' without a good reason + ## declare -A expct_scrub_peri_sched_dmp=( ['dmp_is_future']="false" ) + ## wait_any_cond $pgid 15 $saved_last_stamp expct_scrub_peri_sched_dmp "waitingBeingScheduled" sched_data || echo "must be fixed" + + # + # step 3: allow scrubs. Watch for the conditions during the scrubbing + # + + saved_last_stamp=${sched_data['query_last_stamp']} + ceph osd unset noscrub + + declare -A cond_active=( ['query_active']="true" ) + sched_data=() + wait_any_cond $pgid 10 $saved_last_stamp cond_active "WaitingActive " sched_data || return 1 + + # check for pg-dump to show being active. But if we see 'query_active' being reset - we've just + # missed it. 
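+ # wait_any_cond() returns success as soon as either condition holds: 'pg dump' reports a scrubbing state, or the query shows query_active already back to "false" (i.e. the scrub finished before we sampled it).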
+ declare -A cond_active_dmp=( ['dmp_state_has_scrubbing']="true" ['query_active']="false" ) + sched_data=() + wait_any_cond $pgid 10 $saved_last_stamp cond_active_dmp "WaitingActive " sched_data || return 1 +} + +function TEST_pg_dump_objects_scrubbed() { + local dir=$1 + local poolname=test + local OSDS=3 + local objects=15 + local timeout=10 + + TESTDATA="testdata.$$" + + setup $dir || return 1 + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd || return 1 + done + + # Create a pool with a single pg + create_pool $poolname 1 1 + wait_for_clean || return 1 + poolid=$(ceph osd dump | grep "^pool.*[']${poolname}[']" | awk '{ print $2 }') + + dd if=/dev/urandom of=$TESTDATA bs=1032 count=1 + for i in `seq 1 $objects` + do + rados -p $poolname put obj${i} $TESTDATA + done + rm -f $TESTDATA + + local pgid="${poolid}.0" + #Trigger a scrub on a PG + pg_scrub $pgid || return 1 + test "$(ceph pg $pgid query | jq '.info.stats.objects_scrubbed')" '=' $objects || return 1 + + teardown $dir || return 1 +} + +main osd-scrub-test "$@" + +# Local Variables: +# compile-command: "cd build ; make -j4 && \ +# ../qa/run-standalone.sh osd-scrub-test.sh" +# End: diff --git a/qa/standalone/scrub/osd-unexpected-clone.sh b/qa/standalone/scrub/osd-unexpected-clone.sh new file mode 100755 index 000000000..6895bfee6 --- /dev/null +++ b/qa/standalone/scrub/osd-unexpected-clone.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Intel <contact@intel.com.com> +# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com> +# +# Author: Xiaoxi Chen <xiaoxi.chen@intel.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. 
+# + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + export -n CEPH_CLI_TEST_DUP_COMMAND + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_recover_unexpected() { + local dir=$1 + + run_mon $dir a || return 1 + run_mgr $dir x || return 1 + run_osd $dir 0 || return 1 + run_osd $dir 1 || return 1 + run_osd $dir 2 || return 1 + + ceph osd pool create foo 1 + rados -p foo put foo /etc/passwd + rados -p foo mksnap snap + rados -p foo put foo /etc/group + + wait_for_clean || return 1 + + local osd=$(get_primary foo foo) + + JSON=`objectstore_tool $dir $osd --op list foo | grep snapid.:1` + echo "JSON is $JSON" + rm -f $dir/_ $dir/data + objectstore_tool $dir $osd "$JSON" get-attr _ > $dir/_ || return 1 + objectstore_tool $dir $osd "$JSON" get-bytes $dir/data || return 1 + + rados -p foo rmsnap snap + + sleep 5 + + objectstore_tool $dir $osd "$JSON" set-bytes $dir/data || return 1 + objectstore_tool $dir $osd "$JSON" set-attr _ $dir/_ || return 1 + + sleep 5 + + ceph pg repair 1.0 || return 1 + + sleep 10 + + ceph log last + + # make sure osds are still up + timeout 60 ceph tell osd.0 version || return 1 + timeout 60 ceph tell osd.1 version || return 1 + timeout 60 ceph tell osd.2 version || return 1 +} + + +main osd-unexpected-clone "$@" + +# Local Variables: +# compile-command: "cd ../.. ; make -j4 && test/osd/osd-bench.sh" +# End: diff --git a/qa/standalone/scrub/scrub-helpers.sh b/qa/standalone/scrub/scrub-helpers.sh new file mode 100644 index 000000000..6816d71de --- /dev/null +++ b/qa/standalone/scrub/scrub-helpers.sh @@ -0,0 +1,302 @@ +#!/usr/bin/env bash +# @file scrub-helpers.sh +# @brief a collection of bash functions useful for scrub standalone tests +# + +# extract_published_sch() +# +# Use the output from both 'ceph pg dump pgs' and 'ceph pg x.x query' commands to determine +# the published scrub scheduling status of a given PG. +# +# $1: pg id +# $2: 'current' time to compare to +# $3: an additional time-point to compare to +# $4: [out] dictionary +# +function extract_published_sch() { + local pgn="$1" + local -n dict=$4 # a ref to the in/out dictionary + local current_time=$2 + local extra_time=$3 + local extr_dbg=1 # note: 3 and above leave some temp files around + + #turn off '-x' (but remember previous state) + local saved_echo_flag=${-//[^x]/} + set +x + + (( extr_dbg >= 3 )) && ceph pg dump pgs -f json-pretty >> /tmp/a_dmp$$ + (( extr_dbg >= 3 )) && ceph pg $1 query -f json-pretty >> /tmp/a_qry$$ + + from_dmp=`ceph pg dump pgs -f json-pretty | jq -r --arg pgn "$pgn" --arg extra_dt "$extra_time" --arg current_dt "$current_time" '[ + [[.pg_stats[]] | group_by(.pg_stats)][0][0] | + [.[] | + select(has("pgid") and .pgid == $pgn) | + + (.dmp_stat_part=(.scrub_schedule | if test(".*@.*") then (split(" @ ")|first) else . 
end)) | + (.dmp_when_part=(.scrub_schedule | if test(".*@.*") then (split(" @ ")|last) else "0" end)) | + + [ { + dmp_pg_state: .state, + dmp_state_has_scrubbing: (.state | test(".*scrub.*";"i")), + dmp_last_duration:.last_scrub_duration, + dmp_schedule: .dmp_stat_part, + dmp_schedule_at: .dmp_when_part, + dmp_is_future: ( .dmp_when_part > $current_dt ), + dmp_vs_date: ( .dmp_when_part > $extra_dt ), + dmp_reported_epoch: .reported_epoch, + dmp_seq: .reported_seq + }] ]][][][]'` + + (( extr_dbg >= 2 )) && echo "from pg dump pg: $from_dmp" + (( extr_dbg >= 2 )) && echo "query output:" + (( extr_dbg >= 2 )) && ceph pg $1 query -f json-pretty | awk -e '/scrubber/,/agent_state/ {print;}' + + from_qry=`ceph pg $1 query -f json-pretty | jq -r --arg extra_dt "$extra_time" --arg current_dt "$current_time" --arg spt "'" ' + . | + (.q_stat_part=((.scrubber.schedule// "-") | if test(".*@.*") then (split(" @ ")|first) else . end)) | + (.q_when_part=((.scrubber.schedule// "0") | if test(".*@.*") then (split(" @ ")|last) else "0" end)) | + (.q_when_is_future=(.q_when_part > $current_dt)) | + (.q_vs_date=(.q_when_part > $extra_dt)) | + { + query_epoch: .epoch, + query_seq: .info.stats.reported_seq, + query_active: (.scrubber | if has("active") then .active else "bug" end), + query_schedule: .q_stat_part, + query_schedule_at: .q_when_part, + query_last_duration: .info.stats.last_scrub_duration, + query_last_stamp: .info.history.last_scrub_stamp, + query_last_scrub: (.info.history.last_scrub| sub($spt;"x") ), + query_is_future: .q_when_is_future, + query_vs_date: .q_vs_date, + query_scrub_seq: .scrubber.test_sequence + } + '` + (( extr_dbg >= 1 )) && echo $from_qry " " $from_dmp | jq -s -r 'add | "(",(to_entries | .[] | "["+(.key)+"]="+(.value|@sh)),")"' + + # note that using a ref to an associative array directly is tricky. Instead - we are copying: + local -A dict_src=`echo $from_qry " " $from_dmp | jq -s -r 'add | "(",(to_entries | .[] | "["+(.key)+"]="+(.value|@sh)),")"'` + dict=() + for k in "${!dict_src[@]}"; do dict[$k]=${dict_src[$k]}; done + + if [[ -n "$saved_echo_flag" ]]; then set -x; fi +} + +# query the PG, until any of the conditions in the 'expected' array are met +# +# A condition may be negated by an additional entry in the 'expected' array. 
Its +# form should be: +# key: the original key, with a "_neg" suffix; +# Value: not checked +# +# $1: pg id +# $2: max retries +# $3: a date to use in comparisons +# $4: set of K/V conditions +# $5: debug message +# $6: [out] the results array +function wait_any_cond() { + local pgid="$1" + local retries=$2 + local cmp_date=$3 + local -n ep=$4 + local -n out_array=$6 + local -A sc_data + local extr_dbg=2 + + #turn off '-x' (but remember previous state) + local saved_echo_flag=${-//[^x]/} + set +x + + local now_is=`date -I"ns"` + (( extr_dbg >= 2 )) && echo "waiting for any condition ($5): pg:$pgid dt:$cmp_date ($retries retries)" + + for i in $(seq 1 $retries) + do + sleep 0.5 + extract_published_sch $pgid $now_is $cmp_date sc_data + (( extr_dbg >= 4 )) && echo "${sc_data['dmp_last_duration']}" + (( extr_dbg >= 4 )) && echo "----> loop: $i ~ ${sc_data['dmp_last_duration']} / " ${sc_data['query_vs_date']} " / ${sc_data['dmp_is_future']}" + (( extr_dbg >= 2 )) && echo "--> loop: $i ~ ${sc_data['query_active']} / ${sc_data['query_seq']} / ${sc_data['dmp_seq']} " \ + "/ ${sc_data['query_is_future']} / ${sc_data['query_last_stamp']} / ${sc_data['query_schedule']} %%% ${!ep[@]}" + + # perform schedule_against_expected(), but with slightly different out-messages behaviour + for k_ref in "${!ep[@]}" + do + (( extr_dbg >= 3 )) && echo "key is $k_ref" + # is this a real key, or just a negation flag for another key?? + [[ $k_ref =~ "_neg" ]] && continue + + local act_val=${sc_data[$k_ref]} + local exp_val=${ep[$k_ref]} + + # possible negation? look for a matching key + local neg_key="${k_ref}_neg" + (( extr_dbg >= 3 )) && echo "neg-key is $neg_key" + if [ -v 'ep[$neg_key]' ]; then + is_neg=1 + else + is_neg=0 + fi + + (( extr_dbg >= 1 )) && echo "key is $k_ref: negation:$is_neg # expected: $exp_val # in actual: $act_val" + is_eq=0 + [[ $exp_val == $act_val ]] && is_eq=1 + if (($is_eq ^ $is_neg)) + then + echo "$5 - '$k_ref' actual value ($act_val) matches expected ($exp_val) (negation: $is_neg)" + for k in "${!sc_data[@]}"; do out_array[$k]=${sc_data[$k]}; done + if [[ -n "$saved_echo_flag" ]]; then set -x; fi + return 0 + fi + done + done + + echo "$5: wait_any_cond(): failure. Note: query-active=${sc_data['query_active']}" + if [[ -n "$saved_echo_flag" ]]; then set -x; fi + return 1 +} + + +# schedule_against_expected() +# +# Compare the scrub scheduling state collected by extract_published_sch() to a set of expected values. +# All values are expected to match. +# +# $1: the published scheduling state +# $2: a set of conditions to verify +# $3: text to be echoed for a failed match +# +function schedule_against_expected() { + local -n dict=$1 # a ref to the published state + local -n ep=$2 # the expected results + local extr_dbg=1 + + # turn off '-x' (but remember previous state) + local saved_echo_flag=${-//[^x]/} + set +x + + (( extr_dbg >= 1 )) && echo "-- - comparing:" + for k_ref in "${!ep[@]}" + do + local act_val=${dict[$k_ref]} + local exp_val=${ep[$k_ref]} + (( extr_dbg >= 1 )) && echo "key is " $k_ref " expected: " $exp_val " in actual: " $act_val + if [[ $exp_val != $act_val ]] + then + echo "$3 - '$k_ref' actual value ($act_val) differs from expected ($exp_val)" + echo '####################################################^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^' + + if [[ -n "$saved_echo_flag" ]]; then set -x; fi + return 1 + fi + done + + if [[ -n "$saved_echo_flag" ]]; then set -x; fi + return 0 +} + + +# Start the cluster "nodes" and create a pool for testing. 
+# +# The OSDs are started with a set of parameters aimed in creating a repeatable +# and stable scrub sequence: +# - no scrub randomizations/backoffs +# - no autoscaler +# +# $1: the test directory +# $2: [in/out] an array of configuration values +# +# The function adds/updates the configuration dictionary with the name of the +# pool created, and its ID. +# +# Argument 2 might look like this: +# +# declare -A test_conf=( +# ['osds_num']="3" +# ['pgs_in_pool']="7" +# ['extras']="--extra1 --extra2" +# ['pool_name']="testpl" +# ) +function standard_scrub_cluster() { + local dir=$1 + local -n args=$2 + + local OSDS=${args['osds_num']:-"3"} + local pg_num=${args['pgs_in_pool']:-"8"} + local poolname="${args['pool_name']:-test}" + args['pool_name']=$poolname + local extra_pars=${args['extras']} + local debug_msg=${args['msg']:-"dbg"} + + # turn off '-x' (but remember previous state) + local saved_echo_flag=${-//[^x]/} + set +x + + run_mon $dir a --osd_pool_default_size=$OSDS || return 1 + run_mgr $dir x || return 1 + + local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \ + --osd_scrub_interval_randomize_ratio=0 \ + --osd_scrub_backoff_ratio=0.0 \ + --osd_pool_default_pg_autoscale_mode=off \ + --osd_pg_stat_report_interval_max=1 \ + $extra_pars" + + for osd in $(seq 0 $(expr $OSDS - 1)) + do + run_osd $dir $osd $(echo $ceph_osd_args) || return 1 + done + + create_pool $poolname $pg_num $pg_num + wait_for_clean || return 1 + + # update the in/out 'args' with the ID of the new pool + sleep 1 + name_n_id=`ceph osd dump | awk '/^pool.*'$poolname'/ { gsub(/'"'"'/," ",$3); print $3," ", $2}'` + echo "standard_scrub_cluster: $debug_msg: test pool is $name_n_id" + args['pool_id']="${name_n_id##* }" + args['osd_args']=$ceph_osd_args + if [[ -n "$saved_echo_flag" ]]; then set -x; fi +} + + +# Start the cluster "nodes" and create a pool for testing - wpq version. +# +# A variant of standard_scrub_cluster() that selects the wpq scheduler and sets a value to +# osd_scrub_sleep. To be used when the test is attempting to "catch" the scrubber during an +# ongoing scrub. +# +# See standard_scrub_cluster() for more details. +# +# $1: the test directory +# $2: [in/out] an array of configuration values +# $3: osd_scrub_sleep +# +# The function adds/updates the configuration dictionary with the name of the +# pool created, and its ID. +function standard_scrub_wpq_cluster() { + local dir=$1 + local -n conf=$2 + local osd_sleep=$3 + + conf['extras']=" --osd_op_queue=wpq --osd_scrub_sleep=$osd_sleep ${conf['extras']}" + + standard_scrub_cluster $dir conf || return 1 +} + + +# A debug flag is set for the PG specified, causing the 'pg query' command to display +# an additional 'scrub sessions counter' field. +# +# $1: PG id +# +function set_query_debug() { + local pgid=$1 + local prim_osd=`ceph pg dump pgs_brief | \ + awk -v pg="^$pgid" -n -e '$0 ~ pg { print(gensub(/[^0-9]*([0-9]+).*/,"\\\\1","g",$5)); }' ` + + echo "Setting scrub debug data. 
Primary for $pgid is $prim_osd" + CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.$prim_osd) \ + scrubdebug $pgid set sessions +} + diff --git a/qa/standalone/special/ceph_objectstore_tool.py b/qa/standalone/special/ceph_objectstore_tool.py new file mode 100755 index 000000000..98a2c8723 --- /dev/null +++ b/qa/standalone/special/ceph_objectstore_tool.py @@ -0,0 +1,2045 @@ +#!/usr/bin/python3 + +from subprocess import call, check_output, DEVNULL + +import filecmp +import os +import subprocess +import math +import time +import sys +import re +import logging +import json +import tempfile +import platform + +logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.WARNING, + datefmt="%FT%T") + + +def wait_for_health(): + print("Wait for health_ok...", end="") + tries = 0 + while call("{path}/ceph health 2> /dev/null | grep -v 'HEALTH_OK\|HEALTH_WARN' > /dev/null".format(path=CEPH_BIN), shell=True) == 0: + tries += 1 + if tries == 150: + raise Exception("Time exceeded to go to health") + time.sleep(1) + print("DONE") + + +def get_pool_id(name, nullfd): + cmd = "{path}/ceph osd pool stats {pool}".format(pool=name, path=CEPH_BIN).split() + # pool {pool} id # .... grab the 4 field + return check_output(cmd, stderr=nullfd).decode().split()[3] + + +# return a list of unique PGS given an osd subdirectory +def get_osd_pgs(SUBDIR, ID): + PGS = [] + if ID: + endhead = re.compile("{id}.*_head$".format(id=ID)) + DIR = os.path.join(SUBDIR, "current") + PGS += [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and (ID is None or endhead.match(f))] + PGS = [re.sub("_head", "", p) for p in PGS if "_head" in p] + return PGS + + +# return a sorted list of unique PGs given a directory +def get_pgs(DIR, ID): + OSDS = [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and f.find("osd") == 0] + PGS = [] + for d in OSDS: + SUBDIR = os.path.join(DIR, d) + PGS += get_osd_pgs(SUBDIR, ID) + return sorted(set(PGS)) + + +# return a sorted list of PGS a subset of ALLPGS that contain objects with prefix specified +def get_objs(ALLPGS, prefix, DIR, ID): + OSDS = [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and f.find("osd") == 0] + PGS = [] + for d in OSDS: + DIRL2 = os.path.join(DIR, d) + SUBDIR = os.path.join(DIRL2, "current") + for p in ALLPGS: + PGDIR = p + "_head" + if not os.path.isdir(os.path.join(SUBDIR, PGDIR)): + continue + FINALDIR = os.path.join(SUBDIR, PGDIR) + # See if there are any objects there + if any(f for f in [val for _, _, fl in os.walk(FINALDIR) for val in fl] if f.startswith(prefix)): + PGS += [p] + return sorted(set(PGS)) + + +# return a sorted list of OSDS which have data from a given PG +def get_osds(PG, DIR): + ALLOSDS = [f for f in os.listdir(DIR) if os.path.isdir(os.path.join(DIR, f)) and f.find("osd") == 0] + OSDS = [] + for d in ALLOSDS: + DIRL2 = os.path.join(DIR, d) + SUBDIR = os.path.join(DIRL2, "current") + PGDIR = PG + "_head" + if not os.path.isdir(os.path.join(SUBDIR, PGDIR)): + continue + OSDS += [d] + return sorted(OSDS) + + +def get_lines(filename): + tmpfd = open(filename, "r") + line = True + lines = [] + while line: + line = tmpfd.readline().rstrip('\n') + if line: + lines += [line] + tmpfd.close() + os.unlink(filename) + return lines + + +def cat_file(level, filename): + if level < logging.getLogger().getEffectiveLevel(): + return + print("File: " + filename) + with open(filename, "r") as f: + while True: + line = f.readline().rstrip('\n') + if not line: + break + print(line) + print("<EOF>") + + +def 
vstart(new, opt="-o osd_pool_default_pg_autoscale_mode=off"): + print("vstarting....", end="") + NEW = new and "-n" or "-k" + call("MON=1 OSD=4 MDS=0 MGR=1 CEPH_PORT=7400 MGR_PYTHON_PATH={path}/src/pybind/mgr {path}/src/vstart.sh --filestore --short -l {new} -d {opt} > /dev/null 2>&1".format(new=NEW, opt=opt, path=CEPH_ROOT), shell=True) + print("DONE") + + +def test_failure(cmd, errmsg, tty=False): + if tty: + try: + ttyfd = open("/dev/tty", "rwb") + except Exception as e: + logging.info(str(e)) + logging.info("SKIP " + cmd) + return 0 + TMPFILE = r"/tmp/tmp.{pid}".format(pid=os.getpid()) + tmpfd = open(TMPFILE, "wb") + + logging.debug(cmd) + if tty: + ret = call(cmd, shell=True, stdin=ttyfd, stdout=ttyfd, stderr=tmpfd) + ttyfd.close() + else: + ret = call(cmd, shell=True, stderr=tmpfd) + tmpfd.close() + if ret == 0: + logging.error(cmd) + logging.error("Should have failed, but got exit 0") + return 1 + lines = get_lines(TMPFILE) + matched = [ l for l in lines if errmsg in l ] + if any(matched): + logging.info("Correctly failed with message \"" + matched[0] + "\"") + return 0 + else: + logging.error("Command: " + cmd ) + logging.error("Bad messages to stderr \"" + str(lines) + "\"") + logging.error("Expected \"" + errmsg + "\"") + return 1 + + +def get_nspace(num): + if num == 0: + return "" + return "ns{num}".format(num=num) + + +def verify(DATADIR, POOL, NAME_PREFIX, db): + TMPFILE = r"/tmp/tmp.{pid}".format(pid=os.getpid()) + ERRORS = 0 + for rawnsfile in [f for f in os.listdir(DATADIR) if f.split('-')[1].find(NAME_PREFIX) == 0]: + nsfile = rawnsfile.split("__")[0] + clone = rawnsfile.split("__")[1] + nspace = nsfile.split("-")[0] + file = nsfile.split("-")[1] + # Skip clones + if clone != "head": + continue + path = os.path.join(DATADIR, rawnsfile) + try: + os.unlink(TMPFILE) + except: + pass + cmd = "{path}/rados -p {pool} -N '{nspace}' get {file} {out}".format(pool=POOL, file=file, out=TMPFILE, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=DEVNULL, stderr=DEVNULL) + cmd = "diff -q {src} {result}".format(src=path, result=TMPFILE) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("{file} data not imported properly".format(file=file)) + ERRORS += 1 + try: + os.unlink(TMPFILE) + except: + pass + for key, val in db[nspace][file]["xattr"].items(): + cmd = "{path}/rados -p {pool} -N '{nspace}' getxattr {name} {key}".format(pool=POOL, name=file, key=key, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + getval = check_output(cmd, shell=True, stderr=DEVNULL).decode() + logging.debug("getxattr {key} {val}".format(key=key, val=getval)) + if getval != val: + logging.error("getxattr of key {key} returned wrong val: {get} instead of {orig}".format(key=key, get=getval, orig=val)) + ERRORS += 1 + continue + hdr = db[nspace][file].get("omapheader", "") + cmd = "{path}/rados -p {pool} -N '{nspace}' getomapheader {name} {file}".format(pool=POOL, name=file, nspace=nspace, file=TMPFILE, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=DEVNULL) + if ret != 0: + logging.error("rados getomapheader returned {ret}".format(ret=ret)) + ERRORS += 1 + else: + getlines = get_lines(TMPFILE) + assert(len(getlines) == 0 or len(getlines) == 1) + if len(getlines) == 0: + gethdr = "" + else: + gethdr = getlines[0] + logging.debug("header: {hdr}".format(hdr=gethdr)) + if gethdr != hdr: + logging.error("getomapheader returned wrong val: {get} instead of {orig}".format(get=gethdr, orig=hdr)) + ERRORS += 1 + for key, val in 
db[nspace][file]["omap"].items(): + cmd = "{path}/rados -p {pool} -N '{nspace}' getomapval {name} {key} {file}".format(pool=POOL, name=file, key=key, nspace=nspace, file=TMPFILE, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=DEVNULL) + if ret != 0: + logging.error("getomapval returned {ret}".format(ret=ret)) + ERRORS += 1 + continue + getlines = get_lines(TMPFILE) + if len(getlines) != 1: + logging.error("Bad data from getomapval {lines}".format(lines=getlines)) + ERRORS += 1 + continue + getval = getlines[0] + logging.debug("getomapval {key} {val}".format(key=key, val=getval)) + if getval != val: + logging.error("getomapval returned wrong val: {get} instead of {orig}".format(get=getval, orig=val)) + ERRORS += 1 + try: + os.unlink(TMPFILE) + except: + pass + return ERRORS + + +def check_journal(jsondict): + errors = 0 + if 'header' not in jsondict: + logging.error("Key 'header' not in dump-journal") + errors += 1 + elif 'max_size' not in jsondict['header']: + logging.error("Key 'max_size' not in dump-journal header") + errors += 1 + else: + print("\tJournal max_size = {size}".format(size=jsondict['header']['max_size'])) + if 'entries' not in jsondict: + logging.error("Key 'entries' not in dump-journal output") + errors += 1 + elif len(jsondict['entries']) == 0: + logging.info("No entries in journal found") + else: + errors += check_journal_entries(jsondict['entries']) + return errors + + +def check_journal_entries(entries): + errors = 0 + for enum in range(len(entries)): + if 'offset' not in entries[enum]: + logging.error("No 'offset' key in entry {e}".format(e=enum)) + errors += 1 + if 'seq' not in entries[enum]: + logging.error("No 'seq' key in entry {e}".format(e=enum)) + errors += 1 + if 'transactions' not in entries[enum]: + logging.error("No 'transactions' key in entry {e}".format(e=enum)) + errors += 1 + elif len(entries[enum]['transactions']) == 0: + logging.error("No transactions found in entry {e}".format(e=enum)) + errors += 1 + else: + errors += check_entry_transactions(entries[enum], enum) + return errors + + +def check_entry_transactions(entry, enum): + errors = 0 + for tnum in range(len(entry['transactions'])): + if 'trans_num' not in entry['transactions'][tnum]: + logging.error("Key 'trans_num' missing from entry {e} trans {t}".format(e=enum, t=tnum)) + errors += 1 + elif entry['transactions'][tnum]['trans_num'] != tnum: + ft = entry['transactions'][tnum]['trans_num'] + logging.error("Bad trans_num ({ft}) entry {e} trans {t}".format(ft=ft, e=enum, t=tnum)) + errors += 1 + if 'ops' not in entry['transactions'][tnum]: + logging.error("Key 'ops' missing from entry {e} trans {t}".format(e=enum, t=tnum)) + errors += 1 + else: + errors += check_transaction_ops(entry['transactions'][tnum]['ops'], enum, tnum) + return errors + + +def check_transaction_ops(ops, enum, tnum): + if len(ops) == 0: + logging.warning("No ops found in entry {e} trans {t}".format(e=enum, t=tnum)) + errors = 0 + for onum in range(len(ops)): + if 'op_num' not in ops[onum]: + logging.error("Key 'op_num' missing from entry {e} trans {t} op {o}".format(e=enum, t=tnum, o=onum)) + errors += 1 + elif ops[onum]['op_num'] != onum: + fo = ops[onum]['op_num'] + logging.error("Bad op_num ({fo}) from entry {e} trans {t} op {o}".format(fo=fo, e=enum, t=tnum, o=onum)) + errors += 1 + if 'op_name' not in ops[onum]: + logging.error("Key 'op_name' missing from entry {e} trans {t} op {o}".format(e=enum, t=tnum, o=onum)) + errors += 1 + return errors + + +def test_dump_journal(CFSD_PREFIX, osds): + 
ERRORS = 0 + pid = os.getpid() + TMPFILE = r"/tmp/tmp.{pid}".format(pid=pid) + + for osd in osds: + # Test --op dump-journal by loading json + cmd = (CFSD_PREFIX + "--op dump-journal --format json").format(osd=osd) + logging.debug(cmd) + tmpfd = open(TMPFILE, "wb") + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd)) + ERRORS += 1 + continue + tmpfd.close() + tmpfd = open(TMPFILE, "r") + jsondict = json.load(tmpfd) + tmpfd.close() + os.unlink(TMPFILE) + + journal_errors = check_journal(jsondict) + if journal_errors != 0: + logging.error(jsondict) + ERRORS += journal_errors + + return ERRORS + +CEPH_BUILD_DIR = os.environ.get('CEPH_BUILD_DIR') +CEPH_BIN = os.environ.get('CEPH_BIN') +CEPH_ROOT = os.environ.get('CEPH_ROOT') + +if not CEPH_BUILD_DIR: + CEPH_BUILD_DIR=os.getcwd() + os.putenv('CEPH_BUILD_DIR', CEPH_BUILD_DIR) + CEPH_BIN=os.path.join(CEPH_BUILD_DIR, 'bin') + os.putenv('CEPH_BIN', CEPH_BIN) + CEPH_ROOT=os.path.dirname(CEPH_BUILD_DIR) + os.putenv('CEPH_ROOT', CEPH_ROOT) + CEPH_LIB=os.path.join(CEPH_BUILD_DIR, 'lib') + os.putenv('CEPH_LIB', CEPH_LIB) + +try: + os.mkdir("td") +except: + pass # ok if this is already there +CEPH_DIR = os.path.join(CEPH_BUILD_DIR, os.path.join("td", "cot_dir")) +CEPH_CONF = os.path.join(CEPH_DIR, 'ceph.conf') + +def kill_daemons(): + call("{path}/init-ceph -c {conf} stop > /dev/null 2>&1".format(conf=CEPH_CONF, path=CEPH_BIN), shell=True) + + +def check_data(DATADIR, TMPFILE, OSDDIR, SPLIT_NAME): + repcount = 0 + ERRORS = 0 + for rawnsfile in [f for f in os.listdir(DATADIR) if f.split('-')[1].find(SPLIT_NAME) == 0]: + nsfile = rawnsfile.split("__")[0] + clone = rawnsfile.split("__")[1] + nspace = nsfile.split("-")[0] + file = nsfile.split("-")[1] + "__" + clone + # Skip clones + if clone != "head": + continue + path = os.path.join(DATADIR, rawnsfile) + tmpfd = open(TMPFILE, "wb") + cmd = "find {dir} -name '{file}_*_{nspace}_*'".format(dir=OSDDIR, file=file, nspace=nspace) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret: + logging.critical("INTERNAL ERROR") + return 1 + tmpfd.close() + obj_locs = get_lines(TMPFILE) + if len(obj_locs) == 0: + logging.error("Can't find imported object {name}".format(name=file)) + ERRORS += 1 + for obj_loc in obj_locs: + # For btrfs skip snap_* dirs + if re.search("/snap_[0-9]*/", obj_loc) is not None: + continue + repcount += 1 + cmd = "diff -q {src} {obj_loc}".format(src=path, obj_loc=obj_loc) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("{file} data not imported properly into {obj}".format(file=file, obj=obj_loc)) + ERRORS += 1 + return ERRORS, repcount + + +def set_osd_weight(CFSD_PREFIX, osd_ids, osd_path, weight): + # change the weight of osd.0 to math.pi in the newest osdmap of given osd + osdmap_file = tempfile.NamedTemporaryFile(delete=True) + cmd = (CFSD_PREFIX + "--op get-osdmap --file {osdmap_file}").format(osd=osd_path, + osdmap_file=osdmap_file.name) + output = check_output(cmd, shell=True).decode() + epoch = int(re.findall('#(\d+)', output)[0]) + + new_crush_file = tempfile.NamedTemporaryFile(delete=True) + old_crush_file = tempfile.NamedTemporaryFile(delete=True) + ret = call("{path}/osdmaptool --export-crush {crush_file} {osdmap_file}".format(osdmap_file=osdmap_file.name, + crush_file=old_crush_file.name, path=CEPH_BIN), + stdout=DEVNULL, + stderr=DEVNULL, + shell=True) + assert(ret == 0) + + for osd_id in osd_ids: + cmd = "{path}/crushtool -i {crush_file} 
--reweight-item osd.{osd} {weight} -o {new_crush_file}".format(osd=osd_id, + crush_file=old_crush_file.name, + weight=weight, + new_crush_file=new_crush_file.name, path=CEPH_BIN) + ret = call(cmd, stdout=DEVNULL, shell=True) + assert(ret == 0) + old_crush_file, new_crush_file = new_crush_file, old_crush_file + + # change them back, since we don't need to prepare for another round + old_crush_file, new_crush_file = new_crush_file, old_crush_file + old_crush_file.close() + + ret = call("{path}/osdmaptool --import-crush {crush_file} {osdmap_file}".format(osdmap_file=osdmap_file.name, + crush_file=new_crush_file.name, path=CEPH_BIN), + stdout=DEVNULL, + stderr=DEVNULL, + shell=True) + assert(ret == 0) + + # Minimal test of --dry-run by using it, but not checking anything + cmd = CFSD_PREFIX + "--op set-osdmap --file {osdmap_file} --epoch {epoch} --force --dry-run" + cmd = cmd.format(osd=osd_path, osdmap_file=osdmap_file.name, epoch=epoch) + ret = call(cmd, stdout=DEVNULL, shell=True) + assert(ret == 0) + + # osdmaptool increases the epoch of the changed osdmap, so we need to force the tool + # to use a different epoch than the one in the osdmap + cmd = CFSD_PREFIX + "--op set-osdmap --file {osdmap_file} --epoch {epoch} --force" + cmd = cmd.format(osd=osd_path, osdmap_file=osdmap_file.name, epoch=epoch) + ret = call(cmd, stdout=DEVNULL, shell=True) + + return ret == 0 + +def get_osd_weights(CFSD_PREFIX, osd_ids, osd_path): + osdmap_file = tempfile.NamedTemporaryFile(delete=True) + cmd = (CFSD_PREFIX + "--op get-osdmap --file {osdmap_file}").format(osd=osd_path, + osdmap_file=osdmap_file.name) + ret = call(cmd, stdout=DEVNULL, shell=True) + if ret != 0: + return None + # we have to read the weights from the crush map, even though we can query the weights using + # osdmaptool; keep in mind that they are different: + # item weights in crush map versus weight associated with each osd in osdmap + crush_file = tempfile.NamedTemporaryFile(delete=True) + ret = call("{path}/osdmaptool --export-crush {crush_file} {osdmap_file}".format(osdmap_file=osdmap_file.name, + crush_file=crush_file.name, path=CEPH_BIN), + stdout=DEVNULL, + shell=True) + assert(ret == 0) + output = check_output("{path}/crushtool --tree -i {crush_file} | tail -n {num_osd}".format(crush_file=crush_file.name, + num_osd=len(osd_ids), path=CEPH_BIN), + stderr=DEVNULL, + shell=True).decode() + weights = [] + for line in output.strip().split('\n'): + print(line) + linev = re.split('\s+', line) + if linev[0] == '': + linev.pop(0) + print('linev %s' % linev) + weights.append(float(linev[2])) + + return weights + + +def test_get_set_osdmap(CFSD_PREFIX, osd_ids, osd_paths): + print("Testing get-osdmap and set-osdmap") + errors = 0 + kill_daemons() + weight = 1 / math.e # just some magic number in [0, 1] + changed = [] + for osd_path in osd_paths: + if set_osd_weight(CFSD_PREFIX, osd_ids, osd_path, weight): + changed.append(osd_path) + else: + logging.warning("Failed to change the weights: {0}".format(osd_path)) + # it is an error if none of the stores was changed + if not changed: + errors += 1 + + for osd_path in changed: + weights = get_osd_weights(CFSD_PREFIX, osd_ids, osd_path) + if not weights: + errors += 1 + continue + if any(abs(w - weight) > 1e-5 for w in weights): + logging.warning("Weight is not changed: {0} != {1}".format(weights, weight)) + errors += 1 + return errors + +def test_get_set_inc_osdmap(CFSD_PREFIX, osd_path): + # incrementals are not used unless we need to build an MOSDMap to update + # OSD's peers, so an obvious way 
to test it is simply overwrite an epoch + # with a different copy, and read it back to see if it matches. + kill_daemons() + file_e2 = tempfile.NamedTemporaryFile(delete=True) + cmd = (CFSD_PREFIX + "--op get-inc-osdmap --file {file}").format(osd=osd_path, + file=file_e2.name) + output = check_output(cmd, shell=True).decode() + epoch = int(re.findall('#(\d+)', output)[0]) + # backup e1 incremental before overwriting it + epoch -= 1 + file_e1_backup = tempfile.NamedTemporaryFile(delete=True) + cmd = CFSD_PREFIX + "--op get-inc-osdmap --epoch {epoch} --file {file}" + ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_backup.name), shell=True) + if ret: return 1 + # overwrite e1 with e2 + cmd = CFSD_PREFIX + "--op set-inc-osdmap --force --epoch {epoch} --file {file}" + ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e2.name), shell=True) + if ret: return 1 + # Use dry-run to set back to e1 which shouldn't happen + cmd = CFSD_PREFIX + "--op set-inc-osdmap --dry-run --epoch {epoch} --file {file}" + ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_backup.name), shell=True) + if ret: return 1 + # read from e1 + file_e1_read = tempfile.NamedTemporaryFile(delete=True) + cmd = CFSD_PREFIX + "--op get-inc-osdmap --epoch {epoch} --file {file}" + ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_read.name), shell=True) + if ret: return 1 + errors = 0 + try: + if not filecmp.cmp(file_e2.name, file_e1_read.name, shallow=False): + logging.error("{{get,set}}-inc-osdmap mismatch {0} != {1}".format(file_e2.name, file_e1_read.name)) + errors += 1 + finally: + # revert the change with file_e1_backup + cmd = CFSD_PREFIX + "--op set-inc-osdmap --epoch {epoch} --file {file}" + ret = call(cmd.format(osd=osd_path, epoch=epoch, file=file_e1_backup.name), shell=True) + if ret: + logging.error("Failed to revert the changed inc-osdmap") + errors += 1 + + return errors + + +def test_removeall(CFSD_PREFIX, db, OBJREPPGS, REP_POOL, CEPH_BIN, OSDDIR, REP_NAME, NUM_CLONED_REP_OBJECTS): + # Test removeall + TMPFILE = r"/tmp/tmp.{pid}".format(pid=os.getpid()) + nullfd = open(os.devnull, "w") + errors=0 + print("Test removeall") + kill_daemons() + test_force_remove = 0 + for nspace in db.keys(): + for basename in db[nspace].keys(): + JSON = db[nspace][basename]['json'] + for pg in OBJREPPGS: + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + + if int(basename.split(REP_NAME)[1]) <= int(NUM_CLONED_REP_OBJECTS): + cmd = (CFSD_PREFIX + "'{json}' remove").format(osd=osd, json=JSON) + errors += test_failure(cmd, "Clones are present, use removeall to delete everything") + if not test_force_remove: + + cmd = (CFSD_PREFIX + " '{json}' set-attr snapset /dev/null").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Test set-up to corrupt snapset failed for {json}".format(json=JSON)) + errors += 1 + # Do the removeall since this test failed to set-up + else: + test_force_remove = 1 + + cmd = (CFSD_PREFIX + " '{json}' --force remove").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("forced remove with corrupt snapset failed for 
{json}".format(json=JSON)) + errors += 1 + continue + + cmd = (CFSD_PREFIX + " --force --dry-run '{json}' remove").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("remove with --force failed for {json}".format(json=JSON)) + errors += 1 + + cmd = (CFSD_PREFIX + " --dry-run '{json}' removeall").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("removeall failed for {json}".format(json=JSON)) + errors += 1 + + cmd = (CFSD_PREFIX + " '{json}' removeall").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("removeall failed for {json}".format(json=JSON)) + errors += 1 + + tmpfd = open(TMPFILE, "w") + cmd = (CFSD_PREFIX + "--op list --pgid {pg} --namespace {ns} {name}").format(osd=osd, pg=pg, ns=nspace, name=basename) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd)) + errors += 1 + tmpfd.close() + lines = get_lines(TMPFILE) + if len(lines) != 0: + logging.error("Removeall didn't remove all objects {ns}/{name} : {lines}".format(ns=nspace, name=basename, lines=lines)) + errors += 1 + vstart(new=False) + wait_for_health() + cmd = "{path}/rados -p {pool} rmsnap snap1".format(pool=REP_POOL, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("rados rmsnap failed") + errors += 1 + time.sleep(2) + wait_for_health() + return errors + + +def main(argv): + stdout = sys.stdout.buffer + if len(argv) > 1 and argv[1] == "debug": + nullfd = stdout + else: + nullfd = DEVNULL + + call("rm -fr {dir}; mkdir -p {dir}".format(dir=CEPH_DIR), shell=True) + os.chdir(CEPH_DIR) + os.environ["CEPH_DIR"] = CEPH_DIR + OSDDIR = "dev" + REP_POOL = "rep_pool" + REP_NAME = "REPobject" + EC_POOL = "ec_pool" + EC_NAME = "ECobject" + if len(argv) > 0 and argv[0] == 'large': + PG_COUNT = 12 + NUM_REP_OBJECTS = 200 + NUM_CLONED_REP_OBJECTS = 50 + NUM_EC_OBJECTS = 12 + NUM_NSPACES = 4 + # Larger data sets for first object per namespace + DATALINECOUNT = 50000 + # Number of objects to do xattr/omap testing on + ATTR_OBJS = 10 + else: + PG_COUNT = 4 + NUM_REP_OBJECTS = 2 + NUM_CLONED_REP_OBJECTS = 2 + NUM_EC_OBJECTS = 2 + NUM_NSPACES = 2 + # Larger data sets for first object per namespace + DATALINECOUNT = 10 + # Number of objects to do xattr/omap testing on + ATTR_OBJS = 2 + ERRORS = 0 + pid = os.getpid() + TESTDIR = "/tmp/test.{pid}".format(pid=pid) + DATADIR = "/tmp/data.{pid}".format(pid=pid) + CFSD_PREFIX = CEPH_BIN + "/ceph-objectstore-tool --no-mon-config --data-path " + OSDDIR + "/{osd} " + PROFNAME = "testecprofile" + + os.environ['CEPH_CONF'] = CEPH_CONF + vstart(new=True) + wait_for_health() + + cmd = "{path}/ceph osd pool create {pool} {pg} {pg} replicated".format(pool=REP_POOL, pg=PG_COUNT, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + time.sleep(2) + REPID = get_pool_id(REP_POOL, nullfd) + + print("Created Replicated pool #{repid}".format(repid=REPID)) + + cmd = "{path}/ceph osd erasure-code-profile set {prof} crush-failure-domain=osd".format(prof=PROFNAME, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + cmd = "{path}/ceph osd erasure-code-profile get {prof}".format(prof=PROFNAME, 
path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + cmd = "{path}/ceph osd pool create {pool} {pg} {pg} erasure {prof}".format(pool=EC_POOL, prof=PROFNAME, pg=PG_COUNT, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + ECID = get_pool_id(EC_POOL, nullfd) + + print("Created Erasure coded pool #{ecid}".format(ecid=ECID)) + + print("Creating {objs} objects in replicated pool".format(objs=(NUM_REP_OBJECTS*NUM_NSPACES))) + cmd = "mkdir -p {datadir}".format(datadir=DATADIR) + logging.debug(cmd) + call(cmd, shell=True) + + db = {} + + objects = range(1, NUM_REP_OBJECTS + 1) + nspaces = range(NUM_NSPACES) + for n in nspaces: + nspace = get_nspace(n) + + db[nspace] = {} + + for i in objects: + NAME = REP_NAME + "{num}".format(num=i) + LNAME = nspace + "-" + NAME + DDNAME = os.path.join(DATADIR, LNAME) + DDNAME += "__head" + + cmd = "rm -f " + DDNAME + logging.debug(cmd) + call(cmd, shell=True) + + if i == 1: + dataline = range(DATALINECOUNT) + else: + dataline = range(1) + fd = open(DDNAME, "w") + data = "This is the replicated data for " + LNAME + "\n" + for _ in dataline: + fd.write(data) + fd.close() + + cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=REP_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd) + if ret != 0: + logging.critical("Rados put command failed with {ret}".format(ret=ret)) + return 1 + + db[nspace][NAME] = {} + + if i < ATTR_OBJS + 1: + keys = range(i) + else: + keys = range(0) + db[nspace][NAME]["xattr"] = {} + for k in keys: + if k == 0: + continue + mykey = "key{i}-{k}".format(i=i, k=k) + myval = "val{i}-{k}".format(i=i, k=k) + cmd = "{path}/rados -p {pool} -N '{nspace}' setxattr {name} {key} {val}".format(pool=REP_POOL, name=NAME, key=mykey, val=myval, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("setxattr failed with {ret}".format(ret=ret)) + ERRORS += 1 + db[nspace][NAME]["xattr"][mykey] = myval + + # Create omap header in all objects but REPobject1 + if i < ATTR_OBJS + 1 and i != 1: + myhdr = "hdr{i}".format(i=i) + cmd = "{path}/rados -p {pool} -N '{nspace}' setomapheader {name} {hdr}".format(pool=REP_POOL, name=NAME, hdr=myhdr, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.critical("setomapheader failed with {ret}".format(ret=ret)) + ERRORS += 1 + db[nspace][NAME]["omapheader"] = myhdr + + db[nspace][NAME]["omap"] = {} + for k in keys: + if k == 0: + continue + mykey = "okey{i}-{k}".format(i=i, k=k) + myval = "oval{i}-{k}".format(i=i, k=k) + cmd = "{path}/rados -p {pool} -N '{nspace}' setomapval {name} {key} {val}".format(pool=REP_POOL, name=NAME, key=mykey, val=myval, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.critical("setomapval failed with {ret}".format(ret=ret)) + db[nspace][NAME]["omap"][mykey] = myval + + # Create some clones + cmd = "{path}/rados -p {pool} mksnap snap1".format(pool=REP_POOL, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True) + + objects = range(1, NUM_CLONED_REP_OBJECTS + 1) + nspaces = range(NUM_NSPACES) + for n in nspaces: + nspace = get_nspace(n) + + for i in objects: + NAME = REP_NAME + "{num}".format(num=i) + LNAME = nspace + "-" + NAME + DDNAME = os.path.join(DATADIR, LNAME) + # First clone + CLONENAME = DDNAME + "__1" + DDNAME += "__head" + + cmd = "mv -f " 
+ DDNAME + " " + CLONENAME + logging.debug(cmd) + call(cmd, shell=True) + + if i == 1: + dataline = range(DATALINECOUNT) + else: + dataline = range(1) + fd = open(DDNAME, "w") + data = "This is the replicated data after a snapshot for " + LNAME + "\n" + for _ in dataline: + fd.write(data) + fd.close() + + cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=REP_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd) + if ret != 0: + logging.critical("Rados put command failed with {ret}".format(ret=ret)) + return 1 + + print("Creating {objs} objects in erasure coded pool".format(objs=(NUM_EC_OBJECTS*NUM_NSPACES))) + + objects = range(1, NUM_EC_OBJECTS + 1) + nspaces = range(NUM_NSPACES) + for n in nspaces: + nspace = get_nspace(n) + + for i in objects: + NAME = EC_NAME + "{num}".format(num=i) + LNAME = nspace + "-" + NAME + DDNAME = os.path.join(DATADIR, LNAME) + DDNAME += "__head" + + cmd = "rm -f " + DDNAME + logging.debug(cmd) + call(cmd, shell=True) + + if i == 1: + dataline = range(DATALINECOUNT) + else: + dataline = range(1) + fd = open(DDNAME, "w") + data = "This is the erasure coded data for " + LNAME + "\n" + for j in dataline: + fd.write(data) + fd.close() + + cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=EC_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd) + if ret != 0: + logging.critical("Erasure coded pool creation failed with {ret}".format(ret=ret)) + return 1 + + db[nspace][NAME] = {} + + db[nspace][NAME]["xattr"] = {} + if i < ATTR_OBJS + 1: + keys = range(i) + else: + keys = range(0) + for k in keys: + if k == 0: + continue + mykey = "key{i}-{k}".format(i=i, k=k) + myval = "val{i}-{k}".format(i=i, k=k) + cmd = "{path}/rados -p {pool} -N '{nspace}' setxattr {name} {key} {val}".format(pool=EC_POOL, name=NAME, key=mykey, val=myval, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("setxattr failed with {ret}".format(ret=ret)) + ERRORS += 1 + db[nspace][NAME]["xattr"][mykey] = myval + + # Omap isn't supported in EC pools + db[nspace][NAME]["omap"] = {} + + logging.debug(db) + + kill_daemons() + + if ERRORS: + logging.critical("Unable to set up test") + return 1 + + ALLREPPGS = get_pgs(OSDDIR, REPID) + logging.debug(ALLREPPGS) + ALLECPGS = get_pgs(OSDDIR, ECID) + logging.debug(ALLECPGS) + + OBJREPPGS = get_objs(ALLREPPGS, REP_NAME, OSDDIR, REPID) + logging.debug(OBJREPPGS) + OBJECPGS = get_objs(ALLECPGS, EC_NAME, OSDDIR, ECID) + logging.debug(OBJECPGS) + + ONEPG = ALLREPPGS[0] + logging.debug(ONEPG) + osds = get_osds(ONEPG, OSDDIR) + ONEOSD = osds[0] + logging.debug(ONEOSD) + + print("Test invalid parameters") + # On export can't use stdout to a terminal + cmd = (CFSD_PREFIX + "--op export --pgid {pg}").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "stdout is a tty and no --file filename specified", tty=True) + + # On export can't use stdout to a terminal + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file -").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "stdout is a tty and no --file filename specified", tty=True) + + # Prep a valid ec export file for import failure tests + ONEECPG = ALLECPGS[0] + osds = get_osds(ONEECPG, OSDDIR) + ONEECOSD = osds[0] + OTHERFILE = "/tmp/foo.{pid}".format(pid=pid) + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=ONEECOSD, 
pg=ONEECPG, file=OTHERFILE) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + + os.unlink(OTHERFILE) + + # Prep a valid export file for import failure tests + OTHERFILE = "/tmp/foo.{pid}".format(pid=pid) + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=ONEOSD, pg=ONEPG, file=OTHERFILE) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + + # On import can't specify a different pgid than the file + TMPPG="{pool}.80".format(pool=REPID) + cmd = (CFSD_PREFIX + "--op import --pgid 12.dd --file {file}").format(osd=ONEOSD, pg=TMPPG, file=OTHERFILE) + ERRORS += test_failure(cmd, "specified pgid 12.dd does not match actual pgid") + + os.unlink(OTHERFILE) + cmd = (CFSD_PREFIX + "--op import --file {FOO}").format(osd=ONEOSD, FOO=OTHERFILE) + ERRORS += test_failure(cmd, "file: {FOO}: No such file or directory".format(FOO=OTHERFILE)) + + cmd = "{path}/ceph-objectstore-tool --no-mon-config --data-path BAD_DATA_PATH --op list".format(path=CEPH_BIN) + ERRORS += test_failure(cmd, "data-path: BAD_DATA_PATH: No such file or directory") + + cmd = (CFSD_PREFIX + "--journal-path BAD_JOURNAL_PATH --op list").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "journal-path: BAD_JOURNAL_PATH: No such file or directory") + + cmd = (CFSD_PREFIX + "--journal-path /bin --op list").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "journal-path: /bin: (21) Is a directory") + + # On import can't use stdin from a terminal + cmd = (CFSD_PREFIX + "--op import --pgid {pg}").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "stdin is a tty and no --file filename specified", tty=True) + + # On import can't use stdin from a terminal + cmd = (CFSD_PREFIX + "--op import --pgid {pg} --file -").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "stdin is a tty and no --file filename specified", tty=True) + + # Specify a bad --type + os.mkdir(OSDDIR + "/fakeosd") + cmd = ("{path}/ceph-objectstore-tool --no-mon-config --data-path " + OSDDIR + "/{osd} --type foobar --op list --pgid {pg}").format(osd="fakeosd", pg=ONEPG, path=CEPH_BIN) + ERRORS += test_failure(cmd, "Unable to create store of type foobar") + + # Don't specify a data-path + cmd = "{path}/ceph-objectstore-tool --no-mon-config --type memstore --op list --pgid {pg}".format(pg=ONEPG, path=CEPH_BIN) + ERRORS += test_failure(cmd, "Must provide --data-path") + + cmd = (CFSD_PREFIX + "--op remove --pgid 2.0").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "Please use export-remove or you must use --force option") + + cmd = (CFSD_PREFIX + "--force --op remove").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "Must provide pgid") + + # Don't secify a --op nor object command + cmd = CFSD_PREFIX.format(osd=ONEOSD) + ERRORS += test_failure(cmd, "Must provide --op or object command...") + + # Specify a bad --op command + cmd = (CFSD_PREFIX + "--op oops").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, statfs)") + + # Provide just the object param not a command + cmd = (CFSD_PREFIX + "object").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "Invalid syntax, missing command") + + # Provide an object name that doesn't exist + cmd = (CFSD_PREFIX + "NON_OBJECT get-bytes").format(osd=ONEOSD) + ERRORS += test_failure(cmd, "No 
object id 'NON_OBJECT' found") + + # Provide an invalid object command + cmd = (CFSD_PREFIX + "--pgid {pg} '' notacommand").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Unknown object command 'notacommand'") + + cmd = (CFSD_PREFIX + "foo list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "No object id 'foo' found or invalid JSON specified") + + cmd = (CFSD_PREFIX + "'{{\"oid\":\"obj4\",\"key\":\"\",\"snapid\":-1,\"hash\":2826278768,\"max\":0,\"pool\":1,\"namespace\":\"\"}}' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Without --pgid the object '{\"oid\":\"obj4\",\"key\":\"\",\"snapid\":-1,\"hash\":2826278768,\"max\":0,\"pool\":1,\"namespace\":\"\"}' must be a JSON array") + + cmd = (CFSD_PREFIX + "'[]' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Object '[]' must be a JSON array with 2 elements") + + cmd = (CFSD_PREFIX + "'[\"1.0\"]' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Object '[\"1.0\"]' must be a JSON array with 2 elements") + + cmd = (CFSD_PREFIX + "'[\"1.0\", 5, 8, 9]' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Object '[\"1.0\", 5, 8, 9]' must be a JSON array with 2 elements") + + cmd = (CFSD_PREFIX + "'[1, 2]' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Object '[1, 2]' must be a JSON array with the first element a string") + + cmd = (CFSD_PREFIX + "'[\"1.3\",{{\"snapid\":\"not an int\"}}]' list-omap").format(osd=ONEOSD, pg=ONEPG) + ERRORS += test_failure(cmd, "Decode object JSON error: value type is 2 not 4") + + TMPFILE = r"/tmp/tmp.{pid}".format(pid=pid) + ALLPGS = OBJREPPGS + OBJECPGS + OSDS = get_osds(ALLPGS[0], OSDDIR) + osd = OSDS[0] + + print("Test all --op dump-journal") + ALLOSDS = [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0] + ERRORS += test_dump_journal(CFSD_PREFIX, ALLOSDS) + + # Test --op list and generate json for all objects + print("Test --op list variants") + + # retrieve all objects from all PGs + tmpfd = open(TMPFILE, "wb") + cmd = (CFSD_PREFIX + "--op list --format json").format(osd=osd) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd)) + ERRORS += 1 + tmpfd.close() + lines = get_lines(TMPFILE) + JSONOBJ = sorted(set(lines)) + (pgid, coll, jsondict) = json.loads(JSONOBJ[0])[0] + + # retrieve all objects in a given PG + tmpfd = open(OTHERFILE, "ab") + cmd = (CFSD_PREFIX + "--op list --pgid {pg} --format json").format(osd=osd, pg=pgid) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd)) + ERRORS += 1 + tmpfd.close() + lines = get_lines(OTHERFILE) + JSONOBJ = sorted(set(lines)) + (other_pgid, other_coll, other_jsondict) = json.loads(JSONOBJ[0])[0] + + if pgid != other_pgid or jsondict != other_jsondict or coll != other_coll: + logging.error("the first line of --op list is different " + "from the first line of --op list --pgid {pg}".format(pg=pgid)) + ERRORS += 1 + + # retrieve all objects with a given name in a given PG + tmpfd = open(OTHERFILE, "wb") + cmd = (CFSD_PREFIX + "--op list --pgid {pg} {object} --format json").format(osd=osd, pg=pgid, object=jsondict['oid']) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from {cmd}".format(ret=ret, cmd=cmd)) + ERRORS 
+= 1 + tmpfd.close() + lines = get_lines(OTHERFILE) + JSONOBJ = sorted(set(lines)) + (other_pgid, other_coll, other_jsondict) = json.loads(JSONOBJ[0])[0] + + if pgid != other_pgid or jsondict != other_jsondict or coll != other_coll: + logging.error("the first line of --op list is different " + "from the first line of --op list --pgid {pg} {object}".format(pg=pgid, object=jsondict['oid'])) + ERRORS += 1 + + print("Test --op list by generating json for all objects using default format") + for pg in ALLPGS: + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + tmpfd = open(TMPFILE, "ab") + cmd = (CFSD_PREFIX + "--op list --pgid {pg}").format(osd=osd, pg=pg) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from --op list request".format(ret=ret)) + ERRORS += 1 + + tmpfd.close() + lines = get_lines(TMPFILE) + JSONOBJ = sorted(set(lines)) + for JSON in JSONOBJ: + (pgid, jsondict) = json.loads(JSON) + # Skip clones for now + if jsondict['snapid'] != -2: + continue + db[jsondict['namespace']][jsondict['oid']]['json'] = json.dumps((pgid, jsondict)) + # print db[jsondict['namespace']][jsondict['oid']]['json'] + if jsondict['oid'].find(EC_NAME) == 0 and 'shard_id' not in jsondict: + logging.error("Malformed JSON {json}".format(json=JSON)) + ERRORS += 1 + + # Test get-bytes + print("Test get-bytes and set-bytes") + for nspace in db.keys(): + for basename in db[nspace].keys(): + file = os.path.join(DATADIR, nspace + "-" + basename + "__head") + JSON = db[nspace][basename]['json'] + GETNAME = "/tmp/getbytes.{pid}".format(pid=pid) + TESTNAME = "/tmp/testbytes.{pid}".format(pid=pid) + SETNAME = "/tmp/setbytes.{pid}".format(pid=pid) + BADNAME = "/tmp/badbytes.{pid}".format(pid=pid) + for pg in OBJREPPGS: + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + try: + os.unlink(GETNAME) + except: + pass + cmd = (CFSD_PREFIX + " --pgid {pg} '{json}' get-bytes {fname}").format(osd=osd, pg=pg, json=JSON, fname=GETNAME) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret}".format(ret=ret)) + ERRORS += 1 + continue + cmd = "diff -q {file} {getfile}".format(file=file, getfile=GETNAME) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Data from get-bytes differ") + logging.debug("Got:") + cat_file(logging.DEBUG, GETNAME) + logging.debug("Expected:") + cat_file(logging.DEBUG, file) + ERRORS += 1 + fd = open(SETNAME, "w") + data = "put-bytes going into {file}\n".format(file=file) + fd.write(data) + fd.close() + cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' set-bytes {sname}").format(osd=osd, pg=pg, json=JSON, sname=SETNAME) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-bytes".format(ret=ret)) + ERRORS += 1 + fd = open(TESTNAME, "wb") + cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' get-bytes -").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=fd) + fd.close() + if ret != 0: + logging.error("Bad exit status {ret} from get-bytes".format(ret=ret)) + ERRORS += 1 + cmd = "diff -q {setfile} {testfile}".format(setfile=SETNAME, testfile=TESTNAME) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + 
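+                        # the diff compares what set-bytes wrote (SETNAME) with what get-bytes read back (TESTNAME); any difference is an error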
logging.error("Data after set-bytes differ") + logging.debug("Got:") + cat_file(logging.DEBUG, TESTNAME) + logging.debug("Expected:") + cat_file(logging.DEBUG, SETNAME) + ERRORS += 1 + + # Use set-bytes with --dry-run and make sure contents haven't changed + fd = open(BADNAME, "w") + data = "Bad data for --dry-run in {file}\n".format(file=file) + fd.write(data) + fd.close() + cmd = (CFSD_PREFIX + "--dry-run --pgid {pg} '{json}' set-bytes {sname}").format(osd=osd, pg=pg, json=JSON, sname=BADNAME) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from set-bytes --dry-run".format(ret=ret)) + ERRORS += 1 + fd = open(TESTNAME, "wb") + cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' get-bytes -").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=fd) + fd.close() + if ret != 0: + logging.error("Bad exit status {ret} from get-bytes".format(ret=ret)) + ERRORS += 1 + cmd = "diff -q {setfile} {testfile}".format(setfile=SETNAME, testfile=TESTNAME) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Data after set-bytes --dry-run changed!") + logging.debug("Got:") + cat_file(logging.DEBUG, TESTNAME) + logging.debug("Expected:") + cat_file(logging.DEBUG, SETNAME) + ERRORS += 1 + + fd = open(file, "rb") + cmd = (CFSD_PREFIX + "--pgid {pg} '{json}' set-bytes").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdin=fd) + if ret != 0: + logging.error("Bad exit status {ret} from set-bytes to restore object".format(ret=ret)) + ERRORS += 1 + fd.close() + + try: + os.unlink(GETNAME) + except: + pass + try: + os.unlink(TESTNAME) + except: + pass + try: + os.unlink(SETNAME) + except: + pass + try: + os.unlink(BADNAME) + except: + pass + + # Test get-attr, set-attr, rm-attr, get-omaphdr, set-omaphdr, get-omap, set-omap, rm-omap + print("Test get-attr, set-attr, rm-attr, get-omaphdr, set-omaphdr, get-omap, set-omap, rm-omap") + for nspace in db.keys(): + for basename in db[nspace].keys(): + file = os.path.join(DATADIR, nspace + "-" + basename + "__head") + JSON = db[nspace][basename]['json'] + for pg in OBJREPPGS: + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + for key, val in db[nspace][basename]["xattr"].items(): + attrkey = "_" + key + cmd = (CFSD_PREFIX + " '{json}' get-attr {key}").format(osd=osd, json=JSON, key=attrkey) + logging.debug(cmd) + getval = check_output(cmd, shell=True).decode() + if getval != val: + logging.error("get-attr of key {key} returned wrong val: {get} instead of {orig}".format(key=attrkey, get=getval, orig=val)) + ERRORS += 1 + continue + # set-attr to bogus value "foobar" + cmd = ("echo -n foobar | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-attr".format(ret=ret)) + ERRORS += 1 + continue + # Test set-attr with dry-run + cmd = ("echo -n dryrunbroken | " + CFSD_PREFIX + "--dry-run '{json}' set-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Bad exit 
status {ret} from set-attr".format(ret=ret)) + ERRORS += 1 + continue + # Check the set-attr + cmd = (CFSD_PREFIX + " --pgid {pg} '{json}' get-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + getval = check_output(cmd, shell=True).decode() + if ret != 0: + logging.error("Bad exit status {ret} from get-attr".format(ret=ret)) + ERRORS += 1 + continue + if getval != "foobar": + logging.error("Check of set-attr failed because we got {val}".format(val=getval)) + ERRORS += 1 + continue + # Test rm-attr + cmd = (CFSD_PREFIX + "'{json}' rm-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from rm-attr".format(ret=ret)) + ERRORS += 1 + continue + # Check rm-attr with dry-run + cmd = (CFSD_PREFIX + "--dry-run '{json}' rm-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from rm-attr".format(ret=ret)) + ERRORS += 1 + continue + cmd = (CFSD_PREFIX + "'{json}' get-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd, stdout=nullfd) + if ret == 0: + logging.error("For rm-attr expect get-attr to fail, but it succeeded") + ERRORS += 1 + # Put back value + cmd = ("echo -n {val} | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-attr {key}").format(osd=osd, pg=pg, json=JSON, key=attrkey, val=val) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-attr".format(ret=ret)) + ERRORS += 1 + continue + + hdr = db[nspace][basename].get("omapheader", "") + cmd = (CFSD_PREFIX + "'{json}' get-omaphdr").format(osd=osd, json=JSON) + logging.debug(cmd) + gethdr = check_output(cmd, shell=True).decode() + if gethdr != hdr: + logging.error("get-omaphdr was wrong: {get} instead of {orig}".format(get=gethdr, orig=hdr)) + ERRORS += 1 + continue + # set-omaphdr to bogus value "foobar" + cmd = ("echo -n foobar | " + CFSD_PREFIX + "'{json}' set-omaphdr").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-omaphdr".format(ret=ret)) + ERRORS += 1 + continue + # Check the set-omaphdr + cmd = (CFSD_PREFIX + "'{json}' get-omaphdr").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + gethdr = check_output(cmd, shell=True).decode() + if ret != 0: + logging.error("Bad exit status {ret} from get-omaphdr".format(ret=ret)) + ERRORS += 1 + continue + if gethdr != "foobar": + logging.error("Check of set-omaphdr failed because we got {val}".format(val=getval)) + ERRORS += 1 + continue + # Test dry-run with set-omaphdr + cmd = ("echo -n dryrunbroken | " + CFSD_PREFIX + "--dry-run '{json}' set-omaphdr").format(osd=osd, pg=pg, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from set-omaphdr".format(ret=ret)) + ERRORS += 1 + continue + # Put back value + cmd = ("echo -n {val} | " + CFSD_PREFIX + "'{json}' set-omaphdr").format(osd=osd, pg=pg, json=JSON, val=hdr) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-omaphdr".format(ret=ret)) + ERRORS += 1 + continue + + for omapkey, val in db[nspace][basename]["omap"].items(): + cmd = (CFSD_PREFIX + " '{json}' get-omap {key}").format(osd=osd, json=JSON, 
key=omapkey) + logging.debug(cmd) + getval = check_output(cmd, shell=True).decode() + if getval != val: + logging.error("get-omap of key {key} returned wrong val: {get} instead of {orig}".format(key=omapkey, get=getval, orig=val)) + ERRORS += 1 + continue + # set-omap to bogus value "foobar" + cmd = ("echo -n foobar | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-omap".format(ret=ret)) + ERRORS += 1 + continue + # Check set-omap with dry-run + cmd = ("echo -n dryrunbroken | " + CFSD_PREFIX + "--dry-run --pgid {pg} '{json}' set-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from set-omap".format(ret=ret)) + ERRORS += 1 + continue + # Check the set-omap + cmd = (CFSD_PREFIX + " --pgid {pg} '{json}' get-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + getval = check_output(cmd, shell=True).decode() + if ret != 0: + logging.error("Bad exit status {ret} from get-omap".format(ret=ret)) + ERRORS += 1 + continue + if getval != "foobar": + logging.error("Check of set-omap failed because we got {val}".format(val=getval)) + ERRORS += 1 + continue + # Test rm-omap + cmd = (CFSD_PREFIX + "'{json}' rm-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from rm-omap".format(ret=ret)) + ERRORS += 1 + # Check rm-omap with dry-run + cmd = (CFSD_PREFIX + "--dry-run '{json}' rm-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Bad exit status {ret} from rm-omap".format(ret=ret)) + ERRORS += 1 + cmd = (CFSD_PREFIX + "'{json}' get-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd, stdout=nullfd) + if ret == 0: + logging.error("For rm-omap expect get-omap to fail, but it succeeded") + ERRORS += 1 + # Put back value + cmd = ("echo -n {val} | " + CFSD_PREFIX + " --pgid {pg} '{json}' set-omap {key}").format(osd=osd, pg=pg, json=JSON, key=omapkey, val=val) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret} from set-omap".format(ret=ret)) + ERRORS += 1 + continue + + # Test dump + print("Test dump") + for nspace in db.keys(): + for basename in db[nspace].keys(): + file = os.path.join(DATADIR, nspace + "-" + basename + "__head") + JSON = db[nspace][basename]['json'] + jsondict = json.loads(JSON) + for pg in OBJREPPGS: + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + if int(basename.split(REP_NAME)[1]) > int(NUM_CLONED_REP_OBJECTS): + continue + logging.debug("REPobject " + JSON) + cmd = (CFSD_PREFIX + " '{json}' dump | grep '\"snap\": 1,' > /dev/null").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Invalid dump for {json}".format(json=JSON)) + ERRORS += 1 + if 'shard_id' in jsondict[1]: + 
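+                        # erasure-coded objects carry a shard_id; re-check the dump against every EC PG, fixing up shard_id per shard below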
logging.debug("ECobject " + JSON) + for pg in OBJECPGS: + OSDS = get_osds(pg, OSDDIR) + jsondict = json.loads(JSON) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + if int(basename.split(EC_NAME)[1]) > int(NUM_EC_OBJECTS): + continue + # Fix shard_id since we only have one json instance for each object + jsondict[1]['shard_id'] = int(pg.split('s')[1]) + cmd = (CFSD_PREFIX + " '{json}' dump | grep '\"hinfo\": [{{]' > /dev/null").format(osd=osd, json=json.dumps((pg, jsondict[1]))) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Invalid dump for {json}".format(json=JSON)) + + print("Test list-attrs get-attr") + ATTRFILE = r"/tmp/attrs.{pid}".format(pid=pid) + VALFILE = r"/tmp/val.{pid}".format(pid=pid) + for nspace in db.keys(): + for basename in db[nspace].keys(): + file = os.path.join(DATADIR, nspace + "-" + basename) + JSON = db[nspace][basename]['json'] + jsondict = json.loads(JSON) + + if 'shard_id' in jsondict[1]: + logging.debug("ECobject " + JSON) + found = 0 + for pg in OBJECPGS: + OSDS = get_osds(pg, OSDDIR) + # Fix shard_id since we only have one json instance for each object + jsondict[1]['shard_id'] = int(pg.split('s')[1]) + JSON = json.dumps((pg, jsondict[1])) + for osd in OSDS: + cmd = (CFSD_PREFIX + " --tty '{json}' get-attr hinfo_key").format(osd=osd, json=JSON) + logging.debug("TRY: " + cmd) + try: + out = check_output(cmd, shell=True, stderr=subprocess.STDOUT).decode() + logging.debug("FOUND: {json} in {osd} has value '{val}'".format(osd=osd, json=JSON, val=out)) + found += 1 + except subprocess.CalledProcessError as e: + logging.debug("Error message: {output}".format(output=e.output)) + if "No such file or directory" not in str(e.output) and \ + "No data available" not in str(e.output) and \ + "not contained by pg" not in str(e.output): + raise + # Assuming k=2 m=1 for the default ec pool + if found != 3: + logging.error("{json} hinfo_key found {found} times instead of 3".format(json=JSON, found=found)) + ERRORS += 1 + + for pg in ALLPGS: + # Make sure rep obj with rep pg or ec obj with ec pg + if ('shard_id' in jsondict[1]) != (pg.find('s') > 0): + continue + if 'shard_id' in jsondict[1]: + # Fix shard_id since we only have one json instance for each object + jsondict[1]['shard_id'] = int(pg.split('s')[1]) + JSON = json.dumps((pg, jsondict[1])) + OSDS = get_osds(pg, OSDDIR) + for osd in OSDS: + DIR = os.path.join(OSDDIR, os.path.join(osd, os.path.join("current", "{pg}_head".format(pg=pg)))) + fnames = [f for f in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, f)) + and f.split("_")[0] == basename and f.split("_")[4] == nspace] + if not fnames: + continue + afd = open(ATTRFILE, "wb") + cmd = (CFSD_PREFIX + " '{json}' list-attrs").format(osd=osd, json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=afd) + afd.close() + if ret != 0: + logging.error("list-attrs failed with {ret}".format(ret=ret)) + ERRORS += 1 + continue + keys = get_lines(ATTRFILE) + values = dict(db[nspace][basename]["xattr"]) + for key in keys: + if key == "_" or key == "snapset" or key == "hinfo_key": + continue + key = key.strip("_") + if key not in values: + logging.error("Unexpected key {key} present".format(key=key)) + ERRORS += 1 + continue + exp = values.pop(key) + vfd = open(VALFILE, "wb") + cmd = 
(CFSD_PREFIX + " '{json}' get-attr {key}").format(osd=osd, json=JSON, key="_" + key) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=vfd) + vfd.close() + if ret != 0: + logging.error("get-attr failed with {ret}".format(ret=ret)) + ERRORS += 1 + continue + lines = get_lines(VALFILE) + val = lines[0] + if exp != val: + logging.error("For key {key} got value {got} instead of {expected}".format(key=key, got=val, expected=exp)) + ERRORS += 1 + if len(values) != 0: + logging.error("Not all keys found, remaining keys:") + print(values) + + print("Test --op meta-list") + tmpfd = open(TMPFILE, "wb") + cmd = (CFSD_PREFIX + "--op meta-list").format(osd=ONEOSD) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Bad exit status {ret} from --op meta-list request".format(ret=ret)) + ERRORS += 1 + + print("Test get-bytes on meta") + tmpfd.close() + lines = get_lines(TMPFILE) + JSONOBJ = sorted(set(lines)) + for JSON in JSONOBJ: + (pgid, jsondict) = json.loads(JSON) + if pgid != "meta": + logging.error("pgid incorrect for --op meta-list {pgid}".format(pgid=pgid)) + ERRORS += 1 + if jsondict['namespace'] != "": + logging.error("namespace non null --op meta-list {ns}".format(ns=jsondict['namespace'])) + ERRORS += 1 + logging.info(JSON) + try: + os.unlink(GETNAME) + except: + pass + cmd = (CFSD_PREFIX + "'{json}' get-bytes {fname}").format(osd=ONEOSD, json=JSON, fname=GETNAME) + logging.debug(cmd) + ret = call(cmd, shell=True) + if ret != 0: + logging.error("Bad exit status {ret}".format(ret=ret)) + ERRORS += 1 + + try: + os.unlink(GETNAME) + except: + pass + try: + os.unlink(TESTNAME) + except: + pass + + print("Test pg info") + for pg in ALLREPPGS + ALLECPGS: + for osd in get_osds(pg, OSDDIR): + cmd = (CFSD_PREFIX + "--op info --pgid {pg} | grep '\"pgid\": \"{pg}\"'").format(osd=osd, pg=pg) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Getting info failed for pg {pg} from {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + ERRORS += 1 + + print("Test pg logging") + if len(ALLREPPGS + ALLECPGS) == len(OBJREPPGS + OBJECPGS): + logging.warning("All PGs have objects, so no log without modify entries") + for pg in ALLREPPGS + ALLECPGS: + for osd in get_osds(pg, OSDDIR): + tmpfd = open(TMPFILE, "wb") + cmd = (CFSD_PREFIX + "--op log --pgid {pg}").format(osd=osd, pg=pg) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=tmpfd) + if ret != 0: + logging.error("Getting log failed for pg {pg} from {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + ERRORS += 1 + HASOBJ = pg in OBJREPPGS + OBJECPGS + MODOBJ = False + for line in get_lines(TMPFILE): + if line.find("modify") != -1: + MODOBJ = True + break + if HASOBJ != MODOBJ: + logging.error("Bad log for pg {pg} from {osd}".format(pg=pg, osd=osd)) + MSG = (HASOBJ and [""] or ["NOT "])[0] + print("Log should {msg}have a modify entry".format(msg=MSG)) + ERRORS += 1 + + try: + os.unlink(TMPFILE) + except: + pass + + print("Test list-pgs") + for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]: + + CHECK_PGS = get_osd_pgs(os.path.join(OSDDIR, osd), None) + CHECK_PGS = sorted(CHECK_PGS) + + cmd = (CFSD_PREFIX + "--op list-pgs").format(osd=osd) + logging.debug(cmd) + TEST_PGS = check_output(cmd, shell=True).decode().split("\n") + TEST_PGS = sorted(TEST_PGS)[1:] # Skip extra blank line + + if TEST_PGS != CHECK_PGS: + logging.error("list-pgs got wrong result for osd.{osd}".format(osd=osd)) + 
logging.error("Expected {pgs}".format(pgs=CHECK_PGS)) + logging.error("Got {pgs}".format(pgs=TEST_PGS)) + ERRORS += 1 + + EXP_ERRORS = 0 + print("Test pg export --dry-run") + pg = ALLREPPGS[0] + osd = get_osds(pg, OSDDIR)[0] + fname = "/tmp/fname.{pid}".format(pid=pid) + cmd = (CFSD_PREFIX + "--dry-run --op export --pgid {pg} --file {file}").format(osd=osd, pg=pg, file=fname) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Exporting --dry-run failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + EXP_ERRORS += 1 + elif os.path.exists(fname): + logging.error("Exporting --dry-run created file") + EXP_ERRORS += 1 + + cmd = (CFSD_PREFIX + "--dry-run --op export --pgid {pg} > {file}").format(osd=osd, pg=pg, file=fname) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Exporting --dry-run failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + EXP_ERRORS += 1 + else: + outdata = get_lines(fname) + if len(outdata) > 0: + logging.error("Exporting --dry-run to stdout not empty") + logging.error("Data: " + outdata) + EXP_ERRORS += 1 + + os.mkdir(TESTDIR) + for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]: + os.mkdir(os.path.join(TESTDIR, osd)) + print("Test pg export") + for pg in ALLREPPGS + ALLECPGS: + for osd in get_osds(pg, OSDDIR): + mydir = os.path.join(TESTDIR, osd) + fname = os.path.join(mydir, pg) + if pg == ALLREPPGS[0]: + cmd = (CFSD_PREFIX + "--op export --pgid {pg} > {file}").format(osd=osd, pg=pg, file=fname) + elif pg == ALLREPPGS[1]: + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file - > {file}").format(osd=osd, pg=pg, file=fname) + else: + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=osd, pg=pg, file=fname) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Exporting failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + EXP_ERRORS += 1 + + ERRORS += EXP_ERRORS + + print("Test clear-data-digest") + for nspace in db.keys(): + for basename in db[nspace].keys(): + JSON = db[nspace][basename]['json'] + cmd = (CFSD_PREFIX + "'{json}' clear-data-digest").format(osd='osd0', json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Clearing data digest failed for {json}".format(json=JSON)) + ERRORS += 1 + break + cmd = (CFSD_PREFIX + "'{json}' dump | grep '\"data_digest\": \"0xff'").format(osd='osd0', json=JSON) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Data digest not cleared for {json}".format(json=JSON)) + ERRORS += 1 + break + break + break + + print("Test pg removal") + RM_ERRORS = 0 + for pg in ALLREPPGS + ALLECPGS: + for osd in get_osds(pg, OSDDIR): + # This should do nothing + cmd = (CFSD_PREFIX + "--op remove --pgid {pg} --dry-run").format(pg=pg, osd=osd) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Removing --dry-run failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + RM_ERRORS += 1 + cmd = (CFSD_PREFIX + "--force --op remove --pgid {pg}").format(pg=pg, osd=osd) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Removing failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) 
+ RM_ERRORS += 1 + + ERRORS += RM_ERRORS + + IMP_ERRORS = 0 + if EXP_ERRORS == 0 and RM_ERRORS == 0: + print("Test pg import") + for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]: + dir = os.path.join(TESTDIR, osd) + PGS = [f for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))] + for pg in PGS: + file = os.path.join(dir, pg) + # Make sure this doesn't crash + cmd = (CFSD_PREFIX + "--op dump-export --file {file}").format(osd=osd, file=file) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Dump-export failed from {file} with {ret}".format(file=file, ret=ret)) + IMP_ERRORS += 1 + # This should do nothing + cmd = (CFSD_PREFIX + "--op import --file {file} --dry-run").format(osd=osd, file=file) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Import failed from {file} with {ret}".format(file=file, ret=ret)) + IMP_ERRORS += 1 + if pg == PGS[0]: + cmd = ("cat {file} |".format(file=file) + CFSD_PREFIX + "--op import").format(osd=osd) + elif pg == PGS[1]: + cmd = (CFSD_PREFIX + "--op import --file - --pgid {pg} < {file}").format(osd=osd, file=file, pg=pg) + else: + cmd = (CFSD_PREFIX + "--op import --file {file}").format(osd=osd, file=file) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Import failed from {file} with {ret}".format(file=file, ret=ret)) + IMP_ERRORS += 1 + else: + logging.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES") + + ERRORS += IMP_ERRORS + logging.debug(cmd) + + if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0: + print("Verify replicated import data") + data_errors, _ = check_data(DATADIR, TMPFILE, OSDDIR, REP_NAME) + ERRORS += data_errors + else: + logging.warning("SKIPPING CHECKING IMPORT DATA DUE TO PREVIOUS FAILURES") + + print("Test all --op dump-journal again") + ALLOSDS = [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0] + ERRORS += test_dump_journal(CFSD_PREFIX, ALLOSDS) + + vstart(new=False) + wait_for_health() + + if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0: + print("Verify erasure coded import data") + ERRORS += verify(DATADIR, EC_POOL, EC_NAME, db) + # Check replicated data/xattr/omap using rados + print("Verify replicated import data using rados") + ERRORS += verify(DATADIR, REP_POOL, REP_NAME, db) + + if EXP_ERRORS == 0: + NEWPOOL = "rados-import-pool" + cmd = "{path}/ceph osd pool create {pool} 8".format(pool=NEWPOOL, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + + print("Test rados import") + first = True + for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]: + dir = os.path.join(TESTDIR, osd) + for pg in [f for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]: + if pg.find("{id}.".format(id=REPID)) != 0: + continue + file = os.path.join(dir, pg) + if first: + first = False + # This should do nothing + cmd = "{path}/rados import -p {pool} --dry-run {file}".format(pool=NEWPOOL, file=file, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Rados import --dry-run failed from {file} with {ret}".format(file=file, ret=ret)) + ERRORS += 1 + cmd = "{path}/rados -p {pool} ls".format(pool=NEWPOOL, path=CEPH_BIN) + logging.debug(cmd) + data = check_output(cmd, shell=True).decode() + if data: + 
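+                        # 'rados ls' output after a --dry-run import should be empty; any listed object means the dry run modified the pool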
logging.error("'{data}'".format(data=data)) + logging.error("Found objects after dry-run") + ERRORS += 1 + cmd = "{path}/rados import -p {pool} {file}".format(pool=NEWPOOL, file=file, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Rados import failed from {file} with {ret}".format(file=file, ret=ret)) + ERRORS += 1 + cmd = "{path}/rados import -p {pool} --no-overwrite {file}".format(pool=NEWPOOL, file=file, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Rados import --no-overwrite failed from {file} with {ret}".format(file=file, ret=ret)) + ERRORS += 1 + + ERRORS += verify(DATADIR, NEWPOOL, REP_NAME, db) + else: + logging.warning("SKIPPING IMPORT-RADOS TESTS DUE TO PREVIOUS FAILURES") + + # Clear directories of previous portion + call("/bin/rm -rf {dir}".format(dir=TESTDIR), shell=True) + call("/bin/rm -rf {dir}".format(dir=DATADIR), shell=True) + os.mkdir(TESTDIR) + os.mkdir(DATADIR) + + # Cause SPLIT_POOL to split and test import with object/log filtering + print("Testing import all objects after a split") + SPLIT_POOL = "split_pool" + PG_COUNT = 1 + SPLIT_OBJ_COUNT = 5 + SPLIT_NSPACE_COUNT = 2 + SPLIT_NAME = "split" + cmd = "{path}/ceph osd pool create {pool} {pg} {pg} replicated".format(pool=SPLIT_POOL, pg=PG_COUNT, path=CEPH_BIN) + logging.debug(cmd) + call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + SPLITID = get_pool_id(SPLIT_POOL, nullfd) + pool_size = int(check_output("{path}/ceph osd pool get {pool} size".format(pool=SPLIT_POOL, path=CEPH_BIN), shell=True, stderr=nullfd).decode().split(" ")[1]) + EXP_ERRORS = 0 + RM_ERRORS = 0 + IMP_ERRORS = 0 + + objects = range(1, SPLIT_OBJ_COUNT + 1) + nspaces = range(SPLIT_NSPACE_COUNT) + for n in nspaces: + nspace = get_nspace(n) + + for i in objects: + NAME = SPLIT_NAME + "{num}".format(num=i) + LNAME = nspace + "-" + NAME + DDNAME = os.path.join(DATADIR, LNAME) + DDNAME += "__head" + + cmd = "rm -f " + DDNAME + logging.debug(cmd) + call(cmd, shell=True) + + if i == 1: + dataline = range(DATALINECOUNT) + else: + dataline = range(1) + fd = open(DDNAME, "w") + data = "This is the split data for " + LNAME + "\n" + for _ in dataline: + fd.write(data) + fd.close() + + cmd = "{path}/rados -p {pool} -N '{nspace}' put {name} {ddname}".format(pool=SPLIT_POOL, name=NAME, ddname=DDNAME, nspace=nspace, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stderr=nullfd) + if ret != 0: + logging.critical("Rados put command failed with {ret}".format(ret=ret)) + return 1 + + wait_for_health() + kill_daemons() + + for osd in [f for f in os.listdir(OSDDIR) if os.path.isdir(os.path.join(OSDDIR, f)) and f.find("osd") == 0]: + os.mkdir(os.path.join(TESTDIR, osd)) + + pg = "{pool}.0".format(pool=SPLITID) + EXPORT_PG = pg + + export_osds = get_osds(pg, OSDDIR) + for osd in export_osds: + mydir = os.path.join(TESTDIR, osd) + fname = os.path.join(mydir, pg) + cmd = (CFSD_PREFIX + "--op export --pgid {pg} --file {file}").format(osd=osd, pg=pg, file=fname) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, stderr=nullfd) + if ret != 0: + logging.error("Exporting failed for pg {pg} on {osd} with {ret}".format(pg=pg, osd=osd, ret=ret)) + EXP_ERRORS += 1 + + ERRORS += EXP_ERRORS + + if EXP_ERRORS == 0: + vstart(new=False) + wait_for_health() + + cmd = "{path}/ceph osd pool set {pool} pg_num 2".format(pool=SPLIT_POOL, path=CEPH_BIN) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd, 
stderr=nullfd) + time.sleep(5) + wait_for_health() + + kill_daemons() + + # Now 2 PGs, poolid.0 and poolid.1 + # make note of pgs before we remove the pgs... + osds = get_osds("{pool}.0".format(pool=SPLITID), OSDDIR); + for seed in range(2): + pg = "{pool}.{seed}".format(pool=SPLITID, seed=seed) + + for osd in osds: + cmd = (CFSD_PREFIX + "--force --op remove --pgid {pg}").format(pg=pg, osd=osd) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + + which = 0 + for osd in osds: + # This is weird. The export files are based on only the EXPORT_PG + # and where that pg was before the split. Use 'which' to use all + # export copies in import. + mydir = os.path.join(TESTDIR, export_osds[which]) + fname = os.path.join(mydir, EXPORT_PG) + which += 1 + cmd = (CFSD_PREFIX + "--op import --pgid {pg} --file {file}").format(osd=osd, pg=EXPORT_PG, file=fname) + logging.debug(cmd) + ret = call(cmd, shell=True, stdout=nullfd) + if ret != 0: + logging.error("Import failed from {file} with {ret}".format(file=file, ret=ret)) + IMP_ERRORS += 1 + + ERRORS += IMP_ERRORS + + # Start up again to make sure imports didn't corrupt anything + if IMP_ERRORS == 0: + print("Verify split import data") + data_errors, count = check_data(DATADIR, TMPFILE, OSDDIR, SPLIT_NAME) + ERRORS += data_errors + if count != (SPLIT_OBJ_COUNT * SPLIT_NSPACE_COUNT * pool_size): + logging.error("Incorrect number of replicas seen {count}".format(count=count)) + ERRORS += 1 + vstart(new=False) + wait_for_health() + + call("/bin/rm -rf {dir}".format(dir=TESTDIR), shell=True) + call("/bin/rm -rf {dir}".format(dir=DATADIR), shell=True) + + ERRORS += test_removeall(CFSD_PREFIX, db, OBJREPPGS, REP_POOL, CEPH_BIN, OSDDIR, REP_NAME, NUM_CLONED_REP_OBJECTS) + + # vstart() starts 4 OSDs + ERRORS += test_get_set_osdmap(CFSD_PREFIX, list(range(4)), ALLOSDS) + ERRORS += test_get_set_inc_osdmap(CFSD_PREFIX, ALLOSDS[0]) + + kill_daemons() + CORES = [f for f in os.listdir(CEPH_DIR) if f.startswith("core.")] + if CORES: + CORE_DIR = os.path.join("/tmp", "cores.{pid}".format(pid=os.getpid())) + os.mkdir(CORE_DIR) + call("/bin/mv {ceph_dir}/core.* {core_dir}".format(ceph_dir=CEPH_DIR, core_dir=CORE_DIR), shell=True) + logging.error("Failure due to cores found") + logging.error("See {core_dir} for cores".format(core_dir=CORE_DIR)) + ERRORS += len(CORES) + + if ERRORS == 0: + print("TEST PASSED") + return 0 + else: + print("TEST FAILED WITH {errcount} ERRORS".format(errcount=ERRORS)) + return 1 + + +def remove_btrfs_subvolumes(path): + if platform.system() == "FreeBSD": + return + result = subprocess.Popen("stat -f -c '%%T' %s" % path, shell=True, stdout=subprocess.PIPE) + for line in result.stdout: + filesystem = line.decode('utf-8').rstrip('\n') + if filesystem == "btrfs": + result = subprocess.Popen("sudo btrfs subvolume list %s" % path, shell=True, stdout=subprocess.PIPE) + for line in result.stdout: + subvolume = line.decode('utf-8').split()[8] + # extracting the relative volume name + m = re.search(".*(%s.*)" % path, subvolume) + if m: + found = m.group(1) + call("sudo btrfs subvolume delete %s" % found, shell=True) + + +if __name__ == "__main__": + status = 1 + try: + status = main(sys.argv[1:]) + finally: + kill_daemons() + os.chdir(CEPH_BUILD_DIR) + remove_btrfs_subvolumes(CEPH_DIR) + call("/bin/rm -fr {dir}".format(dir=CEPH_DIR), shell=True) + sys.exit(status) diff --git a/qa/standalone/special/test-failure.sh b/qa/standalone/special/test-failure.sh new file mode 100755 index 000000000..cede887d2 --- /dev/null +++ 
b/qa/standalone/special/test-failure.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +set -ex + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:7202" # git grep '\<7202\>' : there must be only one + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_failure_log() { + local dir=$1 + + cat > $dir/test_failure.log << EOF +This is a fake log file +* +* +* +* +* +This ends the fake log file +EOF + + # Test fails + return 1 +} + +function TEST_failure_core_only() { + local dir=$1 + + run_mon $dir a || return 1 + kill_daemons $dir SEGV mon 5 + return 0 +} + +main test_failure "$@" diff --git a/qa/suites/.qa b/qa/suites/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/.qa @@ -0,0 +1 @@ +../.qa/
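The test-failure.sh script above follows the standalone-test convention used elsewhere under qa/standalone: run() sets CEPH_MON and CEPH_ARGS, then dispatches every TEST_* function it finds, and a non-zero return (TEST_failure_log) or a deliberately crashed daemon leaving a core (TEST_failure_core_only) marks the job as failed, which appears intended to exercise failure detection itself. A minimal sketch of how such a script is typically exercised locally; the build/ directory layout and the qa/run-standalone.sh wrapper are assumptions about the surrounding source tree, not part of this diff:

    # Hypothetical local run from a completed cmake build directory;
    # run-standalone.sh wires up the helpers and executes the named script.
    cd build
    ../qa/run-standalone.sh test-failure.sh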
\ No newline at end of file diff --git a/qa/suites/big/.qa b/qa/suites/big/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/big/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/big/rados-thrash/% b/qa/suites/big/rados-thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/big/rados-thrash/% diff --git a/qa/suites/big/rados-thrash/.qa b/qa/suites/big/rados-thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/big/rados-thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/big/rados-thrash/ceph/.qa b/qa/suites/big/rados-thrash/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/big/rados-thrash/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/big/rados-thrash/ceph/cephadm.yaml b/qa/suites/big/rados-thrash/ceph/cephadm.yaml new file mode 100644 index 000000000..a225422c5 --- /dev/null +++ b/qa/suites/big/rados-thrash/ceph/cephadm.yaml @@ -0,0 +1,8 @@ +overrides: + kernel: + hwe: true +tasks: +- install: +- nvme_loop: +- cephadm: + roleless: true diff --git a/qa/suites/big/rados-thrash/clusters/.qa b/qa/suites/big/rados-thrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/big/rados-thrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/big/rados-thrash/clusters/big.yaml b/qa/suites/big/rados-thrash/clusters/big.yaml new file mode 100644 index 000000000..d9a8de8e7 --- /dev/null +++ b/qa/suites/big/rados-thrash/clusters/big.yaml @@ -0,0 +1,68 @@ +roles: +- [host.a, client.0] +- [host.b, client.1] +- [host.c, client.2] +- [host.d, client.3] +- [host.e, client.4] +- [host.f, client.5] +- [host.g, client.6] +- [host.h, client.7] +- [host.i, client.8] +- [host.j, client.9] +- [host.k, client.10] +- [host.l, client.11] +- [host.m, client.12] +- [host.n, client.13] +- [host.o, client.14] +- [host.p, client.15] +- [host.q, client.16] +- [host.r, client.17] +- [host.s, client.18] +- [host.t, client.19] +- [host.u, client.20] +- [host.v, client.21] +- [host.w, client.22] +- [host.x, client.23] +- [host.y, client.24] +- [host.z, client.25] +- [host.aa, client.26] +- [host.ab, client.27] +- [host.ac, client.28] +- [host.ad, client.29] +- [host.ae, client.30] +- [host.af, client.31] +- [host.ag, client.32] +- [host.ah, client.33] +- [host.ai, client.34] +- [host.aj, client.35] +- [host.ak, client.36] +- [host.al, client.37] +- [host.am, client.38] +- [host.an, client.39] +- [host.ao, client.40] +- [host.ap, client.41] +- [host.aq, client.42] +- [host.ar, client.43] +- [host.as, client.44] +- [host.at, client.45] +- [host.au, client.46] +- [host.av, client.47] +- [host.aw, client.48] +- [host.ax, client.49] +- [host.ay, client.50] +- [host.az, client.51] +- [host.ba, client.52] +- [host.bb, client.53] +- [host.bc, client.54] +- [host.bd, client.55] +- [host.be, client.56] +- [host.bf, client.57] +- [host.bg, client.58] +- [host.bh, client.59] +- [host.bi, client.60] +- [host.bj, client.61] +- [host.bk, client.62] +- [host.bl, client.63] +- [host.bm, client.64] +- [host.bn, client.65] +- [host.bo, client.66] diff --git a/qa/suites/big/rados-thrash/clusters/medium.yaml b/qa/suites/big/rados-thrash/clusters/medium.yaml new file mode 100644 index 000000000..05a16b053 --- /dev/null +++ b/qa/suites/big/rados-thrash/clusters/medium.yaml @@ -0,0 +1,22 @@ +roles: +- [host.a, client.0] +- [host.b, client.1] +- [host.c, client.2] +- [host.d, client.3] +- [host.e, client.4] +- [host.f, client.5] +- [host.g, client.6] +- [host.h, client.7] +- [host.i, client.8] +- [host.j, client.9] +- [host.k, client.10] +- [host.l, client.11] +- [host.m, client.12] +- [host.n, client.13] +- [host.o, client.14] +- [host.p, client.15] +- [host.q, client.16] +- [host.r, client.17] +- [host.s, client.18] +- [host.t, client.19] +- [host.u, client.20] diff --git a/qa/suites/big/rados-thrash/clusters/small.yaml b/qa/suites/big/rados-thrash/clusters/small.yaml new file mode 100644 index 000000000..0ff9f33f1 --- /dev/null +++ b/qa/suites/big/rados-thrash/clusters/small.yaml @@ -0,0 +1,6 @@ +roles: +- [host.a, client.0] +- [host.b, client.1] +- [host.c, client.2] +- [host.d, client.3] +- [host.e, client.4] diff --git a/qa/suites/big/rados-thrash/openstack.yaml b/qa/suites/big/rados-thrash/openstack.yaml new file mode 100644 index 000000000..4d6edcd07 --- /dev/null +++ b/qa/suites/big/rados-thrash/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 8000 # MB + cpus: 1 + volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/suites/big/rados-thrash/workloads/.qa b/qa/suites/big/rados-thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/big/rados-thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/big/rados-thrash/workloads/radosbench.yaml b/qa/suites/big/rados-thrash/workloads/radosbench.yaml new file mode 100644 index 000000000..60f72886c --- /dev/null +++ b/qa/suites/big/rados-thrash/workloads/radosbench.yaml @@ -0,0 +1,3 @@ +tasks: +- radosbench: + time: 300 diff --git a/qa/suites/big/rados-thrash/workloads/snaps-few-objects.yaml b/qa/suites/big/rados-thrash/workloads/snaps-few-objects.yaml new file mode 100644 index 000000000..b73bb6781 --- /dev/null +++ b/qa/suites/big/rados-thrash/workloads/snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + ops: 4000 + max_seconds: 3600 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/buildpackages/.qa b/qa/suites/buildpackages/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/buildpackages/.qa @@ -0,0 +1 @@ +../.qa/
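The radosbench.yaml workload above only pins the run time (300 seconds); the teuthology radosbench task supplies the pool and client wiring itself. As a rough illustration of what each client ends up driving, under the assumption that the task wraps the stock rados bench CLI; the pool name below is made up:

    # Approximation of the per-client benchmark invocation; the real task
    # creates and cleans up its own pool and may add extra flags.
    rados -p bench-pool bench 300 write --no-cleanup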
\ No newline at end of file diff --git a/qa/suites/buildpackages/any/% b/qa/suites/buildpackages/any/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/buildpackages/any/% diff --git a/qa/suites/buildpackages/any/.qa b/qa/suites/buildpackages/any/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/buildpackages/any/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/buildpackages/any/distros b/qa/suites/buildpackages/any/distros new file mode 120000 index 000000000..0e1f13037 --- /dev/null +++ b/qa/suites/buildpackages/any/distros @@ -0,0 +1 @@ +.qa/distros/all
\ No newline at end of file diff --git a/qa/suites/buildpackages/any/tasks/.qa b/qa/suites/buildpackages/any/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/buildpackages/any/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/buildpackages/any/tasks/release.yaml b/qa/suites/buildpackages/any/tasks/release.yaml new file mode 100644 index 000000000..d7a3b62c8 --- /dev/null +++ b/qa/suites/buildpackages/any/tasks/release.yaml @@ -0,0 +1,8 @@ +# --suite buildpackages/any --ceph v10.0.1 --filter centos_7,ubuntu_14.04 +roles: + - [client.0] +tasks: + - install: + - exec: + client.0: + - ceph --version | grep 'version ' diff --git a/qa/suites/buildpackages/tests/% b/qa/suites/buildpackages/tests/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/buildpackages/tests/% diff --git a/qa/suites/buildpackages/tests/.qa b/qa/suites/buildpackages/tests/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/buildpackages/tests/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/buildpackages/tests/distros b/qa/suites/buildpackages/tests/distros new file mode 120000 index 000000000..0e1f13037 --- /dev/null +++ b/qa/suites/buildpackages/tests/distros @@ -0,0 +1 @@ +.qa/distros/all
\ No newline at end of file diff --git a/qa/suites/buildpackages/tests/tasks/.qa b/qa/suites/buildpackages/tests/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/buildpackages/tests/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/buildpackages/tests/tasks/release.yaml b/qa/suites/buildpackages/tests/tasks/release.yaml new file mode 100644 index 000000000..05e87789d --- /dev/null +++ b/qa/suites/buildpackages/tests/tasks/release.yaml @@ -0,0 +1,20 @@ +# --suite buildpackages/tests --ceph v10.0.1 --filter centos_7.2,ubuntu_14.04 +overrides: + ansible.cephlab: + playbook: users.yml + buildpackages: + good_machine: + disk: 20 # GB + ram: 2000 # MB + cpus: 2 + min_machine: + disk: 10 # GB + ram: 1000 # MB + cpus: 1 +roles: + - [client.0] +tasks: + - install: + - exec: + client.0: + - ceph --version | grep 'version ' diff --git a/qa/suites/ceph-ansible/.qa b/qa/suites/ceph-ansible/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/.qa @@ -0,0 +1 @@ +../.qa/
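The commented scheduling hint at the top of release.yaml above (--suite buildpackages/tests --ceph v10.0.1 --filter centos_7.2,ubuntu_14.04) is an argument fragment for teuthology-suite. A hedged example of a full command built around it; the machine type is a placeholder and any lab-specific flags are assumptions:

    # Sketch only: --suite/--ceph/--filter come from the yaml comment,
    # --machine-type stands in for whatever the local lab provides.
    teuthology-suite --suite buildpackages/tests --ceph v10.0.1 \
        --filter centos_7.2,ubuntu_14.04 --machine-type smithi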
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/.qa b/qa/suites/ceph-ansible/smoke/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/% b/qa/suites/ceph-ansible/smoke/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/% diff --git a/qa/suites/ceph-ansible/smoke/basic/.qa b/qa/suites/ceph-ansible/smoke/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/0-clusters/.qa b/qa/suites/ceph-ansible/smoke/basic/0-clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/0-clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/0-clusters/3-node.yaml b/qa/suites/ceph-ansible/smoke/basic/0-clusters/3-node.yaml new file mode 100644 index 000000000..86dd366b9 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/0-clusters/3-node.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + 3-node cluster + install and run ceph-ansible on a mon.a node alone with ceph +roles: +- [mon.a, mds.a, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, osd.3, osd.4, osd.5] +- [mon.c, mgr.y, osd.6, osd.7, osd.8, client.0] +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/suites/ceph-ansible/smoke/basic/0-clusters/4-node.yaml b/qa/suites/ceph-ansible/smoke/basic/0-clusters/4-node.yaml new file mode 100644 index 000000000..b1754432c --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/0-clusters/4-node.yaml @@ -0,0 +1,13 @@ +meta: +- desc: | + 4-node cluster + install and run ceph-ansible on installer.0 stand alone node +roles: +- [mon.a, mds.a, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, osd.3, osd.4, osd.5] +- [mon.c, mgr.y, osd.6, osd.7, osd.8, client.0] +- [installer.0] +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/suites/ceph-ansible/smoke/basic/1-distros/.qa b/qa/suites/ceph-ansible/smoke/basic/1-distros/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/1-distros/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/1-distros/centos_latest.yaml b/qa/suites/ceph-ansible/smoke/basic/1-distros/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/1-distros/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/1-distros/ubuntu_latest.yaml b/qa/suites/ceph-ansible/smoke/basic/1-distros/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/1-distros/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/2-ceph/.qa b/qa/suites/ceph-ansible/smoke/basic/2-ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/2-ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml b/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml new file mode 100644 index 000000000..7e7ede3e3 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/2-ceph/ceph_ansible.yaml @@ -0,0 +1,36 @@ +meta: +- desc: "Build the ceph cluster using ceph-ansible" + +overrides: + ceph_ansible: + ansible-version: '2.9' + vars: + ceph_conf_overrides: + global: + osd default pool size: 2 + mon pg warn min per osd: 2 + osd pool default pg num: 64 + osd pool default pgp num: 64 + mon_max_pg_per_osd: 1024 + ceph_test: true + ceph_stable_release: nautilus + osd_scenario: lvm + journal_size: 1024 + osd_auto_discovery: false + ceph_origin: repository + ceph_repository: dev + ceph_mgr_modules: + - status + - restful + cephfs_pools: + - name: "cephfs_data" + pg_num: "64" + rule_name: "replicated_rule" + - name: "cephfs_metadata" + pg_num: "64" + rule_name: "replicated_rule" + dashboard_enabled: false +tasks: +- ssh-keys: +- ceph_ansible: +- install.ship_utilities: diff --git a/qa/suites/ceph-ansible/smoke/basic/3-config/.qa b/qa/suites/ceph-ansible/smoke/basic/3-config/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/3-config/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/3-config/bluestore_with_dmcrypt.yaml b/qa/suites/ceph-ansible/smoke/basic/3-config/bluestore_with_dmcrypt.yaml new file mode 100644 index 000000000..604e757ad --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/3-config/bluestore_with_dmcrypt.yaml @@ -0,0 +1,8 @@ +meta: +- desc: "use bluestore + dmcrypt option" + +overrides: + ceph_ansible: + vars: + osd_objectstore: bluestore + dmcrypt: True diff --git a/qa/suites/ceph-ansible/smoke/basic/3-config/dmcrypt_off.yaml b/qa/suites/ceph-ansible/smoke/basic/3-config/dmcrypt_off.yaml new file mode 100644 index 000000000..4bbd1c7c5 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/3-config/dmcrypt_off.yaml @@ -0,0 +1,7 @@ +meta: +- desc: "without dmcrypt" + +overrides: + ceph_ansible: + vars: + dmcrypt: False diff --git a/qa/suites/ceph-ansible/smoke/basic/3-config/dmcrypt_on.yaml b/qa/suites/ceph-ansible/smoke/basic/3-config/dmcrypt_on.yaml new file mode 100644 index 000000000..12d63d325 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/3-config/dmcrypt_on.yaml @@ -0,0 +1,7 @@ +meta: +- desc: "use dmcrypt option" + +overrides: + ceph_ansible: + vars: + dmcrypt: True diff --git a/qa/suites/ceph-ansible/smoke/basic/4-tasks/.qa b/qa/suites/ceph-ansible/smoke/basic/4-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/4-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-ansible/smoke/basic/4-tasks/ceph-admin-commands.yaml b/qa/suites/ceph-ansible/smoke/basic/4-tasks/ceph-admin-commands.yaml new file mode 100644 index 000000000..33642d5cf --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/4-tasks/ceph-admin-commands.yaml @@ -0,0 +1,7 @@ +meta: +- desc: "Run ceph-admin-commands.sh" +tasks: +- workunit: + clients: + client.0: + - ceph-tests/ceph-admin-commands.sh diff --git a/qa/suites/ceph-ansible/smoke/basic/4-tasks/rbd_import_export.yaml b/qa/suites/ceph-ansible/smoke/basic/4-tasks/rbd_import_export.yaml new file mode 100644 index 000000000..9495934e6 --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/4-tasks/rbd_import_export.yaml @@ -0,0 +1,7 @@ +meta: +- desc: "Run the rbd import/export tests" +tasks: +- workunit: + clients: + client.0: + - rbd/import_export.sh diff --git a/qa/suites/ceph-ansible/smoke/basic/4-tasks/rest.yaml b/qa/suites/ceph-ansible/smoke/basic/4-tasks/rest.yaml new file mode 100644 index 000000000..8e389134b --- /dev/null +++ b/qa/suites/ceph-ansible/smoke/basic/4-tasks/rest.yaml @@ -0,0 +1,15 @@ +tasks: +- exec: + mgr.x: + - systemctl stop ceph-mgr.target + - sleep 5 + - ceph -s +- exec: + mon.a: + - ceph restful create-key admin + - ceph restful create-self-signed-cert + - ceph restful restart +- workunit: + clients: + client.0: + - rest/test-restful.sh diff --git a/qa/suites/ceph-deploy/% b/qa/suites/ceph-deploy/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/ceph-deploy/% diff --git a/qa/suites/ceph-deploy/.qa b/qa/suites/ceph-deploy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-deploy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/cluster/.qa b/qa/suites/ceph-deploy/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-deploy/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/cluster/4node.yaml b/qa/suites/ceph-deploy/cluster/4node.yaml new file mode 100644 index 000000000..bf4a7f986 --- /dev/null +++ b/qa/suites/ceph-deploy/cluster/4node.yaml @@ -0,0 +1,15 @@ +overrides: + ansible.cephlab: + vars: + quick_lvs_to_create: 4 +openstack: + - machine: + disk: 10 + volumes: + count: 4 + size: 20 +roles: +- [mon.a, mgr.y, osd.0, osd.1] +- [mon.b, osd.2, osd.3] +- [mon.c, osd.4, osd.5] +- [mgr.x, client.0] diff --git a/qa/suites/ceph-deploy/config/.qa b/qa/suites/ceph-deploy/config/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-deploy/config/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/config/ceph_volume_bluestore.yaml b/qa/suites/ceph-deploy/config/ceph_volume_bluestore.yaml new file mode 100644 index 000000000..e484e6120 --- /dev/null +++ b/qa/suites/ceph-deploy/config/ceph_volume_bluestore.yaml @@ -0,0 +1,7 @@ +overrides: + ceph-deploy: + use-ceph-volume: True + bluestore: True + conf: + osd: + bluestore fsck on mount: true diff --git a/qa/suites/ceph-deploy/config/ceph_volume_bluestore_dmcrypt.yaml b/qa/suites/ceph-deploy/config/ceph_volume_bluestore_dmcrypt.yaml new file mode 100644 index 000000000..d424b6423 --- /dev/null +++ b/qa/suites/ceph-deploy/config/ceph_volume_bluestore_dmcrypt.yaml @@ -0,0 +1,8 @@ +overrides: + ceph-deploy: + use-ceph-volume: True + bluestore: True + dmcrypt: True + conf: + osd: + bluestore fsck on mount: true diff --git a/qa/suites/ceph-deploy/config/ceph_volume_dmcrypt_off.yaml b/qa/suites/ceph-deploy/config/ceph_volume_dmcrypt_off.yaml new file mode 100644 index 000000000..097014414 --- /dev/null +++ b/qa/suites/ceph-deploy/config/ceph_volume_dmcrypt_off.yaml @@ -0,0 +1,3 @@ +overrides: + ceph-deploy: + use-ceph-volume: True diff --git a/qa/suites/ceph-deploy/distros/.qa b/qa/suites/ceph-deploy/distros/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-deploy/distros/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/distros/centos_latest.yaml b/qa/suites/ceph-deploy/distros/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/ceph-deploy/distros/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/distros/ubuntu_latest.yaml b/qa/suites/ceph-deploy/distros/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/ceph-deploy/distros/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/python_versions/.qa b/qa/suites/ceph-deploy/python_versions/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-deploy/python_versions/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/python_versions/python_2.yaml b/qa/suites/ceph-deploy/python_versions/python_2.yaml new file mode 100644 index 000000000..51c865bfa --- /dev/null +++ b/qa/suites/ceph-deploy/python_versions/python_2.yaml @@ -0,0 +1,3 @@ +overrides: + ceph-deploy: + python_version: "2" diff --git a/qa/suites/ceph-deploy/python_versions/python_3.yaml b/qa/suites/ceph-deploy/python_versions/python_3.yaml new file mode 100644 index 000000000..22deecaea --- /dev/null +++ b/qa/suites/ceph-deploy/python_versions/python_3.yaml @@ -0,0 +1,3 @@ +overrides: + ceph-deploy: + python_version: "3" diff --git a/qa/suites/ceph-deploy/tasks/.qa b/qa/suites/ceph-deploy/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/ceph-deploy/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/ceph-deploy/tasks/ceph-admin-commands.yaml b/qa/suites/ceph-deploy/tasks/ceph-admin-commands.yaml new file mode 100644 index 000000000..b7dbfe1ae --- /dev/null +++ b/qa/suites/ceph-deploy/tasks/ceph-admin-commands.yaml @@ -0,0 +1,12 @@ +meta: +- desc: "test basic ceph admin commands" +tasks: +- ssh_keys: +- print: "**** done ssh_keys" +- ceph-deploy: +- print: "**** done ceph-deploy" +- workunit: + clients: + client.0: + - ceph-tests/ceph-admin-commands.sh +- print: "**** done ceph-tests/ceph-admin-commands.sh" diff --git a/qa/suites/ceph-deploy/tasks/rbd_import_export.yaml b/qa/suites/ceph-deploy/tasks/rbd_import_export.yaml new file mode 100644 index 000000000..1c09735a6 --- /dev/null +++ b/qa/suites/ceph-deploy/tasks/rbd_import_export.yaml @@ -0,0 +1,9 @@ +meta: +- desc: "Setup cluster using ceph-deploy, Run the rbd import/export tests" +tasks: +- ssh-keys: +- ceph-deploy: +- workunit: + clients: + client.0: + - rbd/import_export.sh diff --git a/qa/suites/cephmetrics/% b/qa/suites/cephmetrics/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/cephmetrics/% diff --git a/qa/suites/cephmetrics/.qa b/qa/suites/cephmetrics/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/0-clusters/.qa b/qa/suites/cephmetrics/0-clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/0-clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/0-clusters/3-node.yaml b/qa/suites/cephmetrics/0-clusters/3-node.yaml new file mode 100644 index 000000000..3935e7cc3 --- /dev/null +++ b/qa/suites/cephmetrics/0-clusters/3-node.yaml @@ -0,0 +1,11 @@ +meta: +- desc: "4-node cluster" +roles: +- [mon.a, mds.a, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, osd.3, osd.4, osd.5] +- [mon.c, mgr.y, osd.6, osd.7, osd.8, client.0] +- [cephmetrics.0] +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/suites/cephmetrics/1-distros/.qa b/qa/suites/cephmetrics/1-distros/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/1-distros/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/1-distros/centos_latest.yaml b/qa/suites/cephmetrics/1-distros/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/cephmetrics/1-distros/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/cephmetrics/1-distros/ubuntu_latest.yaml b/qa/suites/cephmetrics/1-distros/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/cephmetrics/1-distros/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/cephmetrics/2-ceph/.qa b/qa/suites/cephmetrics/2-ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/2-ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/2-ceph/ceph_ansible.yaml b/qa/suites/cephmetrics/2-ceph/ceph_ansible.yaml new file mode 100644 index 000000000..309f50600 --- /dev/null +++ b/qa/suites/cephmetrics/2-ceph/ceph_ansible.yaml @@ -0,0 +1,32 @@ +meta: +- desc: "Build the ceph cluster using ceph-ansible" + +overrides: + ceph_ansible: + vars: + ceph_conf_overrides: + global: + osd default pool size: 2 + mon pg warn min per osd: 2 + osd pool default pg num: 64 + osd pool default pgp num: 64 + mon_max_pg_per_osd: 1024 + ceph_test: true + ceph_stable_release: luminous + osd_scenario: collocated + journal_size: 1024 + osd_auto_discovery: false + ceph_origin: repository + ceph_repository: dev + ceph_mgr_modules: + - status + - restful + cephfs_pools: + - name: "cephfs_data" + pg_num: "64" + - name: "cephfs_metadata" + pg_num: "64" +tasks: +- ssh-keys: +- ceph_ansible: +- install.ship_utilities: diff --git a/qa/suites/cephmetrics/3-ceph-config/.qa b/qa/suites/cephmetrics/3-ceph-config/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/3-ceph-config/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/3-ceph-config/bluestore_with_dmcrypt.yaml b/qa/suites/cephmetrics/3-ceph-config/bluestore_with_dmcrypt.yaml new file mode 100644 index 000000000..16db8ab27 --- /dev/null +++ b/qa/suites/cephmetrics/3-ceph-config/bluestore_with_dmcrypt.yaml @@ -0,0 +1,8 @@ +meta: +- desc: "use bluestore + dmcrypt" + +overrides: + ceph_ansible: + vars: + osd_objectstore: bluestore + dmcrypt: True diff --git a/qa/suites/cephmetrics/3-ceph-config/bluestore_without_dmcrypt.yaml b/qa/suites/cephmetrics/3-ceph-config/bluestore_without_dmcrypt.yaml new file mode 100644 index 000000000..fc879fc8e --- /dev/null +++ b/qa/suites/cephmetrics/3-ceph-config/bluestore_without_dmcrypt.yaml @@ -0,0 +1,8 @@ +meta: +- desc: "use bluestore without dmcrypt" + +overrides: + ceph_ansible: + vars: + osd_objectstore: bluestore + dmcrypt: False diff --git a/qa/suites/cephmetrics/3-ceph-config/dmcrypt_off.yaml b/qa/suites/cephmetrics/3-ceph-config/dmcrypt_off.yaml new file mode 100644 index 000000000..4bbd1c7c5 --- /dev/null +++ b/qa/suites/cephmetrics/3-ceph-config/dmcrypt_off.yaml @@ -0,0 +1,7 @@ +meta: +- desc: "without dmcrypt" + +overrides: + ceph_ansible: + vars: + dmcrypt: False diff --git a/qa/suites/cephmetrics/3-ceph-config/dmcrypt_on.yaml b/qa/suites/cephmetrics/3-ceph-config/dmcrypt_on.yaml new file mode 100644 index 000000000..519ad1d79 --- /dev/null +++ b/qa/suites/cephmetrics/3-ceph-config/dmcrypt_on.yaml @@ -0,0 +1,7 @@ +meta: +- desc: "with dmcrypt" + +overrides: + ceph_ansible: + vars: + dmcrypt: True diff --git a/qa/suites/cephmetrics/4-epel/.qa b/qa/suites/cephmetrics/4-epel/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/4-epel/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/4-epel/no_epel.yaml b/qa/suites/cephmetrics/4-epel/no_epel.yaml new file mode 100644 index 000000000..1538fd7f8 --- /dev/null +++ b/qa/suites/cephmetrics/4-epel/no_epel.yaml @@ -0,0 +1,7 @@ +meta: + - desc: "Without EPEL" +overrides: + cephmetrics: + group_vars: + all: + use_epel: false diff --git a/qa/suites/cephmetrics/4-epel/use_epel.yaml b/qa/suites/cephmetrics/4-epel/use_epel.yaml new file mode 100644 index 000000000..d496a43ea --- /dev/null +++ b/qa/suites/cephmetrics/4-epel/use_epel.yaml @@ -0,0 +1,7 @@ +meta: + - desc: "Using EPEL" +overrides: + cephmetrics: + group_vars: + all: + use_epel: true diff --git a/qa/suites/cephmetrics/5-containers/.qa b/qa/suites/cephmetrics/5-containers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/5-containers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/5-containers/containerized.yaml b/qa/suites/cephmetrics/5-containers/containerized.yaml new file mode 100644 index 000000000..686de08a2 --- /dev/null +++ b/qa/suites/cephmetrics/5-containers/containerized.yaml @@ -0,0 +1,10 @@ +meta: + - desc: "Containerized prometheus and grafana" +overrides: + cephmetrics: + group_vars: + all: + prometheus: + containerized: true + grafana: + containerized: true diff --git a/qa/suites/cephmetrics/5-containers/no_containers.yaml b/qa/suites/cephmetrics/5-containers/no_containers.yaml new file mode 100644 index 000000000..29c690939 --- /dev/null +++ b/qa/suites/cephmetrics/5-containers/no_containers.yaml @@ -0,0 +1,10 @@ +meta: + - desc: "Packaged prometheus and grafana" +overrides: + cephmetrics: + group_vars: + all: + prometheus: + containerized: false + grafana: + containerized: false diff --git a/qa/suites/cephmetrics/6-tasks/.qa b/qa/suites/cephmetrics/6-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/cephmetrics/6-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/cephmetrics/6-tasks/cephmetrics.yaml b/qa/suites/cephmetrics/6-tasks/cephmetrics.yaml new file mode 100644 index 000000000..15f90394c --- /dev/null +++ b/qa/suites/cephmetrics/6-tasks/cephmetrics.yaml @@ -0,0 +1,4 @@ +meta: +- desc: "Deploy cephmetrics and run integration tests" +tasks: +- cephmetrics: diff --git a/qa/suites/crimson-rados-experimental/.qa b/qa/suites/crimson-rados-experimental/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/crimson-rados-experimental/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/.qa b/qa/suites/crimson-rados-experimental/seastore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/% b/qa/suites/crimson-rados-experimental/seastore/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/% diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/.qa b/qa/suites/crimson-rados-experimental/seastore/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/centos_latest.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/.qa b/qa/suites/crimson-rados-experimental/seastore/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-1.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-1.yaml new file mode 100644 index 000000000..d8e5898b9 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-1.yaml @@ -0,0 +1,14 @@ +overrides: + ceph-deploy: + conf: + global: + osd pool default size: 2 + osd crush chooseleaf type: 0 + osd pool default pg num: 128 + osd pool default pgp num: 128 + ceph: + conf: + osd: + osd shutdown pgref assert: true +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-2.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-2.yaml new file mode 100644 index 000000000..9774de688 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/clusters/fixed-2.yaml @@ -0,0 +1,15 @@ +roles: +- [mon.a, osd.0, osd.1, client.0, node-exporter.a] +- [mgr.x, osd.2, osd.3, client.1, prometheus.a, node-exporter.b] +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true + global: + ms cluster mode: crc + ms service mode: crc + ms client mode: crc + ms mon service mode: crc + ms mon cluster mode: crc + ms mon client mode: crc diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/crimson_qa_overrides.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/crimson_qa_overrides.yaml new file mode 120000 index 000000000..2bf67af1b --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/crimson_qa_overrides.yaml @@ -0,0 +1 @@ +.qa/config/crimson_qa_overrides.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/deploy/.qa b/qa/suites/crimson-rados-experimental/seastore/basic/deploy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/deploy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/deploy/ceph.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/deploy/ceph.yaml new file mode 100644 index 000000000..c22f08eec --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/deploy/ceph.yaml @@ -0,0 +1,18 @@ +overrides: + install: + ceph: + flavor: crimson +tasks: +- install: +- ceph: + conf: + osd: + debug monc: 20 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + flavor: crimson diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/.qa b/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/seastore.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/seastore.yaml new file mode 100644 index 000000000..713d93225 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/objectstore/seastore.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + fs: xfs + conf: + osd: + osd objectstore: seastore diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/.qa b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/rados_api_tests.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/rados_api_tests.yaml new file mode 100644 index 000000000..ad8c92142 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/rados_api_tests.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - but it is still running + - overall HEALTH_ + - \(POOL_FULL\) + - \(SMALLER_PGP_NUM\) + - \(CACHE_POOL_NO_HIT_SET\) + - \(CACHE_POOL_NEAR_FULL\) + - \(POOL_APP_NOT_ENABLED\) + - \(PG_AVAILABILITY\) + - \(PG_DEGRADED\) + conf: + client: + debug ms: 1 + mon: + mon warn on pool no app: false + osd: + osd class load list: "*" + osd class default list: "*" + osd blocked scrub grace period: 3600 +tasks: +- workunit: + clients: + client.0: + - rados/test.sh + - rados/test_pool_quota.sh diff --git a/qa/suites/crimson-rados-experimental/seastore/basic/tasks/readwrite.yaml b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/readwrite.yaml new file mode 100644 index 000000000..f135107c7 --- /dev/null +++ b/qa/suites/crimson-rados-experimental/seastore/basic/tasks/readwrite.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + crush_tunables: optimal + conf: + mon: + mon osd initial require min compat client: luminous + osd: + osd_discard_disconnected_ops: false +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/crimson-rados/.qa b/qa/suites/crimson-rados/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/% b/qa/suites/crimson-rados/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/basic/% diff --git a/qa/suites/crimson-rados/basic/.qa b/qa/suites/crimson-rados/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/centos_8.stream.yaml b/qa/suites/crimson-rados/basic/centos_8.stream.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/crimson-rados/basic/centos_8.stream.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/clusters/.qa b/qa/suites/crimson-rados/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/clusters/fixed-2.yaml b/qa/suites/crimson-rados/basic/clusters/fixed-2.yaml new file mode 100644 index 000000000..9774de688 --- /dev/null +++ b/qa/suites/crimson-rados/basic/clusters/fixed-2.yaml @@ -0,0 +1,15 @@ +roles: +- [mon.a, osd.0, osd.1, client.0, node-exporter.a] +- [mgr.x, osd.2, osd.3, client.1, prometheus.a, node-exporter.b] +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true + global: + ms cluster mode: crc + ms service mode: crc + ms client mode: crc + ms mon service mode: crc + ms mon cluster mode: crc + ms mon client mode: crc diff --git a/qa/suites/crimson-rados/basic/crimson-supported-all-distro b/qa/suites/crimson-rados/basic/crimson-supported-all-distro new file mode 120000 index 000000000..a5b729b9e --- /dev/null +++ b/qa/suites/crimson-rados/basic/crimson-supported-all-distro @@ -0,0 +1 @@ +.qa/distros/crimson-supported-all-distro/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/crimson_qa_overrides.yaml b/qa/suites/crimson-rados/basic/crimson_qa_overrides.yaml new file mode 120000 index 000000000..2bf67af1b --- /dev/null +++ b/qa/suites/crimson-rados/basic/crimson_qa_overrides.yaml @@ -0,0 +1 @@ +.qa/config/crimson_qa_overrides.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/deploy/.qa b/qa/suites/crimson-rados/basic/deploy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/basic/deploy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/deploy/ceph.yaml b/qa/suites/crimson-rados/basic/deploy/ceph.yaml new file mode 100644 index 000000000..c22f08eec --- /dev/null +++ b/qa/suites/crimson-rados/basic/deploy/ceph.yaml @@ -0,0 +1,18 @@ +overrides: + install: + ceph: + flavor: crimson +tasks: +- install: +- ceph: + conf: + osd: + debug monc: 20 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + flavor: crimson diff --git a/qa/suites/crimson-rados/basic/deploy/cephadm.yaml.disabled b/qa/suites/crimson-rados/basic/deploy/cephadm.yaml.disabled new file mode 100644 index 000000000..0c2062240 --- /dev/null +++ b/qa/suites/crimson-rados/basic/deploy/cephadm.yaml.disabled @@ -0,0 +1,16 @@ +# no need to verify os + flavor + sha1 +verify_ceph_hash: false +tasks: +- cephadm: + conf: + mgr: + debug ms: 1 + debug mgr: 20 + debug osd: 10 +- cephadm.shell: + mon.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/crimson-rados/basic/tasks/.qa b/qa/suites/crimson-rados/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/basic/tasks/rados_api_tests.yaml b/qa/suites/crimson-rados/basic/tasks/rados_api_tests.yaml new file mode 100644 index 000000000..ad8c92142 --- /dev/null +++ b/qa/suites/crimson-rados/basic/tasks/rados_api_tests.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - but it is still running + - overall HEALTH_ + - \(POOL_FULL\) + - \(SMALLER_PGP_NUM\) + - \(CACHE_POOL_NO_HIT_SET\) + - \(CACHE_POOL_NEAR_FULL\) + - \(POOL_APP_NOT_ENABLED\) + - \(PG_AVAILABILITY\) + - \(PG_DEGRADED\) + conf: + client: + debug ms: 1 + mon: + mon warn on pool no app: false + osd: + osd class load list: "*" + osd class default list: "*" + osd blocked scrub grace period: 3600 +tasks: +- workunit: + clients: + client.0: + - rados/test.sh + - rados/test_pool_quota.sh diff --git a/qa/suites/crimson-rados/basic/tasks/rados_python.yaml b/qa/suites/crimson-rados/basic/tasks/rados_python.yaml new file mode 100644 index 000000000..aa8719d9f --- /dev/null +++ b/qa/suites/crimson-rados/basic/tasks/rados_python.yaml @@ -0,0 +1,20 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(PG_ + - \(OSD_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + timeout: 1h + clients: + client.0: + - rados/test_python.sh -m 'not (wait or tier or ec or bench or stats)' diff --git a/qa/suites/crimson-rados/basic/tasks/readwrite.yaml b/qa/suites/crimson-rados/basic/tasks/readwrite.yaml new file mode 100644 index 000000000..f135107c7 --- /dev/null +++ b/qa/suites/crimson-rados/basic/tasks/readwrite.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + crush_tunables: optimal + conf: + mon: + mon osd initial require min compat client: luminous + osd: + osd_discard_disconnected_ops: false +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/crimson-rados/rbd/% b/qa/suites/crimson-rados/rbd/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/rbd/% diff --git a/qa/suites/crimson-rados/rbd/.qa b/qa/suites/crimson-rados/rbd/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/centos_8.stream.yaml b/qa/suites/crimson-rados/rbd/centos_8.stream.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/centos_8.stream.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/clusters/.qa b/qa/suites/crimson-rados/rbd/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/clusters/fixed-1.yaml b/qa/suites/crimson-rados/rbd/clusters/fixed-1.yaml new file mode 100644 index 000000000..d204f3eb2 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/clusters/fixed-1.yaml @@ -0,0 +1,21 @@ +overrides: + ceph-deploy: + conf: + global: + osd pool default size: 2 + osd crush chooseleaf type: 0 + osd pool default pg num: 128 + osd pool default pgp num: 128 + ceph: + conf: + osd: + osd shutdown pgref assert: true + global: + ms cluster mode: crc + ms service mode: crc + ms client mode: crc + ms mon service mode: crc + ms mon cluster mode: crc + ms mon client mode: crc +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] diff --git a/qa/suites/crimson-rados/rbd/crimson-supported-all-distro b/qa/suites/crimson-rados/rbd/crimson-supported-all-distro new file mode 120000 index 000000000..a5b729b9e --- /dev/null +++ b/qa/suites/crimson-rados/rbd/crimson-supported-all-distro @@ -0,0 +1 @@ +.qa/distros/crimson-supported-all-distro/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/crimson_qa_overrides.yaml b/qa/suites/crimson-rados/rbd/crimson_qa_overrides.yaml new file mode 120000 index 000000000..2bf67af1b --- /dev/null +++ b/qa/suites/crimson-rados/rbd/crimson_qa_overrides.yaml @@ -0,0 +1 @@ +.qa/config/crimson_qa_overrides.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/deploy/.qa b/qa/suites/crimson-rados/rbd/deploy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/deploy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/deploy/ceph.yaml b/qa/suites/crimson-rados/rbd/deploy/ceph.yaml new file mode 100644 index 000000000..c22f08eec --- /dev/null +++ b/qa/suites/crimson-rados/rbd/deploy/ceph.yaml @@ -0,0 +1,18 @@ +overrides: + install: + ceph: + flavor: crimson +tasks: +- install: +- ceph: + conf: + osd: + debug monc: 20 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + flavor: crimson diff --git a/qa/suites/crimson-rados/rbd/deploy/cephadm.yaml.disabled b/qa/suites/crimson-rados/rbd/deploy/cephadm.yaml.disabled new file mode 100644 index 000000000..0c2062240 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/deploy/cephadm.yaml.disabled @@ -0,0 +1,16 @@ +# no need to verify os + flavor + sha1 +verify_ceph_hash: false +tasks: +- cephadm: + conf: + mgr: + debug ms: 1 + debug mgr: 20 + debug osd: 10 +- cephadm.shell: + mon.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/crimson-rados/rbd/tasks/.qa b/qa/suites/crimson-rados/rbd/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/rbd/tasks/rbd_api_tests.yaml b/qa/suites/crimson-rados/rbd/tasks/rbd_api_tests.yaml new file mode 100644 index 000000000..b0d019bbe --- /dev/null +++ b/qa/suites/crimson-rados/rbd/tasks/rbd_api_tests.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(POOL_APP_NOT_ENABLED\) + - is full \(reached quota + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/crimson/test_crimson_librbd.sh + env: + RBD_FEATURES: "61" + diff --git a/qa/suites/crimson-rados/rbd/tasks/rbd_api_tests_old_format.yaml b/qa/suites/crimson-rados/rbd/tasks/rbd_api_tests_old_format.yaml new file mode 100644 index 000000000..a6b85f7b3 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/tasks/rbd_api_tests_old_format.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(POOL_APP_NOT_ENABLED\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/crimson/test_crimson_librbd.sh diff --git a/qa/suites/crimson-rados/rbd/tasks/rbd_cls_tests.yaml b/qa/suites/crimson-rados/rbd/tasks/rbd_cls_tests.yaml new file mode 100644 index 000000000..51b35e2e1 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/tasks/rbd_cls_tests.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + clients: + client.0: + - cls/test_cls_rbd.sh + - cls/test_cls_lock.sh + - cls/test_cls_journal.sh diff --git a/qa/suites/crimson-rados/rbd/tasks/rbd_lock_and_fence.yaml b/qa/suites/crimson-rados/rbd/tasks/rbd_lock_and_fence.yaml new file mode 100644 index 000000000..d2c80ad65 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/tasks/rbd_lock_and_fence.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_lock_fence.sh diff --git a/qa/suites/crimson-rados/rbd/tasks/rbd_python_api_tests.yaml b/qa/suites/crimson-rados/rbd/tasks/rbd_python_api_tests.yaml new file mode 100644 index 000000000..fd65589e0 --- /dev/null +++ b/qa/suites/crimson-rados/rbd/tasks/rbd_python_api_tests.yaml @@ -0,0 +1,16 @@ +overrides: + ceph: + log-ignorelist: + - \(SLOW_OPS\) + - slow request + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh -m 'not skip_if_crimson' + env: + RBD_FEATURES: "61" diff --git a/qa/suites/crimson-rados/rbd/tasks/rbd_python_api_tests_old_format.yaml b/qa/suites/crimson-rados/rbd/tasks/rbd_python_api_tests_old_format.yaml new file mode 100644 index 000000000..95140de6d --- /dev/null +++ b/qa/suites/crimson-rados/rbd/tasks/rbd_python_api_tests_old_format.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - \(SLOW_OPS\) + - slow request + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh -m 'not skip_if_crimson' diff --git a/qa/suites/crimson-rados/singleton/% b/qa/suites/crimson-rados/singleton/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/singleton/% diff --git a/qa/suites/crimson-rados/singleton/.qa b/qa/suites/crimson-rados/singleton/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/crimson-rados/singleton/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/crimson-rados/singleton/all/.qa b/qa/suites/crimson-rados/singleton/all/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/crimson-rados/singleton/all/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/crimson-rados/singleton/all/osd-backfill.yaml b/qa/suites/crimson-rados/singleton/all/osd-backfill.yaml new file mode 100644 index 000000000..f475d5dc3 --- /dev/null +++ b/qa/suites/crimson-rados/singleton/all/osd-backfill.yaml @@ -0,0 +1,29 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: + flavor: crimson +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + conf: + osd: + osd min pg log entries: 5 +- osd_backfill: diff --git a/qa/suites/crimson-rados/singleton/crimson-supported-all-distro b/qa/suites/crimson-rados/singleton/crimson-supported-all-distro new file mode 120000 index 000000000..a5b729b9e --- /dev/null +++ b/qa/suites/crimson-rados/singleton/crimson-supported-all-distro @@ -0,0 +1 @@ +.qa/distros/crimson-supported-all-distro/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/singleton/crimson_qa_overrides.yaml b/qa/suites/crimson-rados/singleton/crimson_qa_overrides.yaml new file mode 120000 index 000000000..2bf67af1b --- /dev/null +++ b/qa/suites/crimson-rados/singleton/crimson_qa_overrides.yaml @@ -0,0 +1 @@ +.qa/config/crimson_qa_overrides.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/singleton/objectstore b/qa/suites/crimson-rados/singleton/objectstore new file mode 120000 index 000000000..dbccf5ad9 --- /dev/null +++ b/qa/suites/crimson-rados/singleton/objectstore @@ -0,0 +1 @@ +../thrash/objectstore
\ No newline at end of file diff --git a/qa/suites/crimson-rados/singleton/rados.yaml b/qa/suites/crimson-rados/singleton/rados.yaml new file mode 120000 index 000000000..e95c99ef2 --- /dev/null +++ b/qa/suites/crimson-rados/singleton/rados.yaml @@ -0,0 +1 @@ +./.qa/suites/rados/singleton/rados.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/% b/qa/suites/crimson-rados/thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/thrash/% diff --git a/qa/suites/crimson-rados/thrash/.qa b/qa/suites/crimson-rados/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/.qa b/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled b/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled new file mode 120000 index 000000000..5393a7554 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml.disabled @@ -0,0 +1 @@ +.qa/overrides/2-size-2-min-size.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml b/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml new file mode 120000 index 000000000..5ff70eadf --- /dev/null +++ b/qa/suites/crimson-rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml @@ -0,0 +1 @@ +.qa/overrides/3-size-2-min-size.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/1-pg-log-overrides/.qa b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/1-pg-log-overrides/normal_pg_log.yaml b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/normal_pg_log.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/normal_pg_log.yaml diff --git a/qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml.disabled b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml.disabled new file mode 120000 index 000000000..abd86d7d9 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/1-pg-log-overrides/short_pg_log.yaml.disabled @@ -0,0 +1 @@ +.qa/overrides/short_pg_log.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/2-recovery-overrides/$ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/$ diff --git a/qa/suites/crimson-rados/thrash/2-recovery-overrides/.qa b/qa/suites/crimson-rados/thrash/2-recovery-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/2-recovery-overrides/default.yaml b/qa/suites/crimson-rados/thrash/2-recovery-overrides/default.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/default.yaml diff --git a/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-active-recovery.yaml.disabled b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-active-recovery.yaml.disabled new file mode 120000 index 000000000..47afd7020 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-active-recovery.yaml.disabled @@ -0,0 +1 @@ +.qa/overrides/more-active-recovery.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-async-partial-recovery.yaml.disabled b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-async-partial-recovery.yaml.disabled new file mode 100644 index 000000000..0bbc72db7 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-async-partial-recovery.yaml.disabled @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_async_recovery_min_cost: 1 + osd_object_clean_region_max_num_intervals: 1000 diff --git a/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-async-recovery.yaml.disabled b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-async-recovery.yaml.disabled new file mode 100644 index 000000000..4aed086bc --- /dev/null +++ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-async-recovery.yaml.disabled @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + osd_async_recovery_min_cost: 1 diff --git a/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-partial-recovery.yaml.disabled b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-partial-recovery.yaml.disabled new file mode 100644 index 000000000..88f15f2f6 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/2-recovery-overrides/more-partial-recovery.yaml.disabled @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + osd_object_clean_region_max_num_intervals: 1000 diff --git a/qa/suites/crimson-rados/thrash/centos_8.stream.yaml b/qa/suites/crimson-rados/thrash/centos_8.stream.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/centos_8.stream.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/clusters/+ b/qa/suites/crimson-rados/thrash/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/crimson-rados/thrash/clusters/+ diff --git a/qa/suites/crimson-rados/thrash/clusters/.qa b/qa/suites/crimson-rados/thrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/clusters/fixed-2.yaml b/qa/suites/crimson-rados/thrash/clusters/fixed-2.yaml new file mode 100644 index 000000000..9774de688 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/clusters/fixed-2.yaml @@ -0,0 +1,15 @@ +roles: +- [mon.a, osd.0, osd.1, client.0, node-exporter.a] +- [mgr.x, osd.2, osd.3, client.1, prometheus.a, node-exporter.b] +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true + global: + ms cluster mode: crc + ms service mode: crc + ms client mode: crc + ms mon service mode: crc + ms mon cluster mode: crc + ms mon client mode: crc diff --git a/qa/suites/crimson-rados/thrash/clusters/openstack.yaml.disabled b/qa/suites/crimson-rados/thrash/clusters/openstack.yaml.disabled new file mode 100644 index 000000000..e559d9126 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/clusters/openstack.yaml.disabled @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/crimson-rados/thrash/crimson-supported-all-distro b/qa/suites/crimson-rados/thrash/crimson-supported-all-distro new file mode 120000 index 000000000..a5b729b9e --- /dev/null +++ b/qa/suites/crimson-rados/thrash/crimson-supported-all-distro @@ -0,0 +1 @@ +.qa/distros/crimson-supported-all-distro/
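The six "ms ... mode: crc" settings in clusters/fixed-2.yaml above pin every msgr2 connection class to crc mode. Below is a commented restatement of that stanza, assuming the usual msgr2 semantics (crc = checksummed but unencrypted traffic, as opposed to secure):

    overrides:
      ceph:
        conf:
          global:
            # force crc mode on all msgr2 connection classes: OSD-to-OSD
            # cluster traffic, the daemon-facing service side, the client
            # side, and the three monitor equivalents
            ms cluster mode: crc
            ms service mode: crc
            ms client mode: crc
            ms mon service mode: crc
            ms mon cluster mode: crc
            ms mon client mode: crc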
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/crimson_qa_overrides.yaml b/qa/suites/crimson-rados/thrash/crimson_qa_overrides.yaml new file mode 120000 index 000000000..2bf67af1b --- /dev/null +++ b/qa/suites/crimson-rados/thrash/crimson_qa_overrides.yaml @@ -0,0 +1 @@ +.qa/config/crimson_qa_overrides.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/deploy/.qa b/qa/suites/crimson-rados/thrash/deploy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/deploy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/deploy/ceph.yaml b/qa/suites/crimson-rados/thrash/deploy/ceph.yaml new file mode 100644 index 000000000..ecad09cfe --- /dev/null +++ b/qa/suites/crimson-rados/thrash/deploy/ceph.yaml @@ -0,0 +1,11 @@ +overrides: + install: + ceph: + flavor: crimson +tasks: +- install: +- ceph: + conf: + osd: + debug monc: 20 + flavor: crimson diff --git a/qa/suites/crimson-rados/thrash/deploy/cephadm.yaml.disabled b/qa/suites/crimson-rados/thrash/deploy/cephadm.yaml.disabled new file mode 100644 index 000000000..0c2062240 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/deploy/cephadm.yaml.disabled @@ -0,0 +1,16 @@ +# no need to verify os + flavor + sha1 +verify_ceph_hash: false +tasks: +- cephadm: + conf: + mgr: + debug ms: 1 + debug mgr: 20 + debug osd: 10 +- cephadm.shell: + mon.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/crimson-rados/thrash/objectstore/.qa b/qa/suites/crimson-rados/thrash/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/objectstore/bluestore.yaml b/qa/suites/crimson-rados/thrash/objectstore/bluestore.yaml new file mode 100644 index 000000000..99c532f11 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/objectstore/bluestore.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + bluestore compression algorithm: snappy + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bluestore rocksdb cf: false + log to stderr: true + err to stderr: true + log flush on exit: true + log to file: false diff --git a/qa/suites/crimson-rados/thrash/thrashers/.qa b/qa/suites/crimson-rados/thrash/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
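In objectstore/bluestore.yaml above, the block size works out to 90 GiB per OSD (96636764160 / 2^30 = 90, roughly the "100gb osd" the in-file comment mentions), which is why the same fragment also lowers the full, backfillfull and nearfull ratios. A commented excerpt of just those keys:

    overrides:
      ceph:
        conf:
          osd:
            # 96636764160 bytes = 90 GiB per OSD, so the lowered ratios
            # below trip long before a production-sized disk would
            bluestore block size: 96636764160
            mon osd full ratio: .9
            mon osd backfillfull_ratio: .85
            mon osd nearfull ratio: .8
            osd failsafe full ratio: .95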
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/thrashers/default.yaml b/qa/suites/crimson-rados/thrash/thrashers/default.yaml new file mode 100644 index 000000000..5ffbcbd7f --- /dev/null +++ b/qa/suites/crimson-rados/thrash/thrashers/default.yaml @@ -0,0 +1,35 @@ +overrides: + ceph: + wait-for-scrub: false + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + osd delete sleep: 1 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 2400 + dump_ops_enable: false + sighup_delay: 0 + min_in: 3 + noscrub_toggle_delay: 0 + chance_down: 0 + chance_thrash_pg_upmap: 0 + reweight_osd: 0 + thrash_primary_affinity: false + ceph_objectstore_tool: false + chance_inject_pause_short: 0 + chance_thrash_cluster_full: 0 diff --git a/qa/suites/crimson-rados/thrash/thrashosds-health.yaml b/qa/suites/crimson-rados/thrash/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/crimson-rados/thrash/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/workloads/.qa b/qa/suites/crimson-rados/thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/crimson-rados/thrash/workloads/admin_socket_objecter_requests.yaml b/qa/suites/crimson-rados/thrash/workloads/admin_socket_objecter_requests.yaml new file mode 100644 index 000000000..8c9764ade --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/admin_socket_objecter_requests.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + client.0: + admin socket: /var/run/ceph/ceph-$name.asok +tasks: +- radosbench: + clients: [client.0] + time: 150 +- admin_socket: + client.0: + objecter_requests: + test: "http://git.ceph.com/?p={repo};a=blob_plain;f=src/test/admin_socket/objecter_requests;hb={branch}" diff --git a/qa/suites/crimson-rados/thrash/workloads/pool-snaps-few-objects.yaml b/qa/suites/crimson-rados/thrash/workloads/pool-snaps-few-objects.yaml new file mode 100644 index 000000000..fff5cda6e --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/pool-snaps-few-objects.yaml @@ -0,0 +1,20 @@ +override: + conf: + osd: + osd deep scrub update digest min age: 0 +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + pool_snaps: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 + diff --git a/qa/suites/crimson-rados/thrash/workloads/radosbench-high-concurrency.yaml b/qa/suites/crimson-rados/thrash/workloads/radosbench-high-concurrency.yaml new file mode 100644 index 000000000..902c4b56a --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/radosbench-high-concurrency.yaml @@ -0,0 +1,49 @@ +overrides: + ceph: + conf: + client.0: + debug ms: 1 + debug objecter: 20 + debug rados: 20 +tasks: +- full_sequential: + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 diff --git a/qa/suites/crimson-rados/thrash/workloads/radosbench.yaml b/qa/suites/crimson-rados/thrash/workloads/radosbench.yaml new file mode 100644 index 000000000..071f55e39 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/radosbench.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + conf: + client.0: + debug ms: 1 + debug objecter: 20 + debug rados: 20 +tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 diff --git a/qa/suites/crimson-rados/thrash/workloads/small-objects-balanced.yaml b/qa/suites/crimson-rados/thrash/workloads/small-objects-balanced.yaml new file mode 100644 index 000000000..0c50dc136 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/small-objects-balanced.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + crush_tunables: jewel +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + balance_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + 
snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/crimson-rados/thrash/workloads/small-objects-localized.yaml b/qa/suites/crimson-rados/thrash/workloads/small-objects-localized.yaml new file mode 100644 index 000000000..df5c114f1 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/small-objects-localized.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + crush_tunables: jewel +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + localize_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/crimson-rados/thrash/workloads/small-objects.yaml b/qa/suites/crimson-rados/thrash/workloads/small-objects.yaml new file mode 100644 index 000000000..32928c303 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/small-objects.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + crush_tunables: jewel +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects-balanced.yaml b/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects-balanced.yaml new file mode 100644 index 000000000..1161c3cc2 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects-balanced.yaml @@ -0,0 +1,15 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + balance_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 diff --git a/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects-localized.yaml b/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects-localized.yaml new file mode 100644 index 000000000..80af0def0 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects-localized.yaml @@ -0,0 +1,15 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + localize_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 diff --git a/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects.yaml b/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects.yaml new file mode 100644 index 000000000..0694ffcd0 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/snaps-few-objects.yaml @@ -0,0 +1,14 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 0 + # TODO: CEPH_OSD_OP_COPY_FROM + copy_from: 0 diff --git a/qa/suites/crimson-rados/thrash/workloads/write_fadvise_dontneed.yaml b/qa/suites/crimson-rados/thrash/workloads/write_fadvise_dontneed.yaml new file mode 100644 index 000000000..606dcae69 --- /dev/null +++ b/qa/suites/crimson-rados/thrash/workloads/write_fadvise_dontneed.yaml @@ -0,0 +1,8 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_fadvise_dontneed: true + op_weights: + write: 100 diff --git a/qa/suites/dummy/% b/qa/suites/dummy/% new file mode 100644 index 
000000000..e69de29bb --- /dev/null +++ b/qa/suites/dummy/% diff --git a/qa/suites/dummy/.qa b/qa/suites/dummy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/dummy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/dummy/all/.qa b/qa/suites/dummy/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/dummy/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/dummy/all/nop.yaml b/qa/suites/dummy/all/nop.yaml new file mode 100644 index 000000000..0f00ffc88 --- /dev/null +++ b/qa/suites/dummy/all/nop.yaml @@ -0,0 +1,6 @@ +roles: + - [mon.a, mgr.x, mds.a, osd.0, osd.1, client.0] + +tasks: + - nop: + diff --git a/qa/suites/experimental/.qa b/qa/suites/experimental/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/experimental/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/experimental/multimds/% b/qa/suites/experimental/multimds/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/experimental/multimds/% diff --git a/qa/suites/experimental/multimds/.qa b/qa/suites/experimental/multimds/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/experimental/multimds/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/experimental/multimds/clusters/.qa b/qa/suites/experimental/multimds/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/experimental/multimds/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/experimental/multimds/clusters/7-multimds.yaml b/qa/suites/experimental/multimds/clusters/7-multimds.yaml new file mode 100644 index 000000000..7b2763f19 --- /dev/null +++ b/qa/suites/experimental/multimds/clusters/7-multimds.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mgr.x, mds.a, mds.d] +- [mon.b, mgr.y, mds.b, mds.e] +- [mon.c, mgr.z, mds.c, mds.f] +- [osd.0] +- [osd.1] +- [osd.2] +- [client.0] diff --git a/qa/suites/experimental/multimds/tasks/.qa b/qa/suites/experimental/multimds/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/experimental/multimds/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/experimental/multimds/tasks/fsstress_thrash_subtrees.yaml b/qa/suites/experimental/multimds/tasks/fsstress_thrash_subtrees.yaml new file mode 100644 index 000000000..bee01a835 --- /dev/null +++ b/qa/suites/experimental/multimds/tasks/fsstress_thrash_subtrees.yaml @@ -0,0 +1,15 @@ +tasks: +- install: +- ceph: + conf: + mds: + mds thrash exports: 1 + mds debug subtrees: 1 + mds debug scatterstat: 1 + mds verify scatter: 1 +- ceph-fuse: +- workunit: + clients: + client.0: + - suites/fsstress.sh + diff --git a/qa/suites/fs/.qa b/qa/suites/fs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/32bits/% b/qa/suites/fs/32bits/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/32bits/% diff --git a/qa/suites/fs/32bits/.qa b/qa/suites/fs/32bits/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/32bits/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/32bits/begin b/qa/suites/fs/32bits/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/32bits/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/32bits/clusters/.qa b/qa/suites/fs/32bits/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/32bits/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/32bits/clusters/fixed-2-ucephfs.yaml b/qa/suites/fs/32bits/clusters/fixed-2-ucephfs.yaml new file mode 120000 index 000000000..b0c41a89a --- /dev/null +++ b/qa/suites/fs/32bits/clusters/fixed-2-ucephfs.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/fixed-2-ucephfs.yaml
\ No newline at end of file diff --git a/qa/suites/fs/32bits/conf b/qa/suites/fs/32bits/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/32bits/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/32bits/distro b/qa/suites/fs/32bits/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/32bits/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/32bits/mount/.qa b/qa/suites/fs/32bits/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/32bits/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/32bits/mount/fuse.yaml b/qa/suites/fs/32bits/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/32bits/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/32bits/objectstore-ec b/qa/suites/fs/32bits/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/32bits/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/32bits/overrides/+ b/qa/suites/fs/32bits/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/32bits/overrides/+ diff --git a/qa/suites/fs/32bits/overrides/.qa b/qa/suites/fs/32bits/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/32bits/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/32bits/overrides/faked-ino.yaml b/qa/suites/fs/32bits/overrides/faked-ino.yaml new file mode 100644 index 000000000..102df684d --- /dev/null +++ b/qa/suites/fs/32bits/overrides/faked-ino.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + client use faked inos: true diff --git a/qa/suites/fs/32bits/overrides/ignorelist_health.yaml b/qa/suites/fs/32bits/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/32bits/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/32bits/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/32bits/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/32bits/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/32bits/tasks/.qa b/qa/suites/fs/32bits/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/32bits/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_fsstress.yaml b/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_fsstress.yaml new file mode 120000 index 000000000..c2e859fff --- /dev/null +++ b/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml
\ No newline at end of file diff --git a/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_pjd.yaml b/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_pjd.yaml new file mode 100644 index 000000000..f7784383b --- /dev/null +++ b/qa/suites/fs/32bits/tasks/cfuse_workunit_suites_pjd.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + client: + fuse set user groups: true +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/fs/bugs/.qa b/qa/suites/fs/bugs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/bugs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/% b/qa/suites/fs/bugs/client_trim_caps/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/% diff --git a/qa/suites/fs/bugs/client_trim_caps/.qa b/qa/suites/fs/bugs/client_trim_caps/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/begin b/qa/suites/fs/bugs/client_trim_caps/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/centos_latest.yaml b/qa/suites/fs/bugs/client_trim_caps/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/clusters/.qa b/qa/suites/fs/bugs/client_trim_caps/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/clusters/small-cluster.yaml b/qa/suites/fs/bugs/client_trim_caps/clusters/small-cluster.yaml new file mode 100644 index 000000000..5cd97a3ae --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/clusters/small-cluster.yaml @@ -0,0 +1,11 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, mds.a, mds.b, client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +- machine: + disk: 100 # GB +log-rotate: + ceph-mds: 10G + ceph-osd: 10G diff --git a/qa/suites/fs/bugs/client_trim_caps/conf b/qa/suites/fs/bugs/client_trim_caps/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/objectstore/.qa b/qa/suites/fs/bugs/client_trim_caps/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/bugs/client_trim_caps/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/overrides/+ b/qa/suites/fs/bugs/client_trim_caps/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/overrides/+ diff --git a/qa/suites/fs/bugs/client_trim_caps/overrides/.qa b/qa/suites/fs/bugs/client_trim_caps/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/overrides/ignorelist_health.yaml b/qa/suites/fs/bugs/client_trim_caps/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/bugs/client_trim_caps/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/overrides/no_client_pidfile.yaml b/qa/suites/fs/bugs/client_trim_caps/overrides/no_client_pidfile.yaml new file mode 120000 index 000000000..8888f3327 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/overrides/no_client_pidfile.yaml @@ -0,0 +1 @@ +.qa/overrides/no_client_pidfile.yaml
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/tasks/.qa b/qa/suites/fs/bugs/client_trim_caps/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/bugs/client_trim_caps/tasks/trim-i24137.yaml b/qa/suites/fs/bugs/client_trim_caps/tasks/trim-i24137.yaml new file mode 100644 index 000000000..098f7d053 --- /dev/null +++ b/qa/suites/fs/bugs/client_trim_caps/tasks/trim-i24137.yaml @@ -0,0 +1,14 @@ +# Note this test is unlikely to exercise the code as expected in the future: +# "It's too tricky to arrange inodes in session->caps. we don't know if it +# still works in the future." -Zheng + +tasks: +- exec: + mon.a: + - ceph config set mds mds_min_caps_per_client 1 +- background_exec: + mon.a: + - "sleep 30 && ceph config set mds mds_max_caps_per_client 1" +- exec: + client.0: + - ceph_test_trim_caps diff --git a/qa/suites/fs/cephadm/.qa b/qa/suites/fs/cephadm/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/cephadm/.qa @@ -0,0 +1 @@ +../.qa/
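The trim-i24137.yaml fragment above stages a race on purpose: mds_min_caps_per_client is dropped to 1 up front, and then, assuming teuthology's background_exec runs its commands concurrently with the tasks that follow, the per-client cap limit is cut to 1 about 30 seconds into the ceph_test_trim_caps run so the MDS has to recall nearly every capability it handed out. A commented copy of the task list:

    tasks:
    - exec:
        mon.a:
          # let the MDS trim clients all the way down to a single cap
          - ceph config set mds mds_min_caps_per_client 1
    - background_exec:
        mon.a:
          # ~30s into the workload, cap each client at one cap so the MDS
          # must recall (trim) almost everything it already granted
          - "sleep 30 && ceph config set mds mds_max_caps_per_client 1"
    - exec:
        client.0:
          - ceph_test_trim_caps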
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/multivolume/% b/qa/suites/fs/cephadm/multivolume/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/% diff --git a/qa/suites/fs/cephadm/multivolume/.qa b/qa/suites/fs/cephadm/multivolume/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/multivolume/0-start.yaml b/qa/suites/fs/cephadm/multivolume/0-start.yaml new file mode 100644 index 000000000..13dc863d2 --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/0-start.yaml @@ -0,0 +1,39 @@ +roles: +- - host.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 +- - host.b + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true +tasks: +- install: +- cephadm: + roleless: true +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls +- cephadm.shell: + host.a: + - ceph fs volume create foo + - ceph fs volume create bar +- fs.ready: + timeout: 300 diff --git a/qa/suites/fs/cephadm/multivolume/1-mount.yaml b/qa/suites/fs/cephadm/multivolume/1-mount.yaml new file mode 100644 index 000000000..713621972 --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/1-mount.yaml @@ -0,0 +1,7 @@ +tasks: + - ceph-fuse: + client.0: + cephfs_name: foo + - ceph-fuse: + client.1: + cephfs_name: bar diff --git a/qa/suites/fs/cephadm/multivolume/2-workload/.qa b/qa/suites/fs/cephadm/multivolume/2-workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/2-workload/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/multivolume/2-workload/dbench.yaml b/qa/suites/fs/cephadm/multivolume/2-workload/dbench.yaml new file mode 120000 index 000000000..9fb8adcea --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/2-workload/dbench.yaml @@ -0,0 +1 @@ +.qa/suites/fs/workload/tasks/5-workunit/suites/dbench.yaml
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/multivolume/distro/.qa b/qa/suites/fs/cephadm/multivolume/distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/multivolume/distro/single-container-host.yaml b/qa/suites/fs/cephadm/multivolume/distro/single-container-host.yaml new file mode 120000 index 000000000..7406e749c --- /dev/null +++ b/qa/suites/fs/cephadm/multivolume/distro/single-container-host.yaml @@ -0,0 +1 @@ +.qa/distros/single-container-host.yaml
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/renamevolume/% b/qa/suites/fs/cephadm/renamevolume/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/% diff --git a/qa/suites/fs/cephadm/renamevolume/.qa b/qa/suites/fs/cephadm/renamevolume/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/renamevolume/0-start.yaml b/qa/suites/fs/cephadm/renamevolume/0-start.yaml new file mode 100644 index 000000000..727837580 --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/0-start.yaml @@ -0,0 +1,38 @@ +roles: +- - host.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 +- - host.b + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true +tasks: +- install: +- cephadm: + roleless: true +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls +- cephadm.shell: + host.a: + - ceph fs volume create foo +- fs.ready: + timeout: 300 diff --git a/qa/suites/fs/cephadm/renamevolume/1-rename.yaml b/qa/suites/fs/cephadm/renamevolume/1-rename.yaml new file mode 100644 index 000000000..7f9bc8906 --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/1-rename.yaml @@ -0,0 +1,11 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs volume rename foo bar --yes-i-really-mean-it +- fs.ready: + timeout: 300 +- cephadm.shell: + host.a: + - | + set -ex + ceph orch ls mds --format=json | jq ".[] | .service_name" | grep "mds.bar" diff --git a/qa/suites/fs/cephadm/renamevolume/distro/.qa b/qa/suites/fs/cephadm/renamevolume/distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/distro/.qa @@ -0,0 +1 @@ +../.qa/
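The final cephadm.shell step in renamevolume/1-rename.yaml above carries the actual assertion of the test: after renaming the volume from foo to bar and waiting for fs.ready, it lists the orchestrator-managed MDS services as JSON and greps for the new service name. A commented copy of that stanza; since the script runs under set -ex, a missing mds.bar service makes grep fail and the job with it:

    tasks:
    - cephadm.shell:
        host.a:
          - |
            # the orchestrator should now manage an MDS service named
            # after the renamed volume, i.e. mds.bar rather than mds.foo
            set -ex
            ceph orch ls mds --format=json | jq ".[] | .service_name" | grep "mds.bar"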
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/renamevolume/distro/single-container-host.yaml b/qa/suites/fs/cephadm/renamevolume/distro/single-container-host.yaml new file mode 120000 index 000000000..7406e749c --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/distro/single-container-host.yaml @@ -0,0 +1 @@ +.qa/distros/single-container-host.yaml
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/renamevolume/overrides/.qa b/qa/suites/fs/cephadm/renamevolume/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/cephadm/renamevolume/overrides/ignorelist_health.yaml b/qa/suites/fs/cephadm/renamevolume/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/cephadm/renamevolume/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/% b/qa/suites/fs/fscrypt/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/fscrypt/% diff --git a/qa/suites/fs/fscrypt/.qa b/qa/suites/fs/fscrypt/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/begin b/qa/suites/fs/fscrypt/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/fscrypt/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/bluestore-bitmap.yaml b/qa/suites/fs/fscrypt/bluestore-bitmap.yaml new file mode 120000 index 000000000..fb603bc9a --- /dev/null +++ b/qa/suites/fs/fscrypt/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/clusters/.qa b/qa/suites/fs/fscrypt/clusters/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/fs/fscrypt/clusters/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/clusters/1-mds-1-client.yaml b/qa/suites/fs/fscrypt/clusters/1-mds-1-client.yaml new file mode 120000 index 000000000..64bdb79fe --- /dev/null +++ b/qa/suites/fs/fscrypt/clusters/1-mds-1-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-1-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/conf b/qa/suites/fs/fscrypt/conf new file mode 120000 index 000000000..6d4712984 --- /dev/null +++ b/qa/suites/fs/fscrypt/conf @@ -0,0 +1 @@ +.qa/cephfs/conf/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/distro/$ b/qa/suites/fs/fscrypt/distro/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/fscrypt/distro/$ diff --git a/qa/suites/fs/fscrypt/distro/.qa b/qa/suites/fs/fscrypt/distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/distro/centos_latest.yaml b/qa/suites/fs/fscrypt/distro/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/fs/fscrypt/distro/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/distro/ubuntu_latest.yaml b/qa/suites/fs/fscrypt/distro/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/fs/fscrypt/distro/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/.qa b/qa/suites/fs/fscrypt/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/% b/qa/suites/fs/fscrypt/mount/kclient/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/% diff --git a/qa/suites/fs/fscrypt/mount/kclient/.qa b/qa/suites/fs/fscrypt/mount/kclient/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/mount-syntax/.qa b/qa/suites/fs/fscrypt/mount/kclient/mount-syntax/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/mount-syntax/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/mount-syntax/v1.yaml b/qa/suites/fs/fscrypt/mount/kclient/mount-syntax/v1.yaml new file mode 120000 index 000000000..3315775c1 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/mount-syntax/v1.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/mount-syntax/v1.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/mount.yaml b/qa/suites/fs/fscrypt/mount/kclient/mount.yaml new file mode 120000 index 000000000..9967f23e2 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/mount.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/mount.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/overrides/% b/qa/suites/fs/fscrypt/mount/kclient/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/overrides/% diff --git a/qa/suites/fs/fscrypt/mount/kclient/overrides/.qa b/qa/suites/fs/fscrypt/mount/kclient/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/.qa b/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/testing/.qa b/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/testing/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/testing/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/testing/k-testing.yaml b/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/testing/k-testing.yaml new file mode 120000 index 000000000..bec80be29 --- /dev/null +++ b/qa/suites/fs/fscrypt/mount/kclient/overrides/distro/testing/k-testing.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/overrides/+ b/qa/suites/fs/fscrypt/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/fscrypt/overrides/+ diff --git a/qa/suites/fs/fscrypt/overrides/.qa b/qa/suites/fs/fscrypt/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/overrides/ignorelist_health.yaml b/qa/suites/fs/fscrypt/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/fscrypt/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/overrides/ignorelist_health_more.yaml b/qa/suites/fs/fscrypt/overrides/ignorelist_health_more.yaml new file mode 100644 index 000000000..948352527 --- /dev/null +++ b/qa/suites/fs/fscrypt/overrides/ignorelist_health_more.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + log-ignorelist: + - Reduced data availability + - Degraded data redundancy diff --git a/qa/suites/fs/fscrypt/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/fscrypt/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/fscrypt/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/overrides/pg-warn.yaml b/qa/suites/fs/fscrypt/overrides/pg-warn.yaml new file mode 100644 index 000000000..4ae54a40d --- /dev/null +++ b/qa/suites/fs/fscrypt/overrides/pg-warn.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon pg warn min per osd: 0 diff --git a/qa/suites/fs/fscrypt/tasks/.qa b/qa/suites/fs/fscrypt/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/fscrypt/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/fscrypt/tasks/fscrypt-common.yaml b/qa/suites/fs/fscrypt/tasks/fscrypt-common.yaml new file mode 100644 index 000000000..5cb34d981 --- /dev/null +++ b/qa/suites/fs/fscrypt/tasks/fscrypt-common.yaml @@ -0,0 +1,5 @@ +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_fscrypt diff --git a/qa/suites/fs/fscrypt/tasks/fscrypt-dbench.yaml b/qa/suites/fs/fscrypt/tasks/fscrypt-dbench.yaml new file mode 100644 index 000000000..324bc64c3 --- /dev/null +++ b/qa/suites/fs/fscrypt/tasks/fscrypt-dbench.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + timeout: 6h + clients: + client.0: + - fs/fscrypt.sh none dbench + - fs/fscrypt.sh unlocked dbench diff --git a/qa/suites/fs/fscrypt/tasks/fscrypt-ffsb.yaml b/qa/suites/fs/fscrypt/tasks/fscrypt-ffsb.yaml new file mode 100644 index 000000000..0c70fe963 --- /dev/null +++ b/qa/suites/fs/fscrypt/tasks/fscrypt-ffsb.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + timeout: 6h + clients: + client.0: + - fs/fscrypt.sh none ffsb + - fs/fscrypt.sh unlocked ffsb diff --git a/qa/suites/fs/fscrypt/tasks/fscrypt-iozone.yaml b/qa/suites/fs/fscrypt/tasks/fscrypt-iozone.yaml new file mode 100644 index 000000000..5e9bb5aa3 --- /dev/null +++ b/qa/suites/fs/fscrypt/tasks/fscrypt-iozone.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + timeout: 6h + clients: + client.0: + - fs/fscrypt.sh none iozone + - fs/fscrypt.sh unlocked iozone diff --git a/qa/suites/fs/fscrypt/tasks/fscrypt-pjd.yaml b/qa/suites/fs/fscrypt/tasks/fscrypt-pjd.yaml new file mode 100644 index 000000000..6d8f709bd --- /dev/null +++ b/qa/suites/fs/fscrypt/tasks/fscrypt-pjd.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + timeout: 6h + clients: + client.0: + - fs/fscrypt.sh none pjd + - fs/fscrypt.sh unlocked pjd diff --git a/qa/suites/fs/full/% b/qa/suites/fs/full/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/full/% diff --git a/qa/suites/fs/full/.qa b/qa/suites/fs/full/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/full/.qa @@ -0,0 +1 @@ +../.qa/
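Each of the fscrypt-*.yaml task fragments above calls the fs/fscrypt.sh workunit twice with the same workload but a different first argument. Reading the invocations, the first argument looks like an encryption state (none, then unlocked) and the second the workload to drive (dbench, ffsb, iozone or pjd); that reading is an assumption from the argument names, not something spelled out in this diff. A commented copy of the dbench variant:

    tasks:
    - workunit:
        timeout: 6h
        clients:
          client.0:
            # fs/fscrypt.sh <mode> <workload>: run dbench once without
            # encryption ("none") and once against an fscrypt-protected
            # directory whose key is loaded ("unlocked")
            - fs/fscrypt.sh none dbench
            - fs/fscrypt.sh unlocked dbench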
\ No newline at end of file diff --git a/qa/suites/fs/full/begin b/qa/suites/fs/full/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/full/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/full/clusters/.qa b/qa/suites/fs/full/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/full/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml b/qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml new file mode 120000 index 000000000..517b76547 --- /dev/null +++ b/qa/suites/fs/full/clusters/1-node-1-mds-1-osd.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-node-1-mds-1-osd.yaml
\ No newline at end of file diff --git a/qa/suites/fs/full/conf b/qa/suites/fs/full/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/full/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/full/distro b/qa/suites/fs/full/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/full/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/full/mount/.qa b/qa/suites/fs/full/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/full/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/full/mount/fuse.yaml b/qa/suites/fs/full/mount/fuse.yaml new file mode 100644 index 000000000..8338cc493 --- /dev/null +++ b/qa/suites/fs/full/mount/fuse.yaml @@ -0,0 +1,2 @@ +tasks: + - ceph-fuse: diff --git a/qa/suites/fs/full/objectstore/.qa b/qa/suites/fs/full/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/full/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/full/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/full/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/full/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/full/overrides.yaml b/qa/suites/fs/full/overrides.yaml new file mode 100644 index 000000000..921528d66 --- /dev/null +++ b/qa/suites/fs/full/overrides.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + conf: + mgr: + debug client: 20 + log-ignorelist: + - OSD full dropping all updates + - OSD near full + - pausewr flag + - failsafe engaged, dropping updates + - failsafe disengaged, no longer dropping + - is full \(reached quota + - POOL_FULL + - POOL_NEARFULL + - POOL_BACKFILLFULL + - PG_DEGRADED + - OSD_OUT_OF_ORDER_FULL + - OSD_NEARFULL + - OSD_FULL diff --git a/qa/suites/fs/full/overrides/+ b/qa/suites/fs/full/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/full/overrides/+ diff --git a/qa/suites/fs/full/overrides/.qa b/qa/suites/fs/full/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/full/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/full/overrides/ignorelist_health.yaml b/qa/suites/fs/full/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/full/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/full/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/full/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/full/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/full/overrides/no_client_pidfile.yaml b/qa/suites/fs/full/overrides/no_client_pidfile.yaml new file mode 120000 index 000000000..8888f3327 --- /dev/null +++ b/qa/suites/fs/full/overrides/no_client_pidfile.yaml @@ -0,0 +1 @@ +.qa/overrides/no_client_pidfile.yaml
\ No newline at end of file diff --git a/qa/suites/fs/full/tasks/.qa b/qa/suites/fs/full/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/full/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/full/tasks/mgr-osd-full.yaml b/qa/suites/fs/full/tasks/mgr-osd-full.yaml new file mode 100644 index 000000000..b4f673e39 --- /dev/null +++ b/qa/suites/fs/full/tasks/mgr-osd-full.yaml @@ -0,0 +1,31 @@ +overrides: + ceph: + conf: + global: + osd_pool_default_size: 1 + osd_pool_default_min_size: 1 + client: + debug ms: 1 + debug client: 20 + mds: + debug ms: 1 + debug mds: 20 + osd: # force bluestore since it's required for ec overwrites + osd objectstore: bluestore + bluestore block size: 1073741824 +tasks: +- workunit: + cleanup: true + clients: + client.0: + - fs/full/subvolume_rm.sh +- workunit: + cleanup: true + clients: + client.0: + - fs/full/subvolume_clone.sh +- workunit: + cleanup: true + clients: + client.0: + - fs/full/subvolume_snapshot_rm.sh diff --git a/qa/suites/fs/functional/% b/qa/suites/fs/functional/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/functional/% diff --git a/qa/suites/fs/functional/.qa b/qa/suites/fs/functional/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/functional/.qa @@ -0,0 +1 @@ +../.qa/
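tasks/mgr-osd-full.yaml above deliberately builds a cluster that is easy to fill: single-copy pools (size and min_size of 1) on a bluestore OSD whose backing device is pinned to 1073741824 bytes, i.e. exactly 1 GiB, so the subvolume_rm, subvolume_clone and subvolume_snapshot_rm workunits in the same fragment can drive it to full quickly. A commented excerpt of the relevant overrides:

    overrides:
      ceph:
        conf:
          global:
            # unreplicated pools: one copy of everything on the single OSD
            osd_pool_default_size: 1
            osd_pool_default_min_size: 1
          osd:
            osd objectstore: bluestore
            # 1073741824 bytes = 1 GiB per OSD, small enough for the
            # full-space workunits to hit the full thresholds fast
            bluestore block size: 1073741824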
\ No newline at end of file diff --git a/qa/suites/fs/functional/begin b/qa/suites/fs/functional/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/functional/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/functional/clusters/.qa b/qa/suites/fs/functional/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/functional/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/functional/clusters/1a3s-mds-4c-client.yaml b/qa/suites/fs/functional/clusters/1a3s-mds-4c-client.yaml new file mode 120000 index 000000000..5c722a30b --- /dev/null +++ b/qa/suites/fs/functional/clusters/1a3s-mds-4c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a3s-mds-4c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/functional/conf b/qa/suites/fs/functional/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/functional/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/functional/distro b/qa/suites/fs/functional/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/functional/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/functional/mount b/qa/suites/fs/functional/mount new file mode 120000 index 000000000..e3600f453 --- /dev/null +++ b/qa/suites/fs/functional/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/functional/objectstore/.qa b/qa/suites/fs/functional/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/functional/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/functional/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/functional/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/functional/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/functional/objectstore/bluestore-ec-root.yaml b/qa/suites/fs/functional/objectstore/bluestore-ec-root.yaml new file mode 120000 index 000000000..4edebd682 --- /dev/null +++ b/qa/suites/fs/functional/objectstore/bluestore-ec-root.yaml @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec/bluestore-ec-root.yaml
\ No newline at end of file diff --git a/qa/suites/fs/functional/overrides/+ b/qa/suites/fs/functional/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/functional/overrides/+ diff --git a/qa/suites/fs/functional/overrides/.qa b/qa/suites/fs/functional/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/functional/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/functional/overrides/ignorelist_health.yaml b/qa/suites/fs/functional/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/functional/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/functional/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/functional/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/functional/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/functional/overrides/no_client_pidfile.yaml b/qa/suites/fs/functional/overrides/no_client_pidfile.yaml new file mode 120000 index 000000000..8888f3327 --- /dev/null +++ b/qa/suites/fs/functional/overrides/no_client_pidfile.yaml @@ -0,0 +1 @@ +.qa/overrides/no_client_pidfile.yaml
\ No newline at end of file diff --git a/qa/suites/fs/functional/tasks/.qa b/qa/suites/fs/functional/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/functional/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/functional/tasks/acls.yaml b/qa/suites/fs/functional/tasks/acls.yaml new file mode 100644 index 000000000..c44f6e00d --- /dev/null +++ b/qa/suites/fs/functional/tasks/acls.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - Reduced data availability + - Degraded data redundancy +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_acls diff --git a/qa/suites/fs/functional/tasks/admin.yaml b/qa/suites/fs/functional/tasks/admin.yaml new file mode 100644 index 000000000..97ecc4cf4 --- /dev/null +++ b/qa/suites/fs/functional/tasks/admin.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + global: + lockdep: true + log-ignorelist: + - missing required features +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_admin diff --git a/qa/suites/fs/functional/tasks/alternate-pool.yaml b/qa/suites/fs/functional/tasks/alternate-pool.yaml new file mode 100644 index 000000000..7a1caa473 --- /dev/null +++ b/qa/suites/fs/functional/tasks/alternate-pool.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + log-ignorelist: + - bad backtrace + - object missing on disk + - error reading table object + - error reading sessionmap + - unmatched fragstat + - unmatched rstat + - was unreadable, recreating it now + - Scrub error on inode + - Metadata damage detected + - MDS_FAILED + - MDS_DAMAGE + +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_recovery_pool diff --git a/qa/suites/fs/functional/tasks/asok_dump_tree.yaml b/qa/suites/fs/functional/tasks/asok_dump_tree.yaml new file mode 100644 index 000000000..7fa561470 --- /dev/null +++ b/qa/suites/fs/functional/tasks/asok_dump_tree.yaml @@ -0,0 +1,4 @@ +tasks: +- cephfs_test_runner: + modules: + - tasks.cephfs.test_dump_tree diff --git a/qa/suites/fs/functional/tasks/auto-repair.yaml b/qa/suites/fs/functional/tasks/auto-repair.yaml new file mode 100644 index 000000000..1a3ff3efb --- /dev/null +++ b/qa/suites/fs/functional/tasks/auto-repair.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-ignorelist: + - force file system read-only + - bad backtrace + - MDS in read-only mode + - \(MDS_READ_ONLY\) +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_auto_repair diff --git a/qa/suites/fs/functional/tasks/backtrace.yaml b/qa/suites/fs/functional/tasks/backtrace.yaml new file mode 100644 index 000000000..6d80c32cc --- /dev/null +++ b/qa/suites/fs/functional/tasks/backtrace.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_backtrace diff --git a/qa/suites/fs/functional/tasks/cap-flush.yaml b/qa/suites/fs/functional/tasks/cap-flush.yaml new file mode 100644 index 000000000..f063654a9 --- /dev/null +++ b/qa/suites/fs/functional/tasks/cap-flush.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + log-ignorelist: + - Replacing daemon mds.a +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_cap_flush diff --git a/qa/suites/fs/functional/tasks/client-limits.yaml b/qa/suites/fs/functional/tasks/client-limits.yaml new file mode 100644 index 000000000..b6b3c9858 --- /dev/null +++ b/qa/suites/fs/functional/tasks/client-limits.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + log-ignorelist: + - responding to mclientcaps\(revoke\) + - not advance its oldest_client_tid + - failing to advance its oldest client/flush tid + - Too many inodes in cache + - failing to respond to cache pressure + - slow requests are blocked + - failing to respond to capability release + - MDS cache is too large + - 
\(MDS_CLIENT_OLDEST_TID\) + - \(MDS_CACHE_OVERSIZED\) +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_client_limits diff --git a/qa/suites/fs/functional/tasks/client-readahead.yaml b/qa/suites/fs/functional/tasks/client-readahead.yaml new file mode 100644 index 000000000..7c5c850e6 --- /dev/null +++ b/qa/suites/fs/functional/tasks/client-readahead.yaml @@ -0,0 +1,5 @@ +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_readahead diff --git a/qa/suites/fs/functional/tasks/client-recovery.yaml b/qa/suites/fs/functional/tasks/client-recovery.yaml new file mode 100644 index 000000000..e67acc3ab --- /dev/null +++ b/qa/suites/fs/functional/tasks/client-recovery.yaml @@ -0,0 +1,16 @@ +# The task interferes with the network, so we need +# to permit OSDs to complain about that. +overrides: + ceph: + log-ignorelist: + - evicting unresponsive client + - but it is still running + - slow request + - MDS_CLIENT_LATE_RELEASE + - t responding to mclientcaps + - file system flag refuse_client_session is set +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_client_recovery diff --git a/qa/suites/fs/functional/tasks/damage.yaml b/qa/suites/fs/functional/tasks/damage.yaml new file mode 100644 index 000000000..7703aee93 --- /dev/null +++ b/qa/suites/fs/functional/tasks/damage.yaml @@ -0,0 +1,27 @@ +overrides: + ceph: + log-ignorelist: + - bad backtrace + - object missing on disk + - error reading table object + - error reading sessionmap + - Error loading MDS rank + - missing journal object + - Error recovering journal + - error decoding table object + - failed to read JournalPointer + - Corrupt directory entry + - Corrupt fnode header + - corrupt sessionmap header + - Corrupt dentry + - Scrub error on inode + - Metadata damage detected + - MDS_READ_ONLY + - force file system read-only + - with standby daemon mds + - MDS abort because newly corrupt dentry +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_damage + diff --git a/qa/suites/fs/functional/tasks/data-scan.yaml b/qa/suites/fs/functional/tasks/data-scan.yaml new file mode 100644 index 000000000..b704a0036 --- /dev/null +++ b/qa/suites/fs/functional/tasks/data-scan.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + log-ignorelist: + - bad backtrace + - object missing on disk + - error reading table object + - error reading sessionmap + - unmatched fragstat + - unmatched rstat + - was unreadable, recreating it now + - Scrub error on inode + - Metadata damage detected + - inconsistent rstat on inode + - Error recovering journal +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_data_scan diff --git a/qa/suites/fs/functional/tasks/exports.yaml b/qa/suites/fs/functional/tasks/exports.yaml new file mode 100644 index 000000000..76819fee9 --- /dev/null +++ b/qa/suites/fs/functional/tasks/exports.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + log-ignorelist: + - Replacing daemon mds +tasks: +- cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_exports diff --git a/qa/suites/fs/functional/tasks/forward-scrub.yaml b/qa/suites/fs/functional/tasks/forward-scrub.yaml new file mode 100644 index 000000000..961d25db0 --- /dev/null +++ b/qa/suites/fs/functional/tasks/forward-scrub.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - inode wrongly marked free + - bad backtrace on inode + - inode table repaired for inode + - Scrub error on inode + - Scrub error on dir + - Metadata damage detected +tasks: + - 
cephfs_test_runner: + modules: + - tasks.cephfs.test_forward_scrub diff --git a/qa/suites/fs/functional/tasks/fragment.yaml b/qa/suites/fs/functional/tasks/fragment.yaml new file mode 100644 index 000000000..482caad85 --- /dev/null +++ b/qa/suites/fs/functional/tasks/fragment.yaml @@ -0,0 +1,5 @@ + +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_fragment diff --git a/qa/suites/fs/functional/tasks/journal-repair.yaml b/qa/suites/fs/functional/tasks/journal-repair.yaml new file mode 100644 index 000000000..0dae6143c --- /dev/null +++ b/qa/suites/fs/functional/tasks/journal-repair.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - bad backtrace on directory inode + - error reading table object + - Metadata damage detected + - slow requests are blocked + - Behind on trimming + - error reading sessionmap +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_journal_repair diff --git a/qa/suites/fs/functional/tasks/mds-flush.yaml b/qa/suites/fs/functional/tasks/mds-flush.yaml new file mode 100644 index 000000000..be8c4bee6 --- /dev/null +++ b/qa/suites/fs/functional/tasks/mds-flush.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_flush diff --git a/qa/suites/fs/functional/tasks/mds-full.yaml b/qa/suites/fs/functional/tasks/mds-full.yaml new file mode 100644 index 000000000..9399890c4 --- /dev/null +++ b/qa/suites/fs/functional/tasks/mds-full.yaml @@ -0,0 +1,38 @@ +overrides: + ceph: + cephfs: + ec_profile: + - disabled + log-ignorelist: + - OSD full dropping all updates + - OSD near full + - pausewr flag + - failsafe engaged, dropping updates + - failsafe disengaged, no longer dropping + - is full \(reached quota + - POOL_FULL + - POOL_BACKFILLFULL + - PG_RECOVERY_FULL + - PG_DEGRADED + conf: + mon: + mon osd nearfull ratio: 0.6 + mon osd backfillfull ratio: 0.6 + mon osd full ratio: 0.7 + osd: + osd mon report interval: 5 + osd objectstore: memstore + osd failsafe full ratio: 1.0 + memstore device bytes: 200000000 + client.0: + debug client: 20 + debug objecter: 20 + debug objectcacher: 20 + client.1: + debug client: 20 + debug objecter: 20 + debug objectcacher: 20 +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_full diff --git a/qa/suites/fs/functional/tasks/mds_creation_retry.yaml b/qa/suites/fs/functional/tasks/mds_creation_retry.yaml new file mode 100644 index 000000000..fd23aa8ba --- /dev/null +++ b/qa/suites/fs/functional/tasks/mds_creation_retry.yaml @@ -0,0 +1,6 @@ +tasks: +-mds_creation_failure: +- workunit: + clients: + all: [fs/misc/trivial_sync.sh] + diff --git a/qa/suites/fs/functional/tasks/metrics.yaml b/qa/suites/fs/functional/tasks/metrics.yaml new file mode 100644 index 000000000..7e5ac4150 --- /dev/null +++ b/qa/suites/fs/functional/tasks/metrics.yaml @@ -0,0 +1,5 @@ +tasks: +- cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_mds_metrics diff --git a/qa/suites/fs/functional/tasks/multimds_misc.yaml b/qa/suites/fs/functional/tasks/multimds_misc.yaml new file mode 100644 index 000000000..8cdf90310 --- /dev/null +++ b/qa/suites/fs/functional/tasks/multimds_misc.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + log-ignorelist: + - Scrub error on inode + +tasks: +- cephfs_test_runner: + modules: + - tasks.cephfs.test_multimds_misc diff --git a/qa/suites/fs/functional/tasks/openfiletable.yaml b/qa/suites/fs/functional/tasks/openfiletable.yaml new file mode 100644 index 000000000..20cfa4f37 --- /dev/null +++ b/qa/suites/fs/functional/tasks/openfiletable.yaml @@ -0,0 +1,4 
@@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_openfiletable diff --git a/qa/suites/fs/functional/tasks/pool-perm.yaml b/qa/suites/fs/functional/tasks/pool-perm.yaml new file mode 100644 index 000000000..41392fe51 --- /dev/null +++ b/qa/suites/fs/functional/tasks/pool-perm.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_pool_perm diff --git a/qa/suites/fs/functional/tasks/quota.yaml b/qa/suites/fs/functional/tasks/quota.yaml new file mode 100644 index 000000000..d44021e48 --- /dev/null +++ b/qa/suites/fs/functional/tasks/quota.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_quota diff --git a/qa/suites/fs/functional/tasks/recovery-fs.yaml b/qa/suites/fs/functional/tasks/recovery-fs.yaml new file mode 100644 index 000000000..d354e9fbe --- /dev/null +++ b/qa/suites/fs/functional/tasks/recovery-fs.yaml @@ -0,0 +1,4 @@ +tasks: +- cephfs_test_runner: + modules: + - tasks.cephfs.test_recovery_fs diff --git a/qa/suites/fs/functional/tasks/scrub.yaml b/qa/suites/fs/functional/tasks/scrub.yaml new file mode 100644 index 000000000..09e666849 --- /dev/null +++ b/qa/suites/fs/functional/tasks/scrub.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + log-ignorelist: + - Replacing daemon mds + - Scrub error on inode + - Behind on trimming + - Metadata damage detected + - bad backtrace on inode + - overall HEALTH_ + - \(MDS_TRIM\) + conf: + mds: + mds log max segments: 1 + mds cache max size: 1000 +tasks: +- cephfs_test_runner: + modules: + - tasks.cephfs.test_scrub_checks + - tasks.cephfs.test_scrub diff --git a/qa/suites/fs/functional/tasks/sessionmap.yaml b/qa/suites/fs/functional/tasks/sessionmap.yaml new file mode 100644 index 000000000..c12632e77 --- /dev/null +++ b/qa/suites/fs/functional/tasks/sessionmap.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + log-ignorelist: + - client session with non-allowable root +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_sessionmap diff --git a/qa/suites/fs/functional/tasks/snap-schedule.yaml b/qa/suites/fs/functional/tasks/snap-schedule.yaml new file mode 100644 index 000000000..f2e62b050 --- /dev/null +++ b/qa/suites/fs/functional/tasks/snap-schedule.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + conf: + mgr: + debug mgr: 20 + debug ms: 1 + debug finisher: 20 + debug client: 20 + log-whitelist: + - OSD full dropping all updates + - OSD near full + - pausewr flag + - failsafe engaged, dropping updates + - failsafe disengaged, no longer dropping + - is full \(reached quota + - POOL_FULL + - POOL_BACKFILLFULL + +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_snap_schedules diff --git a/qa/suites/fs/functional/tasks/snap_schedule_snapdir.yaml b/qa/suites/fs/functional/tasks/snap_schedule_snapdir.yaml new file mode 100644 index 000000000..7bbcf000f --- /dev/null +++ b/qa/suites/fs/functional/tasks/snap_schedule_snapdir.yaml @@ -0,0 +1,30 @@ +overrides: + ceph: + conf: + mgr: + debug mgr: 20 + debug ms: 1 + debug finisher: 20 + debug client: 20 + log-whitelist: + - OSD full dropping all updates + - OSD near full + - pausewr flag + - failsafe engaged, dropping updates + - failsafe disengaged, no longer dropping + - is full \(reached quota + - POOL_FULL + - POOL_BACKFILLFULL + +overrides: + kclient: + snapdirname: .customsnapkernel + ceph: + conf: + client: + client snapdir: .customsnapfuse + +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_snap_schedules.TestSnapSchedulesSnapdir diff --git 
a/qa/suites/fs/functional/tasks/snapshots.yaml b/qa/suites/fs/functional/tasks/snapshots.yaml new file mode 100644 index 000000000..d5951468b --- /dev/null +++ b/qa/suites/fs/functional/tasks/snapshots.yaml @@ -0,0 +1,16 @@ +overrides: + check-counter: + dry_run: true + ceph: + log-ignorelist: + - evicting unresponsive client + - RECENT_CRASH + +tasks: +- exec: + mon.a: + - ceph config set mgr mgr/crash/warn_recent_interval 0 +- cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_snapshots diff --git a/qa/suites/fs/functional/tasks/strays.yaml b/qa/suites/fs/functional/tasks/strays.yaml new file mode 100644 index 000000000..3866ce510 --- /dev/null +++ b/qa/suites/fs/functional/tasks/strays.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_strays diff --git a/qa/suites/fs/functional/tasks/test_journal_migration.yaml b/qa/suites/fs/functional/tasks/test_journal_migration.yaml new file mode 100644 index 000000000..f5c4dbc70 --- /dev/null +++ b/qa/suites/fs/functional/tasks/test_journal_migration.yaml @@ -0,0 +1,4 @@ +tasks: +- cephfs_test_runner: + modules: + - tasks.cephfs.test_journal_migration diff --git a/qa/suites/fs/functional/tasks/truncate_delay.yaml b/qa/suites/fs/functional/tasks/truncate_delay.yaml new file mode 100644 index 000000000..acd1a5a04 --- /dev/null +++ b/qa/suites/fs/functional/tasks/truncate_delay.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + client: + ms_inject_delay_probability: 1 + ms_inject_delay_type: osd + ms_inject_delay_max: 5 + client_oc_max_dirty_age: 1 +tasks: +- exec: + client.0: + - cd $TESTDIR/mnt.0 && dd if=/dev/zero of=./foo count=100 + - sleep 2 + - cd $TESTDIR/mnt.0 && truncate --size 0 ./foo diff --git a/qa/suites/fs/functional/tasks/workunit/.qa b/qa/suites/fs/functional/tasks/workunit/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/functional/tasks/workunit/.qa @@ -0,0 +1 @@ +../.qa/
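The log-ignorelist (and older log-whitelist) entries in the task fragments above are regular expressions that teuthology matches against cluster log lines, which is why health codes are written with escaped parentheses such as \(MDS_TRIM\). A minimal sketch of that matching in Python, illustrative only and not the actual teuthology implementation:

    import re

    # a few entries copied from the scrub.yaml fragment above
    ignorelist = [
        r"Scrub error on inode",
        r"Behind on trimming",
        r"\(MDS_TRIM\)",
    ]

    def is_ignored(log_line):
        # a warning/error line is tolerated if any ignorelist pattern matches it
        return any(re.search(pattern, log_line) for pattern in ignorelist)

    print(is_ignored("overall HEALTH_WARN 1 MDSs behind on trimming (MDS_TRIM)"))  # True
    print(is_ignored("OSD_DOWN: 1 osds down"))                                     # False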
\ No newline at end of file diff --git a/qa/suites/fs/functional/tasks/workunit/dir-max-entries.yaml b/qa/suites/fs/functional/tasks/workunit/dir-max-entries.yaml new file mode 100644 index 000000000..087dcc3d5 --- /dev/null +++ b/qa/suites/fs/functional/tasks/workunit/dir-max-entries.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - fs/maxentries diff --git a/qa/suites/fs/functional/tasks/workunit/quota.yaml b/qa/suites/fs/functional/tasks/workunit/quota.yaml new file mode 100644 index 000000000..7ac8714c5 --- /dev/null +++ b/qa/suites/fs/functional/tasks/workunit/quota.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - fs/quota diff --git a/qa/suites/fs/libcephfs/% b/qa/suites/fs/libcephfs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/libcephfs/% diff --git a/qa/suites/fs/libcephfs/.qa b/qa/suites/fs/libcephfs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/libcephfs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/begin b/qa/suites/fs/libcephfs/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/libcephfs/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/clusters/.qa b/qa/suites/fs/libcephfs/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/libcephfs/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/clusters/1-mds-1-client-coloc.yaml b/qa/suites/fs/libcephfs/clusters/1-mds-1-client-coloc.yaml new file mode 120000 index 000000000..d15ecfda0 --- /dev/null +++ b/qa/suites/fs/libcephfs/clusters/1-mds-1-client-coloc.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-1-client-coloc.yaml
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/conf b/qa/suites/fs/libcephfs/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/libcephfs/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/distro b/qa/suites/fs/libcephfs/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/libcephfs/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/objectstore/.qa b/qa/suites/fs/libcephfs/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/libcephfs/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/libcephfs/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/libcephfs/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/overrides/+ b/qa/suites/fs/libcephfs/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/libcephfs/overrides/+ diff --git a/qa/suites/fs/libcephfs/overrides/.qa b/qa/suites/fs/libcephfs/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/libcephfs/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/overrides/ignorelist_health.yaml b/qa/suites/fs/libcephfs/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/libcephfs/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/libcephfs/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/libcephfs/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/overrides/no_client_pidfile.yaml b/qa/suites/fs/libcephfs/overrides/no_client_pidfile.yaml new file mode 120000 index 000000000..8888f3327 --- /dev/null +++ b/qa/suites/fs/libcephfs/overrides/no_client_pidfile.yaml @@ -0,0 +1 @@ +.qa/overrides/no_client_pidfile.yaml
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/tasks/.qa b/qa/suites/fs/libcephfs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/tasks/client.yaml b/qa/suites/fs/libcephfs/tasks/client.yaml new file mode 100644 index 000000000..bfdfee4a8 --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/client.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + client: + debug ms: 1 + debug client: 20 + mds: + debug ms: 1 + debug mds: 20 +tasks: +- workunit: + clients: + client.0: + - client/test.sh diff --git a/qa/suites/fs/libcephfs/tasks/ino_release_cb.yaml b/qa/suites/fs/libcephfs/tasks/ino_release_cb.yaml new file mode 100644 index 000000000..5b5247489 --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/ino_release_cb.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + conf: + client: + debug ms: 1 + debug client: 20 + mds: + debug ms: 1 + debug mds: 20 +tasks: +- exec: + mon.a: + - ceph config set mds mds_min_caps_per_client 1 +- background_exec: + mon.a: + - "sleep 30 && ceph config set mds mds_max_caps_per_client 1" +- exec: + client.0: + - ceph_test_ino_release_cb diff --git a/qa/suites/fs/libcephfs/tasks/libcephfs/+ b/qa/suites/fs/libcephfs/tasks/libcephfs/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/libcephfs/+ diff --git a/qa/suites/fs/libcephfs/tasks/libcephfs/.qa b/qa/suites/fs/libcephfs/tasks/libcephfs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/libcephfs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/tasks/libcephfs/frag.yaml b/qa/suites/fs/libcephfs/tasks/libcephfs/frag.yaml new file mode 120000 index 000000000..5e5cdaed8 --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/libcephfs/frag.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/frag.yaml
\ No newline at end of file diff --git a/qa/suites/fs/libcephfs/tasks/libcephfs/test.yaml b/qa/suites/fs/libcephfs/tasks/libcephfs/test.yaml new file mode 100644 index 000000000..70afa2da3 --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/libcephfs/test.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + conf: + client: + debug ms: 1 + debug client: 20 + mds: + debug ms: 1 + debug mds: 20 +tasks: +- check-counter: + counters: + mds: + - "mds.dir_split" +- workunit: + clients: + client.0: + - libcephfs/test.sh diff --git a/qa/suites/fs/libcephfs/tasks/libcephfs_python.yaml b/qa/suites/fs/libcephfs/tasks/libcephfs_python.yaml new file mode 100644 index 000000000..68ccd579c --- /dev/null +++ b/qa/suites/fs/libcephfs/tasks/libcephfs_python.yaml @@ -0,0 +1,17 @@ +overrides: + ceph-fuse: + disabled: true + kclient: + disabled: true + install: + ceph: + extra_system_packages: + deb: + - python3-pytest + rpm: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - fs/test_python.sh diff --git a/qa/suites/fs/mirror-ha/% b/qa/suites/fs/mirror-ha/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror-ha/% diff --git a/qa/suites/fs/mirror-ha/.qa b/qa/suites/fs/mirror-ha/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror-ha/.qa @@ -0,0 +1 @@ +../.qa/
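The libcephfs test.yaml fragment above uses check-counter to require that the mds.dir_split perf counter is bumped during the run. A hand-rolled sketch of that kind of assertion against the MDS admin socket, shown only for illustration (the real check is performed by teuthology's check-counter task, and the daemon name mds.a is an assumption):

    import json
    import subprocess

    # query the MDS perf counters over the admin socket (daemon name assumed to be mds.a)
    out = subprocess.check_output(["sudo", "ceph", "daemon", "mds.a", "perf", "dump"])
    counters = json.loads(out)

    # "mds.dir_split" in the yaml refers to the dir_split counter in the mds section
    assert counters["mds"]["dir_split"] > 0, "expected at least one directory split"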
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/begin b/qa/suites/fs/mirror-ha/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/mirror-ha/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/cephfs-mirror/+ b/qa/suites/fs/mirror-ha/cephfs-mirror/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror-ha/cephfs-mirror/+ diff --git a/qa/suites/fs/mirror-ha/cephfs-mirror/.qa b/qa/suites/fs/mirror-ha/cephfs-mirror/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror-ha/cephfs-mirror/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/cephfs-mirror/1-volume-create-rm.yaml b/qa/suites/fs/mirror-ha/cephfs-mirror/1-volume-create-rm.yaml new file mode 100644 index 000000000..4ee16e1c9 --- /dev/null +++ b/qa/suites/fs/mirror-ha/cephfs-mirror/1-volume-create-rm.yaml @@ -0,0 +1,14 @@ +meta: +- desc: create/rm volumes and set configs + +tasks: + - exec: + mon.a: + - "ceph fs volume create dc" + - "ceph fs volume create dc-backup" + - full_sequential_finally: + - exec: + mon.a: + - ceph config set mon mon_allow_pool_delete true + - ceph fs volume rm dc --yes-i-really-mean-it + - ceph fs volume rm dc-backup --yes-i-really-mean-it diff --git a/qa/suites/fs/mirror-ha/cephfs-mirror/2-three-per-cluster.yaml b/qa/suites/fs/mirror-ha/cephfs-mirror/2-three-per-cluster.yaml new file mode 100644 index 000000000..095f0893a --- /dev/null +++ b/qa/suites/fs/mirror-ha/cephfs-mirror/2-three-per-cluster.yaml @@ -0,0 +1,12 @@ +meta: +- desc: run one cephfs-mirror daemon on primary cluster +tasks: +- cephfs-mirror: + client: client.mirror1 + run_in_foreground: True +- cephfs-mirror: + client: client.mirror2 + run_in_foreground: True +- cephfs-mirror: + client: client.mirror3 + run_in_foreground: True diff --git a/qa/suites/fs/mirror-ha/clients/+ b/qa/suites/fs/mirror-ha/clients/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror-ha/clients/+ diff --git a/qa/suites/fs/mirror-ha/clients/.qa b/qa/suites/fs/mirror-ha/clients/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror-ha/clients/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/clients/mirror.yaml b/qa/suites/fs/mirror-ha/clients/mirror.yaml new file mode 100644 index 000000000..620c821e1 --- /dev/null +++ b/qa/suites/fs/mirror-ha/clients/mirror.yaml @@ -0,0 +1,32 @@ +meta: +- desc: configure the permissions for client.mirror +overrides: + ceph: + conf: + client: + debug cephfs_mirror: 20 + log to stderr: false + # make these predictable + client.mirror1: + admin socket: /var/run/ceph/cephfs-mirror1.asok + pid file: /var/run/ceph/cephfs-mirror1.pid + client.mirror2: + admin socket: /var/run/ceph/cephfs-mirror2.asok + pid file: /var/run/ceph/cephfs-mirror2.pid + client.mirror3: + admin socket: /var/run/ceph/cephfs-mirror3.asok + pid file: /var/run/ceph/cephfs-mirror3.pid +tasks: +- exec: + client.mirror1: + - "sudo ceph auth caps client.mirror1 mon 'profile cephfs-mirror' mds 'allow r' osd 'allow rw tag cephfs metadata=*, allow r tag cephfs data=*' mgr 'allow r'" + client.mirror2: + - "sudo ceph auth caps client.mirror2 mon 'profile cephfs-mirror' mds 'allow r' osd 'allow rw tag cephfs metadata=*, allow r tag cephfs data=*' mgr 'allow r'" + client.mirror3: + - "sudo ceph auth caps client.mirror3 mon 'profile cephfs-mirror' mds 'allow r' osd 'allow rw tag cephfs metadata=*, allow r tag cephfs data=*' mgr 'allow r'" + client.mirror_remote: + - "sudo ceph auth caps client.mirror_remote mon 'allow r' mds 'allow rwps' osd 'allow rw tag cephfs *=*' mgr 'allow r'" + client.1: + - "sudo ceph auth caps client.0 mon 'allow r' mds 'allow rwps' osd 'allow rw tag cephfs *=*' mgr 'allow r'" + client.2: + - "sudo ceph auth caps client.1 mon 'allow r' mds 'allow rwps' osd 'allow rw tag cephfs *=*' mgr 'allow r'" diff --git a/qa/suites/fs/mirror-ha/cluster/+ b/qa/suites/fs/mirror-ha/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror-ha/cluster/+ diff --git a/qa/suites/fs/mirror-ha/cluster/.qa b/qa/suites/fs/mirror-ha/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror-ha/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/cluster/1-node.yaml b/qa/suites/fs/mirror-ha/cluster/1-node.yaml new file mode 100644 index 000000000..cc70c106d --- /dev/null +++ b/qa/suites/fs/mirror-ha/cluster/1-node.yaml @@ -0,0 +1,20 @@ +meta: +- desc: 1 ceph cluster with 1 mon, 1 mgr, 3 osds, 5 mdss +roles: +- - mon.a + - mgr.x + - mds.a + - mds.b + - mds.c + - mds.d + - mds.e + - osd.0 + - osd.1 + - osd.2 + - client.0 + - client.1 + - client.2 + - client.mirror1 + - client.mirror2 + - client.mirror3 + - client.mirror_remote
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/objectstore/.qa b/qa/suites/fs/mirror-ha/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror-ha/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/mirror-ha/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/mirror-ha/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/overrides/+ b/qa/suites/fs/mirror-ha/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror-ha/overrides/+ diff --git a/qa/suites/fs/mirror-ha/overrides/.qa b/qa/suites/fs/mirror-ha/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror-ha/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/overrides/whitelist_health.yaml b/qa/suites/fs/mirror-ha/overrides/whitelist_health.yaml new file mode 100644 index 000000000..d40fa4cb8 --- /dev/null +++ b/qa/suites/fs/mirror-ha/overrides/whitelist_health.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_DEGRADED\) + - \(MDS_FAILED\) + - \(MDS_DEGRADED\) + - \(FS_WITH_FAILED_MDS\) + - \(MDS_DAMAGE\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(FS_INLINE_DATA_DEPRECATED\) + - Reduced data availability + - Degraded data redundancy diff --git a/qa/suites/fs/mirror-ha/supported-random-distro$ b/qa/suites/fs/mirror-ha/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/mirror-ha/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/workloads/.qa b/qa/suites/fs/mirror-ha/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror-ha/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/workloads/cephfs-mirror-ha-workunit.yaml b/qa/suites/fs/mirror-ha/workloads/cephfs-mirror-ha-workunit.yaml new file mode 100644 index 000000000..ce4dddf78 --- /dev/null +++ b/qa/suites/fs/mirror-ha/workloads/cephfs-mirror-ha-workunit.yaml @@ -0,0 +1,33 @@ +meta: +- desc: run the cephfs_mirror_ha.sh workunit to test cephfs-mirror daemon in HA active/active mode + +overrides: + ceph: + conf: + mgr: + debug client: 10 + +tasks: + - ceph-fuse: + client.1: + cephfs_name: dc + client.2: + cephfs_name: dc-backup + - cephfs_mirror_thrash: + randomize: False + max_thrash_delay: 10 + - workunit: + subdir: mirror + cleanup: False + clients: + client.1: [fs/cephfs_mirror_ha_gen.sh] + timeout: 1h + - exec: + client.2: + - "echo verifying synchronized snapshots..." + - workunit: + subdir: mirror + cleanup: False + clients: + client.2: [fs/cephfs_mirror_ha_verify.sh] + timeout: 3h
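Taken together, the mirror-ha fragments create two file systems (dc and dc-backup), run three cephfs-mirror daemons, generate snapshots from client.1 and verify them from client.2. A conceptual sketch of that flow, with hypothetical mount paths and snapshot names; the actual logic lives in the fs/cephfs_mirror_ha_gen.sh and fs/cephfs_mirror_ha_verify.sh workunits referenced above:

    import os
    import time

    primary = "/mnt/dc/dir0"         # hypothetical mount point of fs "dc" on client.1
    backup = "/mnt/dc-backup/dir0"   # hypothetical mount point of fs "dc-backup" on client.2

    # generator side: write some data, then take a CephFS snapshot by creating a .snap subdir
    os.makedirs(primary, exist_ok=True)
    with open(os.path.join(primary, "file"), "wb") as f:
        f.write(os.urandom(1 << 20))
    os.mkdir(os.path.join(primary, ".snap", "snap0"))

    # verifier side: the cephfs-mirror daemons replicate snapshots, so poll until it shows up
    while not os.path.isdir(os.path.join(backup, ".snap", "snap0")):
        time.sleep(10)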
\ No newline at end of file diff --git a/qa/suites/fs/mirror/% b/qa/suites/fs/mirror/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror/% diff --git a/qa/suites/fs/mirror/.qa b/qa/suites/fs/mirror/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/begin b/qa/suites/fs/mirror/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/mirror/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/cephfs-mirror/.qa b/qa/suites/fs/mirror/cephfs-mirror/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/cephfs-mirror/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/cephfs-mirror/one-per-cluster.yaml b/qa/suites/fs/mirror/cephfs-mirror/one-per-cluster.yaml new file mode 100644 index 000000000..c355a9db5 --- /dev/null +++ b/qa/suites/fs/mirror/cephfs-mirror/one-per-cluster.yaml @@ -0,0 +1,5 @@ +meta: +- desc: run one cephfs-mirror daemon on primary cluster +tasks: +- cephfs-mirror: + client: client.mirror diff --git a/qa/suites/fs/mirror/clients/+ b/qa/suites/fs/mirror/clients/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror/clients/+ diff --git a/qa/suites/fs/mirror/clients/.qa b/qa/suites/fs/mirror/clients/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/clients/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/clients/mirror.yaml b/qa/suites/fs/mirror/clients/mirror.yaml new file mode 100644 index 000000000..1a68fea8b --- /dev/null +++ b/qa/suites/fs/mirror/clients/mirror.yaml @@ -0,0 +1,18 @@ +meta: +- desc: configure the permissions for client.mirror +overrides: + ceph: + conf: + client: + debug cephfs_mirror: 20 + log to stderr: false + # make these predictable + client.mirror: + admin socket: /var/run/ceph/cephfs-mirror.asok + pid file: /var/run/ceph/cephfs-mirror.pid +tasks: +- exec: + client.mirror: + - "sudo ceph auth caps client.mirror mon 'profile cephfs-mirror' mds 'allow r' osd 'allow rw tag cephfs metadata=*, allow r tag cephfs data=*' mgr 'allow r'" + client.mirror_remote: + - "sudo ceph auth caps client.mirror_remote mon 'allow r' mds 'allow rwps' osd 'allow rw tag cephfs *=*' mgr 'allow r'" diff --git a/qa/suites/fs/mirror/cluster/+ b/qa/suites/fs/mirror/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror/cluster/+ diff --git a/qa/suites/fs/mirror/cluster/.qa b/qa/suites/fs/mirror/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/cluster/1-node.yaml b/qa/suites/fs/mirror/cluster/1-node.yaml new file mode 100644 index 000000000..a9748e071 --- /dev/null +++ b/qa/suites/fs/mirror/cluster/1-node.yaml @@ -0,0 +1,17 @@ +meta: +- desc: 1 ceph cluster with 1 mon, 1 mgr, 3 osds, 5 mdss +roles: +- - mon.a + - mgr.x + - mds.a + - mds.b + - mds.c + - mds.d + - mds.e + - osd.0 + - osd.1 + - osd.2 + - client.0 + - client.1 + - client.mirror + - client.mirror_remote
\ No newline at end of file diff --git a/qa/suites/fs/mirror/mount/.qa b/qa/suites/fs/mirror/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/mount/fuse.yaml b/qa/suites/fs/mirror/mount/fuse.yaml new file mode 100644 index 000000000..1fdf55ab4 --- /dev/null +++ b/qa/suites/fs/mirror/mount/fuse.yaml @@ -0,0 +1,2 @@ +tasks: + - ceph-fuse: [client.0, client.1] diff --git a/qa/suites/fs/mirror/objectstore/.qa b/qa/suites/fs/mirror/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/mirror/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/mirror/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mirror/overrides/+ b/qa/suites/fs/mirror/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mirror/overrides/+ diff --git a/qa/suites/fs/mirror/overrides/.qa b/qa/suites/fs/mirror/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/overrides/whitelist_health.yaml b/qa/suites/fs/mirror/overrides/whitelist_health.yaml new file mode 100644 index 000000000..d40fa4cb8 --- /dev/null +++ b/qa/suites/fs/mirror/overrides/whitelist_health.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_DEGRADED\) + - \(MDS_FAILED\) + - \(MDS_DEGRADED\) + - \(FS_WITH_FAILED_MDS\) + - \(MDS_DAMAGE\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(FS_INLINE_DATA_DEPRECATED\) + - Reduced data availability + - Degraded data redundancy diff --git a/qa/suites/fs/mirror/supported-random-distros$ b/qa/suites/fs/mirror/supported-random-distros$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/mirror/supported-random-distros$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/mirror/tasks/.qa b/qa/suites/fs/mirror/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mirror/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mirror/tasks/mirror.yaml b/qa/suites/fs/mirror/tasks/mirror.yaml new file mode 100644 index 000000000..07c1e24ef --- /dev/null +++ b/qa/suites/fs/mirror/tasks/mirror.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + mgr: + debug client: 10 + +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_mirroring.TestMirroring diff --git a/qa/suites/fs/mixed-clients/% b/qa/suites/fs/mixed-clients/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mixed-clients/% diff --git a/qa/suites/fs/mixed-clients/.qa b/qa/suites/fs/mixed-clients/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mixed-clients/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/begin b/qa/suites/fs/mixed-clients/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/mixed-clients/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/clusters/.qa b/qa/suites/fs/mixed-clients/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mixed-clients/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/clusters/1a3s-mds-2c-client.yaml b/qa/suites/fs/mixed-clients/clusters/1a3s-mds-2c-client.yaml new file mode 120000 index 000000000..c190ea92f --- /dev/null +++ b/qa/suites/fs/mixed-clients/clusters/1a3s-mds-2c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a3s-mds-2c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/conf b/qa/suites/fs/mixed-clients/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/mixed-clients/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/distro/$ b/qa/suites/fs/mixed-clients/distro/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/$ diff --git a/qa/suites/fs/mixed-clients/distro/.qa b/qa/suites/fs/mixed-clients/distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/distro/centos_8.yaml b/qa/suites/fs/mixed-clients/distro/centos_8.yaml new file mode 120000 index 000000000..380a1443b --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/centos_8.yaml @@ -0,0 +1 @@ +.qa/distros/all/centos_8.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/distro/rhel_8.yaml b/qa/suites/fs/mixed-clients/distro/rhel_8.yaml new file mode 120000 index 000000000..133acf27b --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/rhel_8.yaml @@ -0,0 +1 @@ +.qa/distros/all/rhel_8.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/distro/ubuntu/+ b/qa/suites/fs/mixed-clients/distro/ubuntu/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/ubuntu/+ diff --git a/qa/suites/fs/mixed-clients/distro/ubuntu/.qa b/qa/suites/fs/mixed-clients/distro/ubuntu/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/ubuntu/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/distro/ubuntu/latest.yaml b/qa/suites/fs/mixed-clients/distro/ubuntu/latest.yaml new file mode 120000 index 000000000..0a708b4db --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/ubuntu/latest.yaml @@ -0,0 +1 @@ +.qa/distros/all/ubuntu_22.04.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/distro/ubuntu/overrides.yaml b/qa/suites/fs/mixed-clients/distro/ubuntu/overrides.yaml new file mode 100644 index 000000000..fdd7f5e5a --- /dev/null +++ b/qa/suites/fs/mixed-clients/distro/ubuntu/overrides.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + valgrind: + exit_on_first_error: false diff --git a/qa/suites/fs/mixed-clients/kclient-overrides b/qa/suites/fs/mixed-clients/kclient-overrides new file mode 120000 index 000000000..58b04fb24 --- /dev/null +++ b/qa/suites/fs/mixed-clients/kclient-overrides @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/overrides/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/objectstore-ec b/qa/suites/fs/mixed-clients/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/mixed-clients/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/overrides/+ b/qa/suites/fs/mixed-clients/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/mixed-clients/overrides/+ diff --git a/qa/suites/fs/mixed-clients/overrides/.qa b/qa/suites/fs/mixed-clients/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mixed-clients/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/overrides/ignorelist_health.yaml b/qa/suites/fs/mixed-clients/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/mixed-clients/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/mixed-clients/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/mixed-clients/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/overrides/osd-asserts.yaml b/qa/suites/fs/mixed-clients/overrides/osd-asserts.yaml new file mode 120000 index 000000000..f290c749b --- /dev/null +++ b/qa/suites/fs/mixed-clients/overrides/osd-asserts.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/osd-asserts.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/tasks/.qa b/qa/suites/fs/mixed-clients/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/mixed-clients/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/mixed-clients/tasks/kernel_cfuse_workunits_dbench_iozone.yaml b/qa/suites/fs/mixed-clients/tasks/kernel_cfuse_workunits_dbench_iozone.yaml new file mode 100644 index 000000000..78b2d7611 --- /dev/null +++ b/qa/suites/fs/mixed-clients/tasks/kernel_cfuse_workunits_dbench_iozone.yaml @@ -0,0 +1,18 @@ +tasks: +- parallel: + - user-workload + - kclient-workload +user-workload: + sequential: + - ceph-fuse: [client.0] + - workunit: + clients: + client.0: + - suites/iozone.sh +kclient-workload: + sequential: + - kclient: [client.1] + - workunit: + clients: + client.1: + - suites/dbench.sh diff --git a/qa/suites/fs/mixed-clients/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml b/qa/suites/fs/mixed-clients/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml new file mode 100644 index 000000000..d637ff989 --- /dev/null +++ b/qa/suites/fs/mixed-clients/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml @@ -0,0 +1,18 @@ +tasks: +- parallel: + - user-workload + - kclient-workload +user-workload: + sequential: + - ceph-fuse: [client.0] + - workunit: + clients: + client.0: + - suites/blogbench.sh +kclient-workload: + sequential: + - kclient: [client.1] + - workunit: + clients: + client.1: + - kernel_untar_build.sh diff --git a/qa/suites/fs/multiclient/% b/qa/suites/fs/multiclient/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/multiclient/% diff --git a/qa/suites/fs/multiclient/.qa b/qa/suites/fs/multiclient/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multiclient/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/begin b/qa/suites/fs/multiclient/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/multiclient/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/clusters/.qa b/qa/suites/fs/multiclient/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multiclient/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/clusters/1-mds-2-client.yaml b/qa/suites/fs/multiclient/clusters/1-mds-2-client.yaml new file mode 120000 index 000000000..9f4f161a3 --- /dev/null +++ b/qa/suites/fs/multiclient/clusters/1-mds-2-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-2-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/clusters/1-mds-3-client.yaml b/qa/suites/fs/multiclient/clusters/1-mds-3-client.yaml new file mode 120000 index 000000000..6b25e07c4 --- /dev/null +++ b/qa/suites/fs/multiclient/clusters/1-mds-3-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-3-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/conf b/qa/suites/fs/multiclient/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/multiclient/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/distros/.qa b/qa/suites/fs/multiclient/distros/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multiclient/distros/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/distros/ubuntu_latest.yaml b/qa/suites/fs/multiclient/distros/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/fs/multiclient/distros/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/mount/.qa b/qa/suites/fs/multiclient/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multiclient/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/mount/fuse.yaml b/qa/suites/fs/multiclient/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/multiclient/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/mount/kclient.yaml.disabled b/qa/suites/fs/multiclient/mount/kclient.yaml.disabled new file mode 100644 index 000000000..f00f16aea --- /dev/null +++ b/qa/suites/fs/multiclient/mount/kclient.yaml.disabled @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- kclient: diff --git a/qa/suites/fs/multiclient/objectstore-ec b/qa/suites/fs/multiclient/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/multiclient/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/overrides/+ b/qa/suites/fs/multiclient/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/multiclient/overrides/+ diff --git a/qa/suites/fs/multiclient/overrides/.qa b/qa/suites/fs/multiclient/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multiclient/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/overrides/ignorelist_health.yaml b/qa/suites/fs/multiclient/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/multiclient/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/multiclient/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/multiclient/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/tasks/.qa b/qa/suites/fs/multiclient/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multiclient/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml b/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml new file mode 100644 index 000000000..e6d6ef99b --- /dev/null +++ b/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml @@ -0,0 +1,14 @@ +tasks: +- cephfs_test_runner: + modules: + - tasks.cephfs.test_misc + +overrides: + ceph: + log-ignorelist: + - evicting unresponsive client + - POOL_APP_NOT_ENABLED + - has not responded to cap revoke by MDS for over + - MDS_CLIENT_LATE_RELEASE + - responding to mclientcaps + - RECENT_CRASH diff --git a/qa/suites/fs/multiclient/tasks/fsx-mpi.yaml.disabled b/qa/suites/fs/multiclient/tasks/fsx-mpi.yaml.disabled new file mode 100644 index 000000000..888de867f --- /dev/null +++ b/qa/suites/fs/multiclient/tasks/fsx-mpi.yaml.disabled @@ -0,0 +1,17 @@ +# make sure we get the same MPI version on all hosts +tasks: +- pexec: + clients: + - cd $TESTDIR + - wget http://download.ceph.com/qa/fsx-mpi.c + - mpicc fsx-mpi.c -o fsx-mpi + - rm fsx-mpi.c + - ln -s $TESTDIR/mnt.* $TESTDIR/gmnt +- ssh_keys: +- mpi: + exec: sudo $TESTDIR/fsx-mpi -o 1MB -N 50000 -p 10000 -l 1048576 $TESTDIR/gmnt/test + workdir: $TESTDIR/gmnt +- pexec: + clients: + - rm $TESTDIR/gmnt + - rm $TESTDIR/fsx-mpi diff --git a/qa/suites/fs/multiclient/tasks/ior-shared-file.yaml b/qa/suites/fs/multiclient/tasks/ior-shared-file.yaml new file mode 100644 index 000000000..8293595e2 --- /dev/null +++ b/qa/suites/fs/multiclient/tasks/ior-shared-file.yaml @@ -0,0 +1,31 @@ +# make sure we get the same MPI version on all hosts +tasks: +- pexec: + clients: + - set -x + - cd $TESTDIR + # partially or incorrectly installed mpich will create a mess and the + # configure script or the build process (which is initiated using "make" + # command) for the ior project will fail + - sudo apt purge -y mpich + - sudo apt install -y mpich + - wget http://download.ceph.com/qa/ior-3.3.0.tar.bz2 + - tar xvfj ior-3.3.0.tar.bz2 + - cd ior-3.3.0 + - ./configure + - make + - make install DESTDIR=$TESTDIR/binary/ + - cd $TESTDIR/ + - sudo apt install -y tree + - tree binary/ + - rm ior-3.3.0.tar.bz2 + - rm -r ior-3.3.0 + - ln -s $TESTDIR/mnt.* $TESTDIR/gmnt +- ssh_keys: +- mpi: + exec: $TESTDIR/binary/usr/local/bin/ior -e -w -r -W -b 10m -a POSIX -o $TESTDIR/gmnt/ior.testfile +- pexec: + clients: + - rm -f $TESTDIR/gmnt/ior.testfile + - rm -f $TESTDIR/gmnt + - rm -rf $TESTDIR/binary diff --git a/qa/suites/fs/multiclient/tasks/mdtest.yaml b/qa/suites/fs/multiclient/tasks/mdtest.yaml new file mode 100644 index 000000000..32720e488 --- /dev/null +++ b/qa/suites/fs/multiclient/tasks/mdtest.yaml @@ -0,0 +1,34 @@ +# make sure we get the same MPI version on all hosts +tasks: +- pexec: + clients: + - set -x + - cd $TESTDIR + - sudo apt purge -y mpich + - sudo apt install -y mpich + # use ior project instead of mdtest project because latter has been + # merged into former. See: + # https://github.com/MDTEST-LANL/mdtest/blob/master/README.md + - wget http://download.ceph.com/qa/ior-3.3.0.tar.bz2 + - tar xvfj ior-3.3.0.tar.bz2 + - cd ior-3.3.0 + # this option was set originall when mdtest binary was built using + # mdtest PR and not through ior project. 
+ #- MPI_CC=mpicc make + - ./configure + - make + - make install DESTDIR=$TESTDIR/binary/ + - cd $TESTDIR/ + - sudo apt install -y tree + - tree binary/ + - rm ior-3.3.0.tar.bz2 + - rm -r ior-3.3.0 + - ln -s $TESTDIR/mnt.* $TESTDIR/gmnt +- ssh_keys: +- mpi: + exec: $TESTDIR/binary/usr/local/bin/mdtest -d $TESTDIR/gmnt -I 20 -z 5 -b 2 -R +- pexec: + clients: + - rm -f $TESTDIR/gmnt/ior.testfile + - rm -f $TESTDIR/gmnt + - rm -rf $TESTDIR/binary diff --git a/qa/suites/fs/multifs/% b/qa/suites/fs/multifs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/multifs/% diff --git a/qa/suites/fs/multifs/.qa b/qa/suites/fs/multifs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multifs/.qa @@ -0,0 +1 @@ +../.qa/
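In both the ior-shared-file and mdtest fragments above, each client symlinks its own mount (mnt.*) to a common $TESTDIR/gmnt path so that every MPI rank addresses the shared test file through the same pathname. A small sketch of that setup step, assuming a teuthology-style $TESTDIR with exactly one mnt.* mount per host:

    import glob
    import os

    testdir = os.environ["TESTDIR"]
    # assume each host has exactly one mnt.* mount; expose it under a common name
    mount = glob.glob(os.path.join(testdir, "mnt.*"))[0]
    os.symlink(mount, os.path.join(testdir, "gmnt"))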
\ No newline at end of file diff --git a/qa/suites/fs/multifs/begin b/qa/suites/fs/multifs/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/multifs/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/multifs/clusters/.qa b/qa/suites/fs/multifs/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multifs/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multifs/clusters/1a3s-mds-2c-client.yaml b/qa/suites/fs/multifs/clusters/1a3s-mds-2c-client.yaml new file mode 120000 index 000000000..c190ea92f --- /dev/null +++ b/qa/suites/fs/multifs/clusters/1a3s-mds-2c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a3s-mds-2c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multifs/conf b/qa/suites/fs/multifs/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/multifs/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/multifs/distro b/qa/suites/fs/multifs/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/multifs/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/multifs/mount b/qa/suites/fs/multifs/mount new file mode 120000 index 000000000..e3600f453 --- /dev/null +++ b/qa/suites/fs/multifs/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/multifs/objectstore-ec b/qa/suites/fs/multifs/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/multifs/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/multifs/overrides/+ b/qa/suites/fs/multifs/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/multifs/overrides/+ diff --git a/qa/suites/fs/multifs/overrides/.qa b/qa/suites/fs/multifs/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multifs/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multifs/overrides/ignorelist_health.yaml b/qa/suites/fs/multifs/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/multifs/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multifs/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/multifs/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/multifs/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/multifs/overrides/mon-debug.yaml b/qa/suites/fs/multifs/overrides/mon-debug.yaml new file mode 100644 index 000000000..24b454c00 --- /dev/null +++ b/qa/suites/fs/multifs/overrides/mon-debug.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mon: + debug mon: 20 diff --git a/qa/suites/fs/multifs/tasks/.qa b/qa/suites/fs/multifs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/multifs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/multifs/tasks/failover.yaml b/qa/suites/fs/multifs/tasks/failover.yaml new file mode 100644 index 000000000..9c403c76d --- /dev/null +++ b/qa/suites/fs/multifs/tasks/failover.yaml @@ -0,0 +1,20 @@ +overrides: + ceph: + log-ignorelist: + - Replacing daemon mds + - \(MDS_INSUFFICIENT_STANDBY\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(MDS_DAMAGE\) + - \(FS_DEGRADED\) + ceph-fuse: + disabled: true +tasks: + - exec: + mon.a: + - ceph config set mgr mgr/crash/warn_recent_interval 0 + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_failover + diff --git a/qa/suites/fs/multifs/tasks/multifs-auth.yaml b/qa/suites/fs/multifs/tasks/multifs-auth.yaml new file mode 100644 index 000000000..ed1bdb475 --- /dev/null +++ b/qa/suites/fs/multifs/tasks/multifs-auth.yaml @@ -0,0 +1,5 @@ +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_multifs_auth diff --git a/qa/suites/fs/nfs/% b/qa/suites/fs/nfs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/nfs/% diff --git a/qa/suites/fs/nfs/.qa b/qa/suites/fs/nfs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/nfs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/nfs/cluster/+ b/qa/suites/fs/nfs/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/nfs/cluster/+ diff --git a/qa/suites/fs/nfs/cluster/.qa b/qa/suites/fs/nfs/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/nfs/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/nfs/cluster/1-node.yaml b/qa/suites/fs/nfs/cluster/1-node.yaml new file mode 100644 index 000000000..8eeec7d2d --- /dev/null +++ b/qa/suites/fs/nfs/cluster/1-node.yaml @@ -0,0 +1,16 @@ +meta: +- desc: 1 ceph cluster with 1 mon, 1 mgr, 3 osds, 2 mds, 1 client +roles: +- - host.a + - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +tasks: +- install: +- cephadm: +- cephadm.shell: + host.a: + - ceph orch apply mds a diff --git a/qa/suites/fs/nfs/overrides/.qa b/qa/suites/fs/nfs/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/nfs/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/nfs/overrides/ignorelist_health.yaml b/qa/suites/fs/nfs/overrides/ignorelist_health.yaml new file mode 100644 index 000000000..8bfe4dc6f --- /dev/null +++ b/qa/suites/fs/nfs/overrides/ignorelist_health.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_DEGRADED\) + - \(MDS_FAILED\) + - \(MDS_DEGRADED\) + - \(FS_WITH_FAILED_MDS\) + - \(MDS_DAMAGE\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(FS_INLINE_DATA_DEPRECATED\) + - \(OSD_DOWN\) diff --git a/qa/suites/fs/nfs/supported-random-distros$ b/qa/suites/fs/nfs/supported-random-distros$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/nfs/supported-random-distros$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/nfs/tasks/.qa b/qa/suites/fs/nfs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/nfs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/nfs/tasks/nfs.yaml b/qa/suites/fs/nfs/tasks/nfs.yaml new file mode 100644 index 000000000..aa966bff2 --- /dev/null +++ b/qa/suites/fs/nfs/tasks/nfs.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_nfs diff --git a/qa/suites/fs/permission/% b/qa/suites/fs/permission/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/permission/% diff --git a/qa/suites/fs/permission/.qa b/qa/suites/fs/permission/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/permission/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/permission/begin b/qa/suites/fs/permission/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/permission/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/permission/clusters/.qa b/qa/suites/fs/permission/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/permission/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/permission/clusters/fixed-2-ucephfs.yaml b/qa/suites/fs/permission/clusters/fixed-2-ucephfs.yaml new file mode 120000 index 000000000..b0c41a89a --- /dev/null +++ b/qa/suites/fs/permission/clusters/fixed-2-ucephfs.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/fixed-2-ucephfs.yaml
\ No newline at end of file diff --git a/qa/suites/fs/permission/conf b/qa/suites/fs/permission/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/permission/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/permission/distro b/qa/suites/fs/permission/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/permission/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/permission/mount/.qa b/qa/suites/fs/permission/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/permission/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/permission/mount/fuse.yaml b/qa/suites/fs/permission/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/permission/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/permission/objectstore-ec b/qa/suites/fs/permission/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/permission/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/permission/overrides/+ b/qa/suites/fs/permission/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/permission/overrides/+ diff --git a/qa/suites/fs/permission/overrides/.qa b/qa/suites/fs/permission/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/permission/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/permission/overrides/ignorelist_health.yaml b/qa/suites/fs/permission/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/permission/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/permission/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/permission/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/permission/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/permission/tasks/.qa b/qa/suites/fs/permission/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/permission/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/permission/tasks/cfuse_workunit_misc.yaml b/qa/suites/fs/permission/tasks/cfuse_workunit_misc.yaml new file mode 100644 index 000000000..ca026c45f --- /dev/null +++ b/qa/suites/fs/permission/tasks/cfuse_workunit_misc.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + client: + client acl type: posix_acl +tasks: +- workunit: + clients: + all: + - fs/misc/acl.sh + - fs/misc/chmod.sh + - fs/misc/dac_override.sh diff --git a/qa/suites/fs/permission/tasks/cfuse_workunit_suites_pjd.yaml b/qa/suites/fs/permission/tasks/cfuse_workunit_suites_pjd.yaml new file mode 100644 index 000000000..a81a3b46e --- /dev/null +++ b/qa/suites/fs/permission/tasks/cfuse_workunit_suites_pjd.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + client: + fuse set user groups: true + client acl type: posix_acl +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/fs/shell/% b/qa/suites/fs/shell/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/shell/% diff --git a/qa/suites/fs/shell/.qa b/qa/suites/fs/shell/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/shell/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/shell/begin b/qa/suites/fs/shell/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/shell/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/shell/clusters/.qa b/qa/suites/fs/shell/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/shell/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/shell/clusters/1-mds-1-client-coloc.yaml b/qa/suites/fs/shell/clusters/1-mds-1-client-coloc.yaml new file mode 120000 index 000000000..d15ecfda0 --- /dev/null +++ b/qa/suites/fs/shell/clusters/1-mds-1-client-coloc.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-1-client-coloc.yaml
\ No newline at end of file diff --git a/qa/suites/fs/shell/conf b/qa/suites/fs/shell/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/shell/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/shell/distro b/qa/suites/fs/shell/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/shell/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/shell/mount/.qa b/qa/suites/fs/shell/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/shell/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/shell/mount/fuse.yaml b/qa/suites/fs/shell/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/shell/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/shell/objectstore/.qa b/qa/suites/fs/shell/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/shell/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/shell/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/shell/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/shell/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/shell/overrides/+ b/qa/suites/fs/shell/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/shell/overrides/+ diff --git a/qa/suites/fs/shell/overrides/.qa b/qa/suites/fs/shell/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/shell/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/shell/overrides/ignorelist_health.yaml b/qa/suites/fs/shell/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/shell/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/shell/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/shell/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/shell/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/shell/overrides/no_client_pidfile.yaml b/qa/suites/fs/shell/overrides/no_client_pidfile.yaml new file mode 120000 index 000000000..8888f3327 --- /dev/null +++ b/qa/suites/fs/shell/overrides/no_client_pidfile.yaml @@ -0,0 +1 @@ +.qa/overrides/no_client_pidfile.yaml
\ No newline at end of file diff --git a/qa/suites/fs/shell/tasks/.qa b/qa/suites/fs/shell/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/shell/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/shell/tasks/cephfs-shell.yaml b/qa/suites/fs/shell/tasks/cephfs-shell.yaml new file mode 100644 index 000000000..9708252e9 --- /dev/null +++ b/qa/suites/fs/shell/tasks/cephfs-shell.yaml @@ -0,0 +1,8 @@ +# Right now, cephfs-shell is only available as a package on Ubuntu. +# This overrides the random distribution that's chosen in the other yaml fragments. +os_type: ubuntu +os_version: "20.04" +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_cephfs_shell diff --git a/qa/suites/fs/snaps/% b/qa/suites/fs/snaps/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/snaps/% diff --git a/qa/suites/fs/snaps/.qa b/qa/suites/fs/snaps/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/snaps/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/snaps/begin b/qa/suites/fs/snaps/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/snaps/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/snaps/clusters/.qa b/qa/suites/fs/snaps/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/snaps/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/snaps/clusters/1a3s-mds-1c-client.yaml b/qa/suites/fs/snaps/clusters/1a3s-mds-1c-client.yaml new file mode 120000 index 000000000..4ab7357dc --- /dev/null +++ b/qa/suites/fs/snaps/clusters/1a3s-mds-1c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a3s-mds-1c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/snaps/conf b/qa/suites/fs/snaps/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/snaps/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/snaps/distro b/qa/suites/fs/snaps/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/snaps/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/snaps/mount b/qa/suites/fs/snaps/mount new file mode 120000 index 000000000..e3600f453 --- /dev/null +++ b/qa/suites/fs/snaps/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/snaps/objectstore-ec b/qa/suites/fs/snaps/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/snaps/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/snaps/overrides/+ b/qa/suites/fs/snaps/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/snaps/overrides/+ diff --git a/qa/suites/fs/snaps/overrides/.qa b/qa/suites/fs/snaps/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/snaps/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/snaps/overrides/ignorelist_health.yaml b/qa/suites/fs/snaps/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/snaps/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/snaps/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/snaps/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/snaps/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/snaps/tasks/.qa b/qa/suites/fs/snaps/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/snaps/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/snaps/tasks/workunit/.qa b/qa/suites/fs/snaps/tasks/workunit/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/snaps/tasks/workunit/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/snaps/tasks/workunit/snaps.yaml b/qa/suites/fs/snaps/tasks/workunit/snaps.yaml new file mode 100644 index 000000000..dd5a0abd4 --- /dev/null +++ b/qa/suites/fs/snaps/tasks/workunit/snaps.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + timeout: 6h + clients: + all: + - fs/snaps diff --git a/qa/suites/fs/thrash/.qa b/qa/suites/fs/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/% b/qa/suites/fs/thrash/multifs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/multifs/% diff --git a/qa/suites/fs/thrash/multifs/.qa b/qa/suites/fs/thrash/multifs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/begin b/qa/suites/fs/thrash/multifs/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/thrash/multifs/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/clusters/.qa b/qa/suites/fs/thrash/multifs/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/clusters/1a3s-mds-2c-client.yaml b/qa/suites/fs/thrash/multifs/clusters/1a3s-mds-2c-client.yaml new file mode 120000 index 000000000..c190ea92f --- /dev/null +++ b/qa/suites/fs/thrash/multifs/clusters/1a3s-mds-2c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a3s-mds-2c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/conf b/qa/suites/fs/thrash/multifs/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/thrash/multifs/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/distro b/qa/suites/fs/thrash/multifs/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/mount b/qa/suites/fs/thrash/multifs/mount new file mode 120000 index 000000000..e3600f453 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/msgr-failures/.qa b/qa/suites/fs/thrash/multifs/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/msgr-failures/none.yaml b/qa/suites/fs/thrash/multifs/msgr-failures/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/multifs/msgr-failures/none.yaml diff --git a/qa/suites/fs/thrash/multifs/msgr-failures/osd-mds-delay.yaml b/qa/suites/fs/thrash/multifs/msgr-failures/osd-mds-delay.yaml new file mode 100644 index 000000000..17cbc5b90 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/msgr-failures/osd-mds-delay.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + conf: + osd: + ms inject socket failures: 2500 + ms inject delay type: client mds + ms inject delay probability: .005 + ms inject delay max: 1 + mon client directed command retry: 5 + mds: + ms inject socket failures: 2500 + ms inject delay type: client mds osd + ms inject delay probability: .005 + ms inject delay max: 1 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/fs/thrash/multifs/objectstore/.qa b/qa/suites/fs/thrash/multifs/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/thrash/multifs/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/overrides/+ b/qa/suites/fs/thrash/multifs/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/+ diff --git a/qa/suites/fs/thrash/multifs/overrides/.qa b/qa/suites/fs/thrash/multifs/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/overrides/client-shutdown.yaml b/qa/suites/fs/thrash/multifs/overrides/client-shutdown.yaml new file mode 100644 index 000000000..30b2ea981 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/client-shutdown.yaml @@ -0,0 +1,6 @@ +# Lengthen the timeout for thrashed MDS +overrides: + ceph: + conf: + client: + client_shutdown_timeout: 120 diff --git a/qa/suites/fs/thrash/multifs/overrides/frag.yaml b/qa/suites/fs/thrash/multifs/overrides/frag.yaml new file mode 120000 index 000000000..5e5cdaed8 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/frag.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/frag.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/overrides/ignorelist_health.yaml b/qa/suites/fs/thrash/multifs/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/thrash/multifs/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/overrides/multifs.yaml b/qa/suites/fs/thrash/multifs/overrides/multifs.yaml new file mode 100644 index 000000000..faf7838c2 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/multifs.yaml @@ -0,0 +1,16 @@ +overrides: + ceph: + cephfs: + fs: + - name: a + - name: b + ceph-fuse: + client.0: + cephfs_name: a + client.1: + cephfs_name: b + kclient: + client.0: + cephfs_name: a + client.1: + cephfs_name: b diff --git a/qa/suites/fs/thrash/multifs/overrides/session_timeout.yaml b/qa/suites/fs/thrash/multifs/overrides/session_timeout.yaml new file mode 120000 index 000000000..fce0318c5 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/session_timeout.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/session_timeout.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/overrides/thrashosds-health.yaml b/qa/suites/fs/thrash/multifs/overrides/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/fs/thrash/multifs/overrides/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/tasks/% b/qa/suites/fs/thrash/multifs/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/% diff --git a/qa/suites/fs/thrash/multifs/tasks/.qa b/qa/suites/fs/thrash/multifs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/tasks/1-thrash/.qa b/qa/suites/fs/thrash/multifs/tasks/1-thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/1-thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml new file mode 100644 index 000000000..33748cea5 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml @@ -0,0 +1,7 @@ +tasks: +- mds_thrash: + +overrides: + ceph: + log-ignorelist: + - Replacing daemon mds diff --git a/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml new file mode 100644 index 000000000..fbbe16151 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) +tasks: +- mon_thrash: + check_mds_failover: True + revive_delay: 20 + thrash_delay: 10 diff --git a/qa/suites/fs/thrash/multifs/tasks/2-workunit/.qa b/qa/suites/fs/thrash/multifs/tasks/2-workunit/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_snaptests.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_snaptests.yaml new file mode 100644 index 000000000..dd5a0abd4 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_snaptests.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + timeout: 6h + clients: + all: + - fs/snaps diff --git a/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_fsstress.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_fsstress.yaml new file mode 120000 index 000000000..c2e859fff --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_pjd.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_pjd.yaml new file mode 100644 index 000000000..f7784383b --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_suites_pjd.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + client: + fuse set user groups: true +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_trivial_sync.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_trivial_sync.yaml new file mode 120000 index 000000000..a1df03277 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/cfuse_workunit_trivial_sync.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_trivial_sync.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/multifs/tasks/2-workunit/ffsb.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/ffsb.yaml new file mode 100644 index 000000000..7e4f711a2 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/ffsb.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - SLOW_OPS + - slow request + conf: + osd: + filestore flush min: 0 +tasks: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/fs/thrash/multifs/tasks/2-workunit/iozone.yaml b/qa/suites/fs/thrash/multifs/tasks/2-workunit/iozone.yaml new file mode 100644 index 000000000..9270f3c51 --- /dev/null +++ b/qa/suites/fs/thrash/multifs/tasks/2-workunit/iozone.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/fs/thrash/workloads/% b/qa/suites/fs/thrash/workloads/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/workloads/% diff --git a/qa/suites/fs/thrash/workloads/.qa b/qa/suites/fs/thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/begin b/qa/suites/fs/thrash/workloads/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/thrash/workloads/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/clusters/.qa b/qa/suites/fs/thrash/workloads/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/clusters/1a5s-mds-1c-client.yaml b/qa/suites/fs/thrash/workloads/clusters/1a5s-mds-1c-client.yaml new file mode 120000 index 000000000..2ab33af1c --- /dev/null +++ b/qa/suites/fs/thrash/workloads/clusters/1a5s-mds-1c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a5s-mds-1c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/conf b/qa/suites/fs/thrash/workloads/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/thrash/workloads/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/distro b/qa/suites/fs/thrash/workloads/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/mount b/qa/suites/fs/thrash/workloads/mount new file mode 120000 index 000000000..e3600f453 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/msgr-failures/.qa b/qa/suites/fs/thrash/workloads/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/msgr-failures/none.yaml b/qa/suites/fs/thrash/workloads/msgr-failures/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/workloads/msgr-failures/none.yaml diff --git a/qa/suites/fs/thrash/workloads/msgr-failures/osd-mds-delay.yaml b/qa/suites/fs/thrash/workloads/msgr-failures/osd-mds-delay.yaml new file mode 100644 index 000000000..17cbc5b90 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/msgr-failures/osd-mds-delay.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + conf: + osd: + ms inject socket failures: 2500 + ms inject delay type: client mds + ms inject delay probability: .005 + ms inject delay max: 1 + mon client directed command retry: 5 + mds: + ms inject socket failures: 2500 + ms inject delay type: client mds osd + ms inject delay probability: .005 + ms inject delay max: 1 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/fs/thrash/workloads/objectstore-ec b/qa/suites/fs/thrash/workloads/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/% b/qa/suites/fs/thrash/workloads/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/% diff --git a/qa/suites/fs/thrash/workloads/overrides/.qa b/qa/suites/fs/thrash/workloads/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/client-shutdown.yaml b/qa/suites/fs/thrash/workloads/overrides/client-shutdown.yaml new file mode 100644 index 000000000..30b2ea981 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/client-shutdown.yaml @@ -0,0 +1,6 @@ +# Lengthen the timeout for thrashed MDS +overrides: + ceph: + conf: + client: + client_shutdown_timeout: 120 diff --git a/qa/suites/fs/thrash/workloads/overrides/frag.yaml b/qa/suites/fs/thrash/workloads/overrides/frag.yaml new file mode 120000 index 000000000..5e5cdaed8 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/frag.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/frag.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/ignorelist_health.yaml b/qa/suites/fs/thrash/workloads/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/thrash/workloads/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/.qa b/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/no.yaml b/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/no.yaml new file mode 100644 index 000000000..91b453679 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/no.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mds: + mds oft prefetch dirfrags: false diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/yes.yaml b/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/yes.yaml new file mode 100644 index 000000000..bd202f988 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/prefetch_dirfrags/yes.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mds: + mds oft prefetch dirfrags: true diff --git a/qa/suites/fs/thrash/workloads/overrides/prefetch_entire_dirfrags b/qa/suites/fs/thrash/workloads/overrides/prefetch_entire_dirfrags new file mode 120000 index 000000000..9b8024fba --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/prefetch_entire_dirfrags @@ -0,0 +1 @@ +.qa/cephfs/overrides/prefetch_entire_dirfrags
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/races.yaml b/qa/suites/fs/thrash/workloads/overrides/races.yaml new file mode 100644 index 000000000..e7d753896 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/races.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mds: + mds_sleep_rank_change: 5000000.0 diff --git a/qa/suites/fs/thrash/workloads/overrides/session_timeout.yaml b/qa/suites/fs/thrash/workloads/overrides/session_timeout.yaml new file mode 120000 index 000000000..fce0318c5 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/session_timeout.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/session_timeout.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/overrides/thrashosds-health.yaml b/qa/suites/fs/thrash/workloads/overrides/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/fs/thrash/workloads/overrides/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/ranks/.qa b/qa/suites/fs/thrash/workloads/ranks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/ranks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/ranks/1.yaml b/qa/suites/fs/thrash/workloads/ranks/1.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/workloads/ranks/1.yaml diff --git a/qa/suites/fs/thrash/workloads/ranks/3.yaml b/qa/suites/fs/thrash/workloads/ranks/3.yaml new file mode 100644 index 000000000..9ed043c14 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/ranks/3.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + cephfs: + max_mds: 3 + check-counter: + counters: + mds: + - mds.exported + - mds.imported diff --git a/qa/suites/fs/thrash/workloads/ranks/5.yaml b/qa/suites/fs/thrash/workloads/ranks/5.yaml new file mode 100644 index 000000000..ed89cef3a --- /dev/null +++ b/qa/suites/fs/thrash/workloads/ranks/5.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + cephfs: + max_mds: 5 + check-counter: + counters: + mds: + - mds.exported + - mds.imported diff --git a/qa/suites/fs/thrash/workloads/tasks/% b/qa/suites/fs/thrash/workloads/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/% diff --git a/qa/suites/fs/thrash/workloads/tasks/.qa b/qa/suites/fs/thrash/workloads/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/.qa b/qa/suites/fs/thrash/workloads/tasks/1-thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml new file mode 100644 index 000000000..33748cea5 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml @@ -0,0 +1,7 @@ +tasks: +- mds_thrash: + +overrides: + ceph: + log-ignorelist: + - Replacing daemon mds diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml new file mode 100644 index 000000000..fbbe16151 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) +tasks: +- mon_thrash: + check_mds_failover: True + revive_delay: 20 + thrash_delay: 10 diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml b/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml new file mode 100644 index 000000000..037d399a7 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - MDS_SLOW_METADATA_IO + - MDS_TRIM +tasks: +- thrashosds: diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/.qa b/qa/suites/fs/thrash/workloads/tasks/2-workunit/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/.qa b/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/snaps.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/snaps.yaml new file mode 100644 index 000000000..dd5a0abd4 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/snaps.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + timeout: 6h + clients: + all: + - fs/snaps diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/trivial_sync.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/trivial_sync.yaml new file mode 120000 index 000000000..a1df03277 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/fs/trivial_sync.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_trivial_sync.yaml
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/.qa b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/ffsb.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/ffsb.yaml new file mode 100644 index 000000000..7e4f711a2 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/ffsb.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - SLOW_OPS + - slow request + conf: + osd: + filestore flush min: 0 +tasks: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/fsstress.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/fsstress.yaml new file mode 100644 index 000000000..bae220292 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/iozone.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/iozone.yaml new file mode 100644 index 000000000..9270f3c51 --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/iozone.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/pjd.yaml b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/pjd.yaml new file mode 100644 index 000000000..f7784383b --- /dev/null +++ b/qa/suites/fs/thrash/workloads/tasks/2-workunit/suites/pjd.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + client: + fuse set user groups: true +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/fs/top/% b/qa/suites/fs/top/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/top/% diff --git a/qa/suites/fs/top/.qa b/qa/suites/fs/top/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/top/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/top/begin b/qa/suites/fs/top/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/top/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/top/cluster/+ b/qa/suites/fs/top/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/top/cluster/+ diff --git a/qa/suites/fs/top/cluster/.qa b/qa/suites/fs/top/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/top/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/top/cluster/1-node.yaml b/qa/suites/fs/top/cluster/1-node.yaml new file mode 100644 index 000000000..48c4996e7 --- /dev/null +++ b/qa/suites/fs/top/cluster/1-node.yaml @@ -0,0 +1,12 @@ +meta: +- desc: 1 ceph cluster with 1 mon, 1 mgr, 3 osds, 2 mds, 2 clients +roles: +- - mon.a + - mgr.x + - mds.a + - mds.b + - osd.0 + - osd.1 + - osd.2 + - client.0 + - client.1 diff --git a/qa/suites/fs/top/mount/.qa b/qa/suites/fs/top/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/top/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/top/mount/fuse.yaml b/qa/suites/fs/top/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/top/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/top/objectstore/.qa b/qa/suites/fs/top/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/top/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/top/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/top/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/top/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/top/overrides/.qa b/qa/suites/fs/top/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/top/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/top/overrides/ignorelist_health.yaml b/qa/suites/fs/top/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/top/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/top/supported-random-distros$ b/qa/suites/fs/top/supported-random-distros$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/top/supported-random-distros$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/top/tasks/.qa b/qa/suites/fs/top/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/top/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/top/tasks/fstop.yaml b/qa/suites/fs/top/tasks/fstop.yaml new file mode 100644 index 000000000..406f6804f --- /dev/null +++ b/qa/suites/fs/top/tasks/fstop.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_fstop diff --git a/qa/suites/fs/traceless/% b/qa/suites/fs/traceless/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/traceless/% diff --git a/qa/suites/fs/traceless/.qa b/qa/suites/fs/traceless/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/traceless/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/traceless/begin b/qa/suites/fs/traceless/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/traceless/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/traceless/clusters/.qa b/qa/suites/fs/traceless/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/traceless/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/traceless/clusters/fixed-2-ucephfs.yaml b/qa/suites/fs/traceless/clusters/fixed-2-ucephfs.yaml new file mode 120000 index 000000000..b0c41a89a --- /dev/null +++ b/qa/suites/fs/traceless/clusters/fixed-2-ucephfs.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/fixed-2-ucephfs.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/conf b/qa/suites/fs/traceless/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/traceless/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/traceless/distro b/qa/suites/fs/traceless/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/traceless/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/traceless/mount/.qa b/qa/suites/fs/traceless/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/traceless/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/traceless/mount/fuse.yaml b/qa/suites/fs/traceless/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/traceless/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/objectstore-ec b/qa/suites/fs/traceless/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/traceless/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/traceless/overrides/+ b/qa/suites/fs/traceless/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/traceless/overrides/+ diff --git a/qa/suites/fs/traceless/overrides/.qa b/qa/suites/fs/traceless/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/traceless/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/traceless/overrides/frag.yaml b/qa/suites/fs/traceless/overrides/frag.yaml new file mode 120000 index 000000000..5e5cdaed8 --- /dev/null +++ b/qa/suites/fs/traceless/overrides/frag.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/frag.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/overrides/ignorelist_health.yaml b/qa/suites/fs/traceless/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/traceless/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/traceless/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/traceless/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/tasks/.qa b/qa/suites/fs/traceless/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/traceless/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_blogbench.yaml b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_blogbench.yaml new file mode 120000 index 000000000..8702f4f3d --- /dev/null +++ b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_blogbench.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_blogbench.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_dbench.yaml b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_dbench.yaml new file mode 120000 index 000000000..b0f876c3c --- /dev/null +++ b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_dbench.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_dbench.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_ffsb.yaml b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_ffsb.yaml new file mode 120000 index 000000000..01e889b23 --- /dev/null +++ b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_ffsb.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_ffsb.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_fsstress.yaml b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_fsstress.yaml new file mode 120000 index 000000000..c2e859fff --- /dev/null +++ b/qa/suites/fs/traceless/tasks/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml
\ No newline at end of file diff --git a/qa/suites/fs/traceless/traceless/.qa b/qa/suites/fs/traceless/traceless/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/traceless/traceless/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/traceless/traceless/50pc.yaml b/qa/suites/fs/traceless/traceless/50pc.yaml new file mode 100644 index 000000000..e0418bcb2 --- /dev/null +++ b/qa/suites/fs/traceless/traceless/50pc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mds: + mds inject traceless reply probability: .5 diff --git a/qa/suites/fs/upgrade/.qa b/qa/suites/fs/upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/.qa b/qa/suites/fs/upgrade/featureful_client/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/% b/qa/suites/fs/upgrade/featureful_client/old_client/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/% diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/.qa b/qa/suites/fs/upgrade/featureful_client/old_client/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/bluestore-bitmap.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/bluestore-bitmap.yaml new file mode 120000 index 000000000..17ad98e79 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../../../../../cephfs/objectstore-ec/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/centos_8.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/centos_8.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/centos_8.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/clusters/.qa b/qa/suites/fs/upgrade/featureful_client/old_client/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/clusters/1-mds-2-client-micro.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/clusters/1-mds-2-client-micro.yaml new file mode 120000 index 000000000..feb68f343 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/clusters/1-mds-2-client-micro.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-2-client-micro.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/conf b/qa/suites/fs/upgrade/featureful_client/old_client/conf new file mode 120000 index 000000000..6d4712984 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/conf @@ -0,0 +1 @@ +.qa/cephfs/conf/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/% b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/% diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/.qa b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/ignorelist_health.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/.qa b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/no.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/no.yaml new file mode 100644 index 000000000..f9e95daa9 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/no.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + max_mds: 1 diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/yes.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/yes.yaml new file mode 100644 index 000000000..b3a9b5d67 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/multimds/yes.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + max_mds: 2 diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/overrides/pg-warn.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/pg-warn.yaml new file mode 100644 index 000000000..4ae54a40d --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/overrides/pg-warn.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon pg warn min per osd: 0 diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/% b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/% diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/.qa b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/.qa @@ -0,0 +1 @@ +../.qa/
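The overrides facets above (multimds/no.yaml, multimds/yes.yaml and pg-warn.yaml) are combined by the '%' matrix into a single effective set of overrides for each generated job. The following is a minimal Python sketch of how such facet fragments could be deep-merged, with the fragments written out as plain dicts; it is only an illustration of the idea, not teuthology's actual merge code.

    # Hedged sketch: deep-merge two of the facet fragments shown above
    # (multimds/yes.yaml and pg-warn.yaml) into one overrides mapping.
    # Only an illustration of the idea, not teuthology's real merge code.
    from pprint import pprint

    def deep_merge(base, update):
        # Recursively merge 'update' into 'base'; later values win.
        # (For brevity this mutates its inputs.)
        for key, value in update.items():
            if isinstance(value, dict) and isinstance(base.get(key), dict):
                deep_merge(base[key], value)
            else:
                base[key] = value
        return base

    multimds_yes = {"overrides": {"ceph": {"cephfs": {"max_mds": 2}}}}
    pg_warn = {"overrides": {"ceph": {"conf": {"global": {"mon pg warn min per osd": 0}}}}}

    merged = deep_merge(deep_merge({}, multimds_yes), pg_warn)
    pprint(merged)
    # overrides.ceph now carries both the cephfs.max_mds and conf.global settings.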
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/0-octopus.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/0-octopus.yaml new file mode 100644 index 000000000..e7774423f --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/0-octopus.yaml @@ -0,0 +1,39 @@ +meta: +- desc: | + install ceph/octopus latest +tasks: +- install: + branch: octopus + exclude_packages: + - librados3 + - ceph-mgr-dashboard + - ceph-mgr-diskprediction-local + - ceph-mgr-rook + - ceph-mgr-cephadm + - cephadm + - ceph-volume + extra_packages: ['librados2'] +- print: "**** done installing octopus" +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_ + - \(MDS_ + - \(OSD_ + - \(MON_DOWN\) + - \(CACHE_POOL_ + - \(POOL_ + - \(MGR_DOWN\) + - \(PG_ + - \(SMALLER_PGP_NUM\) + - Monitor daemon marked osd + - Behind on trimming + - Manager daemon + conf: + global: + mon warn on pool no app: false + ms bind msgr2: false +- exec: + osd.0: + - ceph osd set-require-min-compat-client octopus +- print: "**** done ceph" diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/1-client.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/1-client.yaml new file mode 100644 index 000000000..976d6e265 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/1-client.yaml @@ -0,0 +1,8 @@ +tasks: +- ceph-fuse: +- print: "**** done octopus client" +- workunit: + clients: + all: + - suites/fsstress.sh +- print: "**** done fsstress" diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/2-upgrade.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/2-upgrade.yaml new file mode 100644 index 000000000..26c185946 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/2-upgrade.yaml @@ -0,0 +1,48 @@ +overrides: + ceph: + log-ignorelist: + - scrub mismatch + - ScrubResult + - wrongly marked + - \(POOL_APP_NOT_ENABLED\) + - \(SLOW_OPS\) + - overall HEALTH_ + - \(MON_MSGR2_NOT_ENABLED\) + - slow request + conf: + global: + bluestore warn on legacy statfs: false + bluestore warn on no per pool omap: false + mon: + mon warn on osd down out interval zero: false + +tasks: +- mds_pre_upgrade: +- print: "**** done mds pre-upgrade sequence" +- install.upgrade: + # upgrade the single cluster node, which is running all the mon/mds/osd/mgr daemons + mon.a: + branch: quincy +- print: "**** done install.upgrade the host" +- ceph.restart: + daemons: [mon.*, mgr.*] + mon-health-to-clog: false + wait-for-healthy: false +- ceph.healthy: +- ceph.restart: + daemons: [osd.*] + wait-for-healthy: false + wait-for-osds-up: true +- ceph.stop: [mds.*] +- ceph.restart: + daemons: [mds.*] + wait-for-healthy: false + wait-for-osds-up: true +- exec: + mon.a: + - ceph osd dump -f json-pretty + - ceph versions + - ceph osd require-osd-release quincy + - for f in `ceph osd pool ls` ; do ceph osd pool set $f pg_autoscale_mode off ; done +- ceph.healthy: +- print: "**** done ceph.restart" diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/.qa b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/.qa @@ -0,0 +1 @@ +../.qa/
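In 2-upgrade.yaml above, daemons are restarted in a fixed order (mon/mgr, then osd, then mds) and the upgrade is finalized by pinning the minimum OSD release and disabling the PG autoscaler on every pool. A rough Python equivalent of that finalization step is sketched below; it shells out to the same ceph commands the fragment runs, assumes it executes on a node with an admin keyring, and is not the code teuthology itself uses.

    # Hedged sketch of the finalization performed at the end of 2-upgrade.yaml:
    # pin the minimum OSD release and disable the PG autoscaler on every pool.
    # Assumes it runs on a node with an admin keyring; not teuthology code.
    import subprocess

    def ceph(*args):
        # Run a ceph CLI command and return its stdout as text.
        return subprocess.run(("ceph",) + args, check=True,
                              capture_output=True, text=True).stdout

    print(ceph("osd", "dump", "-f", "json-pretty"))
    print(ceph("versions"))

    # Same effect as 'ceph osd require-osd-release quincy' in the fragment.
    ceph("osd", "require-osd-release", "quincy")

    # Equivalent of: for f in `ceph osd pool ls`; do ceph osd pool set $f pg_autoscale_mode off; done
    for pool in ceph("osd", "pool", "ls").splitlines():
        if pool:
            ceph("osd", "pool", "set", pool, "pg_autoscale_mode", "off")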
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/no.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/no.yaml new file mode 100644 index 000000000..b495eb41b --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/no.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + clients: + all: + - suites/fsstress.sh +- print: "**** done fsstress" diff --git a/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/quincy.yaml b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/quincy.yaml new file mode 100644 index 000000000..138d8f4e2 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/old_client/tasks/3-compat_client/quincy.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + log-ignorelist: + - missing required features +tasks: +- exec: + mon.a: + - ceph fs dump --format=json-pretty + - ceph fs required_client_features cephfs add metric_collect +- sleep: + duration: 5 +- fs.clients_evicted: diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/% b/qa/suites/fs/upgrade/featureful_client/upgraded_client/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/% diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/.qa b/qa/suites/fs/upgrade/featureful_client/upgraded_client/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/bluestore-bitmap.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/bluestore-bitmap.yaml new file mode 120000 index 000000000..17ad98e79 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../../../../../cephfs/objectstore-ec/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/centos_8.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/centos_8.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/centos_8.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/clusters/.qa b/qa/suites/fs/upgrade/featureful_client/upgraded_client/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/clusters/1-mds-2-client-micro.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/clusters/1-mds-2-client-micro.yaml new file mode 120000 index 000000000..feb68f343 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/clusters/1-mds-2-client-micro.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-2-client-micro.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/conf b/qa/suites/fs/upgrade/featureful_client/upgraded_client/conf new file mode 120000 index 000000000..6d4712984 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/conf @@ -0,0 +1 @@ +.qa/cephfs/conf/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/% b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/% diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/.qa b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/ignorelist_health.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/.qa b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/no.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/no.yaml new file mode 100644 index 000000000..f9e95daa9 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/no.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + max_mds: 1 diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/yes.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/yes.yaml new file mode 100644 index 000000000..b3a9b5d67 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/multimds/yes.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + max_mds: 2 diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/pg-warn.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/pg-warn.yaml new file mode 100644 index 000000000..4ae54a40d --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/overrides/pg-warn.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon pg warn min per osd: 0 diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/% b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/% diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/.qa b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/0-octopus.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/0-octopus.yaml new file mode 100644 index 000000000..e7774423f --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/0-octopus.yaml @@ -0,0 +1,39 @@ +meta: +- desc: | + install ceph/octopus latest +tasks: +- install: + branch: octopus + exclude_packages: + - librados3 + - ceph-mgr-dashboard + - ceph-mgr-diskprediction-local + - ceph-mgr-rook + - ceph-mgr-cephadm + - cephadm + - ceph-volume + extra_packages: ['librados2'] +- print: "**** done installing octopus" +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_ + - \(MDS_ + - \(OSD_ + - \(MON_DOWN\) + - \(CACHE_POOL_ + - \(POOL_ + - \(MGR_DOWN\) + - \(PG_ + - \(SMALLER_PGP_NUM\) + - Monitor daemon marked osd + - Behind on trimming + - Manager daemon + conf: + global: + mon warn on pool no app: false + ms bind msgr2: false +- exec: + osd.0: + - ceph osd set-require-min-compat-client octopus +- print: "**** done ceph" diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/1-client.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/1-client.yaml new file mode 100644 index 000000000..c9b4c046f --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/1-client.yaml @@ -0,0 +1,11 @@ +nuke-on-error: false +overrides: + nuke-on-error: false +tasks: +- ceph-fuse: +- print: "**** done octopus client" +#- workunit: +# clients: +# all: +# - suites/fsstress.sh +- print: "**** done fsstress" diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/2-upgrade.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/2-upgrade.yaml new file mode 100644 index 000000000..e5ea8b19c --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/2-upgrade.yaml @@ -0,0 +1,48 @@ +overrides: + ceph: + log-ignorelist: + - scrub mismatch + - ScrubResult + - wrongly marked + - \(POOL_APP_NOT_ENABLED\) + - \(SLOW_OPS\) + - overall HEALTH_ + - \(MON_MSGR2_NOT_ENABLED\) + - slow request + conf: + global: + bluestore warn on legacy statfs: false + bluestore warn on no per pool omap: false + mon: + mon warn on osd down out interval zero: false + +tasks: +- mds_pre_upgrade: +- print: "**** done mds pre-upgrade sequence" +- install.upgrade: + # upgrade the single cluster node, which is running all the mon/mds/osd/mgr daemons + mon.a: + branch: quincy +- print: "**** done install.upgrade the host" +- ceph.restart: + daemons: [mon.*, mgr.*] + mon-health-to-clog: false + wait-for-healthy: false +- ceph.healthy: +- ceph.restart: + daemons: [osd.*] + wait-for-healthy: false + wait-for-osds-up: true +- ceph.stop: [mds.*] +- ceph.restart: + daemons: [mds.*] + wait-for-healthy: false + wait-for-osds-up: true +- exec: + mon.a: + - ceph versions + - ceph osd dump -f json-pretty + - ceph osd require-osd-release quincy + - for f in `ceph osd pool ls` ; do ceph osd pool set $f pg_autoscale_mode off ; done +- ceph.healthy: +- print: "**** done ceph.restart" diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/3-client-upgrade.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/3-client-upgrade.yaml new file mode 100644 index 000000000..251c349ac --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/3-client-upgrade.yaml @@ -0,0 +1,15 @@ +tasks: +- install.upgrade: + client.0: + branch: quincy +- print: "**** done 
install.upgrade on client.0" +- ceph-fuse: + client.0: + mounted: false + client.1: + skip: true +- ceph-fuse: + client.0: + client.1: + skip: true +- print: "**** done remount client" diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/4-compat_client.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/4-compat_client.yaml new file mode 100644 index 000000000..d8f260007 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/4-compat_client.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - missing required features +tasks: +- exec: + mon.a: + - ceph fs dump --format=json-pretty + - ceph fs required_client_features cephfs add metric_collect +- sleep: + duration: 5 +# client.0 is upgraded and client.1 is evicted by the MDS due to missing +# feature compat set +- fs.clients_evicted: + clients: + client.0: False + client.1: True diff --git a/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/5-client-sanity.yaml b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/5-client-sanity.yaml new file mode 100644 index 000000000..e206457e6 --- /dev/null +++ b/qa/suites/fs/upgrade/featureful_client/upgraded_client/tasks/5-client-sanity.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + clients: + client.0: + - suites/fsstress.sh +- print: "**** done fsstress" diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/% b/qa/suites/fs/upgrade/mds_upgrade_sequence/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/% diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/bluestore-bitmap.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/bluestore-bitmap.yaml new file mode 120000 index 000000000..fb603bc9a --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/centos_8.stream_container_tools.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/centos_8.stream_container_tools.yaml new file mode 120000 index 000000000..7a86f967f --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/centos_8.stream_container_tools.yaml @@ -0,0 +1 @@ +.qa/distros/podman/centos_8.stream_container_tools.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/conf b/qa/suites/fs/upgrade/mds_upgrade_sequence/conf new file mode 120000 index 000000000..6d4712984 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/conf @@ -0,0 +1 @@ +.qa/cephfs/conf/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/fail_fs/no.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/fail_fs/no.yaml new file mode 100644 index 000000000..868415bcb --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/fail_fs/no.yaml @@ -0,0 +1,3 @@ +teuthology: + variables: + fail_fs: false diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/fail_fs/yes.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/fail_fs/yes.yaml new file mode 100644 index 000000000..411ff3814 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/fail_fs/yes.yaml @@ -0,0 +1,3 @@ +teuthology: + variables: + fail_fs: true diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/% b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/% diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_health.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/pg-warn.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/pg-warn.yaml new file mode 100644 index 000000000..4ae54a40d --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/pg-warn.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon pg warn min per osd: 0 diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/syntax.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/syntax.yaml new file mode 100644 index 000000000..84d5d43b2 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/syntax.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + syntax: 'v1' diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/roles.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/roles.yaml new file mode 100644 index 000000000..bce4ecd34 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/roles.yaml @@ -0,0 +1,11 @@ +roles: +- - host.a + - client.0 + - osd.0 + - osd.1 + - osd.2 +- - host.b + - client.1 + - osd.3 + - osd.4 + - osd.5 diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/% b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/% diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/pacific.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/pacific.yaml new file mode 100644 index 000000000..6432d7080 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/pacific.yaml @@ -0,0 +1,32 @@ +meta: +- desc: | + setup ceph/pacific + +tasks: +- install: + branch: pacific + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- cephadm: + image: quay.ceph.io/ceph-ci/ceph:pacific + roleless: true + cephadm_branch: pacific + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing pacific cephadm ..." +- cephadm.shell: + host.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/v16.2.4.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/v16.2.4.yaml new file mode 100644 index 000000000..36bfb1b91 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/v16.2.4.yaml @@ -0,0 +1,34 @@ +teuthology: + postmerge: + - if yaml.teuthology.variables.fail_fs then reject() end + +meta: +- desc: | + setup ceph/pacific v16.2.4 + +tasks: +# Disable metrics sending by kclient as it may crash (assert) a v16.2.4 MDS +- pexec: + clients: + - sudo modprobe -r ceph + - sudo modprobe ceph disable_send_metrics=on +- install: + tag: v16.2.4 + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- cephadm: + roleless: true + image: quay.io/ceph/ceph:v16.2.4 + cephadm_branch: v16.2.4 + cephadm_git_url: https://github.com/ceph/ceph + # needed for v16.2.4 due to --skip-admin-label + avoid_pacific_features: true +- print: "**** done starting v16.2.4" +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/% b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/% diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/0-create.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/0-create.yaml new file mode 100644 index 000000000..5ee0022c6 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/0-create.yaml @@ -0,0 +1,5 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs volume create cephfs --placement=4 + - ceph fs dump diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/1.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/1.yaml new file mode 100644 index 000000000..8c1cd2fe0 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/1.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs max_mds 1 diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/2.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/2.yaml new file mode 100644 index 000000000..fcd3b1ea4 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/1-ranks/2.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs max_mds 2 diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/no.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/no.yaml new file mode 100644 index 000000000..3dbc81089 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/no.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs allow_standby_replay false diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/yes.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/yes.yaml new file mode 100644 index 000000000..fb894425e --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/2-allow_standby_replay/yes.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs allow_standby_replay true diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/.qa b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/.qa @@ -0,0 +1 @@ +../.qa/
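The facets above set max_mds and allow_standby_replay on the newly created file system. A hedged Python sketch for reading those settings back is shown below; max_mds is a well-known mdsmap field, while how allow_standby_replay shows up in the JSON may vary by release, so the sketch simply dumps the whole map as well.

    # Hedged sketch: read back the settings toggled by the facets above.
    # 'max_mds' is a well-known mdsmap field; other keys may differ by release.
    import json
    import subprocess

    def fs_get(name):
        out = subprocess.run(["ceph", "fs", "get", name, "--format=json"],
                             check=True, capture_output=True, text=True).stdout
        return json.loads(out)

    mdsmap = fs_get("cephfs").get("mdsmap", {})
    print("max_mds:", mdsmap.get("max_mds"))
    print(json.dumps(mdsmap, indent=2))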
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/no.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/no.yaml new file mode 100644 index 000000000..107f30ecd --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/no.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs inline_data false diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/yes.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/yes.yaml new file mode 100644 index 000000000..246ed71b4 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/3-inline/yes.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs set cephfs inline_data true --yes-i-really-really-mean-it diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/4-verify.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/4-verify.yaml new file mode 100644 index 000000000..e71365ad1 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/1-volume/4-verify.yaml @@ -0,0 +1,7 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs dump + - ceph --format=json fs dump | jq -e ".filesystems | length == 1" + - while ! ceph --format=json mds versions | jq -e ". | add == 4"; do sleep 1; done +- fs.pre_upgrade_save: diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client.yaml new file mode 100644 index 000000000..92b9dda84 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client.yaml @@ -0,0 +1,3 @@ +tasks: +- kclient: +- print: "**** done client" diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/3-upgrade-mgr-staggered.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/3-upgrade-mgr-staggered.yaml new file mode 100644 index 000000000..fac9e29db --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/3-upgrade-mgr-staggered.yaml @@ -0,0 +1,18 @@ +teuthology: + premerge: | + if not yaml.teuthology.variables.fail_fs then reject() end +upgrade-tasks: + sequential: + - cephadm.shell: + env: [sha1] + host.a: + - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force + - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force + - ceph config set global log_to_journald false --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types mgr + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! 
ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + - ceph versions | jq -e '.mgr | length == 1' + - ceph versions | jq -e '.mgr | keys' | grep $sha1 + - ceph versions | jq -e '.overall | length == 2' + - ceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e '.up_to_date | length == 2' + - ceph orch ps diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-config-upgrade/+ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-config-upgrade/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-config-upgrade/+ diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-config-upgrade/fail_fs.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-config-upgrade/fail_fs.yaml new file mode 100644 index 000000000..fbde19a22 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/4-config-upgrade/fail_fs.yaml @@ -0,0 +1,15 @@ +teuthology: + premerge: | + local set = yaml.teuthology.variables.fail_fs + local cmd = "ceph config set mgr mgr/orchestrator/fail_fs "..tostring(set) + local cmds = yaml_fragment['upgrade-tasks'].sequential[0]['cephadm.shell']['host.a'] + if set then + py_attrgetter(cmds).append "ceph config set mgr mgr/orchestrator/fail_fs true" + else + py_attrgetter(cmds).append "ceph config set mgr mgr/orchestrator/fail_fs false || true" + end +upgrade-tasks: + sequential: + - cephadm.shell: + env: [sha1] + host.a: [] diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/5-upgrade-with-workload.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/5-upgrade-with-workload.yaml new file mode 100644 index 000000000..392b1e66d --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/5-upgrade-with-workload.yaml @@ -0,0 +1,35 @@ +tasks: +- parallel: + - upgrade-tasks + - workload-tasks + +upgrade-tasks: + sequential: + - cephadm.shell: + env: [sha1] + host.a: + - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force + - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force + - ceph config set global log_to_journald false --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - cephadm.shell: + env: [sha1] + host.a: + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! 
ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph fs dump; ceph orch upgrade status ; ceph health detail ; sleep 30 ; done + - ceph orch ps + - ceph orch upgrade status + - ceph health detail + - ceph versions + - echo "wait for servicemap items w/ changing names to refresh" + - sleep 60 + - ceph orch ps + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 + +workload-tasks: + sequential: + - workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/6-verify.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/6-verify.yaml new file mode 100644 index 000000000..c2b657e5a --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/6-verify.yaml @@ -0,0 +1,5 @@ +tasks: +- cephadm.shell: + host.a: + - ceph fs dump +- fs.post_upgrade_checks: diff --git a/qa/suites/fs/upgrade/nofs/% b/qa/suites/fs/upgrade/nofs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/% diff --git a/qa/suites/fs/upgrade/nofs/.qa b/qa/suites/fs/upgrade/nofs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/.qa @@ -0,0 +1 @@ +../.qa/
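The while loop in 5-upgrade-with-workload.yaml above polls 'ceph orch upgrade status' until the upgrade is no longer in progress or its message reports an error, then asserts that 'ceph versions' shows a single overall version. The Python sketch below mirrors that shell loop; it assumes the status command emits JSON with in_progress and message fields, as the jq filters in the fragment imply, and is an illustration rather than the task's implementation.

    # Hedged Python rendering of the shell polling loop above. Assumes
    # 'ceph orch upgrade status' emits JSON with 'in_progress' and 'message'
    # fields, as the jq filters in the fragment imply.
    import json
    import subprocess
    import time

    def ceph_json(*args):
        out = subprocess.run(("ceph",) + args, check=True,
                             capture_output=True, text=True).stdout
        return json.loads(out)

    while True:
        status = ceph_json("orch", "upgrade", "status")
        if not status.get("in_progress"):
            break
        if "Error" in (status.get("message") or ""):
            raise RuntimeError("upgrade failed: %s" % status["message"])
        subprocess.run(["ceph", "orch", "ps"])   # progress output, as in the loop
        subprocess.run(["ceph", "versions"])
        time.sleep(30)

    # Mirrors: ceph versions | jq -e '.overall | length == 1'
    overall = ceph_json("versions").get("overall", {})
    assert len(overall) == 1, "daemons still on mixed versions: %s" % overall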
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/README b/qa/suites/fs/upgrade/nofs/README new file mode 100644 index 000000000..e7f6960ef --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/README @@ -0,0 +1,3 @@ +This test just verifies that upgrades work with no file system present. In +particular, it checks that the MDSMonitor does not blow up with version +mismatches. diff --git a/qa/suites/fs/upgrade/nofs/bluestore-bitmap.yaml b/qa/suites/fs/upgrade/nofs/bluestore-bitmap.yaml new file mode 120000 index 000000000..fb603bc9a --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/centos_8.yaml b/qa/suites/fs/upgrade/nofs/centos_8.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/centos_8.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/conf b/qa/suites/fs/upgrade/nofs/conf new file mode 120000 index 000000000..6d4712984 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/conf @@ -0,0 +1 @@ +.qa/cephfs/conf/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/no-mds-cluster.yaml b/qa/suites/fs/upgrade/nofs/no-mds-cluster.yaml new file mode 100644 index 000000000..33c6fb16b --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/no-mds-cluster.yaml @@ -0,0 +1,6 @@ +roles: +- [mon.a, mon.b, mon.c, mgr.x, mgr.y, osd.0, osd.1, osd.2, osd.3] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/fs/upgrade/nofs/overrides/% b/qa/suites/fs/upgrade/nofs/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/overrides/% diff --git a/qa/suites/fs/upgrade/nofs/overrides/.qa b/qa/suites/fs/upgrade/nofs/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/overrides/ignorelist_health.yaml b/qa/suites/fs/upgrade/nofs/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/upgrade/nofs/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/overrides/pg-warn.yaml b/qa/suites/fs/upgrade/nofs/overrides/pg-warn.yaml new file mode 100644 index 000000000..4ae54a40d --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/overrides/pg-warn.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon pg warn min per osd: 0 diff --git a/qa/suites/fs/upgrade/nofs/tasks/% b/qa/suites/fs/upgrade/nofs/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/tasks/% diff --git a/qa/suites/fs/upgrade/nofs/tasks/.qa b/qa/suites/fs/upgrade/nofs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/nofs/tasks/0-pacific.yaml b/qa/suites/fs/upgrade/nofs/tasks/0-pacific.yaml new file mode 100644 index 000000000..b74accc69 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/tasks/0-pacific.yaml @@ -0,0 +1,39 @@ +meta: +- desc: | + install ceph/pacific latest +tasks: +- install: + branch: pacific + exclude_packages: + - librados3 + - ceph-mgr-dashboard + - ceph-mgr-diskprediction-local + - ceph-mgr-rook + - ceph-mgr-cephadm + - cephadm + - ceph-volume + extra_packages: ['librados2'] +- print: "**** done installing pacific" +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_ + - \(MDS_ + - \(OSD_ + - \(MON_DOWN\) + - \(CACHE_POOL_ + - \(POOL_ + - \(MGR_DOWN\) + - \(PG_ + - \(SMALLER_PGP_NUM\) + - Monitor daemon marked osd + - Behind on trimming + - Manager daemon + conf: + global: + mon warn on pool no app: false + ms bind msgr2: false +- exec: + osd.0: + - ceph osd set-require-min-compat-client pacific +- print: "**** done ceph" diff --git a/qa/suites/fs/upgrade/nofs/tasks/1-upgrade.yaml b/qa/suites/fs/upgrade/nofs/tasks/1-upgrade.yaml new file mode 100644 index 000000000..858142871 --- /dev/null +++ b/qa/suites/fs/upgrade/nofs/tasks/1-upgrade.yaml @@ -0,0 +1,44 @@ +overrides: + ceph: + log-ignorelist: + - scrub mismatch + - ScrubResult + - wrongly marked + - \(POOL_APP_NOT_ENABLED\) + - \(SLOW_OPS\) + - overall HEALTH_ + - \(MON_MSGR2_NOT_ENABLED\) + - slow request + conf: + global: + bluestore warn on legacy statfs: false + bluestore warn on no per pool omap: false + mon: + mon warn on osd down out interval zero: false + +tasks: +- print: "*** upgrading, no cephfs present" +- exec: + mon.a: + - ceph fs dump +- install.upgrade: + mon.a: +- print: "**** done install.upgrade" +- ceph.restart: + daemons: [mon.*, mgr.*] + mon-health-to-clog: false + wait-for-healthy: false +- ceph.healthy: +- ceph.restart: + daemons: [osd.*] + wait-for-healthy: false + wait-for-osds-up: true +- exec: + mon.a: + - ceph versions + - ceph osd dump -f json-pretty + - ceph fs dump + - ceph osd require-osd-release quincy + - for f in `ceph osd pool ls` ; do ceph osd pool set $f pg_autoscale_mode off ; done +- ceph.healthy: +- print: "**** done ceph.restart" diff --git a/qa/suites/fs/upgrade/upgraded_client/% b/qa/suites/fs/upgrade/upgraded_client/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/% diff --git a/qa/suites/fs/upgrade/upgraded_client/.qa b/qa/suites/fs/upgrade/upgraded_client/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/.qa @@ -0,0 +1 @@ +../.qa/
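The nofs tasks above upgrade a cluster that deliberately has no CephFS file system, running 'ceph fs dump' before and after to confirm the MDSMonitor copes. A small hedged sketch of an equivalent sanity check is below; it assumes an admin node and uses 'ceph fs ls' rather than parsing the dump output.

    # Hedged sketch for the nofs scenario: assert the cluster reports zero
    # CephFS file systems. Assumes an admin node; uses 'ceph fs ls' instead
    # of parsing 'ceph fs dump'.
    import json
    import subprocess

    out = subprocess.run(["ceph", "fs", "ls", "--format=json"],
                         check=True, capture_output=True, text=True).stdout
    assert len(json.loads(out)) == 0, "expected no CephFS file systems here"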
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/bluestore-bitmap.yaml b/qa/suites/fs/upgrade/upgraded_client/bluestore-bitmap.yaml new file mode 120000 index 000000000..675dce056 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../../../../cephfs/objectstore-ec/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/centos_8.yaml b/qa/suites/fs/upgrade/upgraded_client/centos_8.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/centos_8.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/clusters/% b/qa/suites/fs/upgrade/upgraded_client/clusters/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/clusters/% diff --git a/qa/suites/fs/upgrade/upgraded_client/clusters/.qa b/qa/suites/fs/upgrade/upgraded_client/clusters/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/clusters/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/clusters/1-mds-1-client-micro.yaml b/qa/suites/fs/upgrade/upgraded_client/clusters/1-mds-1-client-micro.yaml new file mode 120000 index 000000000..50ffb6d53 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/clusters/1-mds-1-client-micro.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1-mds-1-client-micro.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/conf b/qa/suites/fs/upgrade/upgraded_client/conf new file mode 120000 index 000000000..6d4712984 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/conf @@ -0,0 +1 @@ +.qa/cephfs/conf/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/overrides/% b/qa/suites/fs/upgrade/upgraded_client/overrides/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/overrides/% diff --git a/qa/suites/fs/upgrade/upgraded_client/overrides/.qa b/qa/suites/fs/upgrade/upgraded_client/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/overrides/ignorelist_health.yaml b/qa/suites/fs/upgrade/upgraded_client/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/upgrade/upgraded_client/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/overrides/pg-warn.yaml b/qa/suites/fs/upgrade/upgraded_client/overrides/pg-warn.yaml new file mode 100644 index 000000000..4ae54a40d --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/overrides/pg-warn.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + mon pg warn min per osd: 0 diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/% b/qa/suites/fs/upgrade/upgraded_client/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/% diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/.qa b/qa/suites/fs/upgrade/upgraded_client/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/0-from/nautilus.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/0-from/nautilus.yaml new file mode 100644 index 000000000..02f541eaf --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/0-from/nautilus.yaml @@ -0,0 +1,53 @@ +meta: +- desc: | + install ceph/nautilus latest +tasks: +- install: + branch: nautilus + exclude_packages: + - cephadm + - ceph-mgr-cephadm + - ceph-immutable-object-cache + - python3-rados + - python3-rgw + - python3-rbd + - python3-cephfs + - ceph-volume + extra_packages: + - python-rados + - python-rgw + - python-rbd + - python-cephfs + # For kernel_untar_build workunit + extra_system_packages: + - bison + - flex + - elfutils-libelf-devel + - openssl-devel + - NetworkManager + - iproute + - util-linux +- print: "**** done installing nautilus" +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_ + - \(MDS_ + - \(OSD_ + - \(MON_DOWN\) + - \(CACHE_POOL_ + - \(POOL_ + - \(MGR_DOWN\) + - \(PG_ + - \(SMALLER_PGP_NUM\) + - Monitor daemon marked osd + - Behind on trimming + - Manager daemon + conf: + global: + mon warn on pool no app: false + ms bind msgr2: false +- exec: + osd.0: + - ceph osd set-require-min-compat-client nautilus +- print: "**** done ceph" diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/0-from/pacific.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/0-from/pacific.yaml new file mode 100644 index 000000000..defb03922 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/0-from/pacific.yaml @@ -0,0 +1,53 @@ +meta: +- desc: | + install ceph/pacific latest +tasks: +- install: + branch: pacific + exclude_packages: + - cephadm + - ceph-mgr-cephadm + - ceph-immutable-object-cache + - python3-rados + - python3-rgw + - python3-rbd + - python3-cephfs + - ceph-volume + extra_packages: + - python-rados + - python-rgw + - python-rbd + - python-cephfs + # For kernel_untar_build workunit + extra_system_packages: + - bison + - flex + - elfutils-libelf-devel + - openssl-devel + - NetworkManager + - iproute + - util-linux +- print: "**** done installing pacific" +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_ + - \(MDS_ + - \(OSD_ + - \(MON_DOWN\) + - \(CACHE_POOL_ + - \(POOL_ + - \(MGR_DOWN\) + - \(PG_ + - \(SMALLER_PGP_NUM\) + - Monitor daemon marked osd + - Behind on trimming + - Manager daemon + conf: + global: + mon warn on pool no app: false + ms bind msgr2: false +- exec: + osd.0: + - ceph osd set-require-min-compat-client pacific +- print: "**** done ceph" diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/1-mount/.qa b/qa/suites/fs/upgrade/upgraded_client/tasks/1-mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/1-mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/1-mount/mount b/qa/suites/fs/upgrade/upgraded_client/tasks/1-mount/mount new file mode 120000 index 000000000..e3600f453 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/1-mount/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/.qa b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/% b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/% diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/0-clients/fuse-upgrade.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/0-clients/fuse-upgrade.yaml new file mode 100644 index 000000000..34c85a00e --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/0-clients/fuse-upgrade.yaml @@ -0,0 +1,14 @@ +teuthology: + postmerge: + - if not is_fuse() then reject() end +tasks: +- ceph-fuse: + client.0: + mounted: false +- print: "**** done unmount client.0" +- install.upgrade: + client.0: +- print: "**** done install.upgrade on client.0" +- ceph-fuse: + client.0: +- print: "**** done remount client" diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/0-clients/kclient.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/0-clients/kclient.yaml new file mode 100644 index 000000000..ecc705600 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/0-clients/kclient.yaml @@ -0,0 +1,8 @@ +teuthology: + postmerge: + # Once we can make sure the distro kernels have included the newops fixes + # we can remove the is_kupstream() restriction. Since Nautilus only + # supports the 'v1' mount syntax, don't touch the mount syntax + # restriction. + - if not is_kupstream() or syntax_version() == 'v2' then reject() end +tasks: diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/1-client-sanity.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/1-client-sanity.yaml new file mode 100644 index 000000000..9508cce65 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/new_ops/1-client-sanity.yaml @@ -0,0 +1,4 @@ +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_newops diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/% b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/% diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/.qa b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/0-client-upgrade.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/0-client-upgrade.yaml new file mode 100644 index 000000000..2d948af19 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/0-client-upgrade.yaml @@ -0,0 +1,14 @@ +teuthology: + postmerge: + - if not is_fuse() then reject() end +tasks: +- ceph-fuse: + client.0: + mounted: false +- print: "**** done unmount client.0" +- install.upgrade: + client.0: +- print: "**** done install.upgrade on client.0" +- ceph-fuse: + client.0: +- print: "**** done remount client.0" diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/.qa b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/blogbench.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/blogbench.yaml new file mode 120000 index 000000000..a2f8b3052 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/blogbench.yaml @@ -0,0 +1 @@ +.qa/suites/fs/workload/tasks/5-workunit/suites/blogbench.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/dbench.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/dbench.yaml new file mode 120000 index 000000000..9fb8adcea --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/dbench.yaml @@ -0,0 +1 @@ +.qa/suites/fs/workload/tasks/5-workunit/suites/dbench.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/fsstress.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/fsstress.yaml new file mode 120000 index 000000000..dc777f36d --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/fsstress.yaml @@ -0,0 +1 @@ +.qa/suites/fs/workload/tasks/5-workunit/suites/fsstress.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/iozone.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/iozone.yaml new file mode 120000 index 000000000..f4d0ead4f --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/iozone.yaml @@ -0,0 +1 @@ +.qa/suites/fs/workload/tasks/5-workunit/suites/iozone.yaml
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/kernel_untar_build.yaml b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/kernel_untar_build.yaml new file mode 120000 index 000000000..317ebf8c4 --- /dev/null +++ b/qa/suites/fs/upgrade/upgraded_client/tasks/2-workload/stress_tests/1-tests/kernel_untar_build.yaml @@ -0,0 +1 @@ +.qa/suites/fs/workload/tasks/5-workunit/kernel_untar_build.yaml
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/% b/qa/suites/fs/valgrind/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/valgrind/% diff --git a/qa/suites/fs/valgrind/.qa b/qa/suites/fs/valgrind/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/begin b/qa/suites/fs/valgrind/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/valgrind/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/centos_latest.yaml b/qa/suites/fs/valgrind/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/fs/valgrind/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/debug.yaml b/qa/suites/fs/valgrind/debug.yaml new file mode 100644 index 000000000..5eed99cbb --- /dev/null +++ b/qa/suites/fs/valgrind/debug.yaml @@ -0,0 +1,4 @@ +overrides: + install: + ceph: + debuginfo: true diff --git a/qa/suites/fs/valgrind/mirror/% b/qa/suites/fs/valgrind/mirror/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/% diff --git a/qa/suites/fs/valgrind/mirror/.qa b/qa/suites/fs/valgrind/mirror/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/cephfs-mirror/.qa b/qa/suites/fs/valgrind/mirror/cephfs-mirror/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/cephfs-mirror/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/cephfs-mirror/one-per-cluster.yaml b/qa/suites/fs/valgrind/mirror/cephfs-mirror/one-per-cluster.yaml new file mode 100644 index 000000000..4112a0af0 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/cephfs-mirror/one-per-cluster.yaml @@ -0,0 +1,7 @@ +meta: +- desc: run one cephfs-mirror daemon on primary cluster + +tasks: +- cephfs-mirror: + client: client.mirror + valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] diff --git a/qa/suites/fs/valgrind/mirror/clients/.qa b/qa/suites/fs/valgrind/mirror/clients/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/clients/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/clients/mirror.yaml b/qa/suites/fs/valgrind/mirror/clients/mirror.yaml new file mode 100644 index 000000000..1a68fea8b --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/clients/mirror.yaml @@ -0,0 +1,18 @@ +meta: +- desc: configure the permissions for client.mirror +overrides: + ceph: + conf: + client: + debug cephfs_mirror: 20 + log to stderr: false + # make these predictable + client.mirror: + admin socket: /var/run/ceph/cephfs-mirror.asok + pid file: /var/run/ceph/cephfs-mirror.pid +tasks: +- exec: + client.mirror: + - "sudo ceph auth caps client.mirror mon 'profile cephfs-mirror' mds 'allow r' osd 'allow rw tag cephfs metadata=*, allow r tag cephfs data=*' mgr 'allow r'" + client.mirror_remote: + - "sudo ceph auth caps client.mirror_remote mon 'allow r' mds 'allow rwps' osd 'allow rw tag cephfs *=*' mgr 'allow r'" diff --git a/qa/suites/fs/valgrind/mirror/cluster/.qa b/qa/suites/fs/valgrind/mirror/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/cluster/1-node.yaml b/qa/suites/fs/valgrind/mirror/cluster/1-node.yaml new file mode 100644 index 000000000..cae4aca5e --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/cluster/1-node.yaml @@ -0,0 +1,17 @@ +meta: +- desc: 1 ceph cluster with 1 mon, 1 mgr, 3 osds, 5 mdss +roles: +- - mon.a + - mgr.x + - mds.a + - mds.b + - mds.c + - mds.d + - mds.e + - osd.0 + - osd.1 + - osd.2 + - client.0 + - client.1 + - client.mirror + - client.mirror_remote diff --git a/qa/suites/fs/valgrind/mirror/mount/.qa b/qa/suites/fs/valgrind/mirror/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/mount/fuse.yaml b/qa/suites/fs/valgrind/mirror/mount/fuse.yaml new file mode 100644 index 000000000..1fdf55ab4 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/mount/fuse.yaml @@ -0,0 +1,2 @@ +tasks: + - ceph-fuse: [client.0, client.1] diff --git a/qa/suites/fs/valgrind/mirror/overrides/.qa b/qa/suites/fs/valgrind/mirror/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/overrides/whitelist_health.yaml b/qa/suites/fs/valgrind/mirror/overrides/whitelist_health.yaml new file mode 100644 index 000000000..d40fa4cb8 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/overrides/whitelist_health.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(FS_DEGRADED\) + - \(MDS_FAILED\) + - \(MDS_DEGRADED\) + - \(FS_WITH_FAILED_MDS\) + - \(MDS_DAMAGE\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(FS_INLINE_DATA_DEPRECATED\) + - Reduced data availability + - Degraded data redundancy diff --git a/qa/suites/fs/valgrind/mirror/tasks/.qa b/qa/suites/fs/valgrind/mirror/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/tasks/mirror.yaml b/qa/suites/fs/valgrind/mirror/tasks/mirror.yaml new file mode 100644 index 000000000..07c1e24ef --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/tasks/mirror.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + mgr: + debug client: 10 + +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_mirroring.TestMirroring diff --git a/qa/suites/fs/verify/% b/qa/suites/fs/verify/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/verify/% diff --git a/qa/suites/fs/verify/.qa b/qa/suites/fs/verify/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/begin b/qa/suites/fs/verify/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/verify/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/verify/clusters/.qa b/qa/suites/fs/verify/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/clusters/1a5s-mds-1c-client.yaml b/qa/suites/fs/verify/clusters/1a5s-mds-1c-client.yaml new file mode 120000 index 000000000..2ab33af1c --- /dev/null +++ b/qa/suites/fs/verify/clusters/1a5s-mds-1c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a5s-mds-1c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/conf b/qa/suites/fs/verify/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/verify/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/verify/distro/$ b/qa/suites/fs/verify/distro/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/verify/distro/$ diff --git a/qa/suites/fs/verify/distro/.qa b/qa/suites/fs/verify/distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/distro/centos_8.yaml b/qa/suites/fs/verify/distro/centos_8.yaml new file mode 120000 index 000000000..380a1443b --- /dev/null +++ b/qa/suites/fs/verify/distro/centos_8.yaml @@ -0,0 +1 @@ +.qa/distros/all/centos_8.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/distro/rhel_8.yaml b/qa/suites/fs/verify/distro/rhel_8.yaml new file mode 120000 index 000000000..133acf27b --- /dev/null +++ b/qa/suites/fs/verify/distro/rhel_8.yaml @@ -0,0 +1 @@ +.qa/distros/all/rhel_8.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/distro/ubuntu/+ b/qa/suites/fs/verify/distro/ubuntu/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/verify/distro/ubuntu/+ diff --git a/qa/suites/fs/verify/distro/ubuntu/.qa b/qa/suites/fs/verify/distro/ubuntu/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/distro/ubuntu/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/distro/ubuntu/latest.yaml b/qa/suites/fs/verify/distro/ubuntu/latest.yaml new file mode 120000 index 000000000..162964882 --- /dev/null +++ b/qa/suites/fs/verify/distro/ubuntu/latest.yaml @@ -0,0 +1 @@ +.qa/distros/all/ubuntu_20.04.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/distro/ubuntu/overrides.yaml b/qa/suites/fs/verify/distro/ubuntu/overrides.yaml new file mode 100644 index 000000000..fdd7f5e5a --- /dev/null +++ b/qa/suites/fs/verify/distro/ubuntu/overrides.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + valgrind: + exit_on_first_error: false diff --git a/qa/suites/fs/verify/mount/.qa b/qa/suites/fs/verify/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/mount/fuse.yaml b/qa/suites/fs/verify/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/verify/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/mount/kclient/+ b/qa/suites/fs/verify/mount/kclient/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/verify/mount/kclient/+ diff --git a/qa/suites/fs/verify/mount/kclient/.qa b/qa/suites/fs/verify/mount/kclient/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/mount/kclient/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/mount/kclient/k-testing.yaml b/qa/suites/fs/verify/mount/kclient/k-testing.yaml new file mode 120000 index 000000000..bec80be29 --- /dev/null +++ b/qa/suites/fs/verify/mount/kclient/k-testing.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/overrides/distro/testing/k-testing.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/mount/kclient/mount.yaml b/qa/suites/fs/verify/mount/kclient/mount.yaml new file mode 120000 index 000000000..9967f23e2 --- /dev/null +++ b/qa/suites/fs/verify/mount/kclient/mount.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/mount.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/mount/kclient/ms-die-on-skipped.yaml b/qa/suites/fs/verify/mount/kclient/ms-die-on-skipped.yaml new file mode 120000 index 000000000..1912a0c91 --- /dev/null +++ b/qa/suites/fs/verify/mount/kclient/ms-die-on-skipped.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/overrides/ms-die-on-skipped.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/objectstore-ec b/qa/suites/fs/verify/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/verify/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/verify/overrides/+ b/qa/suites/fs/verify/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/verify/overrides/+ diff --git a/qa/suites/fs/verify/overrides/.qa b/qa/suites/fs/verify/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/overrides/ignorelist_health.yaml b/qa/suites/fs/verify/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/verify/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/verify/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/verify/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/overrides/mon-debug.yaml b/qa/suites/fs/verify/overrides/mon-debug.yaml new file mode 100644 index 000000000..6ed3e6d52 --- /dev/null +++ b/qa/suites/fs/verify/overrides/mon-debug.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + mon: + debug ms: 1 + debug mon: 20 diff --git a/qa/suites/fs/verify/overrides/session_timeout.yaml b/qa/suites/fs/verify/overrides/session_timeout.yaml new file mode 120000 index 000000000..fce0318c5 --- /dev/null +++ b/qa/suites/fs/verify/overrides/session_timeout.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/session_timeout.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/ranks/.qa b/qa/suites/fs/verify/ranks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/ranks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/ranks/1.yaml b/qa/suites/fs/verify/ranks/1.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/verify/ranks/1.yaml diff --git a/qa/suites/fs/verify/ranks/3.yaml b/qa/suites/fs/verify/ranks/3.yaml new file mode 100644 index 000000000..9ed043c14 --- /dev/null +++ b/qa/suites/fs/verify/ranks/3.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + cephfs: + max_mds: 3 + check-counter: + counters: + mds: + - mds.exported + - mds.imported diff --git a/qa/suites/fs/verify/ranks/5.yaml b/qa/suites/fs/verify/ranks/5.yaml new file mode 100644 index 000000000..ed89cef3a --- /dev/null +++ b/qa/suites/fs/verify/ranks/5.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + cephfs: + max_mds: 5 + check-counter: + counters: + mds: + - mds.exported + - mds.imported diff --git a/qa/suites/fs/verify/tasks/.qa b/qa/suites/fs/verify/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/tasks/dbench.yaml b/qa/suites/fs/verify/tasks/dbench.yaml new file mode 120000 index 000000000..b0f876c3c --- /dev/null +++ b/qa/suites/fs/verify/tasks/dbench.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_dbench.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/tasks/fsstress.yaml b/qa/suites/fs/verify/tasks/fsstress.yaml new file mode 120000 index 000000000..c2e859fff --- /dev/null +++ b/qa/suites/fs/verify/tasks/fsstress.yaml @@ -0,0 +1 @@ +.qa/cephfs/tasks/cfuse_workunit_suites_fsstress.yaml
\ No newline at end of file diff --git a/qa/suites/fs/verify/validater/.qa b/qa/suites/fs/verify/validater/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/verify/validater/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/verify/validater/lockdep.yaml b/qa/suites/fs/verify/validater/lockdep.yaml new file mode 100644 index 000000000..25f84355c --- /dev/null +++ b/qa/suites/fs/verify/validater/lockdep.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + lockdep: true diff --git a/qa/suites/fs/verify/validater/valgrind.yaml b/qa/suites/fs/verify/validater/valgrind.yaml new file mode 100644 index 000000000..930872fc7 --- /dev/null +++ b/qa/suites/fs/verify/validater/valgrind.yaml @@ -0,0 +1,29 @@ +overrides: + install: + ceph: + debuginfo: true + ceph: + # Valgrind makes everything slow, so ignore slow requests and extend heartbeat grace + log-ignorelist: + - slow request + - SLOW_OPS + - MON_DOWN + conf: + global: + osd heartbeat grace: 60 + mds heartbeat grace: 60 + mds beacon grace: 60 + mds: + mds valgrind exit: true + mon: + mon osd crush smoke test: false + osd: + osd fast shutdown: false + valgrind: + mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] + mds: [--tool=memcheck] + watchdog: + daemon_restart: normal + ceph-fuse: + client.0: + valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] diff --git a/qa/suites/fs/volumes/% b/qa/suites/fs/volumes/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/volumes/% diff --git a/qa/suites/fs/volumes/.qa b/qa/suites/fs/volumes/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/volumes/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/begin b/qa/suites/fs/volumes/begin new file mode 120000 index 000000000..77af91f7d --- /dev/null +++ b/qa/suites/fs/volumes/begin @@ -0,0 +1 @@ +.qa/cephfs/begin/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/clusters/.qa b/qa/suites/fs/volumes/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/volumes/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/clusters/1a3s-mds-4c-client.yaml b/qa/suites/fs/volumes/clusters/1a3s-mds-4c-client.yaml new file mode 120000 index 000000000..5c722a30b --- /dev/null +++ b/qa/suites/fs/volumes/clusters/1a3s-mds-4c-client.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a3s-mds-4c-client.yaml
\ No newline at end of file diff --git a/qa/suites/fs/volumes/conf b/qa/suites/fs/volumes/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/volumes/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/volumes/distro b/qa/suites/fs/volumes/distro new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/fs/volumes/distro @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/fs/volumes/mount b/qa/suites/fs/volumes/mount new file mode 120000 index 000000000..e3600f453 --- /dev/null +++ b/qa/suites/fs/volumes/mount @@ -0,0 +1 @@ +.qa/cephfs/mount/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/objectstore/.qa b/qa/suites/fs/volumes/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/volumes/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/objectstore/bluestore-bitmap.yaml b/qa/suites/fs/volumes/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/fs/volumes/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/fs/volumes/overrides/+ b/qa/suites/fs/volumes/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/volumes/overrides/+ diff --git a/qa/suites/fs/volumes/overrides/.qa b/qa/suites/fs/volumes/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/volumes/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/overrides/ignorelist_health.yaml b/qa/suites/fs/volumes/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/volumes/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/volumes/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/volumes/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/volumes/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/volumes/overrides/no_client_pidfile.yaml b/qa/suites/fs/volumes/overrides/no_client_pidfile.yaml new file mode 120000 index 000000000..8888f3327 --- /dev/null +++ b/qa/suites/fs/volumes/overrides/no_client_pidfile.yaml @@ -0,0 +1 @@ +.qa/overrides/no_client_pidfile.yaml
\ No newline at end of file diff --git a/qa/suites/fs/volumes/tasks/.qa b/qa/suites/fs/volumes/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/tasks/volumes/% b/qa/suites/fs/volumes/tasks/volumes/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/% diff --git a/qa/suites/fs/volumes/tasks/volumes/.qa b/qa/suites/fs/volumes/tasks/volumes/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/tasks/volumes/overrides.yaml b/qa/suites/fs/volumes/tasks/volumes/overrides.yaml new file mode 100644 index 000000000..fb15e5079 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/overrides.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + conf: + mgr: + debug client: 20 + debug ms: 1 + debug finisher: 20 + debug mgr: 20 + log-ignorelist: + - OSD full dropping all updates + - OSD near full + - pausewr flag + - failsafe engaged, dropping updates + - failsafe disengaged, no longer dropping + - is full \(reached quota + - POOL_FULL + - POOL_BACKFILLFULL diff --git a/qa/suites/fs/volumes/tasks/volumes/test/.qa b/qa/suites/fs/volumes/tasks/volumes/test/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/test/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/volumes/tasks/volumes/test/basic.yaml b/qa/suites/fs/volumes/tasks/volumes/test/basic.yaml new file mode 100644 index 000000000..b4c65cfc5 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/test/basic.yaml @@ -0,0 +1,8 @@ +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_volumes.TestVolumes + - tasks.cephfs.test_volumes.TestSubvolumeGroups + - tasks.cephfs.test_volumes.TestSubvolumes + - tasks.cephfs.test_subvolume.TestSubvolume diff --git a/qa/suites/fs/volumes/tasks/volumes/test/clone.yaml b/qa/suites/fs/volumes/tasks/volumes/test/clone.yaml new file mode 100644 index 000000000..e0c1f0150 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/test/clone.yaml @@ -0,0 +1,5 @@ +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_volumes.TestSubvolumeSnapshotClones diff --git a/qa/suites/fs/volumes/tasks/volumes/test/finisher_per_module.yaml b/qa/suites/fs/volumes/tasks/volumes/test/finisher_per_module.yaml new file mode 100644 index 000000000..ec8335fe0 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/test/finisher_per_module.yaml @@ -0,0 +1,13 @@ +tasks: + - check-counter: + counters: + mgr: + - name: "finisher-volumes.complete_latency.avgcount" + min: 4 + - name: "finisher-volumes.queue_len" + expected_val: 0 + + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_volumes.TestPerModuleFinsherThread diff --git a/qa/suites/fs/volumes/tasks/volumes/test/misc.yaml b/qa/suites/fs/volumes/tasks/volumes/test/misc.yaml new file mode 100644 index 000000000..1f6fd2b2d --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/test/misc.yaml @@ -0,0 +1,5 @@ +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_volumes.TestMisc diff --git a/qa/suites/fs/volumes/tasks/volumes/test/snapshot.yaml b/qa/suites/fs/volumes/tasks/volumes/test/snapshot.yaml new file mode 100644 index 000000000..d68201137 --- /dev/null +++ b/qa/suites/fs/volumes/tasks/volumes/test/snapshot.yaml @@ -0,0 +1,6 @@ +tasks: + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.cephfs.test_volumes.TestSubvolumeGroupSnapshots + - tasks.cephfs.test_volumes.TestSubvolumeSnapshots diff --git a/qa/suites/fs/workload/% b/qa/suites/fs/workload/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/% diff --git a/qa/suites/fs/workload/.qa b/qa/suites/fs/workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/0-rhel_8.yaml b/qa/suites/fs/workload/0-rhel_8.yaml new file mode 120000 index 000000000..c9abcd7b8 --- /dev/null +++ b/qa/suites/fs/workload/0-rhel_8.yaml @@ -0,0 +1 @@ +.qa/distros/podman/rhel_8.6_container_tools_rhel8.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/begin/+ b/qa/suites/fs/workload/begin/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/begin/+ diff --git a/qa/suites/fs/workload/begin/.qa b/qa/suites/fs/workload/begin/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/begin/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/begin/0-install.yaml b/qa/suites/fs/workload/begin/0-install.yaml new file mode 120000 index 000000000..3b1852973 --- /dev/null +++ b/qa/suites/fs/workload/begin/0-install.yaml @@ -0,0 +1 @@ +.qa/cephfs/begin/0-install.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/begin/1-cephadm.yaml b/qa/suites/fs/workload/begin/1-cephadm.yaml new file mode 100644 index 000000000..a58ea5725 --- /dev/null +++ b/qa/suites/fs/workload/begin/1-cephadm.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true +tasks: +- cephadm: + roleless: false +- cephadm.shell: + mon.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls +- cephadm.shell: + mon.a: + - ceph fs dump + - ceph osd dump +- fs.ready: + timeout: 300 diff --git a/qa/suites/fs/workload/begin/2-logrotate.yaml b/qa/suites/fs/workload/begin/2-logrotate.yaml new file mode 120000 index 000000000..9d6e7ba83 --- /dev/null +++ b/qa/suites/fs/workload/begin/2-logrotate.yaml @@ -0,0 +1 @@ +.qa/cephfs/begin/2-logrotate.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/clusters/.qa b/qa/suites/fs/workload/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/clusters/1a11s-mds-1c-client-3node.yaml b/qa/suites/fs/workload/clusters/1a11s-mds-1c-client-3node.yaml new file mode 120000 index 000000000..884134573 --- /dev/null +++ b/qa/suites/fs/workload/clusters/1a11s-mds-1c-client-3node.yaml @@ -0,0 +1 @@ +.qa/cephfs/clusters/1a11s-mds-1c-client-3node.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/conf b/qa/suites/fs/workload/conf new file mode 120000 index 000000000..16e8cc44b --- /dev/null +++ b/qa/suites/fs/workload/conf @@ -0,0 +1 @@ +.qa/cephfs/conf
\ No newline at end of file diff --git a/qa/suites/fs/workload/mount/.qa b/qa/suites/fs/workload/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/mount/fuse.yaml b/qa/suites/fs/workload/mount/fuse.yaml new file mode 120000 index 000000000..0e55da9fb --- /dev/null +++ b/qa/suites/fs/workload/mount/fuse.yaml @@ -0,0 +1 @@ +.qa/cephfs/mount/fuse.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/mount/kclient/% b/qa/suites/fs/workload/mount/kclient/% new file mode 100644 index 000000000..b8626c4cf --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/% @@ -0,0 +1 @@ +4 diff --git a/qa/suites/fs/workload/mount/kclient/.qa b/qa/suites/fs/workload/mount/kclient/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/mount/kclient/base b/qa/suites/fs/workload/mount/kclient/base new file mode 120000 index 000000000..22f94e150 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/base @@ -0,0 +1 @@ +.qa/cephfs/mount/kclient/
\ No newline at end of file diff --git a/qa/suites/fs/workload/mount/kclient/ms_mode/.qa b/qa/suites/fs/workload/mount/kclient/ms_mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/ms_mode/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/mount/kclient/ms_mode/crc.yaml b/qa/suites/fs/workload/mount/kclient/ms_mode/crc.yaml new file mode 100644 index 000000000..7efada467 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/ms_mode/crc.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + mntopts: ["ms_mode=crc"] diff --git a/qa/suites/fs/workload/mount/kclient/ms_mode/legacy.yaml b/qa/suites/fs/workload/mount/kclient/ms_mode/legacy.yaml new file mode 100644 index 000000000..8a68a7756 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/ms_mode/legacy.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + mntopts: ["ms_mode=legacy"] diff --git a/qa/suites/fs/workload/mount/kclient/ms_mode/secure.yaml b/qa/suites/fs/workload/mount/kclient/ms_mode/secure.yaml new file mode 100644 index 000000000..b4a4221d5 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/ms_mode/secure.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + mntopts: ["ms_mode=secure"] diff --git a/qa/suites/fs/workload/mount/kclient/wsync/.qa b/qa/suites/fs/workload/mount/kclient/wsync/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/wsync/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/mount/kclient/wsync/no.yaml b/qa/suites/fs/workload/mount/kclient/wsync/no.yaml new file mode 100644 index 000000000..1ed9e9953 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/wsync/no.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + mntopts: ["nowsync"] diff --git a/qa/suites/fs/workload/mount/kclient/wsync/yes.yaml b/qa/suites/fs/workload/mount/kclient/wsync/yes.yaml new file mode 100644 index 000000000..2061bac11 --- /dev/null +++ b/qa/suites/fs/workload/mount/kclient/wsync/yes.yaml @@ -0,0 +1,3 @@ +overrides: + kclient: + mntopts: ["wsync"] diff --git a/qa/suites/fs/workload/objectstore-ec b/qa/suites/fs/workload/objectstore-ec new file mode 120000 index 000000000..affe29493 --- /dev/null +++ b/qa/suites/fs/workload/objectstore-ec @@ -0,0 +1 @@ +.qa/cephfs/objectstore-ec
\ No newline at end of file diff --git a/qa/suites/fs/workload/omap_limit/.qa b/qa/suites/fs/workload/omap_limit/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/omap_limit/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/omap_limit/10.yaml b/qa/suites/fs/workload/omap_limit/10.yaml new file mode 100644 index 000000000..eec30f681 --- /dev/null +++ b/qa/suites/fs/workload/omap_limit/10.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd_max_omap_entries_per_request: 10 diff --git a/qa/suites/fs/workload/omap_limit/10000.yaml b/qa/suites/fs/workload/omap_limit/10000.yaml new file mode 100644 index 000000000..c4bea5538 --- /dev/null +++ b/qa/suites/fs/workload/omap_limit/10000.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd_max_omap_entries_per_request: 10000 diff --git a/qa/suites/fs/workload/overrides/+ b/qa/suites/fs/workload/overrides/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/overrides/+ diff --git a/qa/suites/fs/workload/overrides/.qa b/qa/suites/fs/workload/overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/overrides/cephsqlite-timeout.yaml b/qa/suites/fs/workload/overrides/cephsqlite-timeout.yaml new file mode 100644 index 000000000..5bd92554f --- /dev/null +++ b/qa/suites/fs/workload/overrides/cephsqlite-timeout.yaml @@ -0,0 +1,7 @@ +# increase lock renewal timeout: OSD stress from small clusters may cause +# spurious timeouts +overrides: + ceph: + conf: + mgr: + cephsqlite lock renewal timeout: 900000 diff --git a/qa/suites/fs/workload/overrides/frag.yaml b/qa/suites/fs/workload/overrides/frag.yaml new file mode 120000 index 000000000..5e5cdaed8 --- /dev/null +++ b/qa/suites/fs/workload/overrides/frag.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/frag.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/overrides/ignorelist_health.yaml b/qa/suites/fs/workload/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..5cb891a95 --- /dev/null +++ b/qa/suites/fs/workload/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/overrides/ignorelist_wrongly_marked_down.yaml b/qa/suites/fs/workload/overrides/ignorelist_wrongly_marked_down.yaml new file mode 120000 index 000000000..f317cb714 --- /dev/null +++ b/qa/suites/fs/workload/overrides/ignorelist_wrongly_marked_down.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/overrides/osd-asserts.yaml b/qa/suites/fs/workload/overrides/osd-asserts.yaml new file mode 120000 index 000000000..f290c749b --- /dev/null +++ b/qa/suites/fs/workload/overrides/osd-asserts.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/osd-asserts.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/overrides/session_timeout.yaml b/qa/suites/fs/workload/overrides/session_timeout.yaml new file mode 120000 index 000000000..fce0318c5 --- /dev/null +++ b/qa/suites/fs/workload/overrides/session_timeout.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/session_timeout.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/ranks/.qa b/qa/suites/fs/workload/ranks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/ranks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/ranks/1.yaml b/qa/suites/fs/workload/ranks/1.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/ranks/1.yaml diff --git a/qa/suites/fs/workload/ranks/multi/% b/qa/suites/fs/workload/ranks/multi/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/% diff --git a/qa/suites/fs/workload/ranks/multi/export-check.yaml b/qa/suites/fs/workload/ranks/multi/export-check.yaml new file mode 100644 index 000000000..80f210861 --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/export-check.yaml @@ -0,0 +1,6 @@ +overrides: + check-counter: + counters: + mds: + - mds.exported + - mds.imported diff --git a/qa/suites/fs/workload/ranks/multi/n/3.yaml b/qa/suites/fs/workload/ranks/multi/n/3.yaml new file mode 100644 index 000000000..9e6bddce0 --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/n/3.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + max_mds: 3 diff --git a/qa/suites/fs/workload/ranks/multi/n/5.yaml b/qa/suites/fs/workload/ranks/multi/n/5.yaml new file mode 100644 index 000000000..2265aa0f2 --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/n/5.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + max_mds: 5 diff --git a/qa/suites/fs/workload/ranks/multi/replication/always.yaml b/qa/suites/fs/workload/ranks/multi/replication/always.yaml new file mode 100644 index 000000000..099ced663 --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/replication/always.yaml @@ -0,0 +1,18 @@ +# To exercise lock/witness code paths more regularly, try to get all +# directories replicated. +overrides: + ceph: + conf: + mds: + mds_bal_replicate_threshold: 1 +# Note: dir_update is only sent by an MDS trying to replicate a dir. +# dir_update is always sent for root, so the count should be more than 2 + check-counter: + counters: + mds: + - + name: mds_cache.dir_update + min: 3 + - + name: mds_cache.dir_update_receipt + min: 3 diff --git a/qa/suites/fs/workload/ranks/multi/replication/default.yaml b/qa/suites/fs/workload/ranks/multi/replication/default.yaml new file mode 100644 index 000000000..272977942 --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/replication/default.yaml @@ -0,0 +1 @@ +# Use default (8000) diff --git a/qa/suites/fs/workload/standby-replay.yaml b/qa/suites/fs/workload/standby-replay.yaml new file mode 100644 index 000000000..b47d312bf --- /dev/null +++ b/qa/suites/fs/workload/standby-replay.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + standby_replay: true diff --git a/qa/suites/fs/workload/tasks/% b/qa/suites/fs/workload/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/tasks/% diff --git a/qa/suites/fs/workload/tasks/.qa b/qa/suites/fs/workload/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/0-subvolume/$ b/qa/suites/fs/workload/tasks/0-subvolume/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/tasks/0-subvolume/$ diff --git a/qa/suites/fs/workload/tasks/0-subvolume/.qa b/qa/suites/fs/workload/tasks/0-subvolume/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/0-subvolume/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/0-subvolume/no-subvolume.yaml b/qa/suites/fs/workload/tasks/0-subvolume/no-subvolume.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/tasks/0-subvolume/no-subvolume.yaml diff --git a/qa/suites/fs/workload/tasks/0-subvolume/with-namespace-isolated-and-quota.yaml b/qa/suites/fs/workload/tasks/0-subvolume/with-namespace-isolated-and-quota.yaml new file mode 100644 index 000000000..7129e54b5 --- /dev/null +++ b/qa/suites/fs/workload/tasks/0-subvolume/with-namespace-isolated-and-quota.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + subvols: + create: 2 + subvol_options: "--namespace-isolated --size 25000000000" + ceph-fuse: + client.0: + mount_subvol_num: 0 + kclient: + client.0: + mount_subvol_num: 1 diff --git a/qa/suites/fs/workload/tasks/0-subvolume/with-namespace-isolated.yaml b/qa/suites/fs/workload/tasks/0-subvolume/with-namespace-isolated.yaml new file mode 100644 index 000000000..2ac901fef --- /dev/null +++ b/qa/suites/fs/workload/tasks/0-subvolume/with-namespace-isolated.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + subvols: + create: 2 + subvol_options: "--namespace-isolated" + ceph-fuse: + client.0: + mount_subvol_num: 0 + kclient: + client.0: + mount_subvol_num: 1 diff --git a/qa/suites/fs/workload/tasks/0-subvolume/with-no-extra-options.yaml b/qa/suites/fs/workload/tasks/0-subvolume/with-no-extra-options.yaml new file mode 100644 index 000000000..40f98c2bc --- /dev/null +++ b/qa/suites/fs/workload/tasks/0-subvolume/with-no-extra-options.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + subvols: + create: 2 + ceph-fuse: + client.0: + mount_subvol_num: 0 + kclient: + client.0: + mount_subvol_num: 1 diff --git a/qa/suites/fs/workload/tasks/0-subvolume/with-quota.yaml b/qa/suites/fs/workload/tasks/0-subvolume/with-quota.yaml new file mode 100644 index 000000000..6cda00d4a --- /dev/null +++ b/qa/suites/fs/workload/tasks/0-subvolume/with-quota.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + subvols: + create: 2 + subvol_options: "--size 25000000000" + ceph-fuse: + client.0: + mount_subvol_num: 0 + kclient: + client.0: + mount_subvol_num: 1 diff --git a/qa/suites/fs/workload/tasks/1-check-counter.yaml b/qa/suites/fs/workload/tasks/1-check-counter.yaml new file mode 100644 index 000000000..6339ddb7c --- /dev/null +++ b/qa/suites/fs/workload/tasks/1-check-counter.yaml @@ -0,0 +1,2 @@ +tasks: +- check-counter: {} diff --git a/qa/suites/fs/workload/tasks/2-scrub/.qa b/qa/suites/fs/workload/tasks/2-scrub/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/2-scrub/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/2-scrub/no.yaml b/qa/suites/fs/workload/tasks/2-scrub/no.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/tasks/2-scrub/no.yaml diff --git a/qa/suites/fs/workload/tasks/2-scrub/yes.yaml b/qa/suites/fs/workload/tasks/2-scrub/yes.yaml new file mode 100644 index 000000000..52978a2bb --- /dev/null +++ b/qa/suites/fs/workload/tasks/2-scrub/yes.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - slow metadata IO + - SLOW_OPS + - slow request +tasks: +- fwd_scrub: + scrub_timeout: 900 + sleep_between_iterations: 1 diff --git a/qa/suites/fs/workload/tasks/3-snaps/.qa b/qa/suites/fs/workload/tasks/3-snaps/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/3-snaps/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/3-snaps/no.yaml b/qa/suites/fs/workload/tasks/3-snaps/no.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/tasks/3-snaps/no.yaml diff --git a/qa/suites/fs/workload/tasks/3-snaps/yes.yaml b/qa/suites/fs/workload/tasks/3-snaps/yes.yaml new file mode 100644 index 000000000..598f7e215 --- /dev/null +++ b/qa/suites/fs/workload/tasks/3-snaps/yes.yaml @@ -0,0 +1,30 @@ +overrides: + ceph: + conf: + mgr: + debug cephsqlite: 20 + check-counter: + counters: + mds: + - mds.root_rsnaps + - mds_server.req_mksnap_latency.avgcount + - mds_server.req_rmsnap_latency.avgcount +tasks: +- exec: + mon.a: + - ceph mgr module enable snap_schedule + - ceph config set mgr mgr/snap_schedule/allow_m_granularity true + - ceph config set mgr mgr/snap_schedule/dump_on_update true + - ceph fs snap-schedule add --fs=cephfs --path=/ --snap_schedule=1M + - ceph fs snap-schedule retention add --fs=cephfs --path=/ --retention-spec-or-period=6M3h + - ceph fs snap-schedule status --fs=cephfs --path=/ + - ceph fs snap-schedule list --fs=cephfs --path=/ --recursive=true + - date +%s > START_TIME +- full_sequential_finally: + - exec: + mon.a: + # Ensure that we have some snaps which get deleted (so check-counters does not fail) + - date +%s > END_TIME + - START_TIME=$(cat START_TIME); END_TIME=$(cat END_TIME); DIFF_TIME=$((600-(END_TIME-START_TIME))); if [ "$DIFF_TIME" -gt 0 ]; then sleep "$DIFF_TIME"; fi + - ceph fs snap-schedule status --fs=cephfs --path=/ + - ceph fs snap-schedule list --fs=cephfs --path=/ --recursive=true diff --git a/qa/suites/fs/workload/tasks/4-flush/.qa b/qa/suites/fs/workload/tasks/4-flush/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/4-flush/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/4-flush/no.yaml b/qa/suites/fs/workload/tasks/4-flush/no.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/fs/workload/tasks/4-flush/no.yaml diff --git a/qa/suites/fs/workload/tasks/4-flush/yes.yaml b/qa/suites/fs/workload/tasks/4-flush/yes.yaml new file mode 100644 index 000000000..4a3f7a11c --- /dev/null +++ b/qa/suites/fs/workload/tasks/4-flush/yes.yaml @@ -0,0 +1,4 @@ +tasks: +- background_exec: + mon.a: + - while sleep 13; do ceph tell mds.cephfs:0 flush journal; done diff --git a/qa/suites/fs/workload/tasks/5-workunit/.qa b/qa/suites/fs/workload/tasks/5-workunit/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/5-workunit/direct_io.yaml b/qa/suites/fs/workload/tasks/5-workunit/direct_io.yaml new file mode 100644 index 000000000..6c6ea1422 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/direct_io.yaml @@ -0,0 +1,9 @@ +overrides: + check-counter: + dry_run: true +tasks: +- workunit: + clients: + all: + - direct_io + diff --git a/qa/suites/fs/workload/tasks/5-workunit/fs/.qa b/qa/suites/fs/workload/tasks/5-workunit/fs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/fs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/5-workunit/fs/misc.yaml b/qa/suites/fs/workload/tasks/5-workunit/fs/misc.yaml new file mode 100644 index 000000000..4a9d0b4c5 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/fs/misc.yaml @@ -0,0 +1,10 @@ +overrides: + check-counter: + counters: + mds: + - "mds.dir_split" +tasks: +- workunit: + clients: + all: + - fs/misc diff --git a/qa/suites/fs/workload/tasks/5-workunit/fs/norstats.yaml b/qa/suites/fs/workload/tasks/5-workunit/fs/norstats.yaml new file mode 100644 index 000000000..d48df5471 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/fs/norstats.yaml @@ -0,0 +1,17 @@ +overrides: + check-counter: + counters: + mds: + - "mds.dir_split" +tasks: +- workunit: + clients: + all: + - fs/norstats +overrides: + kclient: + rbytes: false + ceph: + conf: + client: + client dirsize rbytes: false diff --git a/qa/suites/fs/workload/tasks/5-workunit/fs/test_o_trunc.yaml b/qa/suites/fs/workload/tasks/5-workunit/fs/test_o_trunc.yaml new file mode 100644 index 000000000..7b2b7c536 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/fs/test_o_trunc.yaml @@ -0,0 +1,8 @@ +overrides: + check-counter: + dry_run: true +tasks: +- workunit: + clients: + all: + - fs/test_o_trunc.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/kernel_untar_build.yaml b/qa/suites/fs/workload/tasks/5-workunit/kernel_untar_build.yaml new file mode 100644 index 000000000..602d34162 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/kernel_untar_build.yaml @@ -0,0 +1,10 @@ +overrides: + check-counter: + counters: + mds: + - "mds.dir_split" +tasks: +- workunit: + clients: + all: + - kernel_untar_build.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/postgres.yaml b/qa/suites/fs/workload/tasks/5-workunit/postgres.yaml new file mode 100644 index 000000000..7e71dbc88 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/postgres.yaml @@ -0,0 +1,36 @@ +# I would expect setting the context on the postgresql database directories +# would correctly trickle down to the files created by the postgresql daemon, +# but this does not appear to work. I would still see denials like: + +# type=AVC msg=audit(1655861665.521:21354): avc: denied { create } for pid=131994 comm="postmaster" name="replorigin_checkpoint.tmp" scontext=system_u:system_r:postgresql_t:s0 tcontext=system_u:object_r:cephfs_t:s0 tclass=file permissive=1' + +# Instead, we'll just set the context for the mount and be done with it. I've +# left in the context setting for the directories below. + +overrides: + ceph-fuse: + client.0: + mountpoint: /tmp/cephfs + mntopts: ["context=system_u:object_r:postgresql_db_t:s0"] + kclient: + client.0: + mountpoint: /tmp/cephfs + mntopts: ["context=system_u:object_r:postgresql_db_t:s0"] +tasks: +- exec: + client.0: + - sudo ls -l /tmp/cephfs/ && sudo df -h /tmp/cephfs/ + - sudo mkdir -m 755 --context=system_u:system_r:postgresql_t:s0 /tmp/cephfs/postgres && sudo chown postgres:postgres /tmp/cephfs/postgres + - sudo -u postgres -- mkdir -m 700 --context=system_u:system_r:postgresql_t:s0 /tmp/cephfs/postgres/data + - sudo semanage fcontext -a -t postgresql_db_t "/tmp/cephfs/postgres(/.*)?" 
+ - sudo grep -i postgresql /etc/selinux/targeted/contexts/files/file_contexts.local + - sudo restorecon -R -v /tmp/cephfs/postgres + - sudo ls -lZaR /tmp/cephfs/postgres/ + - sudo mkdir -p /etc/systemd/system/postgresql.service.d/ && printf '[Service]\nEnvironment=PGDATA=/tmp/cephfs/postgres/data\nEnvironment=PGLOG=/tmp/cephfs/postgres/pgstartup.log\n' | sudo tee /etc/systemd/system/postgresql.service.d/env.conf + - sudo -u postgres -- postgresql-setup --initdb + - sudo ls -lZaR /tmp/cephfs/postgres/ + - sudo systemctl start postgresql + - sudo -u postgres -- pgbench -s 32 -i + - sudo -u postgres -- pgbench -c 100 -j 4 --progress=5 --time=900 + - sudo systemctl stop postgresql + - sudo ls -lZaR /tmp/cephfs/postgres/ diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/.qa b/qa/suites/fs/workload/tasks/5-workunit/suites/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/blogbench.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/blogbench.yaml new file mode 100644 index 000000000..4c4bf2f9e --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/blogbench.yaml @@ -0,0 +1,10 @@ +overrides: + check-counter: + counters: + mds: + - "mds.dir_split" +tasks: +- workunit: + clients: + all: + - suites/blogbench.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/dbench.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/dbench.yaml new file mode 100644 index 000000000..41b2bc8ed --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/dbench.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/ffsb.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/ffsb.yaml new file mode 100644 index 000000000..9bc925ab0 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/ffsb.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + log-ignorelist: + - SLOW_OPS + - slow request + conf: + osd: + filestore flush min: 0 + osd heartbeat grace: 60 + check-counter: + counters: + mds: + - "mds.dir_split" +tasks: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/fsstress.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/fsstress.yaml new file mode 100644 index 000000000..bae220292 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/fsx.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/fsx.yaml new file mode 100644 index 000000000..12d456cf4 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/fsx.yaml @@ -0,0 +1,10 @@ +overrides: + check-counter: + counters: + mds: + - "mds.dir_split" +tasks: +- workunit: + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/fsync-tester.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/fsync-tester.yaml new file mode 100644 index 000000000..9aaf6d0c4 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/fsync-tester.yaml @@ -0,0 +1,8 @@ +overrides: + check-counter: + dry_run: true +tasks: +- workunit: + clients: + all: + - suites/fsync-tester.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/iogen.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/iogen.yaml new file mode 100644 index 000000000..b8beb9e97 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/iogen.yaml @@ -0,0 +1,8 @@ +overrides: + check-counter: + dry_run: true +tasks: +- workunit: + clients: + all: + - suites/iogen.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/iozone.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/iozone.yaml new file mode 100644 index 000000000..bf5fd25b3 --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/iozone.yaml @@ -0,0 +1,8 @@ +overrides: + check-counter: + dry_run: true +tasks: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/fs/workload/tasks/5-workunit/suites/pjd.yaml b/qa/suites/fs/workload/tasks/5-workunit/suites/pjd.yaml new file mode 100644 index 000000000..37e315f7e --- /dev/null +++ b/qa/suites/fs/workload/tasks/5-workunit/suites/pjd.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + client: + fuse set user groups: true + fuse default permissions: 
false +tasks: +- workunit: + timeout: 6h + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/hadoop/.qa b/qa/suites/hadoop/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/hadoop/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/hadoop/basic/% b/qa/suites/hadoop/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/hadoop/basic/% diff --git a/qa/suites/hadoop/basic/.qa b/qa/suites/hadoop/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/hadoop/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/hadoop/basic/clusters/.qa b/qa/suites/hadoop/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/hadoop/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/hadoop/basic/clusters/fixed-3.yaml b/qa/suites/hadoop/basic/clusters/fixed-3.yaml new file mode 100644 index 000000000..56b0be4cf --- /dev/null +++ b/qa/suites/hadoop/basic/clusters/fixed-3.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + client: + client permissions: false +roles: +- [mon.0, mds.a, osd.0, hadoop.master.0] +- [mon.1, mgr.x, osd.1, hadoop.slave.0] +- [mon.2, mgr.y, hadoop.slave.1, client.0] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB diff --git a/qa/suites/hadoop/basic/distros/.qa b/qa/suites/hadoop/basic/distros/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/hadoop/basic/distros/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/hadoop/basic/distros/ubuntu_latest.yaml b/qa/suites/hadoop/basic/distros/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/hadoop/basic/distros/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/hadoop/basic/tasks/.qa b/qa/suites/hadoop/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/hadoop/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/hadoop/basic/tasks/repl.yaml b/qa/suites/hadoop/basic/tasks/repl.yaml new file mode 100644 index 000000000..60cdcca32 --- /dev/null +++ b/qa/suites/hadoop/basic/tasks/repl.yaml @@ -0,0 +1,8 @@ +tasks: +- ssh_keys: +- install: +- ceph: +- hadoop: +- workunit: + clients: + client.0: [hadoop/repl.sh] diff --git a/qa/suites/hadoop/basic/tasks/terasort.yaml b/qa/suites/hadoop/basic/tasks/terasort.yaml new file mode 100644 index 000000000..4377894f5 --- /dev/null +++ b/qa/suites/hadoop/basic/tasks/terasort.yaml @@ -0,0 +1,10 @@ +tasks: +- ssh_keys: +- install: +- ceph: +- hadoop: +- workunit: + clients: + client.0: [hadoop/terasort.sh] + env: + NUM_RECORDS: "10000000" diff --git a/qa/suites/hadoop/basic/tasks/wordcount.yaml b/qa/suites/hadoop/basic/tasks/wordcount.yaml new file mode 100644 index 000000000..b84941b81 --- /dev/null +++ b/qa/suites/hadoop/basic/tasks/wordcount.yaml @@ -0,0 +1,8 @@ +tasks: +- ssh_keys: +- install: +- ceph: +- hadoop: +- workunit: + clients: + client.0: [hadoop/wordcount.sh] diff --git a/qa/suites/krbd/.qa b/qa/suites/krbd/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/% b/qa/suites/krbd/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/basic/% diff --git a/qa/suites/krbd/basic/.qa b/qa/suites/krbd/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/bluestore-bitmap.yaml b/qa/suites/krbd/basic/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/basic/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/basic/ceph/.qa b/qa/suites/krbd/basic/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/basic/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/ceph/ceph.yaml b/qa/suites/krbd/basic/ceph/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/krbd/basic/ceph/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/krbd/basic/clusters/.qa b/qa/suites/krbd/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/clusters/fixed-1.yaml b/qa/suites/krbd/basic/clusters/fixed-1.yaml new file mode 120000 index 000000000..02df5dd0c --- /dev/null +++ b/qa/suites/krbd/basic/clusters/fixed-1.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-1.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/basic/conf.yaml b/qa/suites/krbd/basic/conf.yaml new file mode 100644 index 000000000..41292fa81 --- /dev/null +++ b/qa/suites/krbd/basic/conf.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/basic/ms_mode/.qa b/qa/suites/krbd/basic/ms_mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/ms_mode/crc$/.qa b/qa/suites/krbd/basic/ms_mode/crc$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/crc$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/ms_mode/crc$/crc-rxbounce.yaml b/qa/suites/krbd/basic/ms_mode/crc$/crc-rxbounce.yaml new file mode 100644 index 000000000..4d27d0113 --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/crc$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce diff --git a/qa/suites/krbd/basic/ms_mode/crc$/crc.yaml b/qa/suites/krbd/basic/ms_mode/crc$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/crc$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/qa/suites/krbd/basic/ms_mode/legacy$/.qa b/qa/suites/krbd/basic/ms_mode/legacy$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/legacy$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/ms_mode/legacy$/legacy-rxbounce.yaml b/qa/suites/krbd/basic/ms_mode/legacy$/legacy-rxbounce.yaml new file mode 100644 index 000000000..244e45cbc --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/legacy$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce diff --git a/qa/suites/krbd/basic/ms_mode/legacy$/legacy.yaml b/qa/suites/krbd/basic/ms_mode/legacy$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/legacy$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/qa/suites/krbd/basic/ms_mode/secure.yaml b/qa/suites/krbd/basic/ms_mode/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/qa/suites/krbd/basic/ms_mode/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/qa/suites/krbd/basic/tasks/.qa b/qa/suites/krbd/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/basic/tasks/krbd_deep_flatten.yaml b/qa/suites/krbd/basic/tasks/krbd_deep_flatten.yaml new file mode 100644 index 000000000..a821b73a3 --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_deep_flatten.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_deep_flatten.t diff --git a/qa/suites/krbd/basic/tasks/krbd_discard.yaml b/qa/suites/krbd/basic/tasks/krbd_discard.yaml new file mode 100644 index 000000000..59ec5b943 --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_discard.yaml @@ -0,0 +1,9 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_discard.t + - qa/rbd/krbd_discard_512b.t + - qa/rbd/krbd_discard_4M.t + - qa/rbd/krbd_zeroout.t + - qa/rbd/krbd_discard_granularity.t diff --git a/qa/suites/krbd/basic/tasks/krbd_huge_image.yaml b/qa/suites/krbd/basic/tasks/krbd_huge_image.yaml new file mode 100644 index 000000000..15ff033c7 --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_huge_image.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_huge_image.t diff --git a/qa/suites/krbd/basic/tasks/krbd_modprobe.yaml b/qa/suites/krbd/basic/tasks/krbd_modprobe.yaml new file mode 100644 index 000000000..22f02cd40 --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_modprobe.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_modprobe.t diff --git a/qa/suites/krbd/basic/tasks/krbd_msgr_segments.yaml b/qa/suites/krbd/basic/tasks/krbd_msgr_segments.yaml new file mode 100644 index 000000000..cfa524e70 --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_msgr_segments.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_msgr_segments.t diff --git a/qa/suites/krbd/basic/tasks/krbd_parent_overlap.yaml b/qa/suites/krbd/basic/tasks/krbd_parent_overlap.yaml new file mode 100644 index 000000000..9bcf1fa35 --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_parent_overlap.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_parent_overlap.t diff --git a/qa/suites/krbd/basic/tasks/krbd_read_only.yaml b/qa/suites/krbd/basic/tasks/krbd_read_only.yaml new file mode 100644 index 000000000..8194b89ce --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_read_only.yaml @@ -0,0 +1,6 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_blkroset.t + - qa/rbd/krbd_get_features.t diff --git a/qa/suites/krbd/basic/tasks/krbd_whole_object_zeroout.yaml b/qa/suites/krbd/basic/tasks/krbd_whole_object_zeroout.yaml new file mode 100644 index 000000000..3b0ff8d1e --- /dev/null +++ b/qa/suites/krbd/basic/tasks/krbd_whole_object_zeroout.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_whole_object_zeroout.t diff --git a/qa/suites/krbd/fsx/% b/qa/suites/krbd/fsx/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/fsx/% diff --git a/qa/suites/krbd/fsx/.qa b/qa/suites/krbd/fsx/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/ceph/.qa b/qa/suites/krbd/fsx/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/ceph/ceph.yaml b/qa/suites/krbd/fsx/ceph/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/krbd/fsx/ceph/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/krbd/fsx/clusters/.qa b/qa/suites/krbd/fsx/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/clusters/3-node.yaml b/qa/suites/krbd/fsx/clusters/3-node.yaml new file mode 100644 index 000000000..0433ec9be --- /dev/null +++ b/qa/suites/krbd/fsx/clusters/3-node.yaml @@ -0,0 +1,14 @@ +# fixed-3.yaml, but with two additional clients on the same target +roles: +- [mon.a, mon.c, mgr.x, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mgr.y, osd.4, osd.5, osd.6, osd.7] +- [client.0, client.1, client.2] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/krbd/fsx/conf.yaml b/qa/suites/krbd/fsx/conf.yaml new file mode 100644 index 000000000..eb6d72a80 --- /dev/null +++ b/qa/suites/krbd/fsx/conf.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false diff --git a/qa/suites/krbd/fsx/features/.qa b/qa/suites/krbd/fsx/features/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/features/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/features/no-object-map.yaml b/qa/suites/krbd/fsx/features/no-object-map.yaml new file mode 100644 index 000000000..809c77093 --- /dev/null +++ b/qa/suites/krbd/fsx/features/no-object-map.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + client: + # layering, deep-flatten + rbd default features: 33 diff --git a/qa/suites/krbd/fsx/features/object-map.yaml b/qa/suites/krbd/fsx/features/object-map.yaml new file mode 100644 index 000000000..35e7e9d0b --- /dev/null +++ b/qa/suites/krbd/fsx/features/object-map.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + client: + # layering, exclusive-lock, object-map, fast-diff, deep-flatten + rbd default features: 61 diff --git a/qa/suites/krbd/fsx/ms_mode$/.qa b/qa/suites/krbd/fsx/ms_mode$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/ms_mode$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/ms_mode$/crc-rxbounce.yaml b/qa/suites/krbd/fsx/ms_mode$/crc-rxbounce.yaml new file mode 100644 index 000000000..fb9c3dec2 --- /dev/null +++ b/qa/suites/krbd/fsx/ms_mode$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce,read_from_replica=balance diff --git a/qa/suites/krbd/fsx/ms_mode$/crc.yaml b/qa/suites/krbd/fsx/ms_mode$/crc.yaml new file mode 100644 index 000000000..d11be3887 --- /dev/null +++ b/qa/suites/krbd/fsx/ms_mode$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,read_from_replica=balance diff --git a/qa/suites/krbd/fsx/ms_mode$/legacy-rxbounce.yaml b/qa/suites/krbd/fsx/ms_mode$/legacy-rxbounce.yaml new file mode 100644 index 000000000..3306c1e57 --- /dev/null +++ b/qa/suites/krbd/fsx/ms_mode$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce,read_from_replica=balance diff --git a/qa/suites/krbd/fsx/ms_mode$/legacy.yaml b/qa/suites/krbd/fsx/ms_mode$/legacy.yaml new file mode 100644 index 000000000..2b7116c03 --- /dev/null +++ b/qa/suites/krbd/fsx/ms_mode$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,read_from_replica=balance diff --git a/qa/suites/krbd/fsx/ms_mode$/secure.yaml b/qa/suites/krbd/fsx/ms_mode$/secure.yaml new file mode 100644 index 000000000..671b73f9c --- /dev/null +++ b/qa/suites/krbd/fsx/ms_mode$/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure,read_from_replica=balance diff --git a/qa/suites/krbd/fsx/objectstore/.qa b/qa/suites/krbd/fsx/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/objectstore/bluestore-bitmap.yaml b/qa/suites/krbd/fsx/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/fsx/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/striping/.qa b/qa/suites/krbd/fsx/striping/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/striping/default/% b/qa/suites/krbd/fsx/striping/default/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/fsx/striping/default/% diff --git a/qa/suites/krbd/fsx/striping/default/.qa b/qa/suites/krbd/fsx/striping/default/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/default/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/striping/default/msgr-failures/.qa b/qa/suites/krbd/fsx/striping/default/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/default/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/striping/default/msgr-failures/few.yaml b/qa/suites/krbd/fsx/striping/default/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/default/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/fsx/striping/default/msgr-failures/many.yaml b/qa/suites/krbd/fsx/striping/default/msgr-failures/many.yaml new file mode 100644 index 000000000..e3855297d --- /dev/null +++ b/qa/suites/krbd/fsx/striping/default/msgr-failures/many.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/fsx/striping/default/randomized-striping-off.yaml b/qa/suites/krbd/fsx/striping/default/randomized-striping-off.yaml new file mode 100644 index 000000000..0bf96a8d0 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/default/randomized-striping-off.yaml @@ -0,0 +1,3 @@ +overrides: + rbd_fsx: + randomized_striping: false diff --git a/qa/suites/krbd/fsx/striping/fancy/% b/qa/suites/krbd/fsx/striping/fancy/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/fsx/striping/fancy/% diff --git a/qa/suites/krbd/fsx/striping/fancy/.qa b/qa/suites/krbd/fsx/striping/fancy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/fancy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/striping/fancy/msgr-failures/.qa b/qa/suites/krbd/fsx/striping/fancy/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/fancy/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/striping/fancy/msgr-failures/few.yaml b/qa/suites/krbd/fsx/striping/fancy/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/krbd/fsx/striping/fancy/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/fsx/striping/fancy/randomized-striping-on.yaml b/qa/suites/krbd/fsx/striping/fancy/randomized-striping-on.yaml new file mode 100644 index 000000000..c2823e4ed --- /dev/null +++ b/qa/suites/krbd/fsx/striping/fancy/randomized-striping-on.yaml @@ -0,0 +1,3 @@ +overrides: + rbd_fsx: + randomized_striping: true diff --git a/qa/suites/krbd/fsx/tasks/.qa b/qa/suites/krbd/fsx/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/fsx/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/fsx/tasks/fsx-1-client.yaml b/qa/suites/krbd/fsx/tasks/fsx-1-client.yaml new file mode 100644 index 000000000..b0af9829f --- /dev/null +++ b/qa/suites/krbd/fsx/tasks/fsx-1-client.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 20000 + krbd: true + readbdy: 512 + writebdy: 512 + truncbdy: 512 + holebdy: 512 + punch_holes: true diff --git a/qa/suites/krbd/fsx/tasks/fsx-3-client.yaml b/qa/suites/krbd/fsx/tasks/fsx-3-client.yaml new file mode 100644 index 000000000..5b8e37012 --- /dev/null +++ b/qa/suites/krbd/fsx/tasks/fsx-3-client.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fsx: + clients: [client.0, client.1, client.2] + ops: 10000 + krbd: true + readbdy: 512 + writebdy: 512 + truncbdy: 512 + holebdy: 512 + punch_holes: true diff --git a/qa/suites/krbd/ms_modeless/% b/qa/suites/krbd/ms_modeless/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/ms_modeless/% diff --git a/qa/suites/krbd/ms_modeless/.qa b/qa/suites/krbd/ms_modeless/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/ms_modeless/bluestore-bitmap.yaml b/qa/suites/krbd/ms_modeless/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/ms_modeless/ceph/.qa b/qa/suites/krbd/ms_modeless/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/ms_modeless/ceph/ceph.yaml b/qa/suites/krbd/ms_modeless/ceph/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/ceph/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/krbd/ms_modeless/clusters/.qa b/qa/suites/krbd/ms_modeless/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/ms_modeless/clusters/fixed-3.yaml b/qa/suites/krbd/ms_modeless/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/ms_modeless/conf.yaml b/qa/suites/krbd/ms_modeless/conf.yaml new file mode 100644 index 000000000..eb6d72a80 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/conf.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false diff --git a/qa/suites/krbd/ms_modeless/tasks/.qa b/qa/suites/krbd/ms_modeless/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/ms_modeless/tasks/krbd_default_map_options.yaml b/qa/suites/krbd/ms_modeless/tasks/krbd_default_map_options.yaml new file mode 100644 index 000000000..c8c12f173 --- /dev/null +++ b/qa/suites/krbd/ms_modeless/tasks/krbd_default_map_options.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - qa/rbd/krbd_default_map_options.t diff --git a/qa/suites/krbd/ms_modeless/tasks/krbd_rxbounce.yaml b/qa/suites/krbd/ms_modeless/tasks/krbd_rxbounce.yaml new file mode 100644 index 000000000..4ecd0e83e --- /dev/null +++ b/qa/suites/krbd/ms_modeless/tasks/krbd_rxbounce.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_rxbounce.sh diff --git a/qa/suites/krbd/rbd-nomount/% b/qa/suites/krbd/rbd-nomount/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/% diff --git a/qa/suites/krbd/rbd-nomount/.qa b/qa/suites/krbd/rbd-nomount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/bluestore-bitmap.yaml b/qa/suites/krbd/rbd-nomount/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/clusters/.qa b/qa/suites/krbd/rbd-nomount/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/clusters/fixed-3.yaml b/qa/suites/krbd/rbd-nomount/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/conf.yaml b/qa/suites/krbd/rbd-nomount/conf.yaml new file mode 100644 index 000000000..41292fa81 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/conf.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/rbd-nomount/install/.qa b/qa/suites/krbd/rbd-nomount/install/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/install/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/install/ceph.yaml b/qa/suites/krbd/rbd-nomount/install/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/install/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/.qa b/qa/suites/krbd/rbd-nomount/ms_mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/crc$/.qa b/qa/suites/krbd/rbd-nomount/ms_mode/crc$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/crc$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/crc$/crc-rxbounce.yaml b/qa/suites/krbd/rbd-nomount/ms_mode/crc$/crc-rxbounce.yaml new file mode 100644 index 000000000..4d27d0113 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/crc$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/crc$/crc.yaml b/qa/suites/krbd/rbd-nomount/ms_mode/crc$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/crc$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/.qa b/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/legacy-rxbounce.yaml b/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/legacy-rxbounce.yaml new file mode 100644 index 000000000..244e45cbc --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/legacy.yaml b/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/legacy$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/qa/suites/krbd/rbd-nomount/ms_mode/secure.yaml b/qa/suites/krbd/rbd-nomount/ms_mode/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/ms_mode/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/qa/suites/krbd/rbd-nomount/msgr-failures/.qa b/qa/suites/krbd/rbd-nomount/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/msgr-failures/few.yaml b/qa/suites/krbd/rbd-nomount/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/rbd-nomount/msgr-failures/many.yaml b/qa/suites/krbd/rbd-nomount/msgr-failures/many.yaml new file mode 100644 index 000000000..e3855297d --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/msgr-failures/many.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/rbd-nomount/tasks/.qa b/qa/suites/krbd/rbd-nomount/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_data_pool.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_data_pool.yaml new file mode 100644 index 000000000..35b9d67eb --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_data_pool.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_data_pool.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_exclusive_option.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_exclusive_option.yaml new file mode 100644 index 000000000..567deebfd --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_exclusive_option.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_exclusive_option.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_fallocate.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_fallocate.yaml new file mode 100644 index 000000000..a72869824 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_fallocate.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_fallocate.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_huge_osdmap.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_huge_osdmap.yaml new file mode 100644 index 000000000..3148b32a1 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_huge_osdmap.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + mon: + mon max osd: 60000 +tasks: +- workunit: + clients: + all: + - rbd/krbd_huge_osdmap.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_latest_osdmap_on_map.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_latest_osdmap_on_map.yaml new file mode 100644 index 000000000..522be6a42 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_latest_osdmap_on_map.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_latest_osdmap_on_map.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_namespaces.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_namespaces.yaml new file mode 100644 index 000000000..4d6519a25 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_namespaces.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_namespaces.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_enumerate.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_enumerate.yaml new file mode 100644 index 000000000..c326507ac --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_enumerate.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_udev_enumerate.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netlink_enobufs.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netlink_enobufs.yaml new file mode 100644 index 000000000..ed1b2ae63 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netlink_enobufs.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - pauserd,pausewr flag\(s\) set + +tasks: +- workunit: + clients: + all: + - rbd/krbd_udev_netlink_enobufs.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netns.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netns.yaml new file mode 100644 index 000000000..21e06e388 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_netns.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_udev_netns.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_symlinks.yaml b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_symlinks.yaml new file mode 100644 index 000000000..ee79932f5 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/krbd_udev_symlinks.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - 
rbd/krbd_udev_symlinks.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_concurrent.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_concurrent.yaml new file mode 100644 index 000000000..675b98e73 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_concurrent.yaml @@ -0,0 +1,10 @@ +tasks: +- workunit: + clients: + all: + - rbd/concurrent.sh +# Options for rbd/concurrent.sh (default values shown) +# env: +# RBD_CONCURRENT_ITER: 100 +# RBD_CONCURRENT_COUNT: 5 +# RBD_CONCURRENT_DELAY: 5 diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_huge_tickets.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_huge_tickets.yaml new file mode 100644 index 000000000..ea421eec1 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_huge_tickets.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/huge-tickets.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_image_read.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_image_read.yaml new file mode 100644 index 000000000..e5017e118 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_image_read.yaml @@ -0,0 +1,15 @@ +tasks: +- workunit: + clients: + all: + - rbd/image_read.sh +# Options for rbd/image_read.sh (default values shown) +# env: +# IMAGE_READ_LOCAL_FILES: 'false' +# IMAGE_READ_FORMAT: '2' +# IMAGE_READ_VERBOSE: 'true' +# IMAGE_READ_PAGE_SIZE: '4096' +# IMAGE_READ_OBJECT_ORDER: '22' +# IMAGE_READ_TEST_CLONES: 'true' +# IMAGE_READ_DOUBLE_ORDER: 'true' +# IMAGE_READ_HALF_ORDER: 'false' diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_kernel.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_kernel.yaml new file mode 100644 index 000000000..aa155827c --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_kernel.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/kernel.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_map_snapshot_io.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_map_snapshot_io.yaml new file mode 100644 index 000000000..c1529398b --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_map_snapshot_io.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/map-snapshot-io.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_map_unmap.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_map_unmap.yaml new file mode 100644 index 000000000..c2160997c --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_map_unmap.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/map-unmap.sh diff --git a/qa/suites/krbd/rbd-nomount/tasks/rbd_simple_big.yaml b/qa/suites/krbd/rbd-nomount/tasks/rbd_simple_big.yaml new file mode 100644 index 000000000..c493cfaf4 --- /dev/null +++ b/qa/suites/krbd/rbd-nomount/tasks/rbd_simple_big.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + clients: + all: + - rbd/simple_big.sh + diff --git a/qa/suites/krbd/rbd/% b/qa/suites/krbd/rbd/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/rbd/% diff --git a/qa/suites/krbd/rbd/.qa b/qa/suites/krbd/rbd/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/bluestore-bitmap.yaml b/qa/suites/krbd/rbd/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/rbd/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/clusters/.qa b/qa/suites/krbd/rbd/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/clusters/fixed-3.yaml b/qa/suites/krbd/rbd/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/krbd/rbd/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/conf.yaml b/qa/suites/krbd/rbd/conf.yaml new file mode 100644 index 000000000..41292fa81 --- /dev/null +++ b/qa/suites/krbd/rbd/conf.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/rbd/ms_mode/.qa b/qa/suites/krbd/rbd/ms_mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/ms_mode/crc$/.qa b/qa/suites/krbd/rbd/ms_mode/crc$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/crc$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/ms_mode/crc$/crc-rxbounce.yaml b/qa/suites/krbd/rbd/ms_mode/crc$/crc-rxbounce.yaml new file mode 100644 index 000000000..4d27d0113 --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/crc$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce diff --git a/qa/suites/krbd/rbd/ms_mode/crc$/crc.yaml b/qa/suites/krbd/rbd/ms_mode/crc$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/crc$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/qa/suites/krbd/rbd/ms_mode/legacy$/.qa b/qa/suites/krbd/rbd/ms_mode/legacy$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/legacy$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/ms_mode/legacy$/legacy-rxbounce.yaml b/qa/suites/krbd/rbd/ms_mode/legacy$/legacy-rxbounce.yaml new file mode 100644 index 000000000..244e45cbc --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/legacy$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce diff --git a/qa/suites/krbd/rbd/ms_mode/legacy$/legacy.yaml b/qa/suites/krbd/rbd/ms_mode/legacy$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/legacy$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/qa/suites/krbd/rbd/ms_mode/secure.yaml b/qa/suites/krbd/rbd/ms_mode/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/qa/suites/krbd/rbd/ms_mode/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/qa/suites/krbd/rbd/msgr-failures/.qa b/qa/suites/krbd/rbd/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/msgr-failures/few.yaml b/qa/suites/krbd/rbd/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/krbd/rbd/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/rbd/msgr-failures/many.yaml b/qa/suites/krbd/rbd/msgr-failures/many.yaml new file mode 100644 index 000000000..e3855297d --- /dev/null +++ b/qa/suites/krbd/rbd/msgr-failures/many.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/rbd/tasks/.qa b/qa/suites/krbd/rbd/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/rbd/tasks/rbd_fio.yaml b/qa/suites/krbd/rbd/tasks/rbd_fio.yaml new file mode 100644 index 000000000..01088fa46 --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_fio.yaml @@ -0,0 +1,11 @@ +tasks: +- install: +- ceph: null +- rbd_fio: + client.0: + fio-io-size: 90% + formats: [2] + features: [[layering,exclusive-lock]] + io-engine: sync + rw: randrw + runtime: 900 diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_kernel_untar_build.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_kernel_untar_build.yaml new file mode 100644 index 000000000..699cde82c --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_kernel_untar_build.yaml @@ -0,0 +1,12 @@ +tasks: +- install: + extra_system_packages: + deb: ['bison', 'flex', 'libelf-dev', 'libssl-dev'] + rpm: ['bison', 'flex', 'elfutils-libelf-devel', 'openssl-devel'] +- ceph: +- rbd: + all: +- workunit: + clients: + all: + - kernel_untar_build.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_dbench.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_dbench.yaml new file mode 100644 index 000000000..d779eea23 --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_dbench.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- rbd: + all: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_ffsb.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_ffsb.yaml new file mode 100644 index 000000000..5204bb87f --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_ffsb.yaml @@ -0,0 +1,10 @@ +tasks: +- install: +- ceph: +- rbd: + all: + image_size: 20480 +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress.yaml new file mode 100644 index 000000000..f9d62fefc --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- rbd: + all: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_ext4.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_ext4.yaml new file mode 100644 index 000000000..f765b74a6 --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsstress_ext4.yaml @@ -0,0 +1,10 @@ +tasks: +- install: +- ceph: +- rbd: + all: + fs_type: ext4 +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsx.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsx.yaml new file mode 100644 index 000000000..39d4e04f2 --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_fsx.yaml @@ -0,0 +1,20 @@ +tasks: +- install: + extra_system_packages: + deb: + - libaio-dev + - libtool-bin + - uuid-dev + - xfslibs-dev + rpm: + - libaio-devel + - libtool + - libuuid-devel + - xfsprogs-devel +- ceph: +- rbd: + all: +- workunit: + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_iozone.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_iozone.yaml new file mode 100644 index 000000000..eb8f18d60 --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_suites_iozone.yaml @@ -0,0 +1,10 @@ +tasks: +- install: +- ceph: +- rbd: + all: + image_size: 20480 +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/krbd/rbd/tasks/rbd_workunit_trivial_sync.yaml b/qa/suites/krbd/rbd/tasks/rbd_workunit_trivial_sync.yaml new file mode 100644 index 
000000000..7c2796b2a --- /dev/null +++ b/qa/suites/krbd/rbd/tasks/rbd_workunit_trivial_sync.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- rbd: + all: +- workunit: + clients: + all: [fs/misc/trivial_sync.sh] diff --git a/qa/suites/krbd/singleton-msgr-failures/% b/qa/suites/krbd/singleton-msgr-failures/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/% diff --git a/qa/suites/krbd/singleton-msgr-failures/.qa b/qa/suites/krbd/singleton-msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/singleton-msgr-failures/bluestore-bitmap.yaml b/qa/suites/krbd/singleton-msgr-failures/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/singleton-msgr-failures/conf.yaml b/qa/suites/krbd/singleton-msgr-failures/conf.yaml new file mode 100644 index 000000000..5e7ed992e --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/conf.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/singleton-msgr-failures/ms_mode$/.qa b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/singleton-msgr-failures/ms_mode$/crc-rxbounce.yaml b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/crc-rxbounce.yaml new file mode 100644 index 000000000..4d27d0113 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce diff --git a/qa/suites/krbd/singleton-msgr-failures/ms_mode$/crc.yaml b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/qa/suites/krbd/singleton-msgr-failures/ms_mode$/legacy-rxbounce.yaml b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/legacy-rxbounce.yaml new file mode 100644 index 000000000..244e45cbc --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce diff --git a/qa/suites/krbd/singleton-msgr-failures/ms_mode$/legacy.yaml b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/qa/suites/krbd/singleton-msgr-failures/ms_mode$/secure.yaml b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/ms_mode$/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/qa/suites/krbd/singleton-msgr-failures/msgr-failures/.qa b/qa/suites/krbd/singleton-msgr-failures/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/singleton-msgr-failures/msgr-failures/few.yaml b/qa/suites/krbd/singleton-msgr-failures/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/singleton-msgr-failures/msgr-failures/many.yaml b/qa/suites/krbd/singleton-msgr-failures/msgr-failures/many.yaml new file mode 100644 index 000000000..e3855297d --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/msgr-failures/many.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/krbd/singleton-msgr-failures/tasks/.qa b/qa/suites/krbd/singleton-msgr-failures/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/singleton-msgr-failures/tasks/rbd_xfstests.yaml b/qa/suites/krbd/singleton-msgr-failures/tasks/rbd_xfstests.yaml new file mode 100644 index 000000000..c94af0207 --- /dev/null +++ b/qa/suites/krbd/singleton-msgr-failures/tasks/rbd_xfstests.yaml @@ -0,0 +1,38 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, mds.a, osd.3, osd.4, osd.5] +- [client.0] +- [client.1] +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: +- rbd.xfstests: + client.0: &ref + test_image: 'test_image-0' + test_size: 5120 # MB + scratch_image: 'scratch_image-0' + scratch_size: 15360 # MB + fs_type: ext4 + tests: '-g auto -g blockdev -x clone' + exclude: + - generic/042 + - generic/392 + - generic/044 + - generic/045 + - generic/046 + - generic/223 + - ext4/002 # removed upstream + - ext4/304 + - generic/388 + - generic/405 + - generic/422 + - shared/298 # lockdep false positive + randomize: true + client.1: + <<: *ref + test_image: 'test_image-1' + scratch_image: 'scratch_image-1' diff --git a/qa/suites/krbd/singleton/% b/qa/suites/krbd/singleton/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/singleton/% diff --git a/qa/suites/krbd/singleton/.qa b/qa/suites/krbd/singleton/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/singleton/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/singleton/bluestore-bitmap.yaml b/qa/suites/krbd/singleton/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/singleton/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/singleton/conf.yaml b/qa/suites/krbd/singleton/conf.yaml new file mode 100644 index 000000000..41292fa81 --- /dev/null +++ b/qa/suites/krbd/singleton/conf.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/singleton/ms_mode$/.qa b/qa/suites/krbd/singleton/ms_mode$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/singleton/ms_mode$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/singleton/ms_mode$/crc-rxbounce.yaml b/qa/suites/krbd/singleton/ms_mode$/crc-rxbounce.yaml new file mode 100644 index 000000000..4d27d0113 --- /dev/null +++ b/qa/suites/krbd/singleton/ms_mode$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce diff --git a/qa/suites/krbd/singleton/ms_mode$/crc.yaml b/qa/suites/krbd/singleton/ms_mode$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/qa/suites/krbd/singleton/ms_mode$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/qa/suites/krbd/singleton/ms_mode$/legacy-rxbounce.yaml b/qa/suites/krbd/singleton/ms_mode$/legacy-rxbounce.yaml new file mode 100644 index 000000000..244e45cbc --- /dev/null +++ b/qa/suites/krbd/singleton/ms_mode$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce diff --git a/qa/suites/krbd/singleton/ms_mode$/legacy.yaml b/qa/suites/krbd/singleton/ms_mode$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/qa/suites/krbd/singleton/ms_mode$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/qa/suites/krbd/singleton/ms_mode$/secure.yaml b/qa/suites/krbd/singleton/ms_mode$/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/qa/suites/krbd/singleton/ms_mode$/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/qa/suites/krbd/singleton/tasks/.qa b/qa/suites/krbd/singleton/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/singleton/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/singleton/tasks/krbd_watch_errors.yaml b/qa/suites/krbd/singleton/tasks/krbd_watch_errors.yaml new file mode 100644 index 000000000..5e30ef2ba --- /dev/null +++ b/qa/suites/krbd/singleton/tasks/krbd_watch_errors.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + conf: + global: + osd pool default size: 1 + osd: + osd shutdown pgref assert: true +roles: +- [mon.a, mgr.x, osd.0, client.0] + +tasks: +- install: + extra_system_packages: + - fio +- ceph: +- workunit: + clients: + all: + - rbd/krbd_watch_errors.sh diff --git a/qa/suites/krbd/thrash/% b/qa/suites/krbd/thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/thrash/% diff --git a/qa/suites/krbd/thrash/.qa b/qa/suites/krbd/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/bluestore-bitmap.yaml b/qa/suites/krbd/thrash/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/thrash/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/ceph/.qa b/qa/suites/krbd/thrash/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/thrash/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/ceph/ceph.yaml b/qa/suites/krbd/thrash/ceph/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/krbd/thrash/ceph/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/krbd/thrash/clusters/.qa b/qa/suites/krbd/thrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/thrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/clusters/fixed-3.yaml b/qa/suites/krbd/thrash/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/krbd/thrash/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/conf.yaml b/qa/suites/krbd/thrash/conf.yaml new file mode 100644 index 000000000..41292fa81 --- /dev/null +++ b/qa/suites/krbd/thrash/conf.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/thrash/ms_mode$/.qa b/qa/suites/krbd/thrash/ms_mode$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/thrash/ms_mode$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/ms_mode$/crc-rxbounce.yaml b/qa/suites/krbd/thrash/ms_mode$/crc-rxbounce.yaml new file mode 100644 index 000000000..4d27d0113 --- /dev/null +++ b/qa/suites/krbd/thrash/ms_mode$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce diff --git a/qa/suites/krbd/thrash/ms_mode$/crc.yaml b/qa/suites/krbd/thrash/ms_mode$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/qa/suites/krbd/thrash/ms_mode$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/qa/suites/krbd/thrash/ms_mode$/legacy-rxbounce.yaml b/qa/suites/krbd/thrash/ms_mode$/legacy-rxbounce.yaml new file mode 100644 index 000000000..244e45cbc --- /dev/null +++ b/qa/suites/krbd/thrash/ms_mode$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce diff --git a/qa/suites/krbd/thrash/ms_mode$/legacy.yaml b/qa/suites/krbd/thrash/ms_mode$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/qa/suites/krbd/thrash/ms_mode$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/qa/suites/krbd/thrash/ms_mode$/secure.yaml b/qa/suites/krbd/thrash/ms_mode$/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/qa/suites/krbd/thrash/ms_mode$/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/qa/suites/krbd/thrash/thrashers/.qa b/qa/suites/krbd/thrash/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/thrash/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/thrashers/backoff.yaml b/qa/suites/krbd/thrash/thrashers/backoff.yaml new file mode 100644 index 000000000..a98fec611 --- /dev/null +++ b/qa/suites/krbd/thrash/thrashers/backoff.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + osd: + osd backoff on peering: true + osd backoff on degraded: true + log-ignorelist: + - wrongly marked me down + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 diff --git a/qa/suites/krbd/thrash/thrashers/mon-thrasher.yaml b/qa/suites/krbd/thrash/thrashers/mon-thrasher.yaml new file mode 100644 index 000000000..4ef5fcaea --- /dev/null +++ b/qa/suites/krbd/thrash/thrashers/mon-thrasher.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + log-ignorelist: + - \(MON_DOWN\) +tasks: +- mon_thrash: + revive_delay: 20 + thrash_delay: 1 diff --git a/qa/suites/krbd/thrash/thrashers/pggrow.yaml b/qa/suites/krbd/thrash/thrashers/pggrow.yaml new file mode 100644 index 000000000..07a227325 --- /dev/null +++ b/qa/suites/krbd/thrash/thrashers/pggrow.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 diff --git a/qa/suites/krbd/thrash/thrashers/upmap.yaml b/qa/suites/krbd/thrash/thrashers/upmap.yaml new file mode 100644 index 000000000..f7d456627 --- /dev/null +++ b/qa/suites/krbd/thrash/thrashers/upmap.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + crush_tunables: optimal + conf: + mon: + mon osd initial require min compat client: luminous + log-ignorelist: + - wrongly marked me down + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + chance_thrash_pg_upmap: 3 + chance_thrash_pg_upmap_items: 3 diff --git a/qa/suites/krbd/thrash/thrashosds-health.yaml b/qa/suites/krbd/thrash/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/krbd/thrash/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/workloads/.qa b/qa/suites/krbd/thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/thrash/workloads/krbd_diff_continuous.yaml b/qa/suites/krbd/thrash/workloads/krbd_diff_continuous.yaml new file mode 100644 index 000000000..5907718d5 --- /dev/null +++ b/qa/suites/krbd/thrash/workloads/krbd_diff_continuous.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - pv +tasks: +- workunit: + clients: + all: + - rbd/diff_continuous.sh + env: + RBD_DEVICE_TYPE: "krbd" diff --git a/qa/suites/krbd/thrash/workloads/rbd_fio.yaml b/qa/suites/krbd/thrash/workloads/rbd_fio.yaml new file mode 100644 index 000000000..157210f53 --- /dev/null +++ b/qa/suites/krbd/thrash/workloads/rbd_fio.yaml @@ -0,0 +1,11 @@ +tasks: +- rbd_fio: + client.0: + fio-io-size: 100% + formats: [2] + features: [[layering,exclusive-lock]] + io-engine: libaio + rw: randrw + bs: 1024 + io-depth: 256 + runtime: 1200 diff --git a/qa/suites/krbd/thrash/workloads/rbd_workunit_suites_ffsb.yaml b/qa/suites/krbd/thrash/workloads/rbd_workunit_suites_ffsb.yaml new file mode 100644 index 000000000..4ae7d6909 --- /dev/null +++ b/qa/suites/krbd/thrash/workloads/rbd_workunit_suites_ffsb.yaml @@ -0,0 +1,8 @@ +tasks: +- rbd: + all: + image_size: 20480 +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/krbd/unmap/% b/qa/suites/krbd/unmap/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/unmap/% diff --git a/qa/suites/krbd/unmap/.qa b/qa/suites/krbd/unmap/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/unmap/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/unmap/ceph/.qa b/qa/suites/krbd/unmap/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/unmap/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/unmap/ceph/ceph.yaml b/qa/suites/krbd/unmap/ceph/ceph.yaml new file mode 100644 index 000000000..c33664028 --- /dev/null +++ b/qa/suites/krbd/unmap/ceph/ceph.yaml @@ -0,0 +1,16 @@ +overrides: + ceph: + crush_tunables: bobtail + mon_bind_addrvec: false + mon_bind_msgr2: false + conf: + global: + cephx require version: 1 + cephx service require version: 1 + ms bind msgr2: false +tasks: +- install: +- ceph: +- exec: + client.0: + - "ceph osd getcrushmap -o /dev/stdout | crushtool -d - | sed -e 's/alg straw2/alg straw/g' | crushtool -c /dev/stdin -o /dev/stdout | ceph osd setcrushmap -i /dev/stdin" diff --git a/qa/suites/krbd/unmap/clusters/.qa b/qa/suites/krbd/unmap/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/unmap/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/unmap/clusters/separate-client.yaml b/qa/suites/krbd/unmap/clusters/separate-client.yaml new file mode 100644 index 000000000..be1343189 --- /dev/null +++ b/qa/suites/krbd/unmap/clusters/separate-client.yaml @@ -0,0 +1,16 @@ +# fixed-1.yaml, but with client.0 on a separate target +overrides: + ceph-deploy: + conf: + global: + osd pool default size: 2 + osd crush chooseleaf type: 0 + osd pool default pg num: 128 + osd pool default pgp num: 128 +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2] +- [client.0] +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/suites/krbd/unmap/conf.yaml b/qa/suites/krbd/unmap/conf.yaml new file mode 100644 index 000000000..e52341f29 --- /dev/null +++ b/qa/suites/krbd/unmap/conf.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + client: + rbd default features: 1 # pre-single-major is v3.13, so layering only diff --git a/qa/suites/krbd/unmap/kernels/.qa b/qa/suites/krbd/unmap/kernels/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/unmap/kernels/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/unmap/kernels/pre-single-major.yaml b/qa/suites/krbd/unmap/kernels/pre-single-major.yaml new file mode 100644 index 000000000..a5636b45e --- /dev/null +++ b/qa/suites/krbd/unmap/kernels/pre-single-major.yaml @@ -0,0 +1,10 @@ +overrides: + kernel: + client.0: + branch: nightly_pre-single-major # v3.12.z +tasks: +- exec: + client.0: + - "modprobe -r rbd" + - "modprobe --first-time rbd" + - "test ! -f /sys/module/rbd/parameters/single_major" diff --git a/qa/suites/krbd/unmap/kernels/single-major-off.yaml b/qa/suites/krbd/unmap/kernels/single-major-off.yaml new file mode 100644 index 000000000..9dc2488ef --- /dev/null +++ b/qa/suites/krbd/unmap/kernels/single-major-off.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + client.0: + - "modprobe -r rbd" + - "modprobe --first-time rbd single_major=0" + - "grep -q N /sys/module/rbd/parameters/single_major" diff --git a/qa/suites/krbd/unmap/kernels/single-major-on.yaml b/qa/suites/krbd/unmap/kernels/single-major-on.yaml new file mode 100644 index 000000000..c3889f34a --- /dev/null +++ b/qa/suites/krbd/unmap/kernels/single-major-on.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + client.0: + - "modprobe -r rbd" + - "modprobe --first-time rbd single_major=1" + - "grep -q Y /sys/module/rbd/parameters/single_major" diff --git a/qa/suites/krbd/unmap/tasks/.qa b/qa/suites/krbd/unmap/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/unmap/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/unmap/tasks/unmap.yaml b/qa/suites/krbd/unmap/tasks/unmap.yaml new file mode 100644 index 000000000..435061b45 --- /dev/null +++ b/qa/suites/krbd/unmap/tasks/unmap.yaml @@ -0,0 +1,5 @@ +tasks: +- cram: + clients: + client.0: + - src/test/cli-integration/rbd/unmap.t diff --git a/qa/suites/krbd/wac/.qa b/qa/suites/krbd/wac/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/sysfs/% b/qa/suites/krbd/wac/sysfs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/% diff --git a/qa/suites/krbd/wac/sysfs/.qa b/qa/suites/krbd/wac/sysfs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/sysfs/bluestore-bitmap.yaml b/qa/suites/krbd/wac/sysfs/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/wac/sysfs/ceph/.qa b/qa/suites/krbd/wac/sysfs/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/sysfs/ceph/ceph.yaml b/qa/suites/krbd/wac/sysfs/ceph/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/ceph/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/krbd/wac/sysfs/clusters/.qa b/qa/suites/krbd/wac/sysfs/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/sysfs/clusters/fixed-1.yaml b/qa/suites/krbd/wac/sysfs/clusters/fixed-1.yaml new file mode 120000 index 000000000..02df5dd0c --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/clusters/fixed-1.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-1.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/wac/sysfs/conf.yaml b/qa/suites/krbd/wac/sysfs/conf.yaml new file mode 100644 index 000000000..41292fa81 --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/conf.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/wac/sysfs/tasks/.qa b/qa/suites/krbd/wac/sysfs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/sysfs/tasks/stable_writes.yaml b/qa/suites/krbd/wac/sysfs/tasks/stable_writes.yaml new file mode 100644 index 000000000..cd1ba930f --- /dev/null +++ b/qa/suites/krbd/wac/sysfs/tasks/stable_writes.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + all: + - rbd/krbd_stable_writes.sh diff --git a/qa/suites/krbd/wac/wac/% b/qa/suites/krbd/wac/wac/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/krbd/wac/wac/% diff --git a/qa/suites/krbd/wac/wac/.qa b/qa/suites/krbd/wac/wac/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/wac/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/wac/bluestore-bitmap.yaml b/qa/suites/krbd/wac/wac/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/wac/wac/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/wac/wac/ceph/.qa b/qa/suites/krbd/wac/wac/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/wac/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/wac/ceph/ceph.yaml b/qa/suites/krbd/wac/wac/ceph/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/krbd/wac/wac/ceph/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/krbd/wac/wac/clusters/.qa b/qa/suites/krbd/wac/wac/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/wac/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/wac/clusters/fixed-3.yaml b/qa/suites/krbd/wac/wac/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/krbd/wac/wac/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/krbd/wac/wac/conf.yaml b/qa/suites/krbd/wac/wac/conf.yaml new file mode 100644 index 000000000..41292fa81 --- /dev/null +++ b/qa/suites/krbd/wac/wac/conf.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false + client: + rbd default features: 37 diff --git a/qa/suites/krbd/wac/wac/tasks/.qa b/qa/suites/krbd/wac/wac/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/wac/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/wac/tasks/wac.yaml b/qa/suites/krbd/wac/wac/tasks/wac.yaml new file mode 100644 index 000000000..524d29a43 --- /dev/null +++ b/qa/suites/krbd/wac/wac/tasks/wac.yaml @@ -0,0 +1,8 @@ +tasks: +- exec: + client.0: + - "dmesg -C" +- workunit: + clients: + all: + - rbd/krbd_wac.sh diff --git a/qa/suites/krbd/wac/wac/verify/.qa b/qa/suites/krbd/wac/wac/verify/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/wac/wac/verify/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/wac/wac/verify/many-resets.yaml b/qa/suites/krbd/wac/wac/verify/many-resets.yaml new file mode 100644 index 000000000..1f434fd28 --- /dev/null +++ b/qa/suites/krbd/wac/wac/verify/many-resets.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME +tasks: +- exec: + client.0: + - "dmesg | grep -q 'libceph: osd.* socket closed'" + - "dmesg | grep -q 'libceph: osd.* socket error on write'" diff --git a/qa/suites/krbd/wac/wac/verify/no-resets.yaml b/qa/suites/krbd/wac/wac/verify/no-resets.yaml new file mode 100644 index 000000000..2728479da --- /dev/null +++ b/qa/suites/krbd/wac/wac/verify/no-resets.yaml @@ -0,0 +1,5 @@ +tasks: +- exec: + client.0: + - "! dmesg | grep -q 'libceph: osd.* socket closed'" + - "! dmesg | grep -q 'libceph: osd.* socket error on write'" diff --git a/qa/suites/mixed-clients/.qa b/qa/suites/mixed-clients/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/mixed-clients/.qa @@ -0,0 +1 @@ +../.qa/
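Editor's note: wac.yaml clears the kernel log with dmesg -C before the workload; many-resets.yaml then expects the injected socket failures (ms inject socket failures: 500) to surface as libceph reset messages, while no-resets.yaml asserts their absence when nothing is injected. A hedged sketch of the same check run by hand after a wac job, using only the grep patterns from the fragments above:

    dmesg | grep 'libceph: osd.* socket closed'         || echo "no socket resets seen"
    dmesg | grep 'libceph: osd.* socket error on write' || echo "no write errors seen"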
\ No newline at end of file diff --git a/qa/suites/mixed-clients/basic/.qa b/qa/suites/mixed-clients/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/mixed-clients/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/mixed-clients/basic/clusters/.qa b/qa/suites/mixed-clients/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/mixed-clients/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/mixed-clients/basic/clusters/fixed-3.yaml b/qa/suites/mixed-clients/basic/clusters/fixed-3.yaml new file mode 100644 index 000000000..134bca1b9 --- /dev/null +++ b/qa/suites/mixed-clients/basic/clusters/fixed-3.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mgr.x, mds.a, osd.0, osd.1] +- [mon.b, mon.c, osd.2, osd.3, client.0] +- [client.1] diff --git a/qa/suites/mixed-clients/basic/objectstore b/qa/suites/mixed-clients/basic/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/mixed-clients/basic/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/mixed-clients/basic/tasks/.qa b/qa/suites/mixed-clients/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/mixed-clients/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_dbench_iozone.yaml b/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_dbench_iozone.yaml new file mode 100644 index 000000000..bb347be7f --- /dev/null +++ b/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_dbench_iozone.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- install: + branch: dumpling +- ceph: +- parallel: + - user-workload + - kclient-workload +user-workload: + sequential: + - ceph-fuse: [client.0] + - workunit: + clients: + client.0: + - suites/iozone.sh +kclient-workload: + sequential: + - kclient: [client.1] + - workunit: + clients: + client.1: + - suites/dbench.sh diff --git a/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml b/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml new file mode 100644 index 000000000..2c32a61e8 --- /dev/null +++ b/qa/suites/mixed-clients/basic/tasks/kernel_cfuse_workunits_untarbuild_blogbench.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- install: + branch: dumpling +- ceph: +- parallel: + - user-workload + - kclient-workload +user-workload: + sequential: + - ceph-fuse: [client.0] + - workunit: + clients: + client.0: + - suites/blogbench.sh +kclient-workload: + sequential: + - kclient: [client.1] + - workunit: + clients: + client.1: + - kernel_untar_build.sh diff --git a/qa/suites/netsplit/% b/qa/suites/netsplit/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/netsplit/% diff --git a/qa/suites/netsplit/.qa b/qa/suites/netsplit/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/netsplit/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/netsplit/ceph.yaml b/qa/suites/netsplit/ceph.yaml new file mode 100644 index 000000000..ddf54b3a3 --- /dev/null +++ b/qa/suites/netsplit/ceph.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + conf: + global: + mon election default strategy: 3 + mon: + mon min osdmap epochs: 25 + paxos service trim min: 5 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +# thrashing monitors may make mgr have trouble w/ its keepalive + log-whitelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(MON_DOWN\) +# slow mons -> slow peering -> PG_AVAILABILITY + - \(PG_AVAILABILITY\) + - \(SLOW_OPS\) +tasks: +- install: +- ceph: diff --git a/qa/suites/netsplit/cluster.yaml b/qa/suites/netsplit/cluster.yaml new file mode 100644 index 000000000..0681feca2 --- /dev/null +++ b/qa/suites/netsplit/cluster.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, osd.3] +- [mon.b, mgr.y, osd.4, osd.5, osd.6, osd.7, client.0] +- [mon.c] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/netsplit/msgr.yaml b/qa/suites/netsplit/msgr.yaml new file mode 120000 index 000000000..775a723fd --- /dev/null +++ b/qa/suites/netsplit/msgr.yaml @@ -0,0 +1 @@ +../../msgr/async.yaml
\ No newline at end of file diff --git a/qa/suites/netsplit/rados.yaml b/qa/suites/netsplit/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/netsplit/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/netsplit/supported-random-distro$ b/qa/suites/netsplit/supported-random-distro$ new file mode 120000 index 000000000..8d1b6f6c8 --- /dev/null +++ b/qa/suites/netsplit/supported-random-distro$ @@ -0,0 +1 @@ +../rados/basic/supported-random-distro$/
\ No newline at end of file diff --git a/qa/suites/netsplit/tests/.qa b/qa/suites/netsplit/tests/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/netsplit/tests/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/netsplit/tests/mon_pool_ops.yaml b/qa/suites/netsplit/tests/mon_pool_ops.yaml new file mode 100644 index 000000000..5b41c05fa --- /dev/null +++ b/qa/suites/netsplit/tests/mon_pool_ops.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + conf: + global: + mon election default strategy: 3 +tasks: +- workunit: + clients: + client.0: + - mon/pool_ops.sh +- netsplit.disconnect: [mon.a, mon.c] +- workunit: + clients: + client.0: + - mon/pool_ops.sh +- netsplit.reconnect: [mon.a, mon.c] +- netsplit.disconnect: [mon.b, mon.c] +- workunit: + clients: + client.0: + - mon/pool_ops.sh
\ No newline at end of file diff --git a/qa/suites/orch/.qa b/qa/suites/orch/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/.qa @@ -0,0 +1 @@ +../.qa/
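Editor's note: mon_pool_ops.yaml above alternates pool operations with single network cuts between monitor pairs; with three monitors and connectivity-based elections (mon election default strategy: 3), quorum is expected to survive each cut. A hedged sketch of commands one could run from client.0 during a cut to confirm that; none of this is part of the suite:

    ceph quorum_status --format json | jq '.quorum_names'   # which monitors are still in quorum
    ceph mon stat                                            # one-line quorum summary
    ceph health detail                                       # MON_DOWN is tolerated (whitelisted in ceph.yaml above)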
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/.qa b/qa/suites/orch/cephadm/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/mds_upgrade_sequence b/qa/suites/orch/cephadm/mds_upgrade_sequence new file mode 120000 index 000000000..24aa41c10 --- /dev/null +++ b/qa/suites/orch/cephadm/mds_upgrade_sequence @@ -0,0 +1 @@ +.qa/suites/fs/upgrade/mds_upgrade_sequence/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/% b/qa/suites/orch/cephadm/mgr-nfs-upgrade/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/% diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/.qa b/qa/suites/orch/cephadm/mgr-nfs-upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/0-centos_8.stream_container_tools.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/0-centos_8.stream_container_tools.yaml new file mode 120000 index 000000000..7a86f967f --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/0-centos_8.stream_container_tools.yaml @@ -0,0 +1 @@ +.qa/distros/podman/centos_8.stream_container_tools.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/.qa b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.0.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.0.yaml new file mode 100644 index 000000000..beba37428 --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.0.yaml @@ -0,0 +1,8 @@ +tasks: +- cephadm: + roleless: true + image: quay.io/ceph/ceph:v16.2.0 + cephadm_branch: v16.2.0 + cephadm_git_url: https://github.com/ceph/ceph + # needed for v16.2.0 due to --skip-admin-label + avoid_pacific_features: true diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.4.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.4.yaml new file mode 100644 index 000000000..1cbe5a134 --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.4.yaml @@ -0,0 +1,8 @@ +tasks: +- cephadm: + roleless: true + image: quay.io/ceph/ceph:v16.2.4 + cephadm_branch: v16.2.4 + cephadm_git_url: https://github.com/ceph/ceph + # needed for v16.2.4 due to --skip-admin-label + avoid_pacific_features: true diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.5.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.5.yaml new file mode 100644 index 000000000..381088d5b --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-bootstrap/16.2.5.yaml @@ -0,0 +1,6 @@ +tasks: +- cephadm: + roleless: true + image: quay.io/ceph/ceph:v16.2.5 + cephadm_branch: v16.2.5 + cephadm_git_url: https://github.com/ceph/ceph diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-start.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-start.yaml new file mode 100644 index 000000000..2d9f09a4e --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/1-start.yaml @@ -0,0 +1,29 @@ +tasks: +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls +roles: +- - host.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 +- - host.b + - osd.4 + - osd.5 + - osd.6 + - osd.7 +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/2-nfs.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/2-nfs.yaml new file mode 100644 index 000000000..34680fc8a --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/2-nfs.yaml @@ -0,0 +1,29 @@ +tasks: + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.shell: + host.a: + - ceph fs volume create foofs + +- cephadm.wait_for_service: + service: mds.foofs + +- cephadm.shell: + host.a: + - ceph nfs cluster create foo --placement=2 || ceph nfs cluster create cephfs foo --placement=2 + - ceph nfs export create cephfs --fsname foofs --clusterid foo --binding /fake || ceph nfs export create cephfs --fsname foofs --cluster-id foo --pseudo-path /fake + + # we can't do wait_for_service here because with octopus it's nfs.ganesha-foo not nfs.foo + - while ! ceph orch ls | grep nfs | grep 2/2 ; do sleep 1 ; done + +- vip.exec: + host.a: + - mkdir /mnt/foo + - while ! 
mount -t nfs $(hostname):/fake /mnt/foo -o sync ; do sleep 5 ; done + - echo test > /mnt/foo/testfile + - sync + diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/3-upgrade-with-workload.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/3-upgrade-with-workload.yaml new file mode 100644 index 000000000..ec901a92e --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/3-upgrade-with-workload.yaml @@ -0,0 +1,43 @@ +tasks: +- parallel: + - upgrade-tasks + - workload-tasks + +upgrade-tasks: + sequential: + - cephadm.shell: + env: [sha1] + host.a: + - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force + - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force + - ceph config set global log_to_journald false --force + - ceph mgr module enable nfs --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - cephadm.shell: + env: [sha1] + host.a: + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; ceph health detail ; sleep 30 ; done + - ceph orch ps + - ceph versions + - echo "wait for servicemap items w/ changing names to refresh" + - sleep 60 + - ceph orch ps + - ceph orch upgrade status + - ceph health detail + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 + + # this should be a no-op, but confirms nfs.ganesha-foo was remapped to nfs.foo + - cephadm.wait_for_service: + service: nfs.foo + +workload-tasks: + sequential: + - exec: + host.a: + - cd /mnt/foo && dbench 5 -t 600 || true # might fail with ESTALE + # make sure mount works + - umount /mnt/foo + - while ! mount -t nfs $(hostname):/fake /mnt/foo ; do sleep 5 ; done + - cd /mnt/foo && dbench 5 -t 5 diff --git a/qa/suites/orch/cephadm/mgr-nfs-upgrade/4-final.yaml b/qa/suites/orch/cephadm/mgr-nfs-upgrade/4-final.yaml new file mode 100644 index 000000000..3a9169659 --- /dev/null +++ b/qa/suites/orch/cephadm/mgr-nfs-upgrade/4-final.yaml @@ -0,0 +1,10 @@ +tasks: +- vip.exec: + host.a: + - umount /mnt/foo +- cephadm.shell: + host.a: + - ceph nfs cluster ls | grep foo + - ceph nfs export ls foo --detailed + - rados -p .nfs --all ls - + - ceph config get mgr mgr/cephadm/migration_current | grep 6 diff --git a/qa/suites/orch/cephadm/nfs b/qa/suites/orch/cephadm/nfs new file mode 120000 index 000000000..628e2a2a2 --- /dev/null +++ b/qa/suites/orch/cephadm/nfs @@ -0,0 +1 @@ +.qa/suites/fs/nfs/
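Editor's note: a hedged, manual spot-check one could run after the upgrade sequence above completes; it mostly restates checks the fragments already perform and is not part of any suite file:

    ceph orch upgrade status                          # should no longer report an upgrade in progress
    ceph versions | jq -e '.overall | length == 1'    # every daemon on the new version
    ceph nfs cluster ls | grep foo                    # the nfs cluster survived the upgrade
    ceph nfs export ls foo --detailed                 # the /fake export is still defined
    mount | grep /mnt/foo                             # the client mount is still in place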
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/orchestrator_cli/% b/qa/suites/orch/cephadm/orchestrator_cli/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/orchestrator_cli/% diff --git a/qa/suites/orch/cephadm/orchestrator_cli/.qa b/qa/suites/orch/cephadm/orchestrator_cli/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/orchestrator_cli/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/orchestrator_cli/0-random-distro$ b/qa/suites/orch/cephadm/orchestrator_cli/0-random-distro$ new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/orchestrator_cli/0-random-distro$ @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/orchestrator_cli/2-node-mgr.yaml b/qa/suites/orch/cephadm/orchestrator_cli/2-node-mgr.yaml new file mode 120000 index 000000000..8a0b9123b --- /dev/null +++ b/qa/suites/orch/cephadm/orchestrator_cli/2-node-mgr.yaml @@ -0,0 +1 @@ +.qa/clusters/2-node-mgr.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/orchestrator_cli/agent b/qa/suites/orch/cephadm/orchestrator_cli/agent new file mode 120000 index 000000000..154924209 --- /dev/null +++ b/qa/suites/orch/cephadm/orchestrator_cli/agent @@ -0,0 +1 @@ +../smoke/agent
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/orchestrator_cli/orchestrator_cli.yaml b/qa/suites/orch/cephadm/orchestrator_cli/orchestrator_cli.yaml new file mode 100644 index 000000000..3e6e7f955 --- /dev/null +++ b/qa/suites/orch/cephadm/orchestrator_cli/orchestrator_cli.yaml @@ -0,0 +1,19 @@ + +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. + wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(DEVICE_IDENT_ON\) + - \(DEVICE_FAULT_ON\) + - \(PG_ + - replacing it with standby + - No standby daemons available + - \(POOL_APP_NOT_ENABLED\) + - cephfs_test_runner: + modules: + - tasks.mgr.test_orchestrator_cli diff --git a/qa/suites/orch/cephadm/osds/% b/qa/suites/orch/cephadm/osds/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/osds/% diff --git a/qa/suites/orch/cephadm/osds/.qa b/qa/suites/orch/cephadm/osds/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/osds/0-distro b/qa/suites/orch/cephadm/osds/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/osds/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/osds/0-nvme-loop.yaml b/qa/suites/orch/cephadm/osds/0-nvme-loop.yaml new file mode 120000 index 000000000..5206b6edd --- /dev/null +++ b/qa/suites/orch/cephadm/osds/0-nvme-loop.yaml @@ -0,0 +1 @@ +.qa/overrides/nvme_loop.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/osds/1-start.yaml b/qa/suites/orch/cephadm/osds/1-start.yaml new file mode 100644 index 000000000..4331d7c66 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/1-start.yaml @@ -0,0 +1,25 @@ +tasks: +- cephadm: + roleless: true +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls + - ceph orch ls | grep '^osd.all-available-devices ' +roles: +- - host.a + - client.0 +- - host.b + - client.1 +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/orch/cephadm/osds/2-ops/.qa b/qa/suites/orch/cephadm/osds/2-ops/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/2-ops/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/osds/2-ops/repave-all.yaml b/qa/suites/orch/cephadm/osds/2-ops/repave-all.yaml new file mode 100644 index 000000000..16413aba8 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/2-ops/repave-all.yaml @@ -0,0 +1,13 @@ +tasks: +- cephadm.shell: + host.a: + - | + set -e + set -x + ceph orch ps + ceph orch device ls + ceph osd tree + for osd in `ceph osd ls` ; do + ceph orch osd rm $osd --force --zap --replace + done + while ceph orch osd rm ls | wc | grep ^1 ; do sleep 10 ; done diff --git a/qa/suites/orch/cephadm/osds/2-ops/rm-zap-add.yaml b/qa/suites/orch/cephadm/osds/2-ops/rm-zap-add.yaml new file mode 100644 index 000000000..09be72f11 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/2-ops/rm-zap-add.yaml @@ -0,0 +1,17 @@ +tasks: +- cephadm.shell: + host.a: + - | + set -e + set -x + ceph orch ps + ceph orch device ls + DEVID=$(ceph device ls | grep osd.1 | awk '{print $1}') + HOST=$(ceph orch device ls | grep $DEVID | awk '{print $1}') + DEV=$(ceph orch device ls | grep $DEVID | awk '{print $2}') + echo "host $HOST, dev $DEV, devid $DEVID" + ceph orch osd rm 1 + while ceph orch osd rm status | grep ^1 ; do sleep 5 ; done + ceph orch device zap $HOST $DEV --force + ceph orch daemon add osd $HOST:$DEV + while ! ceph osd dump | grep osd.1 | grep up ; do sleep 5 ; done diff --git a/qa/suites/orch/cephadm/osds/2-ops/rm-zap-flag.yaml b/qa/suites/orch/cephadm/osds/2-ops/rm-zap-flag.yaml new file mode 100644 index 000000000..8f07f6d53 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/2-ops/rm-zap-flag.yaml @@ -0,0 +1,15 @@ +tasks: +- cephadm.shell: + host.a: + - | + set -e + set -x + ceph orch ps + ceph orch device ls + DEVID=$(ceph device ls | grep osd.1 | awk '{print $1}') + HOST=$(ceph orch device ls | grep "$DEVID" | awk '{print $1}') + DEV=$(ceph orch device ls | grep "$DEVID" | awk '{print $2}') + echo "host $HOST, dev $DEV, devid $DEVID" + ceph orch osd rm --zap --replace 1 + while ceph orch osd rm status | grep ^1 ; do sleep 5 ; done + while ! ceph osd dump | grep osd.1 | grep "up\s*in" ; do sleep 5 ; done diff --git a/qa/suites/orch/cephadm/osds/2-ops/rm-zap-wait.yaml b/qa/suites/orch/cephadm/osds/2-ops/rm-zap-wait.yaml new file mode 100644 index 000000000..78161aa49 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/2-ops/rm-zap-wait.yaml @@ -0,0 +1,16 @@ +tasks: +- cephadm.shell: + host.a: + - | + set -e + set -x + ceph orch ps + ceph orch device ls + DEVID=$(ceph device ls | grep osd.1 | awk '{print $1}') + HOST=$(ceph orch device ls | grep $DEVID | awk '{print $1}') + DEV=$(ceph orch device ls | grep $DEVID | awk '{print $2}') + echo "host $HOST, dev $DEV, devid $DEVID" + ceph orch osd rm 1 + while ceph orch osd rm status | grep ^1 ; do sleep 5 ; done + ceph orch device zap $HOST $DEV --force + while ! 
ceph osd dump | grep osd.1 | grep up ; do sleep 5 ; done diff --git a/qa/suites/orch/cephadm/osds/2-ops/rmdir-reactivate.yaml b/qa/suites/orch/cephadm/osds/2-ops/rmdir-reactivate.yaml new file mode 100644 index 000000000..a971a02e4 --- /dev/null +++ b/qa/suites/orch/cephadm/osds/2-ops/rmdir-reactivate.yaml @@ -0,0 +1,20 @@ +tasks: +- cephadm.shell: + host.a: + - | + set -e + set -x + ceph orch ps + HOST=$(hostname -s) + OSD=$(ceph orch ps $HOST | grep osd | head -n 1 | awk '{print $1}') + echo "host $HOST, osd $OSD" + ceph orch daemon stop $OSD + while ceph orch ps | grep $OSD | grep running ; do sleep 5 ; done + ceph auth export $OSD > k + ceph orch daemon rm $OSD --force + ceph orch ps --refresh + while ceph orch ps | grep $OSD ; do sleep 5 ; done + ceph auth add $OSD -i k + ceph cephadm osd activate $HOST + while ! ceph orch ps | grep $OSD | grep running ; do sleep 5 ; done +- cephadm.healthy: diff --git a/qa/suites/orch/cephadm/rbd_iscsi b/qa/suites/orch/cephadm/rbd_iscsi new file mode 120000 index 000000000..f0073a119 --- /dev/null +++ b/qa/suites/orch/cephadm/rbd_iscsi @@ -0,0 +1 @@ +.qa/suites/rbd/iscsi
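Editor's note: the 2-ops fragments above are variations on the same replace-one-OSD flow. A condensed, hedged sketch of that flow using only commands that appear in the fragments ($HOST and $DEV are resolved the same way the fragments resolve them):

    ceph orch osd rm --zap --replace 1                                # drain, zap and mark osd.1 destroyed
    while ceph orch osd rm status | grep ^1 ; do sleep 5 ; done       # wait for the removal to finish
    # without --zap/--replace, the device is zapped and re-added explicitly:
    #   ceph orch device zap $HOST $DEV --force
    #   ceph orch daemon add osd $HOST:$DEV
    while ! ceph osd dump | grep osd.1 | grep up ; do sleep 5 ; done  # wait for the replacement to come up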
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-roleless/% b/qa/suites/orch/cephadm/smoke-roleless/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/% diff --git a/qa/suites/orch/cephadm/smoke-roleless/.qa b/qa/suites/orch/cephadm/smoke-roleless/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-roleless/0-distro b/qa/suites/orch/cephadm/smoke-roleless/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-roleless/0-nvme-loop.yaml b/qa/suites/orch/cephadm/smoke-roleless/0-nvme-loop.yaml new file mode 120000 index 000000000..5206b6edd --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/0-nvme-loop.yaml @@ -0,0 +1 @@ +.qa/overrides/nvme_loop.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-roleless/1-start.yaml b/qa/suites/orch/cephadm/smoke-roleless/1-start.yaml new file mode 100644 index 000000000..018356f8f --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/1-start.yaml @@ -0,0 +1,24 @@ +tasks: +- cephadm: + roleless: true +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls +roles: +- - host.a + - client.0 +- - host.b + - client.1 +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/.qa b/qa/suites/orch/cephadm/smoke-roleless/2-services/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/basic.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/basic.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/basic.yaml diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/client-keyring.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/client-keyring.yaml new file mode 100644 index 000000000..f00800471 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/client-keyring.yaml @@ -0,0 +1,40 @@ +tasks: +- cephadm.shell: + host.a: + - ceph orch host label add `hostname` foo + - ceph auth get-or-create client.foo mon 'allow r' + - ceph orch client-keyring set client.foo label:foo --mode 770 --owner 11111:22222 +- exec: + host.a: + - while ! test -e /etc/ceph/ceph.client.foo.keyring ; do sleep 1 ; done + - ls -al /etc/ceph/ceph.client.foo.keyring | grep rwxrwx--- + - ls -al /etc/ceph/ceph.client.foo.keyring | grep 11111 + - ls -al /etc/ceph/ceph.client.foo.keyring | grep 22222 + - test -e /etc/ceph/ceph.conf +- exec: + host.b: + - test ! -e /etc/ceph/ceph.client.foo.keyring +- cephadm.shell: + host.b: + - ceph orch host label add `hostname` foo +- exec: + host.b: + - while ! test -e /etc/ceph/ceph.client.foo.keyring ; do sleep 1 ; done + - ls -al /etc/ceph/ceph.client.foo.keyring | grep rwxrwx--- + - ls -al /etc/ceph/ceph.client.foo.keyring | grep 11111 + - ls -al /etc/ceph/ceph.client.foo.keyring | grep 22222 +- cephadm.shell: + host.b: + - ceph orch host label rm `hostname` foo +- exec: + host.b: + - while test -e /etc/ceph/ceph.client.foo.keyring ; do sleep 1 ; done +- exec: + host.a: + - test -e /etc/ceph/ceph.client.foo.keyring +- cephadm.shell: + host.a: + - ceph orch client-keyring rm client.foo +- exec: + host.a: + - while test -e /etc/ceph/ceph.client.foo.keyring ; do sleep 1 ; done diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/iscsi.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/iscsi.yaml new file mode 100644 index 000000000..7f57076db --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/iscsi.yaml @@ -0,0 +1,8 @@ +tasks: +- cephadm.shell: + host.a: + - ceph osd pool create foo + - rbd pool init foo + - ceph orch apply iscsi foo u p +- cephadm.wait_for_service: + service: iscsi.foo diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/jaeger.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/jaeger.yaml new file mode 100644 index 000000000..ad102fedd --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/jaeger.yaml @@ -0,0 +1,12 @@ +tasks: +- cephadm.shell: + host.a: + - ceph orch apply jaeger +- cephadm.wait_for_service: + service: elasticsearch +- cephadm.wait_for_service: + service: jaeger-collector +- cephadm.wait_for_service: + service: jaeger-query +- cephadm.wait_for_service: + service: jaeger-agent
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/mirror.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/mirror.yaml new file mode 100644 index 000000000..681e1e04a --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/mirror.yaml @@ -0,0 +1,9 @@ +tasks: +- cephadm.shell: + host.a: + - ceph orch apply rbd-mirror "--placement=*" + - ceph orch apply cephfs-mirror "--placement=*" +- cephadm.wait_for_service: + service: rbd-mirror +- cephadm.wait_for_service: + service: cephfs-mirror diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-haproxy-proto.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-haproxy-proto.yaml new file mode 100644 index 000000000..477e5c443 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-haproxy-proto.yaml @@ -0,0 +1,35 @@ +tasks: +- vip: + +# make sure cephadm notices the new IP +- cephadm.shell: + host.a: + - ceph orch device ls --refresh + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +# use nfs module to create cluster and export +- cephadm.shell: + host.a: + - ceph fs volume create fs1 + - ceph nfs cluster create happy --ingress --virtual-ip={{VIP0}} --ingress-mode=haproxy-protocol + - ceph nfs export create cephfs --fsname fs1 --cluster-id happy --pseudo-path /d1 + +# wait for services to start +- cephadm.wait_for_service: + service: nfs.happy +- cephadm.wait_for_service: + service: ingress.nfs.happy + +# make sure mount can be reached over VIP, ensuring both that +# keepalived is maintaining the VIP and that the nfs has bound to it +- vip.exec: + host.a: + - mkdir /mnt/happy + - sleep 1 + - mount -t nfs {{VIP0}}:/d1 /mnt/happy + - echo test > /mnt/happy/testfile + - sync diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress-rgw-bucket.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress-rgw-bucket.yaml new file mode 100644 index 000000000..3f4964978 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress-rgw-bucket.yaml @@ -0,0 +1,89 @@ +tasks: +- vip: + +# make sure cephadm notices the new IP +- cephadm.shell: + host.a: + - ceph orch device ls --refresh + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.shell: + host.a: + - ceph orch apply rgw foorgw --port 8800 + - ceph nfs cluster create foo --ingress --virtual-ip {{VIP0}}/{{VIPPREFIXLEN}} + +- vip.exec: + host.a: + - dnf install -y python3-boto3 || apt install -y python3-boto3 + - /home/ubuntu/cephtest/cephadm shell radosgw-admin user create --uid foouser --display-name foo > /tmp/user.json + +- python: + host.a: | + import boto3 + import json + + with open('/tmp/user.json', 'rt') as f: + info = json.loads(f.read()) + s3 = boto3.resource( + 's3', + aws_access_key_id=info['keys'][0]['access_key'], + aws_secret_access_key=info['keys'][0]['secret_key'], + endpoint_url='http://localhost:8800', + ) + bucket = s3.Bucket('foobucket') + bucket.create() + bucket.put_object(Key='myobject', Body='thebody') + +- cephadm.shell: + host.a: + - ceph nfs export create rgw --bucket foobucket --cluster-id foo --pseudo-path /foobucket + +- cephadm.wait_for_service: + service: nfs.foo +- cephadm.wait_for_service: + service: ingress.nfs.foo + +## export and mount + +- vip.exec: + host.a: + - mkdir /mnt/foo + - sleep 5 + - mount -t nfs {{VIP0}}:/foobucket /mnt/foo + - find /mnt/foo -ls + - grep thebody /mnt/foo/myobject + - echo test > /mnt/foo/newobject + - 
sync + +- python: + host.a: | + import boto3 + import json + from io import BytesIO + + with open('/tmp/user.json', 'rt') as f: + info = json.loads(f.read()) + s3 = boto3.resource( + 's3', + aws_access_key_id=info['keys'][0]['access_key'], + aws_secret_access_key=info['keys'][0]['secret_key'], + endpoint_url='http://localhost:8800', + ) + bucket = s3.Bucket('foobucket') + data = BytesIO() + bucket.download_fileobj(Fileobj=data, Key='newobject') + print(data.getvalue()) + assert data.getvalue().decode() == 'test\n' + +- vip.exec: + host.a: + - umount /mnt/foo + +- cephadm.shell: + host.a: + - ceph nfs export rm foo /foobucket + - ceph nfs cluster rm foo diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress-rgw-user.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress-rgw-user.yaml new file mode 100644 index 000000000..721aecfc3 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress-rgw-user.yaml @@ -0,0 +1,90 @@ +tasks: +- vip: + +# make sure cephadm notices the new IP +- cephadm.shell: + host.a: + - ceph orch device ls --refresh + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.shell: + host.a: + - ceph orch apply rgw foorgw --port 8800 + - ceph nfs cluster create foo --ingress --virtual-ip {{VIP0}}/{{VIPPREFIXLEN}} + +- vip.exec: + host.a: + - dnf install -y python3-boto3 || apt install -y python3-boto3 + - /home/ubuntu/cephtest/cephadm shell radosgw-admin user create --uid foouser --display-name foo > /tmp/user.json + +- python: + host.a: | + import boto3 + import json + + with open('/tmp/user.json', 'rt') as f: + info = json.loads(f.read()) + s3 = boto3.resource( + 's3', + aws_access_key_id=info['keys'][0]['access_key'], + aws_secret_access_key=info['keys'][0]['secret_key'], + endpoint_url='http://localhost:8800', + ) + bucket = s3.Bucket('foobucket') + bucket.create() + bucket.put_object(Key='myobject', Body='thebody') + +- cephadm.shell: + host.a: + - ceph nfs export create rgw --cluster-id foo --pseudo-path /foouser --user-id foouser + +- cephadm.wait_for_service: + service: nfs.foo +- cephadm.wait_for_service: + service: ingress.nfs.foo + +## export and mount + +- vip.exec: + host.a: + - mkdir /mnt/foo + - sleep 5 + - mount -t nfs {{VIP0}}:/foouser /mnt/foo + - test -d /mnt/foo/foobucket + - find /mnt/foo -ls + - grep thebody /mnt/foo/foobucket/myobject + - echo test > /mnt/foo/foobucket/newobject + - sync + +- python: + host.a: | + import boto3 + import json + from io import BytesIO + + with open('/tmp/user.json', 'rt') as f: + info = json.loads(f.read()) + s3 = boto3.resource( + 's3', + aws_access_key_id=info['keys'][0]['access_key'], + aws_secret_access_key=info['keys'][0]['secret_key'], + endpoint_url='http://localhost:8800', + ) + bucket = s3.Bucket('foobucket') + data = BytesIO() + bucket.download_fileobj(Fileobj=data, Key='newobject') + print(data.getvalue()) + assert data.getvalue().decode() == 'test\n' + +- vip.exec: + host.a: + - umount /mnt/foo + +- cephadm.shell: + host.a: + - ceph nfs export rm foo /foouser + - ceph nfs cluster rm foo diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress.yaml new file mode 100644 index 000000000..b4e843df2 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress.yaml @@ -0,0 +1,68 @@ +tasks: +- vip: + +# make sure cephadm notices the new IP +- cephadm.shell: + host.a: + - ceph orch device ls --refresh + +# 
stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.shell: + host.a: + - ceph fs volume create foofs + +# deploy nfs + ingress +- cephadm.apply: + specs: + - service_type: nfs + service_id: foo + placement: + count: 2 + spec: + port: 12049 + - service_type: ingress + service_id: nfs.foo + spec: + backend_service: nfs.foo + frontend_port: 2049 + monitor_port: 9002 + virtual_ip: "{{VIP0}}/{{VIPPREFIXLEN}}" +- cephadm.wait_for_service: + service: nfs.foo +- cephadm.wait_for_service: + service: ingress.nfs.foo + +## export and mount + +- cephadm.shell: + host.a: + - ceph nfs export create cephfs --fsname foofs --cluster-id foo --pseudo-path /fake + +- vip.exec: + host.a: + - mkdir /mnt/foo + - sleep 5 + - mount -t nfs {{VIP0}}:/fake /mnt/foo + - echo test > /mnt/foo/testfile + - sync + +# take each gateway down in turn and ensure things still work +- cephadm.shell: + volumes: + - /mnt/foo:/mnt/foo + host.a: + - | + echo "Check with each haproxy down in turn..." + for haproxy in `ceph orch ps | grep ^haproxy.nfs.foo. | awk '{print $1}'`; do + ceph orch daemon stop $haproxy + while ! ceph orch ps | grep $haproxy | grep stopped; do sleep 1 ; done + cat /mnt/foo/testfile + echo $haproxy > /mnt/foo/testfile + sync + ceph orch daemon start $haproxy + while ! ceph orch ps | grep $haproxy | grep running; do sleep 1 ; done + done diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress2.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress2.yaml new file mode 100644 index 000000000..a47dd9d76 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-ingress2.yaml @@ -0,0 +1,70 @@ +tasks: +- vip: + +# make sure cephadm notices the new IP +- cephadm.shell: + host.a: + - ceph orch device ls --refresh + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.shell: + host.a: + - ceph fs volume create foofs + - ceph nfs cluster create foo --ingress --virtual-ip {{VIP0}}/{{VIPPREFIXLEN}} --port 2999 + - ceph nfs export create cephfs --fsname foofs --cluster-id foo --pseudo-path /fake + +- cephadm.wait_for_service: + service: nfs.foo +- cephadm.wait_for_service: + service: ingress.nfs.foo + +## export and mount + +- vip.exec: + host.a: + - mkdir /mnt/foo + - sleep 5 + - mount -t nfs {{VIP0}}:/fake /mnt/foo -o port=2999 + - echo test > /mnt/foo/testfile + - sync + +# take each gateway down in turn and ensure things still work +- cephadm.shell: + volumes: + - /mnt/foo:/mnt/foo + host.a: + - | + echo "Check with each haproxy down in turn..." + for haproxy in `ceph orch ps | grep ^haproxy.nfs.foo. | awk '{print $1}'`; do + ceph orch daemon stop $haproxy + while ! ceph orch ps | grep $haproxy | grep stopped; do sleep 1 ; done + cat /mnt/foo/testfile + echo $haproxy > /mnt/foo/testfile + sync + ceph orch daemon start $haproxy + while ! ceph orch ps | grep $haproxy | grep running; do sleep 1 ; done + done + +# take each ganesha down in turn. +# simulate "failure" by deleting the container +- vip.exec: + all-hosts: + - | + echo "Check with $(hostname) ganesha(s) down..." + for c in `systemctl | grep ceph- | grep @nfs | awk '{print $1}'`; do + cid=`echo $c | sed 's/@/-/'` + id=`echo $c | cut -d @ -f 2 | sed 's/.service$//'` + fsid=`echo $c | cut -d @ -f 1 | cut -d - -f 2-` + echo "Removing daemon $id fsid $fsid..." + sudo $TESTDIR/cephadm rm-daemon --fsid $fsid --name $id + + echo "Waking up cephadm..." + sudo $TESTDIR/cephadm shell -- ceph orch ps --refresh + + while ! 
timeout 1 cat /mnt/foo/testfile ; do true ; done + echo "Mount is back!" + done diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-keepalive-only.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-keepalive-only.yaml new file mode 100644 index 000000000..ba5afed47 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs-keepalive-only.yaml @@ -0,0 +1,55 @@ +tasks: +- vip: + +# make sure cephadm notices the new IP +- cephadm.shell: + host.a: + - ceph orch device ls --refresh + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.shell: + host.a: + - ceph fs volume create foofs + +# deploy nfs + keepalive-only ingress service +- cephadm.apply: + specs: + - service_type: nfs + service_id: foo + placement: + count: 1 + spec: + port: 2049 + virtual_ip: "{{VIP0}}" + - service_type: ingress + service_id: nfs.foo + placement: + count: 1 + spec: + backend_service: nfs.foo + monitor_port: 9002 + virtual_ip: "{{VIP0}}/{{VIPPREFIXLEN}}" + keepalive_only: true +- cephadm.wait_for_service: + service: nfs.foo +- cephadm.wait_for_service: + service: ingress.nfs.foo + +# export and mount +- cephadm.shell: + host.a: + - ceph nfs export create cephfs --fsname foofs --cluster-id foo --pseudo-path /fake + +# make sure mount can be reached over VIP, ensuring both that +# keepalived is maintaining the VIP and that the nfs has bound to it +- vip.exec: + host.a: + - mkdir /mnt/foo + - sleep 5 + - mount -t nfs {{VIP0}}:/fake /mnt/foo + - echo test > /mnt/foo/testfile + - sync diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs.yaml new file mode 100644 index 000000000..194f4e9de --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs.yaml @@ -0,0 +1,13 @@ +tasks: + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.apply: + specs: + - service_type: nfs + service_id: foo +- cephadm.wait_for_service: + service: nfs.foo diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs2.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs2.yaml new file mode 100644 index 000000000..959c5aa77 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nfs2.yaml @@ -0,0 +1,12 @@ +tasks: + +# stop kernel nfs server, if running +- vip.exec: + all-hosts: + - systemctl stop nfs-server + +- cephadm.shell: + host.a: + - ceph nfs cluster create foo +- cephadm.wait_for_service: + service: nfs.foo diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/nvmeof.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/nvmeof.yaml new file mode 100644 index 000000000..4c5e26740 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/nvmeof.yaml @@ -0,0 +1,8 @@ +tasks: +- cephadm.shell: + host.a: + - ceph osd pool create foo + - rbd pool init foo + - ceph orch apply nvmeof foo +- cephadm.wait_for_service: + service: nvmeof.foo diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/rgw-ingress.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/rgw-ingress.yaml new file mode 100644 index 000000000..710edab73 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/rgw-ingress.yaml @@ -0,0 +1,60 @@ +tasks: +- vip: + +# make sure cephadm notices the new IP +- cephadm.shell: + host.a: + - ceph orch device ls --refresh + +# deploy rgw + ingress +- cephadm.apply: + specs: + - service_type: rgw + service_id: foo + placement: + count: 4 + 
host_pattern: "*" + spec: + rgw_frontend_port: 8000 + - service_type: ingress + service_id: rgw.foo + placement: + count: 2 + spec: + backend_service: rgw.foo + frontend_port: 9000 + monitor_port: 9001 + virtual_ip: "{{VIP0}}/{{VIPPREFIXLEN}}" +- cephadm.wait_for_service: + service: rgw.foo +- cephadm.wait_for_service: + service: ingress.rgw.foo + +# take each component down in turn and ensure things still work +- cephadm.shell: + host.a: + - | + echo "Check while healthy..." + curl http://{{VIP0}}:9000/ + + # stop each rgw in turn + echo "Check with each rgw stopped in turn..." + for rgw in `ceph orch ps | grep ^rgw.foo. | awk '{print $1}'`; do + ceph orch daemon stop $rgw + while ! ceph orch ps | grep $rgw | grep stopped; do sleep 1 ; done + while ! curl http://{{VIP0}}:9000/ ; do sleep 1 ; done + ceph orch daemon start $rgw + while ! ceph orch ps | grep $rgw | grep running; do sleep 1 ; done + done + + # stop each haproxy in turn + echo "Check with each haproxy down in turn..." + for haproxy in `ceph orch ps | grep ^haproxy.rgw.foo. | awk '{print $1}'`; do + ceph orch daemon stop $haproxy + while ! ceph orch ps | grep $haproxy | grep stopped; do sleep 1 ; done + while ! curl http://{{VIP0}}:9000/ ; do sleep 1 ; done + ceph orch daemon start $haproxy + while ! ceph orch ps | grep $haproxy | grep running; do sleep 1 ; done + done + + while ! curl http://{{VIP0}}:9000/ ; do sleep 1 ; done diff --git a/qa/suites/orch/cephadm/smoke-roleless/2-services/rgw.yaml b/qa/suites/orch/cephadm/smoke-roleless/2-services/rgw.yaml new file mode 100644 index 000000000..cb2c6f4b6 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/2-services/rgw.yaml @@ -0,0 +1,12 @@ +tasks: +- cephadm.apply: + specs: + - service_type: rgw + service_id: foo + placement: + count_per_host: 4 + host_pattern: "*" + spec: + rgw_frontend_port: 8000 +- cephadm.wait_for_service: + service: rgw.foo diff --git a/qa/suites/orch/cephadm/smoke-roleless/3-final.yaml b/qa/suites/orch/cephadm/smoke-roleless/3-final.yaml new file mode 100644 index 000000000..bb938848c --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-roleless/3-final.yaml @@ -0,0 +1,10 @@ +tasks: +- cephadm.shell: + host.a: + - stat -c '%u %g' /var/log/ceph | grep '167 167' + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls + - ceph orch ls | grep '^osd.all-available-devices ' diff --git a/qa/suites/orch/cephadm/smoke-singlehost/% b/qa/suites/orch/cephadm/smoke-singlehost/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/% diff --git a/qa/suites/orch/cephadm/smoke-singlehost/.qa b/qa/suites/orch/cephadm/smoke-singlehost/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-singlehost/0-random-distro$ b/qa/suites/orch/cephadm/smoke-singlehost/0-random-distro$ new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/0-random-distro$ @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-singlehost/1-start.yaml b/qa/suites/orch/cephadm/smoke-singlehost/1-start.yaml new file mode 100644 index 000000000..ca6019c66 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/1-start.yaml @@ -0,0 +1,27 @@ +tasks: +- cephadm: + roleless: true + single_host_defaults: true +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls +roles: +- - host.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/orch/cephadm/smoke-singlehost/2-services/.qa b/qa/suites/orch/cephadm/smoke-singlehost/2-services/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/2-services/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-singlehost/2-services/basic.yaml b/qa/suites/orch/cephadm/smoke-singlehost/2-services/basic.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/2-services/basic.yaml diff --git a/qa/suites/orch/cephadm/smoke-singlehost/2-services/rgw.yaml b/qa/suites/orch/cephadm/smoke-singlehost/2-services/rgw.yaml new file mode 100644 index 000000000..cb2c6f4b6 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/2-services/rgw.yaml @@ -0,0 +1,12 @@ +tasks: +- cephadm.apply: + specs: + - service_type: rgw + service_id: foo + placement: + count_per_host: 4 + host_pattern: "*" + spec: + rgw_frontend_port: 8000 +- cephadm.wait_for_service: + service: rgw.foo diff --git a/qa/suites/orch/cephadm/smoke-singlehost/3-final.yaml b/qa/suites/orch/cephadm/smoke-singlehost/3-final.yaml new file mode 100644 index 000000000..02f5b289c --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-singlehost/3-final.yaml @@ -0,0 +1,8 @@ +tasks: +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/orch/cephadm/smoke-small/% b/qa/suites/orch/cephadm/smoke-small/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/% diff --git a/qa/suites/orch/cephadm/smoke-small/.qa b/qa/suites/orch/cephadm/smoke-small/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-small/0-distro/centos_8.stream_container_tools_crun.yaml b/qa/suites/orch/cephadm/smoke-small/0-distro/centos_8.stream_container_tools_crun.yaml new file mode 120000 index 000000000..83fe02026 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/0-distro/centos_8.stream_container_tools_crun.yaml @@ -0,0 +1 @@ +../.qa/distros/container-hosts/centos_8.stream_container_tools_crun.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-small/0-nvme-loop.yaml b/qa/suites/orch/cephadm/smoke-small/0-nvme-loop.yaml new file mode 120000 index 000000000..5206b6edd --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/0-nvme-loop.yaml @@ -0,0 +1 @@ +.qa/overrides/nvme_loop.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-small/agent/.qa b/qa/suites/orch/cephadm/smoke-small/agent/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/agent/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-small/agent/off.yaml b/qa/suites/orch/cephadm/smoke-small/agent/off.yaml new file mode 100644 index 000000000..f37c651bc --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/agent/off.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mgr: + mgr/cephadm/use_agent: false diff --git a/qa/suites/orch/cephadm/smoke-small/agent/on.yaml b/qa/suites/orch/cephadm/smoke-small/agent/on.yaml new file mode 100644 index 000000000..90ac298b6 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/agent/on.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mgr: + mgr/cephadm/use_agent: true diff --git a/qa/suites/orch/cephadm/smoke-small/fixed-2.yaml b/qa/suites/orch/cephadm/smoke-small/fixed-2.yaml new file mode 100644 index 000000000..61090a165 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/fixed-2.yaml @@ -0,0 +1,29 @@ +roles: +- - mon.a + - mgr.y + - osd.0 + - client.0 + - ceph.rgw.foo.a + - node-exporter.a + - alertmanager.a +- - mon.b + - mgr.x + - osd.1 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b +- - mon.c + - mgr.z + - osd.2 + - client.2 + - node-exporter.c +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/orch/cephadm/smoke-small/mon_election b/qa/suites/orch/cephadm/smoke-small/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke-small/start.yaml b/qa/suites/orch/cephadm/smoke-small/start.yaml new file mode 100644 index 000000000..77f493ca1 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke-small/start.yaml @@ -0,0 +1,16 @@ +tasks: +- cephadm: + conf: + mgr: + debug ms: 1 + debug mgr: 20 +- cephadm.shell: + mon.a: + - stat -c '%u %g' /var/log/ceph | grep '167 167' + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls + - ceph orch ls --format yaml + - ceph orch ls | grep '^osd ' diff --git a/qa/suites/orch/cephadm/smoke/% b/qa/suites/orch/cephadm/smoke/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/% diff --git a/qa/suites/orch/cephadm/smoke/.qa b/qa/suites/orch/cephadm/smoke/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke/0-distro b/qa/suites/orch/cephadm/smoke/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke/0-nvme-loop.yaml b/qa/suites/orch/cephadm/smoke/0-nvme-loop.yaml new file mode 120000 index 000000000..5206b6edd --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/0-nvme-loop.yaml @@ -0,0 +1 @@ +.qa/overrides/nvme_loop.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke/agent/.qa b/qa/suites/orch/cephadm/smoke/agent/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/agent/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke/agent/off.yaml b/qa/suites/orch/cephadm/smoke/agent/off.yaml new file mode 100644 index 000000000..f37c651bc --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/agent/off.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mgr: + mgr/cephadm/use_agent: false diff --git a/qa/suites/orch/cephadm/smoke/agent/on.yaml b/qa/suites/orch/cephadm/smoke/agent/on.yaml new file mode 100644 index 000000000..90ac298b6 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/agent/on.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + mgr: + mgr/cephadm/use_agent: true diff --git a/qa/suites/orch/cephadm/smoke/fixed-2.yaml b/qa/suites/orch/cephadm/smoke/fixed-2.yaml new file mode 100644 index 000000000..e93564aa9 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/fixed-2.yaml @@ -0,0 +1,32 @@ +roles: +- - mon.a + - mon.c + - mgr.y + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - ceph.rgw.foo.a + - node-exporter.a + - alertmanager.a +- - mon.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b + - ceph.iscsi.iscsi.a +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/orch/cephadm/smoke/mon_election b/qa/suites/orch/cephadm/smoke/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/smoke/start.yaml b/qa/suites/orch/cephadm/smoke/start.yaml new file mode 100644 index 000000000..77f493ca1 --- /dev/null +++ b/qa/suites/orch/cephadm/smoke/start.yaml @@ -0,0 +1,16 @@ +tasks: +- cephadm: + conf: + mgr: + debug ms: 1 + debug mgr: 20 +- cephadm.shell: + mon.a: + - stat -c '%u %g' /var/log/ceph | grep '167 167' + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls + - ceph orch ls --format yaml + - ceph orch ls | grep '^osd ' diff --git a/qa/suites/orch/cephadm/thrash/% b/qa/suites/orch/cephadm/thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/% diff --git a/qa/suites/orch/cephadm/thrash/.qa b/qa/suites/orch/cephadm/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/.qa @@ -0,0 +1 @@ +../.qa/
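Both copies of start.yaml (smoke-small and smoke) check the ownership of /var/log/ceph with stat -c '%u %g' piped to grep '167 167'. 167 is the uid/gid used for the ceph user and group in the Ceph packages and container images, so the check asserts that the log directory cephadm created on the host is writable by the containerized daemons. An expanded sketch of the same check, for illustration only:

    # /var/log/ceph must be owned by uid/gid 167, the 'ceph' user and group
    # used inside the Ceph container images; otherwise the containerized
    # daemons cannot write their logs there.
    owner=$(stat -c '%u %g' /var/log/ceph)
    if [ "$owner" != "167 167" ]; then
        echo "unexpected ownership of /var/log/ceph: $owner (wanted 167 167)" >&2
        exit 1
    fi
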
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/0-distro b/qa/suites/orch/cephadm/thrash/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/1-start.yaml b/qa/suites/orch/cephadm/thrash/1-start.yaml new file mode 100644 index 000000000..a1b89e44d --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/1-start.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- cephadm: + conf: + mgr: + debug ms: 1 + debug mgr: 20 diff --git a/qa/suites/orch/cephadm/thrash/2-thrash.yaml b/qa/suites/orch/cephadm/thrash/2-thrash.yaml new file mode 100644 index 000000000..05e0f8e76 --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/2-thrash.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + osd delete sleep: 1 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 diff --git a/qa/suites/orch/cephadm/thrash/3-tasks/.qa b/qa/suites/orch/cephadm/thrash/3-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/3-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/3-tasks/rados_api_tests.yaml b/qa/suites/orch/cephadm/thrash/3-tasks/rados_api_tests.yaml new file mode 120000 index 000000000..34e657e04 --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/3-tasks/rados_api_tests.yaml @@ -0,0 +1 @@ +.qa/suites/rados/thrash/workloads/rados_api_tests.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/3-tasks/radosbench.yaml b/qa/suites/orch/cephadm/thrash/3-tasks/radosbench.yaml new file mode 120000 index 000000000..dad17e0de --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/3-tasks/radosbench.yaml @@ -0,0 +1 @@ +.qa/suites/rados/thrash/workloads/radosbench.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/3-tasks/small-objects.yaml b/qa/suites/orch/cephadm/thrash/3-tasks/small-objects.yaml new file mode 120000 index 000000000..6aa66aa37 --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/3-tasks/small-objects.yaml @@ -0,0 +1 @@ +.qa/suites/rados/thrash/workloads/small-objects.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/3-tasks/snaps-few-objects.yaml b/qa/suites/orch/cephadm/thrash/3-tasks/snaps-few-objects.yaml new file mode 120000 index 000000000..c9cc4cd3e --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/3-tasks/snaps-few-objects.yaml @@ -0,0 +1 @@ +.qa/suites/rados/thrash/workloads/snaps-few-objects.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/fixed-2.yaml b/qa/suites/orch/cephadm/thrash/fixed-2.yaml new file mode 120000 index 000000000..5c3e0593c --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/fixed-2.yaml @@ -0,0 +1 @@ +../smoke/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/msgr b/qa/suites/orch/cephadm/thrash/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/thrash/root.yaml b/qa/suites/orch/cephadm/thrash/root.yaml new file mode 100644 index 000000000..bedb31d5d --- /dev/null +++ b/qa/suites/orch/cephadm/thrash/root.yaml @@ -0,0 +1,3 @@ +overrides: + cephadm: + cephadm_mode: root diff --git a/qa/suites/orch/cephadm/upgrade/% b/qa/suites/orch/cephadm/upgrade/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/% diff --git a/qa/suites/orch/cephadm/upgrade/.qa b/qa/suites/orch/cephadm/upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/upgrade/1-start-distro/.qa b/qa/suites/orch/cephadm/upgrade/1-start-distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/1-start-distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/upgrade/1-start-distro/1-start-centos_8.stream_container-tools.yaml b/qa/suites/orch/cephadm/upgrade/1-start-distro/1-start-centos_8.stream_container-tools.yaml new file mode 100644 index 000000000..bb9a220e6 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/1-start-distro/1-start-centos_8.stream_container-tools.yaml @@ -0,0 +1,39 @@ +os_type: centos +os_version: "8.stream" + +tasks: +- pexec: + all: + - sudo cp /etc/containers/registries.conf /etc/containers/registries.conf.backup + - sudo dnf -y module reset container-tools + - sudo dnf -y module install container-tools + - sudo cp /etc/containers/registries.conf.backup /etc/containers/registries.conf +- cephadm: + image: quay.io/ceph/ceph:v16.2.0 + cephadm_branch: v16.2.0 + cephadm_git_url: https://github.com/ceph/ceph + # avoid --cap-add=PTRACE + --privileged for older cephadm versions + allow_ptrace: false + avoid_pacific_features: true + +roles: +- - mon.a + - mon.c + - mgr.y + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b diff --git a/qa/suites/orch/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml b/qa/suites/orch/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml new file mode 100644 index 000000000..d3d9de83e --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/1-start-distro/1-start-ubuntu_20.04.yaml @@ -0,0 +1,33 @@ +os_type: ubuntu +os_version: "20.04" + +tasks: +- cephadm: + image: quay.io/ceph/ceph:v16.2.0 + cephadm_branch: v16.2.0 + cephadm_git_url: https://github.com/ceph/ceph + # avoid --cap-add=PTRACE + --privileged for older cephadm versions + allow_ptrace: false + avoid_pacific_features: true + +roles: +- - mon.a + - mon.c + - mgr.y + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b diff --git a/qa/suites/orch/cephadm/upgrade/2-repo_digest/.qa b/qa/suites/orch/cephadm/upgrade/2-repo_digest/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/2-repo_digest/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/upgrade/2-repo_digest/defaut.yaml b/qa/suites/orch/cephadm/upgrade/2-repo_digest/defaut.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/2-repo_digest/defaut.yaml diff --git a/qa/suites/orch/cephadm/upgrade/2-repo_digest/repo_digest.yaml b/qa/suites/orch/cephadm/upgrade/2-repo_digest/repo_digest.yaml new file mode 100644 index 000000000..2e6bbfd92 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/2-repo_digest/repo_digest.yaml @@ -0,0 +1,4 @@ +tasks: +- cephadm.shell: + mon.a: + - ceph config set mgr mgr/cephadm/use_repo_digest false --force diff --git a/qa/suites/orch/cephadm/upgrade/3-upgrade/.qa b/qa/suites/orch/cephadm/upgrade/3-upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/3-upgrade/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/upgrade/3-upgrade/simple.yaml b/qa/suites/orch/cephadm/upgrade/3-upgrade/simple.yaml new file mode 100644 index 000000000..f10a49bea --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/3-upgrade/simple.yaml @@ -0,0 +1,21 @@ +tasks: +- cephadm.shell: + env: [sha1] + mon.a: + # setup rgw + - radosgw-admin realm create --rgw-realm=r --default + - radosgw-admin zonegroup create --rgw-zonegroup=default --master --default + - radosgw-admin zone create --rgw-zonegroup=default --rgw-zone=z --master --default + - radosgw-admin period update --rgw-realm=r --commit + - ceph orch apply rgw foo --realm r --zone z --placement=2 --port=8000 + # simple rgw spec (will have no "spec" field) to make sure that works with rgw spec migration + - ceph orch apply rgw smpl + # setup iscsi + - ceph osd pool create foo + - rbd pool init foo + - ceph orch apply iscsi foo u p + - sleep 120 + - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force + - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force + - ceph config set global log_to_journald false --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 diff --git a/qa/suites/orch/cephadm/upgrade/3-upgrade/staggered.yaml b/qa/suites/orch/cephadm/upgrade/3-upgrade/staggered.yaml new file mode 100644 index 000000000..280714e4e --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/3-upgrade/staggered.yaml @@ -0,0 +1,132 @@ +tasks: +- cephadm.shell: + env: [sha1] + mon.a: + # setup rgw + - radosgw-admin realm create --rgw-realm=r --default + - radosgw-admin zonegroup create --rgw-zonegroup=default --master --default + - radosgw-admin zone create --rgw-zonegroup=default --rgw-zone=z --master --default + - radosgw-admin period update --rgw-realm=r --commit + - ceph orch apply rgw foo --realm r --zone z --placement=2 --port=8000 + # setup iscsi + - ceph osd pool create foo + - rbd pool init foo + - ceph orch apply iscsi foo u p + - sleep 180 + - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force + - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force + - ceph config set global log_to_journald false --force + # get some good info on the state of things pre-upgrade. 
Useful for debugging + - ceph orch ps + - ceph versions + - ceph -s + - ceph orch ls + # doing staggered upgrade requires mgr daemons being on a version that contains the staggered upgrade code + # until there is a stable version that contains it, we can test by manually upgrading a mgr daemon + - ceph orch daemon redeploy "mgr.$(ceph mgr dump -f json | jq .standbys | jq .[] | jq -r .name)" --image quay.ceph.io/ceph-ci/ceph:$sha1 + - ceph orch ps --refresh + - sleep 180 + # gather more possible debugging info + - ceph orch ps + - ceph versions + - ceph -s + - ceph health detail + # check that there are two different versions found for mgr daemon (which implies we upgraded one) + - ceph versions | jq -e '.mgr | length == 2' + - ceph mgr fail + - sleep 180 + # now try upgrading the other mgr + - ceph orch daemon redeploy "mgr.$(ceph mgr dump -f json | jq .standbys | jq .[] | jq -r .name)" --image quay.ceph.io/ceph-ci/ceph:$sha1 + - ceph orch ps --refresh + - sleep 180 + # gather more possible debugging info + - ceph orch ps + - ceph versions + - ceph health detail + - ceph -s + - ceph mgr fail + - sleep 180 + # gather more debugging info + - ceph orch ps + - ceph versions + - ceph -s + - ceph health detail + # now that both mgrs should have been redeployed with the new version, we should be back on only 1 version for the mgrs + - ceph versions | jq -e '.mgr | length == 1' + - ceph mgr fail + - sleep 180 + # debugging info + - ceph orch ps + - ceph orch ls + - ceph versions + # to make sure mgr daemons upgrade is fully completed, including being deployed by a mgr on a new version + # also serves as an early failure if manually upgrading the mgrs failed as --daemon-types won't be recognized + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types mgr + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + # verify only one version found for mgrs and that their version hash matches what we are upgrading to + - ceph versions | jq -e '.mgr | length == 1' + - ceph versions | jq -e '.mgr | keys' | grep $sha1 + # verify overall we still see two versions, basically to make sure --daemon-types wasn't ignored and all daemons upgraded + - ceph versions | jq -e '.overall | length == 2' + # check that exactly two daemons have been upgraded to the new image (our 2 mgr daemons) + - ceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e '.up_to_date | length == 2' + - ceph orch upgrade status + - ceph health detail + # upgrade only the mons on one of the two hosts + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types mon --hosts $(ceph orch ps | grep mgr.x | awk '{print $2}') + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + - ceph orch ps + # verify two different version seen for mons + - ceph versions | jq -e '.mon | length == 2' + - ceph orch upgrade status + - ceph health detail + # upgrade mons on the other hosts + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types mon --hosts $(ceph orch ps | grep mgr.y | awk '{print $2}') + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! 
ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + - ceph orch ps + # verify all mons now on same version and version hash matches what we are upgrading to + - ceph versions | jq -e '.mon | length == 1' + - ceph versions | jq -e '.mon | keys' | grep $sha1 + # verify exactly 5 daemons are now upgraded (2 mgrs, 3 mons) + - ceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e '.up_to_date | length == 5' + - ceph orch upgrade status + - ceph health detail + # upgrade exactly 2 osd daemons + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types osd --limit 2 + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + - ceph orch ps + # verify two different versions now seen for osds + - ceph versions | jq -e '.osd | length == 2' + # verify exactly 7 daemons have been upgraded (2 mgrs, 3 mons, 2 osds) + - ceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e '.up_to_date | length == 7' + - ceph orch upgrade status + - ceph health detail + # upgrade one more osd + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types crash,osd --limit 1 + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + - ceph orch ps + - ceph versions | jq -e '.osd | length == 2' + # verify now 8 daemons have been upgraded + - ceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e '.up_to_date | length == 8' + # upgrade the rest of the osds + - ceph orch upgrade status + - ceph health detail + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types crash,osd + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + - ceph orch ps + # verify all osds are now on same version and version hash matches what we are upgrading to + - ceph versions | jq -e '.osd | length == 1' + - ceph versions | jq -e '.osd | keys' | grep $sha1 + - ceph orch upgrade status + - ceph health detail + # upgrade the rgw daemons using --services + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --services rgw.foo + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done + - ceph orch ps + # verify all rgw daemons on same version and version hash matches what we are upgrading to + - ceph versions | jq -e '.rgw | length == 1' + - ceph versions | jq -e '.rgw | keys' | grep $sha1 + - ceph orch upgrade status + - ceph health detail + # run upgrade one more time with no filter parameters to make sure anything left gets upgraded + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 diff --git a/qa/suites/orch/cephadm/upgrade/4-wait.yaml b/qa/suites/orch/cephadm/upgrade/4-wait.yaml new file mode 100644 index 000000000..4010c58ed --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/4-wait.yaml @@ -0,0 +1,16 @@ +tasks: +- cephadm.shell: + env: [sha1] + mon.a: + - while ceph orch upgrade status | jq '.in_progress' | grep true && ! 
ceph orch upgrade status | jq '.message' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; ceph health detail ; sleep 30 ; done + - ceph orch ps + - ceph versions + - echo "wait for servicemap items w/ changing names to refresh" + - sleep 60 + - ceph orch ps + - ceph versions + - ceph orch upgrade status + - ceph health detail + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 + - ceph orch ls | grep '^osd ' diff --git a/qa/suites/orch/cephadm/upgrade/5-upgrade-ls.yaml b/qa/suites/orch/cephadm/upgrade/5-upgrade-ls.yaml new file mode 100644 index 000000000..799458bc5 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/5-upgrade-ls.yaml @@ -0,0 +1,6 @@ +tasks: +- cephadm.shell: + mon.a: + - ceph orch upgrade ls + - ceph orch upgrade ls --image quay.io/ceph/ceph --show-all-versions | grep 16.2.0 + - ceph orch upgrade ls --image quay.io/ceph/ceph --tags | grep v16.2.2 diff --git a/qa/suites/orch/cephadm/upgrade/agent b/qa/suites/orch/cephadm/upgrade/agent new file mode 120000 index 000000000..154924209 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/agent @@ -0,0 +1 @@ +../smoke/agent
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/upgrade/mon_election b/qa/suites/orch/cephadm/upgrade/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/orch/cephadm/upgrade/mon_election @@ -0,0 +1 @@ +.qa/mon_election
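3-upgrade/staggered.yaml and 4-wait.yaml above repeat the same one-line polling idiom many times. Written out with comments it looks like the sketch below; $sha1 is the environment variable the cephadm.shell task passes in via 'env: [sha1]', and the commands are the ones used verbatim in the fragments (only the layout and the -q flags are added here):

    # Poll 'ceph orch upgrade status' until the upgrade either finishes
    # (in_progress becomes false) or reports an error in its message field,
    # dumping progress information every 30 seconds along the way.
    while ceph orch upgrade status | jq '.in_progress' | grep -q true &&
          ! ceph orch upgrade status | jq '.message' | grep -q Error; do
        ceph orch ps
        ceph versions
        ceph orch upgrade status
        sleep 30
    done

    # The staggered test then asserts convergence with jq, for example that
    # all mgr daemons run a single version and that it matches the target:
    ceph versions | jq -e '.mgr | length == 1'
    ceph versions | jq -e '.mgr | keys' | grep $sha1
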
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/% b/qa/suites/orch/cephadm/with-work/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/% diff --git a/qa/suites/orch/cephadm/with-work/.qa b/qa/suites/orch/cephadm/with-work/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/0-distro b/qa/suites/orch/cephadm/with-work/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/fixed-2.yaml b/qa/suites/orch/cephadm/with-work/fixed-2.yaml new file mode 120000 index 000000000..5c3e0593c --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/fixed-2.yaml @@ -0,0 +1 @@ +../smoke/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/mode/.qa b/qa/suites/orch/cephadm/with-work/mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/mode/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/mode/packaged.yaml b/qa/suites/orch/cephadm/with-work/mode/packaged.yaml new file mode 100644 index 000000000..ba8d43218 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/mode/packaged.yaml @@ -0,0 +1,5 @@ +overrides: + cephadm: + cephadm_mode: cephadm-package + install: + extra_packages: [cephadm] diff --git a/qa/suites/orch/cephadm/with-work/mode/root.yaml b/qa/suites/orch/cephadm/with-work/mode/root.yaml new file mode 100644 index 000000000..bedb31d5d --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/mode/root.yaml @@ -0,0 +1,3 @@ +overrides: + cephadm: + cephadm_mode: root diff --git a/qa/suites/orch/cephadm/with-work/mon_election b/qa/suites/orch/cephadm/with-work/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/msgr b/qa/suites/orch/cephadm/with-work/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/start.yaml b/qa/suites/orch/cephadm/with-work/start.yaml new file mode 100644 index 000000000..a1b89e44d --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/start.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- cephadm: + conf: + mgr: + debug ms: 1 + debug mgr: 20 diff --git a/qa/suites/orch/cephadm/with-work/tasks/.qa b/qa/suites/orch/cephadm/with-work/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/tasks/rados_api_tests.yaml b/qa/suites/orch/cephadm/with-work/tasks/rados_api_tests.yaml new file mode 120000 index 000000000..2ce80f969 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/tasks/rados_api_tests.yaml @@ -0,0 +1 @@ +.qa/suites/rados/basic/tasks/rados_api_tests.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/tasks/rados_python.yaml b/qa/suites/orch/cephadm/with-work/tasks/rados_python.yaml new file mode 120000 index 000000000..210ad8f18 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/tasks/rados_python.yaml @@ -0,0 +1 @@ +.qa/suites/rados/basic/tasks/rados_python.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/with-work/tasks/rotate-keys.yaml b/qa/suites/orch/cephadm/with-work/tasks/rotate-keys.yaml new file mode 100644 index 000000000..5b91c6ed3 --- /dev/null +++ b/qa/suites/orch/cephadm/with-work/tasks/rotate-keys.yaml @@ -0,0 +1,16 @@ +tasks: +- cephadm.shell: + mon.a: + - | + set -ex + for f in osd.0 osd.1 osd.2 osd.3 osd.4 osd.5 osd.6 osd.7 mgr.y mgr.x + do + echo "rotating key for $f" + K=$(ceph auth get-key $f) + NK="$K" + ceph orch daemon rotate-key $f + while [ "$K" == "$NK" ]; do + sleep 5 + NK=$(ceph auth get-key $f) + done + done diff --git a/qa/suites/orch/cephadm/workunits/% b/qa/suites/orch/cephadm/workunits/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/% diff --git a/qa/suites/orch/cephadm/workunits/.qa b/qa/suites/orch/cephadm/workunits/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/.qa @@ -0,0 +1 @@ +../.qa/
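rotate-keys.yaml above relies on 'ceph orch daemon rotate-key' being asynchronous: the command only schedules the new key, so the test keeps re-reading 'ceph auth get-key' until the value actually changes. The same wait, factored into a helper for a single daemon (a sketch, not part of the suite; the function name is made up here):

    # Rotate one daemon's cephx key and block until 'ceph auth get-key'
    # returns something different from the key we started with.
    rotate_and_wait() {
        local daemon=$1
        local old_key new_key
        old_key=$(ceph auth get-key "$daemon")
        ceph orch daemon rotate-key "$daemon"
        new_key=$old_key
        while [ "$new_key" = "$old_key" ]; do
            sleep 5
            new_key=$(ceph auth get-key "$daemon")
        done
    }

    rotate_and_wait osd.0
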
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/0-distro b/qa/suites/orch/cephadm/workunits/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/agent b/qa/suites/orch/cephadm/workunits/agent new file mode 120000 index 000000000..154924209 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/agent @@ -0,0 +1 @@ +../smoke/agent
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/mon_election b/qa/suites/orch/cephadm/workunits/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/task/.qa b/qa/suites/orch/cephadm/workunits/task/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/task/test_adoption.yaml b/qa/suites/orch/cephadm/workunits/task/test_adoption.yaml new file mode 100644 index 000000000..e04fc1eea --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_adoption.yaml @@ -0,0 +1,11 @@ +roles: +- [mon.a, mgr.x, osd.0, client.0] +tasks: +- install: +- exec: + mon.a: + - yum install -y python3 || apt install -y python3 +- workunit: + clients: + client.0: + - cephadm/test_adoption.sh diff --git a/qa/suites/orch/cephadm/workunits/task/test_ca_signed_key.yaml b/qa/suites/orch/cephadm/workunits/task/test_ca_signed_key.yaml new file mode 100644 index 000000000..7bf51f719 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_ca_signed_key.yaml @@ -0,0 +1,31 @@ +roles: +- - host.a + - mon.a + - mgr.a + - osd.0 + - client.0 +- - host.b + - mon.b + - mgr.b + - osd.1 + - client.1 +overrides: + cephadm: + use-ca-signed-key: True +tasks: +- install: +- cephadm: +- cephadm.shell: + host.a: + - | + set -ex + HOSTNAMES=$(ceph orch host ls --format json | jq -r '.[] | .hostname') + for host in $HOSTNAMES; do + # do a check-host on each host to make sure it's reachable + ceph cephadm check-host ${host} 2> ${host}-ok.txt + HOST_OK=$(cat ${host}-ok.txt) + if ! grep -q "Host looks OK" <<< "$HOST_OK"; then + printf "Failed host check:\n\n$HOST_OK" + exit 1 + fi + done diff --git a/qa/suites/orch/cephadm/workunits/task/test_cephadm.yaml b/qa/suites/orch/cephadm/workunits/task/test_cephadm.yaml new file mode 100644 index 000000000..4d253517c --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_cephadm.yaml @@ -0,0 +1,11 @@ +roles: +- [mon.a, mgr.x, osd.0, client.0] +tasks: +- install: +- exec: + mon.a: + - yum install -y python3 || apt install -y python3 +- workunit: + clients: + client.0: + - cephadm/test_cephadm.sh diff --git a/qa/suites/orch/cephadm/workunits/task/test_cephadm_repos.yaml b/qa/suites/orch/cephadm/workunits/task/test_cephadm_repos.yaml new file mode 100644 index 000000000..4a1ac88de --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_cephadm_repos.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mgr.x, osd.0, client.0] +tasks: +- workunit: + no_coverage_and_limits: true + clients: + client.0: + - cephadm/test_repos.sh diff --git a/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/+ b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/+ diff --git a/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/.qa b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/centos_8.stream_container_tools.yaml b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/centos_8.stream_container_tools.yaml new file mode 120000 index 000000000..7a86f967f --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/centos_8.stream_container_tools.yaml @@ -0,0 +1 @@ +.qa/distros/podman/centos_8.stream_container_tools.yaml
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/test_iscsi_container.yaml b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/test_iscsi_container.yaml new file mode 100644 index 000000000..19d302c87 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_iscsi_container/test_iscsi_container.yaml @@ -0,0 +1,21 @@ +roles: +- - host.a + - osd.0 + - osd.1 + - osd.2 + - mon.a + - mgr.a + - client.0 +tasks: +- install: +- cephadm: +- cephadm.shell: + host.a: + - ceph osd pool create foo + - rbd pool init foo + - ceph orch apply iscsi foo u p +- workunit: + clients: + client.0: + - cephadm/test_iscsi_pids_limit.sh + - cephadm/test_iscsi_etc_hosts.sh diff --git a/qa/suites/orch/cephadm/workunits/task/test_orch_cli.yaml b/qa/suites/orch/cephadm/workunits/task/test_orch_cli.yaml new file mode 100644 index 000000000..ec65fb116 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_orch_cli.yaml @@ -0,0 +1,17 @@ +roles: +- - host.a + - osd.0 + - osd.1 + - osd.2 + - mon.a + - mgr.a + - client.0 +tasks: +- install: +- cephadm: +- cephadm.shell: + host.a: + - ceph orch apply mds a +- cephfs_test_runner: + modules: + - tasks.cephadm_cases.test_cli diff --git a/qa/suites/orch/cephadm/workunits/task/test_orch_cli_mon.yaml b/qa/suites/orch/cephadm/workunits/task/test_orch_cli_mon.yaml new file mode 100644 index 000000000..2a33dc839 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_orch_cli_mon.yaml @@ -0,0 +1,45 @@ +roles: +- - host.a + - osd.0 + - osd.1 + - osd.2 + - mon.a + - mgr.a + - client.0 +- - host.b + - osd.3 + - osd.4 + - osd.5 + - mon.b + - mgr.b + - client.1 +- - host.c + - osd.6 + - osd.7 + - osd.8 + - mon.c + - mgr.c + - client.2 +- - host.d + - osd.9 + - osd.10 + - osd.11 + - mon.d + - mgr.d + - client.3 +- - host.e + - osd.12 + - osd.13 + - osd.14 + - mon.e + - mgr.e + - client.4 +tasks: +- install: +- cephadm: +- cephadm.shell: + host.a: + - ceph orch apply mds a +- cephfs_test_runner: + modules: + - tasks.cephadm_cases.test_cli_mon diff --git a/qa/suites/orch/cephadm/workunits/task/test_rgw_multisite.yaml b/qa/suites/orch/cephadm/workunits/task/test_rgw_multisite.yaml new file mode 100644 index 000000000..976e3730c --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_rgw_multisite.yaml @@ -0,0 +1,40 @@ +roles: +- - host.a + - mon.a + - mgr.a + - osd.0 +- - host.b + - mon.b + - mgr.b + - osd.1 +- - host.c + - mon.c + - osd.2 +tasks: +- install: +- cephadm: +- cephadm.shell: + host.a: + - ceph mgr module enable rgw +- rgw_module.apply: + specs: + - rgw_realm: myrealm1 + rgw_zonegroup: myzonegroup1 + rgw_zone: myzone1 + spec: + rgw_frontend_port: 5500 +- cephadm.shell: + host.a: + - | + set -e + set -x + while true; do TOKEN=$(ceph rgw realm tokens | jq -r '.[0].token'); echo $TOKEN; if [ "$TOKEN" != "master zone has no endpoint" ]; then break; fi; sleep 5; done + TOKENS=$(ceph rgw realm tokens) + echo $TOKENS | jq --exit-status '.[0].realm == "myrealm1"' + echo $TOKENS | jq --exit-status '.[0].token' + TOKEN_JSON=$(ceph rgw realm tokens | jq -r '.[0].token' | base64 --decode) + echo $TOKEN_JSON | jq --exit-status '.realm_name == "myrealm1"' + echo $TOKEN_JSON | jq --exit-status '.endpoint | test("http://.+:\\d+")' + echo $TOKEN_JSON | jq --exit-status '.realm_id | test("^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$")' + echo $TOKEN_JSON | jq --exit-status '.access_key' + echo $TOKEN_JSON | jq --exit-status '.secret' diff --git 
a/qa/suites/orch/cephadm/workunits/task/test_set_mon_crush_locations.yaml b/qa/suites/orch/cephadm/workunits/task/test_set_mon_crush_locations.yaml new file mode 100644 index 000000000..6d9bd1525 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_set_mon_crush_locations.yaml @@ -0,0 +1,62 @@ +roles: +- - host.a + - osd.0 + - mon.a + - mgr.a +- - host.b + - osd.1 + - mon.b + - mgr.b +- - host.c + - osd.2 + - mon.c +tasks: +- install: +- cephadm: +- cephadm.apply: + specs: + - service_type: mon + service_id: foo + placement: + count: 3 + spec: + crush_locations: + host.a: + - datacenter=a + host.b: + - datacenter=b + - rack=2 + host.c: + - datacenter=a + - rack=3 +- cephadm.shell: + host.a: + - | + set -ex + # since we don't know the real hostnames before the test, the next + # bit is in order to replace the fake hostnames "host.a/b/c" with + # the actual names cephadm knows the host by within the mon spec + ceph orch host ls --format json | jq -r '.[] | .hostname' > realnames + echo $'host.a\nhost.b\nhost.c' > fakenames + echo $'a\nb\nc' > mon_ids + echo $'{datacenter=a}\n{datacenter=b,rack=2}\n{datacenter=a,rack=3}' > crush_locs + ceph orch ls --service-name mon --export > mon.yaml + MONSPEC=`cat mon.yaml` + echo "$MONSPEC" + while read realname <&3 && read fakename <&4; do + MONSPEC="${MONSPEC//$fakename/$realname}" + done 3<realnames 4<fakenames + echo "$MONSPEC" > mon.yaml + cat mon.yaml + # now the spec should have the real hostnames, so let's re-apply + ceph orch apply -i mon.yaml + sleep 90 + ceph orch ps --refresh + ceph orch ls --service-name mon --export > mon.yaml; ceph orch apply -i mon.yaml + sleep 90 + ceph mon dump + ceph mon dump --format json + # verify all the crush locations got set from "ceph mon dump" output + while read monid <&3 && read crushloc <&4; do + ceph mon dump --format json | jq --arg monid "$monid" --arg crushloc "$crushloc" -e '.mons | .[] | select(.name == $monid) | .crush_location == $crushloc' + done 3<mon_ids 4<crush_locs diff --git a/qa/suites/orch/rook/.qa b/qa/suites/orch/rook/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/% b/qa/suites/orch/rook/smoke/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/rook/smoke/% diff --git a/qa/suites/orch/rook/smoke/.qa b/qa/suites/orch/rook/smoke/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/smoke/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/0-distro/.qa b/qa/suites/orch/rook/smoke/0-distro/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/smoke/0-distro/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/0-distro/ubuntu_20.04.yaml b/qa/suites/orch/rook/smoke/0-distro/ubuntu_20.04.yaml new file mode 120000 index 000000000..f62164f91 --- /dev/null +++ b/qa/suites/orch/rook/smoke/0-distro/ubuntu_20.04.yaml @@ -0,0 +1 @@ +.qa/distros/container-hosts/ubuntu_20.04.yaml
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/0-kubeadm.yaml b/qa/suites/orch/rook/smoke/0-kubeadm.yaml new file mode 100644 index 000000000..33915f571 --- /dev/null +++ b/qa/suites/orch/rook/smoke/0-kubeadm.yaml @@ -0,0 +1,2 @@ +tasks: +- kubeadm: diff --git a/qa/suites/orch/rook/smoke/0-nvme-loop.yaml b/qa/suites/orch/rook/smoke/0-nvme-loop.yaml new file mode 120000 index 000000000..5206b6edd --- /dev/null +++ b/qa/suites/orch/rook/smoke/0-nvme-loop.yaml @@ -0,0 +1 @@ +.qa/overrides/nvme_loop.yaml
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/1-rook.yaml b/qa/suites/orch/rook/smoke/1-rook.yaml new file mode 100644 index 000000000..8182845e9 --- /dev/null +++ b/qa/suites/orch/rook/smoke/1-rook.yaml @@ -0,0 +1,2 @@ +tasks: +- rook: diff --git a/qa/suites/orch/rook/smoke/2-workload/.qa b/qa/suites/orch/rook/smoke/2-workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/smoke/2-workload/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/2-workload/none.yaml b/qa/suites/orch/rook/smoke/2-workload/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/orch/rook/smoke/2-workload/none.yaml diff --git a/qa/suites/orch/rook/smoke/2-workload/radosbench.yaml b/qa/suites/orch/rook/smoke/2-workload/radosbench.yaml new file mode 100644 index 000000000..fd71605c8 --- /dev/null +++ b/qa/suites/orch/rook/smoke/2-workload/radosbench.yaml @@ -0,0 +1,5 @@ +tasks: +- install: + host.a: +- radosbench: + clients: [client.a] diff --git a/qa/suites/orch/rook/smoke/cluster/.qa b/qa/suites/orch/rook/smoke/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/smoke/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/cluster/1-node.yaml b/qa/suites/orch/rook/smoke/cluster/1-node.yaml new file mode 100644 index 000000000..d18510f6e --- /dev/null +++ b/qa/suites/orch/rook/smoke/cluster/1-node.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + osd crush chooseleaf type: 0 + +roles: +- - host.a + - client.a diff --git a/qa/suites/orch/rook/smoke/cluster/3-node.yaml b/qa/suites/orch/rook/smoke/cluster/3-node.yaml new file mode 100644 index 000000000..d79a9f786 --- /dev/null +++ b/qa/suites/orch/rook/smoke/cluster/3-node.yaml @@ -0,0 +1,7 @@ +roles: +- - host.a + - client.a +- - host.b + - client.b +- - host.c + - client.c diff --git a/qa/suites/orch/rook/smoke/k8s/.qa b/qa/suites/orch/rook/smoke/k8s/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/smoke/k8s/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/k8s/1.21.yaml b/qa/suites/orch/rook/smoke/k8s/1.21.yaml new file mode 100644 index 000000000..9e57a477f --- /dev/null +++ b/qa/suites/orch/rook/smoke/k8s/1.21.yaml @@ -0,0 +1,3 @@ +overrides: + kubeadm: + version: "1.21" diff --git a/qa/suites/orch/rook/smoke/net/.qa b/qa/suites/orch/rook/smoke/net/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/smoke/net/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/net/calico.yaml b/qa/suites/orch/rook/smoke/net/calico.yaml new file mode 100644 index 000000000..7e838c6c8 --- /dev/null +++ b/qa/suites/orch/rook/smoke/net/calico.yaml @@ -0,0 +1,3 @@ +overrides: + kubeadm: + pod_network: calico diff --git a/qa/suites/orch/rook/smoke/net/flannel.yaml b/qa/suites/orch/rook/smoke/net/flannel.yaml new file mode 100644 index 000000000..8a1a20691 --- /dev/null +++ b/qa/suites/orch/rook/smoke/net/flannel.yaml @@ -0,0 +1,3 @@ +overrides: + kubeadm: + pod_network: flannel diff --git a/qa/suites/orch/rook/smoke/net/host.yaml b/qa/suites/orch/rook/smoke/net/host.yaml new file mode 100644 index 000000000..d25725afd --- /dev/null +++ b/qa/suites/orch/rook/smoke/net/host.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + spec: + mon: + allowMultiplePerNode: false + network: + provider: host diff --git a/qa/suites/orch/rook/smoke/rook/.qa b/qa/suites/orch/rook/smoke/rook/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/rook/smoke/rook/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/orch/rook/smoke/rook/1.7.2.yaml b/qa/suites/orch/rook/smoke/rook/1.7.2.yaml new file mode 100644 index 000000000..de96c5815 --- /dev/null +++ b/qa/suites/orch/rook/smoke/rook/1.7.2.yaml @@ -0,0 +1,4 @@ +overrides: + rook: + rook_image: rook/ceph:v1.7.2 + rook_branch: v1.7.2 diff --git a/qa/suites/orch/rook/smoke/rook/master.yaml b/qa/suites/orch/rook/smoke/rook/master.yaml new file mode 100644 index 000000000..72b1cec72 --- /dev/null +++ b/qa/suites/orch/rook/smoke/rook/master.yaml @@ -0,0 +1,3 @@ +overrides: + rook: + rook_image: rook/ceph:master diff --git a/qa/suites/perf-basic/% b/qa/suites/perf-basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/perf-basic/% diff --git a/qa/suites/perf-basic/.qa b/qa/suites/perf-basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/perf-basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/perf-basic/ceph.yaml b/qa/suites/perf-basic/ceph.yaml new file mode 100644 index 000000000..72f66cf5f --- /dev/null +++ b/qa/suites/perf-basic/ceph.yaml @@ -0,0 +1,23 @@ +meta: +- desc: | + perf-basic is a basic performance suite. + Must be run on bare-metal machines. + On VMs performance results will be inconsistent + and can't be compared across runs. + Run ceph on a single node. + Use xfs beneath the osds. + Setup rgw on client.0 + +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +tasks: +- install: +- ceph: + fs: xfs + wait-for-scrub: false + log-ignorelist: + - \(PG_ + - \(OSD_ + - \(OBJECT_ + - overall HEALTH +- ssh_keys: diff --git a/qa/suites/perf-basic/objectstore/.qa b/qa/suites/perf-basic/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/perf-basic/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/perf-basic/objectstore/bluestore.yaml b/qa/suites/perf-basic/objectstore/bluestore.yaml new file mode 100644 index 000000000..699db42d7 --- /dev/null +++ b/qa/suites/perf-basic/objectstore/bluestore.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + diff --git a/qa/suites/perf-basic/settings/.qa b/qa/suites/perf-basic/settings/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/perf-basic/settings/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/perf-basic/settings/optimized.yaml b/qa/suites/perf-basic/settings/optimized.yaml new file mode 100644 index 000000000..8b53498db --- /dev/null +++ b/qa/suites/perf-basic/settings/optimized.yaml @@ -0,0 +1,78 @@ +meta: +- desc: | + Use debug level 0/0 for performance tests. + +overrides: + ceph: + conf: + mon: + debug mon: "0/0" + debug ms: "0/0" + debug paxos: "0/0" + osd: + debug filestore: "0/0" + debug journal: "0/0" + debug ms: "0/0" + debug osd: "0/0" + global: + auth client required: none + auth cluster required: none + auth service required: none + auth supported: none + + debug lockdep: "0/0" + debug context: "0/0" + debug crush: "0/0" + debug mds: "0/0" + debug mds balancer: "0/0" + debug mds locker: "0/0" + debug mds log: "0/0" + debug mds log expire: "0/0" + debug mds migrator: "0/0" + debug buffer: "0/0" + debug timer: "0/0" + debug filer: "0/0" + debug striper: "0/0" + debug objecter: "0/0" + debug rados: "0/0" + debug rbd: "0/0" + debug rbd mirror: "0/0" + debug rbd replay: "0/0" + debug journaler: "0/0" + debug objectcacher: "0/0" + debug client: "0/0" + debug osd: "0/0" + debug optracker: "0/0" + debug objclass: "0/0" + debug filestore: "0/0" + debug journal: "0/0" + debug ms: "0/0" + debug mon: "0/0" + debug monc: "0/0" + debug paxos: "0/0" + debug tp: "0/0" + debug auth: "0/0" + debug crypto: "0/0" + debug finisher: "0/0" + debug heartbeatmap: "0/0" + debug perfcounter: "0/0" + debug rgw: "0/0" + debug rgw sync: "0/0" + debug civetweb: "0/0" + debug javaclient: "0/0" + debug asok: "0/0" + debug throttle: "0/0" + debug refs: "0/0" + debug compressor: "0/0" + debug bluestore: "0/0" + debug bluefs: "0/0" + debug bdev: "0/0" + debug kstore: "0/0" + debug rocksdb: "0/0" + debug leveldb: "0/0" + debug memdb: "0/0" + debug fuse: "0/0" + debug mgr: "0/0" + debug mgrc: "0/0" + debug dpdk: "0/0" + debug eventtrace: "0/0" diff --git a/qa/suites/perf-basic/ubuntu_latest.yaml b/qa/suites/perf-basic/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/perf-basic/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/perf-basic/workloads/.qa b/qa/suites/perf-basic/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/perf-basic/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/perf-basic/workloads/client_endpoint_rbd_4K_rand_write.yaml b/qa/suites/perf-basic/workloads/client_endpoint_rbd_4K_rand_write.yaml new file mode 100644 index 000000000..1e9832b06 --- /dev/null +++ b/qa/suites/perf-basic/workloads/client_endpoint_rbd_4K_rand_write.yaml @@ -0,0 +1,32 @@ +meta: +- desc: | + Run librbdfio benchmark using cbt client endpoint for rbd. + 4K randwrite workload. + +tasks: +- cbt: + benchmarks: + fio: + client_endpoints: 'fiotest' + op_size: [4096] + time: 300 + mode: ['randwrite'] + norandommap: True + size: 4096 + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 256 + pgp_size: 256 + replication: 3 + + client_endpoints: + fiotest: + driver: 'librbd' diff --git a/qa/suites/perf-basic/workloads/fio_4K_rand_write.yaml b/qa/suites/perf-basic/workloads/fio_4K_rand_write.yaml new file mode 100644 index 000000000..0b1c492b8 --- /dev/null +++ b/qa/suites/perf-basic/workloads/fio_4K_rand_write.yaml @@ -0,0 +1,29 @@ +meta: +- desc: | + Run librbdfio benchmark using cbt. + 4K randwrite workload. + +tasks: +- cbt: + benchmarks: + librbdfio: + op_size: [4096] + time: 300 + mode: ['randwrite'] + norandommap: True + vol_size: 4096 + procs_per_volume: [1] + volumes_per_client: [2] + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 256 + pgp_size: 256 + replication: 3 diff --git a/qa/suites/perf-basic/workloads/radosbench_4K_write.yaml b/qa/suites/perf-basic/workloads/radosbench_4K_write.yaml new file mode 100644 index 000000000..d0a825bf5 --- /dev/null +++ b/qa/suites/perf-basic/workloads/radosbench_4K_write.yaml @@ -0,0 +1,28 @@ +meta: +- desc: | + Run radosbench benchmark using cbt. + 4K write workload. + +tasks: +- cbt: + benchmarks: + radosbench: + concurrent_ops: 4 + concurrent_procs: 2 + op_size: [4096] + pool_monitoring_list: + - collectl + pool_profile: 'replicated' + run_monitoring_list: + - collectl + time: 300 + write_only: true + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + replicated: + pg_size: 256 + pgp_size: 256 + replication: 'replicated' diff --git a/qa/suites/powercycle/.qa b/qa/suites/powercycle/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/powercycle/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/powercycle/osd/% b/qa/suites/powercycle/osd/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/powercycle/osd/% diff --git a/qa/suites/powercycle/osd/.qa b/qa/suites/powercycle/osd/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/powercycle/osd/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/powercycle/osd/clusters/.qa b/qa/suites/powercycle/osd/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/powercycle/osd/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/powercycle/osd/clusters/3osd-1per-target.yaml b/qa/suites/powercycle/osd/clusters/3osd-1per-target.yaml new file mode 100644 index 000000000..2fbcd0180 --- /dev/null +++ b/qa/suites/powercycle/osd/clusters/3osd-1per-target.yaml @@ -0,0 +1,5 @@ +roles: +- [mon.a, mon.b, mon.c, mgr.x, mgr.y, mds.a, client.0] +- [osd.0] +- [osd.1] +- [osd.2] diff --git a/qa/suites/powercycle/osd/ignorelist_health.yaml b/qa/suites/powercycle/osd/ignorelist_health.yaml new file mode 100644 index 000000000..bce5e9588 --- /dev/null +++ b/qa/suites/powercycle/osd/ignorelist_health.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + log-ignorelist: + - \(MDS_TRIM\) + - \(MDS_SLOW_REQUEST\) + - MDS_SLOW_METADATA_IO + - Behind on trimming diff --git a/qa/suites/powercycle/osd/objectstore b/qa/suites/powercycle/osd/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/powercycle/osd/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/powercycle/osd/powercycle/.qa b/qa/suites/powercycle/osd/powercycle/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/powercycle/osd/powercycle/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/powercycle/osd/powercycle/default.yaml b/qa/suites/powercycle/osd/powercycle/default.yaml new file mode 100644 index 000000000..9e0ed4769 --- /dev/null +++ b/qa/suites/powercycle/osd/powercycle/default.yaml @@ -0,0 +1,26 @@ +tasks: +- install: + extra_system_packages: + deb: + - bison + - flex + - libelf-dev + - libssl-dev + - libaio-dev + - libtool-bin + - uuid-dev + - xfslibs-dev + rpm: + - bison + - flex + - elfutils-libelf-devel + - openssl-devel + - libaio-devel + - libtool + - libuuid-devel + - xfsprogs-devel +- ceph: +- thrashosds: + chance_down: 1.0 + powercycle: true + timeout: 600 diff --git a/qa/suites/powercycle/osd/supported-all-distro b/qa/suites/powercycle/osd/supported-all-distro new file mode 120000 index 000000000..ca82dde58 --- /dev/null +++ b/qa/suites/powercycle/osd/supported-all-distro @@ -0,0 +1 @@ +.qa/distros/supported-all-distro
\ No newline at end of file diff --git a/qa/suites/powercycle/osd/tasks/.qa b/qa/suites/powercycle/osd/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/powercycle/osd/tasks/admin_socket_objecter_requests.yaml b/qa/suites/powercycle/osd/tasks/admin_socket_objecter_requests.yaml new file mode 100644 index 000000000..3b1a8920b --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/admin_socket_objecter_requests.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + client.0: + admin socket: /var/run/ceph/ceph-$name.asok +tasks: +- radosbench: + clients: [client.0] + time: 60 +- admin_socket: + client.0: + objecter_requests: + test: "http://git.ceph.com/?p={repo};a=blob_plain;f=src/test/admin_socket/objecter_requests;hb={branch}" diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_kernel_untar_build.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_kernel_untar_build.yaml new file mode 100644 index 000000000..87f8f57cc --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_kernel_untar_build.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + client: + fuse_default_permissions: 0 +tasks: +- ceph-fuse: +- workunit: + timeout: 6h + clients: + all: + - kernel_untar_build.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_misc.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_misc.yaml new file mode 100644 index 000000000..683d3f592 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_misc.yaml @@ -0,0 +1,7 @@ +tasks: +- ceph-fuse: +- workunit: + timeout: 6h + clients: + all: + - fs/misc diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_ffsb.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_ffsb.yaml new file mode 100644 index 000000000..9f3fa7b18 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_ffsb.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + osd: + filestore flush min: 0 + mds: + debug ms: 1 + debug mds: 20 +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsstress.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsstress.yaml new file mode 100644 index 000000000..5908d951b --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsx.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsx.yaml new file mode 100644 index 000000000..94031518e --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsx.yaml @@ -0,0 +1,7 @@ +tasks: +- ceph-fuse: +- workunit: + timeout: 6h + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsync.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsync.yaml new file mode 100644 index 000000000..2cbb03c77 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_fsync.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + global: + osd_pg_log_dups_tracked: 10000 + +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsync-tester.sh diff --git a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_pjd.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_pjd.yaml new file mode 100644 index 000000000..cb9e2e019 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_pjd.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + client: + fuse set user groups: true +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/pjd.sh diff --git 
a/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_truncate_delay.yaml b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_truncate_delay.yaml new file mode 100644 index 000000000..f3efafa2e --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/cfuse_workunit_suites_truncate_delay.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + client: + ms_inject_delay_probability: 1 + ms_inject_delay_type: osd + ms_inject_delay_max: 5 + client_oc_max_dirty_age: 1 +tasks: +- ceph-fuse: +- exec: + client.0: + - dd if=/dev/zero of=./foo count=100 + - sleep 2 + - truncate --size 0 ./foo diff --git a/qa/suites/powercycle/osd/tasks/rados_api_tests.yaml b/qa/suites/powercycle/osd/tasks/rados_api_tests.yaml new file mode 100644 index 000000000..d2970908c --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/rados_api_tests.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - \(POOL_APP_NOT_ENABLED\) + - \(PG_AVAILABILITY\) + conf: + mon: + mon warn on pool no app: false + osd: + osd class load list: "*" + osd class default list: "*" +tasks: +- ceph-fuse: +- workunit: + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/powercycle/osd/tasks/radosbench.yaml b/qa/suites/powercycle/osd/tasks/radosbench.yaml new file mode 100644 index 000000000..91573f907 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/radosbench.yaml @@ -0,0 +1,38 @@ +tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 diff --git a/qa/suites/powercycle/osd/tasks/readwrite.yaml b/qa/suites/powercycle/osd/tasks/readwrite.yaml new file mode 100644 index 000000000..c53e52b08 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/readwrite.yaml @@ -0,0 +1,9 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/powercycle/osd/tasks/snaps-few-objects.yaml b/qa/suites/powercycle/osd/tasks/snaps-few-objects.yaml new file mode 100644 index 000000000..aa82d973a --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/powercycle/osd/tasks/snaps-many-objects.yaml b/qa/suites/powercycle/osd/tasks/snaps-many-objects.yaml new file mode 100644 index 000000000..1ffe4e148 --- /dev/null +++ b/qa/suites/powercycle/osd/tasks/snaps-many-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/powercycle/osd/thrashosds-health.yaml b/qa/suites/powercycle/osd/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/powercycle/osd/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
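Note: the powercycle/osd task fragments above are scheduled like any other suite through teuthology. A minimal, illustrative invocation follows; the branch, machine type and filter value are placeholders and option spellings can differ between teuthology versions:

    # schedule the powercycle/osd suite, limited to jobs built from one fragment
    teuthology-suite --suite powercycle/osd \
        --ceph main \
        --machine-type smithi \
        --filter snaps-many-objects

The --filter option matches against the generated job description, so it can restrict a run to the jobs produced by a single fragment such as snaps-many-objects.yaml.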
\ No newline at end of file diff --git a/qa/suites/rados/.qa b/qa/suites/rados/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/basic/% b/qa/suites/rados/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/basic/% diff --git a/qa/suites/rados/basic/.qa b/qa/suites/rados/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/basic/ceph.yaml b/qa/suites/rados/basic/ceph.yaml new file mode 100644 index 000000000..c12a671f0 --- /dev/null +++ b/qa/suites/rados/basic/ceph.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + conf: + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- install: + extra_system_packages: + rpm: + - sqlite-devel + deb: + - sqlite3 +- ceph: diff --git a/qa/suites/rados/basic/clusters/+ b/qa/suites/rados/basic/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/basic/clusters/+ diff --git a/qa/suites/rados/basic/clusters/.qa b/qa/suites/rados/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/basic/clusters/fixed-2.yaml b/qa/suites/rados/basic/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rados/basic/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rados/basic/clusters/openstack.yaml b/qa/suites/rados/basic/clusters/openstack.yaml new file mode 100644 index 000000000..e559d9126 --- /dev/null +++ b/qa/suites/rados/basic/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/rados/basic/mon_election b/qa/suites/rados/basic/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/basic/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/basic/msgr b/qa/suites/rados/basic/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/rados/basic/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/rados/basic/msgr-failures/.qa b/qa/suites/rados/basic/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/basic/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/basic/msgr-failures/few.yaml b/qa/suites/rados/basic/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rados/basic/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/basic/msgr-failures/many.yaml b/qa/suites/rados/basic/msgr-failures/many.yaml new file mode 100644 index 000000000..075d959a7 --- /dev/null +++ b/qa/suites/rados/basic/msgr-failures/many.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 1500 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/basic/objectstore b/qa/suites/rados/basic/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/basic/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/basic/rados.yaml b/qa/suites/rados/basic/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/basic/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/basic/supported-random-distro$ b/qa/suites/rados/basic/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rados/basic/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
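Note: most of the rados/basic entries above are symlinks (mode 120000) rather than regular files. Each suite directory carries a '.qa' link pointing one level up, and shared fragments are pulled in through that link instead of being copied. A minimal sketch of how such links are created when adding a suite directory, using paths taken from the hunks above:

    cd qa/suites/rados/basic
    ln -s ../.qa .qa                          # relative link back towards qa/
    ln -s .qa/config/rados.yaml rados.yaml    # reuse the shared rados config
    ln -s '.qa/distros/supported-random-distro$' 'supported-random-distro$'

Because the targets are relative, the links keep working wherever the tree is checked out, and a broken one can be spotted with 'find qa/suites -xtype l'.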
\ No newline at end of file diff --git a/qa/suites/rados/basic/tasks/.qa b/qa/suites/rados/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/basic/tasks/libcephsqlite.yaml b/qa/suites/rados/basic/tasks/libcephsqlite.yaml new file mode 100644 index 000000000..12498fb15 --- /dev/null +++ b/qa/suites/rados/basic/tasks/libcephsqlite.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + conf: + client: + debug ms: 1 + debug client: 20 + debug cephsqlite: 20 + log-ignorelist: + - POOL_APP_NOT_ENABLED + - do not have an application enabled +tasks: +- exec: + client.0: + - ceph osd pool create cephsqlite + - ceph auth get-or-create client.libcephsqlite mon 'profile simple-rados-client-with-blocklist' osd 'allow rwx pool=cephsqlite' >> /etc/ceph/ceph.keyring +- exec: + client.0: + - ceph_test_libcephsqlite --id libcephsqlite --no-log-to-stderr +- workunit: + clients: + client.0: + - rados/test_libcephsqlite.sh cephsqlite + env: + CEPH_ARGS: --id libcephsqlite --no-log-to-stderr diff --git a/qa/suites/rados/basic/tasks/rados_api_tests.yaml b/qa/suites/rados/basic/tasks/rados_api_tests.yaml new file mode 100644 index 000000000..f765663a3 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_api_tests.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - but it is still running + - overall HEALTH_ + - \(POOL_FULL\) + - \(SMALLER_PGP_NUM\) + - \(CACHE_POOL_NO_HIT_SET\) + - \(CACHE_POOL_NEAR_FULL\) + - \(POOL_APP_NOT_ENABLED\) + - \(PG_AVAILABILITY\) + - \(PG_DEGRADED\) + conf: + client: + debug ms: 1 + mon: + mon warn on pool no app: false + osd: + osd class load list: "*" + osd class default list: "*" +tasks: +- workunit: + clients: + client.0: + - rados/test.sh + - rados/test_pool_quota.sh + diff --git a/qa/suites/rados/basic/tasks/rados_cls_all.yaml b/qa/suites/rados/basic/tasks/rados_cls_all.yaml new file mode 100644 index 000000000..8896ccb44 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_cls_all.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - \(PG_AVAILABILITY\) + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd_class_load_list: "*" + osd_class_default_list: "*" +tasks: +- workunit: + clients: + client.0: + - cls diff --git a/qa/suites/rados/basic/tasks/rados_python.yaml b/qa/suites/rados/basic/tasks/rados_python.yaml new file mode 100644 index 000000000..18e150c6c --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_python.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(PG_ + - \(OSD_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) + install: + ceph: + extra_system_packages: + rpm: + - python3-pytest + deb: + - python3-pytest +tasks: +- workunit: + timeout: 1h + clients: + client.0: + - rados/test_python.sh diff --git a/qa/suites/rados/basic/tasks/rados_stress_watch.yaml b/qa/suites/rados/basic/tasks/rados_stress_watch.yaml new file mode 100644 index 000000000..9ff0a4150 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_stress_watch.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(TOO_FEW_PGS\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- workunit: + clients: + client.0: + - rados/stress_watch.sh diff --git a/qa/suites/rados/basic/tasks/rados_striper.yaml b/qa/suites/rados/basic/tasks/rados_striper.yaml new file mode 100644 index 000000000..cafd824b0 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_striper.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- exec: + client.0: + - ceph_test_rados_striper_api_io + - 
ceph_test_rados_striper_api_aio + - ceph_test_rados_striper_api_striping + diff --git a/qa/suites/rados/basic/tasks/rados_workunit_loadgen_big.yaml b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_big.yaml new file mode 100644 index 000000000..53effb42d --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_big.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(POOL_APP_NOT_ENABLED\) +tasks: +- workunit: + clients: + all: + - rados/load-gen-big.sh diff --git a/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mix.yaml b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mix.yaml new file mode 100644 index 000000000..847aedb21 --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mix.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(POOL_APP_NOT_ENABLED\) +tasks: +- workunit: + clients: + all: + - rados/load-gen-mix.sh diff --git a/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mostlyread.yaml b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mostlyread.yaml new file mode 100644 index 000000000..b25392ffa --- /dev/null +++ b/qa/suites/rados/basic/tasks/rados_workunit_loadgen_mostlyread.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(POOL_APP_NOT_ENABLED\) +tasks: +- workunit: + clients: + all: + - rados/load-gen-mostlyread.sh diff --git a/qa/suites/rados/basic/tasks/readwrite.yaml b/qa/suites/rados/basic/tasks/readwrite.yaml new file mode 100644 index 000000000..fc13e67f0 --- /dev/null +++ b/qa/suites/rados/basic/tasks/readwrite.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + crush_tunables: optimal + conf: + mon: + mon osd initial require min compat client: luminous + osd: + osd_discard_disconnected_ops: false + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/rados/basic/tasks/repair_test.yaml b/qa/suites/rados/basic/tasks/repair_test.yaml new file mode 100644 index 000000000..383acc956 --- /dev/null +++ b/qa/suites/rados/basic/tasks/repair_test.yaml @@ -0,0 +1,32 @@ +overrides: + ceph: + wait-for-scrub: false + log-ignorelist: + - candidate had a stat error + - candidate had a read error + - deep-scrub 0 missing, 1 inconsistent objects + - deep-scrub 0 missing, 4 inconsistent objects + - deep-scrub [0-9]+ errors + - '!= omap_digest' + - '!= data_digest' + - repair 0 missing, 1 inconsistent objects + - repair 0 missing, 4 inconsistent objects + - repair [0-9]+ errors, [0-9]+ fixed + - scrub 0 missing, 1 inconsistent objects + - scrub [0-9]+ errors + - 'size 1 != size' + - attr name mismatch + - Regular scrub request, deep-scrub details will be lost + - candidate size [0-9]+ info size [0-9]+ mismatch + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + filestore debug inject read err: true + bluestore debug inject read err: true +tasks: +- repair_test: + diff --git a/qa/suites/rados/basic/tasks/scrub_test.yaml b/qa/suites/rados/basic/tasks/scrub_test.yaml new file mode 100644 index 000000000..424657343 --- /dev/null +++ b/qa/suites/rados/basic/tasks/scrub_test.yaml @@ -0,0 +1,31 @@ +overrides: + ceph: + wait-for-scrub: false + log-ignorelist: + - '!= data_digest' + - '!= omap_digest' + - '!= size' + - 'deep-scrub 0 missing, 1 inconsistent objects' + - 'deep-scrub [0-9]+ 
errors' + - 'repair 0 missing, 1 inconsistent objects' + - 'repair [0-9]+ errors, [0-9]+ fixed' + - 'shard [0-9]+ .* : missing' + - 'deep-scrub 1 missing, 1 inconsistent objects' + - 'does not match object info size' + - 'attr name mistmatch' + - 'deep-scrub 1 missing, 0 inconsistent objects' + - 'failed to pick suitable auth object' + - 'candidate size [0-9]+ info size [0-9]+ mismatch' + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OSD_SCRUB_ERRORS\) + - \(TOO_FEW_PGS\) + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd deep scrub update digest min age: 0 + osd skip data digest: false +tasks: +- scrub_test: diff --git a/qa/suites/rados/cephadm/.qa b/qa/suites/rados/cephadm/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/cephadm/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/cephadm/osds b/qa/suites/rados/cephadm/osds new file mode 120000 index 000000000..ace6d7c06 --- /dev/null +++ b/qa/suites/rados/cephadm/osds @@ -0,0 +1 @@ +.qa/suites/orch/cephadm/osds/
\ No newline at end of file diff --git a/qa/suites/rados/cephadm/smoke b/qa/suites/rados/cephadm/smoke new file mode 120000 index 000000000..bb2347907 --- /dev/null +++ b/qa/suites/rados/cephadm/smoke @@ -0,0 +1 @@ +.qa/suites/orch/cephadm/smoke
\ No newline at end of file diff --git a/qa/suites/rados/cephadm/smoke-singlehost b/qa/suites/rados/cephadm/smoke-singlehost new file mode 120000 index 000000000..458d624e2 --- /dev/null +++ b/qa/suites/rados/cephadm/smoke-singlehost @@ -0,0 +1 @@ +.qa/suites/orch/cephadm/smoke-singlehost/
\ No newline at end of file diff --git a/qa/suites/rados/cephadm/workunits b/qa/suites/rados/cephadm/workunits new file mode 120000 index 000000000..d2ed9d78a --- /dev/null +++ b/qa/suites/rados/cephadm/workunits @@ -0,0 +1 @@ +.qa/suites/orch/cephadm/workunits/
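Note: the rados/cephadm sub-suite adds no yaml of its own; it only symlinks whole directories out of orch/cephadm (osds, smoke, smoke-singlehost, workunits), so those jobs are also exercised as part of the rados suite. An illustrative way to see where such a link lands once the chained '.qa' links are resolved:

    readlink qa/suites/rados/cephadm/osds
    # -> .qa/suites/orch/cephadm/osds/
    readlink -f qa/suites/rados/cephadm/osds
    # -> <checkout>/qa/suites/orch/cephadm/osds, assuming the .qa chain is intact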
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/% b/qa/suites/rados/dashboard/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/dashboard/% diff --git a/qa/suites/rados/dashboard/.qa b/qa/suites/rados/dashboard/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/dashboard/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/0-single-container-host.yaml b/qa/suites/rados/dashboard/0-single-container-host.yaml new file mode 120000 index 000000000..7406e749c --- /dev/null +++ b/qa/suites/rados/dashboard/0-single-container-host.yaml @@ -0,0 +1 @@ +.qa/distros/single-container-host.yaml
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/debug/.qa b/qa/suites/rados/dashboard/debug/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/dashboard/debug/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/debug/mgr.yaml b/qa/suites/rados/dashboard/debug/mgr.yaml new file mode 120000 index 000000000..651e5f8a8 --- /dev/null +++ b/qa/suites/rados/dashboard/debug/mgr.yaml @@ -0,0 +1 @@ +.qa/debug/mgr.yaml
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/mon_election b/qa/suites/rados/dashboard/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/dashboard/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/random-objectstore$ b/qa/suites/rados/dashboard/random-objectstore$ new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rados/dashboard/random-objectstore$ @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/tasks/.qa b/qa/suites/rados/dashboard/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/dashboard/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/dashboard/tasks/dashboard.yaml b/qa/suites/rados/dashboard/tasks/dashboard.yaml new file mode 100644 index 000000000..e7622f8c3 --- /dev/null +++ b/qa/suites/rados/dashboard/tasks/dashboard.yaml @@ -0,0 +1,72 @@ +roles: +- [mgr.x, mon.a, mon.c, mds.a, mds.c, osd.0, client.0] +- [mgr.y, mgr.z, mon.b, mds.b, osd.1, osd.2, osd.3, client.1] + +overrides: + ceph: + conf: + osd: + osd mclock override recovery settings: true + mgr: + mon warn on pool no app: false + +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. + wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(PG_ + - replacing it with standby + - No standby daemons available + - \(FS_DEGRADED\) + - \(MDS_FAILED\) + - \(MDS_DEGRADED\) + - \(FS_WITH_FAILED_MDS\) + - \(MDS_DAMAGE\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(OSD_DOWN\) + - \(OSD_HOST_DOWN\) + - \(POOL_APP_NOT_ENABLED\) + - \(OSDMAP_FLAGS\) + - \(OSD_FLAGS\) + - \(TELEMETRY_CHANGED\) + - pauserd,pausewr flag\(s\) set + - Monitor daemon marked osd\.[[:digit:]]+ down, but it is still running + - evicting unresponsive client .+ + - MON_DOWN + - rgw: [client.0] + - cephfs_test_runner: + fail_on_skip: false + modules: + - tasks.mgr.test_dashboard + - tasks.mgr.dashboard.test_api + - tasks.mgr.dashboard.test_auth + - tasks.mgr.dashboard.test_cephfs + - tasks.mgr.dashboard.test_cluster + - tasks.mgr.dashboard.test_cluster_configuration + - tasks.mgr.dashboard.test_crush_rule + - tasks.mgr.dashboard.test_erasure_code_profile + - tasks.mgr.dashboard.test_health + - tasks.mgr.dashboard.test_host + - tasks.mgr.dashboard.test_logs + - tasks.mgr.dashboard.test_mgr_module + - tasks.mgr.dashboard.test_monitor + - tasks.mgr.dashboard.test_motd + - tasks.mgr.dashboard.test_orchestrator + - tasks.mgr.dashboard.test_osd + - tasks.mgr.dashboard.test_perf_counters + - tasks.mgr.dashboard.test_pool + - tasks.mgr.dashboard.test_rbd + - tasks.mgr.dashboard.test_rbd_mirroring + - tasks.mgr.dashboard.test_requests + - tasks.mgr.dashboard.test_rgw + - tasks.mgr.dashboard.test_role + - tasks.mgr.dashboard.test_settings + - tasks.mgr.dashboard.test_summary + - tasks.mgr.dashboard.test_telemetry + - tasks.mgr.dashboard.test_user diff --git a/qa/suites/rados/dashboard/tasks/e2e.yaml b/qa/suites/rados/dashboard/tasks/e2e.yaml new file mode 100644 index 000000000..cb6ffb22f --- /dev/null +++ b/qa/suites/rados/dashboard/tasks/e2e.yaml @@ -0,0 +1,23 @@ +roles: +# 3 osd roles on host.a is required for cephadm task. It checks if the cluster is healthy. +# More daemons will be deployed on both hosts in e2e tests. +- - host.a + - osd.0 + - osd.1 + - osd.2 + - mon.a + - mgr.a + - client.0 +- - host.b + - client.1 +tasks: +- install: +- cephadm: +- workunit: + clients: + client.1: + - cephadm/create_iscsi_disks.sh +- workunit: + clients: + client.0: + - cephadm/test_dashboard_e2e.sh diff --git a/qa/suites/rados/mgr/% b/qa/suites/rados/mgr/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/mgr/% diff --git a/qa/suites/rados/mgr/.qa b/qa/suites/rados/mgr/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/mgr/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/mgr/clusters/+ b/qa/suites/rados/mgr/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/mgr/clusters/+ diff --git a/qa/suites/rados/mgr/clusters/.qa b/qa/suites/rados/mgr/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/mgr/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/mgr/clusters/2-node-mgr.yaml b/qa/suites/rados/mgr/clusters/2-node-mgr.yaml new file mode 120000 index 000000000..8a0b9123b --- /dev/null +++ b/qa/suites/rados/mgr/clusters/2-node-mgr.yaml @@ -0,0 +1 @@ +.qa/clusters/2-node-mgr.yaml
\ No newline at end of file diff --git a/qa/suites/rados/mgr/debug/.qa b/qa/suites/rados/mgr/debug/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/mgr/debug/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/mgr/debug/mgr.yaml b/qa/suites/rados/mgr/debug/mgr.yaml new file mode 120000 index 000000000..651e5f8a8 --- /dev/null +++ b/qa/suites/rados/mgr/debug/mgr.yaml @@ -0,0 +1 @@ +.qa/debug/mgr.yaml
\ No newline at end of file diff --git a/qa/suites/rados/mgr/mgr_ttl_cache/.qa b/qa/suites/rados/mgr/mgr_ttl_cache/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/mgr/mgr_ttl_cache/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/mgr/mgr_ttl_cache/disable.yaml b/qa/suites/rados/mgr/mgr_ttl_cache/disable.yaml new file mode 120000 index 000000000..d7db486dd --- /dev/null +++ b/qa/suites/rados/mgr/mgr_ttl_cache/disable.yaml @@ -0,0 +1 @@ +.qa/mgr_ttl_cache/disable.yaml
\ No newline at end of file diff --git a/qa/suites/rados/mgr/mgr_ttl_cache/enable.yaml b/qa/suites/rados/mgr/mgr_ttl_cache/enable.yaml new file mode 120000 index 000000000..18286a656 --- /dev/null +++ b/qa/suites/rados/mgr/mgr_ttl_cache/enable.yaml @@ -0,0 +1 @@ +.qa/mgr_ttl_cache/enable.yaml
\ No newline at end of file diff --git a/qa/suites/rados/mgr/mon_election b/qa/suites/rados/mgr/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/mgr/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/mgr/random-objectstore$ b/qa/suites/rados/mgr/random-objectstore$ new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/mgr/random-objectstore$ @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/mgr/supported-random-distro$ b/qa/suites/rados/mgr/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/mgr/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/mgr/tasks/.qa b/qa/suites/rados/mgr/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/mgr/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/mgr/tasks/crash.yaml b/qa/suites/rados/mgr/tasks/crash.yaml new file mode 100644 index 000000000..9d2ba535e --- /dev/null +++ b/qa/suites/rados/mgr/tasks/crash.yaml @@ -0,0 +1,18 @@ + +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. + wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(PG_ + - \(RECENT_CRASH\) + - replacing it with standby + - No standby daemons available + - \(POOL_APP_NOT_ENABLED\) + - cephfs_test_runner: + modules: + - tasks.mgr.test_crash diff --git a/qa/suites/rados/mgr/tasks/failover.yaml b/qa/suites/rados/mgr/tasks/failover.yaml new file mode 100644 index 000000000..6d1e0d557 --- /dev/null +++ b/qa/suites/rados/mgr/tasks/failover.yaml @@ -0,0 +1,17 @@ + +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. + wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(PG_ + - replacing it with standby + - No standby daemons available + - \(POOL_APP_NOT_ENABLED\) + - cephfs_test_runner: + modules: + - tasks.mgr.test_failover diff --git a/qa/suites/rados/mgr/tasks/insights.yaml b/qa/suites/rados/mgr/tasks/insights.yaml new file mode 100644 index 000000000..f7c82cf7f --- /dev/null +++ b/qa/suites/rados/mgr/tasks/insights.yaml @@ -0,0 +1,20 @@ + +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. + wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(MGR_INSIGHTS_WARNING\) + - \(insights_health_check + - \(PG_ + - \(RECENT_CRASH\) + - replacing it with standby + - No standby daemons available + - \(POOL_APP_NOT_ENABLED\) + - cephfs_test_runner: + modules: + - tasks.mgr.test_insights diff --git a/qa/suites/rados/mgr/tasks/module_selftest.yaml b/qa/suites/rados/mgr/tasks/module_selftest.yaml new file mode 100644 index 000000000..4403d9fff --- /dev/null +++ b/qa/suites/rados/mgr/tasks/module_selftest.yaml @@ -0,0 +1,28 @@ + +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. 
+ wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(PG_ + - replacing it with standby + - No standby daemons available + - Reduced data availability + - Degraded data redundancy + - objects misplaced + - Synthetic exception in serve + - influxdb python module not found + - \(MGR_ZABBIX_ + - foo bar + - Failed to open Telegraf + - evicting unresponsive client + - 1 mgr modules have recently crashed \(RECENT_MGR_MODULE_CRASH\) + - \(POOL_APP_NOT_ENABLED\) + - cephfs_test_runner: + modules: + - tasks.mgr.test_module_selftest + fail_on_skip: false diff --git a/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml b/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml new file mode 100644 index 000000000..de1d592df --- /dev/null +++ b/qa/suites/rados/mgr/tasks/per_module_finisher_stats.yaml @@ -0,0 +1,45 @@ +tasks: + - install: + - ceph: + wait-for-scrub: false + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) + - check-counter: + counters: + mgr: + - name: "finisher-balancer.complete_latency.avgcount" + min: 1 + - name: "finisher-balancer.queue_len" + expected_val: 0 + - name: "finisher-crash.complete_latency.avgcount" + min: 2 + - name: "finisher-crash.queue_len" + expected_val: 0 + - name: "finisher-devicehealth.complete_latency.avgcount" + min: 1 + - name: "finisher-devicehealth.queue_len" + expected_val: 0 + - name: "finisher-iostat.complete_latency.avgcount" + min: 1 + - name: "finisher-iostat.queue_len" + expected_val: 0 + - name: "finisher-pg_autoscaler.complete_latency.avgcount" + min: 1 + - name: "finisher-pg_autoscaler.queue_len" + expected_val: 0 + - name: "finisher-progress.complete_latency.avgcount" + min: 2 + - name: "finisher-progress.queue_len" + expected_val: 0 + - name: "finisher-status.complete_latency.avgcount" + min: 2 + - name: "finisher-status.queue_len" + expected_val: 0 + - name: "finisher-telemetry.complete_latency.avgcount" + min: 2 + - name: "finisher-telemetry.queue_len" + expected_val: 0 + - workunit: + clients: + client.0: + - mgr/test_per_module_finisher.sh diff --git a/qa/suites/rados/mgr/tasks/progress.yaml b/qa/suites/rados/mgr/tasks/progress.yaml new file mode 100644 index 000000000..183a9a29a --- /dev/null +++ b/qa/suites/rados/mgr/tasks/progress.yaml @@ -0,0 +1,30 @@ +overrides: + ceph: + conf: + osd: + osd mclock profile: high_recovery_ops +tasks: + - install: + - ceph: + config: + global: + osd pool default size : 3 + osd pool default min size : 2 + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. + wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(MDS_ALL_DOWN\) + - \(MDS_UP_LESS_THAN_MAX\) + - \(FS_WITH_FAILED_MDS\) + - \(FS_DEGRADED\) + - \(PG_ + - \(OSDMAP_FLAGS\) + - replacing it with standby + - No standby daemons available + - \(POOL_APP_NOT_ENABLED\) + - cephfs_test_runner: + modules: + - tasks.mgr.test_progress diff --git a/qa/suites/rados/mgr/tasks/prometheus.yaml b/qa/suites/rados/mgr/tasks/prometheus.yaml new file mode 100644 index 000000000..fd0e23a35 --- /dev/null +++ b/qa/suites/rados/mgr/tasks/prometheus.yaml @@ -0,0 +1,17 @@ + +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. 
+ wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(PG_ + - replacing it with standby + - No standby daemons available + - \(POOL_APP_NOT_ENABLED\) + - cephfs_test_runner: + modules: + - tasks.mgr.test_prometheus diff --git a/qa/suites/rados/mgr/tasks/workunits.yaml b/qa/suites/rados/mgr/tasks/workunits.yaml new file mode 100644 index 000000000..a48274033 --- /dev/null +++ b/qa/suites/rados/mgr/tasks/workunits.yaml @@ -0,0 +1,17 @@ +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. + wait-for-scrub: false + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(PG_ + - replacing it with standby + - No standby daemons available + - \(POOL_APP_NOT_ENABLED\) + - workunit: + clients: + client.0: + - mgr/test_localpool.sh diff --git a/qa/suites/rados/monthrash/% b/qa/suites/rados/monthrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/monthrash/% diff --git a/qa/suites/rados/monthrash/.qa b/qa/suites/rados/monthrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/monthrash/.qa @@ -0,0 +1 @@ +../.qa/
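Note: several of the mgr task fragments above end in a workunit step (mgr/test_localpool.sh, mgr/test_per_module_finisher.sh). The workunit task checks the ceph repository out on the test node and runs the named script from qa/workunits on the listed client. Roughly, as a sketch only (the real task also exports CEPH_ID, a per-client scratch directory and other environment):

    # approximate effect of:  workunit: clients: client.0: [mgr/test_localpool.sh]
    git clone https://github.com/ceph/ceph.git
    cd ceph/qa/workunits
    ./mgr/test_localpool.sh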
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/ceph.yaml b/qa/suites/rados/monthrash/ceph.yaml new file mode 100644 index 000000000..8055fe372 --- /dev/null +++ b/qa/suites/rados/monthrash/ceph.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + conf: + client: + debug monc: 20 + debug ms: 1 + mon: + mon min osdmap epochs: 25 + paxos service trim min: 5 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + mon scrub inject crc mismatch: 0.01 + mon scrub inject missing keys: 0.05 +# thrashing monitors may make mgr have trouble w/ its keepalive + log-ignorelist: + - ScrubResult + - scrub mismatch + - overall HEALTH_ + - \(MGR_DOWN\) +# slow mons -> slow peering -> PG_AVAILABILITY + - \(PG_AVAILABILITY\) + - \(SLOW_OPS\) + - slow request +tasks: +- install: +- ceph: diff --git a/qa/suites/rados/monthrash/clusters/.qa b/qa/suites/rados/monthrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/monthrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/clusters/3-mons.yaml b/qa/suites/rados/monthrash/clusters/3-mons.yaml new file mode 100644 index 000000000..4b721ef80 --- /dev/null +++ b/qa/suites/rados/monthrash/clusters/3-mons.yaml @@ -0,0 +1,7 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, osd.3, osd.4, osd.5, client.0] +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/suites/rados/monthrash/clusters/9-mons.yaml b/qa/suites/rados/monthrash/clusters/9-mons.yaml new file mode 100644 index 000000000..a2874c1d0 --- /dev/null +++ b/qa/suites/rados/monthrash/clusters/9-mons.yaml @@ -0,0 +1,7 @@ +roles: +- [mon.a, mon.b, mon.c, mon.d, mon.e, osd.0, osd.1, osd.2] +- [mon.f, mon.g, mon.h, mon.i, mgr.x, osd.3, osd.4, osd.5, client.0] +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB diff --git a/qa/suites/rados/monthrash/mon_election b/qa/suites/rados/monthrash/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/monthrash/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/msgr b/qa/suites/rados/monthrash/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/rados/monthrash/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/msgr-failures/.qa b/qa/suites/rados/monthrash/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/monthrash/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/msgr-failures/few.yaml b/qa/suites/rados/monthrash/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rados/monthrash/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml b/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml new file mode 100644 index 000000000..83b136518 --- /dev/null +++ b/qa/suites/rados/monthrash/msgr-failures/mon-delay.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms inject delay type: mon + ms inject delay probability: .005 + ms inject delay max: 1 + ms inject internal delays: .002 + mon client directed command retry: 5 + mgr: + debug monc: 10 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/monthrash/objectstore b/qa/suites/rados/monthrash/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/monthrash/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/rados.yaml b/qa/suites/rados/monthrash/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/monthrash/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/supported-random-distro$ b/qa/suites/rados/monthrash/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/monthrash/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/thrashers/.qa b/qa/suites/rados/monthrash/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/thrashers/force-sync-many.yaml b/qa/suites/rados/monthrash/thrashers/force-sync-many.yaml new file mode 100644 index 000000000..f4c98ae27 --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/force-sync-many.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(TOO_FEW_PGS\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- mon_thrash: + revive_delay: 90 + thrash_delay: 1 + store_thrash: true + thrash_many: true diff --git a/qa/suites/rados/monthrash/thrashers/many.yaml b/qa/suites/rados/monthrash/thrashers/many.yaml new file mode 100644 index 000000000..2f5de97e3 --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/many.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + mon client ping interval: 4 + mon client ping timeout: 12 +tasks: +- mon_thrash: + revive_delay: 20 + thrash_delay: 1 + thrash_many: true + freeze_mon_duration: 20 + freeze_mon_probability: 10 diff --git a/qa/suites/rados/monthrash/thrashers/one.yaml b/qa/suites/rados/monthrash/thrashers/one.yaml new file mode 100644 index 000000000..3a71edaf1 --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/one.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- mon_thrash: + revive_delay: 20 + thrash_delay: 1 diff --git a/qa/suites/rados/monthrash/thrashers/sync-many.yaml b/qa/suites/rados/monthrash/thrashers/sync-many.yaml new file mode 100644 index 000000000..6bb25b7eb --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/sync-many.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(POOL_APP_NOT_ENABLED\) + conf: + mon: + paxos min: 10 + paxos trim min: 10 +tasks: +- mon_thrash: + revive_delay: 90 + thrash_delay: 1 + thrash_many: true diff --git a/qa/suites/rados/monthrash/thrashers/sync.yaml b/qa/suites/rados/monthrash/thrashers/sync.yaml new file mode 100644 index 000000000..30f133055 --- /dev/null +++ b/qa/suites/rados/monthrash/thrashers/sync.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(POOL_APP_NOT_ENABLED\) + conf: + mon: + paxos min: 10 + paxos trim min: 10 +tasks: +- mon_thrash: + revive_delay: 90 + thrash_delay: 1 diff --git a/qa/suites/rados/monthrash/workloads/.qa b/qa/suites/rados/monthrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/monthrash/workloads/pool-create-delete.yaml b/qa/suites/rados/monthrash/workloads/pool-create-delete.yaml new file mode 100644 index 000000000..6e8dadaf0 --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/pool-create-delete.yaml @@ -0,0 +1,57 @@ +overrides: + ceph: + log-ignorelist: + - slow request + - overall HEALTH_ +tasks: +- exec: + client.0: + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel + - ceph_test_rados_delete_pools_parallel diff --git a/qa/suites/rados/monthrash/workloads/rados_5925.yaml b/qa/suites/rados/monthrash/workloads/rados_5925.yaml new file mode 100644 index 000000000..bf7eee534 --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/rados_5925.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ +tasks: +- exec: + client.0: + - ceph_test_rados_delete_pools_parallel --debug_objecter 20 --debug_ms 1 --debug_rados 20 --debug_monc 20 diff --git a/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml b/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml new file mode 100644 index 000000000..f14c2c2a6 --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/rados_api_tests.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(CACHE_POOL_NEAR_FULL\) + - \(POOL_FULL\) + - \(SLOW_OPS\) + - \(MON_DOWN\) + - \(PG_ + - \(SMALLER_PGP_NUM\) + - slow request + conf: + global: + debug objecter: 20 + debug rados: 20 + debug ms: 1 + mon: + mon warn on pool no app: false + osd: + osd 
class load list: "*" + osd class default list: "*" +tasks: +- workunit: + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml b/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml new file mode 100644 index 000000000..372bf2561 --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/rados_mon_osdmap_prune.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + conf: + mon: + mon debug extra checks: true + mon min osdmap epochs: 100 + mon osdmap full prune enabled: true + mon osdmap full prune min: 200 + mon osdmap full prune interval: 10 + mon osdmap full prune txsize: 100 + osd: + osd beacon report interval: 10 + log-ignorelist: + # setting/unsetting noup will trigger health warns, + # causing tests to fail due to health warns, even if + # the tests themselves are successful. + - \(OSDMAP_FLAGS\) +tasks: +- workunit: + clients: + client.0: + - mon/test_mon_osdmap_prune.sh diff --git a/qa/suites/rados/monthrash/workloads/rados_mon_workunits.yaml b/qa/suites/rados/monthrash/workloads/rados_mon_workunits.yaml new file mode 100644 index 000000000..2f9729c92 --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/rados_mon_workunits.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(PG_ + - \(MON_DOWN\) + - \(AUTH_BAD_CAPS\) +tasks: +- workunit: + clients: + client.0: + - mon/pool_ops.sh + - mon/crush_ops.sh + - mon/osd.sh + - mon/caps.sh + diff --git a/qa/suites/rados/monthrash/workloads/snaps-few-objects.yaml b/qa/suites/rados/monthrash/workloads/snaps-few-objects.yaml new file mode 100644 index 000000000..aa82d973a --- /dev/null +++ b/qa/suites/rados/monthrash/workloads/snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/multimon/% b/qa/suites/rados/multimon/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/multimon/% diff --git a/qa/suites/rados/multimon/.qa b/qa/suites/rados/multimon/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/multimon/.qa @@ -0,0 +1 @@ +../.qa/
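Note: the pool-create-delete workload above lists the same binary, ceph_test_rados_delete_pools_parallel, dozens of times under a single exec entry. The exec task runs its commands in order on client.0, so the effect is repeated back-to-back runs while the monitors are being thrashed; from an interactive shell this would look roughly like the following, with N standing in for the number of repetitions in the fragment:

    for i in $(seq 1 "$N"); do
        ceph_test_rados_delete_pools_parallel
    done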
\ No newline at end of file diff --git a/qa/suites/rados/multimon/clusters/.qa b/qa/suites/rados/multimon/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/multimon/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/multimon/clusters/21.yaml b/qa/suites/rados/multimon/clusters/21.yaml new file mode 100644 index 000000000..aae968665 --- /dev/null +++ b/qa/suites/rados/multimon/clusters/21.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mon.d, mon.g, mon.j, mon.m, mon.p, mon.s] +- [mon.b, mon.e, mon.h, mon.k, mon.n, mon.q, mon.t, mgr.x] +- [mon.c, mon.f, mon.i, mon.l, mon.o, mon.r, mon.u] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB diff --git a/qa/suites/rados/multimon/clusters/3.yaml b/qa/suites/rados/multimon/clusters/3.yaml new file mode 100644 index 000000000..11adef16b --- /dev/null +++ b/qa/suites/rados/multimon/clusters/3.yaml @@ -0,0 +1,7 @@ +roles: +- [mon.a, mon.c] +- [mon.b, mgr.x] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB diff --git a/qa/suites/rados/multimon/clusters/6.yaml b/qa/suites/rados/multimon/clusters/6.yaml new file mode 100644 index 000000000..29c74dc7f --- /dev/null +++ b/qa/suites/rados/multimon/clusters/6.yaml @@ -0,0 +1,7 @@ +roles: +- [mon.a, mon.c, mon.e, mgr.x] +- [mon.b, mon.d, mon.f, mgr.y] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB diff --git a/qa/suites/rados/multimon/clusters/9.yaml b/qa/suites/rados/multimon/clusters/9.yaml new file mode 100644 index 000000000..d51168556 --- /dev/null +++ b/qa/suites/rados/multimon/clusters/9.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mon.d, mon.g] +- [mon.b, mon.e, mon.h, mgr.x] +- [mon.c, mon.f, mon.i] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB diff --git a/qa/suites/rados/multimon/mon_election b/qa/suites/rados/multimon/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/multimon/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/multimon/msgr b/qa/suites/rados/multimon/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/rados/multimon/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/rados/multimon/msgr-failures/.qa b/qa/suites/rados/multimon/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/multimon/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/multimon/msgr-failures/few.yaml b/qa/suites/rados/multimon/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rados/multimon/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/multimon/msgr-failures/many.yaml b/qa/suites/rados/multimon/msgr-failures/many.yaml new file mode 100644 index 000000000..d47b466b9 --- /dev/null +++ b/qa/suites/rados/multimon/msgr-failures/many.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 1000 + mon client directed command retry: 5 + mon mgr beacon grace: 90 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/multimon/no_pools.yaml b/qa/suites/rados/multimon/no_pools.yaml new file mode 100644 index 000000000..32ef2439f --- /dev/null +++ b/qa/suites/rados/multimon/no_pools.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force diff --git a/qa/suites/rados/multimon/objectstore b/qa/suites/rados/multimon/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/multimon/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/multimon/rados.yaml b/qa/suites/rados/multimon/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/multimon/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/multimon/supported-random-distro$ b/qa/suites/rados/multimon/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/multimon/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/multimon/tasks/.qa b/qa/suites/rados/multimon/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/multimon/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/multimon/tasks/mon_clock_no_skews.yaml b/qa/suites/rados/multimon/tasks/mon_clock_no_skews.yaml new file mode 100644 index 000000000..691bd1efc --- /dev/null +++ b/qa/suites/rados/multimon/tasks/mon_clock_no_skews.yaml @@ -0,0 +1,12 @@ +tasks: +- install: +- ceph: + log-ignorelist: + - slow request + - .*clock.*skew.* + - clocks not synchronized + - overall HEALTH_ + - \(MON_CLOCK_SKEW\) + - \(POOL_APP_NOT_ENABLED\) +- mon_clock_skew_check: + expect-skew: false diff --git a/qa/suites/rados/multimon/tasks/mon_clock_with_skews.yaml b/qa/suites/rados/multimon/tasks/mon_clock_with_skews.yaml new file mode 100644 index 000000000..079e1555b --- /dev/null +++ b/qa/suites/rados/multimon/tasks/mon_clock_with_skews.yaml @@ -0,0 +1,25 @@ +tasks: +- install: +- exec: + mon.b: + - sudo systemctl stop chronyd.service || true + - sudo systemctl stop systemd-timesync.service || true + - sudo systemctl stop ntpd.service || true + - sudo systemctl stop ntp.service || true + - date -u -s @$(expr $(date -u +%s) + 2) +- ceph: + wait-for-healthy: false + log-ignorelist: + - .*clock.*skew.* + - clocks not synchronized + - overall HEALTH_ + - \(MON_CLOCK_SKEW\) + - \(MGR_DOWN\) + - \(MON_DOWN\) + - \(PG_ + - \(SLOW_OPS\) + - No standby daemons available + - slow request + - \(POOL_APP_NOT_ENABLED\) +- mon_clock_skew_check: + expect-skew: true diff --git a/qa/suites/rados/multimon/tasks/mon_recovery.yaml b/qa/suites/rados/multimon/tasks/mon_recovery.yaml new file mode 100644 index 000000000..18e53e092 --- /dev/null +++ b/qa/suites/rados/multimon/tasks/mon_recovery.yaml @@ -0,0 +1,11 @@ +tasks: +- install: +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(PG_AVAILABILITY\) + - \(SLOW_OPS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- mon_recovery: diff --git a/qa/suites/rados/objectstore/% b/qa/suites/rados/objectstore/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/objectstore/% diff --git a/qa/suites/rados/objectstore/.qa b/qa/suites/rados/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
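Note: in mon_clock_with_skews.yaml above, the skew is injected by first stopping every time-sync daemon on mon.b's host and then stepping its clock with 'date -u -s @$(expr $(date -u +%s) + 2)'. Broken apart, that one-liner does the following (sketch; setting the clock requires root on the target host):

    now=$(date -u +%s)          # current time on mon.b's host as a UNIX timestamp
    skewed=$(expr "$now" + 2)   # two seconds in the future
    date -u -s "@$skewed"       # step the clock, producing a ~2 second skew
                                # which the MON_CLOCK_SKEW check is then expected to report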
\ No newline at end of file diff --git a/qa/suites/rados/objectstore/backends/.qa b/qa/suites/rados/objectstore/backends/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/objectstore/backends/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/objectstore/backends/ceph_objectstore_tool.yaml b/qa/suites/rados/objectstore/backends/ceph_objectstore_tool.yaml new file mode 100644 index 000000000..1ae569b7e --- /dev/null +++ b/qa/suites/rados/objectstore/backends/ceph_objectstore_tool.yaml @@ -0,0 +1,26 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, osd.3, osd.4, osd.5, client.0] +openstack: +- volumes: # attached to each instance + count: 6 + size: 10 # GB +tasks: +- install: +- ceph: + fs: xfs + conf: + global: + osd max object name len: 460 + osd max object namespace len: 64 + osd: + osd objectstore: bluestore + osd op queue: wpq + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(TOO_FEW_PGS\) + - \(POOL_APP_NOT_ENABLED\) +- ceph_objectstore_tool: + objects: 20 diff --git a/qa/suites/rados/objectstore/backends/fusestore.yaml b/qa/suites/rados/objectstore/backends/fusestore.yaml new file mode 100644 index 000000000..1c34fcaea --- /dev/null +++ b/qa/suites/rados/objectstore/backends/fusestore.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- workunit: + clients: + all: + - objectstore/test_fuse.sh + diff --git a/qa/suites/rados/objectstore/backends/keyvaluedb.yaml b/qa/suites/rados/objectstore/backends/keyvaluedb.yaml new file mode 100644 index 000000000..efff8d379 --- /dev/null +++ b/qa/suites/rados/objectstore/backends/keyvaluedb.yaml @@ -0,0 +1,8 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- exec: + client.0: + - mkdir $TESTDIR/kvtest && cd $TESTDIR/kvtest && ceph_test_keyvaluedb + - rm -rf $TESTDIR/kvtest diff --git a/qa/suites/rados/objectstore/backends/objectcacher-stress.yaml b/qa/suites/rados/objectstore/backends/objectcacher-stress.yaml new file mode 100644 index 000000000..ae0f8f381 --- /dev/null +++ b/qa/suites/rados/objectstore/backends/objectcacher-stress.yaml @@ -0,0 +1,16 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - osdc/stress_objectcacher.sh diff --git a/qa/suites/rados/objectstore/backends/objectstore-bluestore-a.yaml b/qa/suites/rados/objectstore/backends/objectstore-bluestore-a.yaml new file mode 100644 index 000000000..b3c615bd6 --- /dev/null +++ b/qa/suites/rados/objectstore/backends/objectstore-bluestore-a.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- exec: + client.0: + - mkdir $TESTDIR/archive/ostest && cd $TESTDIR/archive/ostest && ulimit -Sn 16384 && CEPH_ARGS="--no-log-to-stderr --log-file $TESTDIR/archive/ceph_test_objectstore.log --debug-bluestore 20" ceph_test_objectstore --gtest_filter=*/1:-*SyntheticMatrixC* --gtest_catch_exceptions=0 + - rm -rf $TESTDIR/archive/ostest diff --git a/qa/suites/rados/objectstore/backends/objectstore-bluestore-b.yaml b/qa/suites/rados/objectstore/backends/objectstore-bluestore-b.yaml new file mode 100644 index 000000000..eacb5ab44 --- /dev/null +++ b/qa/suites/rados/objectstore/backends/objectstore-bluestore-b.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- exec: + client.0: + - mkdir $TESTDIR/archive/ostest && cd $TESTDIR/archive/ostest && ulimit -Sn 
16384 && CEPH_ARGS="--no-log-to-stderr --log-file $TESTDIR/archive/ceph_test_objectstore.log --debug-bluestore 20" ceph_test_objectstore --gtest_filter=*SyntheticMatrixC*/2 --gtest_catch_exceptions=0 + - rm -rf $TESTDIR/archive/ostest diff --git a/qa/suites/rados/objectstore/backends/objectstore-memstore.yaml b/qa/suites/rados/objectstore/backends/objectstore-memstore.yaml new file mode 100644 index 000000000..f60b6d59b --- /dev/null +++ b/qa/suites/rados/objectstore/backends/objectstore-memstore.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- exec: + client.0: + - mkdir $TESTDIR/archive/ostest && cd $TESTDIR/archive/ostest && ulimit -Sn 16384 && CEPH_ARGS="--no-log-to-stderr --log-file $TESTDIR/archive/ceph_test_objectstore.log --debug-bluestore 20" ceph_test_objectstore --gtest_filter=*/0 --gtest_catch_exceptions=0 + - rm -rf $TESTDIR/archive/ostest diff --git a/qa/suites/rados/objectstore/supported-random-distro$ b/qa/suites/rados/objectstore/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/objectstore/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
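Note: the three objectstore backend fragments above split one large googletest binary, ceph_test_objectstore, into separate jobs purely by filter: judging by the fragment names, '*/0' selects the memstore instantiation, '*/1' minus the SyntheticMatrixC cases covers one bluestore job, and '*SyntheticMatrixC*/2' the other. To see which parameterised instantiations a pattern actually selects on a given build (illustrative; requires the ceph test binaries):

    ceph_test_objectstore --gtest_list_tests | less
    # then re-run with the same filter a fragment uses, e.g.
    ceph_test_objectstore --gtest_filter='*/0' --gtest_catch_exceptions=0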
\ No newline at end of file diff --git a/qa/suites/rados/perf/% b/qa/suites/rados/perf/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/perf/% diff --git a/qa/suites/rados/perf/.qa b/qa/suites/rados/perf/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/perf/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/perf/ceph.yaml b/qa/suites/rados/perf/ceph.yaml new file mode 100644 index 000000000..ca229dd46 --- /dev/null +++ b/qa/suites/rados/perf/ceph.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + conf: + global: + osd client message cap: 5000 +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +tasks: +- install: +- ceph: + fs: xfs + wait-for-scrub: false + log-ignorelist: + - \(PG_ + - \(OSD_ + - \(OBJECT_ + - overall HEALTH + - \(POOL_APP_NOT_ENABLED\) +- ssh_keys: diff --git a/qa/suites/rados/perf/mon_election b/qa/suites/rados/perf/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/perf/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/perf/objectstore/.qa b/qa/suites/rados/perf/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/perf/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/perf/objectstore/bluestore-basic-min-osd-mem-target.yaml b/qa/suites/rados/perf/objectstore/bluestore-basic-min-osd-mem-target.yaml new file mode 100644 index 000000000..32f596da1 --- /dev/null +++ b/qa/suites/rados/perf/objectstore/bluestore-basic-min-osd-mem-target.yaml @@ -0,0 +1,25 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + osd memory target: 2147483648 # min recommended is 2_G + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + diff --git a/qa/suites/rados/perf/objectstore/bluestore-bitmap.yaml b/qa/suites/rados/perf/objectstore/bluestore-bitmap.yaml new file mode 100644 index 000000000..b18e04bee --- /dev/null +++ b/qa/suites/rados/perf/objectstore/bluestore-bitmap.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + bluestore allocator: bitmap + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/suites/rados/perf/objectstore/bluestore-comp.yaml b/qa/suites/rados/perf/objectstore/bluestore-comp.yaml new file mode 100644 index 000000000..b408032fd --- /dev/null +++ b/qa/suites/rados/perf/objectstore/bluestore-comp.yaml @@ -0,0 +1,23 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/suites/rados/perf/objectstore/bluestore-low-osd-mem-target.yaml b/qa/suites/rados/perf/objectstore/bluestore-low-osd-mem-target.yaml new file mode 100644 index 000000000..b2a49790b 
--- /dev/null +++ b/qa/suites/rados/perf/objectstore/bluestore-low-osd-mem-target.yaml @@ -0,0 +1,25 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + osd memory target: 1610612736 # reduced to 1.5_G + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + diff --git a/qa/suites/rados/perf/objectstore/bluestore-stupid.yaml b/qa/suites/rados/perf/objectstore/bluestore-stupid.yaml new file mode 100644 index 000000000..ca811f131 --- /dev/null +++ b/qa/suites/rados/perf/objectstore/bluestore-stupid.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + bluestore allocator: stupid + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/suites/rados/perf/openstack.yaml b/qa/suites/rados/perf/openstack.yaml new file mode 100644 index 000000000..f4d1349b4 --- /dev/null +++ b/qa/suites/rados/perf/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 30 # GB diff --git a/qa/suites/rados/perf/scheduler/.qa b/qa/suites/rados/perf/scheduler/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/perf/scheduler/.qa @@ -0,0 +1 @@ +../.qa/
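The bluestore-*.yaml objectstore fragments above only override OSD configuration (allocator, osd memory target, lowered full ratios, discard, fsck on mount). A hedged way to confirm a running OSD actually picked the overrides up, assuming admin access and an example daemon id of osd.0 (neither is part of the fragments):

    # Read back a few of the overridden options from a live OSD.
    ceph daemon osd.0 config get bluestore_allocator       # bitmap vs stupid variants
    ceph daemon osd.0 config get osd_memory_target         # 2147483648 or 1610612736 above
    ceph daemon osd.0 config get bluestore_fsck_on_mount
    # The same knobs can be set cluster-wide through the mon config store:
    ceph config set osd osd_memory_target 2147483648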
\ No newline at end of file diff --git a/qa/suites/rados/perf/scheduler/dmclock_1Shard_16Threads.yaml b/qa/suites/rados/perf/scheduler/dmclock_1Shard_16Threads.yaml new file mode 100644 index 000000000..10388ad72 --- /dev/null +++ b/qa/suites/rados/perf/scheduler/dmclock_1Shard_16Threads.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + osd: + osd op num shards: 1 + osd op num threads per shard: 16 + osd op queue: mclock_scheduler diff --git a/qa/suites/rados/perf/scheduler/dmclock_default_shards.yaml b/qa/suites/rados/perf/scheduler/dmclock_default_shards.yaml new file mode 100644 index 000000000..57a0ed912 --- /dev/null +++ b/qa/suites/rados/perf/scheduler/dmclock_default_shards.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd op queue: mclock_scheduler diff --git a/qa/suites/rados/perf/scheduler/wpq_default_shards.yaml b/qa/suites/rados/perf/scheduler/wpq_default_shards.yaml new file mode 100644 index 000000000..25d358f27 --- /dev/null +++ b/qa/suites/rados/perf/scheduler/wpq_default_shards.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd op queue: wpq diff --git a/qa/suites/rados/perf/settings/.qa b/qa/suites/rados/perf/settings/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/perf/settings/.qa @@ -0,0 +1 @@ +../.qa/
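The scheduler fragments above pick the OSD op queue implementation (wpq or mclock_scheduler) and, in dmclock_1Shard_16Threads.yaml, reshape the shard layout. Since osd_op_queue generally only takes effect at OSD start, a quick sanity check against a running daemon (osd.0 is an example id, not from the fragments) might be:

    # Confirm which scheduler and shard layout an OSD is running with.
    ceph daemon osd.0 config get osd_op_queue
    ceph daemon osd.0 config get osd_op_num_shards
    ceph daemon osd.0 config get osd_op_num_threads_per_shard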
\ No newline at end of file diff --git a/qa/suites/rados/perf/settings/optimized.yaml b/qa/suites/rados/perf/settings/optimized.yaml new file mode 100644 index 000000000..dc4dcbb96 --- /dev/null +++ b/qa/suites/rados/perf/settings/optimized.yaml @@ -0,0 +1,74 @@ +overrides: + ceph: + conf: + mon: + debug mon: "0/0" + debug ms: "0/0" + debug paxos: "0/0" + osd: + debug filestore: "0/0" + debug journal: "0/0" + debug ms: "0/0" + debug osd: "0/0" + global: + auth client required: none + auth cluster required: none + auth service required: none + auth supported: none + + debug lockdep: "0/0" + debug context: "0/0" + debug crush: "0/0" + debug mds: "0/0" + debug mds balancer: "0/0" + debug mds locker: "0/0" + debug mds log: "0/0" + debug mds log expire: "0/0" + debug mds migrator: "0/0" + debug buffer: "0/0" + debug timer: "0/0" + debug filer: "0/0" + debug striper: "0/0" + debug objecter: "0/0" + debug rados: "0/0" + debug rbd: "0/0" + debug rbd mirror: "0/0" + debug rbd replay: "0/0" + debug journaler: "0/0" + debug objectcacher: "0/0" + debug client: "0/0" + debug osd: "0/0" + debug optracker: "0/0" + debug objclass: "0/0" + debug filestore: "0/0" + debug journal: "0/0" + debug ms: "0/0" + debug mon: "0/0" + debug monc: "0/0" + debug paxos: "0/0" + debug tp: "0/0" + debug auth: "0/0" + debug crypto: "0/0" + debug finisher: "0/0" + debug heartbeatmap: "0/0" + debug perfcounter: "0/0" + debug rgw: "0/0" + debug rgw sync: "0/0" + debug civetweb: "0/0" + debug javaclient: "0/0" + debug asok: "0/0" + debug throttle: "0/0" + debug refs: "0/0" + debug compressor: "0/0" + debug bluestore: "0/0" + debug bluefs: "0/0" + debug bdev: "0/0" + debug kstore: "0/0" + debug rocksdb: "0/0" + debug leveldb: "0/0" + debug memdb: "0/0" + debug fuse: "0/0" + debug mgr: "0/0" + debug mgrc: "0/0" + debug dpdk: "0/0" + debug eventtrace: "0/0" diff --git a/qa/suites/rados/perf/ubuntu_latest.yaml b/qa/suites/rados/perf/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/rados/perf/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
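settings/optimized.yaml strips nearly all debug logging so that perf results are not dominated by logging overhead; each "0/0" value is a <log file level>/<in-memory gather level> pair. A hedged sketch of applying the same idea to a cluster outside the suite (the subsystem choice is illustrative):

    # Quiet a few of the noisier subsystems on all OSDs via the mon config store.
    ceph config set osd debug_osd 0/0
    ceph config set osd debug_ms 0/0
    ceph config set osd debug_bluestore 0/0
    # Or inject into already-running daemons without persisting anything:
    ceph tell osd.* injectargs '--debug-osd 0/0 --debug-ms 0/0'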
\ No newline at end of file diff --git a/qa/suites/rados/perf/workloads/.qa b/qa/suites/rados/perf/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/perf/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/perf/workloads/fio_4K_rand_read.yaml b/qa/suites/rados/perf/workloads/fio_4K_rand_read.yaml new file mode 100644 index 000000000..d5ef33ab3 --- /dev/null +++ b/qa/suites/rados/perf/workloads/fio_4K_rand_read.yaml @@ -0,0 +1,24 @@ +tasks: +- cbt: + benchmarks: + librbdfio: + op_size: [4096] + time: 60 + mode: ['randread'] + norandommap: True + vol_size: 4096 + procs_per_volume: [1] + volumes_per_client: [2] + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 128 + pgp_size: 128 + replication: 3 diff --git a/qa/suites/rados/perf/workloads/fio_4K_rand_rw.yaml b/qa/suites/rados/perf/workloads/fio_4K_rand_rw.yaml new file mode 100644 index 000000000..14d43f143 --- /dev/null +++ b/qa/suites/rados/perf/workloads/fio_4K_rand_rw.yaml @@ -0,0 +1,24 @@ +tasks: +- cbt: + benchmarks: + librbdfio: + op_size: [4096] + time: 60 + mode: ['randrw'] + norandommap: True + vol_size: 4096 + procs_per_volume: [1] + volumes_per_client: [2] + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 128 + pgp_size: 128 + replication: 3 diff --git a/qa/suites/rados/perf/workloads/fio_4M_rand_read.yaml b/qa/suites/rados/perf/workloads/fio_4M_rand_read.yaml new file mode 100644 index 000000000..b07432243 --- /dev/null +++ b/qa/suites/rados/perf/workloads/fio_4M_rand_read.yaml @@ -0,0 +1,24 @@ +tasks: +- cbt: + benchmarks: + librbdfio: + op_size: [4194304] + time: 60 + mode: ['randread'] + norandommap: True + vol_size: 4096 + procs_per_volume: [1] + volumes_per_client: [2] + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 128 + pgp_size: 128 + replication: 3 diff --git a/qa/suites/rados/perf/workloads/fio_4M_rand_rw.yaml b/qa/suites/rados/perf/workloads/fio_4M_rand_rw.yaml new file mode 100644 index 000000000..5fd6e2877 --- /dev/null +++ b/qa/suites/rados/perf/workloads/fio_4M_rand_rw.yaml @@ -0,0 +1,24 @@ +tasks: +- cbt: + benchmarks: + librbdfio: + op_size: [4194304] + time: 60 + mode: ['randrw'] + norandommap: True + vol_size: 4096 + procs_per_volume: [1] + volumes_per_client: [2] + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 128 + pgp_size: 128 + replication: 3 diff --git a/qa/suites/rados/perf/workloads/fio_4M_rand_write.yaml b/qa/suites/rados/perf/workloads/fio_4M_rand_write.yaml new file mode 100644 index 000000000..2d9d83611 --- /dev/null +++ b/qa/suites/rados/perf/workloads/fio_4M_rand_write.yaml @@ -0,0 +1,24 @@ +tasks: +- cbt: + benchmarks: + librbdfio: + op_size: [4194304] + time: 60 + mode: ['randwrite'] + norandommap: True + vol_size: 4096 + procs_per_volume: [1] + volumes_per_client: [2] + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 128 + pgp_size: 128 + replication: 3 diff --git a/qa/suites/rados/perf/workloads/radosbench_4K_rand_read.yaml b/qa/suites/rados/perf/workloads/radosbench_4K_rand_read.yaml new file mode 100644 index 000000000..f1de9b41b --- /dev/null +++ b/qa/suites/rados/perf/workloads/radosbench_4K_rand_read.yaml @@ -0,0 +1,24 @@ 
+tasks: +- cbt: + benchmarks: + radosbench: + concurrent_ops: 4 + concurrent_procs: 2 + op_size: [4096] + pool_monitoring_list: + - collectl + pool_profile: 'replicated' + run_monitoring_list: + - collectl + time: 60 + write_only: false + readmode: 'rand' + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + replicated: + pg_size: 256 + pgp_size: 256 + replication: 'replicated' diff --git a/qa/suites/rados/perf/workloads/radosbench_4K_seq_read.yaml b/qa/suites/rados/perf/workloads/radosbench_4K_seq_read.yaml new file mode 100644 index 000000000..8fb204a2f --- /dev/null +++ b/qa/suites/rados/perf/workloads/radosbench_4K_seq_read.yaml @@ -0,0 +1,23 @@ +tasks: +- cbt: + benchmarks: + radosbench: + concurrent_ops: 4 + concurrent_procs: 2 + op_size: [4096] + pool_monitoring_list: + - collectl + pool_profile: 'replicated' + run_monitoring_list: + - collectl + time: 60 + write_only: false + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + replicated: + pg_size: 256 + pgp_size: 256 + replication: 'replicated' diff --git a/qa/suites/rados/perf/workloads/radosbench_4M_rand_read.yaml b/qa/suites/rados/perf/workloads/radosbench_4M_rand_read.yaml new file mode 100644 index 000000000..cc1c74489 --- /dev/null +++ b/qa/suites/rados/perf/workloads/radosbench_4M_rand_read.yaml @@ -0,0 +1,24 @@ +tasks: +- cbt: + benchmarks: + radosbench: + concurrent_ops: 4 + concurrent_procs: 2 + op_size: [4194304] + pool_monitoring_list: + - collectl + pool_profile: 'replicated' + run_monitoring_list: + - collectl + time: 60 + write_only: false + readmode: 'rand' + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + replicated: + pg_size: 256 + pgp_size: 256 + replication: 'replicated' diff --git a/qa/suites/rados/perf/workloads/radosbench_4M_seq_read.yaml b/qa/suites/rados/perf/workloads/radosbench_4M_seq_read.yaml new file mode 100644 index 000000000..3ab55cf51 --- /dev/null +++ b/qa/suites/rados/perf/workloads/radosbench_4M_seq_read.yaml @@ -0,0 +1,23 @@ +tasks: +- cbt: + benchmarks: + radosbench: + concurrent_ops: 4 + concurrent_procs: 2 + op_size: [4194304] + pool_monitoring_list: + - collectl + pool_profile: 'replicated' + run_monitoring_list: + - collectl + time: 60 + write_only: false + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + replicated: + pg_size: 256 + pgp_size: 256 + replication: 'replicated' diff --git a/qa/suites/rados/perf/workloads/radosbench_4M_write.yaml b/qa/suites/rados/perf/workloads/radosbench_4M_write.yaml new file mode 100644 index 000000000..f6a5d715c --- /dev/null +++ b/qa/suites/rados/perf/workloads/radosbench_4M_write.yaml @@ -0,0 +1,23 @@ +tasks: +- cbt: + benchmarks: + radosbench: + concurrent_ops: 4 + concurrent_procs: 2 + op_size: [4194304] + pool_monitoring_list: + - collectl + pool_profile: 'replicated' + run_monitoring_list: + - collectl + time: 60 + write_only: true + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + replicated: + pg_size: 256 + pgp_size: 256 + replication: 'replicated' diff --git a/qa/suites/rados/perf/workloads/radosbench_omap_write.yaml b/qa/suites/rados/perf/workloads/radosbench_omap_write.yaml new file mode 100644 index 000000000..5df4674d9 --- /dev/null +++ b/qa/suites/rados/perf/workloads/radosbench_omap_write.yaml @@ -0,0 +1,7 @@ +tasks: +- radosbench: + clients: [client.0] + write-omap: True + objectsize: 4096 + size: 4096 + time: 300 diff --git a/qa/suites/rados/perf/workloads/sample_fio.yaml 
b/qa/suites/rados/perf/workloads/sample_fio.yaml new file mode 100644 index 000000000..98411392d --- /dev/null +++ b/qa/suites/rados/perf/workloads/sample_fio.yaml @@ -0,0 +1,24 @@ +tasks: +- cbt: + benchmarks: + librbdfio: + op_size: [4096] + time: 60 + mode: ['randwrite'] + norandommap: True + vol_size: 4096 + procs_per_volume: [1] + volumes_per_client: [2] + iodepth: [32] + osd_ra: [4096] + pool_profile: 'rbd' + log_avg_msec: 100 + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + rbd: + pg_size: 128 + pgp_size: 128 + replication: 3 diff --git a/qa/suites/rados/perf/workloads/sample_radosbench.yaml b/qa/suites/rados/perf/workloads/sample_radosbench.yaml new file mode 100644 index 000000000..e3dc47ae6 --- /dev/null +++ b/qa/suites/rados/perf/workloads/sample_radosbench.yaml @@ -0,0 +1,23 @@ +tasks: +- cbt: + benchmarks: + radosbench: + concurrent_ops: 4 + concurrent_procs: 2 + op_size: [4096] + pool_monitoring_list: + - collectl + pool_profile: 'replicated' + run_monitoring_list: + - collectl + time: 60 + write_only: true + cluster: + user: 'ubuntu' + osds_per_node: 3 + iterations: 1 + pool_profiles: + replicated: + pg_size: 256 + pgp_size: 256 + replication: 'replicated' diff --git a/qa/suites/rados/rest/% b/qa/suites/rados/rest/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/rest/% diff --git a/qa/suites/rados/rest/.qa b/qa/suites/rados/rest/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/rest/.qa @@ -0,0 +1 @@ +../.qa/
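The radosbench_* workloads above drive cbt's radosbench benchmark, which wraps the stock rados bench CLI: a timed write phase at a fixed op size, kept with --no-cleanup when a seq or rand read phase follows. Roughly, and outside cbt (the pool name is illustrative; only the 60 s runtime and op sizes come from the fragments):

    # Approximate shape of the write/seq/rand phases the workloads measure.
    ceph osd pool create bench 256 256
    rados -p bench bench 60 write -b 4096 --no-cleanup   # 4K writes, objects kept for the read phases
    rados -p bench bench 60 seq                          # sequential reads (radosbench_4K_seq_read.yaml)
    rados -p bench bench 60 rand                         # random reads (readmode: 'rand')
    rados -p bench cleanup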
\ No newline at end of file diff --git a/qa/suites/rados/rest/mgr-restful.yaml b/qa/suites/rados/rest/mgr-restful.yaml new file mode 100644 index 000000000..4901f401d --- /dev/null +++ b/qa/suites/rados/rest/mgr-restful.yaml @@ -0,0 +1,31 @@ +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, mds.a, client.a] +tasks: +- install: +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(PG_ + - \(OSD_ + - \(OBJECT_ + - \(OSDMAP_FLAGS\) + - \(POOL_APP_NOT_ENABLED\) +- exec: + mon.a: + - ceph restful create-key admin + - ceph restful create-self-signed-cert + - ceph restful restart +- workunit: + clients: + client.a: + - rest/test-restful.sh +- exec: + mon.a: + - ceph restful delete-key admin + - ceph restful list-keys | jq ".admin" | grep null + diff --git a/qa/suites/rados/rest/supported-random-distro$ b/qa/suites/rados/rest/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/rest/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
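mgr-restful.yaml issues an API key and a self-signed certificate for the mgr restful module and then runs rest/test-restful.sh against it. A hedged sketch of poking the endpoint by hand; the module-enable step, port 8003 and the /server path reflect the module defaults as understood here and are assumptions, not taken from the fragment:

    # Issue a key and query the restful module directly.
    ceph mgr module enable restful
    ceph restful create-self-signed-cert
    KEY=$(ceph restful create-key admin)
    curl -k -u "admin:${KEY}" "https://$(hostname -f):8003/server"
    ceph restful delete-key admin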
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/% b/qa/suites/rados/singleton-bluestore/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/% diff --git a/qa/suites/rados/singleton-bluestore/.qa b/qa/suites/rados/singleton-bluestore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/all/.qa b/qa/suites/rados/singleton-bluestore/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/all/cephtool.yaml b/qa/suites/rados/singleton-bluestore/all/cephtool.yaml new file mode 100644 index 000000000..f86be3459 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/all/cephtool.yaml @@ -0,0 +1,48 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + log-ignorelist: + - but it is still running + - had wrong client addr + - had wrong cluster addr + - must scrub before tier agent can activate + - failsafe engaged, dropping updates + - failsafe disengaged, no longer dropping updates + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(SMALLER_PG_NUM\) + - \(SMALLER_PGP_NUM\) + - \(CACHE_POOL_NO_HIT_SET\) + - \(CACHE_POOL_NEAR_FULL\) + - \(FS_WITH_FAILED_MDS\) + - \(FS_DEGRADED\) + - \(POOL_BACKFILLFULL\) + - \(POOL_FULL\) + - \(SMALLER_PGP_NUM\) + - \(POOL_NEARFULL\) + - \(POOL_APP_NOT_ENABLED\) + - \(AUTH_BAD_CAPS\) + - \(FS_INLINE_DATA_DEPRECATED\) + - \(MON_DOWN\) + - \(SLOW_OPS\) + - slow request +- workunit: + clients: + all: + - cephtool + - mon/pool_ops.sh diff --git a/qa/suites/rados/singleton-bluestore/mon_election b/qa/suites/rados/singleton-bluestore/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/mon_election @@ -0,0 +1 @@ +.qa/mon_election
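cephtool.yaml runs the large cephtool workunit plus mon/pool_ops.sh on a bluestore cluster, exercising the ceph CLI's pool and health handling. A minimal hedged sample of the pool lifecycle those scripts cover (the pool name is illustrative, and pool deletion is refused unless the mon flag below is set):

    # Pool create/rename/delete round trip of the kind mon/pool_ops.sh exercises.
    ceph osd pool create qa_pool 8
    ceph osd pool application enable qa_pool rados
    ceph osd pool rename qa_pool qa_pool_renamed
    ceph config set mon mon_allow_pool_delete true
    ceph osd pool delete qa_pool_renamed qa_pool_renamed --yes-i-really-really-mean-it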
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/msgr b/qa/suites/rados/singleton-bluestore/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/msgr-failures b/qa/suites/rados/singleton-bluestore/msgr-failures new file mode 120000 index 000000000..3ded97b94 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/msgr-failures @@ -0,0 +1 @@ +../singleton/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/objectstore/.qa b/qa/suites/rados/singleton-bluestore/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/objectstore/bluestore-bitmap.yaml b/qa/suites/rados/singleton-bluestore/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..66cf2bc75 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore_debug/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/objectstore/bluestore-comp-lz4.yaml b/qa/suites/rados/singleton-bluestore/objectstore/bluestore-comp-lz4.yaml new file mode 120000 index 000000000..da2e2598c --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/objectstore/bluestore-comp-lz4.yaml @@ -0,0 +1 @@ +.qa/objectstore_debug/bluestore-comp-lz4.yaml
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/objectstore/bluestore-comp-snappy.yaml b/qa/suites/rados/singleton-bluestore/objectstore/bluestore-comp-snappy.yaml new file mode 120000 index 000000000..f75b0e1b4 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/objectstore/bluestore-comp-snappy.yaml @@ -0,0 +1 @@ +.qa/objectstore_debug/bluestore-comp-snappy.yaml
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/rados.yaml b/qa/suites/rados/singleton-bluestore/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/singleton-bluestore/supported-random-distro$ b/qa/suites/rados/singleton-bluestore/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/singleton-bluestore/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/singleton-nomsgr/% b/qa/suites/rados/singleton-nomsgr/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/% diff --git a/qa/suites/rados/singleton-nomsgr/.qa b/qa/suites/rados/singleton-nomsgr/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton-nomsgr/all/.qa b/qa/suites/rados/singleton-nomsgr/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton-nomsgr/all/admin_socket_output.yaml b/qa/suites/rados/singleton-nomsgr/all/admin_socket_output.yaml new file mode 100644 index 000000000..341a559f3 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/admin_socket_output.yaml @@ -0,0 +1,28 @@ +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +roles: +- [mon.a, mds.a, mgr.x, osd.0, osd.1, client.0] +overrides: + ceph: + log-ignorelist: + - MDS in read-only mode + - force file system read-only + - overall HEALTH_ + - \(FS_DEGRADED\) + - \(OSDMAP_FLAGS\) + - \(OSD_FULL\) + - \(MDS_READ_ONLY\) + - \(POOL_FULL\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- rgw: + - client.0 +- exec: + client.0: + - ceph_test_admin_socket_output --all diff --git a/qa/suites/rados/singleton-nomsgr/all/balancer.yaml b/qa/suites/rados/singleton-nomsgr/all/balancer.yaml new file mode 100644 index 000000000..c42c5539d --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/balancer.yaml @@ -0,0 +1,15 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + fs: xfs + log-ignorelist: + - \(PG_AVAILABILITY\) + - \(POOL_APP_NOT_ENABLED\) +- cram: + clients: + client.0: + - src/test/cli-integration/balancer/misplaced.t diff --git a/qa/suites/rados/singleton-nomsgr/all/cache-fs-trunc.yaml b/qa/suites/rados/singleton-nomsgr/all/cache-fs-trunc.yaml new file mode 100644 index 000000000..fddbd0723 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/cache-fs-trunc.yaml @@ -0,0 +1,55 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- [mon.a, mgr.x, mds.a, osd.0, osd.1, osd.2, client.0, client.1] +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(POOL_APP_NOT_ENABLED\) + conf: + global: + osd max object name len: 460 + osd max object namespace len: 64 + debug client: 20 + debug mds: 20 + debug ms: 1 +- exec: + client.0: + - ceph osd pool create data_cache 4 + - ceph osd tier add cephfs_data data_cache + - ceph osd tier cache-mode data_cache writeback + - ceph osd tier set-overlay cephfs_data data_cache + - ceph osd pool set data_cache hit_set_type bloom + - ceph osd pool set data_cache hit_set_count 8 + - ceph osd pool set data_cache hit_set_period 3600 + - ceph osd pool set data_cache min_read_recency_for_promote 0 +- ceph-fuse: +- exec: + client.0: + - sudo chmod 777 $TESTDIR/mnt.0/ + - dd if=/dev/urandom of=$TESTDIR/mnt.0/foo bs=1M count=5 + - ls -al $TESTDIR/mnt.0/foo + - truncate --size 0 $TESTDIR/mnt.0/foo + - ls -al $TESTDIR/mnt.0/foo + - dd if=/dev/urandom of=$TESTDIR/mnt.0/foo bs=1M count=5 + - ls -al $TESTDIR/mnt.0/foo + - cp $TESTDIR/mnt.0/foo /tmp/foo + - sync + - rados -p data_cache ls - + - sleep 10 + - rados -p data_cache ls - + - rados -p data_cache cache-flush-evict-all + - rados -p data_cache ls - + - sleep 1 +- exec: + client.1: + - hexdump -C /tmp/foo | head + - hexdump -C $TESTDIR/mnt.1/foo | head + - cmp $TESTDIR/mnt.1/foo /tmp/foo diff --git a/qa/suites/rados/singleton-nomsgr/all/ceph-kvstore-tool.yaml b/qa/suites/rados/singleton-nomsgr/all/ceph-kvstore-tool.yaml new file mode 100644 index 000000000..6a8faa4a8 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/ceph-kvstore-tool.yaml @@ -0,0 
+1,25 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] + +overrides: + ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(POOL_APP_NOT_ENABLED\) + +tasks: +- install: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - cephtool/test_kvstore_tool.sh diff --git a/qa/suites/rados/singleton-nomsgr/all/ceph-post-file.yaml b/qa/suites/rados/singleton-nomsgr/all/ceph-post-file.yaml new file mode 100644 index 000000000..530dc42a7 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/ceph-post-file.yaml @@ -0,0 +1,12 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +tasks: +- install: +- workunit: + clients: + all: + - post-file.sh diff --git a/qa/suites/rados/singleton-nomsgr/all/crushdiff.yaml b/qa/suites/rados/singleton-nomsgr/all/crushdiff.yaml new file mode 100644 index 000000000..1639f0ed5 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/crushdiff.yaml @@ -0,0 +1,24 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, osd.3, client.0] + +overrides: + ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(POOL_APP_NOT_ENABLED\) + - \(PG_DEGRADED\) + +tasks: +- install: +- ceph: +- workunit: + clients: + all: + - rados/test_crushdiff.sh diff --git a/qa/suites/rados/singleton-nomsgr/all/export-after-evict.yaml b/qa/suites/rados/singleton-nomsgr/all/export-after-evict.yaml new file mode 100644 index 000000000..b4ce5468a --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/export-after-evict.yaml @@ -0,0 +1,41 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(POOL_APP_NOT_ENABLED\) + conf: + global: + osd max object name len: 460 + osd max object namespace len: 64 +- exec: + client.0: + - ceph osd pool create base-pool 4 + - ceph osd pool application enable base-pool rados + - ceph osd pool create cache-pool 4 + - ceph osd tier add base-pool cache-pool + - ceph osd tier cache-mode cache-pool writeback + - ceph osd tier set-overlay base-pool cache-pool + - dd if=/dev/urandom of=$TESTDIR/foo bs=1M count=1 + - rbd import --image-format 2 $TESTDIR/foo base-pool/bar + - rbd snap create base-pool/bar@snap + - rados -p base-pool cache-flush-evict-all + - rbd export base-pool/bar $TESTDIR/bar + - rbd export base-pool/bar@snap $TESTDIR/snap + - cmp $TESTDIR/foo $TESTDIR/bar + - cmp $TESTDIR/foo $TESTDIR/snap + - rm $TESTDIR/foo $TESTDIR/bar $TESTDIR/snap diff --git a/qa/suites/rados/singleton-nomsgr/all/full-tiering.yaml b/qa/suites/rados/singleton-nomsgr/all/full-tiering.yaml new file mode 100644 index 000000000..a06221449 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/full-tiering.yaml @@ -0,0 +1,41 @@ +# verify #13098 fix +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +overrides: + ceph: + log-ignorelist: + - is full + - overall 
HEALTH_ + - \(POOL_FULL\) + - \(POOL_NEAR_FULL\) + - \(CACHE_POOL_NO_HIT_SET\) + - \(CACHE_POOL_NEAR_FULL\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + global: + osd max object name len: 460 + osd max object namespace len: 64 +- exec: + client.0: + - ceph osd pool create ec-ca 1 1 + - ceph osd pool create ec 1 1 erasure default + - ceph osd pool application enable ec rados + - ceph osd tier add ec ec-ca + - ceph osd tier cache-mode ec-ca readproxy + - ceph osd tier set-overlay ec ec-ca + - ceph osd pool set ec-ca hit_set_type bloom + - ceph osd pool set-quota ec-ca max_bytes 20480000 + - ceph osd pool set-quota ec max_bytes 20480000 + - ceph osd pool set ec-ca target_max_bytes 20480000 + - timeout 30 rados -p ec-ca bench 30 write || true + - ceph osd pool set-quota ec-ca max_bytes 0 + - ceph osd pool set-quota ec max_bytes 0 diff --git a/qa/suites/rados/singleton-nomsgr/all/health-warnings.yaml b/qa/suites/rados/singleton-nomsgr/all/health-warnings.yaml new file mode 100644 index 000000000..5ed655324 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/health-warnings.yaml @@ -0,0 +1,23 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, osd.3, osd.4, osd.5, osd.6, osd.7, osd.8, osd.9, client.0] +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + osd: +# we may land on ext4 + osd max object name len: 400 + osd max object namespace len: 64 + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - rados/test_health_warnings.sh diff --git a/qa/suites/rados/singleton-nomsgr/all/large-omap-object-warnings.yaml b/qa/suites/rados/singleton-nomsgr/all/large-omap-object-warnings.yaml new file mode 100644 index 000000000..e1e9d34ef --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/large-omap-object-warnings.yaml @@ -0,0 +1,30 @@ +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +overrides: + ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - \(OSDMAP_FLAGS\) + - \(OSD_FULL\) + - \(MDS_READ_ONLY\) + - \(POOL_APP_NOT_ENABLED\) + - large omap objects + - Large omap object found + - application not enabled + conf: + osd: + osd scrub backoff ratio: 0 + osd deep scrub large omap object value sum threshold: 8800000 + osd deep scrub large omap object key threshold: 20000 +tasks: +- install: +- ceph: +- workunit: + clients: + all: + - rados/test_large_omap_detection.py diff --git a/qa/suites/rados/singleton-nomsgr/all/lazy_omap_stats_output.yaml b/qa/suites/rados/singleton-nomsgr/all/lazy_omap_stats_output.yaml new file mode 100644 index 000000000..61c2fa663 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/lazy_omap_stats_output.yaml @@ -0,0 +1,18 @@ +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- exec: + client.0: + - ceph_test_lazy_omap_stats diff --git a/qa/suites/rados/singleton-nomsgr/all/librados_hello_world.yaml b/qa/suites/rados/singleton-nomsgr/all/librados_hello_world.yaml new file mode 100644 index 000000000..0c0a071e9 --- /dev/null 
+++ b/qa/suites/rados/singleton-nomsgr/all/librados_hello_world.yaml @@ -0,0 +1,24 @@ +roles: +- [mon.a, mds.a, mgr.x, osd.0, osd.1, client.0] +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: + extra_packages: + deb: + - libradosstriper-dev + - librados-dev + - libradospp-dev + rpm: + - libradosstriper-devel + - librados-devel + - libradospp-devel +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- workunit: + clients: + all: + - rados/test_librados_build.sh diff --git a/qa/suites/rados/singleton-nomsgr/all/msgr.yaml b/qa/suites/rados/singleton-nomsgr/all/msgr.yaml new file mode 100644 index 000000000..4eb376fcf --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/msgr.yaml @@ -0,0 +1,23 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- exec: + client.0: + - ceph_test_async_driver + - ceph_test_msgr +openstack: + - machine: + disk: 40 # GB + ram: 15000 # MB + cpus: 1 + volumes: # attached to each instance + count: 0 + size: 1 # GB +overrides: + ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + client: + debug ms: 20 diff --git a/qa/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml b/qa/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml new file mode 100644 index 000000000..15952b989 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/multi-backfill-reject.yaml @@ -0,0 +1,51 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +- - osd.3 + - osd.4 + - osd.5 +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(PG_ + - \(OSD_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .3 + osd min pg log entries: 25 + osd max pg log entries: 100 + osd max object name len: 460 + osd max object namespace len: 64 +- exec: + client.0: + - sudo ceph osd pool create foo 64 + - sudo ceph osd pool application enable foo rados + - rados -p foo bench 60 write -b 1024 --no-cleanup + - sudo ceph osd pool set foo size 3 + - sudo ceph osd out 0 1 +- sleep: + duration: 60 +- exec: + client.0: + - sudo ceph osd in 0 1 +- sleep: + duration: 60 +- exec: + client.0: + - sudo ceph osd pool set foo size 2 +- sleep: + duration: 300 diff --git a/qa/suites/rados/singleton-nomsgr/all/osd_stale_reads.yaml b/qa/suites/rados/singleton-nomsgr/all/osd_stale_reads.yaml new file mode 100644 index 000000000..5beb2015f --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/osd_stale_reads.yaml @@ -0,0 +1,29 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +overrides: + ceph: + log-ignorelist: + - \(OSD_DOWN\) + - \(POOL_APP_NOT_ENABLED\) + - \(SLOW_OPS\) + - \(PG_AVAILABILITY\) + - \(PG_DEGRADED\) + - application not enabled + - slow request + conf: + osd: + osd scrub backoff ratio: 0 + osd deep scrub large omap object value sum threshold: 8800000 + osd deep scrub large omap object key threshold: 20000 +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- exec: + client.0: + - ceph_test_osd_stale_read diff --git a/qa/suites/rados/singleton-nomsgr/all/pool-access.yaml b/qa/suites/rados/singleton-nomsgr/all/pool-access.yaml new file mode 100644 index 000000000..26d548430 --- /dev/null +++ 
b/qa/suites/rados/singleton-nomsgr/all/pool-access.yaml @@ -0,0 +1,17 @@ +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - rados/test_pool_access.sh diff --git a/qa/suites/rados/singleton-nomsgr/all/recovery-unfound-found.yaml b/qa/suites/rados/singleton-nomsgr/all/recovery-unfound-found.yaml new file mode 100644 index 000000000..39788ddd2 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/recovery-unfound-found.yaml @@ -0,0 +1,60 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 +openstack: + - volumes: # attached to each instance + count: 2 + size: 20 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + fs: xfs + conf: + osd: + osd recovery sleep: .1 + osd objectstore: bluestore + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(OBJECT_ + - \(PG_ + - overall HEALTH +- exec: + osd.0: + - ceph osd pool create foo 32 + - ceph osd pool application enable foo foo + - rados -p foo bench 30 write -b 4096 --no-cleanup + - ceph osd set noup +- ceph.restart: + daemons: [osd.0] + wait-for-up: false + wait-for-healthy: false +- exec: + osd.0: + - sleep 5 + - rados -p foo bench 3 write -b 4096 --no-cleanup + - ceph osd unset noup + - sleep 10 + - ceph osd set noup +- ceph.restart: + daemons: [osd.1] + wait-for-up: false + wait-for-healthy: false +- exec: + osd.0: + - ceph osd out 0 + - sleep 10 + - ceph osd unset noup +- ceph.healthy: + wait-for-healthy: false # only wait for osds up and pgs clean, ignore misplaced +- exec: + osd.0: + - ceph osd in 0 +- ceph.healthy: diff --git a/qa/suites/rados/singleton-nomsgr/all/version-number-sanity.yaml b/qa/suites/rados/singleton-nomsgr/all/version-number-sanity.yaml new file mode 100644 index 000000000..daeeeef4e --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/all/version-number-sanity.yaml @@ -0,0 +1,15 @@ +roles: +- [mon.a, mds.a, mgr.x, osd.0, osd.1, client.0] +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- workunit: + clients: + all: + - rados/version_number_sanity.sh diff --git a/qa/suites/rados/singleton-nomsgr/mon_election b/qa/suites/rados/singleton-nomsgr/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/mon_election @@ -0,0 +1 @@ +.qa/mon_election
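recovery-unfound-found.yaml above manufactures unfound objects by writing while a peer is held down with noup, then expects recovery to find them again once both OSDs are back in. If one wanted to watch that state by hand while the sequence runs (purely illustrative, not part of the fragment; the PG id is an example):

    # Inspect unfound objects during the noup/out dance.
    ceph health detail | grep -i unfound || true
    ceph pg dump_stuck unclean
    ceph pg 2.0 list_unfound     # substitute a PG id reported by the commands above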
\ No newline at end of file diff --git a/qa/suites/rados/singleton-nomsgr/rados.yaml b/qa/suites/rados/singleton-nomsgr/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/singleton-nomsgr/supported-random-distro$ b/qa/suites/rados/singleton-nomsgr/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/singleton-nomsgr/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/singleton/% b/qa/suites/rados/singleton/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/singleton/% diff --git a/qa/suites/rados/singleton/.qa b/qa/suites/rados/singleton/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton/all/.qa b/qa/suites/rados/singleton/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton/all/admin-socket.yaml b/qa/suites/rados/singleton/all/admin-socket.yaml new file mode 100644 index 000000000..0d88e6f2a --- /dev/null +++ b/qa/suites/rados/singleton/all/admin-socket.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - client.a +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- admin_socket: + osd.0: + version: + git_version: + help: + config show: + config help: + config set bluestore_csum_type xxhash64: + perf dump: + perf schema: + get_heap_property tcmalloc.max_total_thread_cache_byte || dump_metrics memory: + set_heap_property tcmalloc.max_total_thread_cache_bytes 67108864 || dump_metrics memory: + set_heap_property tcmalloc.max_total_thread_cache_bytes 33554432 || dump_metrics memory: diff --git a/qa/suites/rados/singleton/all/backfill-toofull.yaml b/qa/suites/rados/singleton/all/backfill-toofull.yaml new file mode 100644 index 000000000..d53e9a3b2 --- /dev/null +++ b/qa/suites/rados/singleton/all/backfill-toofull.yaml @@ -0,0 +1,38 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +tasks: +- install: +- ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr/devicehealth/enable_monitoring false --force + log-ignorelist: + - Error + - overall HEALTH_ + - \(OBJECT_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_BACKFILLFULL\) + - \(POOL_NEARFULL\) + - \(SLOW_OPS\) + - \(TOO_FEW_PGS\) + - Monitor daemon marked osd\.[[:digit:]]+ down, but it is still running + - slow request + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd min pg log entries: 5 + osd max pg log entries: 5 +- backfill_toofull: diff --git a/qa/suites/rados/singleton/all/deduptool.yaml b/qa/suites/rados/singleton/all/deduptool.yaml new file mode 100644 index 000000000..3a34cb309 --- /dev/null +++ b/qa/suites/rados/singleton/all/deduptool.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - had wrong client addr + - had wrong cluster addr + - reached quota + - overall HEALTH_ + - \(POOL_FULL\) + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - rados/test_dedup_tool.sh diff --git a/qa/suites/rados/singleton/all/divergent_priors.yaml b/qa/suites/rados/singleton/all/divergent_priors.yaml new file mode 100644 index 000000000..81d68654a --- /dev/null +++ b/qa/suites/rados/singleton/all/divergent_priors.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB + +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) + +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- divergent_priors: diff --git a/qa/suites/rados/singleton/all/divergent_priors2.yaml b/qa/suites/rados/singleton/all/divergent_priors2.yaml new file mode 100644 index 000000000..baac3110c --- /dev/null +++ b/qa/suites/rados/singleton/all/divergent_priors2.yaml @@ -0,0 
+1,28 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB + +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) + +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- divergent_priors2: diff --git a/qa/suites/rados/singleton/all/dump-stuck.yaml b/qa/suites/rados/singleton/all/dump-stuck.yaml new file mode 100644 index 000000000..f561795bd --- /dev/null +++ b/qa/suites/rados/singleton/all/dump-stuck.yaml @@ -0,0 +1,22 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_APP_NOT_ENABLED\) +- dump_stuck: diff --git a/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml b/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml new file mode 100644 index 000000000..31724f9e8 --- /dev/null +++ b/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml @@ -0,0 +1,37 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +tasks: +- install: +- ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr/devicehealth/enable_monitoring false --force + log-ignorelist: + - \(OBJECT_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(SLOW_OPS\) + - deep-scrub + - missing + - overall HEALTH_ + - repair + - slow request + - unfound + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd min pg log entries: 5 + osd max pg log entries: 5 +- ec_inconsistent_hinfo: diff --git a/qa/suites/rados/singleton/all/ec-lost-unfound.yaml b/qa/suites/rados/singleton/all/ec-lost-unfound.yaml new file mode 100644 index 000000000..d397b005c --- /dev/null +++ b/qa/suites/rados/singleton/all/ec-lost-unfound.yaml @@ -0,0 +1,30 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +tasks: +- install: +- ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - objects unfound and apparently lost + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + - \(SLOW_OPS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- ec_lost_unfound: diff --git a/qa/suites/rados/singleton/all/erasure-code-nonregression.yaml b/qa/suites/rados/singleton/all/erasure-code-nonregression.yaml new file mode 100644 index 000000000..e8201ee0b --- /dev/null +++ b/qa/suites/rados/singleton/all/erasure-code-nonregression.yaml @@ -0,0 +1,17 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + clients: + all: + - erasure-code/encode-decode-non-regression.sh diff --git a/qa/suites/rados/singleton/all/lost-unfound-delete.yaml b/qa/suites/rados/singleton/all/lost-unfound-delete.yaml new file mode 100644 index 000000000..a6b68cd50 --- /dev/null +++ b/qa/suites/rados/singleton/all/lost-unfound-delete.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mon.b + - mon.c 
+ - mgr.x + - osd.0 + - osd.1 + - osd.2 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - objects unfound and apparently lost + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + - \(SLOW_OPS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- rep_lost_unfound_delete: diff --git a/qa/suites/rados/singleton/all/lost-unfound.yaml b/qa/suites/rados/singleton/all/lost-unfound.yaml new file mode 100644 index 000000000..4010a5208 --- /dev/null +++ b/qa/suites/rados/singleton/all/lost-unfound.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - objects unfound and apparently lost + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + - \(SLOW_OPS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- lost_unfound: diff --git a/qa/suites/rados/singleton/all/max-pg-per-osd.from-mon.yaml b/qa/suites/rados/singleton/all/max-pg-per-osd.from-mon.yaml new file mode 100644 index 000000000..e7eded31e --- /dev/null +++ b/qa/suites/rados/singleton/all/max-pg-per-osd.from-mon.yaml @@ -0,0 +1,30 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: False + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + mon: + osd pool default size: 2 + osd: + mon max pg per osd : 2 + osd max pg per osd hard ratio : 1 + log-ignorelist: + - \(TOO_FEW_PGS\) + - \(PENDING_CREATING_PGS\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: +- osd_max_pg_per_osd: + test_create_from_mon: True + pg_num: 2 diff --git a/qa/suites/rados/singleton/all/max-pg-per-osd.from-primary.yaml b/qa/suites/rados/singleton/all/max-pg-per-osd.from-primary.yaml new file mode 100644 index 000000000..dc7c3f0f5 --- /dev/null +++ b/qa/suites/rados/singleton/all/max-pg-per-osd.from-primary.yaml @@ -0,0 +1,35 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: False + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + mon: + osd pool default size: 2 + osd: + mon max pg per osd : 1 + osd max pg per osd hard ratio : 1 + log-ignorelist: + - \(TOO_FEW_PGS\) + - \(PG_ + - \(PENDING_CREATING_PGS\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: +- osd_max_pg_per_osd: + test_create_from_mon: False + pg_num: 1 + pool_size: 2 + from_primary: True diff --git a/qa/suites/rados/singleton/all/max-pg-per-osd.from-replica.yaml b/qa/suites/rados/singleton/all/max-pg-per-osd.from-replica.yaml new file mode 100644 index 000000000..ee0dae1d4 --- /dev/null +++ b/qa/suites/rados/singleton/all/max-pg-per-osd.from-replica.yaml @@ -0,0 +1,35 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: False + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + mon: + osd pool default size: 2 + osd: + mon max pg per osd : 1 + osd max pg per osd hard ratio : 1 + 
log-ignorelist: + - \(TOO_FEW_PGS\) + - \(PG_ + - \(PENDING_CREATING_PGS\) + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: +- osd_max_pg_per_osd: + test_create_from_mon: False + pg_num: 1 + pool_size: 2 + from_primary: False diff --git a/qa/suites/rados/singleton/all/mon-auth-caps.yaml b/qa/suites/rados/singleton/all/mon-auth-caps.yaml new file mode 100644 index 000000000..264dc535a --- /dev/null +++ b/qa/suites/rados/singleton/all/mon-auth-caps.yaml @@ -0,0 +1,21 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(AUTH_BAD_CAPS\) + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - mon/auth_caps.sh + - mon/auth_key_rotation.sh diff --git a/qa/suites/rados/singleton/all/mon-config-key-caps.yaml b/qa/suites/rados/singleton/all/mon-config-key-caps.yaml new file mode 100644 index 000000000..c475a2080 --- /dev/null +++ b/qa/suites/rados/singleton/all/mon-config-key-caps.yaml @@ -0,0 +1,20 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(AUTH_BAD_CAPS\) + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - mon/test_config_key_caps.sh diff --git a/qa/suites/rados/singleton/all/mon-config-keys.yaml b/qa/suites/rados/singleton/all/mon-config-keys.yaml new file mode 100644 index 000000000..117b6d055 --- /dev/null +++ b/qa/suites/rados/singleton/all/mon-config-keys.yaml @@ -0,0 +1,22 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force +- workunit: + clients: + all: + - mon/test_mon_config_key.py diff --git a/qa/suites/rados/singleton/all/mon-config.yaml b/qa/suites/rados/singleton/all/mon-config.yaml new file mode 100644 index 000000000..ab1eb81b0 --- /dev/null +++ b/qa/suites/rados/singleton/all/mon-config.yaml @@ -0,0 +1,24 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - mon/config.sh diff --git a/qa/suites/rados/singleton/all/mon-memory-target-compliance.yaml.disabled b/qa/suites/rados/singleton/all/mon-memory-target-compliance.yaml.disabled new file mode 100644 index 000000000..e1f79c168 --- /dev/null +++ b/qa/suites/rados/singleton/all/mon-memory-target-compliance.yaml.disabled @@ -0,0 +1,154 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - osd.8 + - osd.9 + - osd.10 + - osd.11 + - osd.12 + - osd.13 + - osd.14 + - client.0 +openstack: + - volumes: # attached to each instance + count: 4 + size: 1 # GB +overrides: + ceph: + conf: + mon: + mon memory target: 134217728 # reduced to 128_M + rocksdb cache size: 67108864 # reduced to 64_M + mon osd cache size: 100000 + mon osd cache size min: 134217728 + osd: + osd memory target: 1610612736 # reduced to 1.5_G + osd objectstore: bluestore + debug bluestore: 20 + osd scrub min interval: 60 + osd scrub 
max interval: 120 + osd max backfills: 9 + +tasks: +- install: + branch: wip-sseshasa2-testing-2019-07-30-1825 # change as appropriate +- ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(OBJECT_ + - \(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request +- interactive: +- parallel: + - log-mon-rss + - stress-tasks + - benchload +- exec: + client.0: + - "ceph_test_mon_memory_target 134217728" # mon memory target + - "ceph_test_mon_rss_usage 134217728" +log-mon-rss: +- background_exec: + client.0: + - while true + - do /usr/bin/ceph_test_log_rss_usage ceph-mon >> /var/log/ceph/ceph-mon-rss-usage.log + - sleep 300 # log rss usage every 5 mins. May be modified accordingly + - done +- exec: + client.0: + - sleep 37860 # sum total of the radosbench test times below plus 60 secs +benchload: # The total radosbench test below translates to 10.5 hrs +- full_sequential: + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 + - radosbench: + clients: [client.0] + time: 1800 +stress-tasks: +- thrashosds: + op_delay: 1 + bdev_inject_crash: 1 + bdev_inject_crash_probability: .8 + chance_down: 80 + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 + chance_thrash_cluster_full: 0 + chance_thrash_pg_upmap: 3 + chance_thrash_pg_upmap_items: 3 + min_in: 2 diff --git a/qa/suites/rados/singleton/all/osd-backfill.yaml b/qa/suites/rados/singleton/all/osd-backfill.yaml new file mode 100644 index 000000000..92f5959b5 --- /dev/null +++ b/qa/suites/rados/singleton/all/osd-backfill.yaml @@ -0,0 +1,29 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd min pg log entries: 5 +- osd_backfill: diff --git a/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml b/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml new file mode 100644 index 000000000..6d0955c73 --- /dev/null +++ b/qa/suites/rados/singleton/all/osd-recovery-incomplete.yaml @@ -0,0 +1,31 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +openstack: + - volumes: # attached to each instance + count: 4 + size: 
10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd min pg log entries: 5 + osd_fast_fail_on_connection_refused: false +- osd_recovery.test_incomplete_pgs: diff --git a/qa/suites/rados/singleton/all/osd-recovery.yaml b/qa/suites/rados/singleton/all/osd-recovery.yaml new file mode 100644 index 000000000..9e33b3c39 --- /dev/null +++ b/qa/suites/rados/singleton/all/osd-recovery.yaml @@ -0,0 +1,33 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_DEGRADED\) + - \(SLOW_OPS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd min pg log entries: 5 + osd pg log trim min: 0 + osd_fast_fail_on_connection_refused: false +- osd_recovery: diff --git a/qa/suites/rados/singleton/all/peer.yaml b/qa/suites/rados/singleton/all/peer.yaml new file mode 100644 index 000000000..f01473b0f --- /dev/null +++ b/qa/suites/rados/singleton/all/peer.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + config: + global: + osd pool default min size : 1 + log-ignorelist: + - objects unfound and apparently lost + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_APP_NOT_ENABLED\) +- peer: diff --git a/qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml b/qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml new file mode 100644 index 000000000..e4b48189f --- /dev/null +++ b/qa/suites/rados/singleton/all/pg-autoscaler-progress-off.yaml @@ -0,0 +1,45 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 +- - mon.b + - mon.c + - osd.4 + - osd.5 + - osd.6 + - osd.7 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +tasks: +- install: +- ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(OBJECT_ + - \(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- exec: + client.0: + - ceph progress off + +- workunit: + clients: + all: + - mon/pg_autoscaler.sh diff --git a/qa/suites/rados/singleton/all/pg-autoscaler.yaml b/qa/suites/rados/singleton/all/pg-autoscaler.yaml new file mode 100644 index 000000000..a03c2d521 --- /dev/null +++ b/qa/suites/rados/singleton/all/pg-autoscaler.yaml @@ -0,0 +1,37 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - osd.4 + - osd.5 + - client.0 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +tasks: +- install: +- ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(OBJECT_ + - 
\(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - mon/pg_autoscaler.sh diff --git a/qa/suites/rados/singleton/all/pg-removal-interruption.yaml b/qa/suites/rados/singleton/all/pg-removal-interruption.yaml new file mode 100644 index 000000000..0dd0fb38d --- /dev/null +++ b/qa/suites/rados/singleton/all/pg-removal-interruption.yaml @@ -0,0 +1,37 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - slow request + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_APP_NOT_ENABLED\) +- exec: + client.0: + - sudo ceph osd pool create foo 128 128 + - sudo ceph osd pool application enable foo rados + - sleep 5 + - sudo ceph tell osd.0 injectargs -- --osd-inject-failure-on-pg-removal + - sudo ceph osd pool delete foo foo --yes-i-really-really-mean-it +- ceph.wait_for_failure: [osd.0] +- exec: + client.0: + - sudo ceph osd down 0 +- ceph.restart: [osd.0] +- ceph.healthy: diff --git a/qa/suites/rados/singleton/all/radostool.yaml b/qa/suites/rados/singleton/all/radostool.yaml new file mode 100644 index 000000000..6a3998ed2 --- /dev/null +++ b/qa/suites/rados/singleton/all/radostool.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - had wrong client addr + - had wrong cluster addr + - reached quota + - overall HEALTH_ + - \(POOL_FULL\) + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + all: + - rados/test_rados_tool.sh diff --git a/qa/suites/rados/singleton/all/random-eio.yaml b/qa/suites/rados/singleton/all/random-eio.yaml new file mode 100644 index 000000000..258ae90ed --- /dev/null +++ b/qa/suites/rados/singleton/all/random-eio.yaml @@ -0,0 +1,46 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 +- - osd.3 + - osd.4 + - osd.5 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - missing primary copy of + - objects unfound and apparently lost + - had a read error + - overall HEALTH_ + - \(POOL_APP_NOT_ENABLED\) + - \(PG_DEGRADED\) + - \(OSD_TOO_MANY_REPAIRS\) +- full_sequential: + - exec: + client.0: + - sudo ceph tell osd.1 injectargs -- --filestore_debug_random_read_err=0.33 + - sudo ceph tell osd.1 injectargs -- --bluestore_debug_random_read_err=0.33 + - sudo ceph osd pool create test 16 16 + - sudo ceph osd pool set test size 3 + - sudo ceph pg dump pgs --format=json-pretty + - radosbench: + clients: [client.0] + time: 360 + type: rand + objectsize: 1048576 + pool: test + create_pool: false + - exec: + client.0: + - sudo ceph tell osd.1 injectargs -- --filestore_debug_random_read_err=0.0 + - sudo ceph tell osd.1 injectargs -- --bluestore_debug_random_read_err=0.0 diff --git a/qa/suites/rados/singleton/all/rebuild-mondb.yaml b/qa/suites/rados/singleton/all/rebuild-mondb.yaml new file mode 100644 index 000000000..0c7de00c8 --- /dev/null +++ b/qa/suites/rados/singleton/all/rebuild-mondb.yaml @@ -0,0 +1,38 
@@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - no reply from + - overall HEALTH_ + - \(MON_DOWN\) + - \(MGR_DOWN\) + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_APP_NOT_ENABLED\) + conf: + mon: + debug auth: 30 +- full_sequential: + - radosbench: + clients: [client.0] + time: 30 + - rebuild_mondb: + - radosbench: + clients: [client.0] + time: 30 diff --git a/qa/suites/rados/singleton/all/recovery-preemption.yaml b/qa/suites/rados/singleton/all/recovery-preemption.yaml new file mode 100644 index 000000000..ce51688e5 --- /dev/null +++ b/qa/suites/rados/singleton/all/recovery-preemption.yaml @@ -0,0 +1,60 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +openstack: + - volumes: # attached to each instance + count: 3 + size: 20 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + osd: + osd recovery sleep: .1 + osd min pg log entries: 10 + osd max pg log entries: 1000 + osd_target_pg_log_entries_per_osd: 0 + osd pg log trim min: 10 + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(OBJECT_ + - \(PG_ + - \(SLOW_OPS\) + - overall HEALTH + - slow request +- exec: + osd.0: + - ceph osd pool create foo 128 + - ceph osd pool application enable foo foo + - sleep 5 +- ceph.healthy: +- exec: + osd.0: + - rados -p foo bench 30 write -b 4096 --no-cleanup + - ceph osd out 0 + - sleep 5 + - ceph osd set noup +- ceph.restart: + daemons: [osd.1] + wait-for-up: false + wait-for-healthy: false +- exec: + osd.0: + - rados -p foo bench 3 write -b 4096 --no-cleanup + - ceph osd unset noup + - sleep 10 + - for f in 0 1 2 3 ; do sudo ceph daemon osd.$f config set osd_recovery_sleep 0 ; sudo ceph daemon osd.$f config set osd_recovery_max_active 20 ; done +- ceph.healthy: +- exec: + osd.0: + - egrep '(defer backfill|defer recovery)' /var/log/ceph/ceph-osd.*.log diff --git a/qa/suites/rados/singleton/all/resolve_stuck_peering.yaml b/qa/suites/rados/singleton/all/resolve_stuck_peering.yaml new file mode 100644 index 000000000..41a011bd4 --- /dev/null +++ b/qa/suites/rados/singleton/all/resolve_stuck_peering.yaml @@ -0,0 +1,19 @@ +roles: +- [mon.a, mgr.x] +- [osd.0, osd.1, osd.2, client.0] + +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + fs: xfs + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_DEGRADED\) + - \(POOL_APP_NOT_ENABLED\) +- resolve_stuck_peering: + diff --git a/qa/suites/rados/singleton/all/test-crash.yaml b/qa/suites/rados/singleton/all/test-crash.yaml new file mode 100644 index 000000000..ec227cec7 --- /dev/null +++ b/qa/suites/rados/singleton/all/test-crash.yaml @@ -0,0 +1,21 @@ +roles: + - [client.0, mon.a, mgr.x, osd.0, osd.1, osd.2] + +tasks: + - install: + - ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - Reduced data availability + - OSD_.*DOWN + - \(RECENT_CRASH\) + - \(POOL_APP_NOT_ENABLED\) + - workunit: + clients: + client.0: + - rados/test_crash.sh + - ceph.restart: [osd.*] + - exec: + mon.a: + - find $TESTDIR/archive/coredump -type f -exec rm -f {} \; diff --git a/qa/suites/rados/singleton/all/test-noautoscale-flag.yaml 
b/qa/suites/rados/singleton/all/test-noautoscale-flag.yaml new file mode 100644 index 000000000..039300f7f --- /dev/null +++ b/qa/suites/rados/singleton/all/test-noautoscale-flag.yaml @@ -0,0 +1,40 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: false + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + mon: + osd pool default pg autoscale mode: on + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(OBJECT_ + - \(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +tasks: +- install: +- ceph: +- workunit: + clients: + all: + - mon/test_noautoscale_flag.sh diff --git a/qa/suites/rados/singleton/all/thrash-backfill-full.yaml b/qa/suites/rados/singleton/all/thrash-backfill-full.yaml new file mode 100644 index 000000000..a9049560d --- /dev/null +++ b/qa/suites/rados/singleton/all/thrash-backfill-full.yaml @@ -0,0 +1,53 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 +- - osd.3 + - osd.4 + - osd.5 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +override: + ceph: + conf: + mon: + osd pool default size: 3 + osd min pg log entries: 5 + osd max pg log entries: 10 +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - missing primary copy of + - objects unfound and apparently lost + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(SLOW_OPS\) + - \(PG_ + - \(OBJECT_MISPLACED\) + - \(OSD_ + - \(OBJECT_ + - \(TOO_FEW_PGS\) + - \(POOL_BACKFILLFULL\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- thrashosds: + op_delay: 30 + clean_interval: 120 + chance_down: .75 + min_live: 5 + min_in: 5 + chance_test_backfill_full: .5 +- radosbench: + clients: [client.0] + time: 1800 + type: rand + objectsize: 1048576 diff --git a/qa/suites/rados/singleton/all/thrash-eio.yaml b/qa/suites/rados/singleton/all/thrash-eio.yaml new file mode 100644 index 000000000..52e0cc51e --- /dev/null +++ b/qa/suites/rados/singleton/all/thrash-eio.yaml @@ -0,0 +1,50 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 +- - osd.3 + - osd.4 + - osd.5 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +overrides: + ceph: + conf: + mon: + osd pool default size: 3 +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - missing primary copy of + - objects unfound and apparently lost + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(SLOW_OPS\) + - \(PG_ + - \(OBJECT_MISPLACED\) + - \(OSD_ + - \(OBJECT_ + - \(TOO_FEW_PGS\) + - slow request + - \(POOL_APP_NOT_ENABLED\) +- thrashosds: + op_delay: 30 + clean_interval: 120 + chance_down: .5 + random_eio: .33 + min_live: 5 + min_in: 5 +- radosbench: + clients: [client.0] + time: 720 + type: rand + objectsize: 1048576 diff --git a/qa/suites/rados/singleton/all/thrash-rados/+ b/qa/suites/rados/singleton/all/thrash-rados/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/singleton/all/thrash-rados/+ diff --git a/qa/suites/rados/singleton/all/thrash-rados/.qa b/qa/suites/rados/singleton/all/thrash-rados/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ 
b/qa/suites/rados/singleton/all/thrash-rados/.qa @@ -0,0 +1 @@ +../.qa/
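The log-ignorelist entries used throughout the singleton tests above are regular expressions that teuthology matches against cluster log lines; deliberately unterminated prefixes such as \(PG_ match any health code beginning with PG_. A minimal overrides fragment in that style (the specific codes are illustrative, not tied to any one test):

    overrides:
      ceph:
        log-ignorelist:
          - overall HEALTH_
          - \(PG_                     # any PG_* health warning
          - \(POOL_APP_NOT_ENABLED\)  # one exact code, parentheses escaped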
\ No newline at end of file diff --git a/qa/suites/rados/singleton/all/thrash-rados/thrash-rados.yaml b/qa/suites/rados/singleton/all/thrash-rados/thrash-rados.yaml new file mode 100644 index 000000000..b3b54e173 --- /dev/null +++ b/qa/suites/rados/singleton/all/thrash-rados/thrash-rados.yaml @@ -0,0 +1,28 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 +- - osd.3 + - osd.4 + - osd.5 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + log-ignorelist: + - but it is still running + - \(POOL_APP_NOT_ENABLED\) +- thrashosds: + op_delay: 30 + clean_interval: 120 + chance_down: .5 +- workunit: + clients: + all: + - rados/load-gen-mix-small.sh diff --git a/qa/suites/rados/singleton/all/thrash-rados/thrashosds-health.yaml b/qa/suites/rados/singleton/all/thrash-rados/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/singleton/all/thrash-rados/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rados/singleton/all/thrash_cache_writeback_proxy_none.yaml b/qa/suites/rados/singleton/all/thrash_cache_writeback_proxy_none.yaml new file mode 100644 index 000000000..e58fb4ef4 --- /dev/null +++ b/qa/suites/rados/singleton/all/thrash_cache_writeback_proxy_none.yaml @@ -0,0 +1,71 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 +- - osd.3 + - osd.4 + - osd.5 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 30 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + log-ignorelist: + - but it is still running + - slow request + - overall HEALTH_ + - \(CACHE_POOL_ + - \(POOL_APP_NOT_ENABLED\) +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 60 + - sudo ceph osd pool set cache target_max_objects 500 +- background_exec: + mon.a: + - while true + - do sleep 30 + - sudo ceph osd pool set cache cache_target_full_ratio .001 + - echo cache-try-flush-evict-all + - rados -p cache cache-try-flush-evict-all + - sleep 5 + - echo cache-flush-evict-all + - rados -p cache cache-flush-evict-all + - sleep 5 + - echo remove overlay + - sudo ceph osd tier remove-overlay base + - sleep 20 + # Disabled due to https://tracker.ceph.com/issues/46323 + #- echo add writeback overlay + #- sudo ceph osd tier cache-mode cache writeback + #- sudo ceph osd pool set cache cache_target_full_ratio .8 + #- sudo ceph osd tier set-overlay base cache + #- sleep 30 + #- sudo ceph osd tier cache-mode cache readproxy + - done +- rados: + clients: [client.0] + pools: [base] + max_seconds: 600 + ops: 400000 + objects: 10000 + size: 1024 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 diff --git a/qa/suites/rados/singleton/all/watch-notify-same-primary.yaml b/qa/suites/rados/singleton/all/watch-notify-same-primary.yaml new file mode 100644 index 000000000..04d3969b2 --- /dev/null +++ b/qa/suites/rados/singleton/all/watch-notify-same-primary.yaml @@ -0,0 +1,35 @@ +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + config: + global: + osd pool default min size : 1 + client: + debug ms: 1 + debug objecter: 20 + debug rados: 20 + log-ignorelist: + - objects unfound and apparently lost + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(OBJECT_DEGRADED\) + - \(POOL_APP_NOT_ENABLED\) +- watch_notify_same_primary: + clients: [client.0] diff --git a/qa/suites/rados/singleton/mon_election b/qa/suites/rados/singleton/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/singleton/mon_election @@ -0,0 +1 @@ +.qa/mon_election
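The thrash-* singleton tests above follow a common pattern: a thrashosds task randomly takes OSDs down (chance_down), injects read errors (random_eio) or backfillfull conditions (chance_test_backfill_full) while keeping at least min_in OSDs in, and a client workload such as radosbench runs against the cluster in the meantime. A stripped-down sketch of that pattern, with illustrative values only:

    tasks:
    - thrashosds:
        op_delay: 30          # pacing between thrash operations
        clean_interval: 120   # periodically let the cluster go clean again
        chance_down: .5       # probability of taking an OSD down
        random_eio: .33       # inject random read errors while thrashing
        min_in: 5             # keep at least this many OSDs "in"
    - radosbench:
        clients: [client.0]
        time: 720
        type: rand
        objectsize: 1048576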
\ No newline at end of file diff --git a/qa/suites/rados/singleton/msgr b/qa/suites/rados/singleton/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/rados/singleton/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/rados/singleton/msgr-failures/.qa b/qa/suites/rados/singleton/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/singleton/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/singleton/msgr-failures/few.yaml b/qa/suites/rados/singleton/msgr-failures/few.yaml new file mode 100644 index 000000000..8fd638744 --- /dev/null +++ b/qa/suites/rados/singleton/msgr-failures/few.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME + - \(MON_DOWN\) diff --git a/qa/suites/rados/singleton/msgr-failures/many.yaml b/qa/suites/rados/singleton/msgr-failures/many.yaml new file mode 100644 index 000000000..206da3ec1 --- /dev/null +++ b/qa/suites/rados/singleton/msgr-failures/many.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 1000 + mon mgr beacon grace: 90 + mon client hunt interval max multiple: 2 + mon client directed command retry: 5 + mgr: + debug monc: 10 + log-ignorelist: + - \(OSD_SLOW_PING_TIME + - \(MON_DOWN\) diff --git a/qa/suites/rados/singleton/msgr-failures/none.yaml b/qa/suites/rados/singleton/msgr-failures/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/singleton/msgr-failures/none.yaml diff --git a/qa/suites/rados/singleton/objectstore b/qa/suites/rados/singleton/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/singleton/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
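In the msgr-failures facets above, 'ms inject socket failures: N' makes the messenger inject a socket failure roughly once per N messages, so many.yaml (1000) is considerably more aggressive than few.yaml (5000), which is presumably why it also raises the mgr beacon grace and caps the mon client hunt interval multiple. The active value can be checked on a live daemon through the admin socket; a sketch, assuming osd.0 is co-located with mon.a as in most of the singleton clusters above:

    tasks:
    - exec:
        mon.a:
          - sudo ceph daemon osd.0 config get ms_inject_socket_failures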
\ No newline at end of file diff --git a/qa/suites/rados/singleton/rados.yaml b/qa/suites/rados/singleton/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/singleton/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/singleton/supported-random-distro$ b/qa/suites/rados/singleton/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/singleton/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/standalone/% b/qa/suites/rados/standalone/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/standalone/% diff --git a/qa/suites/rados/standalone/.qa b/qa/suites/rados/standalone/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/standalone/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/standalone/supported-random-distro$ b/qa/suites/rados/standalone/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/standalone/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/standalone/workloads/.qa b/qa/suites/rados/standalone/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/standalone/workloads/c2c.yaml b/qa/suites/rados/standalone/workloads/c2c.yaml new file mode 100644 index 000000000..9a0dfce94 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/c2c.yaml @@ -0,0 +1,18 @@ +arch: x86_64 +roles: +- - mon.a + - mgr.x + - osd.0 + - client.0 +tasks: +- install: + extra_system_packages: + rpm: + - perf + deb: + - linux-tools-generic +- workunit: + basedir: qa/standalone + clients: + all: + - c2c diff --git a/qa/suites/rados/standalone/workloads/crush.yaml b/qa/suites/rados/standalone/workloads/crush.yaml new file mode 100644 index 000000000..a62a0dd81 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/crush.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - crush diff --git a/qa/suites/rados/standalone/workloads/erasure-code.yaml b/qa/suites/rados/standalone/workloads/erasure-code.yaml new file mode 100644 index 000000000..7d79753ce --- /dev/null +++ b/qa/suites/rados/standalone/workloads/erasure-code.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - erasure-code diff --git a/qa/suites/rados/standalone/workloads/mgr.yaml b/qa/suites/rados/standalone/workloads/mgr.yaml new file mode 100644 index 000000000..997fae865 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/mgr.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - mgr diff --git a/qa/suites/rados/standalone/workloads/misc.yaml b/qa/suites/rados/standalone/workloads/misc.yaml new file mode 100644 index 000000000..4aa9ee27e --- /dev/null +++ b/qa/suites/rados/standalone/workloads/misc.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - misc diff --git a/qa/suites/rados/standalone/workloads/mon-stretch.yaml b/qa/suites/rados/standalone/workloads/mon-stretch.yaml new file mode 100644 index 000000000..d039126c5 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/mon-stretch.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - mon-stretch
\ No newline at end of file diff --git a/qa/suites/rados/standalone/workloads/mon.yaml b/qa/suites/rados/standalone/workloads/mon.yaml new file mode 100644 index 000000000..c19606f42 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/mon.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - mon diff --git a/qa/suites/rados/standalone/workloads/osd-backfill.yaml b/qa/suites/rados/standalone/workloads/osd-backfill.yaml new file mode 100644 index 000000000..b61e27289 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/osd-backfill.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - osd-backfill diff --git a/qa/suites/rados/standalone/workloads/osd.yaml b/qa/suites/rados/standalone/workloads/osd.yaml new file mode 100644 index 000000000..e28b52210 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/osd.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - osd diff --git a/qa/suites/rados/standalone/workloads/scrub.yaml b/qa/suites/rados/standalone/workloads/scrub.yaml new file mode 100644 index 000000000..7f6fad406 --- /dev/null +++ b/qa/suites/rados/standalone/workloads/scrub.yaml @@ -0,0 +1,18 @@ +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 +openstack: + - volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- workunit: + basedir: qa/standalone + clients: + all: + - scrub diff --git a/qa/suites/rados/thrash-erasure-code-big/% b/qa/suites/rados/thrash-erasure-code-big/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/% diff --git a/qa/suites/rados/thrash-erasure-code-big/.qa b/qa/suites/rados/thrash-erasure-code-big/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/.qa @@ -0,0 +1 @@ +../.qa/
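The standalone workloads above all share one shape: a workunit task with basedir qa/standalone whose client entry names a directory, and every shell script under that directory is then run on the client node. An equivalent fragment that targets a single script rather than a whole directory (the script path is an assumption, chosen for illustration):

    tasks:
    - install:
    - workunit:
        basedir: qa/standalone
        clients:
          all:
            - scrub/osd-scrub-repair.sh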
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/ceph.yaml b/qa/suites/rados/thrash-erasure-code-big/ceph.yaml new file mode 120000 index 000000000..a2fd139cb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/ceph.yaml @@ -0,0 +1 @@ +../thrash/ceph.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/cluster/+ b/qa/suites/rados/thrash-erasure-code-big/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/cluster/+ diff --git a/qa/suites/rados/thrash-erasure-code-big/cluster/.qa b/qa/suites/rados/thrash-erasure-code-big/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/cluster/12-osds.yaml b/qa/suites/rados/thrash-erasure-code-big/cluster/12-osds.yaml new file mode 100644 index 000000000..1c45ee352 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/cluster/12-osds.yaml @@ -0,0 +1,4 @@ +roles: +- [osd.0, osd.1, osd.2, osd.3, client.0, mon.a] +- [osd.4, osd.5, osd.6, osd.7, mon.b, mgr.x] +- [osd.8, osd.9, osd.10, osd.11, mon.c] diff --git a/qa/suites/rados/thrash-erasure-code-big/cluster/openstack.yaml b/qa/suites/rados/thrash-erasure-code-big/cluster/openstack.yaml new file mode 100644 index 000000000..e559d9126 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/cluster/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/rados/thrash-erasure-code-big/mon_election b/qa/suites/rados/thrash-erasure-code-big/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/msgr-failures b/qa/suites/rados/thrash-erasure-code-big/msgr-failures new file mode 120000 index 000000000..03689aa44 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/msgr-failures @@ -0,0 +1 @@ +../thrash/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/objectstore b/qa/suites/rados/thrash-erasure-code-big/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/rados.yaml b/qa/suites/rados/thrash-erasure-code-big/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/recovery-overrides b/qa/suites/rados/thrash-erasure-code-big/recovery-overrides new file mode 120000 index 000000000..1957f2c42 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/recovery-overrides @@ -0,0 +1 @@ +../thrash/2-recovery-overrides
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/supported-random-distro$ b/qa/suites/rados/thrash-erasure-code-big/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashers/.qa b/qa/suites/rados/thrash-erasure-code-big/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashers/careful.yaml b/qa/suites/rados/thrash-erasure-code-big/thrashers/careful.yaml new file mode 100644 index 000000000..df0a14500 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashers/careful.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - slow request + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 6 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 8 + aggressive_pg_num_changes: false diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashers/default.yaml b/qa/suites/rados/thrash-erasure-code-big/thrashers/default.yaml new file mode 100644 index 000000000..09b6c1782 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashers/default.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - slow request + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 6 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 8 + chance_bluestore_reshard: 1 + bluestore_new_sharding: random diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashers/fastread.yaml b/qa/suites/rados/thrash-erasure-code-big/thrashers/fastread.yaml new file mode 100644 index 000000000..a36155609 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashers/fastread.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + mon: + osd pool default ec fast read: true + osd: + osd debug reject backfill probability: .1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 4 diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashers/mapgap.yaml b/qa/suites/rados/thrash-erasure-code-big/thrashers/mapgap.yaml new file mode 100644 index 000000000..6cf4dc930 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashers/mapgap.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - osd_map_cache_size + - \(POOL_APP_NOT_ENABLED\) + conf: + mon: + mon min osdmap epochs: 2 + osd: + osd map cache size: 1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 6 +tasks: +- thrashosds: + timeout: 1800 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + chance_test_map_discontinuity: 0.5 + min_in: 8 diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashers/morepggrow.yaml b/qa/suites/rados/thrash-erasure-code-big/thrashers/morepggrow.yaml new file mode 100644 index 000000000..794e994f2 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashers/morepggrow.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 9 + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - 
\(POOL_APP_NOT_ENABLED\) +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 + min_in: 8 diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashers/pggrow.yaml b/qa/suites/rados/thrash-erasure-code-big/thrashers/pggrow.yaml new file mode 100644 index 000000000..15be6b43b --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashers/pggrow.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 6 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 + min_in: 8 diff --git a/qa/suites/rados/thrash-erasure-code-big/thrashosds-health.yaml b/qa/suites/rados/thrash-erasure-code-big/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
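Among the thrash-erasure-code-big thrashers above, fastread.yaml sets 'osd pool default ec fast read: true'; with fast read the primary issues reads to all shards of an EC object and answers as soon as enough shards have arrived to decode, trading extra backend reads for lower tail latency. The same behaviour can also be toggled per pool after creation; a small sketch with a hypothetical pool name:

    tasks:
    - exec:
        client.0:
          - sudo ceph osd pool set ecpool fast_read 1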
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/workloads/.qa b/qa/suites/rados/thrash-erasure-code-big/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/workloads/ec-rados-plugin=jerasure-k=4-m=2.yaml b/qa/suites/rados/thrash-erasure-code-big/workloads/ec-rados-plugin=jerasure-k=4-m=2.yaml new file mode 120000 index 000000000..c18bec161 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/workloads/ec-rados-plugin=jerasure-k=4-m=2.yaml @@ -0,0 +1 @@ +.qa/erasure-code/ec-rados-plugin=jerasure-k=4-m=2.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-big/workloads/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml b/qa/suites/rados/thrash-erasure-code-big/workloads/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml new file mode 120000 index 000000000..d66fd7960 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-big/workloads/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml @@ -0,0 +1 @@ +.qa/erasure-code/ec-rados-plugin=lrc-k=4-m=2-l=3.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/% b/qa/suites/rados/thrash-erasure-code-isa/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/% diff --git a/qa/suites/rados/thrash-erasure-code-isa/.qa b/qa/suites/rados/thrash-erasure-code-isa/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/arch/.qa b/qa/suites/rados/thrash-erasure-code-isa/arch/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/arch/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/arch/x86_64.yaml b/qa/suites/rados/thrash-erasure-code-isa/arch/x86_64.yaml new file mode 100644 index 000000000..c2409f5d0 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/arch/x86_64.yaml @@ -0,0 +1 @@ +arch: x86_64 diff --git a/qa/suites/rados/thrash-erasure-code-isa/ceph.yaml b/qa/suites/rados/thrash-erasure-code-isa/ceph.yaml new file mode 120000 index 000000000..a2fd139cb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/ceph.yaml @@ -0,0 +1 @@ +../thrash/ceph.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/clusters b/qa/suites/rados/thrash-erasure-code-isa/clusters new file mode 120000 index 000000000..7aac47be3 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/clusters @@ -0,0 +1 @@ +../thrash/clusters
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/mon_election b/qa/suites/rados/thrash-erasure-code-isa/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/msgr-failures b/qa/suites/rados/thrash-erasure-code-isa/msgr-failures new file mode 120000 index 000000000..03689aa44 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/msgr-failures @@ -0,0 +1 @@ +../thrash/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/objectstore b/qa/suites/rados/thrash-erasure-code-isa/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/rados.yaml b/qa/suites/rados/thrash-erasure-code-isa/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/recovery-overrides b/qa/suites/rados/thrash-erasure-code-isa/recovery-overrides new file mode 120000 index 000000000..1957f2c42 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/recovery-overrides @@ -0,0 +1 @@ +../thrash/2-recovery-overrides
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/supported-random-distro$ b/qa/suites/rados/thrash-erasure-code-isa/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/thrashers b/qa/suites/rados/thrash-erasure-code-isa/thrashers new file mode 120000 index 000000000..f461dadc3 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/thrashers @@ -0,0 +1 @@ +../thrash/thrashers
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/thrashosds-health.yaml b/qa/suites/rados/thrash-erasure-code-isa/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/workloads/.qa b/qa/suites/rados/thrash-erasure-code-isa/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-isa/workloads/ec-rados-plugin=isa-k=2-m=1.yaml b/qa/suites/rados/thrash-erasure-code-isa/workloads/ec-rados-plugin=isa-k=2-m=1.yaml new file mode 120000 index 000000000..19342b9d8 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-isa/workloads/ec-rados-plugin=isa-k=2-m=1.yaml @@ -0,0 +1 @@ +.qa/erasure-code/ec-rados-plugin=isa-k=2-m=1.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/% b/qa/suites/rados/thrash-erasure-code-overwrites/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/% diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/.qa b/qa/suites/rados/thrash-erasure-code-overwrites/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/bluestore-bitmap.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/bluestore-bitmap.yaml new file mode 120000 index 000000000..635085f7f --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/bluestore-bitmap.yaml @@ -0,0 +1 @@ +../thrash-erasure-code/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/ceph.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/ceph.yaml new file mode 120000 index 000000000..a2fd139cb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/ceph.yaml @@ -0,0 +1 @@ +../thrash/ceph.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/clusters b/qa/suites/rados/thrash-erasure-code-overwrites/clusters new file mode 120000 index 000000000..646ea04cd --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/clusters @@ -0,0 +1 @@ +../thrash-erasure-code/clusters
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/fast b/qa/suites/rados/thrash-erasure-code-overwrites/fast new file mode 120000 index 000000000..6170b30e0 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/fast @@ -0,0 +1 @@ +../thrash-erasure-code/fast
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/mon_election b/qa/suites/rados/thrash-erasure-code-overwrites/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/msgr-failures b/qa/suites/rados/thrash-erasure-code-overwrites/msgr-failures new file mode 120000 index 000000000..70c9ca130 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/msgr-failures @@ -0,0 +1 @@ +../thrash-erasure-code/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/rados.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/rados.yaml new file mode 120000 index 000000000..017df6f60 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/rados.yaml @@ -0,0 +1 @@ +../thrash-erasure-code/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/recovery-overrides b/qa/suites/rados/thrash-erasure-code-overwrites/recovery-overrides new file mode 120000 index 000000000..1957f2c42 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/recovery-overrides @@ -0,0 +1 @@ +../thrash/2-recovery-overrides
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/supported-random-distro$ b/qa/suites/rados/thrash-erasure-code-overwrites/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/thrashers b/qa/suites/rados/thrash-erasure-code-overwrites/thrashers new file mode 120000 index 000000000..40ff82cf7 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/thrashers @@ -0,0 +1 @@ +../thrash-erasure-code/thrashers
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/thrashosds-health.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/workloads/.qa b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-pool-snaps-few-objects-overwrites.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-pool-snaps-few-objects-overwrites.yaml new file mode 100644 index 000000000..d2ad70a57 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-pool-snaps-few-objects-overwrites.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + conf: + global: + enable experimental unrecoverable data corrupting features: '*' + thrashosds: + disable_objectstore_tool_tests: true +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + pool_snaps: true + ec_pool: true + erasure_code_use_overwrites: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-small-objects-fast-read-overwrites.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-small-objects-fast-read-overwrites.yaml new file mode 100644 index 000000000..b3f831b77 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-small-objects-fast-read-overwrites.yaml @@ -0,0 +1,29 @@ +overrides: + ceph: + conf: + global: + enable experimental unrecoverable data corrupting features: '*' + thrashosds: + disable_objectstore_tool_tests: true +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + ec_pool: true + erasure_code_use_overwrites: true + fast_read: true + op_weights: + read: 100 + write: 100 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-small-objects-overwrites.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-small-objects-overwrites.yaml new file mode 100644 index 000000000..9baacef48 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-small-objects-overwrites.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + conf: + global: + enable experimental unrecoverable data corrupting features: '*' + thrashosds: + disable_objectstore_tool_tests: true +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + ec_pool: true + erasure_code_use_overwrites: true + op_weights: + read: 100 + write: 100 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-snaps-few-objects-overwrites.yaml b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-snaps-few-objects-overwrites.yaml new file mode 100644 index 000000000..b7c538199 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-overwrites/workloads/ec-snaps-few-objects-overwrites.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + conf: + global: + enable experimental unrecoverable data corrupting features: '*' + thrashosds: + disable_objectstore_tool_tests: true +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + erasure_code_use_overwrites: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash-erasure-code-shec/% b/qa/suites/rados/thrash-erasure-code-shec/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ 
b/qa/suites/rados/thrash-erasure-code-shec/% diff --git a/qa/suites/rados/thrash-erasure-code-shec/.qa b/qa/suites/rados/thrash-erasure-code-shec/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/.qa @@ -0,0 +1 @@ +../.qa/
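The *-overwrites workloads above use the rados task's erasure_code_use_overwrites option together with the experimental-features override and disable_objectstore_tool_tests; overwrites on EC pools presumably map to the per-pool allow_ec_overwrites flag and expect BlueStore OSDs. A hand-rolled equivalent, with an illustrative pool name and PG count:

    tasks:
    - exec:
        client.0:
          - sudo ceph osd pool create ecpool 16 16 erasure
          - sudo ceph osd pool set ecpool allow_ec_overwrites true
          - sudo ceph osd pool application enable ecpool rados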
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/ceph.yaml b/qa/suites/rados/thrash-erasure-code-shec/ceph.yaml new file mode 120000 index 000000000..a2fd139cb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/ceph.yaml @@ -0,0 +1 @@ +../thrash/ceph.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/clusters/+ b/qa/suites/rados/thrash-erasure-code-shec/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/clusters/+ diff --git a/qa/suites/rados/thrash-erasure-code-shec/clusters/.qa b/qa/suites/rados/thrash-erasure-code-shec/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/clusters/fixed-4.yaml b/qa/suites/rados/thrash-erasure-code-shec/clusters/fixed-4.yaml new file mode 120000 index 000000000..aa8830071 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/clusters/fixed-4.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-4.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/clusters/openstack.yaml b/qa/suites/rados/thrash-erasure-code-shec/clusters/openstack.yaml new file mode 100644 index 000000000..e559d9126 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/rados/thrash-erasure-code-shec/mon_election b/qa/suites/rados/thrash-erasure-code-shec/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/msgr-failures b/qa/suites/rados/thrash-erasure-code-shec/msgr-failures new file mode 120000 index 000000000..03689aa44 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/msgr-failures @@ -0,0 +1 @@ +../thrash/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/objectstore b/qa/suites/rados/thrash-erasure-code-shec/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/rados.yaml b/qa/suites/rados/thrash-erasure-code-shec/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/recovery-overrides b/qa/suites/rados/thrash-erasure-code-shec/recovery-overrides new file mode 120000 index 000000000..1957f2c42 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/recovery-overrides @@ -0,0 +1 @@ +../thrash/2-recovery-overrides
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/supported-random-distro$ b/qa/suites/rados/thrash-erasure-code-shec/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/thrashers/.qa b/qa/suites/rados/thrash-erasure-code-shec/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/thrashers/careful.yaml b/qa/suites/rados/thrash-erasure-code-shec/thrashers/careful.yaml new file mode 100644 index 000000000..e18379b5f --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/thrashers/careful.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - slow request + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 8 + aggressive_pg_num_changes: false diff --git a/qa/suites/rados/thrash-erasure-code-shec/thrashers/default.yaml b/qa/suites/rados/thrash-erasure-code-shec/thrashers/default.yaml new file mode 100644 index 000000000..00c8689d4 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/thrashers/default.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - slow request + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 8 + chance_bluestore_reshard: 1 + bluestore_new_sharding: random diff --git a/qa/suites/rados/thrash-erasure-code-shec/thrashosds-health.yaml b/qa/suites/rados/thrash-erasure-code-shec/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/workloads/.qa b/qa/suites/rados/thrash-erasure-code-shec/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code-shec/workloads/ec-rados-plugin=shec-k=4-m=3-c=2.yaml b/qa/suites/rados/thrash-erasure-code-shec/workloads/ec-rados-plugin=shec-k=4-m=3-c=2.yaml new file mode 120000 index 000000000..8f318cc33 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code-shec/workloads/ec-rados-plugin=shec-k=4-m=3-c=2.yaml @@ -0,0 +1 @@ +.qa/erasure-code/ec-rados-plugin=shec-k=4-m=3-c=2.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/% b/qa/suites/rados/thrash-erasure-code/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/% diff --git a/qa/suites/rados/thrash-erasure-code/.qa b/qa/suites/rados/thrash-erasure-code/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/ceph.yaml b/qa/suites/rados/thrash-erasure-code/ceph.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/ceph.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rados/thrash-erasure-code/clusters b/qa/suites/rados/thrash-erasure-code/clusters new file mode 120000 index 000000000..7aac47be3 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/clusters @@ -0,0 +1 @@ +../thrash/clusters
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/fast/.qa b/qa/suites/rados/thrash-erasure-code/fast/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/fast/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/fast/fast.yaml b/qa/suites/rados/thrash-erasure-code/fast/fast.yaml new file mode 100644 index 000000000..8ebfee0a9 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/fast/fast.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + osd pool default ec fast read: true diff --git a/qa/suites/rados/thrash-erasure-code/fast/normal.yaml b/qa/suites/rados/thrash-erasure-code/fast/normal.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/fast/normal.yaml diff --git a/qa/suites/rados/thrash-erasure-code/mon_election b/qa/suites/rados/thrash-erasure-code/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/msgr-failures b/qa/suites/rados/thrash-erasure-code/msgr-failures new file mode 120000 index 000000000..03689aa44 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/msgr-failures @@ -0,0 +1 @@ +../thrash/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/objectstore b/qa/suites/rados/thrash-erasure-code/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/rados.yaml b/qa/suites/rados/thrash-erasure-code/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/recovery-overrides b/qa/suites/rados/thrash-erasure-code/recovery-overrides new file mode 120000 index 000000000..1957f2c42 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/recovery-overrides @@ -0,0 +1 @@ +../thrash/2-recovery-overrides
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/supported-random-distro$ b/qa/suites/rados/thrash-erasure-code/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/.qa b/qa/suites/rados/thrash-erasure-code/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/careful.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/careful.yaml new file mode 100644 index 000000000..0602f01ad --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/careful.yaml @@ -0,0 +1,20 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 4 + aggressive_pg_num_changes: false diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/default.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/default.yaml new file mode 100644 index 000000000..989b83e8f --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/default.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 4 + chance_bluestore_reshard: 1 + bluestore_new_sharding: random diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/fastread.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/fastread.yaml new file mode 100644 index 000000000..5fbb9504b --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/fastread.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + mon: + osd pool default ec fast read: true + osd: + osd debug reject backfill probability: .1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + min_in: 4 diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/minsize_recovery.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/minsize_recovery.yaml new file mode 100644 index 000000000..771d9a104 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/minsize_recovery.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + create_rbd_pool: False + pre-mgr-commands: + - sudo ceph config set mgr mgr_pool false --force + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_test_min_size: 3 diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml new file mode 100644 index 000000000..f8c542323 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/morepggrow.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 9 + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 + min_in: 
4 diff --git a/qa/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml b/qa/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml new file mode 100644 index 000000000..ed13bfd3e --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashers/pggrow.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 4 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 + min_in: 4 diff --git a/qa/suites/rados/thrash-erasure-code/thrashosds-health.yaml b/qa/suites/rados/thrash-erasure-code/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/workloads/.qa b/qa/suites/rados/thrash-erasure-code/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=clay-k=4-m=2.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=clay-k=4-m=2.yaml new file mode 120000 index 000000000..08155ed6e --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=clay-k=4-m=2.yaml @@ -0,0 +1 @@ +.qa/erasure-code/ec-rados-plugin=clay-k=4-m=2.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=2-m=1.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=2-m=1.yaml new file mode 120000 index 000000000..af6d8042f --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=2-m=1.yaml @@ -0,0 +1 @@ +.qa/erasure-code/ec-rados-plugin=jerasure-k=2-m=1.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=3-m=1.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=3-m=1.yaml new file mode 120000 index 000000000..cdf551995 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-rados-plugin=jerasure-k=3-m=1.yaml @@ -0,0 +1 @@ +.qa/erasure-code/ec-rados-plugin=jerasure-k=3-m=1.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-radosbench.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-radosbench.yaml new file mode 100644 index 000000000..3c2ff7af0 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-radosbench.yaml @@ -0,0 +1,27 @@ +tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 150 + unique_pool: true + ec_pool: true + - radosbench: + clients: [client.0] + time: 150 + unique_pool: true + ec_pool: true + - radosbench: + clients: [client.0] + time: 150 + unique_pool: true + ec_pool: true + - radosbench: + clients: [client.0] + time: 150 + unique_pool: true + ec_pool: true + - radosbench: + clients: [client.0] + time: 150 + unique_pool: true + ec_pool: true diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-balanced.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-balanced.yaml new file mode 100644 index 000000000..af0ac3931 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-balanced.yaml @@ -0,0 +1,21 @@ +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + ec_pool: true + balanced_reads: true + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-fast-read.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-fast-read.yaml new file mode 100644 index 000000000..e732ec6fa --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-fast-read.yaml @@ -0,0 +1,21 @@ +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + ec_pool: true + fast_read: true + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-many-deletes.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-many-deletes.yaml new file mode 100644 index 000000000..25b38e14f --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects-many-deletes.yaml @@ -0,0 +1,14 @@ +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 8 + objects: 20 + size: 16384 + ec_pool: true + op_weights: + write: 0 + read: 0 + append: 10 + delete: 20 diff --git a/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects.yaml b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects.yaml new file mode 100644 index 000000000..a8ac39716 --- /dev/null +++ b/qa/suites/rados/thrash-erasure-code/workloads/ec-small-objects.yaml @@ -0,0 +1,20 @@ +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + ec_pool: true + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash-old-clients/% b/qa/suites/rados/thrash-old-clients/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/% diff --git a/qa/suites/rados/thrash-old-clients/.qa b/qa/suites/rados/thrash-old-clients/.qa new file mode 120000 index 
000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/0-distro$/.qa b/qa/suites/rados/thrash-old-clients/0-distro$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/0-distro$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/0-distro$/centos_8.stream_container_tools.yaml b/qa/suites/rados/thrash-old-clients/0-distro$/centos_8.stream_container_tools.yaml new file mode 120000 index 000000000..7a86f967f --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/0-distro$/centos_8.stream_container_tools.yaml @@ -0,0 +1 @@ +.qa/distros/podman/centos_8.stream_container_tools.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/.qa b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/2-size-2-min-size.yaml b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/2-size-2-min-size.yaml new file mode 120000 index 000000000..5393a7554 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/2-size-2-min-size.yaml @@ -0,0 +1 @@ +.qa/overrides/2-size-2-min-size.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/3-size-2-min-size.yaml b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/3-size-2-min-size.yaml new file mode 120000 index 000000000..5ff70eadf --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/0-size-min-size-overrides/3-size-2-min-size.yaml @@ -0,0 +1 @@ +.qa/overrides/3-size-2-min-size.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/1-install/.qa b/qa/suites/rados/thrash-old-clients/1-install/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/1-install/nautilus-v1only.yaml b/qa/suites/rados/thrash-old-clients/1-install/nautilus-v1only.yaml new file mode 100644 index 000000000..39c5eb4a1 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/nautilus-v1only.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + mon_bind_msgr2: false + log-ignorelist: + - \(MON_DOWN\) + conf: + global: + ms type: async + ms bind msgr2: false +tasks: +- install: + branch: nautilus + exclude_packages: + - cephadm + - ceph-mgr-cephadm + - ceph-immutable-object-cache + - python3-rados + - python3-rgw + - python3-rbd + - python3-cephfs + - ceph-volume + extra_packages: + - python-rados + - python-rgw + - python-rbd + - python-cephfs diff --git a/qa/suites/rados/thrash-old-clients/1-install/nautilus-v2only.yaml b/qa/suites/rados/thrash-old-clients/1-install/nautilus-v2only.yaml new file mode 100644 index 000000000..61337e0e6 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/nautilus-v2only.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + log-ignorelist: + - \(MON_DOWN\) + conf: + global: + ms type: async + ms bind msgr2: true + ms bind msgr1: false +tasks: +- install: + branch: nautilus + exclude_packages: + - cephadm + - ceph-mgr-cephadm + - ceph-immutable-object-cache + - python3-rados + - python3-rgw + - python3-rbd + - python3-cephfs + - ceph-volume + extra_packages: + - python-rados + - python-rgw + - python-rbd + - python-cephfs diff --git a/qa/suites/rados/thrash-old-clients/1-install/nautilus.yaml b/qa/suites/rados/thrash-old-clients/1-install/nautilus.yaml new file mode 100644 index 000000000..cd05b71f5 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/nautilus.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + log-ignorelist: + - \(MON_DOWN\) +tasks: +- install: + branch: nautilus + exclude_packages: + - cephadm + - ceph-mgr-cephadm + - ceph-immutable-object-cache + - python3-rados + - python3-rgw + - python3-rbd + - python3-cephfs + - ceph-volume + extra_packages: + - python-rados + - python-rgw + - python-rbd + - python-cephfs diff --git a/qa/suites/rados/thrash-old-clients/1-install/octopus.yaml b/qa/suites/rados/thrash-old-clients/1-install/octopus.yaml new file mode 100644 index 000000000..39d1da232 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/octopus.yaml @@ -0,0 +1,35 @@ +overrides: + ceph: + log-ignorelist: + - \(MON_DOWN\) +tasks: +- install: + branch: octopus + exclude_packages: + - ceph-mgr-dashboard + - ceph-mgr-diskprediction-local + - ceph-mgr-rook + - ceph-mgr-cephadm + - ceph-base-debuginfo + - ceph-common-debuginfo + - ceph-immutable-object-cache-debuginfo + - ceph-radosgw-debuginfo + - ceph-test-debuginfo + - ceph-base-debuginfo + - ceph-mgr-debuginfo + - ceph-mds-debuginfo + - ceph-mon-debuginfo + - ceph-osd-debuginfo + - ceph-fuse-debuginfo + - librados-devel-debuginfo + - libcephfs2-debuginfo + - librados2-debuginfo + - librbd1-debuginfo + - python3-cephfs-debuginfo + - python3-rados-debuginfo + - python3-rbd-debuginfo + - python3-rgw-debuginfo + - rbd-fuse-debuginfo + - rbd-mirror-debuginfo + - rbd-nbd-debuginfo + - ceph-volume diff --git a/qa/suites/rados/thrash-old-clients/1-install/pacific.yaml b/qa/suites/rados/thrash-old-clients/1-install/pacific.yaml new file mode 100644 index 000000000..a26629252 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/pacific.yaml @@ -0,0 +1,35 @@ +overrides: + ceph: + log-ignorelist: + - \(MON_DOWN\) +tasks: +- install: + branch: pacific + exclude_packages: + 
- ceph-mgr-dashboard + - ceph-mgr-diskprediction-local + - ceph-mgr-rook + - ceph-mgr-cephadm + - ceph-base-debuginfo + - ceph-common-debuginfo + - ceph-immutable-object-cache-debuginfo + - ceph-radosgw-debuginfo + - ceph-test-debuginfo + - ceph-base-debuginfo + - ceph-mgr-debuginfo + - ceph-mds-debuginfo + - ceph-mon-debuginfo + - ceph-osd-debuginfo + - ceph-fuse-debuginfo + - librados-devel-debuginfo + - libcephfs2-debuginfo + - librados2-debuginfo + - librbd1-debuginfo + - python3-cephfs-debuginfo + - python3-rados-debuginfo + - python3-rbd-debuginfo + - python3-rgw-debuginfo + - rbd-fuse-debuginfo + - rbd-mirror-debuginfo + - rbd-nbd-debuginfo + - ceph-volume diff --git a/qa/suites/rados/thrash-old-clients/1-install/quincy.yaml b/qa/suites/rados/thrash-old-clients/1-install/quincy.yaml new file mode 100644 index 000000000..2ce960be0 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/1-install/quincy.yaml @@ -0,0 +1,35 @@ +overrides: + ceph: + log-ignorelist: + - \(MON_DOWN\) +tasks: +- install: + branch: quincy + exclude_packages: + - ceph-mgr-dashboard + - ceph-mgr-diskprediction-local + - ceph-mgr-rook + - ceph-mgr-cephadm + - ceph-base-debuginfo + - ceph-common-debuginfo + - ceph-immutable-object-cache-debuginfo + - ceph-radosgw-debuginfo + - ceph-test-debuginfo + - ceph-base-debuginfo + - ceph-mgr-debuginfo + - ceph-mds-debuginfo + - ceph-mon-debuginfo + - ceph-osd-debuginfo + - ceph-fuse-debuginfo + - librados-devel-debuginfo + - libcephfs2-debuginfo + - librados2-debuginfo + - librbd1-debuginfo + - python3-cephfs-debuginfo + - python3-rados-debuginfo + - python3-rbd-debuginfo + - python3-rgw-debuginfo + - rbd-fuse-debuginfo + - rbd-mirror-debuginfo + - rbd-nbd-debuginfo + - ceph-volume diff --git a/qa/suites/rados/thrash-old-clients/backoff/.qa b/qa/suites/rados/thrash-old-clients/backoff/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/backoff/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/backoff/normal.yaml b/qa/suites/rados/thrash-old-clients/backoff/normal.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/backoff/normal.yaml diff --git a/qa/suites/rados/thrash-old-clients/backoff/peering.yaml b/qa/suites/rados/thrash-old-clients/backoff/peering.yaml new file mode 100644 index 000000000..66d06117e --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/backoff/peering.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd backoff on peering: true diff --git a/qa/suites/rados/thrash-old-clients/backoff/peering_and_degraded.yaml b/qa/suites/rados/thrash-old-clients/backoff/peering_and_degraded.yaml new file mode 100644 index 000000000..e61099065 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/backoff/peering_and_degraded.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + osd: + osd backoff on peering: true + osd backoff on degraded: true diff --git a/qa/suites/rados/thrash-old-clients/ceph.yaml b/qa/suites/rados/thrash-old-clients/ceph.yaml new file mode 100644 index 000000000..016ce36da --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/ceph.yaml @@ -0,0 +1,7 @@ +# Don't verify os + flavor + sha1 +verify_ceph_hash: false +tasks: +- cephadm: + conf: + mon: + auth allow insecure global id reclaim: true diff --git a/qa/suites/rados/thrash-old-clients/clusters/+ b/qa/suites/rados/thrash-old-clients/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/clusters/+ diff --git a/qa/suites/rados/thrash-old-clients/clusters/.qa b/qa/suites/rados/thrash-old-clients/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/clusters/openstack.yaml b/qa/suites/rados/thrash-old-clients/clusters/openstack.yaml new file mode 100644 index 000000000..b0f3b9b4d --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rados/thrash-old-clients/clusters/three-plus-one.yaml b/qa/suites/rados/thrash-old-clients/clusters/three-plus-one.yaml new file mode 100644 index 000000000..9af5382ea --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/clusters/three-plus-one.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.a, mgr.y, osd.0, osd.1, osd.2, osd.3, client.0] +- [mon.b, mgr.x, osd.4, osd.5, osd.6, osd.7, client.1] +- [mon.c, osd.8, osd.9, osd.10, osd.11, client.2] +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/rados/thrash-old-clients/d-balancer/.qa b/qa/suites/rados/thrash-old-clients/d-balancer/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/d-balancer/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/d-balancer/crush-compat.yaml b/qa/suites/rados/thrash-old-clients/d-balancer/crush-compat.yaml new file mode 100644 index 000000000..aa867660d --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/d-balancer/crush-compat.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + mon.a: + - while ! ceph balancer status ; do sleep 1 ; done + - ceph balancer mode crush-compat + - ceph balancer on diff --git a/qa/suites/rados/thrash-old-clients/d-balancer/on.yaml b/qa/suites/rados/thrash-old-clients/d-balancer/on.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/d-balancer/on.yaml diff --git a/qa/suites/rados/thrash-old-clients/mon_election b/qa/suites/rados/thrash-old-clients/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/msgr-failures/.qa b/qa/suites/rados/thrash-old-clients/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/msgr-failures/fastclose.yaml b/qa/suites/rados/thrash-old-clients/msgr-failures/fastclose.yaml new file mode 100644 index 000000000..ec45f8882 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/msgr-failures/fastclose.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms tcp read timeout: 5 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/thrash-old-clients/msgr-failures/few.yaml b/qa/suites/rados/thrash-old-clients/msgr-failures/few.yaml new file mode 100644 index 000000000..cc9a3ae69 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/msgr-failures/few.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + osd: + osd heartbeat use min delay socket: true + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/thrash-old-clients/msgr-failures/osd-delay.yaml b/qa/suites/rados/thrash-old-clients/msgr-failures/osd-delay.yaml new file mode 100644 index 000000000..d7cec6f36 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/msgr-failures/osd-delay.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms inject delay type: osd + ms inject delay probability: .005 + ms inject delay max: 1 + ms inject internal delays: .002 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/thrash-old-clients/rados.yaml b/qa/suites/rados/thrash-old-clients/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/thrashers/.qa b/qa/suites/rados/thrash-old-clients/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/thrashers/careful.yaml b/qa/suites/rados/thrash-old-clients/thrashers/careful.yaml new file mode 100644 index 000000000..8820a6cd2 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/careful.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + aggressive_pg_num_changes: false diff --git a/qa/suites/rados/thrash-old-clients/thrashers/default.yaml b/qa/suites/rados/thrash-old-clients/thrashers/default.yaml new file mode 100644 index 000000000..54dc88802 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/default.yaml @@ -0,0 +1,25 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/thrash-old-clients/thrashers/mapgap.yaml b/qa/suites/rados/thrash-old-clients/thrashers/mapgap.yaml new file mode 100644 index 000000000..c1ab4493e --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/mapgap.yaml @@ -0,0 +1,27 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - osd_map_cache_size + - \(POOL_APP_NOT_ENABLED\) + conf: + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + osd: + osd map cache size: 1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd scrub during recovery: false + osd max backfills: 6 +tasks: +- thrashosds: + timeout: 1800 + chance_pgnum_grow: 0.25 + chance_pgpnum_fix: 0.25 + chance_test_map_discontinuity: 2 diff --git a/qa/suites/rados/thrash-old-clients/thrashers/morepggrow.yaml b/qa/suites/rados/thrash-old-clients/thrashers/morepggrow.yaml new file mode 100644 index 000000000..bb65d6a60 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/morepggrow.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + journal throttle high multiple: 2 + journal throttle max multiple: 10 + filestore queue throttle high multiple: 2 + filestore queue throttle max multiple: 10 + osd max backfills: 9 + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 +openstack: +- volumes: + size: 50 diff --git a/qa/suites/rados/thrash-old-clients/thrashers/none.yaml 
b/qa/suites/rados/thrash-old-clients/thrashers/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/none.yaml diff --git a/qa/suites/rados/thrash-old-clients/thrashers/pggrow.yaml b/qa/suites/rados/thrash-old-clients/thrashers/pggrow.yaml new file mode 100644 index 000000000..000550bd8 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashers/pggrow.yaml @@ -0,0 +1,25 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + filestore odsync write: true + osd max backfills: 2 + osd snap trim sleep: .5 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/thrash-old-clients/thrashosds-health.yaml b/qa/suites/rados/thrash-old-clients/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/workloads/.qa b/qa/suites/rados/thrash-old-clients/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash-old-clients/workloads/cache-snaps.yaml b/qa/suites/rados/thrash-old-clients/workloads/cache-snaps.yaml new file mode 100644 index 000000000..33f667ffd --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/workloads/cache-snaps.yaml @@ -0,0 +1,34 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate +tasks: +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 3600 + - sudo ceph osd pool set cache target_max_objects 250 + - sudo ceph osd pool set cache min_read_recency_for_promote 2 +- rados: + clients: [client.2] + pools: [base] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + cache_flush: 50 + cache_try_flush: 50 + cache_evict: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/rados/thrash-old-clients/workloads/radosbench.yaml b/qa/suites/rados/thrash-old-clients/workloads/radosbench.yaml new file mode 100644 index 000000000..6a89a4e6e --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/workloads/radosbench.yaml @@ -0,0 +1,33 @@ +overrides: + ceph: + conf: + client.2: + debug ms: 1 + debug objecter: 20 + debug rados: 20 +tasks: +- full_sequential: + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 + - radosbench: + clients: [client.2] + time: 90 diff --git a/qa/suites/rados/thrash-old-clients/workloads/rbd_cls.yaml b/qa/suites/rados/thrash-old-clients/workloads/rbd_cls.yaml new file mode 100644 index 000000000..a8bbbafec --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/workloads/rbd_cls.yaml @@ -0,0 +1,7 @@ +meta: +- desc: | + rbd object class functional tests +tasks: +- exec: + client.2: + - ceph_test_cls_rbd --gtest_filter=-TestClsRbd.get_features:TestClsRbd.parents:TestClsRbd.mirror diff --git a/qa/suites/rados/thrash-old-clients/workloads/snaps-few-objects.yaml b/qa/suites/rados/thrash-old-clients/workloads/snaps-few-objects.yaml new file mode 100644 index 000000000..f0a5735a9 --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/workloads/snaps-few-objects.yaml @@ -0,0 +1,13 @@ +tasks: +- rados: + clients: [client.2] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash-old-clients/workloads/test_rbd_api.yaml b/qa/suites/rados/thrash-old-clients/workloads/test_rbd_api.yaml new file mode 100644 index 000000000..39617b37e --- /dev/null +++ b/qa/suites/rados/thrash-old-clients/workloads/test_rbd_api.yaml @@ -0,0 +1,8 @@ +meta: +- desc: | + librbd C and C++ api tests +workload: +- workunit: + clients: + client.2: + - rbd/test_librbd.sh diff --git a/qa/suites/rados/thrash/% b/qa/suites/rados/thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/% diff --git 
a/qa/suites/rados/thrash/.qa b/qa/suites/rados/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/0-size-min-size-overrides/.qa b/qa/suites/rados/thrash/0-size-min-size-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/0-size-min-size-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml b/qa/suites/rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml new file mode 120000 index 000000000..5393a7554 --- /dev/null +++ b/qa/suites/rados/thrash/0-size-min-size-overrides/2-size-2-min-size.yaml @@ -0,0 +1 @@ +.qa/overrides/2-size-2-min-size.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml b/qa/suites/rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml new file mode 120000 index 000000000..5ff70eadf --- /dev/null +++ b/qa/suites/rados/thrash/0-size-min-size-overrides/3-size-2-min-size.yaml @@ -0,0 +1 @@ +.qa/overrides/3-size-2-min-size.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash/1-pg-log-overrides/.qa b/qa/suites/rados/thrash/1-pg-log-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/1-pg-log-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/1-pg-log-overrides/normal_pg_log.yaml b/qa/suites/rados/thrash/1-pg-log-overrides/normal_pg_log.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/1-pg-log-overrides/normal_pg_log.yaml diff --git a/qa/suites/rados/thrash/1-pg-log-overrides/short_pg_log.yaml b/qa/suites/rados/thrash/1-pg-log-overrides/short_pg_log.yaml new file mode 120000 index 000000000..abd86d7d9 --- /dev/null +++ b/qa/suites/rados/thrash/1-pg-log-overrides/short_pg_log.yaml @@ -0,0 +1 @@ +.qa/overrides/short_pg_log.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash/2-recovery-overrides/$ b/qa/suites/rados/thrash/2-recovery-overrides/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/2-recovery-overrides/$ diff --git a/qa/suites/rados/thrash/2-recovery-overrides/.qa b/qa/suites/rados/thrash/2-recovery-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/2-recovery-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/2-recovery-overrides/default.yaml b/qa/suites/rados/thrash/2-recovery-overrides/default.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/2-recovery-overrides/default.yaml diff --git a/qa/suites/rados/thrash/2-recovery-overrides/more-active-recovery.yaml b/qa/suites/rados/thrash/2-recovery-overrides/more-active-recovery.yaml new file mode 120000 index 000000000..47afd7020 --- /dev/null +++ b/qa/suites/rados/thrash/2-recovery-overrides/more-active-recovery.yaml @@ -0,0 +1 @@ +.qa/overrides/more-active-recovery.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash/2-recovery-overrides/more-async-partial-recovery.yaml b/qa/suites/rados/thrash/2-recovery-overrides/more-async-partial-recovery.yaml new file mode 100644 index 000000000..0bbc72db7 --- /dev/null +++ b/qa/suites/rados/thrash/2-recovery-overrides/more-async-partial-recovery.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_async_recovery_min_cost: 1 + osd_object_clean_region_max_num_intervals: 1000 diff --git a/qa/suites/rados/thrash/2-recovery-overrides/more-async-recovery.yaml b/qa/suites/rados/thrash/2-recovery-overrides/more-async-recovery.yaml new file mode 100644 index 000000000..4aed086bc --- /dev/null +++ b/qa/suites/rados/thrash/2-recovery-overrides/more-async-recovery.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + osd_async_recovery_min_cost: 1 diff --git a/qa/suites/rados/thrash/2-recovery-overrides/more-partial-recovery.yaml b/qa/suites/rados/thrash/2-recovery-overrides/more-partial-recovery.yaml new file mode 100644 index 000000000..88f15f2f6 --- /dev/null +++ b/qa/suites/rados/thrash/2-recovery-overrides/more-partial-recovery.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + osd_object_clean_region_max_num_intervals: 1000 diff --git a/qa/suites/rados/thrash/3-scrub-overrides/$ b/qa/suites/rados/thrash/3-scrub-overrides/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/3-scrub-overrides/$ diff --git a/qa/suites/rados/thrash/3-scrub-overrides/.qa b/qa/suites/rados/thrash/3-scrub-overrides/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/3-scrub-overrides/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/3-scrub-overrides/default.yaml b/qa/suites/rados/thrash/3-scrub-overrides/default.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/3-scrub-overrides/default.yaml diff --git a/qa/suites/rados/thrash/3-scrub-overrides/max-simultaneous-scrubs-2.yaml b/qa/suites/rados/thrash/3-scrub-overrides/max-simultaneous-scrubs-2.yaml new file mode 100644 index 000000000..abf852e98 --- /dev/null +++ b/qa/suites/rados/thrash/3-scrub-overrides/max-simultaneous-scrubs-2.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd max scrubs: 2 diff --git a/qa/suites/rados/thrash/3-scrub-overrides/max-simultaneous-scrubs-3.yaml b/qa/suites/rados/thrash/3-scrub-overrides/max-simultaneous-scrubs-3.yaml new file mode 100644 index 000000000..3b3dfd61f --- /dev/null +++ b/qa/suites/rados/thrash/3-scrub-overrides/max-simultaneous-scrubs-3.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd max scrubs: 3 diff --git a/qa/suites/rados/thrash/backoff/.qa b/qa/suites/rados/thrash/backoff/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/backoff/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/backoff/normal.yaml b/qa/suites/rados/thrash/backoff/normal.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/backoff/normal.yaml diff --git a/qa/suites/rados/thrash/backoff/peering.yaml b/qa/suites/rados/thrash/backoff/peering.yaml new file mode 100644 index 000000000..66d06117e --- /dev/null +++ b/qa/suites/rados/thrash/backoff/peering.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + osd: + osd backoff on peering: true diff --git a/qa/suites/rados/thrash/backoff/peering_and_degraded.yaml b/qa/suites/rados/thrash/backoff/peering_and_degraded.yaml new file mode 100644 index 000000000..e61099065 --- /dev/null +++ b/qa/suites/rados/thrash/backoff/peering_and_degraded.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + osd: + osd backoff on peering: true + osd backoff on degraded: true diff --git a/qa/suites/rados/thrash/ceph.yaml b/qa/suites/rados/thrash/ceph.yaml new file mode 100644 index 000000000..67393c564 --- /dev/null +++ b/qa/suites/rados/thrash/ceph.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + osd: + debug monc: 20 diff --git a/qa/suites/rados/thrash/clusters/+ b/qa/suites/rados/thrash/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/clusters/+ diff --git a/qa/suites/rados/thrash/clusters/.qa b/qa/suites/rados/thrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/clusters/fixed-2.yaml b/qa/suites/rados/thrash/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rados/thrash/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash/clusters/openstack.yaml b/qa/suites/rados/thrash/clusters/openstack.yaml new file mode 100644 index 000000000..b0f3b9b4d --- /dev/null +++ b/qa/suites/rados/thrash/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rados/thrash/crc-failures/.qa b/qa/suites/rados/thrash/crc-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/crc-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/crc-failures/bad_map_crc_failure.yaml b/qa/suites/rados/thrash/crc-failures/bad_map_crc_failure.yaml new file mode 100644 index 000000000..5bbb4385e --- /dev/null +++ b/qa/suites/rados/thrash/crc-failures/bad_map_crc_failure.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + osd: + osd inject bad map crc probability: 0.1 + log-ignorelist: + - failed to encode map diff --git a/qa/suites/rados/thrash/crc-failures/default.yaml b/qa/suites/rados/thrash/crc-failures/default.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/crc-failures/default.yaml diff --git a/qa/suites/rados/thrash/d-balancer/.qa b/qa/suites/rados/thrash/d-balancer/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/d-balancer/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/d-balancer/crush-compat.yaml b/qa/suites/rados/thrash/d-balancer/crush-compat.yaml new file mode 100644 index 000000000..aa867660d --- /dev/null +++ b/qa/suites/rados/thrash/d-balancer/crush-compat.yaml @@ -0,0 +1,6 @@ +tasks: +- exec: + mon.a: + - while ! ceph balancer status ; do sleep 1 ; done + - ceph balancer mode crush-compat + - ceph balancer on diff --git a/qa/suites/rados/thrash/d-balancer/on.yaml b/qa/suites/rados/thrash/d-balancer/on.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/thrash/d-balancer/on.yaml diff --git a/qa/suites/rados/thrash/mon_election b/qa/suites/rados/thrash/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/thrash/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/thrash/msgr b/qa/suites/rados/thrash/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/rados/thrash/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/rados/thrash/msgr-failures/.qa b/qa/suites/rados/thrash/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/msgr-failures/fastclose.yaml b/qa/suites/rados/thrash/msgr-failures/fastclose.yaml new file mode 100644 index 000000000..ec45f8882 --- /dev/null +++ b/qa/suites/rados/thrash/msgr-failures/fastclose.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms tcp read timeout: 5 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/thrash/msgr-failures/few.yaml b/qa/suites/rados/thrash/msgr-failures/few.yaml new file mode 100644 index 000000000..cc9a3ae69 --- /dev/null +++ b/qa/suites/rados/thrash/msgr-failures/few.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + osd: + osd heartbeat use min delay socket: true + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/thrash/msgr-failures/osd-delay.yaml b/qa/suites/rados/thrash/msgr-failures/osd-delay.yaml new file mode 100644 index 000000000..d7cec6f36 --- /dev/null +++ b/qa/suites/rados/thrash/msgr-failures/osd-delay.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 2500 + ms inject delay type: osd + ms inject delay probability: .005 + ms inject delay max: 1 + ms inject internal delays: .002 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/thrash/msgr-failures/osd-dispatch-delay.yaml b/qa/suites/rados/thrash/msgr-failures/osd-dispatch-delay.yaml new file mode 100644 index 000000000..aff059fb8 --- /dev/null +++ b/qa/suites/rados/thrash/msgr-failures/osd-dispatch-delay.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + global: + osd debug inject dispatch delay duration: 0.1 + osd debug inject dispatch delay probability: 0.1 + diff --git a/qa/suites/rados/thrash/objectstore b/qa/suites/rados/thrash/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/thrash/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
\ No newline at end of file diff --git a/qa/suites/rados/thrash/rados.yaml b/qa/suites/rados/thrash/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/thrash/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/thrash/supported-random-distro$ b/qa/suites/rados/thrash/supported-random-distro$ new file mode 120000 index 000000000..7cef21eef --- /dev/null +++ b/qa/suites/rados/thrash/supported-random-distro$ @@ -0,0 +1 @@ +../basic/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rados/thrash/thrashers/.qa b/qa/suites/rados/thrash/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/thrashers/careful.yaml b/qa/suites/rados/thrash/thrashers/careful.yaml new file mode 100644 index 000000000..8190657f3 --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/careful.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + aggressive_pg_num_changes: false diff --git a/qa/suites/rados/thrash/thrashers/default.yaml b/qa/suites/rados/thrash/thrashers/default.yaml new file mode 100644 index 000000000..5a300a9ff --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/default.yaml @@ -0,0 +1,28 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd debug reject backfill probability: .3 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 + osd delete sleep: 1 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + chance_bluestore_reshard: 1 + bluestore_new_sharding: random diff --git a/qa/suites/rados/thrash/thrashers/mapgap.yaml b/qa/suites/rados/thrash/thrashers/mapgap.yaml new file mode 100644 index 000000000..3b34f5b6b --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/mapgap.yaml @@ -0,0 +1,27 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + - osd_map_cache_size + conf: + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + osd: + osd map cache size: 1 + osd scrub min interval: 60 + osd scrub max interval: 120 + osd scrub during recovery: false + osd max backfills: 6 +tasks: +- thrashosds: + timeout: 1800 + chance_pgnum_grow: 0.25 + chance_pgnum_shrink: 0.25 + chance_pgpnum_fix: 0.25 + chance_test_map_discontinuity: 2 diff --git a/qa/suites/rados/thrash/thrashers/morepggrow.yaml b/qa/suites/rados/thrash/thrashers/morepggrow.yaml new file mode 100644 index 000000000..f18a88711 --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/morepggrow.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + journal throttle high multiple: 2 + journal throttle max multiple: 10 + filestore queue throttle high multiple: 2 + filestore queue throttle max multiple: 10 + osd max backfills: 9 + log-ignorelist: + - but it is still running + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 3 + chance_pgpnum_fix: 1 +openstack: +- volumes: + size: 50 diff --git a/qa/suites/rados/thrash/thrashers/none.yaml b/qa/suites/rados/thrash/thrashers/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null 
+++ b/qa/suites/rados/thrash/thrashers/none.yaml diff --git a/qa/suites/rados/thrash/thrashers/pggrow.yaml b/qa/suites/rados/thrash/thrashers/pggrow.yaml new file mode 100644 index 000000000..54498d0cf --- /dev/null +++ b/qa/suites/rados/thrash/thrashers/pggrow.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost + conf: + osd: + osd scrub min interval: 60 + osd scrub max interval: 120 + filestore odsync write: true + osd max backfills: 2 + osd snap trim sleep: .5 + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 2 + chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/thrash/thrashosds-health.yaml b/qa/suites/rados/thrash/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/thrash/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
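The thrasher fragments above (careful, default, mapgap, morepggrow, pggrow) all wrap the same thrashosds task; they differ mainly in how aggressively PGs are resized and how recovery is throttled. A minimal sketch of the shape they share follows — the keys are the ones used above, while the comments are an interpretive reading rather than text from the files:

    overrides:
      ceph:
        conf:
          osd:
            osd max backfills: 3             # cap concurrent backfill reservations per OSD
            osd scrub min interval: 60       # scrub aggressively while thrashing
            osd scrub max interval: 120
          mon:
            mon min osdmap epochs: 50
            # prune full osdmaps regularly
            mon osdmap full prune min: 15
            mon osdmap full prune interval: 2
            mon osdmap full prune txsize: 2
    tasks:
    - thrashosds:
        timeout: 1200               # how long to wait for the cluster to recover between operations
        chance_pgnum_grow: 1        # relative odds of growing pg_num on a test pool
        chance_pgnum_shrink: 1      # ... of shrinking it (some fragments omit or reweight these)
        chance_pgpnum_fix: 1        # ... of realigning pgp_num with pg_num

none.yaml is intentionally empty, giving a no-thrashing baseline inside the same matrix.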
\ No newline at end of file diff --git a/qa/suites/rados/thrash/workloads/.qa b/qa/suites/rados/thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/thrash/workloads/admin_socket_objecter_requests.yaml b/qa/suites/rados/thrash/workloads/admin_socket_objecter_requests.yaml new file mode 100644 index 000000000..6885f72aa --- /dev/null +++ b/qa/suites/rados/thrash/workloads/admin_socket_objecter_requests.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + client.0: + admin socket: /var/run/ceph/ceph-$name.asok + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- radosbench: + clients: [client.0] + time: 150 +- admin_socket: + client.0: + objecter_requests: + test: "http://git.ceph.com/?p={repo};a=blob_plain;f=src/test/admin_socket/objecter_requests;hb={branch}" diff --git a/qa/suites/rados/thrash/workloads/cache-agent-big.yaml b/qa/suites/rados/thrash/workloads/cache-agent-big.yaml new file mode 100644 index 000000000..9ca2576d4 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache-agent-big.yaml @@ -0,0 +1,37 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + # override short_pg_log_entries.yaml (which sets these under [global]) + osd_min_pg_log_entries: 3000 + osd_max_pg_log_entries: 3000 +tasks: +- exec: + client.0: + - sudo ceph osd erasure-code-profile set myprofile crush-failure-domain=osd m=2 k=2 + - sudo ceph osd pool create base 4 4 erasure myprofile + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool set base min_size 2 + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 60 + - sudo ceph osd pool set cache target_max_objects 5000 +- rados: + clients: [client.0] + pools: [base] + ops: 10000 + objects: 6600 + max_seconds: 1200 + size: 1024 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/cache-agent-small.yaml b/qa/suites/rados/thrash/workloads/cache-agent-small.yaml new file mode 100644 index 000000000..108009e3b --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache-agent-small.yaml @@ -0,0 +1,35 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + # override short_pg_log_entries.yaml (which sets these under [global]) + osd_min_pg_log_entries: 3000 + osd_max_pg_log_entries: 3000 +tasks: +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 60 + - sudo ceph osd pool set cache target_max_objects 250 + - sudo ceph osd pool set cache min_read_recency_for_promote 2 + - sudo ceph osd pool set cache min_write_recency_for_promote 2 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/cache-pool-snaps-readproxy.yaml b/qa/suites/rados/thrash/workloads/cache-pool-snaps-readproxy.yaml new file mode 100644 index 000000000..f864e1170 --- /dev/null +++ 
b/qa/suites/rados/thrash/workloads/cache-pool-snaps-readproxy.yaml @@ -0,0 +1,40 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + # override short_pg_log_entries.yaml (which sets these under [global]) + osd_min_pg_log_entries: 3000 + osd_max_pg_log_entries: 3000 +tasks: +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache readproxy + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 3600 + - sudo ceph osd pool set cache target_max_objects 250 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + pool_snaps: true + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + cache_flush: 50 + cache_try_flush: 50 + cache_evict: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/rados/thrash/workloads/cache-pool-snaps.yaml b/qa/suites/rados/thrash/workloads/cache-pool-snaps.yaml new file mode 100644 index 000000000..6bf97c692 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache-pool-snaps.yaml @@ -0,0 +1,45 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + # override short_pg_log_entries.yaml (which sets these under [global]) + osd_min_pg_log_entries: 3000 + osd_max_pg_log_entries: 3000 +tasks: +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 3600 + - sudo ceph osd pool set cache target_max_objects 250 + - sudo ceph osd pool set cache min_read_recency_for_promote 0 + - sudo ceph osd pool set cache min_write_recency_for_promote 0 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + pool_snaps: true + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + cache_flush: 50 + cache_try_flush: 50 + cache_evict: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 +openstack: + - machine: + ram: 15000 # MB diff --git a/qa/suites/rados/thrash/workloads/cache-snaps-balanced.yaml b/qa/suites/rados/thrash/workloads/cache-snaps-balanced.yaml new file mode 100644 index 000000000..574a1f753 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache-snaps-balanced.yaml @@ -0,0 +1,41 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + # override short_pg_log_entries.yaml (which sets these under [global]) + osd_min_pg_log_entries: 3000 + osd_max_pg_log_entries: 3000 +tasks: +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set 
cache hit_set_period 3600 + - sudo ceph osd pool set cache target_max_objects 250 + - sudo ceph osd pool set cache min_read_recency_for_promote 2 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + balance_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + cache_flush: 50 + cache_try_flush: 50 + cache_evict: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/rados/thrash/workloads/cache-snaps.yaml b/qa/suites/rados/thrash/workloads/cache-snaps.yaml new file mode 100644 index 000000000..6d11f4cf1 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache-snaps.yaml @@ -0,0 +1,40 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + # override short_pg_log_entries.yaml (which sets these under [global]) + osd_min_pg_log_entries: 3000 + osd_max_pg_log_entries: 3000 +tasks: +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 3600 + - sudo ceph osd pool set cache target_max_objects 250 + - sudo ceph osd pool set cache min_read_recency_for_promote 2 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + cache_flush: 50 + cache_try_flush: 50 + cache_evict: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/rados/thrash/workloads/cache.yaml b/qa/suites/rados/thrash/workloads/cache.yaml new file mode 100644 index 000000000..bd9daac7a --- /dev/null +++ b/qa/suites/rados/thrash/workloads/cache.yaml @@ -0,0 +1,37 @@ +overrides: + ceph: + log-ignorelist: + - must scrub before tier agent can activate + - \(POOL_APP_NOT_ENABLED\) + conf: + osd: + # override short_pg_log_entries.yaml (which sets these under [global]) + osd_min_pg_log_entries: 3000 + osd_max_pg_log_entries: 3000 +tasks: +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 3600 + - sudo ceph osd pool set cache min_read_recency_for_promote 0 + - sudo ceph osd pool set cache min_write_recency_for_promote 0 +- rados: + clients: [client.0] + pools: [base] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + cache_flush: 50 + cache_try_flush: 50 + cache_evict: 50 diff --git a/qa/suites/rados/thrash/workloads/dedup-io-mixed.yaml b/qa/suites/rados/thrash/workloads/dedup-io-mixed.yaml new file mode 100644 index 000000000..7758525a6 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/dedup-io-mixed.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- exec: + client.0: + - sudo ceph osd pool create low_tier 4 +- rados: + clients: [client.0] + low_tier_pool: 'low_tier' + ops: 1500 + objects: 50 + set_chunk: true + enable_dedup: true + 
dedup_chunk_size: '131072' + dedup_chunk_algo: 'fastcdc' + op_weights: + read: 100 + write: 50 + set_chunk: 30 + tier_promote: 10 + tier_flush: 5 + tier_evict: 10 diff --git a/qa/suites/rados/thrash/workloads/dedup-io-snaps.yaml b/qa/suites/rados/thrash/workloads/dedup-io-snaps.yaml new file mode 100644 index 000000000..3d2ce3026 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/dedup-io-snaps.yaml @@ -0,0 +1,27 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- exec: + client.0: + - sudo ceph osd pool create low_tier 4 +- rados: + clients: [client.0] + low_tier_pool: 'low_tier' + ops: 1500 + objects: 50 + set_chunk: true + enable_dedup: true + dedup_chunk_size: '131072' + dedup_chunk_algo: 'fastcdc' + op_weights: + read: 100 + write: 50 + set_chunk: 30 + tier_promote: 10 + tier_flush: 5 + tier_evict: 10 + snap_create: 10 + snap_remove: 10 + rollback: 10 diff --git a/qa/suites/rados/thrash/workloads/pool-snaps-few-objects.yaml b/qa/suites/rados/thrash/workloads/pool-snaps-few-objects.yaml new file mode 100644 index 000000000..f60afb809 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/pool-snaps-few-objects.yaml @@ -0,0 +1,21 @@ +override: + conf: + osd: + osd deep scrub update digest min age: 0 + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + pool_snaps: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/rados_api_tests.yaml b/qa/suites/rados/thrash/workloads/rados_api_tests.yaml new file mode 100644 index 000000000..3e72897ae --- /dev/null +++ b/qa/suites/rados/thrash/workloads/rados_api_tests.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - \(POOL_APP_NOT_ENABLED\) + - \(PG_AVAILABILITY\) + crush_tunables: jewel + conf: + client: + debug ms: 1 + debug objecter: 20 + debug rados: 20 + mon: + mon warn on pool no app: false + debug mgrc: 20 + osd: + osd class load list: "*" + osd class default list: "*" +tasks: +- workunit: + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/rados/thrash/workloads/radosbench-high-concurrency.yaml b/qa/suites/rados/thrash/workloads/radosbench-high-concurrency.yaml new file mode 100644 index 000000000..afdb3794d --- /dev/null +++ b/qa/suites/rados/thrash/workloads/radosbench-high-concurrency.yaml @@ -0,0 +1,51 @@ +overrides: + ceph: + conf: + client.0: + debug ms: 1 + debug objecter: 20 + debug rados: 20 + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- full_sequential: + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 + - radosbench: + clients: [client.0] + concurrency: 128 + size: 8192 + time: 90 diff --git a/qa/suites/rados/thrash/workloads/radosbench.yaml b/qa/suites/rados/thrash/workloads/radosbench.yaml new file mode 100644 index 000000000..32efe0ba9 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/radosbench.yaml @@ -0,0 +1,26 @@ +overrides: + ceph: + conf: + 
client.0: + debug ms: 1 + debug objecter: 20 + debug rados: 20 + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 diff --git a/qa/suites/rados/thrash/workloads/redirect.yaml b/qa/suites/rados/thrash/workloads/redirect.yaml new file mode 100644 index 000000000..14cce6643 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/redirect.yaml @@ -0,0 +1,19 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- exec: + client.0: + - sudo ceph osd pool create low_tier 4 +- rados: + clients: [client.0] + low_tier_pool: 'low_tier' + ops: 4000 + objects: 500 + set_redirect: true + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/redirect_promote_tests.yaml b/qa/suites/rados/thrash/workloads/redirect_promote_tests.yaml new file mode 100644 index 000000000..23226771d --- /dev/null +++ b/qa/suites/rados/thrash/workloads/redirect_promote_tests.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- exec: + client.0: + - sudo ceph osd pool create low_tier 4 +- rados: + clients: [client.0] + low_tier_pool: 'low_tier' + ops: 4000 + objects: 500 + set_redirect: true + op_weights: + set_redirect: 100 + read: 50 + tier_promote: 30 diff --git a/qa/suites/rados/thrash/workloads/redirect_set_object.yaml b/qa/suites/rados/thrash/workloads/redirect_set_object.yaml new file mode 100644 index 000000000..7fe81435c --- /dev/null +++ b/qa/suites/rados/thrash/workloads/redirect_set_object.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- exec: + client.0: + - sudo ceph osd pool create low_tier 4 +- rados: + clients: [client.0] + low_tier_pool: 'low_tier' + ops: 4000 + objects: 500 + set_redirect: true + op_weights: + set_redirect: 100 + copy_from: 100 diff --git a/qa/suites/rados/thrash/workloads/set-chunks-read.yaml b/qa/suites/rados/thrash/workloads/set-chunks-read.yaml new file mode 100644 index 000000000..fa6d6a8b8 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/set-chunks-read.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- exec: + client.0: + - sudo ceph osd pool create low_tier 4 +- rados: + clients: [client.0] + low_tier_pool: 'low_tier' + ops: 4000 + objects: 300 + set_chunk: true + op_weights: + chunk_read: 100 + tier_promote: 10 diff --git a/qa/suites/rados/thrash/workloads/small-objects-balanced.yaml b/qa/suites/rados/thrash/workloads/small-objects-balanced.yaml new file mode 100644 index 000000000..ece22cd36 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/small-objects-balanced.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + crush_tunables: jewel + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + balance_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash/workloads/small-objects-localized.yaml b/qa/suites/rados/thrash/workloads/small-objects-localized.yaml new file mode 100644 index 000000000..ad791ee11 --- /dev/null +++ 
b/qa/suites/rados/thrash/workloads/small-objects-localized.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + crush_tunables: jewel + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + localize_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash/workloads/small-objects.yaml b/qa/suites/rados/thrash/workloads/small-objects.yaml new file mode 100644 index 000000000..6f9edfae8 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/small-objects.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + crush_tunables: jewel + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 400000 + max_seconds: 600 + max_in_flight: 64 + objects: 1024 + size: 16384 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 diff --git a/qa/suites/rados/thrash/workloads/snaps-few-objects-balanced.yaml b/qa/suites/rados/thrash/workloads/snaps-few-objects-balanced.yaml new file mode 100644 index 000000000..ffb6cbc8b --- /dev/null +++ b/qa/suites/rados/thrash/workloads/snaps-few-objects-balanced.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + balance_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/snaps-few-objects-localized.yaml b/qa/suites/rados/thrash/workloads/snaps-few-objects-localized.yaml new file mode 100644 index 000000000..eca004716 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/snaps-few-objects-localized.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + localize_reads: true + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/snaps-few-objects.yaml b/qa/suites/rados/thrash/workloads/snaps-few-objects.yaml new file mode 100644 index 000000000..955327b29 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/snaps-few-objects.yaml @@ -0,0 +1,17 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 50 + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 diff --git a/qa/suites/rados/thrash/workloads/write_fadvise_dontneed.yaml b/qa/suites/rados/thrash/workloads/write_fadvise_dontneed.yaml new file mode 100644 index 000000000..182fc1431 --- /dev/null +++ b/qa/suites/rados/thrash/workloads/write_fadvise_dontneed.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_fadvise_dontneed: true + op_weights: + write: 100 diff --git a/qa/suites/rados/upgrade/.qa b/qa/suites/rados/upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/upgrade/.qa @@ -0,0 +1 @@ +../.qa/
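The cache-agent-* and cache-* workloads a few hunks above all build a cache tier in front of a 'base' pool with the same ceph command sequence before driving rados I/O at it. Condensed here with interpretive annotations; the commands are the ones used above, but the exact hit_set_period and target_max_objects values vary per file:

    tasks:
    - exec:
        client.0:
        - sudo ceph osd pool create base 4                      # backing data pool
        - sudo ceph osd pool application enable base rados
        - sudo ceph osd pool create cache 4                     # pool that will serve as the cache tier
        - sudo ceph osd tier add base cache                     # attach 'cache' as a tier of 'base'
        - sudo ceph osd tier cache-mode cache writeback         # readproxy in the -readproxy variant
        - sudo ceph osd tier set-overlay base cache             # route client I/O for 'base' through 'cache'
        - sudo ceph osd pool set cache hit_set_type bloom       # track recent accesses for promotion decisions
        - sudo ceph osd pool set cache hit_set_count 8
        - sudo ceph osd pool set cache hit_set_period 3600      # 60 in the cache-agent variants
        - sudo ceph osd pool set cache target_max_objects 250   # small cap so the tier agent flushes/evicts constantly

Where min_read_recency_for_promote / min_write_recency_for_promote are set, an object must appear in that many recent hit sets before it is promoted into the cache pool.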
\ No newline at end of file diff --git a/qa/suites/rados/upgrade/parallel b/qa/suites/rados/upgrade/parallel new file mode 120000 index 000000000..84b63d6a5 --- /dev/null +++ b/qa/suites/rados/upgrade/parallel @@ -0,0 +1 @@ +../../upgrade/quincy-x/parallel/
\ No newline at end of file diff --git a/qa/suites/rados/valgrind-leaks/% b/qa/suites/rados/valgrind-leaks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/% diff --git a/qa/suites/rados/valgrind-leaks/.qa b/qa/suites/rados/valgrind-leaks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/valgrind-leaks/1-start.yaml b/qa/suites/rados/valgrind-leaks/1-start.yaml new file mode 100644 index 000000000..1cdd8a688 --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/1-start.yaml @@ -0,0 +1,31 @@ +openstack: + - volumes: # attached to each instance + count: 2 + size: 10 # GB + +overrides: + install: + ceph: + debuginfo: true + ceph: + log-ignorelist: + - overall HEALTH_ + - \(PG_ + - \(POOL_APP_NOT_ENABLED\) + conf: + global: + osd heartbeat grace: 40 + osd max object name len: 460 + osd max object namespace len: 64 + mon: + mon osd crush smoke test: false + osd: + osd fast shutdown: false + valgrind: + mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] + osd: [--tool=memcheck] +roles: +- [mon.a, mon.b, mon.c, mgr.x, mgr.y, osd.0, osd.1, osd.2, client.0] +tasks: +- install: +- ceph: diff --git a/qa/suites/rados/valgrind-leaks/2-inject-leak/.qa b/qa/suites/rados/valgrind-leaks/2-inject-leak/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/2-inject-leak/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/valgrind-leaks/2-inject-leak/mon.yaml b/qa/suites/rados/valgrind-leaks/2-inject-leak/mon.yaml new file mode 100644 index 000000000..695a9f8af --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/2-inject-leak/mon.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + expect_valgrind_errors: true +tasks: +- exec: + mon.a: + - ceph tell mon.a leak_some_memory diff --git a/qa/suites/rados/valgrind-leaks/2-inject-leak/none.yaml b/qa/suites/rados/valgrind-leaks/2-inject-leak/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/2-inject-leak/none.yaml diff --git a/qa/suites/rados/valgrind-leaks/2-inject-leak/osd.yaml b/qa/suites/rados/valgrind-leaks/2-inject-leak/osd.yaml new file mode 100644 index 000000000..f249f16e1 --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/2-inject-leak/osd.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + expect_valgrind_errors: true +tasks: +- exec: + mon.a: + - ceph tell osd.0 leak_some_memory diff --git a/qa/suites/rados/valgrind-leaks/centos_latest.yaml b/qa/suites/rados/valgrind-leaks/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/rados/valgrind-leaks/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
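The valgrind-leaks suite is a '%' matrix: 1-start.yaml always runs the daemons under valgrind memcheck, and exactly one 2-inject-leak fragment (mon, osd or none) is layered on top. Roughly, the combined job looks like the sketch below; this is an illustration of how the fragments compose, not a file from the tree:

    overrides:
      ceph:
        expect_valgrind_errors: true   # from 2-inject-leak/mon.yaml or osd.yaml; absent with none.yaml
        valgrind:
          mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes]
          osd: [--tool=memcheck]
    tasks:
    - install:
    - ceph:
    - exec:
        mon.a:
        - ceph tell mon.a leak_some_memory   # deliberately leak so memcheck has something to report

With none.yaml there is no injection and any reported leak fails the job; with mon.yaml or osd.yaml, expect_valgrind_errors: true inverts that, so the job fails unless memcheck flags the deliberately leaked memory.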
\ No newline at end of file diff --git a/qa/suites/rados/verify/% b/qa/suites/rados/verify/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/verify/% diff --git a/qa/suites/rados/verify/.qa b/qa/suites/rados/verify/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/verify/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/verify/centos_latest.yaml b/qa/suites/rados/verify/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/rados/verify/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rados/verify/ceph.yaml b/qa/suites/rados/verify/ceph.yaml new file mode 100644 index 000000000..fc5ce350a --- /dev/null +++ b/qa/suites/rados/verify/ceph.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + mon: + mon min osdmap epochs: 50 + paxos service trim min: 10 + # prune full osdmaps regularly + mon osdmap full prune min: 15 + mon osdmap full prune interval: 2 + mon osdmap full prune txsize: 2 + osd: + debug monc: 20 +tasks: +- install: +- ceph: diff --git a/qa/suites/rados/verify/clusters/+ b/qa/suites/rados/verify/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/verify/clusters/+ diff --git a/qa/suites/rados/verify/clusters/.qa b/qa/suites/rados/verify/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/verify/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/verify/clusters/fixed-2.yaml b/qa/suites/rados/verify/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rados/verify/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rados/verify/clusters/openstack.yaml b/qa/suites/rados/verify/clusters/openstack.yaml new file mode 100644 index 000000000..e559d9126 --- /dev/null +++ b/qa/suites/rados/verify/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/rados/verify/d-thrash/.qa b/qa/suites/rados/verify/d-thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/verify/d-thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/verify/d-thrash/default/+ b/qa/suites/rados/verify/d-thrash/default/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/verify/d-thrash/default/+ diff --git a/qa/suites/rados/verify/d-thrash/default/.qa b/qa/suites/rados/verify/d-thrash/default/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/verify/d-thrash/default/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/verify/d-thrash/default/default.yaml b/qa/suites/rados/verify/d-thrash/default/default.yaml new file mode 100644 index 000000000..41b35926f --- /dev/null +++ b/qa/suites/rados/verify/d-thrash/default/default.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 diff --git a/qa/suites/rados/verify/d-thrash/default/thrashosds-health.yaml b/qa/suites/rados/verify/d-thrash/default/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rados/verify/d-thrash/default/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rados/verify/d-thrash/none.yaml b/qa/suites/rados/verify/d-thrash/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rados/verify/d-thrash/none.yaml diff --git a/qa/suites/rados/verify/mon_election b/qa/suites/rados/verify/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/rados/verify/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/rados/verify/msgr b/qa/suites/rados/verify/msgr new file mode 120000 index 000000000..57bee80db --- /dev/null +++ b/qa/suites/rados/verify/msgr @@ -0,0 +1 @@ +.qa/msgr
\ No newline at end of file diff --git a/qa/suites/rados/verify/msgr-failures/.qa b/qa/suites/rados/verify/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/verify/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/verify/msgr-failures/few.yaml b/qa/suites/rados/verify/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rados/verify/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rados/verify/objectstore b/qa/suites/rados/verify/objectstore new file mode 120000 index 000000000..848c65f9e --- /dev/null +++ b/qa/suites/rados/verify/objectstore @@ -0,0 +1 @@ +.qa/objectstore_debug
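rados/verify (and several rbd suites later in this diff) reuse the same msgr-failures/few.yaml fragment. A commonly used reading of its settings is kept as comments in this sketch rather than asserted as authoritative:

    overrides:
      ceph:
        conf:
          global:
            ms inject socket failures: 5000        # roughly one injected socket failure per 5000 messenger events
            mon client directed command retry: 5   # retry directed mon commands dropped by those failures
        log-ignorelist:
        - \(OSD_SLOW_PING_TIME                     # heartbeats get slow or lost while sockets are being killed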
\ No newline at end of file diff --git a/qa/suites/rados/verify/rados.yaml b/qa/suites/rados/verify/rados.yaml new file mode 120000 index 000000000..d256979c0 --- /dev/null +++ b/qa/suites/rados/verify/rados.yaml @@ -0,0 +1 @@ +.qa/config/rados.yaml
\ No newline at end of file diff --git a/qa/suites/rados/verify/tasks/.qa b/qa/suites/rados/verify/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/verify/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/verify/tasks/mon_recovery.yaml b/qa/suites/rados/verify/tasks/mon_recovery.yaml new file mode 100644 index 000000000..06d9602e6 --- /dev/null +++ b/qa/suites/rados/verify/tasks/mon_recovery.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(OSDMAP_FLAGS\) + - \(SMALLER_PGP_NUM\) + - \(POOL_APP_NOT_ENABLED\) + - \(SLOW OPS\) + - slow request +tasks: +- mon_recovery: diff --git a/qa/suites/rados/verify/tasks/rados_api_tests.yaml b/qa/suites/rados/verify/tasks/rados_api_tests.yaml new file mode 100644 index 000000000..e5a54e69e --- /dev/null +++ b/qa/suites/rados/verify/tasks/rados_api_tests.yaml @@ -0,0 +1,34 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(POOL_FULL\) + - \(SMALLER_PGP_NUM\) + - \(SLOW_OPS\) + - \(CACHE_POOL_NEAR_FULL\) + - \(POOL_APP_NOT_ENABLED\) + - \(PG_AVAILABILITY\) + - \(OBJECT_MISPLACED\) + - slow request + conf: + client: + debug ms: 1 + debug objecter: 20 + debug rados: 20 + debug monc: 20 + mon: + mon warn on pool no app: false + osd: + osd class load list: "*" + osd class default list: "*" + osd client watch timeout: 120 +tasks: +- workunit: + timeout: 6h + env: + ALLOW_TIMEOUTS: "1" + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/rados/verify/tasks/rados_cls_all.yaml b/qa/suites/rados/verify/tasks/rados_cls_all.yaml new file mode 100644 index 000000000..0236326f3 --- /dev/null +++ b/qa/suites/rados/verify/tasks/rados_cls_all.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + osd: + osd_class_load_list: "*" + osd_class_default_list: "*" + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +tasks: +- workunit: + clients: + client.0: + - cls diff --git a/qa/suites/rados/verify/validater/.qa b/qa/suites/rados/verify/validater/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rados/verify/validater/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rados/verify/validater/lockdep.yaml b/qa/suites/rados/verify/validater/lockdep.yaml new file mode 100644 index 000000000..25f84355c --- /dev/null +++ b/qa/suites/rados/verify/validater/lockdep.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + global: + lockdep: true diff --git a/qa/suites/rados/verify/validater/valgrind.yaml b/qa/suites/rados/verify/validater/valgrind.yaml new file mode 100644 index 000000000..03accceaf --- /dev/null +++ b/qa/suites/rados/verify/validater/valgrind.yaml @@ -0,0 +1,31 @@ +# see http://tracker.ceph.com/issues/20360 and http://tracker.ceph.com/issues/18126 +os_type: centos + +overrides: + install: + ceph: + debuginfo: true + ceph: + conf: + global: + osd heartbeat grace: 80 + mon: + mon osd crush smoke test: false + osd: + osd fast shutdown: false + debug bluestore: 1 + debug bluefs: 1 + log-ignorelist: + - overall HEALTH_ +# valgrind is slow.. we might get PGs stuck peering etc + - \(PG_ +# mons sometimes are left off of initial quorum due to valgrind slowness. ok to ignore here because we'll still catch an actual crash due to the core + - \(MON_DOWN\) + - \(SLOW_OPS\) + - slow request + valgrind: + mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] + osd: [--tool=memcheck] + mds: [--tool=memcheck] +# https://tracker.ceph.com/issues/38621 +# mgr: [--tool=memcheck] diff --git a/qa/suites/rbd/.qa b/qa/suites/rbd/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/basic/% b/qa/suites/rbd/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/basic/% diff --git a/qa/suites/rbd/basic/.qa b/qa/suites/rbd/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/basic/base/.qa b/qa/suites/rbd/basic/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/basic/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/basic/base/install.yaml b/qa/suites/rbd/basic/base/install.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/rbd/basic/base/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rbd/basic/clusters/+ b/qa/suites/rbd/basic/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/basic/clusters/+ diff --git a/qa/suites/rbd/basic/clusters/.qa b/qa/suites/rbd/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/basic/clusters/fixed-1.yaml b/qa/suites/rbd/basic/clusters/fixed-1.yaml new file mode 120000 index 000000000..02df5dd0c --- /dev/null +++ b/qa/suites/rbd/basic/clusters/fixed-1.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-1.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/basic/clusters/openstack.yaml b/qa/suites/rbd/basic/clusters/openstack.yaml new file mode 100644 index 000000000..f4d1349b4 --- /dev/null +++ b/qa/suites/rbd/basic/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 30 # GB diff --git a/qa/suites/rbd/basic/conf b/qa/suites/rbd/basic/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/basic/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/basic/msgr-failures/.qa b/qa/suites/rbd/basic/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/basic/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/basic/msgr-failures/few.yaml b/qa/suites/rbd/basic/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rbd/basic/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rbd/basic/objectstore b/qa/suites/rbd/basic/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/basic/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/basic/supported-random-distro$ b/qa/suites/rbd/basic/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/basic/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/basic/tasks/.qa b/qa/suites/rbd/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml b/qa/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml new file mode 100644 index 000000000..5bb734d24 --- /dev/null +++ b/qa/suites/rbd/basic/tasks/rbd_api_tests_old_format.yaml @@ -0,0 +1,12 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh diff --git a/qa/suites/rbd/basic/tasks/rbd_cls_tests.yaml b/qa/suites/rbd/basic/tasks/rbd_cls_tests.yaml new file mode 100644 index 000000000..51b35e2e1 --- /dev/null +++ b/qa/suites/rbd/basic/tasks/rbd_cls_tests.yaml @@ -0,0 +1,7 @@ +tasks: +- workunit: + clients: + client.0: + - cls/test_cls_rbd.sh + - cls/test_cls_lock.sh + - cls/test_cls_journal.sh diff --git a/qa/suites/rbd/basic/tasks/rbd_lock_and_fence.yaml b/qa/suites/rbd/basic/tasks/rbd_lock_and_fence.yaml new file mode 100644 index 000000000..d2c80ad65 --- /dev/null +++ b/qa/suites/rbd/basic/tasks/rbd_lock_and_fence.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_lock_fence.sh diff --git a/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml b/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml new file mode 100644 index 000000000..eae484a97 --- /dev/null +++ b/qa/suites/rbd/basic/tasks/rbd_python_api_tests_old_format.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - \(SLOW_OPS\) + - slow request + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh diff --git a/qa/suites/rbd/cli/% b/qa/suites/rbd/cli/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/cli/% diff --git a/qa/suites/rbd/cli/.qa b/qa/suites/rbd/cli/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli/.qa @@ -0,0 +1 @@ +../.qa/
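Most task fragments in rbd/basic are one-line workunit invocations. As used throughout this tree, the workunit task runs the named script from qa/workunits/ in a checkout on the listed client, so a fragment shaped like the sketch below runs qa/workunits/rbd/test_librbd.sh on client.0 (the comments are explanatory, not from the files):

    tasks:
    - workunit:
        clients:
          client.0:                 # role that executes the script
          - rbd/test_librbd.sh      # path relative to qa/workunits in the checked-out ceph repo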
\ No newline at end of file diff --git a/qa/suites/rbd/cli/base/.qa b/qa/suites/rbd/cli/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/cli/base/install.yaml b/qa/suites/rbd/cli/base/install.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/rbd/cli/base/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rbd/cli/clusters b/qa/suites/rbd/cli/clusters new file mode 120000 index 000000000..ae92569e8 --- /dev/null +++ b/qa/suites/rbd/cli/clusters @@ -0,0 +1 @@ +../basic/clusters
\ No newline at end of file diff --git a/qa/suites/rbd/cli/conf b/qa/suites/rbd/cli/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/cli/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/cli/data-pool b/qa/suites/rbd/cli/data-pool new file mode 120000 index 000000000..3df827572 --- /dev/null +++ b/qa/suites/rbd/cli/data-pool @@ -0,0 +1 @@ +.qa/rbd/data-pool/
\ No newline at end of file diff --git a/qa/suites/rbd/cli/features/.qa b/qa/suites/rbd/cli/features/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli/features/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/cli/features/defaults.yaml b/qa/suites/rbd/cli/features/defaults.yaml new file mode 100644 index 000000000..75afd68dd --- /dev/null +++ b/qa/suites/rbd/cli/features/defaults.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default features: 61 diff --git a/qa/suites/rbd/cli/features/journaling.yaml b/qa/suites/rbd/cli/features/journaling.yaml new file mode 100644 index 000000000..6cea62a88 --- /dev/null +++ b/qa/suites/rbd/cli/features/journaling.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default features: 125 diff --git a/qa/suites/rbd/cli/features/layering.yaml b/qa/suites/rbd/cli/features/layering.yaml new file mode 100644 index 000000000..429b8e145 --- /dev/null +++ b/qa/suites/rbd/cli/features/layering.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default features: 1 diff --git a/qa/suites/rbd/cli/msgr-failures/.qa b/qa/suites/rbd/cli/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
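The rbd/cli features fragments pick image features through the numeric 'rbd default features' bitmask. Decomposed into the standard RBD feature bits (a reading of the values used above, not text from the files):

    overrides:
      ceph:
        conf:
          client:
            # layering=1, exclusive-lock=4, object-map=8, fast-diff=16, deep-flatten=32, journaling=64
            rbd default features: 61     # defaults.yaml: 1+4+8+16+32 (everything above except journaling)
            # journaling.yaml uses 125 = 61+64 (adds journaling); layering.yaml uses 1 (layering only)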
\ No newline at end of file diff --git a/qa/suites/rbd/cli/msgr-failures/few.yaml b/qa/suites/rbd/cli/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rbd/cli/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rbd/cli/objectstore b/qa/suites/rbd/cli/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/cli/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/cli/supported-random-distro$ b/qa/suites/rbd/cli/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/cli/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/cli/workloads/.qa b/qa/suites/rbd/cli/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/cli/workloads/rbd_cli_generic.yaml b/qa/suites/rbd/cli/workloads/rbd_cli_generic.yaml new file mode 100644 index 000000000..be43b3e8d --- /dev/null +++ b/qa/suites/rbd/cli/workloads/rbd_cli_generic.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/cli_generic.sh diff --git a/qa/suites/rbd/cli/workloads/rbd_cli_groups.yaml b/qa/suites/rbd/cli/workloads/rbd_cli_groups.yaml new file mode 100644 index 000000000..6ff836342 --- /dev/null +++ b/qa/suites/rbd/cli/workloads/rbd_cli_groups.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/rbd_groups.sh diff --git a/qa/suites/rbd/cli/workloads/rbd_cli_import_export.yaml b/qa/suites/rbd/cli/workloads/rbd_cli_import_export.yaml new file mode 100644 index 000000000..b08f2612f --- /dev/null +++ b/qa/suites/rbd/cli/workloads/rbd_cli_import_export.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/import_export.sh diff --git a/qa/suites/rbd/cli/workloads/rbd_cli_luks_encryption.yaml b/qa/suites/rbd/cli/workloads/rbd_cli_luks_encryption.yaml new file mode 100644 index 000000000..bb5d1608e --- /dev/null +++ b/qa/suites/rbd/cli/workloads/rbd_cli_luks_encryption.yaml @@ -0,0 +1,9 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- workunit: + clients: + client.0: + - rbd/luks-encryption.sh diff --git a/qa/suites/rbd/cli/workloads/rbd_cli_migration.yaml b/qa/suites/rbd/cli/workloads/rbd_cli_migration.yaml new file mode 100644 index 000000000..b04ac08f7 --- /dev/null +++ b/qa/suites/rbd/cli/workloads/rbd_cli_migration.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/cli_migration.sh diff --git a/qa/suites/rbd/cli/workloads/rbd_support_module_recovery.yaml b/qa/suites/rbd/cli/workloads/rbd_support_module_recovery.yaml new file mode 100644 index 000000000..aa4d0001f --- /dev/null +++ b/qa/suites/rbd/cli/workloads/rbd_support_module_recovery.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + mgr: + debug rbd: 20 +tasks: +- install: + extra_system_packages: + - fio +- workunit: + clients: + client.0: + - rbd/rbd_support_module_recovery.sh diff --git a/qa/suites/rbd/cli_v1/% b/qa/suites/rbd/cli_v1/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/cli_v1/% diff --git a/qa/suites/rbd/cli_v1/.qa b/qa/suites/rbd/cli_v1/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli_v1/.qa @@ -0,0 +1 @@ +../.qa/
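Two of the cli workloads above pull in extra software at install time through different knobs: extra_packages under the install override appears to request packages built alongside Ceph (rbd-nbd for the LUKS workunit), while extra_system_packages in the install task pulls ordinary distro packages (fio for the rbd_support_module_recovery workunit). A juxtaposed sketch, not a file from the tree:

    # rbd_cli_luks_encryption.yaml: package built with Ceph, via the install override
    overrides:
      install:
        ceph:
          extra_packages: [rbd-nbd]
    # rbd_support_module_recovery.yaml: distro package, via the install task itself
    tasks:
    - install:
        extra_system_packages:
        - fio
    - workunit:
        clients:
          client.0:
          - rbd/rbd_support_module_recovery.sh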
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/base/.qa b/qa/suites/rbd/cli_v1/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli_v1/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/base/install.yaml b/qa/suites/rbd/cli_v1/base/install.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/rbd/cli_v1/base/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rbd/cli_v1/clusters b/qa/suites/rbd/cli_v1/clusters new file mode 120000 index 000000000..ae92569e8 --- /dev/null +++ b/qa/suites/rbd/cli_v1/clusters @@ -0,0 +1 @@ +../basic/clusters
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/conf b/qa/suites/rbd/cli_v1/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/cli_v1/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/features/.qa b/qa/suites/rbd/cli_v1/features/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli_v1/features/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/features/format-1.yaml b/qa/suites/rbd/cli_v1/features/format-1.yaml new file mode 100644 index 000000000..9c5320835 --- /dev/null +++ b/qa/suites/rbd/cli_v1/features/format-1.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default format: 1 diff --git a/qa/suites/rbd/cli_v1/msgr-failures/.qa b/qa/suites/rbd/cli_v1/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli_v1/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/msgr-failures/few.yaml b/qa/suites/rbd/cli_v1/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rbd/cli_v1/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rbd/cli_v1/objectstore b/qa/suites/rbd/cli_v1/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/cli_v1/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/supported-random-distro$ b/qa/suites/rbd/cli_v1/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/cli_v1/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/workloads/.qa b/qa/suites/rbd/cli_v1/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/cli_v1/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/cli_v1/workloads/rbd_cli_generic.yaml b/qa/suites/rbd/cli_v1/workloads/rbd_cli_generic.yaml new file mode 100644 index 000000000..be43b3e8d --- /dev/null +++ b/qa/suites/rbd/cli_v1/workloads/rbd_cli_generic.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/cli_generic.sh diff --git a/qa/suites/rbd/cli_v1/workloads/rbd_cli_import_export.yaml b/qa/suites/rbd/cli_v1/workloads/rbd_cli_import_export.yaml new file mode 100644 index 000000000..b08f2612f --- /dev/null +++ b/qa/suites/rbd/cli_v1/workloads/rbd_cli_import_export.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/import_export.sh diff --git a/qa/suites/rbd/encryption/% b/qa/suites/rbd/encryption/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/encryption/% diff --git a/qa/suites/rbd/encryption/.qa b/qa/suites/rbd/encryption/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/encryption/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/cache/.qa b/qa/suites/rbd/encryption/cache/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/encryption/cache/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/cache/none.yaml b/qa/suites/rbd/encryption/cache/none.yaml new file mode 100644 index 000000000..42fd9c955 --- /dev/null +++ b/qa/suites/rbd/encryption/cache/none.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: false diff --git a/qa/suites/rbd/encryption/cache/writearound.yaml b/qa/suites/rbd/encryption/cache/writearound.yaml new file mode 100644 index 000000000..b6f8e319b --- /dev/null +++ b/qa/suites/rbd/encryption/cache/writearound.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache policy: writearound diff --git a/qa/suites/rbd/encryption/cache/writeback.yaml b/qa/suites/rbd/encryption/cache/writeback.yaml new file mode 100644 index 000000000..a55ec1df0 --- /dev/null +++ b/qa/suites/rbd/encryption/cache/writeback.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache policy: writeback diff --git a/qa/suites/rbd/encryption/cache/writethrough.yaml b/qa/suites/rbd/encryption/cache/writethrough.yaml new file mode 100644 index 000000000..6dc29e16c --- /dev/null +++ b/qa/suites/rbd/encryption/cache/writethrough.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache max dirty: 0 diff --git a/qa/suites/rbd/encryption/clusters/+ b/qa/suites/rbd/encryption/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/encryption/clusters/+ diff --git a/qa/suites/rbd/encryption/clusters/.qa b/qa/suites/rbd/encryption/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/encryption/clusters/.qa @@ -0,0 +1 @@ +../.qa/
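The rbd/encryption cache facet toggles the librbd client-side cache: none.yaml disables it, writearound.yaml and writeback.yaml select a cache policy, and writethrough.yaml allows no dirty data, which effectively makes the cache write-through. One variant is chosen per job; a combined, commented sketch:

    tasks:
    - install:
    - ceph:
        conf:
          client:
            rbd cache: true               # false in none.yaml
            rbd cache policy: writeback   # writearound.yaml picks writearound instead
            # writethrough.yaml sets no policy and instead uses:
            # rbd cache max dirty: 0      # no dirty data allowed, i.e. effectively write-through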
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/clusters/fixed-3.yaml b/qa/suites/rbd/encryption/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/rbd/encryption/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/clusters/openstack.yaml b/qa/suites/rbd/encryption/clusters/openstack.yaml new file mode 100644 index 000000000..9c39c7e5f --- /dev/null +++ b/qa/suites/rbd/encryption/clusters/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 30000 # MB + cpus: 1 + volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rbd/encryption/conf b/qa/suites/rbd/encryption/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/encryption/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/data-pool b/qa/suites/rbd/encryption/data-pool new file mode 120000 index 000000000..3df827572 --- /dev/null +++ b/qa/suites/rbd/encryption/data-pool @@ -0,0 +1 @@ +.qa/rbd/data-pool/
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/features/.qa b/qa/suites/rbd/encryption/features/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/encryption/features/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/features/defaults.yaml b/qa/suites/rbd/encryption/features/defaults.yaml new file mode 100644 index 000000000..75afd68dd --- /dev/null +++ b/qa/suites/rbd/encryption/features/defaults.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default features: 61 diff --git a/qa/suites/rbd/encryption/msgr-failures/.qa b/qa/suites/rbd/encryption/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/encryption/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/msgr-failures/few.yaml b/qa/suites/rbd/encryption/msgr-failures/few.yaml new file mode 100644 index 000000000..ca8e09853 --- /dev/null +++ b/qa/suites/rbd/encryption/msgr-failures/few.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - but it is still running + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rbd/encryption/objectstore b/qa/suites/rbd/encryption/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/encryption/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/supported-random-distro$ b/qa/suites/rbd/encryption/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/encryption/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/workloads/.qa b/qa/suites/rbd/encryption/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1.yaml new file mode 100644 index 000000000..cb3659f97 --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1.yaml @@ -0,0 +1,13 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + parent_encryption_format: luks1 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1_luks1.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1_luks1.yaml new file mode 100644 index 000000000..1db50d600 --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1_luks1.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + parent_encryption_format: luks1 + encryption_format: luks1 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1_luks2.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1_luks2.yaml new file mode 100644 index 000000000..a8ef5f2dd --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks1_luks2.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + parent_encryption_format: luks1 + encryption_format: luks2 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2.yaml new file mode 100644 index 000000000..203372d60 --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2.yaml @@ -0,0 +1,13 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + parent_encryption_format: luks2 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2_luks1.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2_luks1.yaml new file mode 100644 index 000000000..727e5c32a --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2_luks1.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + parent_encryption_format: luks2 + encryption_format: luks1 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2_luks2.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2_luks2.yaml new file mode 100644 index 000000000..43ded129f --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_luks2_luks2.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + parent_encryption_format: luks2 + encryption_format: luks2 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_none_luks1.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_none_luks1.yaml new file mode 100644 index 000000000..7f3f3776f --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_none_luks1.yaml @@ -0,0 +1,13 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + 
encryption_format: luks1 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/encryption/workloads/qemu_xfstests_none_luks2.yaml b/qa/suites/rbd/encryption/workloads/qemu_xfstests_none_luks2.yaml new file mode 100644 index 000000000..c9d9829a9 --- /dev/null +++ b/qa/suites/rbd/encryption/workloads/qemu_xfstests_none_luks2.yaml @@ -0,0 +1,13 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- qemu: + all: + clone: true + encryption_format: luks2 + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/immutable-object-cache/% b/qa/suites/rbd/immutable-object-cache/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/% diff --git a/qa/suites/rbd/immutable-object-cache/.qa b/qa/suites/rbd/immutable-object-cache/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/immutable-object-cache/clusters/+ b/qa/suites/rbd/immutable-object-cache/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/clusters/+ diff --git a/qa/suites/rbd/immutable-object-cache/clusters/.qa b/qa/suites/rbd/immutable-object-cache/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/immutable-object-cache/clusters/fix-2.yaml b/qa/suites/rbd/immutable-object-cache/clusters/fix-2.yaml new file mode 100644 index 000000000..dbccecbce --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/clusters/fix-2.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1] +- [mon.b, mgr.y, osd.2, osd.3, client.0] diff --git a/qa/suites/rbd/immutable-object-cache/clusters/openstack.yaml b/qa/suites/rbd/immutable-object-cache/clusters/openstack.yaml new file mode 100644 index 000000000..b113e4f2e --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/rbd/immutable-object-cache/conf b/qa/suites/rbd/immutable-object-cache/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/immutable-object-cache/pool/.qa b/qa/suites/rbd/immutable-object-cache/pool/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/pool/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/immutable-object-cache/pool/ceph_and_immutable_object_cache.yaml b/qa/suites/rbd/immutable-object-cache/pool/ceph_and_immutable_object_cache.yaml new file mode 100644 index 000000000..e977c1ba6 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/pool/ceph_and_immutable_object_cache.yaml @@ -0,0 +1,12 @@ +tasks: +- install: + extra_packages: ['ceph-immutable-object-cache'] +- ceph: + conf: + client: + rbd parent cache enabled: true + rbd plugins: parent_cache + immutable_object_cache_path: /tmp/ceph-immutable-object-cache + immutable_object_cache_max_size: 10G +- immutable_object_cache: + client.0: diff --git a/qa/suites/rbd/immutable-object-cache/supported-random-distro$ b/qa/suites/rbd/immutable-object-cache/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/immutable-object-cache/workloads/.qa b/qa/suites/rbd/immutable-object-cache/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/immutable-object-cache/workloads/c_api_tests_with_defaults.yaml b/qa/suites/rbd/immutable-object-cache/workloads/c_api_tests_with_defaults.yaml new file mode 120000 index 000000000..949032725 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/workloads/c_api_tests_with_defaults.yaml @@ -0,0 +1 @@ +../../librbd/workloads/c_api_tests_with_defaults.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/immutable-object-cache/workloads/fio_on_immutable_object_cache.yaml b/qa/suites/rbd/immutable-object-cache/workloads/fio_on_immutable_object_cache.yaml new file mode 100644 index 000000000..08d76ee15 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/workloads/fio_on_immutable_object_cache.yaml @@ -0,0 +1,11 @@ +tasks: +- rbd_fio: + client.0: + thick-provision: true + fio-io-size: 100% + formats: [2] + features: [[layering]] + io-engine: rbd + test-clone-io: 1 + rw: randread + runtime: 600 diff --git a/qa/suites/rbd/immutable-object-cache/workloads/qemu_on_immutable_object_cache_and_thrash.yaml b/qa/suites/rbd/immutable-object-cache/workloads/qemu_on_immutable_object_cache_and_thrash.yaml new file mode 100644 index 000000000..33a5cf0b1 --- /dev/null +++ b/qa/suites/rbd/immutable-object-cache/workloads/qemu_on_immutable_object_cache_and_thrash.yaml @@ -0,0 +1,11 @@ +tasks: +- qemu: + client.0: + clone: true + test: qa/run_xfstests_qemu.sh + type: block + cpus: 4 + memory: 4096 + disks: 3 +- immutable_object_cache_thrash: + client.0: diff --git a/qa/suites/rbd/iscsi/% b/qa/suites/rbd/iscsi/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/iscsi/% diff --git a/qa/suites/rbd/iscsi/.qa b/qa/suites/rbd/iscsi/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/iscsi/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/iscsi/0-single-container-host.yaml b/qa/suites/rbd/iscsi/0-single-container-host.yaml new file mode 120000 index 000000000..7406e749c --- /dev/null +++ b/qa/suites/rbd/iscsi/0-single-container-host.yaml @@ -0,0 +1 @@ +.qa/distros/single-container-host.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/iscsi/base/.qa b/qa/suites/rbd/iscsi/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/iscsi/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/iscsi/base/install.yaml b/qa/suites/rbd/iscsi/base/install.yaml new file mode 100644 index 000000000..5c5a6c31f --- /dev/null +++ b/qa/suites/rbd/iscsi/base/install.yaml @@ -0,0 +1,14 @@ +use_shaman: True +tasks: +- cephadm: +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls +- install: + extra_packages: + - iscsi-initiator-utils + - device-mapper-multipath diff --git a/qa/suites/rbd/iscsi/cluster/+ b/qa/suites/rbd/iscsi/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/iscsi/cluster/+ diff --git a/qa/suites/rbd/iscsi/cluster/.qa b/qa/suites/rbd/iscsi/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/iscsi/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/iscsi/cluster/fixed-3.yaml b/qa/suites/rbd/iscsi/cluster/fixed-3.yaml new file mode 100644 index 000000000..426ea32cf --- /dev/null +++ b/qa/suites/rbd/iscsi/cluster/fixed-3.yaml @@ -0,0 +1,19 @@ +roles: +- - host.a + - mon.a + - mgr.x + - osd.0 + - osd.1 + - client.0 + - ceph.iscsi.iscsi.a +- - mon.b + - osd.2 + - osd.3 + - osd.4 + - client.1 +- - mon.c + - osd.5 + - osd.6 + - osd.7 + - client.2 + - ceph.iscsi.iscsi.b diff --git a/qa/suites/rbd/iscsi/cluster/openstack.yaml b/qa/suites/rbd/iscsi/cluster/openstack.yaml new file mode 100644 index 000000000..40fef4770 --- /dev/null +++ b/qa/suites/rbd/iscsi/cluster/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 8000 # MB + cpus: 1 + volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rbd/iscsi/conf b/qa/suites/rbd/iscsi/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/iscsi/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/iscsi/workloads/.qa b/qa/suites/rbd/iscsi/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/iscsi/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/iscsi/workloads/cephadm_iscsi.yaml b/qa/suites/rbd/iscsi/workloads/cephadm_iscsi.yaml new file mode 100644 index 000000000..202e6d8b5 --- /dev/null +++ b/qa/suites/rbd/iscsi/workloads/cephadm_iscsi.yaml @@ -0,0 +1,21 @@ +tasks: +- ceph_iscsi_client: + clients: [client.1] +- cram: + parallel: False + clients: + client.0: + - src/test/cli-integration/rbd/gwcli_create.t + client.1: + - src/test/cli-integration/rbd/iscsi_client.t + client.2: + - src/test/cli-integration/rbd/gwcli_delete.t +- cram: + parallel: False + clients: + client.0: + - src/test/cli-integration/rbd/rest_api_create.t + client.1: + - src/test/cli-integration/rbd/iscsi_client.t + client.2: + - src/test/cli-integration/rbd/rest_api_delete.t diff --git a/qa/suites/rbd/librbd/% b/qa/suites/rbd/librbd/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/librbd/% diff --git a/qa/suites/rbd/librbd/.qa b/qa/suites/rbd/librbd/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/librbd/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/cache/.qa b/qa/suites/rbd/librbd/cache/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/librbd/cache/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/cache/none.yaml b/qa/suites/rbd/librbd/cache/none.yaml new file mode 100644 index 000000000..42fd9c955 --- /dev/null +++ b/qa/suites/rbd/librbd/cache/none.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: false diff --git a/qa/suites/rbd/librbd/cache/writearound.yaml b/qa/suites/rbd/librbd/cache/writearound.yaml new file mode 100644 index 000000000..b6f8e319b --- /dev/null +++ b/qa/suites/rbd/librbd/cache/writearound.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache policy: writearound diff --git a/qa/suites/rbd/librbd/cache/writeback.yaml b/qa/suites/rbd/librbd/cache/writeback.yaml new file mode 100644 index 000000000..a55ec1df0 --- /dev/null +++ b/qa/suites/rbd/librbd/cache/writeback.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache policy: writeback diff --git a/qa/suites/rbd/librbd/cache/writethrough.yaml b/qa/suites/rbd/librbd/cache/writethrough.yaml new file mode 100644 index 000000000..6dc29e16c --- /dev/null +++ b/qa/suites/rbd/librbd/cache/writethrough.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache max dirty: 0 diff --git a/qa/suites/rbd/librbd/clusters/+ b/qa/suites/rbd/librbd/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/librbd/clusters/+ diff --git a/qa/suites/rbd/librbd/clusters/.qa b/qa/suites/rbd/librbd/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/librbd/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/clusters/fixed-3.yaml b/qa/suites/rbd/librbd/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/rbd/librbd/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/clusters/openstack.yaml b/qa/suites/rbd/librbd/clusters/openstack.yaml new file mode 100644 index 000000000..b0f3b9b4d --- /dev/null +++ b/qa/suites/rbd/librbd/clusters/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rbd/librbd/conf b/qa/suites/rbd/librbd/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/librbd/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/data-pool b/qa/suites/rbd/librbd/data-pool new file mode 120000 index 000000000..3df827572 --- /dev/null +++ b/qa/suites/rbd/librbd/data-pool @@ -0,0 +1 @@ +.qa/rbd/data-pool/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/extra-conf/.qa b/qa/suites/rbd/librbd/extra-conf/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/librbd/extra-conf/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/extra-conf/copy-on-read.yaml b/qa/suites/rbd/librbd/extra-conf/copy-on-read.yaml new file mode 100644 index 000000000..ce99e7ec0 --- /dev/null +++ b/qa/suites/rbd/librbd/extra-conf/copy-on-read.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd clone copy on read: true diff --git a/qa/suites/rbd/librbd/extra-conf/none.yaml b/qa/suites/rbd/librbd/extra-conf/none.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/librbd/extra-conf/none.yaml diff --git a/qa/suites/rbd/librbd/extra-conf/permit-partial-discard.yaml b/qa/suites/rbd/librbd/extra-conf/permit-partial-discard.yaml new file mode 100644 index 000000000..a99294696 --- /dev/null +++ b/qa/suites/rbd/librbd/extra-conf/permit-partial-discard.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd skip partial discard: false diff --git a/qa/suites/rbd/librbd/min-compat-client/.qa b/qa/suites/rbd/librbd/min-compat-client/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/librbd/min-compat-client/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/min-compat-client/default.yaml b/qa/suites/rbd/librbd/min-compat-client/default.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/librbd/min-compat-client/default.yaml diff --git a/qa/suites/rbd/librbd/min-compat-client/octopus.yaml b/qa/suites/rbd/librbd/min-compat-client/octopus.yaml new file mode 100644 index 000000000..1cd2df483 --- /dev/null +++ b/qa/suites/rbd/librbd/min-compat-client/octopus.yaml @@ -0,0 +1,4 @@ +tasks: +- exec: + client.0: + - sudo ceph osd set-require-min-compat-client octopus diff --git a/qa/suites/rbd/librbd/msgr-failures/.qa b/qa/suites/rbd/librbd/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/librbd/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/msgr-failures/few.yaml b/qa/suites/rbd/librbd/msgr-failures/few.yaml new file mode 100644 index 000000000..df2a313a6 --- /dev/null +++ b/qa/suites/rbd/librbd/msgr-failures/few.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - but it is still running + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rbd/librbd/objectstore b/qa/suites/rbd/librbd/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/librbd/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/supported-random-distro$ b/qa/suites/rbd/librbd/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/librbd/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/workloads/.qa b/qa/suites/rbd/librbd/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/librbd/workloads/c_api_tests.yaml b/qa/suites/rbd/librbd/workloads/c_api_tests.yaml new file mode 100644 index 000000000..eb63fd771 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/c_api_tests.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/rbd/librbd/workloads/c_api_tests_with_defaults.yaml b/qa/suites/rbd/librbd/workloads/c_api_tests_with_defaults.yaml new file mode 100644 index 000000000..ec4194598 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/c_api_tests_with_defaults.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" diff --git a/qa/suites/rbd/librbd/workloads/c_api_tests_with_journaling.yaml b/qa/suites/rbd/librbd/workloads/c_api_tests_with_journaling.yaml new file mode 100644 index 000000000..6c3686806 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/c_api_tests_with_journaling.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "125" diff --git a/qa/suites/rbd/librbd/workloads/fsx.yaml b/qa/suites/rbd/librbd/workloads/fsx.yaml new file mode 100644 index 000000000..6d8cd5f1a --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/fsx.yaml @@ -0,0 +1,4 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 20000 diff --git a/qa/suites/rbd/librbd/workloads/python_api_tests.yaml b/qa/suites/rbd/librbd/workloads/python_api_tests.yaml new file mode 100644 index 000000000..516c323df --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/python_api_tests.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/rbd/librbd/workloads/python_api_tests_with_defaults.yaml b/qa/suites/rbd/librbd/workloads/python_api_tests_with_defaults.yaml new file mode 100644 index 000000000..831f3762b --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/python_api_tests_with_defaults.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "61" diff --git a/qa/suites/rbd/librbd/workloads/python_api_tests_with_journaling.yaml b/qa/suites/rbd/librbd/workloads/python_api_tests_with_journaling.yaml new file mode 100644 index 000000000..8bd751146 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/python_api_tests_with_journaling.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "125" diff --git a/qa/suites/rbd/librbd/workloads/rbd_fio.yaml b/qa/suites/rbd/librbd/workloads/rbd_fio.yaml new file mode 100644 index 000000000..ff788c6a8 --- /dev/null +++ b/qa/suites/rbd/librbd/workloads/rbd_fio.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fio: + client.0: + fio-io-size: 80% + formats: [2] + features: 
[[layering],[layering,exclusive-lock,object-map]] + io-engine: rbd + test-clone-io: 1 + rw: randrw + runtime: 900 diff --git a/qa/suites/rbd/maintenance/% b/qa/suites/rbd/maintenance/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/maintenance/% diff --git a/qa/suites/rbd/maintenance/.qa b/qa/suites/rbd/maintenance/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/maintenance/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/base/.qa b/qa/suites/rbd/maintenance/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/maintenance/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/base/install.yaml b/qa/suites/rbd/maintenance/base/install.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/rbd/maintenance/base/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rbd/maintenance/clusters/+ b/qa/suites/rbd/maintenance/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/maintenance/clusters/+ diff --git a/qa/suites/rbd/maintenance/clusters/.qa b/qa/suites/rbd/maintenance/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/maintenance/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/clusters/fixed-3.yaml b/qa/suites/rbd/maintenance/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/rbd/maintenance/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/clusters/openstack.yaml b/qa/suites/rbd/maintenance/clusters/openstack.yaml new file mode 120000 index 000000000..3e5028f9a --- /dev/null +++ b/qa/suites/rbd/maintenance/clusters/openstack.yaml @@ -0,0 +1 @@ +../../qemu/clusters/openstack.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/conf b/qa/suites/rbd/maintenance/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/maintenance/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/objectstore b/qa/suites/rbd/maintenance/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/maintenance/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/qemu/.qa b/qa/suites/rbd/maintenance/qemu/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/maintenance/qemu/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/qemu/xfstests.yaml b/qa/suites/rbd/maintenance/qemu/xfstests.yaml new file mode 100644 index 000000000..135103b34 --- /dev/null +++ b/qa/suites/rbd/maintenance/qemu/xfstests.yaml @@ -0,0 +1,14 @@ +tasks: +- parallel: + - io_workload + - op_workload +io_workload: + sequential: + - qemu: + client.0: + clone: true + type: block + disks: 3 + time_wait: 120 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/maintenance/supported-random-distro$ b/qa/suites/rbd/maintenance/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/maintenance/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/workloads/.qa b/qa/suites/rbd/maintenance/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/maintenance/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/maintenance/workloads/dynamic_features.yaml b/qa/suites/rbd/maintenance/workloads/dynamic_features.yaml new file mode 100644 index 000000000..d7e1c1ed0 --- /dev/null +++ b/qa/suites/rbd/maintenance/workloads/dynamic_features.yaml @@ -0,0 +1,8 @@ +op_workload: + sequential: + - workunit: + clients: + client.0: + - rbd/qemu_dynamic_features.sh + env: + IMAGE_NAME: client.0.1-clone diff --git a/qa/suites/rbd/maintenance/workloads/dynamic_features_no_cache.yaml b/qa/suites/rbd/maintenance/workloads/dynamic_features_no_cache.yaml new file mode 100644 index 000000000..5d80e6bd3 --- /dev/null +++ b/qa/suites/rbd/maintenance/workloads/dynamic_features_no_cache.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + client: + rbd cache: false +op_workload: + sequential: + - workunit: + clients: + client.0: + - rbd/qemu_dynamic_features.sh + env: + IMAGE_NAME: client.0.1-clone + timeout: 0 diff --git a/qa/suites/rbd/maintenance/workloads/rebuild_object_map.yaml b/qa/suites/rbd/maintenance/workloads/rebuild_object_map.yaml new file mode 100644 index 000000000..308158f61 --- /dev/null +++ b/qa/suites/rbd/maintenance/workloads/rebuild_object_map.yaml @@ -0,0 +1,8 @@ +op_workload: + sequential: + - workunit: + clients: + client.0: + - rbd/qemu_rebuild_object_map.sh + env: + IMAGE_NAME: client.0.1-clone diff --git a/qa/suites/rbd/migration/% b/qa/suites/rbd/migration/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/migration/% diff --git a/qa/suites/rbd/migration/.qa b/qa/suites/rbd/migration/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/migration/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/migration/1-base b/qa/suites/rbd/migration/1-base new file mode 120000 index 000000000..fd10a859d --- /dev/null +++ b/qa/suites/rbd/migration/1-base @@ -0,0 +1 @@ +../thrash/base
\ No newline at end of file diff --git a/qa/suites/rbd/migration/2-clusters/+ b/qa/suites/rbd/migration/2-clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/migration/2-clusters/+ diff --git a/qa/suites/rbd/migration/2-clusters/.qa b/qa/suites/rbd/migration/2-clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/migration/2-clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/migration/2-clusters/fixed-3.yaml b/qa/suites/rbd/migration/2-clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/rbd/migration/2-clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/migration/2-clusters/openstack.yaml b/qa/suites/rbd/migration/2-clusters/openstack.yaml new file mode 100644 index 000000000..9c39c7e5f --- /dev/null +++ b/qa/suites/rbd/migration/2-clusters/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 30000 # MB + cpus: 1 + volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rbd/migration/3-objectstore b/qa/suites/rbd/migration/3-objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/migration/3-objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/migration/4-supported-random-distro$ b/qa/suites/rbd/migration/4-supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/migration/4-supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/migration/5-data-pool b/qa/suites/rbd/migration/5-data-pool new file mode 120000 index 000000000..3df827572 --- /dev/null +++ b/qa/suites/rbd/migration/5-data-pool @@ -0,0 +1 @@ +.qa/rbd/data-pool/
\ No newline at end of file diff --git a/qa/suites/rbd/migration/6-prepare/.qa b/qa/suites/rbd/migration/6-prepare/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/migration/6-prepare/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/migration/6-prepare/qcow2-file.yaml b/qa/suites/rbd/migration/6-prepare/qcow2-file.yaml new file mode 100644 index 000000000..8e3b9f958 --- /dev/null +++ b/qa/suites/rbd/migration/6-prepare/qcow2-file.yaml @@ -0,0 +1,9 @@ +tasks: + - exec: + client.0: + - mkdir /home/ubuntu/cephtest/migration + - wget -nv -O /home/ubuntu/cephtest/migration/base.client.0.qcow2 http://download.ceph.com/qa/ubuntu-12.04.qcow2 + - qemu-img create -f qcow2 /home/ubuntu/cephtest/migration/empty.qcow2 1G + - echo '{"type":"qcow","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/base.client.0.qcow2"}}' | rbd migration prepare --import-only --source-spec-path - client.0.0 + - rbd migration prepare --import-only --source-spec '{"type":"qcow","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.qcow2"}}' client.0.1 + - rbd migration prepare --import-only --source-spec '{"type":"qcow","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.qcow2"}}' client.0.2 diff --git a/qa/suites/rbd/migration/6-prepare/qcow2-http.yaml b/qa/suites/rbd/migration/6-prepare/qcow2-http.yaml new file mode 100644 index 000000000..890d14417 --- /dev/null +++ b/qa/suites/rbd/migration/6-prepare/qcow2-http.yaml @@ -0,0 +1,8 @@ +tasks: + - exec: + client.0: + - mkdir /home/ubuntu/cephtest/migration + - qemu-img create -f qcow2 /home/ubuntu/cephtest/migration/empty.qcow2 1G + - echo '{"type":"qcow","stream":{"type":"http","url":"http://download.ceph.com/qa/ubuntu-12.04.qcow2"}}' | rbd migration prepare --import-only --source-spec-path - client.0.0 + - rbd migration prepare --import-only --source-spec '{"type":"qcow","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.qcow2"}}' client.0.1 + - rbd migration prepare --import-only --source-spec '{"type":"qcow","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.qcow2"}}' client.0.2 diff --git a/qa/suites/rbd/migration/6-prepare/raw-file.yaml b/qa/suites/rbd/migration/6-prepare/raw-file.yaml new file mode 100644 index 000000000..0035534ef --- /dev/null +++ b/qa/suites/rbd/migration/6-prepare/raw-file.yaml @@ -0,0 +1,10 @@ +tasks: + - exec: + client.0: + - mkdir /home/ubuntu/cephtest/migration + - wget -nv -O /home/ubuntu/cephtest/migration/base.client.0.qcow2 http://download.ceph.com/qa/ubuntu-12.04.qcow2 + - qemu-img convert -f qcow2 -O raw /home/ubuntu/cephtest/migration/base.client.0.qcow2 /home/ubuntu/cephtest/migration/base.client.0.raw + - dd if=/dev/zero of=/home/ubuntu/cephtest/migration/empty.raw count=1 bs=1G + - echo '{"type":"raw","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/base.client.0.raw"}}' | rbd migration prepare --import-only --source-spec-path - client.0.0 + - rbd migration prepare --import-only --source-spec '{"type":"raw","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.raw"}}' client.0.1 + - rbd migration prepare --import-only --source-spec '{"type":"raw","stream":{"type":"file","file_path":"/home/ubuntu/cephtest/migration/empty.raw"}}' client.0.2 diff --git a/qa/suites/rbd/migration/7-io-workloads/.qa b/qa/suites/rbd/migration/7-io-workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/migration/7-io-workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/migration/7-io-workloads/qemu_xfstests.yaml b/qa/suites/rbd/migration/7-io-workloads/qemu_xfstests.yaml new file mode 100644 index 000000000..2617a4192 --- /dev/null +++ b/qa/suites/rbd/migration/7-io-workloads/qemu_xfstests.yaml @@ -0,0 +1,15 @@ +io_workload: + sequential: + - qemu: + client.0: + clone: true + type: block + disks: + - action: none + image_name: client.0.0 + - action: none + image_name: client.0.1 + - action: none + image_name: client.0.2 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/migration/8-migrate-workloads/.qa b/qa/suites/rbd/migration/8-migrate-workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/migration/8-migrate-workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/migration/8-migrate-workloads/execute.yaml b/qa/suites/rbd/migration/8-migrate-workloads/execute.yaml new file mode 100644 index 000000000..d7735c426 --- /dev/null +++ b/qa/suites/rbd/migration/8-migrate-workloads/execute.yaml @@ -0,0 +1,14 @@ +tasks: + - parallel: + - io_workload + - migrate_workload +migrate_workload: + sequential: + - exec: + client.0: + - sleep 60 + - rbd migration execute client.0.0 + - sleep 60 + - rbd migration commit client.0.0 + - sleep 60 + - rbd migration execute client.0.1 diff --git a/qa/suites/rbd/migration/9-cleanup/.qa b/qa/suites/rbd/migration/9-cleanup/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/migration/9-cleanup/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/migration/9-cleanup/cleanup.yaml b/qa/suites/rbd/migration/9-cleanup/cleanup.yaml new file mode 100644 index 000000000..18c2bb5f4 --- /dev/null +++ b/qa/suites/rbd/migration/9-cleanup/cleanup.yaml @@ -0,0 +1,4 @@ +tasks: + - exec: + client.0: + - rm -rf /home/ubuntu/cephtest/migration diff --git a/qa/suites/rbd/migration/conf b/qa/suites/rbd/migration/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/migration/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/% b/qa/suites/rbd/mirror-thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/% diff --git a/qa/suites/rbd/mirror-thrash/.qa b/qa/suites/rbd/mirror-thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/base/.qa b/qa/suites/rbd/mirror-thrash/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/base/install.yaml b/qa/suites/rbd/mirror-thrash/base/install.yaml new file mode 100644 index 000000000..365c3a8cb --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/base/install.yaml @@ -0,0 +1,9 @@ +meta: +- desc: run two ceph clusters and install rbd-mirror +tasks: +- install: + extra_packages: [rbd-mirror] +- ceph: + cluster: cluster1 +- ceph: + cluster: cluster2 diff --git a/qa/suites/rbd/mirror-thrash/clients/.qa b/qa/suites/rbd/mirror-thrash/clients/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/clients/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/clients/mirror.yaml b/qa/suites/rbd/mirror-thrash/clients/mirror.yaml new file mode 100644 index 000000000..1b6808d85 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/clients/mirror.yaml @@ -0,0 +1,36 @@ +meta: +- desc: configure the permissions for client.mirror +overrides: + ceph: + conf: + client: + rbd default features: 125 + debug rbd: 20 + debug rbd_mirror: 15 + log to stderr: false + # override to make these names predictable + client.mirror.0: + admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok + pid file: /var/run/ceph/rbd-mirror.$cluster-$name.pid + client.mirror.1: + admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok + pid file: /var/run/ceph/rbd-mirror.$cluster-$name.pid + client.mirror.2: + admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok + pid file: /var/run/ceph/rbd-mirror.$cluster-$name.pid + client.mirror.3: + admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok + pid file: /var/run/ceph/rbd-mirror.$cluster-$name.pid +tasks: +- exec: + cluster1.client.mirror.0: + - "sudo ceph --cluster cluster1 auth caps client.mirror mon 'profile rbd-mirror-peer' osd 'profile rbd'" + - "sudo ceph --cluster cluster1 auth caps client.mirror.0 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster1 auth caps client.mirror.1 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster1 auth caps client.mirror.2 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster1 auth caps client.mirror.3 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror mon 'profile rbd-mirror-peer' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror.0 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror.1 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror.2 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror.3 mon 'profile rbd-mirror' osd 'profile rbd'" diff --git a/qa/suites/rbd/mirror-thrash/cluster/+ b/qa/suites/rbd/mirror-thrash/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/cluster/+ diff --git a/qa/suites/rbd/mirror-thrash/cluster/.qa b/qa/suites/rbd/mirror-thrash/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/cluster/2-node.yaml b/qa/suites/rbd/mirror-thrash/cluster/2-node.yaml new file mode 100644 index 000000000..74f9fb3c4 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/cluster/2-node.yaml @@ -0,0 +1,31 @@ +meta: +- desc: 2 ceph clusters with 1 mon and 3 osds each +roles: +- - cluster1.mon.a + - cluster1.mgr.x + - cluster2.mgr.x + - cluster1.osd.0 + - cluster1.osd.1 + - cluster1.osd.2 + - cluster1.client.0 + - cluster2.client.0 +- - cluster2.mon.a + - cluster2.osd.0 + - cluster2.osd.1 + - cluster2.osd.2 + - cluster1.client.mirror + - cluster1.client.mirror.0 + - cluster1.client.mirror.1 + - cluster1.client.mirror.2 + - cluster1.client.mirror.3 + - cluster1.client.mirror.4 + - cluster1.client.mirror.5 + - cluster1.client.mirror.6 + - cluster2.client.mirror + - cluster2.client.mirror.0 + - cluster2.client.mirror.1 + - cluster2.client.mirror.2 + - cluster2.client.mirror.3 + - cluster2.client.mirror.4 + - cluster2.client.mirror.5 + - cluster2.client.mirror.6 diff --git a/qa/suites/rbd/mirror-thrash/cluster/openstack.yaml b/qa/suites/rbd/mirror-thrash/cluster/openstack.yaml new file mode 100644 index 000000000..f4d1349b4 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/cluster/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 30 # GB diff --git a/qa/suites/rbd/mirror-thrash/conf b/qa/suites/rbd/mirror-thrash/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/msgr-failures b/qa/suites/rbd/mirror-thrash/msgr-failures new file mode 120000 index 000000000..db59eb46c --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/msgr-failures @@ -0,0 +1 @@ +../basic/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/objectstore b/qa/suites/rbd/mirror-thrash/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/policy/.qa b/qa/suites/rbd/mirror-thrash/policy/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/policy/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/policy/none.yaml b/qa/suites/rbd/mirror-thrash/policy/none.yaml new file mode 100644 index 000000000..e0a7c1185 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/policy/none.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd mirror image policy type: none diff --git a/qa/suites/rbd/mirror-thrash/policy/simple.yaml b/qa/suites/rbd/mirror-thrash/policy/simple.yaml new file mode 100644 index 000000000..ee3082d3c --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/policy/simple.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd mirror image policy type: simple diff --git a/qa/suites/rbd/mirror-thrash/rbd-mirror/.qa b/qa/suites/rbd/mirror-thrash/rbd-mirror/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/rbd-mirror/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/rbd-mirror/four-per-cluster.yaml b/qa/suites/rbd/mirror-thrash/rbd-mirror/four-per-cluster.yaml new file mode 100644 index 000000000..70df34e40 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/rbd-mirror/four-per-cluster.yaml @@ -0,0 +1,31 @@ +meta: +- desc: run four rbd-mirror daemons per cluster +tasks: +- rbd-mirror: + client: cluster1.client.mirror.0 + thrash: True +- rbd-mirror: + client: cluster1.client.mirror.1 + thrash: True +- rbd-mirror: + client: cluster1.client.mirror.2 + thrash: True +- rbd-mirror: + client: cluster1.client.mirror.3 + thrash: True +- rbd-mirror: + client: cluster2.client.mirror.0 + thrash: True +- rbd-mirror: + client: cluster2.client.mirror.1 + thrash: True +- rbd-mirror: + client: cluster2.client.mirror.2 + thrash: True +- rbd-mirror: + client: cluster2.client.mirror.3 + thrash: True +- rbd-mirror-thrash: + cluster: cluster1 +- rbd-mirror-thrash: + cluster: cluster2 diff --git a/qa/suites/rbd/mirror-thrash/supported-random-distro$ b/qa/suites/rbd/mirror-thrash/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/workloads/.qa b/qa/suites/rbd/mirror-thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-fsx-workunit.yaml b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-fsx-workunit.yaml new file mode 100644 index 000000000..d2db0f520 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-fsx-workunit.yaml @@ -0,0 +1,33 @@ +meta: +- desc: run multiple FSX workloads to simulate cluster load and then verify + that the images were replicated +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_fsx_prepare.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_NOCLEANUP: '1' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_USE_RBD_MIRROR: '1' +- rbd_fsx: + clients: + - cluster1.client.mirror.0 + - cluster1.client.mirror.1 + - cluster1.client.mirror.2 + - cluster1.client.mirror.3 + - cluster1.client.mirror.4 + - cluster1.client.mirror.5 + ops: 6000 + keep_images: true + pool_name: mirror +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_fsx_compare.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_USE_RBD_MIRROR: '1' + timeout: 6h diff --git a/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-journal-stress-workunit.yaml b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-journal-stress-workunit.yaml new file mode 100644 index 000000000..9579b70d6 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-journal-stress-workunit.yaml @@ -0,0 +1,15 @@ +meta: +- desc: run the rbd_mirror_stress.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_stress.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_USE_RBD_MIRROR: '1' + MIRROR_POOL_MODE: 'pool' + MIRROR_IMAGE_MODE: 'journal' + timeout: 6h diff --git a/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-journal-workunit.yaml b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-journal-workunit.yaml new file mode 100644 index 000000000..5f12b2239 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-journal-workunit.yaml @@ -0,0 +1,12 @@ +meta: +- desc: run the rbd_mirror_journal.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_journal.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_USE_RBD_MIRROR: '1' diff --git a/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-exclusive-lock.yaml b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-exclusive-lock.yaml new file mode 100644 index 000000000..87632483d --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-exclusive-lock.yaml @@ -0,0 +1,16 @@ +meta: +- desc: run the rbd_mirror_stress.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_stress.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + MIRROR_POOL_MODE: 'image' + MIRROR_IMAGE_MODE: 'snapshot' + RBD_IMAGE_FEATURES: 'layering,exclusive-lock' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_USE_RBD_MIRROR: '1' + timeout: 6h diff --git 
a/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-fast-diff.yaml b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-fast-diff.yaml new file mode 100644 index 000000000..fc43b0ec2 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-fast-diff.yaml @@ -0,0 +1,16 @@ +meta: +- desc: run the rbd_mirror_stress.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_stress.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + MIRROR_POOL_MODE: 'image' + MIRROR_IMAGE_MODE: 'snapshot' + RBD_IMAGE_FEATURES: 'layering,exclusive-lock,object-map,fast-diff' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_USE_RBD_MIRROR: '1' + timeout: 6h diff --git a/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-minimum.yaml b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-minimum.yaml new file mode 100644 index 000000000..af0ea1240 --- /dev/null +++ b/qa/suites/rbd/mirror-thrash/workloads/rbd-mirror-snapshot-stress-workunit-minimum.yaml @@ -0,0 +1,16 @@ +meta: +- desc: run the rbd_mirror_stress.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_stress.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + MIRROR_POOL_MODE: 'image' + MIRROR_IMAGE_MODE: 'snapshot' + RBD_IMAGE_FEATURES: 'layering' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_USE_RBD_MIRROR: '1' + timeout: 6h diff --git a/qa/suites/rbd/mirror/% b/qa/suites/rbd/mirror/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/mirror/% diff --git a/qa/suites/rbd/mirror/.qa b/qa/suites/rbd/mirror/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/base b/qa/suites/rbd/mirror/base new file mode 120000 index 000000000..8d9546e21 --- /dev/null +++ b/qa/suites/rbd/mirror/base @@ -0,0 +1 @@ +../mirror-thrash/base
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/clients/+ b/qa/suites/rbd/mirror/clients/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/mirror/clients/+ diff --git a/qa/suites/rbd/mirror/clients/.qa b/qa/suites/rbd/mirror/clients/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror/clients/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/clients/mirror-extra.yaml b/qa/suites/rbd/mirror/clients/mirror-extra.yaml new file mode 100644 index 000000000..5eed5cfcd --- /dev/null +++ b/qa/suites/rbd/mirror/clients/mirror-extra.yaml @@ -0,0 +1,24 @@ +meta: +- desc: configure the permissions for client.mirror +overrides: + ceph: + conf: + # override to make these names predictable + client.mirror.4: + admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok + pid file: /var/run/ceph/rbd-mirror.$cluster-$name.pid + client.mirror.5: + admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok + pid file: /var/run/ceph/rbd-mirror.$cluster-$name.pid + client.mirror.6: + admin socket: /var/run/ceph/rbd-mirror.$cluster-$name.asok + pid file: /var/run/ceph/rbd-mirror.$cluster-$name.pid +tasks: +- exec: + cluster1.client.mirror.0: + - "sudo ceph --cluster cluster1 auth caps client.mirror.4 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster1 auth caps client.mirror.5 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster1 auth caps client.mirror.6 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror.4 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror.5 mon 'profile rbd-mirror' osd 'profile rbd'" + - "sudo ceph --cluster cluster2 auth caps client.mirror.6 mon 'profile rbd-mirror' osd 'profile rbd'" diff --git a/qa/suites/rbd/mirror/clients/mirror.yaml b/qa/suites/rbd/mirror/clients/mirror.yaml new file mode 120000 index 000000000..0dfb0c785 --- /dev/null +++ b/qa/suites/rbd/mirror/clients/mirror.yaml @@ -0,0 +1 @@ +../../mirror-thrash/clients/mirror.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/cluster b/qa/suites/rbd/mirror/cluster new file mode 120000 index 000000000..3fc87a150 --- /dev/null +++ b/qa/suites/rbd/mirror/cluster @@ -0,0 +1 @@ +../mirror-thrash/cluster
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/conf b/qa/suites/rbd/mirror/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/mirror/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/msgr-failures b/qa/suites/rbd/mirror/msgr-failures new file mode 120000 index 000000000..728aeab33 --- /dev/null +++ b/qa/suites/rbd/mirror/msgr-failures @@ -0,0 +1 @@ +../mirror-thrash/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/objectstore b/qa/suites/rbd/mirror/objectstore new file mode 120000 index 000000000..d751ff121 --- /dev/null +++ b/qa/suites/rbd/mirror/objectstore @@ -0,0 +1 @@ +../mirror-thrash/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/supported-random-distro$ b/qa/suites/rbd/mirror/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/mirror/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/workloads/.qa b/qa/suites/rbd/mirror/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-ha-workunit.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-ha-workunit.yaml new file mode 100644 index 000000000..7347f7f76 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-ha-workunit.yaml @@ -0,0 +1,16 @@ +meta: +- desc: run the rbd_mirror_ha.sh workunit to test the rbd-mirror daemon +overrides: + ceph: + conf: + client: + rbd mirror image policy type: none +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_ha.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + timeout: 6h diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-journal-bootstrap-workunit.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-journal-bootstrap-workunit.yaml new file mode 100644 index 000000000..b9c5562be --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-journal-bootstrap-workunit.yaml @@ -0,0 +1,13 @@ +meta: +- desc: run the rbd_mirror_bootstrap.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_bootstrap.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '1' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + MIRROR_POOL_MODE: 'pool' + MIRROR_IMAGE_MODE: 'journal' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-bootstrap-workunit.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-bootstrap-workunit.yaml new file mode 100644 index 000000000..5ad78474d --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-bootstrap-workunit.yaml @@ -0,0 +1,13 @@ +meta: +- desc: run the rbd_mirror_bootstrap.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_bootstrap.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '1' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + MIRROR_POOL_MODE: 'image' + MIRROR_IMAGE_MODE: 'snapshot' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-exclusive-lock.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-exclusive-lock.yaml new file mode 100644 index 000000000..29047a77d --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-exclusive-lock.yaml @@ -0,0 +1,13 @@ +meta: +- desc: run the rbd_mirror_snapshot.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_snapshot.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_CONFIG_KEY: '1' + RBD_IMAGE_FEATURES: 'layering,exclusive-lock' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-fast-diff.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-fast-diff.yaml new file mode 100644 index 000000000..af13c92b5 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-fast-diff.yaml @@ -0,0 +1,13 @@ +meta: +- desc: run the rbd_mirror_snapshot.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_snapshot.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_CONFIG_KEY: '1' + RBD_IMAGE_FEATURES: 'layering,exclusive-lock,object-map,fast-diff' 
diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-journaling.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-journaling.yaml new file mode 100644 index 000000000..5ea2bb105 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-journaling.yaml @@ -0,0 +1,13 @@ +meta: +- desc: run the rbd_mirror_snapshot.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_snapshot.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_CONFIG_KEY: '1' + RBD_IMAGE_FEATURES: 'layering,exclusive-lock,journaling' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-minimum.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-minimum.yaml new file mode 100644 index 000000000..e21d57b2b --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-snapshot-workunit-minimum.yaml @@ -0,0 +1,13 @@ +meta: +- desc: run the rbd_mirror_snapshot.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_snapshot.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_CONFIG_KEY: '1' + RBD_IMAGE_FEATURES: 'layering' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-stress-workunit-min-compat-client-octopus.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-stress-workunit-min-compat-client-octopus.yaml new file mode 100644 index 000000000..5cc351bb9 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-stress-workunit-min-compat-client-octopus.yaml @@ -0,0 +1,11 @@ +meta: +- desc: run the rbd_mirror_stress.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_stress.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_MIN_COMPAT_CLIENT: 'octopus' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-config-key.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-config-key.yaml new file mode 100644 index 000000000..0102050eb --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-config-key.yaml @@ -0,0 +1,12 @@ +meta: +- desc: run the rbd_mirror_journal.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_journal.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_CONFIG_KEY: '1' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-min-compat-client-octopus.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-min-compat-client-octopus.yaml new file mode 100644 index 000000000..5bd024d2d --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-min-compat-client-octopus.yaml @@ -0,0 +1,11 @@ +meta: +- desc: run the rbd_mirror_journal.sh workunit to test the rbd-mirror daemon +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_journal.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' + RBD_MIRROR_MIN_COMPAT_CLIENT: 'octopus' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-policy-none.yaml 
b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-policy-none.yaml new file mode 100644 index 000000000..0a610ea2f --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-policy-none.yaml @@ -0,0 +1,16 @@ +meta: +- desc: run the rbd_mirror_journal.sh workunit to test the rbd-mirror daemon +overrides: + ceph: + conf: + client: + rbd mirror image policy type: none +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_journal.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' diff --git a/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-policy-simple.yaml b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-policy-simple.yaml new file mode 100644 index 000000000..883e8abd3 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/rbd-mirror-workunit-policy-simple.yaml @@ -0,0 +1,16 @@ +meta: +- desc: run the rbd_mirror_journal.sh workunit to test the rbd-mirror daemon +overrides: + ceph: + conf: + client: + rbd mirror image policy type: simple +tasks: +- workunit: + clients: + cluster1.client.mirror: [rbd/rbd_mirror_journal.sh] + env: + # override workunit setting of CEPH_ARGS='--cluster' + CEPH_ARGS: '' + RBD_MIRROR_INSTANCES: '4' + RBD_MIRROR_USE_EXISTING_CLUSTER: '1' diff --git a/qa/suites/rbd/nbd/% b/qa/suites/rbd/nbd/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/nbd/% diff --git a/qa/suites/rbd/nbd/.qa b/qa/suites/rbd/nbd/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/nbd/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/base b/qa/suites/rbd/nbd/base new file mode 120000 index 000000000..fd10a859d --- /dev/null +++ b/qa/suites/rbd/nbd/base @@ -0,0 +1 @@ +../thrash/base
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/cluster/+ b/qa/suites/rbd/nbd/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/nbd/cluster/+ diff --git a/qa/suites/rbd/nbd/cluster/.qa b/qa/suites/rbd/nbd/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/nbd/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/cluster/fixed-3.yaml b/qa/suites/rbd/nbd/cluster/fixed-3.yaml new file mode 100644 index 000000000..182589152 --- /dev/null +++ b/qa/suites/rbd/nbd/cluster/fixed-3.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, osd.3, osd.4, osd.5] +- [client.0] diff --git a/qa/suites/rbd/nbd/cluster/openstack.yaml b/qa/suites/rbd/nbd/cluster/openstack.yaml new file mode 120000 index 000000000..48becbb83 --- /dev/null +++ b/qa/suites/rbd/nbd/cluster/openstack.yaml @@ -0,0 +1 @@ +../../thrash/clusters/openstack.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/conf b/qa/suites/rbd/nbd/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/nbd/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/msgr-failures b/qa/suites/rbd/nbd/msgr-failures new file mode 120000 index 000000000..03689aa44 --- /dev/null +++ b/qa/suites/rbd/nbd/msgr-failures @@ -0,0 +1 @@ +../thrash/msgr-failures
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/objectstore b/qa/suites/rbd/nbd/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/nbd/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/supported-random-distro$ b/qa/suites/rbd/nbd/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/nbd/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/thrashers b/qa/suites/rbd/nbd/thrashers new file mode 120000 index 000000000..f461dadc3 --- /dev/null +++ b/qa/suites/rbd/nbd/thrashers @@ -0,0 +1 @@ +../thrash/thrashers
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/thrashosds-health.yaml b/qa/suites/rbd/nbd/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rbd/nbd/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/workloads/.qa b/qa/suites/rbd/nbd/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/nbd/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/workloads/rbd_fsx_nbd.yaml b/qa/suites/rbd/nbd/workloads/rbd_fsx_nbd.yaml new file mode 100644 index 000000000..b5737671f --- /dev/null +++ b/qa/suites/rbd/nbd/workloads/rbd_fsx_nbd.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 + nbd: True + holebdy: 512 + punch_holes: true + readbdy: 512 + truncbdy: 512 + writebdy: 512 diff --git a/qa/suites/rbd/nbd/workloads/rbd_nbd.yaml b/qa/suites/rbd/nbd/workloads/rbd_nbd.yaml new file mode 100644 index 000000000..ededea024 --- /dev/null +++ b/qa/suites/rbd/nbd/workloads/rbd_nbd.yaml @@ -0,0 +1,9 @@ +overrides: + install: + ceph: + extra_packages: [rbd-nbd] +tasks: +- workunit: + clients: + client.0: + - rbd/rbd-nbd.sh diff --git a/qa/suites/rbd/nbd/workloads/rbd_nbd_diff_continuous.yaml b/qa/suites/rbd/nbd/workloads/rbd_nbd_diff_continuous.yaml new file mode 100644 index 000000000..e0a7ebe33 --- /dev/null +++ b/qa/suites/rbd/nbd/workloads/rbd_nbd_diff_continuous.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_packages: + - rbd-nbd + extra_system_packages: + - pv +tasks: +- workunit: + clients: + client.0: + - rbd/diff_continuous.sh + env: + RBD_DEVICE_TYPE: "nbd" diff --git a/qa/suites/rbd/pwl-cache/.qa b/qa/suites/rbd/pwl-cache/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/% b/qa/suites/rbd/pwl-cache/home/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/% diff --git a/qa/suites/rbd/pwl-cache/home/.qa b/qa/suites/rbd/pwl-cache/home/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/1-base b/qa/suites/rbd/pwl-cache/home/1-base new file mode 120000 index 000000000..89c3c7e84 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/1-base @@ -0,0 +1 @@ +../../basic/base/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/2-cluster/+ b/qa/suites/rbd/pwl-cache/home/2-cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/2-cluster/+ diff --git a/qa/suites/rbd/pwl-cache/home/2-cluster/.qa b/qa/suites/rbd/pwl-cache/home/2-cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/2-cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/2-cluster/fix-2.yaml b/qa/suites/rbd/pwl-cache/home/2-cluster/fix-2.yaml new file mode 100644 index 000000000..dbccecbce --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/2-cluster/fix-2.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1] +- [mon.b, mgr.y, osd.2, osd.3, client.0] diff --git a/qa/suites/rbd/pwl-cache/home/2-cluster/openstack.yaml b/qa/suites/rbd/pwl-cache/home/2-cluster/openstack.yaml new file mode 100644 index 000000000..b113e4f2e --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/2-cluster/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/rbd/pwl-cache/home/3-supported-random-distro$ b/qa/suites/rbd/pwl-cache/home/3-supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/3-supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/4-cache-path.yaml b/qa/suites/rbd/pwl-cache/home/4-cache-path.yaml new file mode 100644 index 000000000..be4641b01 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/4-cache-path.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_path: /home/ubuntu/cephtest/rbd-pwl-cache + rbd_plugins: pwl_cache +tasks: +- exec: + client.0: + - "mkdir -m 777 /home/ubuntu/cephtest/rbd-pwl-cache" +- exec_on_cleanup: + client.0: + - "rm -rf /home/ubuntu/cephtest/rbd-pwl-cache" diff --git a/qa/suites/rbd/pwl-cache/home/5-cache-mode/.qa b/qa/suites/rbd/pwl-cache/home/5-cache-mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/5-cache-mode/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/5-cache-mode/rwl.yaml b/qa/suites/rbd/pwl-cache/home/5-cache-mode/rwl.yaml new file mode 100644 index 000000000..5aeab26b3 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/5-cache-mode/rwl.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_mode: rwl diff --git a/qa/suites/rbd/pwl-cache/home/5-cache-mode/ssd.yaml b/qa/suites/rbd/pwl-cache/home/5-cache-mode/ssd.yaml new file mode 100644 index 000000000..082149147 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/5-cache-mode/ssd.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_mode: ssd diff --git a/qa/suites/rbd/pwl-cache/home/6-cache-size/.qa b/qa/suites/rbd/pwl-cache/home/6-cache-size/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/6-cache-size/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/6-cache-size/1G.yaml b/qa/suites/rbd/pwl-cache/home/6-cache-size/1G.yaml new file mode 100644 index 000000000..53fcddcdf --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/6-cache-size/1G.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_size: 1073741824 diff --git a/qa/suites/rbd/pwl-cache/home/6-cache-size/8G.yaml b/qa/suites/rbd/pwl-cache/home/6-cache-size/8G.yaml new file mode 100644 index 000000000..b53d36852 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/6-cache-size/8G.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_size: 8589934592 diff --git a/qa/suites/rbd/pwl-cache/home/7-workloads/.qa b/qa/suites/rbd/pwl-cache/home/7-workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/7-workloads/.qa @@ -0,0 +1 @@ +../.qa/
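The rbd_persistent_cache_size values in the 1G and 8G fragments above are raw byte counts. As a quick sanity check of those numbers (a throwaway sketch, assuming 1 GiB = 2**30 bytes; the helper is hypothetical and not used by any suite):

# Hypothetical helper: convert GiB to the byte counts used in the
# cache-size fragments of this suite.
def gib(n):
    return n * 2**30

assert gib(1) == 1073741824   # 1G.yaml
assert gib(8) == 8589934592   # 8G.yaml
assert gib(5) == 5368709120   # tmpfs/6-cache-size/5G.yaml, later in this series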
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/7-workloads/c_api_tests_with_defaults.yaml b/qa/suites/rbd/pwl-cache/home/7-workloads/c_api_tests_with_defaults.yaml new file mode 120000 index 000000000..359001f8f --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/7-workloads/c_api_tests_with_defaults.yaml @@ -0,0 +1 @@ +../../../librbd/workloads/c_api_tests_with_defaults.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/home/7-workloads/fio.yaml b/qa/suites/rbd/pwl-cache/home/7-workloads/fio.yaml new file mode 100644 index 000000000..f7aecce8a --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/7-workloads/fio.yaml @@ -0,0 +1,8 @@ +tasks: +- rbd_fio: + client.0: + fio-io-size: 100% + formats: [2] + io-engine: rbd + rw: randwrite + runtime: 600 diff --git a/qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml b/qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml new file mode 100644 index 000000000..3017beb22 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/7-workloads/recovery.yaml @@ -0,0 +1,9 @@ +tasks: +- rbd.create_image: + client.0: + image_name: testimage + image_size: 10240 + image_format: 2 +- rbd_pwl_cache_recovery: + client.0: + image_name: testimage diff --git a/qa/suites/rbd/pwl-cache/home/conf b/qa/suites/rbd/pwl-cache/home/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/pwl-cache/home/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/% b/qa/suites/rbd/pwl-cache/tmpfs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/% diff --git a/qa/suites/rbd/pwl-cache/tmpfs/.qa b/qa/suites/rbd/pwl-cache/tmpfs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/1-base b/qa/suites/rbd/pwl-cache/tmpfs/1-base new file mode 120000 index 000000000..89c3c7e84 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/1-base @@ -0,0 +1 @@ +../../basic/base/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/+ b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/+ diff --git a/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/.qa b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/fix-2.yaml b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/fix-2.yaml new file mode 100644 index 000000000..dbccecbce --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/fix-2.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1] +- [mon.b, mgr.y, osd.2, osd.3, client.0] diff --git a/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/openstack.yaml b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/openstack.yaml new file mode 100644 index 000000000..b113e4f2e --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/2-cluster/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/rbd/pwl-cache/tmpfs/3-supported-random-distro$ b/qa/suites/rbd/pwl-cache/tmpfs/3-supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/3-supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/4-cache-path.yaml b/qa/suites/rbd/pwl-cache/tmpfs/4-cache-path.yaml new file mode 100644 index 000000000..b5578a0ae --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/4-cache-path.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_path: /home/ubuntu/cephtest/rbd-pwl-cache + rbd_plugins: pwl_cache +tasks: +- exec: + client.0: + - "mkdir /home/ubuntu/cephtest/tmpfs" + - "mkdir /home/ubuntu/cephtest/rbd-pwl-cache" + - "sudo mount -t tmpfs -o size=20G tmpfs /home/ubuntu/cephtest/tmpfs" + - "truncate -s 20G /home/ubuntu/cephtest/tmpfs/loopfile" + - "mkfs.ext4 /home/ubuntu/cephtest/tmpfs/loopfile" + - "sudo mount -o loop /home/ubuntu/cephtest/tmpfs/loopfile /home/ubuntu/cephtest/rbd-pwl-cache" + - "sudo chmod 777 /home/ubuntu/cephtest/rbd-pwl-cache" +- exec_on_cleanup: + client.0: + - "sudo umount /home/ubuntu/cephtest/rbd-pwl-cache" + - "sudo umount /home/ubuntu/cephtest/tmpfs" + - "rm -rf /home/ubuntu/cephtest/rbd-pwl-cache" + - "rm -rf /home/ubuntu/cephtest/tmpfs" diff --git a/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/.qa b/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/rwl.yaml b/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/rwl.yaml new file mode 100644 index 000000000..5aeab26b3 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/rwl.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_mode: rwl diff --git a/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/ssd.yaml b/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/ssd.yaml new file mode 100644 index 000000000..082149147 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/5-cache-mode/ssd.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_mode: ssd diff --git a/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/.qa b/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/1G.yaml b/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/1G.yaml new file mode 100644 index 000000000..53fcddcdf --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/1G.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_size: 1073741824 diff --git a/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/5G.yaml b/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/5G.yaml new file mode 100644 index 000000000..1c43b5de8 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/6-cache-size/5G.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd_persistent_cache_size: 5368709120 diff --git a/qa/suites/rbd/pwl-cache/tmpfs/7-workloads/.qa b/qa/suites/rbd/pwl-cache/tmpfs/7-workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/7-workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/pwl-cache/tmpfs/7-workloads/qemu_xfstests.yaml b/qa/suites/rbd/pwl-cache/tmpfs/7-workloads/qemu_xfstests.yaml new file mode 100644 index 000000000..255b9631e --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/7-workloads/qemu_xfstests.yaml @@ -0,0 +1,8 @@ +tasks: +- qemu: + client.0: + test: qa/run_xfstests_qemu.sh + type: block + cpus: 4 + memory: 4096 + disks: 3 diff --git a/qa/suites/rbd/pwl-cache/tmpfs/conf b/qa/suites/rbd/pwl-cache/tmpfs/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/pwl-cache/tmpfs/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/% b/qa/suites/rbd/qemu/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/qemu/% diff --git a/qa/suites/rbd/qemu/.qa b/qa/suites/rbd/qemu/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/qemu/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/cache/.qa b/qa/suites/rbd/qemu/cache/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/qemu/cache/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/cache/none.yaml b/qa/suites/rbd/qemu/cache/none.yaml new file mode 100644 index 000000000..42fd9c955 --- /dev/null +++ b/qa/suites/rbd/qemu/cache/none.yaml @@ -0,0 +1,6 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: false diff --git a/qa/suites/rbd/qemu/cache/writearound.yaml b/qa/suites/rbd/qemu/cache/writearound.yaml new file mode 100644 index 000000000..b6f8e319b --- /dev/null +++ b/qa/suites/rbd/qemu/cache/writearound.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache policy: writearound diff --git a/qa/suites/rbd/qemu/cache/writeback.yaml b/qa/suites/rbd/qemu/cache/writeback.yaml new file mode 100644 index 000000000..a55ec1df0 --- /dev/null +++ b/qa/suites/rbd/qemu/cache/writeback.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache policy: writeback diff --git a/qa/suites/rbd/qemu/cache/writethrough.yaml b/qa/suites/rbd/qemu/cache/writethrough.yaml new file mode 100644 index 000000000..6dc29e16c --- /dev/null +++ b/qa/suites/rbd/qemu/cache/writethrough.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + conf: + client: + rbd cache: true + rbd cache max dirty: 0 diff --git a/qa/suites/rbd/qemu/clusters/+ b/qa/suites/rbd/qemu/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/qemu/clusters/+ diff --git a/qa/suites/rbd/qemu/clusters/.qa b/qa/suites/rbd/qemu/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/qemu/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/clusters/fixed-3.yaml b/qa/suites/rbd/qemu/clusters/fixed-3.yaml new file mode 120000 index 000000000..f75a848b8 --- /dev/null +++ b/qa/suites/rbd/qemu/clusters/fixed-3.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/clusters/openstack.yaml b/qa/suites/rbd/qemu/clusters/openstack.yaml new file mode 100644 index 000000000..9c39c7e5f --- /dev/null +++ b/qa/suites/rbd/qemu/clusters/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 30000 # MB + cpus: 1 + volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rbd/qemu/conf b/qa/suites/rbd/qemu/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/qemu/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/data-pool b/qa/suites/rbd/qemu/data-pool new file mode 120000 index 000000000..3df827572 --- /dev/null +++ b/qa/suites/rbd/qemu/data-pool @@ -0,0 +1 @@ +.qa/rbd/data-pool/
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/features/.qa b/qa/suites/rbd/qemu/features/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/qemu/features/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/features/defaults.yaml b/qa/suites/rbd/qemu/features/defaults.yaml new file mode 100644 index 000000000..75afd68dd --- /dev/null +++ b/qa/suites/rbd/qemu/features/defaults.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default features: 61 diff --git a/qa/suites/rbd/qemu/features/journaling.yaml b/qa/suites/rbd/qemu/features/journaling.yaml new file mode 100644 index 000000000..6cea62a88 --- /dev/null +++ b/qa/suites/rbd/qemu/features/journaling.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default features: 125 diff --git a/qa/suites/rbd/qemu/features/readbalance.yaml b/qa/suites/rbd/qemu/features/readbalance.yaml new file mode 100644 index 000000000..e3812f4c7 --- /dev/null +++ b/qa/suites/rbd/qemu/features/readbalance.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + client: + rbd read from replica policy: balance + +tasks: +- exec: + osd.0: + - ceph osd set-require-min-compat-client octopus diff --git a/qa/suites/rbd/qemu/msgr-failures/.qa b/qa/suites/rbd/qemu/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/qemu/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/msgr-failures/few.yaml b/qa/suites/rbd/qemu/msgr-failures/few.yaml new file mode 100644 index 000000000..ca8e09853 --- /dev/null +++ b/qa/suites/rbd/qemu/msgr-failures/few.yaml @@ -0,0 +1,9 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - but it is still running + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rbd/qemu/objectstore b/qa/suites/rbd/qemu/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/qemu/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/supported-random-distro$ b/qa/suites/rbd/qemu/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/qemu/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/workloads/.qa b/qa/suites/rbd/qemu/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/qemu/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/qemu/workloads/qemu_bonnie.yaml b/qa/suites/rbd/qemu/workloads/qemu_bonnie.yaml new file mode 100644 index 000000000..0ef9ebb65 --- /dev/null +++ b/qa/suites/rbd/qemu/workloads/qemu_bonnie.yaml @@ -0,0 +1,6 @@ +tasks: +- qemu: + all: + clone: true + test: qa/workunits/suites/bonnie.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/qemu/workloads/qemu_fsstress.yaml b/qa/suites/rbd/qemu/workloads/qemu_fsstress.yaml new file mode 100644 index 000000000..95f514805 --- /dev/null +++ b/qa/suites/rbd/qemu/workloads/qemu_fsstress.yaml @@ -0,0 +1,6 @@ +tasks: +- qemu: + all: + clone: true + test: qa/workunits/suites/fsstress.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/qemu/workloads/qemu_iozone.yaml.disabled b/qa/suites/rbd/qemu/workloads/qemu_iozone.yaml.disabled new file mode 100644 index 000000000..e159e208e --- /dev/null +++ b/qa/suites/rbd/qemu/workloads/qemu_iozone.yaml.disabled @@ -0,0 +1,6 @@ +tasks: +- qemu: + all: + test: qa/workunits/suites/iozone.sh + image_size: 20480 +exclude_arch: armv7l diff --git a/qa/suites/rbd/qemu/workloads/qemu_xfstests.yaml b/qa/suites/rbd/qemu/workloads/qemu_xfstests.yaml new file mode 100644 index 000000000..198f798d4 --- /dev/null +++ b/qa/suites/rbd/qemu/workloads/qemu_xfstests.yaml @@ -0,0 +1,8 @@ +tasks: +- qemu: + all: + clone: true + type: block + disks: 3 + test: qa/run_xfstests_qemu.sh +exclude_arch: armv7l diff --git a/qa/suites/rbd/singleton-bluestore/% b/qa/suites/rbd/singleton-bluestore/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/% diff --git a/qa/suites/rbd/singleton-bluestore/.qa b/qa/suites/rbd/singleton-bluestore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/singleton-bluestore/all/.qa b/qa/suites/rbd/singleton-bluestore/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/singleton-bluestore/all/issue-20295.yaml b/qa/suites/rbd/singleton-bluestore/all/issue-20295.yaml new file mode 100644 index 000000000..48156c7a0 --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/all/issue-20295.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, osd.2, client.0] +- [mon.b, mgr.y, osd.3, osd.4, osd.5] +- [mon.c, mgr.z, osd.6, osd.7, osd.8] +- [osd.9, osd.10, osd.11] +tasks: +- install: +- ceph: +- workunit: + timeout: 30m + clients: + all: [rbd/issue-20295.sh] diff --git a/qa/suites/rbd/singleton-bluestore/conf b/qa/suites/rbd/singleton-bluestore/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/singleton-bluestore/objectstore/.qa b/qa/suites/rbd/singleton-bluestore/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-bitmap.yaml b/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-comp-snappy.yaml b/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-comp-snappy.yaml new file mode 120000 index 000000000..888caf55f --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/objectstore/bluestore-comp-snappy.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-comp-snappy.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/singleton-bluestore/openstack.yaml b/qa/suites/rbd/singleton-bluestore/openstack.yaml new file mode 100644 index 000000000..f4d1349b4 --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 3 + size: 30 # GB diff --git a/qa/suites/rbd/singleton-bluestore/supported-random-distro$ b/qa/suites/rbd/singleton-bluestore/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/singleton-bluestore/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/singleton/% b/qa/suites/rbd/singleton/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/singleton/% diff --git a/qa/suites/rbd/singleton/.qa b/qa/suites/rbd/singleton/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/singleton/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/singleton/all/.qa b/qa/suites/rbd/singleton/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/singleton/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/singleton/all/admin_socket.yaml b/qa/suites/rbd/singleton/all/admin_socket.yaml new file mode 100644 index 000000000..22dbd8c03 --- /dev/null +++ b/qa/suites/rbd/singleton/all/admin_socket.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- workunit: + clients: + all: [rbd/test_admin_socket.sh] diff --git a/qa/suites/rbd/singleton/all/formatted-output.yaml b/qa/suites/rbd/singleton/all/formatted-output.yaml new file mode 100644 index 000000000..7be94ef23 --- /dev/null +++ b/qa/suites/rbd/singleton/all/formatted-output.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- cram: + clients: + client.0: + - src/test/cli-integration/rbd/formatted-output.t diff --git a/qa/suites/rbd/singleton/all/merge_diff.yaml b/qa/suites/rbd/singleton/all/merge_diff.yaml new file mode 100644 index 000000000..31b269d63 --- /dev/null +++ b/qa/suites/rbd/singleton/all/merge_diff.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- workunit: + clients: + all: [rbd/merge_diff.sh] diff --git a/qa/suites/rbd/singleton/all/mon-command-help.yaml b/qa/suites/rbd/singleton/all/mon-command-help.yaml new file mode 100644 index 000000000..a7290aadb --- /dev/null +++ b/qa/suites/rbd/singleton/all/mon-command-help.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- cram: + clients: + client.0: + - src/test/cli-integration/rbd/mon-command-help.t diff --git a/qa/suites/rbd/singleton/all/permissions.yaml b/qa/suites/rbd/singleton/all/permissions.yaml new file mode 100644 index 000000000..c00a5c9b8 --- /dev/null +++ b/qa/suites/rbd/singleton/all/permissions.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- workunit: + clients: + all: [rbd/permissions.sh] diff --git a/qa/suites/rbd/singleton/all/qemu-iotests-no-cache.yaml b/qa/suites/rbd/singleton/all/qemu-iotests-no-cache.yaml new file mode 100644 index 000000000..364dd8810 --- /dev/null +++ b/qa/suites/rbd/singleton/all/qemu-iotests-no-cache.yaml @@ -0,0 +1,19 @@ +exclude_arch: armv7l +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: + extra_system_packages: + rpm: + - qemu-kvm-block-rbd + deb: + - qemu-block-extra + - qemu-utils +- ceph: + fs: xfs + conf: + client: + rbd cache: false +- workunit: + clients: + all: [rbd/qemu-iotests.sh] diff --git a/qa/suites/rbd/singleton/all/qemu-iotests-writearound.yaml b/qa/suites/rbd/singleton/all/qemu-iotests-writearound.yaml new file mode 100644 index 000000000..975708385 --- /dev/null +++ b/qa/suites/rbd/singleton/all/qemu-iotests-writearound.yaml @@ -0,0 +1,20 @@ +exclude_arch: armv7l +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: + extra_system_packages: + rpm: + - qemu-kvm-block-rbd + deb: + - qemu-block-extra + - qemu-utils +- ceph: + fs: xfs + conf: + client: + rbd cache: true + rbd cache policy: writearound +- workunit: + clients: + all: [rbd/qemu-iotests.sh] diff --git a/qa/suites/rbd/singleton/all/qemu-iotests-writeback.yaml b/qa/suites/rbd/singleton/all/qemu-iotests-writeback.yaml new file mode 100644 index 000000000..9d078c33d --- /dev/null +++ b/qa/suites/rbd/singleton/all/qemu-iotests-writeback.yaml @@ -0,0 +1,20 @@ +exclude_arch: armv7l +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: + 
extra_system_packages: + rpm: + - qemu-kvm-block-rbd + deb: + - qemu-block-extra + - qemu-utils +- ceph: + fs: xfs + conf: + client: + rbd cache: true + rbd cache policy: writeback +- workunit: + clients: + all: [rbd/qemu-iotests.sh] diff --git a/qa/suites/rbd/singleton/all/qemu-iotests-writethrough.yaml b/qa/suites/rbd/singleton/all/qemu-iotests-writethrough.yaml new file mode 100644 index 000000000..c5250bb90 --- /dev/null +++ b/qa/suites/rbd/singleton/all/qemu-iotests-writethrough.yaml @@ -0,0 +1,20 @@ +exclude_arch: armv7l +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: + extra_system_packages: + rpm: + - qemu-kvm-block-rbd + deb: + - qemu-block-extra + - qemu-utils +- ceph: + fs: xfs + conf: + client: + rbd cache: true + rbd cache max dirty: 0 +- workunit: + clients: + all: [rbd/qemu-iotests.sh] diff --git a/qa/suites/rbd/singleton/all/qos.yaml b/qa/suites/rbd/singleton/all/qos.yaml new file mode 100644 index 000000000..66b90520b --- /dev/null +++ b/qa/suites/rbd/singleton/all/qos.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- workunit: + clients: + all: [rbd/qos.sh] diff --git a/qa/suites/rbd/singleton/all/rbd-vs-unmanaged-snaps.yaml b/qa/suites/rbd/singleton/all/rbd-vs-unmanaged-snaps.yaml new file mode 100644 index 000000000..f14bd7431 --- /dev/null +++ b/qa/suites/rbd/singleton/all/rbd-vs-unmanaged-snaps.yaml @@ -0,0 +1,14 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs + conf: + client: + rbd validate pool: false +- workunit: + clients: + all: + - mon/rbd_snaps_ops.sh + diff --git a/qa/suites/rbd/singleton/all/rbd_mirror.yaml b/qa/suites/rbd/singleton/all/rbd_mirror.yaml new file mode 100644 index 000000000..4120d21c9 --- /dev/null +++ b/qa/suites/rbd/singleton/all/rbd_mirror.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) +- workunit: + clients: + all: [rbd/test_rbd_mirror.sh] diff --git a/qa/suites/rbd/singleton/all/rbd_tasks.yaml b/qa/suites/rbd/singleton/all/rbd_tasks.yaml new file mode 100644 index 000000000..782b02141 --- /dev/null +++ b/qa/suites/rbd/singleton/all/rbd_tasks.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- workunit: + clients: + all: [rbd/test_rbd_tasks.sh] diff --git a/qa/suites/rbd/singleton/all/rbdmap_RBDMAPFILE.yaml b/qa/suites/rbd/singleton/all/rbdmap_RBDMAPFILE.yaml new file mode 100644 index 000000000..0053e66ba --- /dev/null +++ b/qa/suites/rbd/singleton/all/rbdmap_RBDMAPFILE.yaml @@ -0,0 +1,7 @@ +roles: +- [client.0] +tasks: +- install: +- workunit: + clients: + all: [rbd/test_rbdmap_RBDMAPFILE.sh] diff --git a/qa/suites/rbd/singleton/all/read-flags-no-cache.yaml b/qa/suites/rbd/singleton/all/read-flags-no-cache.yaml new file mode 100644 index 000000000..cf602cbb1 --- /dev/null +++ b/qa/suites/rbd/singleton/all/read-flags-no-cache.yaml @@ -0,0 +1,12 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs + conf: + client: + rbd cache: false +- workunit: + clients: + all: [rbd/read-flags.sh] diff --git a/qa/suites/rbd/singleton/all/read-flags-writeback.yaml b/qa/suites/rbd/singleton/all/read-flags-writeback.yaml new file mode 100644 index 000000000..ba90c1d1e --- /dev/null +++ b/qa/suites/rbd/singleton/all/read-flags-writeback.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, 
client.0] +tasks: +- install: +- ceph: + fs: xfs + conf: + client: + rbd cache: true + rbd cache policy: writeback +- workunit: + clients: + all: [rbd/read-flags.sh] diff --git a/qa/suites/rbd/singleton/all/read-flags-writethrough.yaml b/qa/suites/rbd/singleton/all/read-flags-writethrough.yaml new file mode 100644 index 000000000..fc499d495 --- /dev/null +++ b/qa/suites/rbd/singleton/all/read-flags-writethrough.yaml @@ -0,0 +1,13 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs + conf: + client: + rbd cache: true + rbd cache max dirty: 0 +- workunit: + clients: + all: [rbd/read-flags.sh] diff --git a/qa/suites/rbd/singleton/all/snap-diff.yaml b/qa/suites/rbd/singleton/all/snap-diff.yaml new file mode 100644 index 000000000..be7e68589 --- /dev/null +++ b/qa/suites/rbd/singleton/all/snap-diff.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- cram: + clients: + client.0: + - src/test/cli-integration/rbd/snap-diff.t diff --git a/qa/suites/rbd/singleton/all/verify_pool.yaml b/qa/suites/rbd/singleton/all/verify_pool.yaml new file mode 100644 index 000000000..5ab06f749 --- /dev/null +++ b/qa/suites/rbd/singleton/all/verify_pool.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, mgr.x, osd.0, osd.1, client.0] +tasks: +- install: +- ceph: + fs: xfs +- workunit: + clients: + all: [rbd/verify_pool.sh] diff --git a/qa/suites/rbd/singleton/conf b/qa/suites/rbd/singleton/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/singleton/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/singleton/objectstore b/qa/suites/rbd/singleton/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/singleton/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/singleton/openstack.yaml b/qa/suites/rbd/singleton/openstack.yaml new file mode 100644 index 000000000..21eca2bbd --- /dev/null +++ b/qa/suites/rbd/singleton/openstack.yaml @@ -0,0 +1,4 @@ +openstack: + - volumes: # attached to each instance + count: 2 + size: 30 # GB diff --git a/qa/suites/rbd/singleton/supported-random-distro$ b/qa/suites/rbd/singleton/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/singleton/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/% b/qa/suites/rbd/thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/thrash/% diff --git a/qa/suites/rbd/thrash/.qa b/qa/suites/rbd/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/base/.qa b/qa/suites/rbd/thrash/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/thrash/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/base/install.yaml b/qa/suites/rbd/thrash/base/install.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/rbd/thrash/base/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rbd/thrash/clusters/+ b/qa/suites/rbd/thrash/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/thrash/clusters/+ diff --git a/qa/suites/rbd/thrash/clusters/.qa b/qa/suites/rbd/thrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/thrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/clusters/fixed-2.yaml b/qa/suites/rbd/thrash/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rbd/thrash/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/clusters/openstack.yaml b/qa/suites/rbd/thrash/clusters/openstack.yaml new file mode 100644 index 000000000..40fef4770 --- /dev/null +++ b/qa/suites/rbd/thrash/clusters/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 8000 # MB + cpus: 1 + volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/rbd/thrash/conf b/qa/suites/rbd/thrash/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/thrash/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/msgr-failures/.qa b/qa/suites/rbd/thrash/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/thrash/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/msgr-failures/few.yaml b/qa/suites/rbd/thrash/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rbd/thrash/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rbd/thrash/objectstore b/qa/suites/rbd/thrash/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/thrash/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/supported-random-distro$ b/qa/suites/rbd/thrash/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rbd/thrash/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/thrashers/.qa b/qa/suites/rbd/thrash/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/thrash/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/thrashers/default.yaml b/qa/suites/rbd/thrash/thrashers/default.yaml new file mode 100644 index 000000000..3e2bf7fe1 --- /dev/null +++ b/qa/suites/rbd/thrash/thrashers/default.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost +tasks: +- thrashosds: + timeout: 1200 diff --git a/qa/suites/rbd/thrash/thrashosds-health.yaml b/qa/suites/rbd/thrash/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rbd/thrash/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/workloads/.qa b/qa/suites/rbd/thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/thrash/workloads/journal.yaml b/qa/suites/rbd/thrash/workloads/journal.yaml new file mode 100644 index 000000000..4dae10633 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/journal.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/journal.sh diff --git a/qa/suites/rbd/thrash/workloads/rbd_api_tests.yaml b/qa/suites/rbd/thrash/workloads/rbd_api_tests.yaml new file mode 100644 index 000000000..ec4194598 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_api_tests.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" diff --git a/qa/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml b/qa/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml new file mode 100644 index 000000000..26e20522c --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_api_tests_copy_on_read.yaml @@ -0,0 +1,17 @@ +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) + conf: + client: + rbd clone copy on read: true diff --git a/qa/suites/rbd/thrash/workloads/rbd_api_tests_journaling.yaml b/qa/suites/rbd/thrash/workloads/rbd_api_tests_journaling.yaml new file mode 100644 index 000000000..6c3686806 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_api_tests_journaling.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "125" diff --git a/qa/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml b/qa/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml new file mode 100644 index 000000000..eb63fd771 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_api_tests_no_locking.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writearound.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writearound.yaml new file mode 100644 index 000000000..e9ea1ebd1 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writearound.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 +overrides: + ceph: + conf: + client: + rbd cache: true + rbd cache policy: writearound diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writeback.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writeback.yaml new file mode 100644 index 000000000..41f7d84c9 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writeback.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 +overrides: + ceph: + conf: + client: + rbd cache: true + rbd cache policy: writeback diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writethrough.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writethrough.yaml new file mode 100644 index 000000000..463ba9965 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_cache_writethrough.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fsx: + 
clients: [client.0] + ops: 6000 +overrides: + ceph: + conf: + client: + rbd cache: true + rbd cache max dirty: 0 diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_copy_on_read.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_copy_on_read.yaml new file mode 100644 index 000000000..0c284ca8f --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_copy_on_read.yaml @@ -0,0 +1,10 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 +overrides: + ceph: + conf: + client: + rbd cache: true + rbd clone copy on read: true diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_deep_copy.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_deep_copy.yaml new file mode 100644 index 000000000..797491499 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_deep_copy.yaml @@ -0,0 +1,5 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 + deep_copy: True diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_journal.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_journal.yaml new file mode 100644 index 000000000..13e9a7830 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_journal.yaml @@ -0,0 +1,5 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 + journal_replay: True diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_nocache.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_nocache.yaml new file mode 100644 index 000000000..968665e18 --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_nocache.yaml @@ -0,0 +1,9 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 +overrides: + ceph: + conf: + client: + rbd cache: false diff --git a/qa/suites/rbd/thrash/workloads/rbd_fsx_rate_limit.yaml b/qa/suites/rbd/thrash/workloads/rbd_fsx_rate_limit.yaml new file mode 100644 index 000000000..611320bca --- /dev/null +++ b/qa/suites/rbd/thrash/workloads/rbd_fsx_rate_limit.yaml @@ -0,0 +1,11 @@ +tasks: +- rbd_fsx: + clients: [client.0] + ops: 6000 +overrides: + ceph: + conf: + client: + rbd qos iops limit: 50 + rbd qos iops burst: 100 + rbd qos schedule tick min: 100 diff --git a/qa/suites/rbd/valgrind/% b/qa/suites/rbd/valgrind/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/valgrind/% diff --git a/qa/suites/rbd/valgrind/.qa b/qa/suites/rbd/valgrind/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/valgrind/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/base/.qa b/qa/suites/rbd/valgrind/base/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/valgrind/base/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/base/install.yaml b/qa/suites/rbd/valgrind/base/install.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/rbd/valgrind/base/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/rbd/valgrind/centos_latest.yaml b/qa/suites/rbd/valgrind/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/rbd/valgrind/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/clusters b/qa/suites/rbd/valgrind/clusters new file mode 120000 index 000000000..ae92569e8 --- /dev/null +++ b/qa/suites/rbd/valgrind/clusters @@ -0,0 +1 @@ +../basic/clusters
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/conf b/qa/suites/rbd/valgrind/conf new file mode 120000 index 000000000..4bc0fe86c --- /dev/null +++ b/qa/suites/rbd/valgrind/conf @@ -0,0 +1 @@ +.qa/rbd/conf
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/objectstore b/qa/suites/rbd/valgrind/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/rbd/valgrind/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/validator/.qa b/qa/suites/rbd/valgrind/validator/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/valgrind/validator/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/validator/memcheck.yaml b/qa/suites/rbd/valgrind/validator/memcheck.yaml new file mode 100644 index 000000000..fcea1b88c --- /dev/null +++ b/qa/suites/rbd/valgrind/validator/memcheck.yaml @@ -0,0 +1,12 @@ +# see http://tracker.ceph.com/issues/20360 and http://tracker.ceph.com/issues/18126 +os_type: centos + +overrides: + install: + ceph: + debuginfo: true + rbd_fsx: + valgrind: ["--tool=memcheck"] + workunit: + env: + VALGRIND: "--tool=memcheck --leak-check=full" diff --git a/qa/suites/rbd/valgrind/workloads/.qa b/qa/suites/rbd/valgrind/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/valgrind/workloads/c_api_tests.yaml b/qa/suites/rbd/valgrind/workloads/c_api_tests.yaml new file mode 100644 index 000000000..eb63fd771 --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/c_api_tests.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/rbd/valgrind/workloads/c_api_tests_with_defaults.yaml b/qa/suites/rbd/valgrind/workloads/c_api_tests_with_defaults.yaml new file mode 100644 index 000000000..ec4194598 --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/c_api_tests_with_defaults.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" diff --git a/qa/suites/rbd/valgrind/workloads/c_api_tests_with_journaling.yaml b/qa/suites/rbd/valgrind/workloads/c_api_tests_with_journaling.yaml new file mode 100644 index 000000000..6c3686806 --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/c_api_tests_with_journaling.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - is full \(reached quota + - \(POOL_FULL\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "125" diff --git a/qa/suites/rbd/valgrind/workloads/fsx.yaml b/qa/suites/rbd/valgrind/workloads/fsx.yaml new file mode 100644 index 000000000..5c745a2c6 --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/fsx.yaml @@ -0,0 +1,4 @@ +tasks: +- rbd_fsx: + clients: [client.0] + size: 134217728 diff --git a/qa/suites/rbd/valgrind/workloads/python_api_tests.yaml b/qa/suites/rbd/valgrind/workloads/python_api_tests.yaml new file mode 100644 index 000000000..516c323df --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/python_api_tests.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/rbd/valgrind/workloads/python_api_tests_with_defaults.yaml b/qa/suites/rbd/valgrind/workloads/python_api_tests_with_defaults.yaml new file mode 100644 index 000000000..831f3762b --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/python_api_tests_with_defaults.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "61" diff --git a/qa/suites/rbd/valgrind/workloads/python_api_tests_with_journaling.yaml b/qa/suites/rbd/valgrind/workloads/python_api_tests_with_journaling.yaml new file mode 100644 index 000000000..8bd751146 --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/python_api_tests_with_journaling.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "125" diff --git a/qa/suites/rbd/valgrind/workloads/rbd_mirror.yaml b/qa/suites/rbd/valgrind/workloads/rbd_mirror.yaml new file mode 100644 index 000000000..ae5a83c30 --- /dev/null +++ b/qa/suites/rbd/valgrind/workloads/rbd_mirror.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + log-ignorelist: + - overall 
HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) +tasks: +- workunit: + clients: + client.0: + - rbd/test_rbd_mirror.sh diff --git a/qa/suites/rgw-multisite-upgrade/.qa b/qa/suites/rgw-multisite-upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/% b/qa/suites/rgw-multisite-upgrade/pacific-x/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/% diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/.qa b/qa/suites/rgw-multisite-upgrade/pacific-x/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/clusters.yaml b/qa/suites/rgw-multisite-upgrade/pacific-x/clusters.yaml new file mode 100644 index 000000000..4e18096e0 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/clusters.yaml @@ -0,0 +1,3 @@ +roles: +- [c1.mon.a, c1.mgr.x, c1.osd.0, c1.osd.1, c1.osd.2, c1.client.0] +- [c2.mon.a, c2.mgr.x, c2.osd.0, c2.osd.1, c2.osd.2, c2.client.0, c2.client.1] diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/frontend.yaml b/qa/suites/rgw-multisite-upgrade/pacific-x/frontend.yaml new file mode 120000 index 000000000..09ced62c4 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/frontend.yaml @@ -0,0 +1 @@ +.qa/rgw_frontend/beast.yaml
\ No newline at end of file diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/overrides.yaml b/qa/suites/rgw-multisite-upgrade/pacific-x/overrides.yaml new file mode 100644 index 000000000..35ef026d6 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/overrides.yaml @@ -0,0 +1,23 @@ +overrides: + ceph: + log-ignorelist: + - \(PG_AVAILABILITY\) + wait-for-scrub: false + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= + rgw crypt require ssl: false + rgw sync log trim interval: 0 + rgw curl low speed time: 300 + rgw md log max shards: 4 + rgw data log num shards: 4 + rgw sync obj etag verify: true + rgw: + compression type: random + rgw-multisite-tests: + branch: pacific # run pacific branch of tests + repo: https://github.com/ceph/ceph.git diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/realm.yaml b/qa/suites/rgw-multisite-upgrade/pacific-x/realm.yaml new file mode 100644 index 000000000..86fc0732f --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/realm.yaml @@ -0,0 +1,20 @@ +overrides: + rgw-multisite: + realm: + name: test-realm + is default: true + zonegroups: + - name: test-zonegroup + is_master: true + is_default: true + endpoints: [c1.client.0] + zones: + - name: test-zone1 + is_master: true + is_default: true + endpoints: [c1.client.0] + - name: test-zone2 + is_default: true + endpoints: [c2.client.0] + rgw-multisite-tests: + args: [tests.py] diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/supported-random-distro$ b/qa/suites/rgw-multisite-upgrade/pacific-x/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/tasks.yaml b/qa/suites/rgw-multisite-upgrade/pacific-x/tasks.yaml new file mode 100644 index 000000000..843858543 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/tasks.yaml @@ -0,0 +1,18 @@ +tasks: +- install: + branch: pacific +- ceph: {cluster: c1} +- ceph: {cluster: c2} +- parallel: + - upgrade-task +- rgw: + c1.client.0: + port: 8000 + c2.client.0: + port: 8000 + c2.client.1: + port: 8001 +- rgw-multisite: +- rgw-multisite-tests: + config: + reconfigure_delay: 60 diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/.qa b/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/primary.yaml b/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/primary.yaml new file mode 100644 index 000000000..a29581aa6 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/primary.yaml @@ -0,0 +1,8 @@ +upgrade-task: +- install.upgrade: # leave c2 on pacific, upgrade c1 to current + c1.mon.a: + c1.mgr.x: + c1.osd.0: + c1.osd.1: + c1.osd.2: + c1.client.0: diff --git a/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/secondary.yaml b/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/secondary.yaml new file mode 100644 index 000000000..666986697 --- /dev/null +++ b/qa/suites/rgw-multisite-upgrade/pacific-x/upgrade/secondary.yaml @@ -0,0 +1,9 @@ +upgrade-task: +- install.upgrade: # leave c1 on pacific, upgrade c2 to current + c2.mon.a: + c2.mgr.x: + c2.osd.0: + c2.osd.1: + c2.osd.2: + c2.client.0: + c2.client.1: diff --git a/qa/suites/rgw/.qa b/qa/suites/rgw/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/cloud-transition/+ b/qa/suites/rgw/cloud-transition/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/cloud-transition/+ diff --git a/qa/suites/rgw/cloud-transition/.qa b/qa/suites/rgw/cloud-transition/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/cloud-transition/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/cloud-transition/cluster.yaml b/qa/suites/rgw/cloud-transition/cluster.yaml new file mode 100644 index 000000000..8e0712ea5 --- /dev/null +++ b/qa/suites/rgw/cloud-transition/cluster.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, osd.0, osd.1, osd.2, mgr.0, client.0, client.1] +#- [mon.b, osd.3, osd.4, osd.5, mgr.1, client.2, client.3] diff --git a/qa/suites/rgw/cloud-transition/ignore-pg-availability.yaml b/qa/suites/rgw/cloud-transition/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/cloud-transition/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/cloud-transition/overrides.yaml b/qa/suites/rgw/cloud-transition/overrides.yaml new file mode 100644 index 000000000..40ca3556b --- /dev/null +++ b/qa/suites/rgw/cloud-transition/overrides.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false + rgw lc debug interval: 10 + rgw: + storage classes: LUKEWARM, FROZEN + frontend: beast diff --git a/qa/suites/rgw/cloud-transition/s3tests-branch.yaml b/qa/suites/rgw/cloud-transition/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/cloud-transition/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/cloud-transition/supported-random-distro$ b/qa/suites/rgw/cloud-transition/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw/cloud-transition/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rgw/cloud-transition/tasks/.qa b/qa/suites/rgw/cloud-transition/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/cloud-transition/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/cloud-transition/tasks/cloud_transition_s3tests.yaml b/qa/suites/rgw/cloud-transition/tasks/cloud_transition_s3tests.yaml new file mode 100644 index 000000000..756617fba --- /dev/null +++ b/qa/suites/rgw/cloud-transition/tasks/cloud_transition_s3tests.yaml @@ -0,0 +1,61 @@ +tasks: +- install: +- ceph: +- rgw: + storage classes: LUKEWARM, FROZEN + client.0: + port: 8000 + client.1: + port: 8001 + #client.2: + #port: 8002 + #client.3: + #port: 8003 +- rgw-cloudtier: + client.0: + # cloudtier storage class params + # retain_head_object = false + cloud_storage_class: CLOUDTIER-CLIENT0 + cloud_client: client.1 + cloud_regular_storage_class: LUKEWARM + cloud_target_storage_class: FROZEN + cloud_retain_head_object: "false" + cloud_target_path: "teuthology-client0" + cloudtier_user: + # cloud-user creds to be created on cloud-client + cloud_secret: "abcefgh" + cloud_access_key: "12345678" + #client.2: + # cloudtier storage class params + # retain_head_object = true + # + # Having multiple cloudtier storage classes + # in the same task increases the transition + # time and results in spurious failures. + # Hence this is disabled until there is a + # consistent way of running the tests + # without having to depend on lc_debug_interval. + # + #cloud_storage_class: CLOUDTIER-CLIENT2 + #cloud_client: client.3 + #cloud_regular_storage_class: LUKEWARM + #cloud_retain_head_object: "true" + #cloud_target_path: "teuthology-client2" + #cloudtier_user: + # cloud-user creds to be created on cloud-client + #cloud_secret: "foobar" + #cloud_access_key: "87654321" +- tox: [client.0] +- s3tests: + client.0: + rgw_server: client.0 + storage classes: LUKEWARM, FROZEN + extra_attrs: ["cloud_transition"] + lc_debug_interval: 10 + cloudtier_tests: True + #client.2: + #rgw_server: client.2 + #storage classes: LUKEWARM, FROZEN + #extra_attrs: ["cloud_transition"] + #lc_debug_interval: 10 + #cloudtier_tests: True diff --git a/qa/suites/rgw/crypt/% b/qa/suites/rgw/crypt/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/crypt/% diff --git a/qa/suites/rgw/crypt/.qa b/qa/suites/rgw/crypt/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/crypt/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/0-cluster/.qa b/qa/suites/rgw/crypt/0-cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/crypt/0-cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/0-cluster/fixed-1.yaml b/qa/suites/rgw/crypt/0-cluster/fixed-1.yaml new file mode 120000 index 000000000..435ea3c75 --- /dev/null +++ b/qa/suites/rgw/crypt/0-cluster/fixed-1.yaml @@ -0,0 +1 @@ +../../../../clusters/fixed-1.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/1-ceph-install/.qa b/qa/suites/rgw/crypt/1-ceph-install/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/crypt/1-ceph-install/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/1-ceph-install/install.yaml b/qa/suites/rgw/crypt/1-ceph-install/install.yaml new file mode 100644 index 000000000..07a08b9a6 --- /dev/null +++ b/qa/suites/rgw/crypt/1-ceph-install/install.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + wait-for-scrub: false + +tasks: +- install: +- ceph: diff --git a/qa/suites/rgw/crypt/2-kms/.qa b/qa/suites/rgw/crypt/2-kms/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/crypt/2-kms/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/2-kms/barbican.yaml b/qa/suites/rgw/crypt/2-kms/barbican.yaml new file mode 100644 index 000000000..0c75a131c --- /dev/null +++ b/qa/suites/rgw/crypt/2-kms/barbican.yaml @@ -0,0 +1,92 @@ +overrides: + ceph: + conf: + client: + rgw crypt s3 kms backend: barbican + rgw keystone barbican project: rgwcrypt + rgw keystone barbican user: rgwcrypt-user + rgw keystone barbican password: rgwcrypt-pass + rgw keystone barbican domain: Default + rgw keystone api version: 3 + rgw keystone accepted roles: admin,Member,creator + rgw keystone implicit tenants: true + rgw keystone accepted admin roles: admin + rgw swift enforce content length: true + rgw swift account in url: true + rgw swift versioning enabled: true + rgw keystone admin project: admin + rgw keystone admin user: admin + rgw keystone admin password: ADMIN + rgw keystone admin domain: Default + rgw: + client.0: + use-keystone-role: client.0 + use-barbican-role: client.0 + +tasks: +- tox: [ client.0 ] +- keystone: + client.0: + force-branch: stable/2023.1 + services: + - name: swift + type: object-store + description: Swift Service + projects: + - name: rgwcrypt + description: Encryption Tenant + domain: default + - name: barbican + description: Barbican + domain: default + - name: s3 + description: S3 project + domain: default + users: + - name: rgwcrypt-user + password: rgwcrypt-pass + project: rgwcrypt + domain: default + - name: barbican-user + password: barbican-pass + project: barbican + domain: default + - name: s3-user + password: s3-pass + project: s3 + domain: default + roles: [ name: Member, name: creator ] + role-mappings: + - name: Member + user: rgwcrypt-user + project: rgwcrypt + - name: admin + user: barbican-user + project: barbican + - name: creator + user: s3-user + project: s3 +- barbican: + client.0: + force-branch: stable/xena + use-keystone-role: client.0 + keystone_authtoken: + auth_plugin: password + username: barbican-user + password: barbican-pass + user_domain_name: Default + rgw_user: + tenantName: rgwcrypt + username: rgwcrypt-user + password: rgwcrypt-pass + secrets: + - name: my-key-1 + base64: a2V5MS5GcWVxKzhzTGNLaGtzQkg5NGVpb1FKcFpGb2c= + tenantName: s3 + username: s3-user + password: s3-pass + - name: my-key-2 + base64: a2V5Mi5yNUNNMGFzMVdIUVZxcCt5NGVmVGlQQ1k4YWg= + tenantName: s3 + username: s3-user + password: s3-pass diff --git a/qa/suites/rgw/crypt/2-kms/kmip.yaml b/qa/suites/rgw/crypt/2-kms/kmip.yaml new file mode 100644 index 000000000..0057d954e --- /dev/null +++ b/qa/suites/rgw/crypt/2-kms/kmip.yaml @@ -0,0 +1,37 @@ +overrides: + ceph: + conf: + client: + rgw crypt s3 kms backend: kmip + rgw crypt kmip ca path: /etc/ceph/kmiproot.crt + rgw crypt kmip client cert: /etc/ceph/kmip-client.crt + rgw crypt kmip client key: /etc/ceph/kmip-client.key + rgw crypt kmip kms key template: pykmip-$keyid + rgw: + client.0: + use-pykmip-role: client.0 + +tasks: +- openssl_keys: + kmiproot: + client: client.0 + cn: kmiproot + key-type: rsa:4096 + kmip-server: + client: client.0 + ca: kmiproot + kmip-client: + client: client.0 + ca: kmiproot + cn: rgw-client +- exec: + client.0: + - chmod 644 /home/ubuntu/cephtest/ca/kmip-client.key +- pykmip: + client.0: + clientca: kmiproot + servercert: kmip-server + clientcert: kmip-client + secrets: + - name: pykmip-my-key-1 + - name: pykmip-my-key-2 diff --git a/qa/suites/rgw/crypt/2-kms/testing.yaml b/qa/suites/rgw/crypt/2-kms/testing.yaml new file mode 100644 index 000000000..e02f9caad --- /dev/null +++ 
b/qa/suites/rgw/crypt/2-kms/testing.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + client: + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption_keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= diff --git a/qa/suites/rgw/crypt/2-kms/vault_kv.yaml b/qa/suites/rgw/crypt/2-kms/vault_kv.yaml new file mode 100644 index 000000000..9ee9366d0 --- /dev/null +++ b/qa/suites/rgw/crypt/2-kms/vault_kv.yaml @@ -0,0 +1,25 @@ +overrides: + ceph: + conf: + client: + rgw crypt s3 kms backend: vault + rgw crypt vault auth: token + rgw crypt vault secret engine: kv + rgw crypt vault prefix: /v1/kv/data + rgw: + client.0: + use-vault-role: client.0 + +tasks: +- vault: + client.0: + install_url: https://releases.hashicorp.com/vault/1.2.2/vault_1.2.2_linux_amd64.zip + install_sha256: 7725b35d9ca8be3668abe63481f0731ca4730509419b4eb29fa0b0baa4798458 + root_token: test_root_token + engine: kv + prefix: /v1/kv/data/ + secrets: + - path: my-key-1 + secret: a2V5MS5GcWVxKzhzTGNLaGtzQkg5NGVpb1FKcFpGb2c= + - path: my-key-2 + secret: a2V5Mi5yNUNNMGFzMVdIUVZxcCt5NGVmVGlQQ1k4YWg= diff --git a/qa/suites/rgw/crypt/2-kms/vault_old.yaml b/qa/suites/rgw/crypt/2-kms/vault_old.yaml new file mode 100644 index 000000000..4befc1ecf --- /dev/null +++ b/qa/suites/rgw/crypt/2-kms/vault_old.yaml @@ -0,0 +1,24 @@ +overrides: + ceph: + conf: + client: + rgw crypt s3 kms backend: vault + rgw crypt vault auth: token + rgw crypt vault secret engine: transit + rgw crypt vault prefix: /v1/transit/export/encryption-key/ + rgw: + client.0: + use-vault-role: client.0 + +tasks: +- vault: + client.0: + install_url: https://releases.hashicorp.com/vault/1.2.2/vault_1.2.2_linux_amd64.zip + install_sha256: 7725b35d9ca8be3668abe63481f0731ca4730509419b4eb29fa0b0baa4798458 + root_token: test_root_token + engine: transit + flavor: old + prefix: /v1/transit/keys/ + secrets: + - path: my-key-1 + - path: my-key-2 diff --git a/qa/suites/rgw/crypt/2-kms/vault_transit.yaml b/qa/suites/rgw/crypt/2-kms/vault_transit.yaml new file mode 100644 index 000000000..d20bb52bc --- /dev/null +++ b/qa/suites/rgw/crypt/2-kms/vault_transit.yaml @@ -0,0 +1,29 @@ +overrides: + ceph: + conf: + client: + rgw crypt s3 kms backend: vault + rgw crypt vault auth: token + rgw crypt vault secret engine: transit + rgw crypt vault prefix: /v1/transit/ + rgw crypt sse s3 backend: vault + rgw crypt sse s3 vault auth: token + rgw crypt sse s3 vault secret engine: transit + rgw crypt sse s3 vault prefix: /v1/transit/ + rgw: + client.0: + use-vault-role: client.0 + s3tests: + with-sse-s3: true + +tasks: +- vault: + client.0: + install_url: https://releases.hashicorp.com/vault/1.2.2/vault_1.2.2_linux_amd64.zip + install_sha256: 7725b35d9ca8be3668abe63481f0731ca4730509419b4eb29fa0b0baa4798458 + root_token: test_root_token + engine: transit + prefix: /v1/transit/keys/ + secrets: + - path: my-key-1 + - path: my-key-2 diff --git a/qa/suites/rgw/crypt/3-rgw/.qa b/qa/suites/rgw/crypt/3-rgw/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/crypt/3-rgw/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/3-rgw/rgw.yaml b/qa/suites/rgw/crypt/3-rgw/rgw.yaml new file mode 100644 index 000000000..764d216aa --- /dev/null +++ b/qa/suites/rgw/crypt/3-rgw/rgw.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + client: + setuser: ceph + setgroup: ceph + rgw crypt require ssl: false + debug rgw: 20 + rgw: + compression type: random + +tasks: +- rgw: + client.0: diff --git a/qa/suites/rgw/crypt/4-tests/+ b/qa/suites/rgw/crypt/4-tests/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/crypt/4-tests/+ diff --git a/qa/suites/rgw/crypt/4-tests/.qa b/qa/suites/rgw/crypt/4-tests/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/crypt/4-tests/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/4-tests/s3tests.yaml b/qa/suites/rgw/crypt/4-tests/s3tests.yaml new file mode 100644 index 000000000..42b254922 --- /dev/null +++ b/qa/suites/rgw/crypt/4-tests/s3tests.yaml @@ -0,0 +1,16 @@ +tasks: +- tox: [client.0] +- s3tests: + client.0: + barbican: + kms_key: my-key-1 + kms_key2: my-key-2 + vault_kv: + key_path: my-key-1 + key_path2: my-key-2 + vault_old: + key_path: my-key-1/1 + key_path2: my-key-2/1 + vault_transit: + key_path: my-key-1 + key_path2: my-key-2 diff --git a/qa/suites/rgw/crypt/ignore-pg-availability.yaml b/qa/suites/rgw/crypt/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/crypt/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/s3tests-branch.yaml b/qa/suites/rgw/crypt/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/crypt/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/crypt/ubuntu_latest.yaml b/qa/suites/rgw/crypt/ubuntu_latest.yaml new file mode 120000 index 000000000..fe01dafd4 --- /dev/null +++ b/qa/suites/rgw/crypt/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/all/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/dbstore/+ b/qa/suites/rgw/dbstore/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/dbstore/+ diff --git a/qa/suites/rgw/dbstore/.qa b/qa/suites/rgw/dbstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/dbstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/dbstore/cluster.yaml b/qa/suites/rgw/dbstore/cluster.yaml new file mode 100644 index 000000000..496b51ba7 --- /dev/null +++ b/qa/suites/rgw/dbstore/cluster.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, osd.0, osd.1, osd.2, mgr.0, client.0] + diff --git a/qa/suites/rgw/dbstore/ignore-pg-availability.yaml b/qa/suites/rgw/dbstore/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/dbstore/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/dbstore/overrides.yaml b/qa/suites/rgw/dbstore/overrides.yaml new file mode 100644 index 000000000..df4aaa9af --- /dev/null +++ b/qa/suites/rgw/dbstore/overrides.yaml @@ -0,0 +1,11 @@ +overrides: + ceph: + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt require ssl: false + rgw backend store: dbstore + rgw: + frontend: beast diff --git a/qa/suites/rgw/dbstore/s3tests-branch.yaml b/qa/suites/rgw/dbstore/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/dbstore/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/dbstore/supported-random-distro$ b/qa/suites/rgw/dbstore/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw/dbstore/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rgw/dbstore/tasks/.qa b/qa/suites/rgw/dbstore/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/dbstore/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/dbstore/tasks/rgw_s3tests.yaml b/qa/suites/rgw/dbstore/tasks/rgw_s3tests.yaml new file mode 100644 index 000000000..daab5581d --- /dev/null +++ b/qa/suites/rgw/dbstore/tasks/rgw_s3tests.yaml @@ -0,0 +1,16 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- tox: [client.0] +- exec: + client.0: + - sudo chmod 0777 /var/lib/ceph + - sudo chmod 0777 /var/lib/ceph/radosgw + - sudo chmod 0777 /var/lib/ceph/radosgw/dbstore-default_ns.db +- s3tests: + client.0: + dbstore_tests: True + rgw_server: client.0 + extra_attrs: ["not fails_on_rgw","not fails_on_dbstore"] + diff --git a/qa/suites/rgw/hadoop-s3a/% b/qa/suites/rgw/hadoop-s3a/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/% diff --git a/qa/suites/rgw/hadoop-s3a/.qa b/qa/suites/rgw/hadoop-s3a/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/hadoop-s3a/clusters/.qa b/qa/suites/rgw/hadoop-s3a/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/hadoop-s3a/clusters/fixed-2.yaml b/qa/suites/rgw/hadoop-s3a/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/hadoop-s3a/hadoop/.qa b/qa/suites/rgw/hadoop-s3a/hadoop/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/hadoop/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml b/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/hadoop/default.yaml @@ -0,0 +1 @@ + diff --git a/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml b/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml new file mode 100644 index 000000000..d017b756b --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/hadoop/v32.yaml @@ -0,0 +1,3 @@ +overrides: + s3a-hadoop: + hadoop-version: '3.2.0' diff --git a/qa/suites/rgw/hadoop-s3a/ignore-pg-availability.yaml b/qa/suites/rgw/hadoop-s3a/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/hadoop-s3a/overrides.yaml b/qa/suites/rgw/hadoop-s3a/overrides.yaml new file mode 100644 index 000000000..d52080bb5 --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/overrides.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + client: + setuser: ceph + setgroup: ceph diff --git a/qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml b/qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml new file mode 100644 index 000000000..ed077a89f --- /dev/null +++ b/qa/suites/rgw/hadoop-s3a/s3a-hadoop.yaml @@ -0,0 +1,11 @@ +tasks: +- install: +- ceph: +- ssh-keys: +- dnsmasq: + client.0: [s3.] +- rgw: + client.0: + dns-name: s3. +- s3a-hadoop: + role: client.0 diff --git a/qa/suites/rgw/lifecycle/+ b/qa/suites/rgw/lifecycle/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/lifecycle/+ diff --git a/qa/suites/rgw/lifecycle/.qa b/qa/suites/rgw/lifecycle/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/lifecycle/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/lifecycle/cluster.yaml b/qa/suites/rgw/lifecycle/cluster.yaml new file mode 100644 index 000000000..0eab7ebad --- /dev/null +++ b/qa/suites/rgw/lifecycle/cluster.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, osd.0, osd.1, osd.2, mgr.0, client.0] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB +overrides: + rgw: + frontend: beast
\ No newline at end of file diff --git a/qa/suites/rgw/lifecycle/ignore-pg-availability.yaml b/qa/suites/rgw/lifecycle/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/lifecycle/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/lifecycle/overrides.yaml b/qa/suites/rgw/lifecycle/overrides.yaml new file mode 100644 index 000000000..83722d312 --- /dev/null +++ b/qa/suites/rgw/lifecycle/overrides.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + wait-for-scrub: false + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false + rgw lc debug interval: 10 + rgw: + storage classes: LUKEWARM, FROZEN diff --git a/qa/suites/rgw/lifecycle/s3tests-branch.yaml b/qa/suites/rgw/lifecycle/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/lifecycle/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/lifecycle/supported-random-distro$ b/qa/suites/rgw/lifecycle/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw/lifecycle/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rgw/lifecycle/tasks/.qa b/qa/suites/rgw/lifecycle/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/lifecycle/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/lifecycle/tasks/rgw_s3tests.yaml b/qa/suites/rgw/lifecycle/tasks/rgw_s3tests.yaml new file mode 100644 index 000000000..a8675dcf1 --- /dev/null +++ b/qa/suites/rgw/lifecycle/tasks/rgw_s3tests.yaml @@ -0,0 +1,20 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- tox: [client.0] +- s3tests: + client.0: + rgw_server: client.0 + storage classes: LUKEWARM, FROZEN + extra_attrs: ["lifecycle"] + lc_debug_interval: 10 +overrides: + ceph: + conf: + client: + debug rgw: 20 + rgw lc debug interval: 10 + storage classes: LUKEWARM, FROZEN + rgw: + storage classes: LUKEWARM, FROZEN diff --git a/qa/suites/rgw/multifs/% b/qa/suites/rgw/multifs/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/multifs/% diff --git a/qa/suites/rgw/multifs/.qa b/qa/suites/rgw/multifs/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/multifs/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/clusters/.qa b/qa/suites/rgw/multifs/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/multifs/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/clusters/fixed-2.yaml b/qa/suites/rgw/multifs/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/multifs/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/frontend b/qa/suites/rgw/multifs/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/multifs/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/ignore-pg-availability.yaml b/qa/suites/rgw/multifs/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/multifs/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/objectstore b/qa/suites/rgw/multifs/objectstore new file mode 120000 index 000000000..f81d17413 --- /dev/null +++ b/qa/suites/rgw/multifs/objectstore @@ -0,0 +1 @@ +.qa/objectstore_cephfs
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/overrides.yaml b/qa/suites/rgw/multifs/overrides.yaml new file mode 100644 index 000000000..1cb489072 --- /dev/null +++ b/qa/suites/rgw/multifs/overrides.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + wait-for-scrub: false + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false + rgw: + storage classes: LUKEWARM, FROZEN diff --git a/qa/suites/rgw/multifs/rgw_pool_type b/qa/suites/rgw/multifs/rgw_pool_type new file mode 120000 index 000000000..3bbd28e96 --- /dev/null +++ b/qa/suites/rgw/multifs/rgw_pool_type @@ -0,0 +1 @@ +.qa/rgw_pool_type
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/s3tests-branch.yaml b/qa/suites/rgw/multifs/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/multifs/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/tasks/.qa b/qa/suites/rgw/multifs/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/multifs/tasks/rgw_bucket_quota.yaml b/qa/suites/rgw/multifs/tasks/rgw_bucket_quota.yaml new file mode 100644 index 000000000..e07c8b5cc --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_bucket_quota.yaml @@ -0,0 +1,13 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- workunit: + clients: + client.0: + - rgw/s3_bucket_quota.pl +overrides: + ceph: + conf: + client: + rgw relaxed s3 bucket names: true diff --git a/qa/suites/rgw/multifs/tasks/rgw_multipart_upload.yaml b/qa/suites/rgw/multifs/tasks/rgw_multipart_upload.yaml new file mode 100644 index 000000000..bac4f4016 --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_multipart_upload.yaml @@ -0,0 +1,13 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- workunit: + clients: + client.0: + - rgw/s3_multipart_upload.pl +overrides: + ceph: + conf: + client: + rgw relaxed s3 bucket names: true diff --git a/qa/suites/rgw/multifs/tasks/rgw_ragweed.yaml b/qa/suites/rgw/multifs/tasks/rgw_ragweed.yaml new file mode 100644 index 000000000..ab9d21fca --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_ragweed.yaml @@ -0,0 +1,20 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- tox: [client.0] +- ragweed: + client.0: + default-branch: ceph-reef + rgw_server: client.0 + stages: prepare +- ragweed: + client.0: + default-branch: ceph-reef + rgw_server: client.0 + stages: check +overrides: + ceph: + conf: + client: + rgw lc debug interval: 10 diff --git a/qa/suites/rgw/multifs/tasks/rgw_s3tests.yaml b/qa/suites/rgw/multifs/tasks/rgw_s3tests.yaml new file mode 100644 index 000000000..7ff143243 --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_s3tests.yaml @@ -0,0 +1,13 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- tox: [client.0] +- s3tests: + client.0: + rgw_server: client.0 +overrides: + ceph: + conf: + client: + rgw lc debug interval: 10 diff --git a/qa/suites/rgw/multifs/tasks/rgw_user_quota.yaml b/qa/suites/rgw/multifs/tasks/rgw_user_quota.yaml new file mode 100644 index 000000000..92c63d2e8 --- /dev/null +++ b/qa/suites/rgw/multifs/tasks/rgw_user_quota.yaml @@ -0,0 +1,13 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] +- workunit: + clients: + client.0: + - rgw/s3_user_quota.pl +overrides: + ceph: + conf: + client: + rgw relaxed s3 bucket names: true diff --git a/qa/suites/rgw/multifs/ubuntu_latest.yaml b/qa/suites/rgw/multifs/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/rgw/multifs/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/multisite/% b/qa/suites/rgw/multisite/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/multisite/% diff --git a/qa/suites/rgw/multisite/.qa b/qa/suites/rgw/multisite/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/multisite/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/multisite/clusters.yaml b/qa/suites/rgw/multisite/clusters.yaml new file mode 100644 index 000000000..536ef7ca4 --- /dev/null +++ b/qa/suites/rgw/multisite/clusters.yaml @@ -0,0 +1,3 @@ +roles: +- [c1.mon.a, c1.mgr.x, c1.osd.0, c1.osd.1, c1.osd.2, c1.client.0, c1.client.1] +- [c2.mon.a, c2.mgr.x, c2.osd.0, c2.osd.1, c2.osd.2, c2.client.0, c2.client.1] diff --git a/qa/suites/rgw/multisite/frontend b/qa/suites/rgw/multisite/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/multisite/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/multisite/ignore-pg-availability.yaml b/qa/suites/rgw/multisite/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/multisite/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/multisite/notify.yaml b/qa/suites/rgw/multisite/notify.yaml new file mode 100644 index 000000000..00e0bb762 --- /dev/null +++ b/qa/suites/rgw/multisite/notify.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client.0: # disable notifications on one zone per cluster + rgw data notify interval msec: 0 diff --git a/qa/suites/rgw/multisite/omap_limits.yaml b/qa/suites/rgw/multisite/omap_limits.yaml new file mode 100644 index 000000000..9ad1ec9ee --- /dev/null +++ b/qa/suites/rgw/multisite/omap_limits.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + osd: + # remove the threshold so that test_bucket_index_log_trim() will test DeleteRange + rocksdb delete range threshold: 0 + # instead of expanding the matrix, run each osd with a different omap limit + osd.0: + osd_max_omap_entries_per_request: 10 + osd.1: + osd_max_omap_entries_per_request: 1000 + osd.2: + osd_max_omap_entries_per_request: 10000 diff --git a/qa/suites/rgw/multisite/overrides.yaml b/qa/suites/rgw/multisite/overrides.yaml new file mode 100644 index 000000000..76c14eb91 --- /dev/null +++ b/qa/suites/rgw/multisite/overrides.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + wait-for-scrub: false + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + debug rgw sync: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= + rgw crypt require ssl: false + rgw sync log trim interval: 0 + rgw curl low speed time: 300 + rgw md log max shards: 4 + rgw data log num shards: 4 + rgw sync obj etag verify: true + rgw sync meta inject err probability: 0.1 + rgw sync data inject err probability: 0.1 + rgw: + compression type: random diff --git a/qa/suites/rgw/multisite/realms/.qa b/qa/suites/rgw/multisite/realms/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/multisite/realms/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/multisite/realms/three-zones.yaml b/qa/suites/rgw/multisite/realms/three-zones.yaml new file mode 100644 index 000000000..95318b0f8 --- /dev/null +++ b/qa/suites/rgw/multisite/realms/three-zones.yaml @@ -0,0 +1,23 @@ +overrides: + rgw-multisite: + realm: + name: test-realm + is default: true + zonegroups: + - name: test-zonegroup + is_master: true + is_default: true + endpoints: [c1.client.0] + enabled_features: ['resharding'] + zones: + - name: test-zone1 + is_master: true + is_default: true + endpoints: [c1.client.0] + - name: test-zone2 + is_default: true + endpoints: [c2.client.0] + - name: test-zone3 + endpoints: [c1.client.1] + rgw-multisite-tests: + args: [tests.py] diff --git a/qa/suites/rgw/multisite/realms/two-zonegroup.yaml b/qa/suites/rgw/multisite/realms/two-zonegroup.yaml new file mode 100644 index 000000000..02710a7d9 --- /dev/null +++ b/qa/suites/rgw/multisite/realms/two-zonegroup.yaml @@ -0,0 +1,31 @@ +overrides: + rgw-multisite: + realm: + name: test-realm + is default: true + zonegroups: + - name: a + is_master: true + is_default: true + endpoints: [c1.client.0] + enabled_features: ['resharding'] + zones: + - name: a1 + is_master: true + is_default: true + endpoints: [c1.client.0] + - name: a2 + endpoints: [c1.client.1] + - name: b + is_default: true + endpoints: [c2.client.0] + enabled_features: ['resharding'] + zones: + - name: b1 + is_master: true + is_default: true + endpoints: [c2.client.0] + - name: b2 + endpoints: [c2.client.1] + rgw-multisite-tests: + args: [tests.py] diff --git a/qa/suites/rgw/multisite/supported-random-distro$ b/qa/suites/rgw/multisite/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw/multisite/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rgw/multisite/tasks/.qa b/qa/suites/rgw/multisite/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/multisite/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/multisite/tasks/test_multi.yaml b/qa/suites/rgw/multisite/tasks/test_multi.yaml new file mode 100644 index 000000000..1a65a67b5 --- /dev/null +++ b/qa/suites/rgw/multisite/tasks/test_multi.yaml @@ -0,0 +1,17 @@ +tasks: +- install: +- ceph: {cluster: c1} +- ceph: {cluster: c2} +- rgw: + c1.client.0: + port: 8000 + c1.client.1: + port: 8001 + c2.client.0: + port: 8000 + c2.client.1: + port: 8001 +- rgw-multisite: +- rgw-multisite-tests: + config: + reconfigure_delay: 60 diff --git a/qa/suites/rgw/multisite/valgrind.yaml.disabled b/qa/suites/rgw/multisite/valgrind.yaml.disabled new file mode 100644 index 000000000..36e20f02c --- /dev/null +++ b/qa/suites/rgw/multisite/valgrind.yaml.disabled @@ -0,0 +1,20 @@ +overrides: + install: + ceph: + ceph: + conf: + global: + osd heartbeat grace: 40 + mon: + mon osd crush smoke test: false + osd: + osd fast shutdown: false + rgw: + c1.client.0: + valgrind: [--tool=memcheck, --max-threads=1024] # http://tracker.ceph.com/issues/25214 + c1.client.1: + valgrind: [--tool=memcheck, --max-threads=1024] + c2.client.0: + valgrind: [--tool=memcheck, --max-threads=1024] + c2.client.1: + valgrind: [--tool=memcheck, --max-threads=1024] diff --git a/qa/suites/rgw/notifications/% b/qa/suites/rgw/notifications/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/notifications/% diff --git a/qa/suites/rgw/notifications/.qa b/qa/suites/rgw/notifications/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/notifications/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/beast.yaml b/qa/suites/rgw/notifications/beast.yaml new file mode 120000 index 000000000..09ced62c4 --- /dev/null +++ b/qa/suites/rgw/notifications/beast.yaml @@ -0,0 +1 @@ +.qa/rgw_frontend/beast.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/bluestore-bitmap.yaml b/qa/suites/rgw/notifications/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/rgw/notifications/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/fixed-2.yaml b/qa/suites/rgw/notifications/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/notifications/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/ignore-pg-availability.yaml b/qa/suites/rgw/notifications/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/notifications/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/overrides.yaml b/qa/suites/rgw/notifications/overrides.yaml new file mode 100644 index 000000000..1cb489072 --- /dev/null +++ b/qa/suites/rgw/notifications/overrides.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + wait-for-scrub: false + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false + rgw: + storage classes: LUKEWARM, FROZEN diff --git a/qa/suites/rgw/notifications/supported-all-distro$/$ b/qa/suites/rgw/notifications/supported-all-distro$/$ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/notifications/supported-all-distro$/$ diff --git a/qa/suites/rgw/notifications/supported-all-distro$/.qa b/qa/suites/rgw/notifications/supported-all-distro$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/notifications/supported-all-distro$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/supported-all-distro$/centos_8.yaml b/qa/suites/rgw/notifications/supported-all-distro$/centos_8.yaml new file mode 120000 index 000000000..c23fd0540 --- /dev/null +++ b/qa/suites/rgw/notifications/supported-all-distro$/centos_8.yaml @@ -0,0 +1 @@ +../.qa/distros/supported-all-distro/centos_8.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/tasks/+ b/qa/suites/rgw/notifications/tasks/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/+ diff --git a/qa/suites/rgw/notifications/tasks/.qa b/qa/suites/rgw/notifications/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/notifications/tasks/0-install.yaml b/qa/suites/rgw/notifications/tasks/0-install.yaml new file mode 100644 index 000000000..013ccbd26 --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/0-install.yaml @@ -0,0 +1,15 @@ +tasks: +- install: +- ceph: +- openssl_keys: +- rgw: + client.0: + +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 10 + osd_max_pg_log_entries: 10 + client: + rgw lc debug interval: 10 diff --git a/qa/suites/rgw/notifications/tasks/test_amqp.yaml b/qa/suites/rgw/notifications/tasks/test_amqp.yaml new file mode 100644 index 000000000..6807cfb65 --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/test_amqp.yaml @@ -0,0 +1,7 @@ +tasks: +- rabbitmq: + client.0: +- notification-tests: + client.0: + extra_attr: ["amqp_test"] + rgw_server: client.0 diff --git a/qa/suites/rgw/notifications/tasks/test_kafka.yaml b/qa/suites/rgw/notifications/tasks/test_kafka.yaml new file mode 100644 index 000000000..ae647df38 --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/test_kafka.yaml @@ -0,0 +1,8 @@ +tasks: +- kafka: + client.0: + kafka_version: 2.6.0 +- notification-tests: + client.0: + extra_attr: ["kafka_test"] + rgw_server: client.0 diff --git a/qa/suites/rgw/notifications/tasks/test_others.yaml b/qa/suites/rgw/notifications/tasks/test_others.yaml new file mode 100644 index 000000000..793f6f430 --- /dev/null +++ b/qa/suites/rgw/notifications/tasks/test_others.yaml @@ -0,0 +1,4 @@ +tasks: +- notification-tests: + client.0: + rgw_server: client.0 diff --git a/qa/suites/rgw/service-token/% b/qa/suites/rgw/service-token/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/service-token/% diff --git a/qa/suites/rgw/service-token/.qa b/qa/suites/rgw/service-token/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/service-token/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/service-token/clusters/.qa b/qa/suites/rgw/service-token/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/service-token/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/service-token/clusters/fixed-1.yaml b/qa/suites/rgw/service-token/clusters/fixed-1.yaml new file mode 120000 index 000000000..02df5dd0c --- /dev/null +++ b/qa/suites/rgw/service-token/clusters/fixed-1.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-1.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/service-token/frontend b/qa/suites/rgw/service-token/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/service-token/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/service-token/ignore-pg-availability.yaml b/qa/suites/rgw/service-token/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/service-token/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/service-token/overrides.yaml b/qa/suites/rgw/service-token/overrides.yaml new file mode 100644 index 000000000..c727ec3fd --- /dev/null +++ b/qa/suites/rgw/service-token/overrides.yaml @@ -0,0 +1,22 @@ +overrides: + ceph: + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw keystone api version: 3 + rgw keystone url: http://localhost:5000 + rgw keystone accepted roles: admin,Member + rgw keystone implicit tenants: true + rgw keystone accepted admin roles: admin + rgw swift enforce content length: true + rgw swift account in url: true + rgw swift versioning enabled: true + rgw keystone admin domain: Default + rgw keystone admin user: admin + rgw keystone admin password: ADMIN + rgw keystone admin project: admin + rgw keystone service token enabled: true + rgw keystone service token accepted roles: admin + rgw keystone expired token cache expiration: 10 diff --git a/qa/suites/rgw/service-token/tasks/.qa b/qa/suites/rgw/service-token/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/service-token/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/service-token/tasks/service-token.yaml b/qa/suites/rgw/service-token/tasks/service-token.yaml new file mode 100644 index 000000000..8aef1985b --- /dev/null +++ b/qa/suites/rgw/service-token/tasks/service-token.yaml @@ -0,0 +1,11 @@ +tasks: +- install: +- ceph: +- rgw: + client.0: + port: 8000 +- workunit: + basedir: qa/workunits/rgw + clients: + client.0: + - keystone-service-token.sh diff --git a/qa/suites/rgw/service-token/ubuntu_latest.yaml b/qa/suites/rgw/service-token/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/rgw/service-token/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/singleton/% b/qa/suites/rgw/singleton/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/singleton/% diff --git a/qa/suites/rgw/singleton/.qa b/qa/suites/rgw/singleton/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/singleton/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/singleton/all/.qa b/qa/suites/rgw/singleton/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/singleton/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/singleton/all/radosgw-admin.yaml b/qa/suites/rgw/singleton/all/radosgw-admin.yaml new file mode 100644 index 000000000..010a0647c --- /dev/null +++ b/qa/suites/rgw/singleton/all/radosgw-admin.yaml @@ -0,0 +1,21 @@ +roles: +- [mon.a, osd.0] +- [mgr.x, client.0, osd.1, osd.2, osd.3] +openstack: +- volumes: # attached to each instance + count: 3 + size: 10 # GB +tasks: +- install: +- ceph: + conf: + client: + debug ms: 1 + rgw gc obj min wait: 15 + osd: + debug ms: 1 + debug objclass : 20 +- rgw: + client.0: +- radosgw-admin: +- radosgw-admin-rest: diff --git a/qa/suites/rgw/singleton/frontend b/qa/suites/rgw/singleton/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/singleton/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/singleton/ignore-pg-availability.yaml b/qa/suites/rgw/singleton/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/singleton/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/singleton/objectstore b/qa/suites/rgw/singleton/objectstore new file mode 120000 index 000000000..f81d17413 --- /dev/null +++ b/qa/suites/rgw/singleton/objectstore @@ -0,0 +1 @@ +.qa/objectstore_cephfs
\ No newline at end of file diff --git a/qa/suites/rgw/singleton/overrides.yaml b/qa/suites/rgw/singleton/overrides.yaml new file mode 100644 index 000000000..d2aea790a --- /dev/null +++ b/qa/suites/rgw/singleton/overrides.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + wait-for-scrub: false + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 diff --git a/qa/suites/rgw/singleton/rgw_pool_type b/qa/suites/rgw/singleton/rgw_pool_type new file mode 120000 index 000000000..3bbd28e96 --- /dev/null +++ b/qa/suites/rgw/singleton/rgw_pool_type @@ -0,0 +1 @@ +.qa/rgw_pool_type
\ No newline at end of file diff --git a/qa/suites/rgw/singleton/supported-random-distro$ b/qa/suites/rgw/singleton/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw/singleton/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rgw/sts/% b/qa/suites/rgw/sts/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/sts/% diff --git a/qa/suites/rgw/sts/.qa b/qa/suites/rgw/sts/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/sts/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/sts/cluster.yaml b/qa/suites/rgw/sts/cluster.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/sts/cluster.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/sts/ignore-pg-availability.yaml b/qa/suites/rgw/sts/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/sts/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/sts/objectstore.yaml b/qa/suites/rgw/sts/objectstore.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/rgw/sts/objectstore.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/sts/overrides.yaml b/qa/suites/rgw/sts/overrides.yaml new file mode 100644 index 000000000..1cb489072 --- /dev/null +++ b/qa/suites/rgw/sts/overrides.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + wait-for-scrub: false + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false + rgw: + storage classes: LUKEWARM, FROZEN diff --git a/qa/suites/rgw/sts/pool-type.yaml b/qa/suites/rgw/sts/pool-type.yaml new file mode 120000 index 000000000..697b33b20 --- /dev/null +++ b/qa/suites/rgw/sts/pool-type.yaml @@ -0,0 +1 @@ +.qa/rgw_pool_type/replicated.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/sts/rgw_frontend b/qa/suites/rgw/sts/rgw_frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/sts/rgw_frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/sts/s3tests-branch.yaml b/qa/suites/rgw/sts/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/sts/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/sts/supported-random-distro$ b/qa/suites/rgw/sts/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw/sts/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
\ No newline at end of file diff --git a/qa/suites/rgw/sts/tasks/+ b/qa/suites/rgw/sts/tasks/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/sts/tasks/+ diff --git a/qa/suites/rgw/sts/tasks/.qa b/qa/suites/rgw/sts/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/sts/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/sts/tasks/0-install.yaml b/qa/suites/rgw/sts/tasks/0-install.yaml new file mode 100644 index 000000000..013ccbd26 --- /dev/null +++ b/qa/suites/rgw/sts/tasks/0-install.yaml @@ -0,0 +1,15 @@ +tasks: +- install: +- ceph: +- openssl_keys: +- rgw: + client.0: + +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 10 + osd_max_pg_log_entries: 10 + client: + rgw lc debug interval: 10 diff --git a/qa/suites/rgw/sts/tasks/first.yaml b/qa/suites/rgw/sts/tasks/first.yaml new file mode 100644 index 000000000..db785ba73 --- /dev/null +++ b/qa/suites/rgw/sts/tasks/first.yaml @@ -0,0 +1,16 @@ +tasks: +- tox: [ client.0 ] +- keycloak: + client.0: + keycloak_version: 11.0.0 +- s3tests: + client.0: + sts_tests: True + rgw_server: client.0 + extra_attrs: ['webidentity_test'] +overrides: + ceph: + conf: + client: + rgw sts key: abcdefghijklmnop + rgw s3 auth use sts: true diff --git a/qa/suites/rgw/sts/tasks/ststests.yaml b/qa/suites/rgw/sts/tasks/ststests.yaml new file mode 100644 index 000000000..175071b89 --- /dev/null +++ b/qa/suites/rgw/sts/tasks/ststests.yaml @@ -0,0 +1,12 @@ +tasks: +- s3tests: + client.0: + sts_tests: True + extra_attrs: ["test_of_sts"] + rgw_server: client.0 +overrides: + ceph: + conf: + client: + rgw sts key: abcdefghijklmnop + rgw s3 auth use sts: true diff --git a/qa/suites/rgw/tempest/% b/qa/suites/rgw/tempest/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/tempest/% diff --git a/qa/suites/rgw/tempest/.qa b/qa/suites/rgw/tempest/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/tempest/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/tempest/clusters/.qa b/qa/suites/rgw/tempest/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/tempest/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/tempest/clusters/fixed-1.yaml b/qa/suites/rgw/tempest/clusters/fixed-1.yaml new file mode 120000 index 000000000..02df5dd0c --- /dev/null +++ b/qa/suites/rgw/tempest/clusters/fixed-1.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-1.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/tempest/frontend b/qa/suites/rgw/tempest/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/tempest/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/tempest/ignore-pg-availability.yaml b/qa/suites/rgw/tempest/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/tempest/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/tempest/overrides.yaml b/qa/suites/rgw/tempest/overrides.yaml new file mode 100644 index 000000000..e7a292ffd --- /dev/null +++ b/qa/suites/rgw/tempest/overrides.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 diff --git a/qa/suites/rgw/tempest/tasks/.qa b/qa/suites/rgw/tempest/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/tempest/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/tempest/tasks/rgw_tempest.yaml b/qa/suites/rgw/tempest/tasks/rgw_tempest.yaml new file mode 100644 index 000000000..ad9dc9dd5 --- /dev/null +++ b/qa/suites/rgw/tempest/tasks/rgw_tempest.yaml @@ -0,0 +1,72 @@ +tasks: +- install: +- ceph: +- tox: [ client.0 ] +- keystone: + client.0: + force-branch: stable/2023.1 + services: + - name: swift + type: object-store + description: Swift Service +- rgw: + client.0: + frontend_prefix: /swift + use-keystone-role: client.0 +- tempest: + client.0: + sha1: 34.1.0 + force-branch: master + use-keystone-role: client.0 + auth: + admin_username: admin + admin_project_name: admin + admin_password: ADMIN + admin_domain_name: Default + tempest_roles: admin + identity: + uri: http://{keystone_public_host}:{keystone_public_port}/v2.0/ + uri_v3: http://{keystone_public_host}:{keystone_public_port}/v3/ + auth_version: v3 + admin_role: admin + default_domain_name: Default + object-storage: + reseller_admin_role: admin + object-storage-feature-enabled: + container_sync: false + discoverability: true + # TODO(tobias-urdin): Use sha256 when supported in RadosGW + tempurl_digest_hashlib: sha1 + blocklist: + - .*test_account_quotas_negative.AccountQuotasNegativeTest.test_user_modify_quota + - .*test_container_acl_negative.ObjectACLsNegativeTest.* + - .*test_container_services_negative.ContainerNegativeTest.test_create_container_metadata_.* + - .*test_container_staticweb.StaticWebTest.test_web_index + - .*test_container_staticweb.StaticWebTest.test_web_listing_css + - .*test_container_synchronization.* + - .*test_object_services.PublicObjectTest.test_access_public_container_object_without_using_creds + - .*test_object_services.ObjectTest.test_create_object_with_transfer_encoding + - .*test_container_services.ContainerTest.test_create_container_with_remove_metadata_key + - .*test_container_services.ContainerTest.test_create_container_with_remove_metadata_value + - .*test_object_expiry.ObjectExpiryTest.test_get_object_after_expiry_time + - .*test_object_expiry.ObjectExpiryTest.test_get_object_at_expiry_time + - .*test_account_services.AccountTest.test_list_no_account_metadata + +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 10 + osd_max_pg_log_entries: 10 + client: + rgw keystone api version: 3 + rgw keystone accepted roles: admin,member + rgw keystone implicit tenants: true + rgw keystone accepted admin roles: admin + rgw swift enforce content length: true + rgw swift account in url: true + rgw swift versioning enabled: true + rgw keystone admin domain: Default + rgw keystone admin user: admin + rgw keystone admin password: ADMIN + rgw keystone admin project: admin diff --git a/qa/suites/rgw/tempest/ubuntu_latest.yaml b/qa/suites/rgw/tempest/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/rgw/tempest/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/% b/qa/suites/rgw/thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/thrash/% diff --git a/qa/suites/rgw/thrash/.qa b/qa/suites/rgw/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/clusters/.qa b/qa/suites/rgw/thrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/thrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/clusters/fixed-2.yaml b/qa/suites/rgw/thrash/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/thrash/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/frontend b/qa/suites/rgw/thrash/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/thrash/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/ignore-pg-availability.yaml b/qa/suites/rgw/thrash/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/thrash/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/install.yaml b/qa/suites/rgw/thrash/install.yaml new file mode 100644 index 000000000..84a1d70cf --- /dev/null +++ b/qa/suites/rgw/thrash/install.yaml @@ -0,0 +1,5 @@ +tasks: +- install: +- ceph: +- rgw: [client.0] + diff --git a/qa/suites/rgw/thrash/objectstore b/qa/suites/rgw/thrash/objectstore new file mode 120000 index 000000000..f81d17413 --- /dev/null +++ b/qa/suites/rgw/thrash/objectstore @@ -0,0 +1 @@ +.qa/objectstore_cephfs
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/s3tests-branch.yaml b/qa/suites/rgw/thrash/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/thrash/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/thrasher/.qa b/qa/suites/rgw/thrash/thrasher/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/thrash/thrasher/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/thrasher/default.yaml b/qa/suites/rgw/thrash/thrasher/default.yaml new file mode 100644 index 000000000..1f35f1bc9 --- /dev/null +++ b/qa/suites/rgw/thrash/thrasher/default.yaml @@ -0,0 +1,9 @@ +tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgnum_shrink: 1 + chance_pgpnum_fix: 1 + op_delay: 30 + chance_test_min_size: 0 + ceph_objectstore_tool: false diff --git a/qa/suites/rgw/thrash/thrashosds-health.yaml b/qa/suites/rgw/thrash/thrashosds-health.yaml new file mode 120000 index 000000000..9124eb1aa --- /dev/null +++ b/qa/suites/rgw/thrash/thrashosds-health.yaml @@ -0,0 +1 @@ +.qa/tasks/thrashosds-health.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/ubuntu_latest.yaml b/qa/suites/rgw/thrash/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/rgw/thrash/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/workload/.qa b/qa/suites/rgw/thrash/workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/thrash/workload/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/thrash/workload/rgw_bucket_quota.yaml b/qa/suites/rgw/thrash/workload/rgw_bucket_quota.yaml new file mode 100644 index 000000000..bc441ff32 --- /dev/null +++ b/qa/suites/rgw/thrash/workload/rgw_bucket_quota.yaml @@ -0,0 +1,10 @@ +tasks: +- workunit: + clients: + client.0: + - rgw/s3_bucket_quota.pl +overrides: + ceph: + conf: + client: + rgw relaxed s3 bucket names: true diff --git a/qa/suites/rgw/thrash/workload/rgw_multipart_upload.yaml b/qa/suites/rgw/thrash/workload/rgw_multipart_upload.yaml new file mode 100644 index 000000000..1b98f2625 --- /dev/null +++ b/qa/suites/rgw/thrash/workload/rgw_multipart_upload.yaml @@ -0,0 +1,10 @@ +tasks: +- workunit: + clients: + client.0: + - rgw/s3_multipart_upload.pl +overrides: + ceph: + conf: + client: + rgw relaxed s3 bucket names: true diff --git a/qa/suites/rgw/thrash/workload/rgw_s3tests.yaml b/qa/suites/rgw/thrash/workload/rgw_s3tests.yaml new file mode 100644 index 000000000..6a59dc905 --- /dev/null +++ b/qa/suites/rgw/thrash/workload/rgw_s3tests.yaml @@ -0,0 +1,13 @@ +tasks: +- tox: [client.0] +- s3tests: + client.0: + rgw_server: client.0 +overrides: + ceph: + conf: + client: + rgw lc debug interval: 10 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false diff --git a/qa/suites/rgw/thrash/workload/rgw_user_quota.yaml b/qa/suites/rgw/thrash/workload/rgw_user_quota.yaml new file mode 100644 index 000000000..75ba3d474 --- /dev/null +++ b/qa/suites/rgw/thrash/workload/rgw_user_quota.yaml @@ -0,0 +1,10 @@ +tasks: +- workunit: + clients: + client.0: + - rgw/s3_user_quota.pl +overrides: + ceph: + conf: + client: + rgw relaxed s3 bucket names: true diff --git a/qa/suites/rgw/tools/+ b/qa/suites/rgw/tools/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/tools/+ diff --git a/qa/suites/rgw/tools/.qa b/qa/suites/rgw/tools/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/tools/.qa @@ -0,0 +1 @@ +../.qa/
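Taken together, the rgw/thrash fragments compose into individual jobs. As an illustrative sketch only (not a file added by this change), roughly what one job looks like after install.yaml, thrasher/default.yaml and the rgw_s3tests workload are combined, assuming the usual concatenation of tasks and merge of overrides performed by the suite tooling:

tasks:
- install:
- ceph:
- rgw: [client.0]
- thrashosds:
    timeout: 1200
    chance_pgnum_grow: 1
    chance_pgnum_shrink: 1
    chance_pgpnum_fix: 1
    op_delay: 30
    chance_test_min_size: 0
    ceph_objectstore_tool: false
- tox: [client.0]
- s3tests:
    client.0:
      rgw_server: client.0
# overrides from the workload (rgw lc debug interval, crypt settings) and from
# thrashosds-health.yaml are merged in as well; omitted here for brevity.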
\ No newline at end of file diff --git a/qa/suites/rgw/tools/centos_latest.yaml b/qa/suites/rgw/tools/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/rgw/tools/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/tools/cluster.yaml b/qa/suites/rgw/tools/cluster.yaml new file mode 100644 index 000000000..0eab7ebad --- /dev/null +++ b/qa/suites/rgw/tools/cluster.yaml @@ -0,0 +1,9 @@ +roles: +- [mon.a, osd.0, osd.1, osd.2, mgr.0, client.0] +openstack: +- volumes: # attached to each instance + count: 1 + size: 10 # GB +overrides: + rgw: + frontend: beast
\ No newline at end of file diff --git a/qa/suites/rgw/tools/ignore-pg-availability.yaml b/qa/suites/rgw/tools/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/tools/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/tools/tasks.yaml b/qa/suites/rgw/tools/tasks.yaml new file mode 100644 index 000000000..acceb21c8 --- /dev/null +++ b/qa/suites/rgw/tools/tasks.yaml @@ -0,0 +1,19 @@ +tasks: +- install: +- ceph: +- rgw: + client.0: + # force rgw_dns_name to be set with the fully qualified host name; + # it will be appended to the empty string + dns-name: '' +- workunit: + clients: + client.0: + - rgw/test_rgw_orphan_list.sh +overrides: + ceph: + conf: + client: + debug rgw: 20 + debug ms: 1 + rgw enable static website: false diff --git a/qa/suites/rgw/upgrade/% b/qa/suites/rgw/upgrade/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/upgrade/% diff --git a/qa/suites/rgw/upgrade/.qa b/qa/suites/rgw/upgrade/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/rgw/upgrade/.qa @@ -0,0 +1 @@ +../.qa
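The empty dns-name in the rgw/tools tasks above works because the rgw task appends the remote's fully qualified hostname to whatever value is given. A minimal sketch, assuming the same option, of the non-empty form used later in this change by the website suite, where a prefix such as s3. yields an rgw_dns_name of s3.<fqdn>:

tasks:
- rgw:
    client.0:
      # the client's fully qualified hostname is appended to the prefix
      dns-name: s3.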
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/.qa b/qa/suites/rgw/upgrade/1-install/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/pacific/% b/qa/suites/rgw/upgrade/1-install/pacific/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/pacific/% diff --git a/qa/suites/rgw/upgrade/1-install/pacific/.qa b/qa/suites/rgw/upgrade/1-install/pacific/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/pacific/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/pacific/distro$/.qa b/qa/suites/rgw/upgrade/1-install/pacific/distro$/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/pacific/distro$/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/pacific/distro$/centos_8.stream.yaml b/qa/suites/rgw/upgrade/1-install/pacific/distro$/centos_8.stream.yaml new file mode 120000 index 000000000..5dceec7e2 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/pacific/distro$/centos_8.stream.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_8.stream.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/pacific/distro$/ubuntu_20.04.yaml b/qa/suites/rgw/upgrade/1-install/pacific/distro$/ubuntu_20.04.yaml new file mode 120000 index 000000000..29fb99ae2 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/pacific/distro$/ubuntu_20.04.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_20.04.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/pacific/install.yaml b/qa/suites/rgw/upgrade/1-install/pacific/install.yaml new file mode 100644 index 000000000..ec89e7921 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/pacific/install.yaml @@ -0,0 +1,5 @@ +tasks: +- install: + branch: pacific + exclude_packages: + - ceph-volume diff --git a/qa/suites/rgw/upgrade/1-install/pacific/overrides.yaml b/qa/suites/rgw/upgrade/1-install/pacific/overrides.yaml new file mode 100644 index 000000000..d882523e0 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/pacific/overrides.yaml @@ -0,0 +1,3 @@ +overrides: + ragweed: + default-branch: ceph-reef # ceph-pacific doesn't have tox, but tests are the same diff --git a/qa/suites/rgw/upgrade/1-install/quincy/% b/qa/suites/rgw/upgrade/1-install/quincy/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/quincy/% diff --git a/qa/suites/rgw/upgrade/1-install/quincy/.qa b/qa/suites/rgw/upgrade/1-install/quincy/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/quincy/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/quincy/distro$/.qa b/qa/suites/rgw/upgrade/1-install/quincy/distro$/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/quincy/distro$/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/quincy/distro$/centos_latest.yaml b/qa/suites/rgw/upgrade/1-install/quincy/distro$/centos_latest.yaml new file mode 120000 index 000000000..bd9854e70 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/quincy/distro$/centos_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/centos_latest.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/quincy/distro$/ubuntu_20.04.yaml b/qa/suites/rgw/upgrade/1-install/quincy/distro$/ubuntu_20.04.yaml new file mode 120000 index 000000000..29fb99ae2 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/quincy/distro$/ubuntu_20.04.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_20.04.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/1-install/quincy/install.yaml b/qa/suites/rgw/upgrade/1-install/quincy/install.yaml new file mode 100644 index 000000000..a96f09ee7 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/quincy/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: + branch: quincy diff --git a/qa/suites/rgw/upgrade/1-install/quincy/overrides.yaml b/qa/suites/rgw/upgrade/1-install/quincy/overrides.yaml new file mode 100644 index 000000000..02508d329 --- /dev/null +++ b/qa/suites/rgw/upgrade/1-install/quincy/overrides.yaml @@ -0,0 +1,3 @@ +overrides: + ragweed: + default-branch: ceph-reef # ceph-quincy doesn't have tox, but tests are the same diff --git a/qa/suites/rgw/upgrade/2-setup.yaml b/qa/suites/rgw/upgrade/2-setup.yaml new file mode 100644 index 000000000..f6627398e --- /dev/null +++ b/qa/suites/rgw/upgrade/2-setup.yaml @@ -0,0 +1,8 @@ +tasks: +- ceph: +- rgw: [client.0, client.1] +- tox: [client.0, client.1] +- print: "installing upgraded packages" +- install.upgrade: + mon.a: + mon.b: diff --git a/qa/suites/rgw/upgrade/3-upgrade-sequence/osds-then-rgws.yaml b/qa/suites/rgw/upgrade/3-upgrade-sequence/osds-then-rgws.yaml new file mode 100644 index 000000000..22bfbc3d2 --- /dev/null +++ b/qa/suites/rgw/upgrade/3-upgrade-sequence/osds-then-rgws.yaml @@ -0,0 +1,27 @@ +tasks: +- print: "ragweed prepare before upgrade" +- ragweed: + client.0: + stages: prepare +- print: "restarting upgraded osds" +- ceph.restart: + daemons: [osd.0, osd.2] +- ceph.restart: + daemons: [osd.1, osd.3] +- ceph.restart: + daemons: [osd.4, osd.6] +- ceph.restart: + daemons: [osd.5, osd.7] +- print: "ragweed check/prepare after osd upgrade" +- ragweed: + client.0: + stages: check + client.1: + stages: prepare +- print: "restarting upgraded rgw" +- ceph.restart: + daemons: [rgw.*] +- print: "ragweed check after rgw upgrade" +- ragweed: + client.1: + stages: check diff --git a/qa/suites/rgw/upgrade/3-upgrade-sequence/rgws-then-osds.yaml b/qa/suites/rgw/upgrade/3-upgrade-sequence/rgws-then-osds.yaml new file mode 100644 index 000000000..662750bee --- /dev/null +++ b/qa/suites/rgw/upgrade/3-upgrade-sequence/rgws-then-osds.yaml @@ -0,0 +1,27 @@ +tasks: +- print: "ragweed prepare before upgrade" +- ragweed: + client.0: + stages: prepare +- print: "restarting upgraded rgws" +- ceph.restart: + daemons: [rgw.*] +- print: "ragweed check/prepare after rgw upgrade" +- ragweed: + client.0: + stages: check + client.1: + stages: prepare +- print: "restarting upgraded osds" +- ceph.restart: + daemons: [osd.0, osd.2] +- ceph.restart: + daemons: [osd.1, osd.3] +- ceph.restart: + daemons: [osd.4, osd.6] +- ceph.restart: + daemons: [osd.5, osd.7] +- print: "ragweed check after osd upgrade" +- ragweed: + client.1: + stages: check diff --git a/qa/suites/rgw/upgrade/cluster.yaml b/qa/suites/rgw/upgrade/cluster.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/upgrade/cluster.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/frontend b/qa/suites/rgw/upgrade/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/upgrade/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/ignore-pg-availability.yaml b/qa/suites/rgw/upgrade/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/upgrade/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/objectstore b/qa/suites/rgw/upgrade/objectstore new file mode 120000 index 000000000..f81d17413 --- /dev/null +++ b/qa/suites/rgw/upgrade/objectstore @@ -0,0 +1 @@ +.qa/objectstore_cephfs
\ No newline at end of file diff --git a/qa/suites/rgw/upgrade/overrides.yaml b/qa/suites/rgw/upgrade/overrides.yaml new file mode 100644 index 000000000..00592d459 --- /dev/null +++ b/qa/suites/rgw/upgrade/overrides.yaml @@ -0,0 +1,20 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(MGR_DOWN\) + - \(OSD_DOWN\) + - \(PG_AVAILABILITY\) + - \(PG_DEGRADED\) + - slow request + - failed to encode map + wait-for-scrub: false + conf: + mon: + mon warn on osd down out interval zero: false + osd: + osd min pg log entries: 1 + osd max pg log entries: 2 + ragweed: + rgw_server: client.0 diff --git a/qa/suites/rgw/verify/% b/qa/suites/rgw/verify/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/verify/% diff --git a/qa/suites/rgw/verify/.qa b/qa/suites/rgw/verify/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/.qa @@ -0,0 +1 @@ +../.qa/
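As elsewhere, the numbered rgw/upgrade fragments concatenate into a single job. An illustrative, condensed sketch of the resulting flow for a pacific starting point with the osds-then-rgws sequence (the real fragment restarts the OSDs in pairs and interleaves print tasks):

tasks:
- install:
    branch: pacific
    exclude_packages:
    - ceph-volume
- ceph:
- rgw: [client.0, client.1]
- tox: [client.0, client.1]
- install.upgrade:
    mon.a:
    mon.b:
- ragweed:
    client.0:
      stages: prepare
- ceph.restart:
    # condensed; the fragment restarts [osd.0, osd.2], [osd.1, osd.3], etc.
    daemons: [osd.0, osd.1, osd.2, osd.3, osd.4, osd.5, osd.6, osd.7]
- ragweed:
    client.0:
      stages: check
    client.1:
      stages: prepare
- ceph.restart:
    daemons: [rgw.*]
- ragweed:
    client.1:
      stages: check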
\ No newline at end of file diff --git a/qa/suites/rgw/verify/0-install.yaml b/qa/suites/rgw/verify/0-install.yaml new file mode 100644 index 000000000..252dd1f7f --- /dev/null +++ b/qa/suites/rgw/verify/0-install.yaml @@ -0,0 +1,20 @@ +tasks: +- install: + # extra packages added for the rgw-datacache task + extra_system_packages: + deb: ['s3cmd'] + rpm: ['s3cmd'] +- ceph: +- openssl_keys: +- rgw: + client.0: +- tox: [client.0] + +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 10 + osd_max_pg_log_entries: 10 + client: + rgw lc debug interval: 10 diff --git a/qa/suites/rgw/verify/clusters/.qa b/qa/suites/rgw/verify/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/verify/clusters/fixed-2.yaml b/qa/suites/rgw/verify/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/verify/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/verify/datacache/.qa b/qa/suites/rgw/verify/datacache/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/datacache/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/verify/datacache/no_datacache.yaml b/qa/suites/rgw/verify/datacache/no_datacache.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/verify/datacache/no_datacache.yaml diff --git a/qa/suites/rgw/verify/datacache/rgw-datacache.yaml b/qa/suites/rgw/verify/datacache/rgw-datacache.yaml new file mode 100644 index 000000000..f5f8c94d5 --- /dev/null +++ b/qa/suites/rgw/verify/datacache/rgw-datacache.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + conf: + client: + rgw d3n l1 local datacache enabled: true + rgw enable ops log: true + rgw d3n l1 datacache persistent path: /tmp/rgw_datacache/ + rgw d3n l1 datacache size: 10737418240 + rgw: + datacache: true + datacache_path: /tmp/rgw_datacache +tasks: +- workunit: + clients: + client.0: + - rgw/run-datacache.sh + env: + RGW_DATACACHE_PATH: /tmp/rgw_datacache diff --git a/qa/suites/rgw/verify/frontend b/qa/suites/rgw/verify/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/verify/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/verify/ignore-pg-availability.yaml b/qa/suites/rgw/verify/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/verify/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/verify/inline-data$/off.yaml b/qa/suites/rgw/verify/inline-data$/off.yaml new file mode 100644 index 000000000..56d1bce10 --- /dev/null +++ b/qa/suites/rgw/verify/inline-data$/off.yaml @@ -0,0 +1,3 @@ +overrides: + rgw: + inline data: false diff --git a/qa/suites/rgw/verify/inline-data$/on.yaml b/qa/suites/rgw/verify/inline-data$/on.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/verify/inline-data$/on.yaml diff --git a/qa/suites/rgw/verify/msgr-failures/.qa b/qa/suites/rgw/verify/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/verify/msgr-failures/few.yaml b/qa/suites/rgw/verify/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/rgw/verify/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/rgw/verify/objectstore b/qa/suites/rgw/verify/objectstore new file mode 120000 index 000000000..f81d17413 --- /dev/null +++ b/qa/suites/rgw/verify/objectstore @@ -0,0 +1 @@ +.qa/objectstore_cephfs
\ No newline at end of file diff --git a/qa/suites/rgw/verify/overrides.yaml b/qa/suites/rgw/verify/overrides.yaml new file mode 100644 index 000000000..aefee7b70 --- /dev/null +++ b/qa/suites/rgw/verify/overrides.yaml @@ -0,0 +1,13 @@ +overrides: + ceph: + conf: + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false + rgw: + compression type: random + storage classes: LUKEWARM, FROZEN diff --git a/qa/suites/rgw/verify/proto/.qa b/qa/suites/rgw/verify/proto/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/proto/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/verify/proto/http.yaml b/qa/suites/rgw/verify/proto/http.yaml new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/verify/proto/http.yaml diff --git a/qa/suites/rgw/verify/proto/https.yaml b/qa/suites/rgw/verify/proto/https.yaml new file mode 100644 index 000000000..e0742b5b0 --- /dev/null +++ b/qa/suites/rgw/verify/proto/https.yaml @@ -0,0 +1,14 @@ +overrides: + openssl_keys: + root: + client: client.0 + key-type: rsa:4096 + cn: teuthology + install: [client.0] + rgw.client.0: + client: client.0 + ca: root + embed-key: true + rgw: + client.0: + ssl certificate: rgw.client.0 diff --git a/qa/suites/rgw/verify/rgw_pool_type b/qa/suites/rgw/verify/rgw_pool_type new file mode 120000 index 000000000..3bbd28e96 --- /dev/null +++ b/qa/suites/rgw/verify/rgw_pool_type @@ -0,0 +1 @@ +.qa/rgw_pool_type
\ No newline at end of file diff --git a/qa/suites/rgw/verify/s3tests-branch.yaml b/qa/suites/rgw/verify/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/verify/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/verify/sharding$ b/qa/suites/rgw/verify/sharding$ new file mode 120000 index 000000000..148393cbf --- /dev/null +++ b/qa/suites/rgw/verify/sharding$ @@ -0,0 +1 @@ +.qa/rgw_bucket_sharding
\ No newline at end of file diff --git a/qa/suites/rgw/verify/striping$/.qa b/qa/suites/rgw/verify/striping$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/striping$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/verify/striping$/stripe-equals-chunk.yaml b/qa/suites/rgw/verify/striping$/stripe-equals-chunk.yaml new file mode 100644 index 000000000..9b3e20a81 --- /dev/null +++ b/qa/suites/rgw/verify/striping$/stripe-equals-chunk.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + client: + # use default values where chunk-size=stripe-size + #rgw max chunk size: 4194304 + #rgw obj stripe size: 4194304 diff --git a/qa/suites/rgw/verify/striping$/stripe-greater-than-chunk.yaml b/qa/suites/rgw/verify/striping$/stripe-greater-than-chunk.yaml new file mode 100644 index 000000000..3bf40d6da --- /dev/null +++ b/qa/suites/rgw/verify/striping$/stripe-greater-than-chunk.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + client: + rgw max chunk size: 4194304 + # stripe size greater than (and not a multiple of) chunk size + rgw obj stripe size: 6291456 diff --git a/qa/suites/rgw/verify/supported-random-distro$ b/qa/suites/rgw/verify/supported-random-distro$ new file mode 120000 index 000000000..0862b4457 --- /dev/null +++ b/qa/suites/rgw/verify/supported-random-distro$ @@ -0,0 +1 @@ +.qa/distros/supported-random-distro$
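The two striping$ fragments cover a stripe equal to the chunk and a stripe that is not a multiple of the chunk. A hypothetical third variant (not part of this change) exercising a stripe that is an exact multiple of the chunk size would read:

overrides:
  ceph:
    conf:
      client:
        rgw max chunk size: 4194304
        # stripe size an exact multiple (2x) of the chunk size
        rgw obj stripe size: 8388608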
\ No newline at end of file diff --git a/qa/suites/rgw/verify/tasks/+ b/qa/suites/rgw/verify/tasks/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/verify/tasks/+ diff --git a/qa/suites/rgw/verify/tasks/.qa b/qa/suites/rgw/verify/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/verify/tasks/bucket-check.yaml b/qa/suites/rgw/verify/tasks/bucket-check.yaml new file mode 100644 index 000000000..4955d41c6 --- /dev/null +++ b/qa/suites/rgw/verify/tasks/bucket-check.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rgw/run-bucket-check.sh diff --git a/qa/suites/rgw/verify/tasks/cls.yaml b/qa/suites/rgw/verify/tasks/cls.yaml new file mode 100644 index 000000000..936c489bf --- /dev/null +++ b/qa/suites/rgw/verify/tasks/cls.yaml @@ -0,0 +1,16 @@ +tasks: +- workunit: + clients: + client.0: + - cls/test_cls_lock.sh + - cls/test_cls_log.sh + - cls/test_cls_refcount.sh + - cls/test_cls_rgw.sh + - cls/test_cls_rgw_gc.sh + - cls/test_cls_rgw_stats.sh + - cls/test_cls_cmpomap.sh + - cls/test_cls_2pc_queue.sh + - rgw/test_rgw_gc_log.sh + - rgw/test_rgw_obj.sh + - rgw/test_rgw_throttle.sh + - rgw/test_librgw_file.sh diff --git a/qa/suites/rgw/verify/tasks/mp_reupload.yaml b/qa/suites/rgw/verify/tasks/mp_reupload.yaml new file mode 100644 index 000000000..d817a1c35 --- /dev/null +++ b/qa/suites/rgw/verify/tasks/mp_reupload.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rgw/test_rgw_s3_mp_reupload.sh diff --git a/qa/suites/rgw/verify/tasks/ragweed.yaml b/qa/suites/rgw/verify/tasks/ragweed.yaml new file mode 100644 index 000000000..6ac8f29a7 --- /dev/null +++ b/qa/suites/rgw/verify/tasks/ragweed.yaml @@ -0,0 +1,6 @@ +tasks: +- ragweed: + client.0: + default-branch: ceph-reef + rgw_server: client.0 + stages: prepare,check diff --git a/qa/suites/rgw/verify/tasks/reshard.yaml b/qa/suites/rgw/verify/tasks/reshard.yaml new file mode 100644 index 000000000..db65af36a --- /dev/null +++ b/qa/suites/rgw/verify/tasks/reshard.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rgw/run-reshard.sh diff --git a/qa/suites/rgw/verify/tasks/s3tests-java.yaml b/qa/suites/rgw/verify/tasks/s3tests-java.yaml new file mode 100644 index 000000000..722d78f8a --- /dev/null +++ b/qa/suites/rgw/verify/tasks/s3tests-java.yaml @@ -0,0 +1,6 @@ +tasks: +- s3tests-java: + client.0: + force-branch: ceph-reef + force-repo: https://github.com/ceph/java_s3tests.git + diff --git a/qa/suites/rgw/verify/tasks/s3tests.yaml b/qa/suites/rgw/verify/tasks/s3tests.yaml new file mode 100644 index 000000000..573cffbc3 --- /dev/null +++ b/qa/suites/rgw/verify/tasks/s3tests.yaml @@ -0,0 +1,4 @@ +tasks: +- s3tests: + client.0: + rgw_server: client.0 diff --git a/qa/suites/rgw/verify/tasks/versioning.yaml b/qa/suites/rgw/verify/tasks/versioning.yaml new file mode 100644 index 000000000..ab928f270 --- /dev/null +++ b/qa/suites/rgw/verify/tasks/versioning.yaml @@ -0,0 +1,5 @@ +tasks: +- workunit: + clients: + client.0: + - rgw/run-versioning.sh diff --git a/qa/suites/rgw/verify/validater/.qa b/qa/suites/rgw/verify/validater/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/verify/validater/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/verify/validater/lockdep.yaml b/qa/suites/rgw/verify/validater/lockdep.yaml new file mode 100644 index 000000000..941fe12b1 --- /dev/null +++ b/qa/suites/rgw/verify/validater/lockdep.yaml @@ -0,0 +1,7 @@ +overrides: + ceph: + conf: + osd: + lockdep: true + mon: + lockdep: true diff --git a/qa/suites/rgw/verify/validater/valgrind.yaml b/qa/suites/rgw/verify/validater/valgrind.yaml new file mode 100644 index 000000000..898067e68 --- /dev/null +++ b/qa/suites/rgw/verify/validater/valgrind.yaml @@ -0,0 +1,21 @@ +overrides: + install: + ceph: + #debuginfo: true + rgw: + client.0: + valgrind: [--tool=memcheck, --max-threads=1024] # http://tracker.ceph.com/issues/25214 + ceph: + conf: + global: + osd heartbeat grace: 40 + mon: + mon osd crush smoke test: false + osd: + osd fast shutdown: false +# valgrind: +# mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] +# osd: [--tool=memcheck] +# mds: [--tool=memcheck] +## https://tracker.ceph.com/issues/38621 +## mgr: [--tool=memcheck] diff --git a/qa/suites/rgw/website/% b/qa/suites/rgw/website/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rgw/website/% diff --git a/qa/suites/rgw/website/.qa b/qa/suites/rgw/website/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/website/.qa @@ -0,0 +1 @@ +../.qa/
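The commented block at the end of validater/valgrind.yaml shows where daemon-level valgrind would go. As a sketch only, assuming the block sits under the ceph override as its indentation suggests, enabling it (with mgr still excluded per the linked tracker issue) would look like:

overrides:
  ceph:
    valgrind:
      mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes]
      osd: [--tool=memcheck]
      mds: [--tool=memcheck]
      # mgr omitted: https://tracker.ceph.com/issues/38621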
\ No newline at end of file diff --git a/qa/suites/rgw/website/clusters/.qa b/qa/suites/rgw/website/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/website/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/website/clusters/fixed-2.yaml b/qa/suites/rgw/website/clusters/fixed-2.yaml new file mode 120000 index 000000000..230ff0fda --- /dev/null +++ b/qa/suites/rgw/website/clusters/fixed-2.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-2.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/website/frontend b/qa/suites/rgw/website/frontend new file mode 120000 index 000000000..926a53e83 --- /dev/null +++ b/qa/suites/rgw/website/frontend @@ -0,0 +1 @@ +.qa/rgw_frontend
\ No newline at end of file diff --git a/qa/suites/rgw/website/http.yaml b/qa/suites/rgw/website/http.yaml new file mode 100644 index 000000000..24cb6fc5d --- /dev/null +++ b/qa/suites/rgw/website/http.yaml @@ -0,0 +1 @@ +# https tests would need to generate wildcard certificates; only test http for now diff --git a/qa/suites/rgw/website/ignore-pg-availability.yaml b/qa/suites/rgw/website/ignore-pg-availability.yaml new file mode 120000 index 000000000..32340b1fa --- /dev/null +++ b/qa/suites/rgw/website/ignore-pg-availability.yaml @@ -0,0 +1 @@ +.qa/rgw/ignore-pg-availability.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/website/overrides.yaml b/qa/suites/rgw/website/overrides.yaml new file mode 100644 index 000000000..80397571e --- /dev/null +++ b/qa/suites/rgw/website/overrides.yaml @@ -0,0 +1,26 @@ +overrides: + install: + ceph: + conf: + global: + osd_min_pg_log_entries: 10 + osd_max_pg_log_entries: 10 + client: + setuser: ceph + setgroup: ceph + debug rgw: 20 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false + rgw enable static website: True + client.0: + rgw lc debug interval: 10 + client.1: + rgw enable apis: s3website + rgw: + client.0: + valgrind: [--tool=memcheck, --max-threads=1024] # http://tracker.ceph.com/issues/25214 + client.1: + valgrind: [--tool=memcheck, --max-threads=1024] # http://tracker.ceph.com/issues/25214 + s3tests: + calling-format: subdomain diff --git a/qa/suites/rgw/website/s3tests-branch.yaml b/qa/suites/rgw/website/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/rgw/website/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/rgw/website/tasks/.qa b/qa/suites/rgw/website/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rgw/website/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rgw/website/tasks/s3tests-website.yaml b/qa/suites/rgw/website/tasks/s3tests-website.yaml new file mode 100644 index 000000000..da10a6f1c --- /dev/null +++ b/qa/suites/rgw/website/tasks/s3tests-website.yaml @@ -0,0 +1,17 @@ +tasks: +- install: +- ceph: +- dnsmasq: + client.0: + s3.: client.0 + s3-website.: client.1 +- rgw: + client.0: + dns-name: s3. + client.1: + dns-s3website-name: s3-website. +- tox: [client.0] +- s3tests: + client.0: + rgw_server: client.0 + rgw_website_server: client.1 diff --git a/qa/suites/rgw/website/ubuntu_latest.yaml b/qa/suites/rgw/website/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/rgw/website/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/suites/samba/% b/qa/suites/samba/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/samba/% diff --git a/qa/suites/samba/.qa b/qa/suites/samba/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/samba/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/samba/clusters/.qa b/qa/suites/samba/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/samba/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/samba/clusters/samba-basic.yaml b/qa/suites/samba/clusters/samba-basic.yaml new file mode 100644 index 000000000..af432f610 --- /dev/null +++ b/qa/suites/samba/clusters/samba-basic.yaml @@ -0,0 +1,7 @@ +roles: +- [mon.a, mon.b, mon.c, mgr.x, mds.a, osd.0, osd.1] +- [samba.0, client.0, client.1] +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB diff --git a/qa/suites/samba/install/.qa b/qa/suites/samba/install/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/samba/install/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/samba/install/install.yaml b/qa/suites/samba/install/install.yaml new file mode 100644 index 000000000..c53f9c55b --- /dev/null +++ b/qa/suites/samba/install/install.yaml @@ -0,0 +1,9 @@ +# we currently can't install Samba on RHEL; need a gitbuilder and code updates +os_type: ubuntu + +tasks: +- install: +- install: + project: samba + extra_packages: ['samba'] +- ceph: diff --git a/qa/suites/samba/mount/.qa b/qa/suites/samba/mount/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/samba/mount/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/samba/mount/fuse.yaml b/qa/suites/samba/mount/fuse.yaml new file mode 100644 index 000000000..d00ffdb48 --- /dev/null +++ b/qa/suites/samba/mount/fuse.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: [client.0] +- samba: + samba.0: + ceph: "{testdir}/mnt.0" + diff --git a/qa/suites/samba/mount/kclient.yaml b/qa/suites/samba/mount/kclient.yaml new file mode 100644 index 000000000..8baa09f8b --- /dev/null +++ b/qa/suites/samba/mount/kclient.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +kernel: + client: + branch: testing +tasks: +- kclient: [client.0] +- samba: + samba.0: + ceph: "{testdir}/mnt.0" + diff --git a/qa/suites/samba/mount/native.yaml b/qa/suites/samba/mount/native.yaml new file mode 100644 index 000000000..09b8c1c4e --- /dev/null +++ b/qa/suites/samba/mount/native.yaml @@ -0,0 +1,2 @@ +tasks: +- samba: diff --git a/qa/suites/samba/mount/noceph.yaml b/qa/suites/samba/mount/noceph.yaml new file mode 100644 index 000000000..3cad4740d --- /dev/null +++ b/qa/suites/samba/mount/noceph.yaml @@ -0,0 +1,5 @@ +tasks: +- localdir: [client.0] +- samba: + samba.0: + ceph: "{testdir}/mnt.0" diff --git a/qa/suites/samba/objectstore b/qa/suites/samba/objectstore new file mode 120000 index 000000000..c40bd3261 --- /dev/null +++ b/qa/suites/samba/objectstore @@ -0,0 +1 @@ +.qa/objectstore
\ No newline at end of file diff --git a/qa/suites/samba/workload/.qa b/qa/suites/samba/workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/samba/workload/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/samba/workload/cifs-dbench.yaml b/qa/suites/samba/workload/cifs-dbench.yaml new file mode 100644 index 000000000..c13c1c099 --- /dev/null +++ b/qa/suites/samba/workload/cifs-dbench.yaml @@ -0,0 +1,8 @@ +tasks: +- cifs-mount: + client.1: + share: ceph +- workunit: + clients: + client.1: + - suites/dbench.sh diff --git a/qa/suites/samba/workload/cifs-fsstress.yaml b/qa/suites/samba/workload/cifs-fsstress.yaml new file mode 100644 index 000000000..ff003af34 --- /dev/null +++ b/qa/suites/samba/workload/cifs-fsstress.yaml @@ -0,0 +1,8 @@ +tasks: +- cifs-mount: + client.1: + share: ceph +- workunit: + clients: + client.1: + - suites/fsstress.sh diff --git a/qa/suites/samba/workload/cifs-kernel-build.yaml.disabled b/qa/suites/samba/workload/cifs-kernel-build.yaml.disabled new file mode 100644 index 000000000..ab9ff8ac7 --- /dev/null +++ b/qa/suites/samba/workload/cifs-kernel-build.yaml.disabled @@ -0,0 +1,9 @@ +tasks: +- cifs-mount: + client.1: + share: ceph +- workunit: + clients: + client.1: + - kernel_untar_build.sh + diff --git a/qa/suites/samba/workload/smbtorture.yaml b/qa/suites/samba/workload/smbtorture.yaml new file mode 100644 index 000000000..823489a20 --- /dev/null +++ b/qa/suites/samba/workload/smbtorture.yaml @@ -0,0 +1,39 @@ +tasks: +- pexec: + client.1: + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.lock + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.fdpass + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.unlink + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.attr + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.trans2 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.negnowait + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.dir1 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.deny1 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.deny2 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.deny3 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.denydos + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.ntdeny1 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.ntdeny2 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.tcon + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.tcondev + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.vuid + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.rw1 + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.open + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.defer_open + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.xcopy + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.rename + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.properties + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.mangle + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.openattr + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.chkpath + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.secleak + - /usr/local/samba/bin/smbtorture --password=ubuntu 
//localhost/ceph base.disconnect + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.samba3error + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.smb +# - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-holdcon +# - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-holdopen + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-readwrite + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.bench-torture + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.scan-pipe_number + - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.scan-ioctl +# - /usr/local/samba/bin/smbtorture --password=ubuntu //localhost/ceph base.scan-maxfid diff --git a/qa/suites/smoke/.qa b/qa/suites/smoke/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/smoke/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/smoke/basic/% b/qa/suites/smoke/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/smoke/basic/% diff --git a/qa/suites/smoke/basic/.qa b/qa/suites/smoke/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/smoke/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/smoke/basic/clusters/+ b/qa/suites/smoke/basic/clusters/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/smoke/basic/clusters/+ diff --git a/qa/suites/smoke/basic/clusters/.qa b/qa/suites/smoke/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/smoke/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/smoke/basic/clusters/fixed-3-cephfs.yaml b/qa/suites/smoke/basic/clusters/fixed-3-cephfs.yaml new file mode 120000 index 000000000..24480dfc7 --- /dev/null +++ b/qa/suites/smoke/basic/clusters/fixed-3-cephfs.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3-cephfs.yaml
\ No newline at end of file diff --git a/qa/suites/smoke/basic/clusters/openstack.yaml b/qa/suites/smoke/basic/clusters/openstack.yaml new file mode 100644 index 000000000..7d652b491 --- /dev/null +++ b/qa/suites/smoke/basic/clusters/openstack.yaml @@ -0,0 +1,8 @@ +openstack: + - machine: + disk: 40 # GB + ram: 8000 # MB + cpus: 1 + volumes: # attached to each instance + count: 4 + size: 10 # GB diff --git a/qa/suites/smoke/basic/objectstore/.qa b/qa/suites/smoke/basic/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/smoke/basic/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/smoke/basic/objectstore/bluestore-bitmap.yaml b/qa/suites/smoke/basic/objectstore/bluestore-bitmap.yaml new file mode 120000 index 000000000..66cf2bc75 --- /dev/null +++ b/qa/suites/smoke/basic/objectstore/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore_debug/bluestore-bitmap.yaml
\ No newline at end of file diff --git a/qa/suites/smoke/basic/s3tests-branch.yaml b/qa/suites/smoke/basic/s3tests-branch.yaml new file mode 120000 index 000000000..bdcaca48a --- /dev/null +++ b/qa/suites/smoke/basic/s3tests-branch.yaml @@ -0,0 +1 @@ +.qa/rgw/s3tests-branch.yaml
\ No newline at end of file diff --git a/qa/suites/smoke/basic/supported-all-distro b/qa/suites/smoke/basic/supported-all-distro new file mode 120000 index 000000000..ca82dde58 --- /dev/null +++ b/qa/suites/smoke/basic/supported-all-distro @@ -0,0 +1 @@ +.qa/distros/supported-all-distro
\ No newline at end of file diff --git a/qa/suites/smoke/basic/tasks/% b/qa/suites/smoke/basic/tasks/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/smoke/basic/tasks/% diff --git a/qa/suites/smoke/basic/tasks/.qa b/qa/suites/smoke/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/smoke/basic/tasks/0-install.yaml b/qa/suites/smoke/basic/tasks/0-install.yaml new file mode 100644 index 000000000..ceffc50d8 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/0-install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: + cleanup: true diff --git a/qa/suites/smoke/basic/tasks/test/.qa b/qa/suites/smoke/basic/tasks/test/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_blogbench.yaml b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_blogbench.yaml new file mode 100644 index 000000000..bc40416ff --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_blogbench.yaml @@ -0,0 +1,10 @@ +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: +- workunit: + clients: + all: + - suites/blogbench.sh diff --git a/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_fsstress.yaml b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_fsstress.yaml new file mode 100644 index 000000000..e21286d59 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_fsstress.yaml @@ -0,0 +1,9 @@ +tasks: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_iozone.yaml b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_iozone.yaml new file mode 100644 index 000000000..871606ab8 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_iozone.yaml @@ -0,0 +1,9 @@ +tasks: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: [client.0] +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_pjd.yaml b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_pjd.yaml new file mode 100644 index 000000000..0f4469c93 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/cfuse_workunit_suites_pjd.yaml @@ -0,0 +1,18 @@ +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) + conf: + mds: + debug mds: 20 + debug ms: 1 + client: + debug client: 20 + debug ms: 1 + fuse set user groups: true +- ceph-fuse: +- workunit: + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/smoke/basic/tasks/test/kclient_workunit_direct_io.yaml b/qa/suites/smoke/basic/tasks/test/kclient_workunit_direct_io.yaml new file mode 100644 index 000000000..3720d418c --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/kclient_workunit_direct_io.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- kclient: +- workunit: + clients: + all: + - direct_io diff --git a/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_dbench.yaml b/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_dbench.yaml new file mode 100644 index 000000000..256d1f1fe --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_dbench.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- kclient: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_fsstress.yaml b/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_fsstress.yaml new file mode 100644 index 000000000..649ea8e14 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_fsstress.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- kclient: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_pjd.yaml 
b/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_pjd.yaml new file mode 100644 index 000000000..7dea45b80 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/kclient_workunit_suites_pjd.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- kclient: +- workunit: + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/smoke/basic/tasks/test/libcephfs_interface_tests.yaml b/qa/suites/smoke/basic/tasks/test/libcephfs_interface_tests.yaml new file mode 100644 index 000000000..3be975b6b --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/libcephfs_interface_tests.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + conf: + client: + debug ms: 1 + debug client: 20 + mds: + debug ms: 1 + debug mds: 20 +tasks: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: +- workunit: + clients: + client.0: + - libcephfs/test.sh diff --git a/qa/suites/smoke/basic/tasks/test/mon_thrash.yaml b/qa/suites/smoke/basic/tasks/test/mon_thrash.yaml new file mode 100644 index 000000000..9aa6a5f1c --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/mon_thrash.yaml @@ -0,0 +1,39 @@ +overrides: + ceph: + log-ignorelist: + - reached quota + - mons down + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(TOO_FEW_PGS\) + - \(OSD_SLOW_PING_TIME + - slow request + conf: + global: + ms inject delay max: 1 + ms inject delay probability: 0.005 + ms inject delay type: mon + ms inject internal delays: 0.002 + ms inject socket failures: 2500 + mon client directed command retry: 5 + osd: + osd class load list: "*" + osd class default list: "*" +tasks: +- ceph: + fs: xfs +- mon_thrash: + revive_delay: 90 + thrash_delay: 1 + thrash_many: true +- workunit: + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/smoke/basic/tasks/test/rados_api_tests.yaml b/qa/suites/smoke/basic/tasks/test/rados_api_tests.yaml new file mode 100644 index 000000000..d81428aba --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rados_api_tests.yaml @@ -0,0 +1,32 @@ +tasks: +- ceph: + fs: ext4 + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(TOO_FEW_PGS\) + - reached quota + - but it is still running + - slow request + conf: + mon: + mon warn on pool no app: false + osd: + osd class load list: "*" + osd class default list: "*" +- thrashosds: + chance_pgnum_grow: 2 + chance_pgnum_shrink: 2 + chance_pgpnum_fix: 1 + timeout: 1200 +- workunit: + clients: + client.0: + - rados/test.sh diff --git a/qa/suites/smoke/basic/tasks/test/rados_bench.yaml b/qa/suites/smoke/basic/tasks/test/rados_bench.yaml new file mode 100644 index 000000000..ae8862e1c --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rados_bench.yaml @@ -0,0 +1,47 @@ +overrides: + ceph: + conf: + global: + ms inject delay max: 1 + ms inject delay probability: 0.005 + ms inject delay type: osd + ms inject internal delays: 0.002 + ms inject socket failures: 2500 + mon client directed command retry: 5 +tasks: +- ceph: + fs: xfs + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(TOO_FEW_PGS\) + - \(OSD_SLOW_PING_TIME + - slow request +- thrashosds: + chance_pgnum_grow: 2 + chance_pgnum_shrink: 2 + chance_pgpnum_fix: 1 
+ timeout: 1200 +- full_sequential: + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 + - radosbench: + clients: [client.0] + time: 150 diff --git a/qa/suites/smoke/basic/tasks/test/rados_cache_snaps.yaml b/qa/suites/smoke/basic/tasks/test/rados_cache_snaps.yaml new file mode 100644 index 000000000..7178f6824 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rados_cache_snaps.yaml @@ -0,0 +1,50 @@ +tasks: +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(TOO_FEW_PGS\) + - slow request +- thrashosds: + chance_pgnum_grow: 2 + chance_pgnum_shrink: 2 + chance_pgpnum_fix: 1 + timeout: 1200 +- exec: + client.0: + - sudo ceph osd pool create base 4 + - sudo ceph osd pool application enable base rados + - sudo ceph osd pool create cache 4 + - sudo ceph osd tier add base cache + - sudo ceph osd tier cache-mode cache writeback + - sudo ceph osd tier set-overlay base cache + - sudo ceph osd pool set cache hit_set_type bloom + - sudo ceph osd pool set cache hit_set_count 8 + - sudo ceph osd pool set cache hit_set_period 3600 + - sudo ceph osd pool set cache target_max_objects 250 +- rados: + clients: + - client.0 + objects: 500 + op_weights: + copy_from: 50 + delete: 50 + cache_evict: 50 + cache_flush: 50 + read: 100 + rollback: 50 + snap_create: 50 + snap_remove: 50 + cache_try_flush: 50 + write: 100 + ops: 4000 + pool_snaps: true + pools: + - base diff --git a/qa/suites/smoke/basic/tasks/test/rados_cls_all.yaml b/qa/suites/smoke/basic/tasks/test/rados_cls_all.yaml new file mode 100644 index 000000000..c4d55728c --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rados_cls_all.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + conf: + osd: + osd_class_load_list: "*" + osd_class_default_list: "*" +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- workunit: + clients: + client.0: + - cls diff --git a/qa/suites/smoke/basic/tasks/test/rados_ec_snaps.yaml b/qa/suites/smoke/basic/tasks/test/rados_ec_snaps.yaml new file mode 100644 index 000000000..5ee4a7ad9 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rados_ec_snaps.yaml @@ -0,0 +1,40 @@ +tasks: +- ceph: + fs: xfs + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(TOO_FEW_PGS\) + - slow request +- thrashosds: + chance_pgnum_grow: 3 + chance_pgnum_shrink: 2 + chance_pgpnum_fix: 1 + timeout: 1200 +- rados: + clients: + - client.0 + ec_pool: true + max_in_flight: 64 + max_seconds: 600 + objects: 1024 + op_weights: + append: 100 + copy_from: 50 + delete: 50 + read: 100 + rmattr: 25 + rollback: 50 + setattr: 25 + snap_create: 50 + snap_remove: 50 + write: 0 + ops: 400000 + size: 16384 diff --git a/qa/suites/smoke/basic/tasks/test/rados_python.yaml b/qa/suites/smoke/basic/tasks/test/rados_python.yaml new file mode 100644 index 000000000..630aa567f --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rados_python.yaml @@ -0,0 +1,21 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- ceph: + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(PG_ + - \(OSD_ + - \(OBJECT_ + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: +- workunit: + timeout: 1h + clients: + client.0: + - 
rados/test_python.sh diff --git a/qa/suites/smoke/basic/tasks/test/rados_workunit_loadgen_mix.yaml b/qa/suites/smoke/basic/tasks/test/rados_workunit_loadgen_mix.yaml new file mode 100644 index 000000000..455d6ae87 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rados_workunit_loadgen_mix.yaml @@ -0,0 +1,12 @@ +tasks: +- ceph: + fs: ext4 + log-ignorelist: + - but it is still running + - overall HEALTH_ + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: +- workunit: + clients: + all: + - rados/load-gen-mix.sh diff --git a/qa/suites/smoke/basic/tasks/test/rbd_api_tests.yaml b/qa/suites/smoke/basic/tasks/test/rbd_api_tests.yaml new file mode 100644 index 000000000..cbd0fb27c --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rbd_api_tests.yaml @@ -0,0 +1,18 @@ +tasks: +- ceph: + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - is full \(reached quota + fs: xfs +- ceph-fuse: +- workunit: + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/smoke/basic/tasks/test/rbd_cli_import_export.yaml b/qa/suites/smoke/basic/tasks/test/rbd_cli_import_export.yaml new file mode 100644 index 000000000..79ff9418d --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rbd_cli_import_export.yaml @@ -0,0 +1,12 @@ +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: +- workunit: + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format diff --git a/qa/suites/smoke/basic/tasks/test/rbd_fsx.yaml b/qa/suites/smoke/basic/tasks/test/rbd_fsx.yaml new file mode 100644 index 000000000..92bdea280 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rbd_fsx.yaml @@ -0,0 +1,30 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(TOO_FEW_PGS\) + - \(OSD_SLOW_PING_TIME + - slow request + conf: + client: + rbd cache: true + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 +tasks: +- ceph: + fs: xfs +- thrashosds: + timeout: 1200 +- rbd_fsx: + clients: + - client.0 + ops: 2000 diff --git a/qa/suites/smoke/basic/tasks/test/rbd_python_api_tests.yaml b/qa/suites/smoke/basic/tasks/test/rbd_python_api_tests.yaml new file mode 100644 index 000000000..73e64bb2c --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rbd_python_api_tests.yaml @@ -0,0 +1,16 @@ +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +tasks: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- ceph-fuse: +- workunit: + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "1" diff --git a/qa/suites/smoke/basic/tasks/test/rbd_workunit_suites_iozone.yaml b/qa/suites/smoke/basic/tasks/test/rbd_workunit_suites_iozone.yaml new file mode 100644 index 000000000..8602447aa --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rbd_workunit_suites_iozone.yaml @@ -0,0 +1,18 @@ +overrides: + ceph: + conf: + global: + ms die on skipped message: false + client: + rbd default features: 5 +tasks: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- rbd: + all: + image_size: 20480 +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/smoke/basic/tasks/test/rgw_ec_s3tests.yaml b/qa/suites/smoke/basic/tasks/test/rgw_ec_s3tests.yaml new file mode 100644 index 000000000..3214fd900 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rgw_ec_s3tests.yaml @@ -0,0 +1,21 @@ 
+overrides: + rgw: + ec-data-pool: true + cache-pools: true +tasks: +- ceph: + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- rgw: [client.0] +- tox: [client.0] +- s3tests: + client.0: + rgw_server: client.0 +overrides: + ceph: + conf: + client: + rgw lc debug interval: 10 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false diff --git a/qa/suites/smoke/basic/tasks/test/rgw_s3tests.yaml b/qa/suites/smoke/basic/tasks/test/rgw_s3tests.yaml new file mode 100644 index 000000000..337452f75 --- /dev/null +++ b/qa/suites/smoke/basic/tasks/test/rgw_s3tests.yaml @@ -0,0 +1,18 @@ +tasks: +- ceph: + fs: xfs + log-ignorelist: + - \(POOL_APP_NOT_ENABLED\) +- rgw: [client.0] +- tox: [client.0] +- s3tests: + client.0: + rgw_server: client.0 +overrides: + ceph: + conf: + client: + rgw lc debug interval: 10 + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= + rgw crypt require ssl: false diff --git a/qa/suites/stress/.qa b/qa/suites/stress/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/bench/% b/qa/suites/stress/bench/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/stress/bench/% diff --git a/qa/suites/stress/bench/.qa b/qa/suites/stress/bench/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/bench/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/bench/clusters/.qa b/qa/suites/stress/bench/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/bench/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/bench/clusters/fixed-3-cephfs.yaml b/qa/suites/stress/bench/clusters/fixed-3-cephfs.yaml new file mode 120000 index 000000000..24480dfc7 --- /dev/null +++ b/qa/suites/stress/bench/clusters/fixed-3-cephfs.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-3-cephfs.yaml
\ No newline at end of file diff --git a/qa/suites/stress/bench/tasks/.qa b/qa/suites/stress/bench/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/bench/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/bench/tasks/cfuse_workunit_snaps.yaml b/qa/suites/stress/bench/tasks/cfuse_workunit_snaps.yaml new file mode 100644 index 000000000..eafec39e3 --- /dev/null +++ b/qa/suites/stress/bench/tasks/cfuse_workunit_snaps.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: +- ceph-fuse: +- workunit: + clients: + all: + - snaps diff --git a/qa/suites/stress/bench/tasks/kclient_workunit_suites_fsx.yaml b/qa/suites/stress/bench/tasks/kclient_workunit_suites_fsx.yaml new file mode 100644 index 000000000..bfbb8d3db --- /dev/null +++ b/qa/suites/stress/bench/tasks/kclient_workunit_suites_fsx.yaml @@ -0,0 +1,19 @@ +tasks: +- install: + extra_system_packages: + deb: + - libaio-dev + - libtool-bin + - uuid-dev + - xfslibs-dev + rpm: + - libaio-devel + - libtool + - libuuid-devel + - xfsprogs-devel +- ceph: +- kclient: +- workunit: + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/stress/thrash/% b/qa/suites/stress/thrash/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/stress/thrash/% diff --git a/qa/suites/stress/thrash/.qa b/qa/suites/stress/thrash/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/thrash/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/thrash/clusters/.qa b/qa/suites/stress/thrash/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/thrash/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/thrash/clusters/16-osd.yaml b/qa/suites/stress/thrash/clusters/16-osd.yaml new file mode 100644 index 000000000..76232339b --- /dev/null +++ b/qa/suites/stress/thrash/clusters/16-osd.yaml @@ -0,0 +1,18 @@ +roles: +- [mon.a, mds.a, osd.0] +- [mon.b, mgr.x, osd.1] +- [mon.c, mgr.y, osd.2] +- [osd.3] +- [osd.4] +- [osd.5] +- [osd.6] +- [osd.7] +- [osd.8] +- [osd.9] +- [osd.10] +- [osd.11] +- [osd.12] +- [osd.13] +- [osd.14] +- [osd.15] +- [client.0] diff --git a/qa/suites/stress/thrash/clusters/3-osd-1-machine.yaml b/qa/suites/stress/thrash/clusters/3-osd-1-machine.yaml new file mode 100644 index 000000000..8c3556ae9 --- /dev/null +++ b/qa/suites/stress/thrash/clusters/3-osd-1-machine.yaml @@ -0,0 +1,3 @@ +roles: +- [mon.a, mgr.x, mds.a, osd.0, osd.1, osd.2] +- [mon.b, mon.c, client.0] diff --git a/qa/suites/stress/thrash/clusters/8-osd.yaml b/qa/suites/stress/thrash/clusters/8-osd.yaml new file mode 100644 index 000000000..9f51c6bad --- /dev/null +++ b/qa/suites/stress/thrash/clusters/8-osd.yaml @@ -0,0 +1,10 @@ +roles: +- [mon.a, mds.a, osd.0] +- [mon.b, mgr.x, osd.1] +- [mon.c, osd.2] +- [osd.3] +- [osd.4] +- [osd.5] +- [osd.6] +- [osd.7] +- [client.0] diff --git a/qa/suites/stress/thrash/thrashers/.qa b/qa/suites/stress/thrash/thrashers/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/thrash/thrashers/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/thrash/thrashers/default.yaml b/qa/suites/stress/thrash/thrashers/default.yaml new file mode 100644 index 000000000..47fa40480 --- /dev/null +++ b/qa/suites/stress/thrash/thrashers/default.yaml @@ -0,0 +1,7 @@ +tasks: +- install: +- ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost +- thrashosds: diff --git a/qa/suites/stress/thrash/thrashers/fast.yaml b/qa/suites/stress/thrash/thrashers/fast.yaml new file mode 100644 index 000000000..b2466dbe8 --- /dev/null +++ b/qa/suites/stress/thrash/thrashers/fast.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost +- thrashosds: + op_delay: 1 + chance_down: 10 diff --git a/qa/suites/stress/thrash/thrashers/more-down.yaml b/qa/suites/stress/thrash/thrashers/more-down.yaml new file mode 100644 index 000000000..8ba738d1f --- /dev/null +++ b/qa/suites/stress/thrash/thrashers/more-down.yaml @@ -0,0 +1,8 @@ +tasks: +- install: +- ceph: + log-ignorelist: + - but it is still running + - objects unfound and apparently lost +- thrashosds: + chance_down: 50 diff --git a/qa/suites/stress/thrash/workloads/.qa b/qa/suites/stress/thrash/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/stress/thrash/workloads/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/stress/thrash/workloads/bonnie_cfuse.yaml b/qa/suites/stress/thrash/workloads/bonnie_cfuse.yaml new file mode 100644 index 000000000..912f12d6c --- /dev/null +++ b/qa/suites/stress/thrash/workloads/bonnie_cfuse.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/bonnie.sh diff --git a/qa/suites/stress/thrash/workloads/iozone_cfuse.yaml b/qa/suites/stress/thrash/workloads/iozone_cfuse.yaml new file mode 100644 index 000000000..18a6051be --- /dev/null +++ b/qa/suites/stress/thrash/workloads/iozone_cfuse.yaml @@ -0,0 +1,6 @@ +tasks: +- ceph-fuse: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/stress/thrash/workloads/radosbench.yaml b/qa/suites/stress/thrash/workloads/radosbench.yaml new file mode 100644 index 000000000..3940870fc --- /dev/null +++ b/qa/suites/stress/thrash/workloads/radosbench.yaml @@ -0,0 +1,4 @@ +tasks: +- radosbench: + clients: [client.0] + time: 1800 diff --git a/qa/suites/stress/thrash/workloads/readwrite.yaml b/qa/suites/stress/thrash/workloads/readwrite.yaml new file mode 100644 index 000000000..c53e52b08 --- /dev/null +++ b/qa/suites/stress/thrash/workloads/readwrite.yaml @@ -0,0 +1,9 @@ +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + op_weights: + read: 45 + write: 45 + delete: 10 diff --git a/qa/suites/teuthology/.qa b/qa/suites/teuthology/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/buildpackages/% b/qa/suites/teuthology/buildpackages/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/teuthology/buildpackages/% diff --git a/qa/suites/teuthology/buildpackages/.qa b/qa/suites/teuthology/buildpackages/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/buildpackages/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/buildpackages/supported-all-distro b/qa/suites/teuthology/buildpackages/supported-all-distro new file mode 120000 index 000000000..ca82dde58 --- /dev/null +++ b/qa/suites/teuthology/buildpackages/supported-all-distro @@ -0,0 +1 @@ +.qa/distros/supported-all-distro
\ No newline at end of file diff --git a/qa/suites/teuthology/buildpackages/tasks/.qa b/qa/suites/teuthology/buildpackages/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/buildpackages/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/buildpackages/tasks/default.yaml b/qa/suites/teuthology/buildpackages/tasks/default.yaml new file mode 100644 index 000000000..cb583c763 --- /dev/null +++ b/qa/suites/teuthology/buildpackages/tasks/default.yaml @@ -0,0 +1,14 @@ +roles: + - [client.0] +tasks: + - install: + tag: v0.94.1 + - exec: + client.0: + - ceph --version | grep 'version 0.94.1' + - install.upgrade: + client.0: + tag: v0.94.3 + - exec: + client.0: + - ceph --version | grep 'version 0.94.3' diff --git a/qa/suites/teuthology/buildpackages/tasks/tag.yaml b/qa/suites/teuthology/buildpackages/tasks/tag.yaml new file mode 100644 index 000000000..2bfb8a995 --- /dev/null +++ b/qa/suites/teuthology/buildpackages/tasks/tag.yaml @@ -0,0 +1,11 @@ +roles: + - [mon.a, mgr.x, client.0] +tasks: + - install: + # tag has precedence over branch and sha1 + tag: v0.94.1 + branch: firefly + sha1: e5b6eea91cc37434f78a987d2dd1d3edd4a23f3f # dumpling + - exec: + client.0: + - ceph --version | grep 'version 0.94.1' diff --git a/qa/suites/teuthology/ceph/% b/qa/suites/teuthology/ceph/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/teuthology/ceph/% diff --git a/qa/suites/teuthology/ceph/.qa b/qa/suites/teuthology/ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/ceph/clusters/.qa b/qa/suites/teuthology/ceph/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/ceph/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/ceph/clusters/single.yaml b/qa/suites/teuthology/ceph/clusters/single.yaml new file mode 100644 index 000000000..0c6a40d0b --- /dev/null +++ b/qa/suites/teuthology/ceph/clusters/single.yaml @@ -0,0 +1,2 @@ +roles: + - [mon.a, mgr.x, client.0] diff --git a/qa/suites/teuthology/ceph/distros b/qa/suites/teuthology/ceph/distros new file mode 120000 index 000000000..23d9e9be8 --- /dev/null +++ b/qa/suites/teuthology/ceph/distros @@ -0,0 +1 @@ +.qa/distros/supported
\ No newline at end of file diff --git a/qa/suites/teuthology/ceph/tasks/.qa b/qa/suites/teuthology/ceph/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/ceph/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/ceph/tasks/teuthology.yaml b/qa/suites/teuthology/ceph/tasks/teuthology.yaml new file mode 100644 index 000000000..00081c8aa --- /dev/null +++ b/qa/suites/teuthology/ceph/tasks/teuthology.yaml @@ -0,0 +1,3 @@ +tasks: + - install: + - tests: diff --git a/qa/suites/teuthology/integration.yaml b/qa/suites/teuthology/integration.yaml new file mode 100644 index 000000000..8a7f1c776 --- /dev/null +++ b/qa/suites/teuthology/integration.yaml @@ -0,0 +1,2 @@ +tasks: +- teuthology_integration: diff --git a/qa/suites/teuthology/multi-cluster/% b/qa/suites/teuthology/multi-cluster/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/teuthology/multi-cluster/% diff --git a/qa/suites/teuthology/multi-cluster/.qa b/qa/suites/teuthology/multi-cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/multi-cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/multi-cluster/all/.qa b/qa/suites/teuthology/multi-cluster/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/multi-cluster/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/multi-cluster/all/ceph.yaml b/qa/suites/teuthology/multi-cluster/all/ceph.yaml new file mode 100644 index 000000000..4659ef3d1 --- /dev/null +++ b/qa/suites/teuthology/multi-cluster/all/ceph.yaml @@ -0,0 +1,25 @@ +roles: +- - ceph.mon.a + - ceph.mon.b + - ceph.mgr.x + - backup.osd.0 + - backup.osd.1 + - backup.osd.2 + - backup.client.0 +- - backup.mon.a + - backup.mgr.x + - ceph.osd.0 + - ceph.osd.1 + - ceph.osd.2 + - ceph.client.0 + - client.1 + - osd.3 +tasks: +- install: +- ceph: + cluster: backup +- ceph: +- workunit: + clients: + ceph.client.0: [true.sh] + backup.client.0: [true.sh] diff --git a/qa/suites/teuthology/multi-cluster/all/thrashosds.yaml b/qa/suites/teuthology/multi-cluster/all/thrashosds.yaml new file mode 100644 index 000000000..52002f57f --- /dev/null +++ b/qa/suites/teuthology/multi-cluster/all/thrashosds.yaml @@ -0,0 +1,21 @@ +roles: +- - backup.mon.a + - backup.mon.b + - backup.mgr.x + - backup.osd.0 + - backup.osd.1 + - backup.osd.2 +- - backup.mon.c + - backup.osd.3 + - backup.osd.4 + - backup.osd.5 + - backup.client.0 +tasks: +- install: +- ceph: + cluster: backup +- thrashosds: + cluster: backup +- workunit: + clients: + all: [true.sh] diff --git a/qa/suites/teuthology/multi-cluster/all/upgrade.yaml b/qa/suites/teuthology/multi-cluster/all/upgrade.yaml new file mode 100644 index 000000000..0973fc390 --- /dev/null +++ b/qa/suites/teuthology/multi-cluster/all/upgrade.yaml @@ -0,0 +1,51 @@ +overrides: + ceph: + log-ignorelist: + - failed to encode map + conf: + mon: + mon warn on legacy crush tunables: false +roles: +- - ceph.mon.a + - ceph.mon.b + - ceph.mgr.x + - backup.osd.0 + - backup.osd.1 + - backup.osd.2 + - backup.client.0 +- - backup.mon.a + - backup.mgr.x + - ceph.osd.0 + - ceph.osd.1 + - ceph.osd.2 + - ceph.client.0 + - client.1 + - osd.3 +tasks: +- install: + branch: infernalis +- ceph: + cluster: backup +- ceph: +- workunit: + clients: + backup.client.0: [true.sh] + ceph.client.0: [true.sh] +- install.upgrade: + ceph.mon.a: + branch: jewel + backup.mon.a: + branch: jewel +- ceph.restart: [ceph.mon.a, ceph.mon.b, ceph.osd.0, ceph.osd.1, ceph.osd.2, osd.3] +- exec: + ceph.client.0: + - ceph --version | grep -F 'version 10.' + client.1: + - ceph --cluster backup --version | grep -F 'version 10.' + backup.client.0: + # cli upgraded + - ceph --cluster backup --id 0 --version | grep -F 'version 10.' + - ceph --version | grep -F 'version 10.' + # backup cluster mon not upgraded + - ceph --cluster backup --id 0 tell mon.a version | grep -F 'version 9.2.' + - ceph tell mon.a version | grep -F 'version 10.' 
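The exec block above pins down the mixed-version state by grepping plain "ceph --version" and "ceph tell mon.a version" output on each client. The cephadm-based upgrade suites later in this series (pacific-x, quincy-x) make the same kind of assertion against the structured output of "ceph versions" instead; a minimal sketch of that style of check, reusing the commands from those fragments and assuming the sha1 variable that their cephadm.shell tasks export:

    # wait for the orchestrated upgrade to finish
    while ceph orch upgrade status | jq '.in_progress' | grep true ; do
      ceph orch ps ; ceph versions ; sleep 30
    done
    # assert every daemon now reports exactly one version, and that it is the target sha1
    ceph versions | jq -e '.overall | length == 1'
    ceph versions | jq -e '.overall | keys' | grep $sha1
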
diff --git a/qa/suites/teuthology/multi-cluster/all/workunit.yaml b/qa/suites/teuthology/multi-cluster/all/workunit.yaml new file mode 100644 index 000000000..b1288e38e --- /dev/null +++ b/qa/suites/teuthology/multi-cluster/all/workunit.yaml @@ -0,0 +1,23 @@ +roles: +- - backup.mon.a + - backup.mgr.x + - osd.0 + - osd.1 + - osd.2 + - client.0 + - backup.client.0 +- - mon.a + - mgr.x + - backup.osd.0 + - backup.osd.1 + - backup.osd.2 + - client.1 + - backup.client.1 +tasks: +- install: +- workunit: + clients: + all: [true.sh] +- workunit: + clients: + backup.client.1: [true.sh] diff --git a/qa/suites/teuthology/no-ceph/% b/qa/suites/teuthology/no-ceph/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/teuthology/no-ceph/% diff --git a/qa/suites/teuthology/no-ceph/.qa b/qa/suites/teuthology/no-ceph/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/no-ceph/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/no-ceph/clusters/.qa b/qa/suites/teuthology/no-ceph/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/no-ceph/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/no-ceph/clusters/single.yaml b/qa/suites/teuthology/no-ceph/clusters/single.yaml new file mode 100644 index 000000000..0c6a40d0b --- /dev/null +++ b/qa/suites/teuthology/no-ceph/clusters/single.yaml @@ -0,0 +1,2 @@ +roles: + - [mon.a, mgr.x, client.0] diff --git a/qa/suites/teuthology/no-ceph/tasks/.qa b/qa/suites/teuthology/no-ceph/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/no-ceph/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/no-ceph/tasks/teuthology.yaml b/qa/suites/teuthology/no-ceph/tasks/teuthology.yaml new file mode 100644 index 000000000..1391458b5 --- /dev/null +++ b/qa/suites/teuthology/no-ceph/tasks/teuthology.yaml @@ -0,0 +1,2 @@ +tasks: + - tests: diff --git a/qa/suites/teuthology/nop/% b/qa/suites/teuthology/nop/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/teuthology/nop/% diff --git a/qa/suites/teuthology/nop/.qa b/qa/suites/teuthology/nop/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/nop/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/nop/all/.qa b/qa/suites/teuthology/nop/all/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/nop/all/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/nop/all/nop.yaml b/qa/suites/teuthology/nop/all/nop.yaml new file mode 100644 index 000000000..4a5b227e6 --- /dev/null +++ b/qa/suites/teuthology/nop/all/nop.yaml @@ -0,0 +1,3 @@ +tasks: + - nop: + diff --git a/qa/suites/teuthology/rgw/% b/qa/suites/teuthology/rgw/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/teuthology/rgw/% diff --git a/qa/suites/teuthology/rgw/.qa b/qa/suites/teuthology/rgw/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/rgw/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/rgw/distros b/qa/suites/teuthology/rgw/distros new file mode 120000 index 000000000..23d9e9be8 --- /dev/null +++ b/qa/suites/teuthology/rgw/distros @@ -0,0 +1 @@ +.qa/distros/supported
\ No newline at end of file diff --git a/qa/suites/teuthology/rgw/tasks/.qa b/qa/suites/teuthology/rgw/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/rgw/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/rgw/tasks/s3tests-fastcgi.yaml b/qa/suites/teuthology/rgw/tasks/s3tests-fastcgi.yaml new file mode 100644 index 000000000..d76121fad --- /dev/null +++ b/qa/suites/teuthology/rgw/tasks/s3tests-fastcgi.yaml @@ -0,0 +1,24 @@ +# this runs s3tests against rgw, using mod_fastcgi +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2, client.0] +- [mon.b, mgr.x, osd.3, osd.4, osd.5, client.1] + +tasks: +- install: + branch: master +- ceph: +- rgw: [client.0] +- s3tests: + client.0: + rgw_server: client.0 + force-branch: ceph-master +overrides: + ceph: + fs: xfs + conf: + client: + debug rgw: 20 + rgw lc debug interval: 10 + rgw: + ec-data-pool: false + frontend: apache diff --git a/qa/suites/teuthology/rgw/tasks/s3tests-fcgi.yaml b/qa/suites/teuthology/rgw/tasks/s3tests-fcgi.yaml new file mode 100644 index 000000000..8228501ca --- /dev/null +++ b/qa/suites/teuthology/rgw/tasks/s3tests-fcgi.yaml @@ -0,0 +1,26 @@ +# this runs s3tests against rgw, using mod_proxy_fcgi +# the choice between uds or tcp with mod_proxy_fcgi depends on the distro +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2, client.0] +- [mon.b, mgr.x, osd.3, osd.4, osd.5, client.1] + +tasks: +- install: + branch: master +- ceph: +- rgw: [client.0] +- s3tests: + client.0: + rgw_server: client.0 + force-branch: ceph-master +overrides: + ceph: + fs: xfs + conf: + client: + debug rgw: 20 + rgw lc debug interval: 10 + rgw: + ec-data-pool: false + frontend: apache + use_fcgi: true diff --git a/qa/suites/teuthology/rgw/tasks/s3tests.yaml b/qa/suites/teuthology/rgw/tasks/s3tests.yaml new file mode 100644 index 000000000..ee8e9d5b5 --- /dev/null +++ b/qa/suites/teuthology/rgw/tasks/s3tests.yaml @@ -0,0 +1,23 @@ +# this runs s3tests against rgw +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2, client.0] +- [mon.b, mgr.x, osd.3, osd.4, osd.5, client.1] + +tasks: +- install: + branch: master +- ceph: +- rgw: [client.0] +- s3tests: + client.0: + rgw_server: client.0 + force-branch: ceph-master +overrides: + ceph: + fs: xfs + conf: + client: + debug rgw: 20 + rgw lc debug interval: 10 + rgw: + ec-data-pool: false diff --git a/qa/suites/teuthology/workunits/.qa b/qa/suites/teuthology/workunits/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/teuthology/workunits/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/teuthology/workunits/yes.yaml b/qa/suites/teuthology/workunits/yes.yaml new file mode 100644 index 000000000..45098dbb8 --- /dev/null +++ b/qa/suites/teuthology/workunits/yes.yaml @@ -0,0 +1,8 @@ +roles: + - [client.0] +tasks: +- install: +- workunit: + clients: + all: + - true.sh diff --git a/qa/suites/tgt/.qa b/qa/suites/tgt/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/tgt/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/tgt/basic/% b/qa/suites/tgt/basic/% new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/qa/suites/tgt/basic/% @@ -0,0 +1 @@ + diff --git a/qa/suites/tgt/basic/.qa b/qa/suites/tgt/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/tgt/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/tgt/basic/clusters/.qa b/qa/suites/tgt/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/tgt/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/tgt/basic/clusters/fixed-3.yaml b/qa/suites/tgt/basic/clusters/fixed-3.yaml new file mode 100644 index 000000000..5e23c9e4f --- /dev/null +++ b/qa/suites/tgt/basic/clusters/fixed-3.yaml @@ -0,0 +1,4 @@ +roles: +- [mon.a, mon.c, osd.0, osd.1, osd.2] +- [mon.b, mgr.x, mds.a, osd.3, osd.4, osd.5] +- [client.0] diff --git a/qa/suites/tgt/basic/msgr-failures/.qa b/qa/suites/tgt/basic/msgr-failures/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/tgt/basic/msgr-failures/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/tgt/basic/msgr-failures/few.yaml b/qa/suites/tgt/basic/msgr-failures/few.yaml new file mode 100644 index 000000000..519288992 --- /dev/null +++ b/qa/suites/tgt/basic/msgr-failures/few.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 5000 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/tgt/basic/msgr-failures/many.yaml b/qa/suites/tgt/basic/msgr-failures/many.yaml new file mode 100644 index 000000000..e3855297d --- /dev/null +++ b/qa/suites/tgt/basic/msgr-failures/many.yaml @@ -0,0 +1,8 @@ +overrides: + ceph: + conf: + global: + ms inject socket failures: 500 + mon client directed command retry: 5 + log-ignorelist: + - \(OSD_SLOW_PING_TIME diff --git a/qa/suites/tgt/basic/tasks/.qa b/qa/suites/tgt/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/tgt/basic/tasks/blogbench.yaml b/qa/suites/tgt/basic/tasks/blogbench.yaml new file mode 100644 index 000000000..f77a78b6b --- /dev/null +++ b/qa/suites/tgt/basic/tasks/blogbench.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/blogbench.sh diff --git a/qa/suites/tgt/basic/tasks/bonnie.yaml b/qa/suites/tgt/basic/tasks/bonnie.yaml new file mode 100644 index 000000000..2cbfcf887 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/bonnie.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/bonnie.sh diff --git a/qa/suites/tgt/basic/tasks/dbench-short.yaml b/qa/suites/tgt/basic/tasks/dbench-short.yaml new file mode 100644 index 000000000..fcb721a4d --- /dev/null +++ b/qa/suites/tgt/basic/tasks/dbench-short.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/dbench-short.sh diff --git a/qa/suites/tgt/basic/tasks/dbench.yaml b/qa/suites/tgt/basic/tasks/dbench.yaml new file mode 100644 index 000000000..7f732175f --- /dev/null +++ b/qa/suites/tgt/basic/tasks/dbench.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/dbench.sh diff --git a/qa/suites/tgt/basic/tasks/ffsb.yaml b/qa/suites/tgt/basic/tasks/ffsb.yaml new file mode 100644 index 000000000..f50a3a196 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/ffsb.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/ffsb.sh diff --git a/qa/suites/tgt/basic/tasks/fio.yaml b/qa/suites/tgt/basic/tasks/fio.yaml new file mode 100644 index 000000000..e7346ce52 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/fio.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/fio.sh diff --git a/qa/suites/tgt/basic/tasks/fsstress.yaml b/qa/suites/tgt/basic/tasks/fsstress.yaml new file mode 100644 index 000000000..c77f511c0 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/fsstress.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/fsstress.sh diff --git a/qa/suites/tgt/basic/tasks/fsx.yaml b/qa/suites/tgt/basic/tasks/fsx.yaml new file mode 100644 index 000000000..40f7e817a --- /dev/null +++ b/qa/suites/tgt/basic/tasks/fsx.yaml @@ -0,0 +1,20 @@ +tasks: +- install: + extra_system_packages: + deb: + - libaio-dev + - libtool-bin + - uuid-dev + - xfslibs-dev + rpm: + - libaio-devel + - libtool + - libuuid-devel + - xfsprogs-devel +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/fsx.sh diff --git a/qa/suites/tgt/basic/tasks/fsync-tester.yaml b/qa/suites/tgt/basic/tasks/fsync-tester.yaml new file mode 100644 index 000000000..ea627b7d1 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/fsync-tester.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/fsync-tester.sh diff --git a/qa/suites/tgt/basic/tasks/iogen.yaml b/qa/suites/tgt/basic/tasks/iogen.yaml new file mode 100644 index 000000000..1065c74da --- /dev/null +++ b/qa/suites/tgt/basic/tasks/iogen.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/iogen.sh diff --git a/qa/suites/tgt/basic/tasks/iozone-sync.yaml b/qa/suites/tgt/basic/tasks/iozone-sync.yaml new file mode 100644 index 000000000..ac241a417 --- /dev/null +++ 
b/qa/suites/tgt/basic/tasks/iozone-sync.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/iozone-sync.sh diff --git a/qa/suites/tgt/basic/tasks/iozone.yaml b/qa/suites/tgt/basic/tasks/iozone.yaml new file mode 100644 index 000000000..cf5604c21 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/iozone.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/iozone.sh diff --git a/qa/suites/tgt/basic/tasks/pjd.yaml b/qa/suites/tgt/basic/tasks/pjd.yaml new file mode 100644 index 000000000..ba5c631f1 --- /dev/null +++ b/qa/suites/tgt/basic/tasks/pjd.yaml @@ -0,0 +1,9 @@ +tasks: +- install: +- ceph: +- tgt: +- iscsi: +- workunit: + clients: + all: + - suites/pjd.sh diff --git a/qa/suites/upgrade/.qa b/qa/suites/upgrade/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/cephfs b/qa/suites/upgrade/cephfs new file mode 120000 index 000000000..1ff68fa8b --- /dev/null +++ b/qa/suites/upgrade/cephfs @@ -0,0 +1 @@ +.qa/suites/fs/upgrade/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/.qa b/qa/suites/upgrade/pacific-x/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/parallel/% b/qa/suites/upgrade/pacific-x/parallel/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/% diff --git a/qa/suites/upgrade/pacific-x/parallel/.qa b/qa/suites/upgrade/pacific-x/parallel/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/parallel/0-random-distro$ b/qa/suites/upgrade/pacific-x/parallel/0-random-distro$ new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/0-random-distro$ @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/parallel/0-start.yaml b/qa/suites/upgrade/pacific-x/parallel/0-start.yaml new file mode 100644 index 000000000..3814ea3ef --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/0-start.yaml @@ -0,0 +1,33 @@ +roles: +- - mon.a + - mon.c + - mgr.y + - mds.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mds.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: true + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/upgrade/pacific-x/parallel/1-tasks.yaml b/qa/suites/upgrade/pacific-x/parallel/1-tasks.yaml new file mode 100644 index 000000000..064d0758d --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/1-tasks.yaml @@ -0,0 +1,43 @@ +tasks: +- install: + branch: pacific + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- print: "**** done start installing pacific cephadm ..." +- cephadm: + image: quay.ceph.io/ceph-ci/ceph:pacific + cephadm_branch: pacific + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing pacific cephadm ..." + +- print: "**** done start cephadm.shell ceph config set mgr..." +- cephadm.shell: + mon.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." + +- print: "**** done start telemetry pacific..." +- workunit: + clients: + client.0: + - test_telemetry_pacific.sh +- print: "**** done end telemetry pacific..." + +- print: "**** done start parallel" +- parallel: + - workload + - upgrade-sequence +- print: "**** done end parallel" + +- print: "**** done start telemetry x..." +- workunit: + clients: + client.0: + - test_telemetry_pacific_x.sh +- print: "**** done end telemetry x..." diff --git a/qa/suites/upgrade/pacific-x/parallel/mon_election b/qa/suites/upgrade/pacific-x/parallel/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/parallel/upgrade-sequence.yaml b/qa/suites/upgrade/pacific-x/parallel/upgrade-sequence.yaml new file mode 100644 index 000000000..a3f0888da --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/upgrade-sequence.yaml @@ -0,0 +1,16 @@ +# renamed tasks: to upgrade-sequence: +upgrade-sequence: + sequential: + - print: "**** done start upgrade, wait" + - cephadm.shell: + env: [sha1] + mon.a: + - ceph config set global log_to_journald false --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - while ceph orch upgrade status | jq '.in_progress' | grep true ; do ceph orch ps ; ceph versions ; sleep 30 ; done + - ceph orch ps + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 + - print: "**** done end upgrade, wait..." + diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/+ b/qa/suites/upgrade/pacific-x/parallel/workload/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/+ diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/.qa b/qa/suites/upgrade/pacific-x/parallel/workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/ec-rados-default.yaml b/qa/suites/upgrade/pacific-x/parallel/workload/ec-rados-default.yaml new file mode 100644 index 000000000..67a0f39c5 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/ec-rados-default.yaml @@ -0,0 +1,25 @@ +meta: +- desc: | + run run randomized correctness test for rados operations + on an erasure-coded pool +workload: + full_sequential: + - print: "**** done start ec-rados-default.yaml" + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 + - print: "**** done end ec-rados-default.yaml" diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/rados_api.yaml b/qa/suites/upgrade/pacific-x/parallel/workload/rados_api.yaml new file mode 100644 index 000000000..1380a4016 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/rados_api.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + object class functional tests +workload: + full_sequential: + - print: "**** done start rados_api.yaml" + - workunit: + branch: pacific + clients: + client.0: + - cls + - print: "**** done end rados_api.yaml" diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/rados_loadgenbig.yaml b/qa/suites/upgrade/pacific-x/parallel/workload/rados_loadgenbig.yaml new file mode 100644 index 000000000..f315b1579 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/rados_loadgenbig.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + generate read/write load with rados objects ranging from 1MB to 25MB +workload: + full_sequential: + - print: "**** done start rados_loadgenbig.yaml" + - workunit: + branch: pacific + clients: + client.0: + - rados/load-gen-big.sh + - print: "**** done end rados_loadgenbig.yaml" diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/rbd_import_export.yaml b/qa/suites/upgrade/pacific-x/parallel/workload/rbd_import_export.yaml new file mode 100644 index 000000000..20e74c176 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/rbd_import_export.yaml @@ -0,0 +1,14 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +workload: + full_sequential: + - print: "**** done start rbd_import_export.yaml" + - workunit: + branch: pacific + clients: + client.1: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format + - print: "**** done end rbd_import_export.yaml" diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/test_rbd_api.yaml b/qa/suites/upgrade/pacific-x/parallel/workload/test_rbd_api.yaml new file mode 100644 index 000000000..6a0242b91 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/test_rbd_api.yaml @@ -0,0 +1,14 @@ +meta: +- desc: | + librbd C and C++ api tests +workload: + full_sequential: + - print: "**** done start test_rbd_api.yaml" + - workunit: + branch: pacific + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" + - print: "**** done end test_rbd_api.yaml" diff --git a/qa/suites/upgrade/pacific-x/parallel/workload/test_rbd_python.yaml b/qa/suites/upgrade/pacific-x/parallel/workload/test_rbd_python.yaml new file mode 100644 index 000000000..8d1f0fd0f --- /dev/null +++ b/qa/suites/upgrade/pacific-x/parallel/workload/test_rbd_python.yaml @@ -0,0 +1,20 @@ +meta: +- desc: | + librbd python api tests +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest 
+workload: + full_sequential: + - print: "**** done start test_rbd_python.yaml" + - workunit: + branch: pacific + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "61" + - print: "**** done end test_rbd_python.yaml" + diff --git a/qa/suites/upgrade/pacific-x/stress-split/% b/qa/suites/upgrade/pacific-x/stress-split/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/% diff --git a/qa/suites/upgrade/pacific-x/stress-split/.qa b/qa/suites/upgrade/pacific-x/stress-split/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/stress-split/0-distro b/qa/suites/upgrade/pacific-x/stress-split/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/stress-split/0-roles.yaml b/qa/suites/upgrade/pacific-x/stress-split/0-roles.yaml new file mode 100644 index 000000000..ad3ee43d3 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/0-roles.yaml @@ -0,0 +1,31 @@ +roles: +- - mon.a + - mon.c + - mgr.y + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: true + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/upgrade/pacific-x/stress-split/1-start.yaml b/qa/suites/upgrade/pacific-x/stress-split/1-start.yaml new file mode 100644 index 000000000..9a552df99 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/1-start.yaml @@ -0,0 +1,122 @@ +tasks: +- install: + branch: pacific + exclude_packages: + - ceph-volume + +- cephadm: + image: quay.ceph.io/ceph-ci/ceph:pacific + cephadm_branch: pacific + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" + +- cephadm.shell: + mon.a: + - ceph fs volume create foo + - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force + - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force + +- ceph.healthy: + +- print: "**** upgrading first half of cluster, with stress ****" +- parallel: + - first-half-tasks + - first-half-sequence +- print: "**** done upgrading first half of cluster ****" + +- ceph.healthy: + +- print: "**** applying stress + thrashing to mixed-version cluster ****" + +- parallel: + - stress-tasks + +- ceph.healthy: + +- print: "**** finishing upgrade ****" +- parallel: + - second-half-tasks + - second-half-sequence + +- ceph.healthy: + + +################# + +first-half-sequence: +- cephadm.shell: + env: [sha1] + mon.a: + - ceph config set mgr mgr/cephadm/daemon_cache_timeout 60 + - ceph config set global log_to_journald false --force + + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - ceph orch ps + + - echo wait for minority of mons to upgrade + - while ! ceph mon versions | grep $sha1 ; do sleep 2 ; done + - ceph orch ps + - ceph orch upgrade pause + - sleep 60 + - ceph orch upgrade resume + + - echo wait for majority of mons to upgrade + - "while ! ceph mon versions | grep $sha1 | egrep ': [23]' ; do sleep 2 ; done" + - ceph orch ps + - ceph orch upgrade pause + - sleep 60 + - ceph orch upgrade resume + + - echo wait for all mons to upgrade + - "while ! ceph mon versions | grep $sha1 | grep ': 3' ; do sleep 2 ; done" + - ceph orch ps + - ceph orch upgrade pause + - sleep 60 + - ceph orch upgrade resume + + - echo wait for half of osds to upgrade + - "while ! 
ceph osd versions | grep $sha1 | egrep ': [45678]'; do sleep 2 ; done" + - ceph orch upgrade pause + - ceph orch ps + + - ceph orch ps + - ceph versions + + +################# + +stress-tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + chance_thrash_cluster_full: 0 + chance_thrash_pg_upmap: 0 + chance_thrash_pg_upmap_items: 0 + disable_objectstore_tool_tests: true + chance_force_recovery: 0 + aggressive_pg_num_changes: false + + +################# + +second-half-sequence: + sequential: + - cephadm.shell: + env: [sha1] + mon.a: + - ceph orch upgrade resume + - sleep 60 + + - echo wait for upgrade to complete + - while ceph orch upgrade status | jq '.in_progress' | grep true ; do ceph orch ps ; ceph versions ; sleep 30 ; done + + - echo upgrade complete + - ceph orch ps + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 diff --git a/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/.qa b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/radosbench.yaml b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/radosbench.yaml new file mode 100644 index 000000000..3816ca38c --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/radosbench.yaml @@ -0,0 +1,19 @@ +meta: +- desc: | + run randomized correctness test for rados operations + generate write load with rados bench +first-half-tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 +- print: "**** done end radosbench.yaml" diff --git a/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd-cls.yaml b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd-cls.yaml new file mode 100644 index 000000000..4ebc09310 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd-cls.yaml @@ -0,0 +1,10 @@ +meta: +- desc: | + run basic cls tests for rbd +first-half-tasks: +- workunit: + branch: pacific + clients: + client.0: + - cls/test_cls_rbd.sh +- print: "**** done cls/test_cls_rbd.sh 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd-import-export.yaml b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd-import-export.yaml new file mode 100644 index 000000000..6835c9125 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd-import-export.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +first-half-tasks: +- workunit: + branch: pacific + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd_api.yaml b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd_api.yaml new file mode 100644 index 000000000..a7060c0ac --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/rbd_api.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + librbd C and C++ api tests +first-half-tasks: +- workunit: + branch: pacific + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" +- print: "**** done rbd/test_librbd.sh 7-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/readwrite.yaml b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/readwrite.yaml new file mode 100644 index 000000000..21a9f379a --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/readwrite.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool, + using only reads, writes, and deletes +first-half-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_append_excl: false + op_weights: + read: 45 + write: 45 + delete: 10 +- print: "**** done rados/readwrite 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/snaps-few-objects.yaml b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/snaps-few-objects.yaml new file mode 100644 index 000000000..6447c2245 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/2-first-half-tasks/snaps-few-objects.yaml @@ -0,0 +1,18 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool with snapshot operations 
+first-half-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + write_append_excl: false + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 +- print: "**** done rados/snaps-few-objects 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/+ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/+ diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/.qa b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/radosbench.yaml b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/radosbench.yaml new file mode 100644 index 000000000..9058bd804 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/radosbench.yaml @@ -0,0 +1,25 @@ +meta: +- desc: | + run randomized correctness test for rados operations + generate write load with rados bench +stress-tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 +- print: "**** done end radosbench.yaml" diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd-cls.yaml b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd-cls.yaml new file mode 100644 index 000000000..e72875c14 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd-cls.yaml @@ -0,0 +1,10 @@ +meta: +- desc: | + run basic cls tests for rbd +stress-tasks: +- workunit: + branch: pacific + clients: + client.0: + - cls/test_cls_rbd.sh +- print: "**** done cls/test_cls_rbd.sh 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd-import-export.yaml b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd-import-export.yaml new file mode 100644 index 000000000..c3008f3b1 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd-import-export.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +stress-tasks: +- workunit: + branch: pacific + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd_api.yaml b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd_api.yaml new file mode 100644 index 000000000..8b52658c4 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/rbd_api.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + librbd C and C++ api tests +stress-tasks: +- workunit: + branch: pacific + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" +- print: "**** done rbd/test_librbd.sh 7-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/readwrite.yaml b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/readwrite.yaml new file mode 100644 index 000000000..41e34d6d7 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/readwrite.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool, + using only reads, writes, and deletes +stress-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_append_excl: false + op_weights: + read: 45 + write: 45 + delete: 10 +- print: "**** done rados/readwrite 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/snaps-few-objects.yaml b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/snaps-few-objects.yaml new file mode 100644 index 000000000..f56d0de0f --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/3-stress-tasks/snaps-few-objects.yaml @@ -0,0 +1,18 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool with snapshot 
operations +stress-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + write_append_excl: false + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 +- print: "**** done rados/snaps-few-objects 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/.qa b/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/radosbench.yaml b/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/radosbench.yaml new file mode 100644 index 000000000..7268cb170 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/radosbench.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + run randomized correctness test for rados operations + generate write load with rados bench +second-half-tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 +- print: "**** done end radosbench.yaml" diff --git a/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/rbd-import-export.yaml b/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/rbd-import-export.yaml new file mode 100644 index 000000000..00cf88d54 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/4-second-half-tasks/rbd-import-export.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +second-half-tasks: +- workunit: + branch: pacific + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 5-workload" diff --git a/qa/suites/upgrade/pacific-x/stress-split/mon_election b/qa/suites/upgrade/pacific-x/stress-split/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/upgrade/pacific-x/stress-split/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/.qa b/qa/suites/upgrade/quincy-x/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/% b/qa/suites/upgrade/quincy-x/filestore-remove-check/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/% diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/.qa b/qa/suites/upgrade/quincy-x/filestore-remove-check/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/.qa @@ -0,0 +1 @@ +../.qa
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/+ b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/+ diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/.qa b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/openstack.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/openstack.yaml new file mode 100644 index 000000000..5caffc353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/openstack.yaml @@ -0,0 +1,6 @@ +openstack: + - machine: + disk: 100 # GB + - volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml new file mode 100644 index 000000000..b4b6f4d90 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/0-cluster/start.yaml @@ -0,0 +1,34 @@ +meta: +- desc: | + Run ceph on one node, + use xfs beneath the osds. The upgrade to reef + should fail to start the osds with filestore +overrides: + ceph: + mon_bind_msgr2: false + mon_bind_addrvec: false + mon-health-to-clog: false + wait-for-healthy: false + wait-for-osds-up: false + wait-for-scrub: false + skip_stop_pg_num_changes: true + fs: xfs + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(MGR_DOWN\) + - slow request + - \(MON_MSGR2_NOT_ENABLED\) + - \(POOL_APP_NOT_ENABLED\) + conf: + global: + enable experimental unrecoverable data corrupting features: "*" + mon warn on msgr2 not enabled: false + mon: + mon warn on osd down out interval zero: false +roles: +- - mon.a + - mgr.x + - osd.0 + - osd.1 + - osd.2 diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/.qa b/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml new file mode 100644 index 000000000..471bd61df --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/1-ceph-install/quincy.yaml @@ -0,0 +1,32 @@ +meta: +- desc: install ceph/quincy latest +tasks: +- install: + exclude_packages: + - ceph-mgr-cephadm + - cephadm + - libcephfs-dev + branch: quincy +- print: "**** done install quincy" +- ceph: + create_rbd_pool: false + conf: + global: + bluestore_warn_on_legacy_statfs: false + bluestore warn on no per pool omap: false + mon pg warn min per osd: 0 + mon: + mon_warn_on_insecure_global_id_reclaim: false + mon_warn_on_insecure_global_id_reclaim_allowed: false + log-ignorelist: + - Not found or unloadable + - evicting unresponsive client +- exec: + osd.0: + - ceph osd require-osd-release quincy +- print: "**** done ceph" +overrides: + ceph: + conf: + mon: + mon warn on osd down out interval zero: false diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml new file mode 100644 index 000000000..6aa429f18 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/2 - upgrade.yaml @@ -0,0 +1,20 @@ +meta: +- desc: | + install upgrade ceph/-x on cluster + restart : mons, osd.* +tasks: +- install.upgrade: + mon.a: +- exec: + osd.0: + - ceph osd require-osd-release quincy +- print: "**** done install.upgrade of nodes" +- ceph.restart: + daemons: [mon.a,mgr.x,osd.0,osd.1,osd.2] + mon-health-to-clog: false + wait-for-healthy: false + wait-for-osds-up: false + wait-for-scrub: false + skip_stop_pg_num_changes: true + expected-failure: "FileStore has been deprecated and is no longer supported" +- print: "**** done ceph.restart of all mons and osds" diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/.qa b/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/filestore-xfs.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/filestore-xfs.yaml new file mode 100644 index 000000000..b6ef47b06 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/objectstore/filestore-xfs.yaml @@ -0,0 +1,14 @@ +overrides: + ceph: + fs: xfs + conf: + osd: + osd objectstore: filestore + osd sloppy crc: true + ceph-deploy: + fs: xfs + filestore: True + conf: + osd: + osd objectstore: filestore + osd sloppy crc: true
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/filestore-remove-check/ubuntu_20.04.yaml b/qa/suites/upgrade/quincy-x/filestore-remove-check/ubuntu_20.04.yaml new file mode 100644 index 000000000..e1374c410 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/filestore-remove-check/ubuntu_20.04.yaml @@ -0,0 +1,9 @@ +os_type: ubuntu +os_version: "20.04" +# the normal ubuntu 20.04 kernel (5.4.0-88-generic currently) has a bug that prevents the nvme_loop +# driver from working properly. It appears to be this one: +# https://lkml.org/lkml/2020/9/21/1456 +# (at least, that is the symptom: nvme nvme1: Connect command failed, error wo/DNR bit: 880) +overrides: + kernel: + hwe: true
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/parallel/% b/qa/suites/upgrade/quincy-x/parallel/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/% diff --git a/qa/suites/upgrade/quincy-x/parallel/.qa b/qa/suites/upgrade/quincy-x/parallel/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/parallel/0-random-distro$ b/qa/suites/upgrade/quincy-x/parallel/0-random-distro$ new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/0-random-distro$ @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/parallel/0-start.yaml b/qa/suites/upgrade/quincy-x/parallel/0-start.yaml new file mode 100644 index 000000000..3814ea3ef --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/0-start.yaml @@ -0,0 +1,33 @@ +roles: +- - mon.a + - mon.c + - mgr.y + - mds.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mds.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: true + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/upgrade/quincy-x/parallel/1-tasks.yaml b/qa/suites/upgrade/quincy-x/parallel/1-tasks.yaml new file mode 100644 index 000000000..e57e31f2f --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/1-tasks.yaml @@ -0,0 +1,43 @@ +tasks: +- install: + branch: quincy + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- print: "**** done start installing quincy cephadm ..." +- cephadm: + image: quay.ceph.io/ceph-ci/ceph:quincy + cephadm_branch: quincy + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing quincy cephadm ..." + +- print: "**** done start cephadm.shell ceph config set mgr..." +- cephadm.shell: + mon.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." + +- print: "**** done start telemetry quincy..." +- workunit: + clients: + client.0: + - test_telemetry_quincy.sh +- print: "**** done end telemetry quincy..." + +- print: "**** done start parallel" +- parallel: + - workload + - upgrade-sequence +- print: "**** done end parallel" + +- print: "**** done start telemetry x..." +- workunit: + clients: + client.0: + - test_telemetry_quincy_x.sh +- print: "**** done end telemetry x..." diff --git a/qa/suites/upgrade/quincy-x/parallel/mon_election b/qa/suites/upgrade/quincy-x/parallel/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/parallel/upgrade-sequence.yaml b/qa/suites/upgrade/quincy-x/parallel/upgrade-sequence.yaml new file mode 100644 index 000000000..a3f0888da --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/upgrade-sequence.yaml @@ -0,0 +1,16 @@ +# renamed tasks: to upgrade-sequence: +upgrade-sequence: + sequential: + - print: "**** done start upgrade, wait" + - cephadm.shell: + env: [sha1] + mon.a: + - ceph config set global log_to_journald false --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - while ceph orch upgrade status | jq '.in_progress' | grep true ; do ceph orch ps ; ceph versions ; sleep 30 ; done + - ceph orch ps + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 + - print: "**** done end upgrade, wait..." + diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/+ b/qa/suites/upgrade/quincy-x/parallel/workload/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/+ diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/.qa b/qa/suites/upgrade/quincy-x/parallel/workload/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/.qa @@ -0,0 +1 @@ +../.qa/
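The upgrade-sequence above polls 'ceph orch upgrade status' until the upgrade finishes and then uses jq to assert that 'ceph versions' reports exactly one overall version whose name contains the target sha1. A small sketch of the same assertion done in Python over the JSON that 'ceph versions' prints; the sample data is made up but shaped like real output:

    import json

    def all_on_target(versions_json, sha1):
        # True when every daemon reports a single overall version containing sha1;
        # the Python analogue of:
        #   ceph versions | jq -e '.overall | length == 1'
        #   ceph versions | jq -e '.overall | keys' | grep $sha1
        overall = json.loads(versions_json)["overall"]
        return len(overall) == 1 and all(sha1 in version for version in overall)

    # Made-up sample shaped like `ceph versions` output:
    sample = json.dumps({
        "mon": {"ceph version 18.0.0-1234-gabc123 (abc123) reef (dev)": 3},
        "osd": {"ceph version 18.0.0-1234-gabc123 (abc123) reef (dev)": 8},
        "overall": {"ceph version 18.0.0-1234-gabc123 (abc123) reef (dev)": 11},
    })
    assert all_on_target(sample, "abc123")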
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/ec-rados-default.yaml b/qa/suites/upgrade/quincy-x/parallel/workload/ec-rados-default.yaml new file mode 100644 index 000000000..67a0f39c5 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/ec-rados-default.yaml @@ -0,0 +1,25 @@ +meta: +- desc: | + run run randomized correctness test for rados operations + on an erasure-coded pool +workload: + full_sequential: + - print: "**** done start ec-rados-default.yaml" + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + ec_pool: true + write_append_excl: false + op_weights: + read: 100 + write: 0 + append: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 + copy_from: 50 + setattr: 25 + rmattr: 25 + - print: "**** done end ec-rados-default.yaml" diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/rados_api.yaml b/qa/suites/upgrade/quincy-x/parallel/workload/rados_api.yaml new file mode 100644 index 000000000..9c2ff9da1 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/rados_api.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + object class functional tests +workload: + full_sequential: + - print: "**** done start rados_api.yaml" + - workunit: + branch: quincy + clients: + client.0: + - cls + - print: "**** done end rados_api.yaml" diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/rados_loadgenbig.yaml b/qa/suites/upgrade/quincy-x/parallel/workload/rados_loadgenbig.yaml new file mode 100644 index 000000000..25b1e1aaf --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/rados_loadgenbig.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + generate read/write load with rados objects ranging from 1MB to 25MB +workload: + full_sequential: + - print: "**** done start rados_loadgenbig.yaml" + - workunit: + branch: quincy + clients: + client.0: + - rados/load-gen-big.sh + - print: "**** done end rados_loadgenbig.yaml" diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/rbd_import_export.yaml b/qa/suites/upgrade/quincy-x/parallel/workload/rbd_import_export.yaml new file mode 100644 index 000000000..82b66048c --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/rbd_import_export.yaml @@ -0,0 +1,14 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +workload: + full_sequential: + - print: "**** done start rbd_import_export.yaml" + - workunit: + branch: quincy + clients: + client.1: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format + - print: "**** done end rbd_import_export.yaml" diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/test_rbd_api.yaml b/qa/suites/upgrade/quincy-x/parallel/workload/test_rbd_api.yaml new file mode 100644 index 000000000..c871d4c8c --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/test_rbd_api.yaml @@ -0,0 +1,14 @@ +meta: +- desc: | + librbd C and C++ api tests +workload: + full_sequential: + - print: "**** done start test_rbd_api.yaml" + - workunit: + branch: quincy + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" + - print: "**** done end test_rbd_api.yaml" diff --git a/qa/suites/upgrade/quincy-x/parallel/workload/test_rbd_python.yaml b/qa/suites/upgrade/quincy-x/parallel/workload/test_rbd_python.yaml new file mode 100644 index 000000000..3ae98ed1e --- /dev/null +++ b/qa/suites/upgrade/quincy-x/parallel/workload/test_rbd_python.yaml @@ -0,0 +1,20 @@ +meta: +- desc: | + librbd python api tests +overrides: + install: + ceph: + extra_system_packages: + - python3-pytest +workload: + 
full_sequential: + - print: "**** done start test_rbd_python.yaml" + - workunit: + branch: quincy + clients: + client.0: + - rbd/test_librbd_python.sh + env: + RBD_FEATURES: "61" + - print: "**** done end test_rbd_python.yaml" + diff --git a/qa/suites/upgrade/quincy-x/stress-split/% b/qa/suites/upgrade/quincy-x/stress-split/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/% diff --git a/qa/suites/upgrade/quincy-x/stress-split/.qa b/qa/suites/upgrade/quincy-x/stress-split/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/stress-split/0-distro b/qa/suites/upgrade/quincy-x/stress-split/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/stress-split/0-roles.yaml b/qa/suites/upgrade/quincy-x/stress-split/0-roles.yaml new file mode 100644 index 000000000..ad3ee43d3 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/0-roles.yaml @@ -0,0 +1,31 @@ +roles: +- - mon.a + - mon.c + - mgr.y + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: true + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/upgrade/quincy-x/stress-split/1-start.yaml b/qa/suites/upgrade/quincy-x/stress-split/1-start.yaml new file mode 100644 index 000000000..b9bca65fb --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/1-start.yaml @@ -0,0 +1,122 @@ +tasks: +- install: + branch: quincy + exclude_packages: + - ceph-volume + +- cephadm: + image: quay.ceph.io/ceph-ci/ceph:quincy + cephadm_branch: quincy + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" + +- cephadm.shell: + mon.a: + - ceph fs volume create foo + - ceph config set mon mon_warn_on_insecure_global_id_reclaim false --force + - ceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force + +- ceph.healthy: + +- print: "**** upgrading first half of cluster, with stress ****" +- parallel: + - first-half-tasks + - first-half-sequence +- print: "**** done upgrading first half of cluster ****" + +- ceph.healthy: + +- print: "**** applying stress + thrashing to mixed-version cluster ****" + +- parallel: + - stress-tasks + +- ceph.healthy: + +- print: "**** finishing upgrade ****" +- parallel: + - second-half-tasks + - second-half-sequence + +- ceph.healthy: + + +################# + +first-half-sequence: +- cephadm.shell: + env: [sha1] + mon.a: + - ceph config set mgr mgr/cephadm/daemon_cache_timeout 60 + - ceph config set global log_to_journald false --force + + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - ceph orch ps + + - echo wait for minority of mons to upgrade + - while ! ceph mon versions | grep $sha1 ; do sleep 2 ; done + - ceph orch ps + - ceph orch upgrade pause + - sleep 60 + - ceph orch upgrade resume + + - echo wait for majority of mons to upgrade + - "while ! ceph mon versions | grep $sha1 | egrep ': [23]' ; do sleep 2 ; done" + - ceph orch ps + - ceph orch upgrade pause + - sleep 60 + - ceph orch upgrade resume + + - echo wait for all mons to upgrade + - "while ! ceph mon versions | grep $sha1 | grep ': 3' ; do sleep 2 ; done" + - ceph orch ps + - ceph orch upgrade pause + - sleep 60 + - ceph orch upgrade resume + + - echo wait for half of osds to upgrade + - "while ! 
ceph osd versions | grep $sha1 | egrep ': [45678]'; do sleep 2 ; done" + - ceph orch upgrade pause + - ceph orch ps + + - ceph orch ps + - ceph versions + + +################# + +stress-tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + chance_thrash_cluster_full: 0 + chance_thrash_pg_upmap: 0 + chance_thrash_pg_upmap_items: 0 + disable_objectstore_tool_tests: true + chance_force_recovery: 0 + aggressive_pg_num_changes: false + + +################# + +second-half-sequence: + sequential: + - cephadm.shell: + env: [sha1] + mon.a: + - ceph orch upgrade resume + - sleep 60 + + - echo wait for upgrade to complete + - while ceph orch upgrade status | jq '.in_progress' | grep true ; do ceph orch ps ; ceph versions ; sleep 30 ; done + + - echo upgrade complete + - ceph orch ps + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/.qa b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/.qa @@ -0,0 +1 @@ +../.qa/
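The staged first-half-sequence above decides when to pause and resume by counting how many of the three mons already report the target build, for example `ceph mon versions | grep $sha1 | egrep ': [23]'` waits for a majority. A sketch of that counting logic in Python, assuming the JSON form of `ceph mon versions` maps version strings to daemon counts (function name and sample numbers are illustrative):

    import json

    def mons_on_version(mon_versions_json, sha1):
        # Count mons whose reported version string contains the target sha1.
        counts = json.loads(mon_versions_json)
        return sum(n for version, n in counts.items() if sha1 in version)

    # Made-up sample: two mons upgraded, one still on the previous build.
    sample = json.dumps({
        "ceph version 18.0.0-1234-gabc123 (abc123) reef (dev)": 2,
        "ceph version 17.2.6 (1234567) quincy (stable)": 1,
    })
    assert mons_on_version(sample, "abc123") >= 2   # majority of 3 mons upgraded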
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/radosbench.yaml b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/radosbench.yaml new file mode 100644 index 000000000..3816ca38c --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/radosbench.yaml @@ -0,0 +1,19 @@ +meta: +- desc: | + run randomized correctness test for rados operations + generate write load with rados bench +first-half-tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 +- print: "**** done end radosbench.yaml" diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-cls.yaml b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-cls.yaml new file mode 100644 index 000000000..b722f1873 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-cls.yaml @@ -0,0 +1,10 @@ +meta: +- desc: | + run basic cls tests for rbd +first-half-tasks: +- workunit: + branch: quincy + clients: + client.0: + - cls/test_cls_rbd.sh +- print: "**** done cls/test_cls_rbd.sh 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-import-export.yaml b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-import-export.yaml new file mode 100644 index 000000000..206389055 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd-import-export.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +first-half-tasks: +- workunit: + branch: quincy + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd_api.yaml b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd_api.yaml new file mode 100644 index 000000000..0fa5d0944 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/rbd_api.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + librbd C and C++ api tests +first-half-tasks: +- workunit: + branch: quincy + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" +- print: "**** done rbd/test_librbd.sh 7-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/readwrite.yaml b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/readwrite.yaml new file mode 100644 index 000000000..21a9f379a --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/readwrite.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool, + using only reads, writes, and deletes +first-half-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_append_excl: false + op_weights: + read: 45 + write: 45 + delete: 10 +- print: "**** done rados/readwrite 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/snaps-few-objects.yaml b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/snaps-few-objects.yaml new file mode 100644 index 000000000..6447c2245 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/2-first-half-tasks/snaps-few-objects.yaml @@ -0,0 +1,18 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool with snapshot operations +first-half-tasks: +- 
full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + write_append_excl: false + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 +- print: "**** done rados/snaps-few-objects 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/+ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/+ diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/.qa b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/radosbench.yaml b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/radosbench.yaml new file mode 100644 index 000000000..9058bd804 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/radosbench.yaml @@ -0,0 +1,25 @@ +meta: +- desc: | + run randomized correctness test for rados operations + generate write load with rados bench +stress-tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 +- print: "**** done end radosbench.yaml" diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-cls.yaml b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-cls.yaml new file mode 100644 index 000000000..649b024a4 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-cls.yaml @@ -0,0 +1,10 @@ +meta: +- desc: | + run basic cls tests for rbd +stress-tasks: +- workunit: + branch: quincy + clients: + client.0: + - cls/test_cls_rbd.sh +- print: "**** done cls/test_cls_rbd.sh 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-import-export.yaml b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-import-export.yaml new file mode 100644 index 000000000..2267e4462 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd-import-export.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +stress-tasks: +- workunit: + branch: quincy + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd_api.yaml b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd_api.yaml new file mode 100644 index 000000000..cc4f29a08 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/rbd_api.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + librbd C and C++ api tests +stress-tasks: +- workunit: + branch: quincy + clients: + client.0: + - rbd/test_librbd.sh + env: + RBD_FEATURES: "61" +- print: "**** done rbd/test_librbd.sh 7-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/readwrite.yaml b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/readwrite.yaml new file mode 100644 index 000000000..41e34d6d7 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/readwrite.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool, + using only reads, writes, and deletes +stress-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_append_excl: false + op_weights: + read: 45 + write: 45 + delete: 10 +- print: "**** done rados/readwrite 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/snaps-few-objects.yaml b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/snaps-few-objects.yaml new file mode 100644 index 000000000..f56d0de0f --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/3-stress-tasks/snaps-few-objects.yaml @@ -0,0 +1,18 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool with snapshot operations +stress-tasks: +- 
full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + write_append_excl: false + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 +- print: "**** done rados/snaps-few-objects 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/.qa b/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/radosbench.yaml b/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/radosbench.yaml new file mode 100644 index 000000000..7268cb170 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/radosbench.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + run randomized correctness test for rados operations + generate write load with rados bench +second-half-tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 +- print: "**** done end radosbench.yaml" diff --git a/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/rbd-import-export.yaml b/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/rbd-import-export.yaml new file mode 100644 index 000000000..1c509f755 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/4-second-half-tasks/rbd-import-export.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +second-half-tasks: +- workunit: + branch: quincy + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 5-workload" diff --git a/qa/suites/upgrade/quincy-x/stress-split/mon_election b/qa/suites/upgrade/quincy-x/stress-split/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/upgrade/quincy-x/stress-split/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/upgrade/telemetry-upgrade/pacific-x/% b/qa/suites/upgrade/telemetry-upgrade/pacific-x/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/pacific-x/% diff --git a/qa/suites/upgrade/telemetry-upgrade/pacific-x/.qa b/qa/suites/upgrade/telemetry-upgrade/pacific-x/.qa new file mode 120000 index 000000000..a23f7e045 --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/pacific-x/.qa @@ -0,0 +1 @@ +../../.qa
\ No newline at end of file diff --git a/qa/suites/upgrade/telemetry-upgrade/pacific-x/0-random-distro$ b/qa/suites/upgrade/telemetry-upgrade/pacific-x/0-random-distro$ new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/pacific-x/0-random-distro$ @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/upgrade/telemetry-upgrade/pacific-x/0-start.yaml b/qa/suites/upgrade/telemetry-upgrade/pacific-x/0-start.yaml new file mode 100644 index 000000000..3814ea3ef --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/pacific-x/0-start.yaml @@ -0,0 +1,33 @@ +roles: +- - mon.a + - mon.c + - mgr.y + - mds.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mds.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: true + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/upgrade/telemetry-upgrade/pacific-x/1-tasks.yaml b/qa/suites/upgrade/telemetry-upgrade/pacific-x/1-tasks.yaml new file mode 100644 index 000000000..28b9eb29f --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/pacific-x/1-tasks.yaml @@ -0,0 +1,54 @@ +tasks: +- install: + branch: pacific + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- print: "**** done start installing pacific cephadm ..." +- cephadm: + image: quay.io/ceph/daemon-base:latest-pacific + cephadm_branch: pacific + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing pacific cephadm ..." + +- print: "**** done start cephadm.shell ceph config set mgr..." +- cephadm.shell: + mon.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." + + +- print: "**** done start telemetry pacific..." +- workunit: + clients: + client.0: + - test_telemetry_pacific.sh +- print: "**** done end telemetry pacific..." + +- print: "**** done start upgrade sequence..." +- sequential: + - print: "**** done start upgrade..." + - cephadm.shell: + env: [sha1] + mon.a: + - ceph config set global log_to_journald false --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - while ceph orch upgrade status | jq '.in_progress' | grep true ; do ceph orch ps ; ceph versions ; sleep 30 ; done + - ceph orch ps + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 + - print: "**** done end upgrade..." + + - print: "**** done start telemetry x..." + - workunit: + clients: + client.0: + - test_telemetry_pacific_x.sh + - print: "**** done end telemetry x..." +- print: "**** done end upgrade sequence..." diff --git a/qa/suites/upgrade/telemetry-upgrade/quincy-x/% b/qa/suites/upgrade/telemetry-upgrade/quincy-x/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/quincy-x/% diff --git a/qa/suites/upgrade/telemetry-upgrade/quincy-x/.qa b/qa/suites/upgrade/telemetry-upgrade/quincy-x/.qa new file mode 120000 index 000000000..a23f7e045 --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/quincy-x/.qa @@ -0,0 +1 @@ +../../.qa
\ No newline at end of file diff --git a/qa/suites/upgrade/telemetry-upgrade/quincy-x/0-random-distro$ b/qa/suites/upgrade/telemetry-upgrade/quincy-x/0-random-distro$ new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/quincy-x/0-random-distro$ @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/upgrade/telemetry-upgrade/quincy-x/0-start.yaml b/qa/suites/upgrade/telemetry-upgrade/quincy-x/0-start.yaml new file mode 100644 index 000000000..3814ea3ef --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/quincy-x/0-start.yaml @@ -0,0 +1,33 @@ +roles: +- - mon.a + - mon.c + - mgr.y + - mds.a + - osd.0 + - osd.1 + - osd.2 + - osd.3 + - client.0 + - node-exporter.a + - alertmanager.a +- - mon.b + - mds.b + - mgr.x + - osd.4 + - osd.5 + - osd.6 + - osd.7 + - client.1 + - prometheus.a + - grafana.a + - node-exporter.b +openstack: +- volumes: # attached to each instance + count: 4 + size: 10 # GB +overrides: + ceph: + create_rbd_pool: true + conf: + osd: + osd shutdown pgref assert: true diff --git a/qa/suites/upgrade/telemetry-upgrade/quincy-x/1-tasks.yaml b/qa/suites/upgrade/telemetry-upgrade/quincy-x/1-tasks.yaml new file mode 100644 index 000000000..cd6609a6d --- /dev/null +++ b/qa/suites/upgrade/telemetry-upgrade/quincy-x/1-tasks.yaml @@ -0,0 +1,53 @@ +tasks: +- install: + branch: quincy + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- print: "**** done start installing quincy cephadm ..." +- cephadm: + image: quay.io/ceph/daemon-base:latest-quincy + cephadm_branch: quincy + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing quincy cephadm ..." + +- print: "**** done start cephadm.shell ceph config set mgr..." +- cephadm.shell: + mon.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." + +- print: "**** done start telemetry quincy..." +- workunit: + clients: + client.0: + - test_telemetry_quincy.sh +- print: "**** done end telemetry quincy..." + +- print: "**** done start upgrade sequence..." +- sequential: + - print: "**** done start upgrade..." + - cephadm.shell: + env: [sha1] + mon.a: + - ceph config set global log_to_journald false --force + - ceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 + - while ceph orch upgrade status | jq '.in_progress' | grep true ; do ceph orch ps ; ceph versions ; sleep 30 ; done + - ceph orch ps + - ceph versions + - ceph versions | jq -e '.overall | length == 1' + - ceph versions | jq -e '.overall | keys' | grep $sha1 + - print: "**** done end upgrade..." + + - print: "**** done start telemetry x..." + - workunit: + clients: + client.0: + - test_telemetry_quincy_x.sh + - print: "**** done end telemetry x..." +- print: "**** done end upgrade sequence..." diff --git a/qa/suites/windows/.qa b/qa/suites/windows/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/windows/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/windows/basic/% b/qa/suites/windows/basic/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/windows/basic/% diff --git a/qa/suites/windows/basic/.qa b/qa/suites/windows/basic/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/windows/basic/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/windows/basic/clusters/.qa b/qa/suites/windows/basic/clusters/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/windows/basic/clusters/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/windows/basic/clusters/fixed-1.yaml b/qa/suites/windows/basic/clusters/fixed-1.yaml new file mode 120000 index 000000000..02df5dd0c --- /dev/null +++ b/qa/suites/windows/basic/clusters/fixed-1.yaml @@ -0,0 +1 @@ +.qa/clusters/fixed-1.yaml
\ No newline at end of file diff --git a/qa/suites/windows/basic/install/.qa b/qa/suites/windows/basic/install/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/windows/basic/install/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/windows/basic/install/install.yaml b/qa/suites/windows/basic/install/install.yaml new file mode 100644 index 000000000..2030acb90 --- /dev/null +++ b/qa/suites/windows/basic/install/install.yaml @@ -0,0 +1,3 @@ +tasks: +- install: +- ceph: diff --git a/qa/suites/windows/basic/tasks/.qa b/qa/suites/windows/basic/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/windows/basic/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/windows/basic/tasks/windows_tests.yaml b/qa/suites/windows/basic/tasks/windows_tests.yaml new file mode 100644 index 000000000..42469bf3b --- /dev/null +++ b/qa/suites/windows/basic/tasks/windows_tests.yaml @@ -0,0 +1,6 @@ +tasks: +- workunit: + clients: + client.0: + - windows/libvirt_vm/setup.sh + - windows/run-tests.sh diff --git a/qa/suites/windows/basic/ubuntu_latest.yaml b/qa/suites/windows/basic/ubuntu_latest.yaml new file mode 120000 index 000000000..3a09f9abb --- /dev/null +++ b/qa/suites/windows/basic/ubuntu_latest.yaml @@ -0,0 +1 @@ +.qa/distros/supported/ubuntu_latest.yaml
\ No newline at end of file diff --git a/qa/tasks/__init__.py b/qa/tasks/__init__.py new file mode 100644 index 000000000..9a7949a00 --- /dev/null +++ b/qa/tasks/__init__.py @@ -0,0 +1,6 @@ +import logging + +# Inherit teuthology's log level +teuthology_log = logging.getLogger('teuthology') +log = logging.getLogger(__name__) +log.setLevel(teuthology_log.level) diff --git a/qa/tasks/admin_socket.py b/qa/tasks/admin_socket.py new file mode 100644 index 000000000..0d960d1a5 --- /dev/null +++ b/qa/tasks/admin_socket.py @@ -0,0 +1,204 @@ +""" +Admin Socket task -- used in rados, powercycle, and smoke testing +""" + +import json +import logging +import os +import time + +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra import run +from teuthology import misc as teuthology +from teuthology.parallel import parallel +from teuthology.config import config as teuth_config + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Run an admin socket command, make sure the output is json, and run + a test program on it. The test program should read json from + stdin. This task succeeds if the test program exits with status 0. + + To run the same test on all clients:: + + tasks: + - ceph: + - rados: + - admin_socket: + all: + dump_requests: + test: http://example.com/script + + To restrict it to certain clients:: + + tasks: + - ceph: + - rados: [client.1] + - admin_socket: + client.1: + dump_requests: + test: http://example.com/script + + If an admin socket command has arguments, they can be specified as + a list:: + + tasks: + - ceph: + - rados: [client.0] + - admin_socket: + client.0: + dump_requests: + test: http://example.com/script + help: + test: http://example.com/test_help_version + args: [version] + + Note that there must be a ceph client with an admin socket running + before this task is run. The tests are parallelized at the client + level. Tests for a single client are run serially. + + :param ctx: Context + :param config: Configuration + """ + assert isinstance(config, dict), \ + 'admin_socket task requires a dict for configuration' + teuthology.replace_all_with_clients(ctx.cluster, config) + + with parallel() as ptask: + for client, tests in config.items(): + ptask.spawn(_run_tests, ctx, client, tests) + + +def _socket_command(ctx, remote, socket_path, command, args): + """ + Run an admin socket command and return the result as a string. + + :param ctx: Context + :param remote: Remote site + :param socket_path: path to socket + :param command: command to be run remotely + :param args: command arguments + + :returns: output of command in json format + """ + testdir = teuthology.get_testdir(ctx) + max_tries = 120 + sub_commands = [c.strip() for c in command.split('||')] + ex = None + for _ in range(max_tries): + for sub_command in sub_commands: + try: + out = remote.sh([ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'ceph', + '--admin-daemon', socket_path, + ] + sub_command.split(' ') + args) + except CommandFailedError as e: + ex = e + log.info('ceph cli "%s" returned an error %s, ' + 'command not registered yet?', sub_command, e) + else: + log.debug('admin socket command %s returned %s', + sub_command, out) + return json.loads(out) + else: + # exhausted all commands + log.info('sleeping and retrying ...') + time.sleep(1) + else: + # i tried max_tries times.. 
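The admin_socket task docstring above notes that each configured `test` program receives the admin socket command's JSON output on stdin and must exit 0 for the task to pass. A minimal sketch of such a test program; the field it inspects is hypothetical:

    #!/usr/bin/env python3
    # Toy admin_socket test program: read the command's JSON from stdin,
    # inspect it, and exit 0 on success or 1 on failure.
    import json
    import sys

    def main():
        data = json.load(sys.stdin)
        # Hypothetical check: a `version` command should report a non-empty string.
        if isinstance(data, dict) and data.get("version"):
            return 0
        return 1

    if __name__ == "__main__":
        sys.exit(main())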
+ assert ex is not None + raise ex + + +def _run_tests(ctx, client, tests): + """ + Create a temp directory and wait for a client socket to be created. + For each test, copy the executable locally and run the test. + Remove temp directory when finished. + + :param ctx: Context + :param client: client machine to run the test + :param tests: list of tests to run + """ + testdir = teuthology.get_testdir(ctx) + log.debug('Running admin socket tests on %s', client) + (remote,) = ctx.cluster.only(client).remotes.keys() + socket_path = '/var/run/ceph/ceph-{name}.asok'.format(name=client) + overrides = ctx.config.get('overrides', {}).get('admin_socket', {}) + + try: + tmp_dir = os.path.join( + testdir, + 'admin_socket_{client}'.format(client=client), + ) + remote.run( + args=[ + 'mkdir', + '--', + tmp_dir, + run.Raw('&&'), + # wait for client process to create the socket + 'while', 'test', '!', '-e', socket_path, run.Raw(';'), + 'do', 'sleep', '1', run.Raw(';'), 'done', + ], + ) + + for command, config in tests.items(): + if config is None: + config = {} + teuthology.deep_merge(config, overrides) + log.debug('Testing %s with config %s', command, str(config)) + + test_path = None + if 'test' in config: + # hack: the git_url is always ceph-ci or ceph + git_url = teuth_config.get_ceph_git_url() + repo_name = 'ceph.git' + if git_url.count('ceph-ci'): + repo_name = 'ceph-ci.git' + url = config['test'].format( + branch=config.get('branch', 'master'), + repo=repo_name, + ) + test_path = os.path.join(tmp_dir, command) + remote.run( + args=[ + 'wget', + '-q', + '-O', + test_path, + '--', + url, + run.Raw('&&'), + 'chmod', + 'u=rx', + '--', + test_path, + ], + ) + + args = config.get('args', []) + assert isinstance(args, list), \ + 'admin socket command args must be a list' + sock_out = _socket_command(ctx, remote, socket_path, command, args) + if test_path is not None: + remote.run( + args=[ + test_path, + ], + stdin=json.dumps(sock_out), + ) + + finally: + remote.run( + args=[ + 'rm', '-rf', '--', tmp_dir, + ], + ) diff --git a/qa/tasks/autotest.py b/qa/tasks/autotest.py new file mode 100644 index 000000000..80c3fc9d2 --- /dev/null +++ b/qa/tasks/autotest.py @@ -0,0 +1,165 @@ +""" +Run an autotest test on the ceph cluster. +""" +import json +import logging +import os + +from teuthology import misc as teuthology +from teuthology.parallel import parallel +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Run an autotest test on the ceph cluster. + + Only autotest client tests are supported. + + The config is a mapping from role name to list of tests to run on + that client. + + For example:: + + tasks: + - ceph: + - ceph-fuse: [client.0, client.1] + - autotest: + client.0: [dbench] + client.1: [bonnie] + + You can also specify a list of tests to run on all clients:: + + tasks: + - ceph: + - ceph-fuse: + - autotest: + all: [dbench] + """ + assert isinstance(config, dict) + config = teuthology.replace_all_with_clients(ctx.cluster, config) + log.info('Setting up autotest...') + testdir = teuthology.get_testdir(ctx) + with parallel() as p: + for role in config.keys(): + (remote,) = ctx.cluster.only(role).remotes.keys() + p.spawn(_download, testdir, remote) + + log.info('Making a separate scratch dir for every client...') + for role in config.keys(): + assert isinstance(role, str) + PREFIX = 'client.' 
+ assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.keys() + mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) + scratch = os.path.join(mnt, 'client.{id}'.format(id=id_)) + remote.run( + args=[ + 'sudo', + 'install', + '-d', + '-m', '0755', + '--owner={user}'.format(user='ubuntu'), #TODO + '--', + scratch, + ], + ) + + with parallel() as p: + for role, tests in config.items(): + (remote,) = ctx.cluster.only(role).remotes.keys() + p.spawn(_run_tests, testdir, remote, role, tests) + +def _download(testdir, remote): + """ + Download. Does not explicitly support muliple tasks in a single run. + """ + remote.run( + args=[ + # explicitly does not support multiple autotest tasks + # in a single run; the result archival would conflict + 'mkdir', '{tdir}/archive/autotest'.format(tdir=testdir), + run.Raw('&&'), + 'mkdir', '{tdir}/autotest'.format(tdir=testdir), + run.Raw('&&'), + 'wget', + '-nv', + '--no-check-certificate', + 'https://github.com/ceph/autotest/tarball/ceph', + '-O-', + run.Raw('|'), + 'tar', + '-C', '{tdir}/autotest'.format(tdir=testdir), + '-x', + '-z', + '-f-', + '--strip-components=1', + ], + ) + +def _run_tests(testdir, remote, role, tests): + """ + Spawned to run test on remote site + """ + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + mnt = os.path.join(testdir, 'mnt.{id}'.format(id=id_)) + scratch = os.path.join(mnt, 'client.{id}'.format(id=id_)) + + assert isinstance(tests, list) + for idx, testname in enumerate(tests): + log.info('Running autotest client test #%d: %s...', idx, testname) + + tag = 'client.{id}.num{idx}.{testname}'.format( + idx=idx, + testname=testname, + id=id_, + ) + control = '{tdir}/control.{tag}'.format(tdir=testdir, tag=tag) + remote.write_file( + path=control, + data='import json; data=json.loads({data!r}); job.run_test(**data)'.format( + data=json.dumps(dict( + url=testname, + dir=scratch, + # TODO perhaps tag + # results will be in {testdir}/autotest/client/results/dbench + # or {testdir}/autotest/client/results/dbench.{tag} + )), + ), + ) + remote.run( + args=[ + '{tdir}/autotest/client/bin/autotest'.format(tdir=testdir), + '--verbose', + '--harness=simple', + '--tag={tag}'.format(tag=tag), + control, + run.Raw('3>&1'), + ], + ) + + remote.run( + args=[ + 'rm', '-rf', '--', control, + ], + ) + + remote.run( + args=[ + 'mv', + '--', + '{tdir}/autotest/client/results/{tag}'.format(tdir=testdir, tag=tag), + '{tdir}/archive/autotest/{tag}'.format(tdir=testdir, tag=tag), + ], + ) + + remote.run( + args=[ + 'rm', '-rf', '--', '{tdir}/autotest'.format(tdir=testdir), + ], + ) diff --git a/qa/tasks/aver.py b/qa/tasks/aver.py new file mode 100644 index 000000000..79ee18c5c --- /dev/null +++ b/qa/tasks/aver.py @@ -0,0 +1,67 @@ +""" +Aver wrapper task +""" +import contextlib +import logging +from subprocess import check_call, Popen, PIPE + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Execute an aver assertion + + Parameters: + + input: file containing data referred to by the assertions. 
File name is + relative to the job's archive path + validations: list of validations in the Aver language + + Example: + - aver: + input: bench_output.csv + validations: + - expect performance(alg='ceph') > performance(alg='raw') + - for size > 3 expect avg_throughput > 2000 + """ + log.info('Beginning aver...') + assert isinstance(config, dict), 'expecting dictionary for configuration' + + if 'input' not in config: + raise Exception("Expecting 'input' option") + if len(config.get('validations', [])) < 1: + raise Exception("Expecting at least one entry in 'validations'") + + url = ('https://github.com/ivotron/aver/releases/download/' + 'v0.3.0/aver-linux-amd64.tar.bz2') + + aver_path = ctx.archive + '/aver' + + # download binary + check_call(['wget', '-O', aver_path + '.tbz', url]) + check_call(['tar', 'xfj', aver_path + '.tbz', '-C', ctx.archive]) + + # print version + process = Popen([aver_path, '-v'], stdout=PIPE) + log.info(process.communicate()[0]) + + # validate + for validation in config['validations']: + cmd = (aver_path + ' -s -i ' + (ctx.archive + '/' + config['input']) + + ' "' + validation + '"') + log.info("executing: " + cmd) + process = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) + (stdout, stderr) = process.communicate() + if stderr: + log.info('aver stderr: ' + stderr) + log.info('aver result: ' + stdout) + if stdout.strip(' \t\n\r') != 'true': + raise Exception('Failed validation: ' + validation) + + try: + yield + finally: + log.info('Removing aver binary...') + check_call(['rm', aver_path, aver_path + '.tbz']) diff --git a/qa/tasks/backfill_toofull.py b/qa/tasks/backfill_toofull.py new file mode 100644 index 000000000..f4ff90a46 --- /dev/null +++ b/qa/tasks/backfill_toofull.py @@ -0,0 +1,193 @@ +""" +Backfill_toofull +""" +import logging +import time +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def wait_for_pg_state(manager, pgid, state, to_osd): + log.debug("waiting for pg %s state is %s" % (pgid, state)) + for i in range(300): + time.sleep(5) + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.info('pg=%s' % pg); + assert pg + status = pg['state'].split('+') + if 'active' not in status: + log.debug('not active') + continue + if state not in status: + log.debug('not %s' % state) + continue + assert to_osd in pg['up'] + return + assert False, '%s not in %s' % (pgid, state) + + +def task(ctx, config): + """ + Test backfill reservation calculates "toofull" condition correctly. 
+ + A pretty rigid cluster is brought up and tested by this task + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'backfill_toofull task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + profile = config.get('erasure_code_profile', { + 'k': '2', + 'm': '1', + 'crush-failure-domain': 'osd' + }) + profile_name = profile.get('name', 'backfill_toofull') + manager.create_erasure_code_profile(profile_name, profile) + pool = manager.create_pool_with_unique_name( + pg_num=1, + erasure_code_profile_name=profile_name, + min_size=2) + manager.raw_cluster_cmd('osd', 'pool', 'set', pool, + 'pg_autoscale_mode', 'off') + + manager.flush_pg_stats([0, 1, 2, 3]) + manager.wait_for_clean() + + pool_id = manager.get_pool_num(pool) + pgid = '%d.0' % pool_id + pgs = manager.get_pg_stats() + acting = next((pg['acting'] for pg in pgs if pg['pgid'] == pgid), None) + log.debug("acting=%s" % acting) + assert acting + primary = acting[0] + target = acting[1] + + log.debug("write some data") + rados(ctx, mon, ['-p', pool, 'bench', '120', 'write', '--no-cleanup']) + df = manager.get_osd_df(target) + log.debug("target osd df: %s" % df) + + total_kb = df['kb'] + used_kb = df['kb_used'] + + log.debug("pause recovery") + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'nobackfill') + manager.raw_cluster_cmd('osd', 'set', 'norecover') + + log.debug("stop tartget osd %s" % target) + manager.kill_osd(target) + manager.wait_till_active() + + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + + log.debug("re-write data") + rados(ctx, mon, ['-p', pool, 'cleanup']) + time.sleep(10) + rados(ctx, mon, ['-p', pool, 'bench', '60', 'write', '--no-cleanup']) + + df = manager.get_osd_df(primary) + log.debug("primary osd df: %s" % df) + + primary_used_kb = df['kb_used'] + + log.info("test backfill reservation rejected with toofull") + + # We set backfillfull ratio less than new data size and expect the pg + # entering backfill_toofull state. + # + # We also need to update nearfull ratio to prevent "full ratio(s) out of order". + + backfillfull = 0.9 * primary_used_kb / total_kb + nearfull = backfillfull * 0.9 + + log.debug("update nearfull ratio to %s and backfillfull ratio to %s" % + (nearfull, backfillfull)) + manager.raw_cluster_cmd('osd', 'set-nearfull-ratio', + '{:.3f}'.format(nearfull + 0.001)) + manager.raw_cluster_cmd('osd', 'set-backfillfull-ratio', + '{:.3f}'.format(backfillfull + 0.001)) + + log.debug("start tartget osd %s" % target) + + manager.revive_osd(target) + manager.wait_for_active() + manager.wait_till_osd_is_up(target) + + wait_for_pg_state(manager, pgid, 'backfill_toofull', target) + + log.info("test pg not enter backfill_toofull after restarting backfill") + + # We want to set backfillfull ratio to be big enough for the target to + # successfully backfill new data but smaller than the sum of old and new + # data, so if the osd backfill reservation incorrectly calculates "toofull" + # the test will detect this (fail). + # + # Note, we need to operate with "uncompressed" bytes because currently + # osd backfill reservation does not take compression into account. + # + # We also need to update nearfull ratio to prevent "full ratio(s) out of order". 
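The comments above pick backfillfull between the space the new data needs on its own and the space old plus new data would need, with nearfull just below it to keep the ratios ordered. A worked example of that arithmetic with made-up numbers (the real test derives them from osd and pool df output):

    # Made-up numbers; the test computes these from `ceph osd df` / pool df output.
    total_kb = 10_000_000         # capacity of the target osd
    used_kb = 1_200_000           # stale data still on the stopped target
    primary_used_kb = 1_500_000   # data re-written while the target was down
    compress_ratio = 1.0          # no compression in this example

    nearfull_min = max(used_kb, primary_used_kb) * compress_ratio / total_kb    # 0.15
    backfillfull_max = (used_kb + primary_used_kb) * compress_ratio / total_kb  # 0.27
    delta = backfillfull_max - nearfull_min                                     # 0.12
    nearfull = nearfull_min + delta * 0.1       # 0.162
    backfillfull = nearfull_min + delta * 0.2   # 0.174: room for the new data,
                                                # but not for old + new together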
+ + pdf = manager.get_pool_df(pool) + log.debug("pool %s df: %s" % (pool, pdf)) + assert pdf + compress_ratio = 1.0 * pdf['compress_under_bytes'] / pdf['compress_bytes_used'] \ + if pdf['compress_bytes_used'] > 0 else 1.0 + log.debug("compress_ratio: %s" % compress_ratio) + + backfillfull = (used_kb + primary_used_kb) * compress_ratio / total_kb + assert backfillfull < 0.9 + nearfull_min = max(used_kb, primary_used_kb) * compress_ratio / total_kb + assert nearfull_min < backfillfull + delta = backfillfull - nearfull_min + nearfull = nearfull_min + delta * 0.1 + backfillfull = nearfull_min + delta * 0.2 + + log.debug("update nearfull ratio to %s and backfillfull ratio to %s" % + (nearfull, backfillfull)) + manager.raw_cluster_cmd('osd', 'set-nearfull-ratio', + '{:.3f}'.format(nearfull + 0.001)) + manager.raw_cluster_cmd('osd', 'set-backfillfull-ratio', + '{:.3f}'.format(backfillfull + 0.001)) + + wait_for_pg_state(manager, pgid, 'backfilling', target) + + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + + log.debug("interrupt %s backfill" % target) + manager.mark_down_osd(target) + # after marking the target osd down it will automatically be + # up soon again + + log.debug("resume recovery") + manager.raw_cluster_cmd('osd', 'unset', 'noout') + manager.raw_cluster_cmd('osd', 'unset', 'nobackfill') + manager.raw_cluster_cmd('osd', 'unset', 'norecover') + + # wait for everything to peer, backfill and recover + manager.wait_for_clean() + + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.info('pg=%s' % pg) + assert pg + assert 'clean' in pg['state'].split('+') diff --git a/qa/tasks/barbican.py b/qa/tasks/barbican.py new file mode 100644 index 000000000..771304fba --- /dev/null +++ b/qa/tasks/barbican.py @@ -0,0 +1,524 @@ +""" +Deploy and configure Barbican for Teuthology +""" +import argparse +import contextlib +import logging +import http +import json +import time +import math + +from urllib.parse import urlparse + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.exceptions import ConfigError + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the Barbican from github. + Remove downloaded file upon exit. + + The context passed in should be identical to the context + passed in to the main task. 
+ """ + assert isinstance(config, dict) + log.info('Downloading barbican...') + testdir = teuthology.get_testdir(ctx) + for (client, cconf) in config.items(): + branch = cconf.get('force-branch', 'master') + log.info("Using branch '%s' for barbican", branch) + + sha1 = cconf.get('sha1') + log.info('sha1=%s', sha1) + + ctx.cluster.only(client).run( + args=[ + 'bash', '-l' + ], + ) + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', branch, + 'https://github.com/openstack/barbican.git', + '{tdir}/barbican'.format(tdir=testdir), + ], + ) + if sha1 is not None: + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/barbican'.format(tdir=testdir), + run.Raw('&&'), + 'git', 'reset', '--hard', sha1, + ], + ) + try: + yield + finally: + log.info('Removing barbican...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/barbican'.format(tdir=testdir), + ], + ) + +def get_barbican_dir(ctx): + return '{tdir}/barbican'.format(tdir=teuthology.get_testdir(ctx)) + +def run_in_barbican_dir(ctx, client, args): + ctx.cluster.only(client).run( + args=['cd', get_barbican_dir(ctx), run.Raw('&&'), ] + args, + ) + +def run_in_barbican_venv(ctx, client, args): + run_in_barbican_dir(ctx, client, + ['.', + '.barbicanenv/bin/activate', + run.Raw('&&') + ] + args) + +@contextlib.contextmanager +def setup_venv(ctx, config): + """ + Setup the virtualenv for Barbican using pip. + """ + assert isinstance(config, dict) + log.info('Setting up virtualenv for barbican...') + for (client, _) in config.items(): + run_in_barbican_dir(ctx, client, + ['python3', '-m', 'venv', '.barbicanenv']) + run_in_barbican_venv(ctx, client, + ['pip', 'install', '--upgrade', 'pip']) + run_in_barbican_venv(ctx, client, + ['pip', 'install', 'pytz', + '-e', get_barbican_dir(ctx)]) + yield + +def assign_ports(ctx, config, initial_port): + """ + Assign port numbers starting from @initial_port + """ + port = initial_port + role_endpoints = {} + for remote, roles_for_host in ctx.cluster.remotes.items(): + for role in roles_for_host: + if role in config: + role_endpoints[role] = (remote.name.split('@')[1], port) + port += 1 + + return role_endpoints + +def set_authtoken_params(ctx, cclient, cconfig): + section_config_list = cconfig['keystone_authtoken'].items() + for config in section_config_list: + (name, val) = config + run_in_barbican_dir(ctx, cclient, + ['sed', '-i', + '/[[]filter:authtoken]/{p;s##'+'{} = {}'.format(name, val)+'#;}', + 'etc/barbican/barbican-api-paste.ini']) + + keystone_role = cconfig.get('use-keystone-role', None) + public_host, public_port = ctx.keystone.public_endpoints[keystone_role] + url = 'http://{host}:{port}/v3'.format(host=public_host, + port=public_port) + run_in_barbican_dir(ctx, cclient, + ['sed', '-i', + '/[[]filter:authtoken]/{p;s##'+'auth_uri = {}'.format(url)+'#;}', + 'etc/barbican/barbican-api-paste.ini']) + admin_url = 'http://{host}:{port}/v3'.format(host=public_host, + port=public_port) + run_in_barbican_dir(ctx, cclient, + ['sed', '-i', + '/[[]filter:authtoken]/{p;s##'+'auth_url = {}'.format(admin_url)+'#;}', + 'etc/barbican/barbican-api-paste.ini']) + +def fix_barbican_api_paste(ctx, cclient): + run_in_barbican_dir(ctx, cclient, + ['sed', '-i', '-n', + '/\\[pipeline:barbican_api]/ {p;n; /^pipeline =/ '+ + '{ s/.*/pipeline = unauthenticated-context apiapp/;p;d } } ; p', + './etc/barbican/barbican-api-paste.ini']) + +def fix_barbican_api(ctx, cclient): + run_in_barbican_dir(ctx, cclient, + ['sed', '-i', + 
'/prop_dir =/ s#etc/barbican#{}/etc/barbican#'.format(get_barbican_dir(ctx)), + 'bin/barbican-api']) + +def create_barbican_conf(ctx, cclient): + barbican_host, barbican_port = ctx.barbican.endpoints[cclient] + barbican_url = 'http://{host}:{port}'.format(host=barbican_host, + port=barbican_port) + log.info("barbican url=%s", barbican_url) + + run_in_barbican_dir(ctx, cclient, + ['bash', '-c', + 'echo -n -e "[DEFAULT]\nhost_href=' + barbican_url + '\n" ' + \ + '>barbican.conf']) + + log.info("run barbican db upgrade") + config_path = get_barbican_dir(ctx) + '/barbican.conf' + run_in_barbican_venv(ctx, cclient, ['barbican-manage', '--config-file', config_path, + 'db', 'upgrade']) + log.info("run barbican db sync_secret_stores") + run_in_barbican_venv(ctx, cclient, ['barbican-manage', '--config-file', config_path, + 'db', 'sync_secret_stores']) + +@contextlib.contextmanager +def configure_barbican(ctx, config): + """ + Configure barbican paste-api and barbican-api. + """ + assert isinstance(config, dict) + (cclient, cconfig) = next(iter(config.items())) + + keystone_role = cconfig.get('use-keystone-role', None) + if keystone_role is None: + raise ConfigError('use-keystone-role not defined in barbican task') + + set_authtoken_params(ctx, cclient, cconfig) + fix_barbican_api(ctx, cclient) + fix_barbican_api_paste(ctx, cclient) + create_barbican_conf(ctx, cclient) + try: + yield + finally: + pass + +@contextlib.contextmanager +def run_barbican(ctx, config): + assert isinstance(config, dict) + log.info('Running barbican...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + cluster_name, _, client_id = teuthology.split_role(client) + + # start the public endpoint + client_public_with_id = 'barbican.public' + '.' + client_id + + run_cmd = ['cd', get_barbican_dir(ctx), run.Raw('&&'), + '.', '.barbicanenv/bin/activate', run.Raw('&&'), + 'HOME={}'.format(get_barbican_dir(ctx)), run.Raw('&&'), + 'bin/barbican-api', + run.Raw('& { read; kill %1; }')] + #run.Raw('1>/dev/null') + + run_cmd = 'cd ' + get_barbican_dir(ctx) + ' && ' + \ + '. .barbicanenv/bin/activate && ' + \ + 'HOME={}'.format(get_barbican_dir(ctx)) + ' && ' + \ + 'exec bin/barbican-api & { read; kill %1; }' + + ctx.daemons.add_daemon( + remote, 'barbican', client_public_with_id, + cluster=cluster_name, + args=['bash', '-c', run_cmd], + logger=log.getChild(client), + stdin=run.PIPE, + cwd=get_barbican_dir(ctx), + wait=False, + check_status=False, + ) + + # sleep driven synchronization + run_in_barbican_venv(ctx, client, ['sleep', '15']) + try: + yield + finally: + log.info('Stopping Barbican instance') + ctx.daemons.get_daemon('barbican', client_public_with_id, + cluster_name).stop() + + +@contextlib.contextmanager +def create_secrets(ctx, config): + """ + Create a main and an alternate s3 user. 
+ """ + assert isinstance(config, dict) + (cclient, cconfig) = next(iter(config.items())) + + rgw_user = cconfig['rgw_user'] + + keystone_role = cconfig.get('use-keystone-role', None) + keystone_host, keystone_port = ctx.keystone.public_endpoints[keystone_role] + barbican_host, barbican_port = ctx.barbican.endpoints[cclient] + barbican_url = 'http://{host}:{port}'.format(host=barbican_host, + port=barbican_port) + log.info("barbican_url=%s", barbican_url) + #fetching user_id of user that gets secrets for radosgw + token_req = http.client.HTTPConnection(keystone_host, keystone_port, timeout=30) + token_req.request( + 'POST', + '/v3/auth/tokens', + headers={'Content-Type':'application/json'}, + body=json.dumps({ + "auth": { + "identity": { + "methods": ["password"], + "password": { + "user": { + "domain": {"id": "default"}, + "name": rgw_user["username"], + "password": rgw_user["password"] + } + } + }, + "scope": { + "project": { + "domain": {"id": "default"}, + "name": rgw_user["tenantName"] + } + } + } + })) + rgw_access_user_resp = token_req.getresponse() + if not (rgw_access_user_resp.status >= 200 and + rgw_access_user_resp.status < 300): + raise Exception("Cannot authenticate user "+rgw_user["username"]+" for secret creation") + # baru_resp = json.loads(baru_req.data) + rgw_access_user_data = json.loads(rgw_access_user_resp.read().decode()) + rgw_user_id = rgw_access_user_data['token']['user']['id'] + if 'secrets' in cconfig: + for secret in cconfig['secrets']: + if 'name' not in secret: + raise ConfigError('barbican.secrets must have "name" field') + if 'base64' not in secret: + raise ConfigError('barbican.secrets must have "base64" field') + if 'tenantName' not in secret: + raise ConfigError('barbican.secrets must have "tenantName" field') + if 'username' not in secret: + raise ConfigError('barbican.secrets must have "username" field') + if 'password' not in secret: + raise ConfigError('barbican.secrets must have "password" field') + + token_req = http.client.HTTPConnection(keystone_host, keystone_port, timeout=30) + token_req.request( + 'POST', + '/v3/auth/tokens', + headers={'Content-Type':'application/json'}, + body=json.dumps({ + "auth": { + "identity": { + "methods": ["password"], + "password": { + "user": { + "domain": {"id": "default"}, + "name": secret["username"], + "password": secret["password"] + } + } + }, + "scope": { + "project": { + "domain": {"id": "default"}, + "name": secret["tenantName"] + } + } + } + })) + token_resp = token_req.getresponse() + if not (token_resp.status >= 200 and + token_resp.status < 300): + raise Exception("Cannot authenticate user "+secret["username"]+" for secret creation") + + expire = time.time() + 5400 # now + 90m + (expire_fract,dummy) = math.modf(expire) + expire_format = "%%FT%%T.%06d" % (round(expire_fract*1000000)) + expiration = time.strftime(expire_format, time.gmtime(expire)) + token_id = token_resp.getheader('x-subject-token') + + key1_json = json.dumps( + { + "name": secret['name'], + "expiration": expiration, + "algorithm": "aes", + "bit_length": 256, + "mode": "cbc", + "payload": secret['base64'], + "payload_content_type": "application/octet-stream", + "payload_content_encoding": "base64" + }) + + sec_req = http.client.HTTPConnection(barbican_host, barbican_port, timeout=30) + try: + sec_req.request( + 'POST', + '/v1/secrets', + headers={'Content-Type': 'application/json', + 'Accept': '*/*', + 'X-Auth-Token': token_id}, + body=key1_json + ) + except: + log.info("catched exception!") + run_in_barbican_venv(ctx, cclient, 
['sleep', '900']) + + barbican_sec_resp = sec_req.getresponse() + if not (barbican_sec_resp.status >= 200 and + barbican_sec_resp.status < 300): + raise Exception("Cannot create secret") + barbican_data = json.loads(barbican_sec_resp.read().decode()) + if 'secret_ref' not in barbican_data: + raise ValueError("Malformed secret creation response") + secret_ref = barbican_data["secret_ref"] + log.info("secret_ref=%s", secret_ref) + secret_url_parsed = urlparse(secret_ref) + acl_json = json.dumps( + { + "read": { + "users": [rgw_user_id], + "project-access": True + } + }) + acl_req = http.client.HTTPConnection(secret_url_parsed.netloc, timeout=30) + acl_req.request( + 'PUT', + secret_url_parsed.path+'/acl', + headers={'Content-Type': 'application/json', + 'Accept': '*/*', + 'X-Auth-Token': token_id}, + body=acl_json + ) + barbican_acl_resp = acl_req.getresponse() + if not (barbican_acl_resp.status >= 200 and + barbican_acl_resp.status < 300): + raise Exception("Cannot set ACL for secret") + + key = {'id': secret_ref.split('secrets/')[1], 'payload': secret['base64']} + ctx.barbican.keys[secret['name']] = key + + run_in_barbican_venv(ctx, cclient, ['sleep', '3']) + try: + yield + finally: + pass + + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy and configure Barbican + + Example of configuration: + + tasks: + - local_cluster: + cluster_path: /home/adam/ceph-1/build + - local_rgw: + - tox: [ client.0 ] + - keystone: + client.0: + sha1: 17.0.0.0rc2 + force-branch: master + projects: + - name: rgwcrypt + description: Encryption Tenant + - name: barbican + description: Barbican + - name: s3 + description: S3 project + users: + - name: rgwcrypt-user + password: rgwcrypt-pass + project: rgwcrypt + - name: barbican-user + password: barbican-pass + project: barbican + - name: s3-user + password: s3-pass + project: s3 + roles: [ name: Member, name: creator ] + role-mappings: + - name: Member + user: rgwcrypt-user + project: rgwcrypt + - name: admin + user: barbican-user + project: barbican + - name: creator + user: s3-user + project: s3 + services: + - name: keystone + type: identity + description: Keystone Identity Service + - barbican: + client.0: + force-branch: master + use-keystone-role: client.0 + keystone_authtoken: + auth_plugin: password + username: barbican-user + password: barbican-pass + user_domain_name: Default + rgw_user: + tenantName: rgwcrypt + username: rgwcrypt-user + password: rgwcrypt-pass + secrets: + - name: my-key-1 + base64: a2V5MS5GcWVxKzhzTGNLaGtzQkg5NGVpb1FKcFpGb2c= + tenantName: s3 + username: s3-user + password: s3-pass + - name: my-key-2 + base64: a2V5Mi5yNUNNMGFzMVdIUVZxcCt5NGVmVGlQQ1k4YWg= + tenantName: s3 + username: s3-user + password: s3-pass + - s3tests: + client.0: + force-branch: master + kms_key: my-key-1 + - rgw: + client.0: + use-keystone-role: client.0 + use-barbican-role: client.0 + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task barbican only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. 
+ for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('barbican', {})) + + log.debug('Barbican config is %s', config) + + if not hasattr(ctx, 'keystone'): + raise ConfigError('barbican must run after the keystone task') + + + ctx.barbican = argparse.Namespace() + ctx.barbican.endpoints = assign_ports(ctx, config, 9311) + ctx.barbican.keys = {} + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: setup_venv(ctx=ctx, config=config), + lambda: configure_barbican(ctx=ctx, config=config), + lambda: run_barbican(ctx=ctx, config=config), + lambda: create_secrets(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/blktrace.py b/qa/tasks/blktrace.py new file mode 100644 index 000000000..10b1da0c0 --- /dev/null +++ b/qa/tasks/blktrace.py @@ -0,0 +1,96 @@ +""" +Run the blktrace program through teuthology +""" +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + +log = logging.getLogger(__name__) +blktrace = '/usr/sbin/blktrace' +daemon_signal = 'term' + +@contextlib.contextmanager +def setup(ctx, config): + """ + Set up all the remotes + """ + osds = ctx.cluster.only(teuthology.is_type('osd', config['cluster'])) + log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=teuthology.get_testdir(ctx)) + + for remote, roles_for_host in osds.remotes.items(): + log.info('Creating %s on %s' % (log_dir, remote.name)) + remote.run( + args=['mkdir', '-p', '-m0755', '--', log_dir], + wait=False, + ) + yield + +@contextlib.contextmanager +def execute(ctx, config): + """ + Run the blktrace program on remote machines. + """ + procs = [] + testdir = teuthology.get_testdir(ctx) + log_dir = '{tdir}/archive/performance/blktrace'.format(tdir=testdir) + + osds = ctx.cluster.only(teuthology.is_type('osd')) + for remote, roles_for_host in osds.remotes.items(): + roles_to_devs = ctx.disk_config.remote_to_roles_to_dev[remote] + for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', + config['cluster']): + if roles_to_devs.get(role): + dev = roles_to_devs[role] + log.info("running blktrace on %s: %s" % (remote.name, dev)) + + proc = remote.run( + args=[ + 'cd', + log_dir, + run.Raw(';'), + 'daemon-helper', + daemon_signal, + 'sudo', + blktrace, + '-o', + dev.rsplit("/", 1)[1], + '-d', + dev, + ], + wait=False, + stdin=run.PIPE, + ) + procs.append(proc) + try: + yield + finally: + osds = ctx.cluster.only(teuthology.is_type('osd')) + log.info('stopping blktrace processes') + for proc in procs: + proc.stdin.close() + +@contextlib.contextmanager +def task(ctx, config): + """ + Usage: + blktrace: + + or: + blktrace: + cluster: backup + + Runs blktrace on all osds in the specified cluster (the 'ceph' cluster by + default). 
+ """ + if config is None: + config = {} + config['cluster'] = config.get('cluster', 'ceph') + + with contextutil.nested( + lambda: setup(ctx=ctx, config=config), + lambda: execute(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/boto.cfg.template b/qa/tasks/boto.cfg.template new file mode 100644 index 000000000..cdfe8873b --- /dev/null +++ b/qa/tasks/boto.cfg.template @@ -0,0 +1,2 @@ +[Boto] +http_socket_timeout = {idle_timeout} diff --git a/qa/tasks/cbt.py b/qa/tasks/cbt.py new file mode 100644 index 000000000..56c57138b --- /dev/null +++ b/qa/tasks/cbt.py @@ -0,0 +1,293 @@ +import logging +import os +import yaml + +from teuthology import misc +from teuthology.orchestra import run +from teuthology.task import Task + +log = logging.getLogger(__name__) + + +class CBT(Task): + """ + Passes through a CBT configuration yaml fragment. + """ + def __init__(self, ctx, config): + super(CBT, self).__init__(ctx, config) + self.log = log + + def hosts_of_type(self, type_): + return [r.name for r in self.ctx.cluster.only(misc.is_type(type_)).remotes.keys()] + + def generate_cbt_config(self): + mon_hosts = self.hosts_of_type('mon') + osd_hosts = self.hosts_of_type('osd') + client_hosts = self.hosts_of_type('client') + rgw_client = {} + rgw_client[client_hosts[0]] = None + rgw_hosts = self.config.get('cluster', {}).get('rgws', rgw_client) + cluster_config = dict( + user=self.config.get('cluster', {}).get('user', 'ubuntu'), + head=mon_hosts[0], + osds=osd_hosts, + mons=mon_hosts, + clients=client_hosts, + rgws=rgw_hosts, + osds_per_node=self.config.get('cluster', {}).get('osds_per_node', 1), + rebuild_every_test=False, + use_existing=True, + is_teuthology=self.config.get('cluster', {}).get('is_teuthology', True), + iterations=self.config.get('cluster', {}).get('iterations', 1), + tmp_dir='/tmp/cbt', + pool_profiles=self.config.get('cluster', {}).get('pool_profiles'), + ) + + benchmark_config = self.config.get('benchmarks') + benchmark_type = next(iter(benchmark_config.keys())) + if benchmark_type in ['librbdfio', 'fio']: + testdir = misc.get_testdir(self.ctx) + benchmark_config[benchmark_type]['cmd_path'] = os.path.join(testdir, 'fio/fio') + if benchmark_type == 'cosbench': + # create cosbench_dir and cosbench_xml_dir + testdir = misc.get_testdir(self.ctx) + benchmark_config['cosbench']['cosbench_dir'] = os.path.join(testdir, 'cos') + benchmark_config['cosbench']['cosbench_xml_dir'] = os.path.join(testdir, 'xml') + self.ctx.cluster.run(args=['mkdir', '-p', '-m0755', '--', benchmark_config['cosbench']['cosbench_xml_dir']]) + benchmark_config['cosbench']['controller'] = osd_hosts[0] + + # set auth details + remotes_and_roles = self.ctx.cluster.remotes.items() + ips = [host for (host, port) in + (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] + benchmark_config['cosbench']['auth'] = "username=cosbench:operator;password=intel2012;url=http://%s:80/auth/v1.0;retry=9" %(ips[0]) + client_endpoints_config = self.config.get('client_endpoints', None) + + return dict( + cluster=cluster_config, + benchmarks=benchmark_config, + client_endpoints = client_endpoints_config, + ) + + def install_dependencies(self): + system_type = misc.get_system_type(self.first_mon) + + if system_type == 'rpm': + install_cmd = ['sudo', 'yum', '-y', 'install'] + cbt_depends = ['python3-yaml', 'python3-lxml', 'librbd-devel', 'pdsh', 'collectl'] + else: + install_cmd = ['sudo', 'apt-get', '-y', '--force-yes', 'install'] + cbt_depends = ['python3-yaml', 'python3-lxml', 'librbd-dev', 
'collectl'] + self.first_mon.run(args=install_cmd + cbt_depends) + + benchmark_type = next(iter(self.cbt_config.get('benchmarks').keys())) + self.log.info('benchmark: %s', benchmark_type) + + if benchmark_type in ['librbdfio', 'fio']: + # install fio + testdir = misc.get_testdir(self.ctx) + self.first_mon.run( + args=[ + 'git', 'clone', '-b', 'master', + 'https://github.com/axboe/fio.git', + '{tdir}/fio'.format(tdir=testdir) + ] + ) + self.first_mon.run( + args=[ + 'cd', os.path.join(testdir, 'fio'), run.Raw('&&'), + './configure', run.Raw('&&'), + 'make' + ] + ) + + if benchmark_type == 'cosbench': + # install cosbench + self.log.info('install dependencies for cosbench') + if system_type == 'rpm': + cosbench_depends = ['wget', 'unzip', 'java-1.7.0-openjdk', 'curl'] + else: + cosbench_depends = ['wget', 'unzip', 'openjdk-8-jre', 'curl'] + self.first_mon.run(args=install_cmd + cosbench_depends) + testdir = misc.get_testdir(self.ctx) + cosbench_version = '0.4.2.c3' + cosbench_location = 'https://github.com/intel-cloud/cosbench/releases/download/v0.4.2.c3/0.4.2.c3.zip' + os_version = misc.get_system_type(self.first_mon, False, True) + + # additional requirements for bionic + if os_version == '18.04': + self.first_mon.run( + args=['sudo', 'apt-get', '-y', 'purge', 'openjdk-11*']) + # use our own version of cosbench + cosbench_version = 'cosbench-0.4.2.c3.1' + # contains additional parameter "-N" to nc + cosbench_location = 'http://drop.ceph.com/qa/cosbench-0.4.2.c3.1.zip' + cosbench_dir = os.path.join(testdir, cosbench_version) + self.ctx.cluster.run(args=['mkdir', '-p', '-m0755', '--', cosbench_dir]) + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'wget', + cosbench_location, run.Raw('&&'), + 'unzip', '{name}.zip'.format(name=cosbench_version), '-d', cosbench_version + ] + ) + else: + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'wget', + cosbench_location, run.Raw('&&'), + 'unzip', '{name}.zip'.format(name=cosbench_version) + ] + ) + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'ln', '-s', cosbench_version, 'cos', + ] + ) + self.first_mon.run( + args=[ + 'cd', os.path.join(testdir, 'cos'), run.Raw('&&'), + 'chmod', '+x', run.Raw('*.sh'), + ] + ) + + # start cosbench and check info + self.log.info('start cosbench') + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'cd', 'cos', run.Raw('&&'), + 'sh', 'start-all.sh' + ] + ) + self.log.info('check cosbench info') + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'cd', 'cos', run.Raw('&&'), + 'sh', 'cli.sh', 'info' + ] + ) + + def checkout_cbt(self): + testdir = misc.get_testdir(self.ctx) + repo = self.config.get('repo', 'https://github.com/ceph/cbt.git') + branch = self.config.get('branch', 'master') + branch = self.config.get('force-branch', branch) + sha1 = self.config.get('sha1') + if sha1 is None: + self.first_mon.run( + args=[ + 'git', 'clone', '--depth', '1', '-b', branch, repo, + '{tdir}/cbt'.format(tdir=testdir) + ] + ) + else: + self.first_mon.run( + args=[ + 'git', 'clone', '-b', branch, repo, + '{tdir}/cbt'.format(tdir=testdir) + ] + ) + self.first_mon.run( + args=[ + 'cd', os.path.join(testdir, 'cbt'), run.Raw('&&'), + 'git', 'reset', '--hard', sha1, + ] + ) + + def setup(self): + super(CBT, self).setup() + self.first_mon = next(iter(self.ctx.cluster.only(misc.get_first_mon(self.ctx, self.config)).remotes.keys())) + self.cbt_config = self.generate_cbt_config() + self.log.info('cbt configuration is %s', self.cbt_config) + self.cbt_dir = 
os.path.join(misc.get_archive_dir(self.ctx), 'cbt') + self.ctx.cluster.run(args=['mkdir', '-p', '-m0755', '--', self.cbt_dir]) + self.first_mon.write_file( + os.path.join(self.cbt_dir, 'cbt_config.yaml'), + yaml.safe_dump(self.cbt_config, default_flow_style=False)) + self.checkout_cbt() + self.install_dependencies() + + def begin(self): + super(CBT, self).begin() + testdir = misc.get_testdir(self.ctx) + self.first_mon.run( + args=[ + '{tdir}/cbt/cbt.py'.format(tdir=testdir), + '-a', self.cbt_dir, + '{cbtdir}/cbt_config.yaml'.format(cbtdir=self.cbt_dir), + ], + ) + preserve_file = os.path.join(self.ctx.archive, '.preserve') + open(preserve_file, 'a').close() + + def end(self): + super(CBT, self).end() + testdir = misc.get_testdir(self.ctx) + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/cbt'.format(tdir=testdir), + ] + ) + benchmark_type = next(iter(self.cbt_config.get('benchmarks').keys())) + if benchmark_type in ['librbdfio', 'fio']: + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/fio'.format(tdir=testdir), + ] + ) + + if benchmark_type == 'cosbench': + os_version = misc.get_system_type(self.first_mon, False, True) + if os_version == '18.04': + cosbench_version = 'cosbench-0.4.2.c3.1' + else: + cosbench_version = '0.4.2.c3' + # note: stop-all requires 'nc' + self.first_mon.run( + args=[ + 'cd', testdir, run.Raw('&&'), + 'cd', 'cos', run.Raw('&&'), + 'sh', 'stop-all.sh', + run.Raw('||'), 'true' + ] + ) + self.first_mon.run( + args=[ + 'sudo', 'killall', '-9', 'java', + run.Raw('||'), 'true' + ] + ) + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/cos'.format(tdir=testdir), + ] + ) + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/{version}'.format(tdir=testdir, version=cosbench_version), + ] + ) + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/{version}.zip'.format(tdir=testdir, version=cosbench_version), + ] + ) + self.first_mon.run( + args=[ + 'rm', '--one-file-system', '-rf', '--', + '{tdir}/xml'.format(tdir=testdir), + ] + ) + + +task = CBT diff --git a/qa/tasks/ceph.conf.template b/qa/tasks/ceph.conf.template new file mode 100644 index 000000000..a9cce2953 --- /dev/null +++ b/qa/tasks/ceph.conf.template @@ -0,0 +1,113 @@ +[global] + chdir = "" + pid file = /var/run/ceph/$cluster-$name.pid + auth supported = cephx + + filestore xattr use omap = true + + mon clock drift allowed = 1.000 + + osd crush chooseleaf type = 0 + auth debug = true + + ms die on old message = true + ms die on bug = true + + mon max pg per osd = 10000 # >= luminous + mon pg warn max object skew = 0 + + # disable pg_autoscaler by default for new pools + osd_pool_default_pg_autoscale_mode = off + + osd pool default size = 2 + + mon osd allow primary affinity = true + mon osd allow pg remap = true + mon warn on legacy crush tunables = false + mon warn on crush straw calc version zero = false + mon warn on no sortbitwise = false + mon warn on osd down out interval zero = false + mon warn on too few osds = false + mon_warn_on_pool_pg_num_not_power_of_two = false + mon_warn_on_pool_no_redundancy = false + mon_allow_pool_size_one = true + + osd pool default erasure code profile = "plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd" + + osd default data pool replay window = 5 + + mon allow pool delete = true + + mon cluster log file level = debug + debug asserts on shutdown = true + mon health detail to clog = false + +[osd] + osd 
journal size = 100 + + osd scrub load threshold = 5.0 + osd scrub max interval = 600 + osd mclock profile = high_recovery_ops + + osd recover clone overlap = true + osd recovery max chunk = 1048576 + + osd debug shutdown = true + osd debug op order = true + osd debug verify stray on activate = true + + osd open classes on start = true + osd debug pg log writeout = true + + osd deep scrub update digest min age = 30 + + osd map max advance = 10 + + journal zero on create = true + + filestore ondisk finisher threads = 3 + filestore apply finisher threads = 3 + + bdev debug aio = true + osd debug misdirected ops = true + +[mgr] + debug ms = 1 + debug mgr = 20 + debug mon = 20 + debug auth = 20 + mon reweight min pgs per osd = 4 + mon reweight min bytes per osd = 10 + mgr/telemetry/nag = false + +[mon] + debug ms = 1 + debug mon = 20 + debug paxos = 20 + debug auth = 20 + mon data avail warn = 5 + mon mgr mkfs grace = 240 + mon reweight min pgs per osd = 4 + mon osd reporter subtree level = osd + mon osd prime pg temp = true + mon reweight min bytes per osd = 10 + + # rotate auth tickets quickly to exercise renewal paths + auth mon ticket ttl = 660 # 11m + auth service ticket ttl = 240 # 4m + + # don't complain about insecure global_id in the test suite + mon_warn_on_insecure_global_id_reclaim = false + mon_warn_on_insecure_global_id_reclaim_allowed = false + + # 1m isn't quite enough + mon_down_mkfs_grace = 2m + + mon_warn_on_filestore_osds = false + +[client] + rgw cache enabled = true + rgw enable ops log = true + rgw enable usage log = true + log file = /var/log/ceph/$cluster-$name.$pid.log + admin socket = /var/run/ceph/$cluster-$name.$pid.asok diff --git a/qa/tasks/ceph.py b/qa/tasks/ceph.py new file mode 100644 index 000000000..105362d48 --- /dev/null +++ b/qa/tasks/ceph.py @@ -0,0 +1,1960 @@ +""" +Ceph cluster task. + +Handle the setup, starting, and clean-up of a Ceph cluster. +""" +from copy import deepcopy +from io import BytesIO +from io import StringIO + +import argparse +import configobj +import contextlib +import errno +import logging +import os +import json +import time +import gevent +import re +import socket +import yaml + +from paramiko import SSHException +from tasks.ceph_manager import CephManager, write_conf, get_valgrind_args +from tarfile import ReadError +from tasks.cephfs.filesystem import MDSCluster, Filesystem +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology import exceptions +from teuthology.orchestra import run +from tasks import ceph_client as cclient +from teuthology.orchestra.daemon import DaemonGroup +from tasks.daemonwatchdog import DaemonWatchdog + +CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw'] +DATA_PATH = '/var/lib/ceph/{type_}/{cluster}-{id_}' + +log = logging.getLogger(__name__) + + +def generate_caps(type_): + """ + Each call will return the next capability for each system type + (essentially a subset of possible role values). Valid types are osd, + mds and client. 
+ """ + defaults = dict( + osd=dict( + mon='allow profile osd', + mgr='allow profile osd', + osd='allow *', + ), + mgr=dict( + mon='allow profile mgr', + osd='allow *', + mds='allow *', + ), + mds=dict( + mon='allow *', + mgr='allow *', + osd='allow *', + mds='allow', + ), + client=dict( + mon='allow rw', + mgr='allow r', + osd='allow rwx', + mds='allow', + ), + ) + for subsystem, capability in defaults[type_].items(): + yield '--cap' + yield subsystem + yield capability + + +def update_archive_setting(ctx, key, value): + """ + Add logs directory to job's info log file + """ + if ctx.archive is None: + return + with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file: + info_yaml = yaml.safe_load(info_file) + info_file.seek(0) + if 'archive' in info_yaml: + info_yaml['archive'][key] = value + else: + info_yaml['archive'] = {key: value} + yaml.safe_dump(info_yaml, info_file, default_flow_style=False) + + +@contextlib.contextmanager +def ceph_crash(ctx, config): + """ + Gather crash dumps from /var/lib/ceph/crash + """ + + # Add crash directory to job's archive + update_archive_setting(ctx, 'crash', '/var/lib/ceph/crash') + + try: + yield + + finally: + if ctx.archive is not None: + log.info('Archiving crash dumps...') + path = os.path.join(ctx.archive, 'remote') + try: + os.makedirs(path) + except OSError: + pass + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.shortname) + try: + os.makedirs(sub) + except OSError: + pass + try: + teuthology.pull_directory(remote, '/var/lib/ceph/crash', + os.path.join(sub, 'crash')) + except ReadError: + pass + + +@contextlib.contextmanager +def ceph_log(ctx, config): + """ + Create /var/log/ceph log directory that is open to everyone. + Add valgrind and profiling-logger directories. + + :param ctx: Context + :param config: Configuration + """ + log.info('Making ceph log dir writeable by non-root...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'chmod', + '777', + '/var/log/ceph', + ], + wait=False, + ) + ) + log.info('Disabling ceph logrotate...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'rm', '-f', '--', + '/etc/logrotate.d/ceph', + ], + wait=False, + ) + ) + log.info('Creating extra log directories...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'install', '-d', '-m0777', '--', + '/var/log/ceph/valgrind', + '/var/log/ceph/profiling-logger', + ], + wait=False, + ) + ) + + # Add logs directory to job's info log file + update_archive_setting(ctx, 'log', '/var/log/ceph') + + class Rotater(object): + stop_event = gevent.event.Event() + + def invoke_logrotate(self): + # 1) install ceph-test.conf in /etc/logrotate.d + # 2) continuously loop over logrotate invocation with ceph-test.conf + while not self.stop_event.is_set(): + self.stop_event.wait(timeout=30) + try: + procs = ctx.cluster.run( + args=['sudo', 'logrotate', '/etc/logrotate.d/ceph-test.conf'], + wait=False, + stderr=StringIO() + ) + run.wait(procs) + except exceptions.ConnectionLostError as e: + # Some tests may power off nodes during test, in which + # case we will see connection errors that we should ignore. + log.debug("Missed logrotate, node '{0}' is offline".format( + e.node)) + except EOFError: + # Paramiko sometimes raises this when it fails to + # connect to a node during open_session. As with + # ConnectionLostError, we ignore this because nodes + # are allowed to get power cycled during tests. 
+ log.debug("Missed logrotate, EOFError") + except SSHException: + log.debug("Missed logrotate, SSHException") + except run.CommandFailedError as e: + for p in procs: + if p.finished and p.exitstatus != 0: + err = p.stderr.getvalue() + if 'error: error renaming temp state file' in err: + log.info('ignoring transient state error: %s', e) + else: + raise + except socket.error as e: + if e.errno in (errno.EHOSTUNREACH, errno.ECONNRESET): + log.debug("Missed logrotate, host unreachable") + else: + raise + + def begin(self): + self.thread = gevent.spawn(self.invoke_logrotate) + + def end(self): + self.stop_event.set() + self.thread.get() + + def write_rotate_conf(ctx, daemons): + testdir = teuthology.get_testdir(ctx) + remote_logrotate_conf = '%s/logrotate.ceph-test.conf' % testdir + rotate_conf_path = os.path.join(os.path.dirname(__file__), 'logrotate.conf') + with open(rotate_conf_path) as f: + conf = "" + for daemon, size in daemons.items(): + log.info('writing logrotate stanza for {}'.format(daemon)) + conf += f.read().format(daemon_type=daemon, + max_size=size) + f.seek(0, 0) + + for remote in ctx.cluster.remotes.keys(): + remote.write_file(remote_logrotate_conf, BytesIO(conf.encode())) + remote.sh( + f'sudo mv {remote_logrotate_conf} /etc/logrotate.d/ceph-test.conf && ' + 'sudo chmod 0644 /etc/logrotate.d/ceph-test.conf && ' + 'sudo chown root.root /etc/logrotate.d/ceph-test.conf') + remote.chcon('/etc/logrotate.d/ceph-test.conf', + 'system_u:object_r:etc_t:s0') + + if ctx.config.get('log-rotate'): + daemons = ctx.config.get('log-rotate') + log.info('Setting up log rotation with ' + str(daemons)) + write_rotate_conf(ctx, daemons) + logrotater = Rotater() + logrotater.begin() + try: + yield + + finally: + if ctx.config.get('log-rotate'): + log.info('Shutting down logrotate') + logrotater.end() + ctx.cluster.sh('sudo rm /etc/logrotate.d/ceph-test.conf') + if ctx.archive is not None and \ + not (ctx.config.get('archive-on-error') and ctx.summary['success']): + # and logs + log.info('Compressing logs...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'find', + '/var/log/ceph', + '-name', + '*.log', + '-print0', + run.Raw('|'), + 'sudo', + 'xargs', + '-0', + '--no-run-if-empty', + '--', + 'gzip', + '--', + ], + wait=False, + ), + ) + + log.info('Archiving logs...') + path = os.path.join(ctx.archive, 'remote') + try: + os.makedirs(path) + except OSError: + pass + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.shortname) + try: + os.makedirs(sub) + except OSError: + pass + teuthology.pull_directory(remote, '/var/log/ceph', + os.path.join(sub, 'log')) + + +def assign_devs(roles, devs): + """ + Create a dictionary of devs indexed by roles + + :param roles: List of roles + :param devs: Corresponding list of devices. + :returns: Dictionary of devs indexed by roles. + """ + return dict(zip(roles, devs)) + + +@contextlib.contextmanager +def valgrind_post(ctx, config): + """ + After the tests run, look through all the valgrind logs. Exceptions are raised + if textual errors occurred in the logs, or if valgrind exceptions were detected in + the logs. 
+ + :param ctx: Context + :param config: Configuration + """ + try: + yield + finally: + lookup_procs = list() + log.info('Checking for errors in any valgrind logs...') + for remote in ctx.cluster.remotes.keys(): + # look at valgrind logs for each node + proc = remote.run( + args="sudo zgrep '<kind>' /var/log/ceph/valgrind/* " + # include a second file so that we always get + # a filename prefix on the output + "/dev/null | sort | uniq", + wait=False, + check_status=False, + stdout=StringIO(), + ) + lookup_procs.append((proc, remote)) + + valgrind_exception = None + for (proc, remote) in lookup_procs: + proc.wait() + out = proc.stdout.getvalue() + for line in out.split('\n'): + if line == '': + continue + try: + (file, kind) = line.split(':') + except Exception: + log.error('failed to split line %s', line) + raise + log.debug('file %s kind %s', file, kind) + if (file.find('mds') >= 0) and kind.find('Lost') > 0: + continue + log.error('saw valgrind issue %s in %s', kind, file) + valgrind_exception = Exception('saw valgrind issues') + + if config.get('expect_valgrind_errors'): + if not valgrind_exception: + raise Exception('expected valgrind issues and found none') + else: + if valgrind_exception: + raise valgrind_exception + + +@contextlib.contextmanager +def crush_setup(ctx, config): + cluster_name = config['cluster'] + first_mon = teuthology.get_first_mon(ctx, config, cluster_name) + (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys() + + profile = config.get('crush_tunables', 'default') + log.info('Setting crush tunables to %s', profile) + mon_remote.run( + args=['sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'crush', 'tunables', profile]) + yield + + +@contextlib.contextmanager +def check_enable_crimson(ctx, config): + # enable crimson-osds if crimson + log.info("check_enable_crimson: {}".format(is_crimson(config))) + if is_crimson(config): + cluster_name = config['cluster'] + first_mon = teuthology.get_first_mon(ctx, config, cluster_name) + (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys() + log.info('check_enable_crimson: setting set-allow-crimson') + mon_remote.run( + args=[ + 'sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'set-allow-crimson', '--yes-i-really-mean-it' + ] + ) + yield + + +@contextlib.contextmanager +def setup_manager(ctx, config): + first_mon = teuthology.get_first_mon(ctx, config, config['cluster']) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + if not hasattr(ctx, 'managers'): + ctx.managers = {} + ctx.managers[config['cluster']] = CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager.' 
+ config['cluster']), + cluster=config['cluster'], + ) + yield + +@contextlib.contextmanager +def create_rbd_pool(ctx, config): + cluster_name = config['cluster'] + first_mon = teuthology.get_first_mon(ctx, config, cluster_name) + (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys() + log.info('Waiting for OSDs to come up') + teuthology.wait_until_osds_up( + ctx, + cluster=ctx.cluster, + remote=mon_remote, + ceph_cluster=cluster_name, + ) + if config.get('create_rbd_pool', True): + log.info('Creating RBD pool') + mon_remote.run( + args=['sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'pool', 'create', 'rbd', '8']) + mon_remote.run( + args=[ + 'sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'pool', 'application', 'enable', + 'rbd', 'rbd', '--yes-i-really-mean-it' + ], + check_status=False) + yield + +@contextlib.contextmanager +def cephfs_setup(ctx, config): + cluster_name = config['cluster'] + + first_mon = teuthology.get_first_mon(ctx, config, cluster_name) + (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys() + mdss = ctx.cluster.only(teuthology.is_type('mds', cluster_name)) + # If there are any MDSs, then create a filesystem for them to use + # Do this last because requires mon cluster to be up and running + if mdss.remotes: + log.info('Setting up CephFS filesystem(s)...') + cephfs_config = config.get('cephfs', {}) + fs_configs = cephfs_config.pop('fs', [{'name': 'cephfs'}]) + + # wait for standbys to become available (slow due to valgrind, perhaps) + mdsc = MDSCluster(ctx) + mds_count = len(list(teuthology.all_roles_of_type(ctx.cluster, 'mds'))) + with contextutil.safe_while(sleep=2,tries=150) as proceed: + while proceed(): + if len(mdsc.get_standby_daemons()) >= mds_count: + break + + fss = [] + for fs_config in fs_configs: + assert isinstance(fs_config, dict) + name = fs_config.pop('name') + temp = deepcopy(cephfs_config) + teuthology.deep_merge(temp, fs_config) + subvols = config.get('subvols', None) + if subvols: + teuthology.deep_merge(temp, {'subvols': subvols}) + fs = Filesystem(ctx, fs_config=temp, name=name, create=True) + fss.append(fs) + + yield + + for fs in fss: + fs.destroy() + else: + yield + +@contextlib.contextmanager +def watchdog_setup(ctx, config): + ctx.ceph[config['cluster']].thrashers = [] + ctx.ceph[config['cluster']].watchdog = DaemonWatchdog(ctx, config, ctx.ceph[config['cluster']].thrashers) + ctx.ceph[config['cluster']].watchdog.start() + yield + +def get_mons(roles, ips, cluster_name, + mon_bind_msgr2=False, + mon_bind_addrvec=False): + """ + Get monitors and their associated addresses + """ + mons = {} + v1_ports = {} + v2_ports = {} + is_mon = teuthology.is_type('mon', cluster_name) + for idx, roles in enumerate(roles): + for role in roles: + if not is_mon(role): + continue + if ips[idx] not in v1_ports: + v1_ports[ips[idx]] = 6789 + else: + v1_ports[ips[idx]] += 1 + if mon_bind_msgr2: + if ips[idx] not in v2_ports: + v2_ports[ips[idx]] = 3300 + addr = '{ip}'.format(ip=ips[idx]) + else: + assert mon_bind_addrvec + v2_ports[ips[idx]] += 1 + addr = '[v2:{ip}:{port2},v1:{ip}:{port1}]'.format( + ip=ips[idx], + port2=v2_ports[ips[idx]], + port1=v1_ports[ips[idx]], + ) + elif mon_bind_addrvec: + addr = '[v1:{ip}:{port}]'.format( + ip=ips[idx], + port=v1_ports[ips[idx]], + ) + else: + addr = '{ip}:{port}'.format( + ip=ips[idx], + port=v1_ports[ips[idx]], + ) + mons[role] = addr + assert mons + return mons + +def skeleton_config(ctx, roles, ips, mons, cluster='ceph'): + """ + Returns a ConfigObj that is prefilled with a skeleton config. 
+ + Use conf[section][key]=value or conf.merge to change it. + + Use conf.write to write it out, override .filename first if you want. + """ + path = os.path.join(os.path.dirname(__file__), 'ceph.conf.template') + conf = configobj.ConfigObj(path, file_error=True) + mon_hosts = [] + for role, addr in mons.items(): + mon_cluster, _, _ = teuthology.split_role(role) + if mon_cluster != cluster: + continue + name = teuthology.ceph_role(role) + conf.setdefault(name, {}) + mon_hosts.append(addr) + conf.setdefault('global', {}) + conf['global']['mon host'] = ','.join(mon_hosts) + # set up standby mds's + is_mds = teuthology.is_type('mds', cluster) + for roles_subset in roles: + for role in roles_subset: + if is_mds(role): + name = teuthology.ceph_role(role) + conf.setdefault(name, {}) + return conf + +def create_simple_monmap(ctx, remote, conf, mons, + path=None, + mon_bind_addrvec=False): + """ + Writes a simple monmap based on current ceph.conf into path, or + <testdir>/monmap by default. + + Assumes ceph_conf is up to date. + + Assumes mon sections are named "mon.*", with the dot. + + :return the FSID (as a string) of the newly created monmap + """ + + addresses = list(mons.items()) + assert addresses, "There are no monitors in config!" + log.debug('Ceph mon addresses: %s', addresses) + + try: + log.debug('writing out conf {c}'.format(c=conf)) + except: + log.debug('my conf logging attempt failed') + testdir = teuthology.get_testdir(ctx) + tmp_conf_path = '{tdir}/ceph.tmp.conf'.format(tdir=testdir) + conf_fp = BytesIO() + conf.write(conf_fp) + conf_fp.seek(0) + teuthology.write_file(remote, tmp_conf_path, conf_fp) + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'monmaptool', + '-c', + '{conf}'.format(conf=tmp_conf_path), + '--create', + '--clobber', + ] + if mon_bind_addrvec: + args.extend(['--enable-all-features']) + for (role, addr) in addresses: + _, _, n = teuthology.split_role(role) + if mon_bind_addrvec and (',' in addr or 'v' in addr or ':' in addr): + args.extend(('--addv', n, addr)) + else: + args.extend(('--add', n, addr)) + if not path: + path = '{tdir}/monmap'.format(tdir=testdir) + args.extend([ + '--print', + path + ]) + + monmap_output = remote.sh(args) + fsid = re.search("generated fsid (.+)$", + monmap_output, re.MULTILINE).group(1) + teuthology.delete_file(remote, tmp_conf_path) + return fsid + + +def is_crimson(config): + return config.get('flavor', 'default') == 'crimson' + + +def maybe_redirect_stderr(config, type_, args, log_path): + if type_ == 'osd' and is_crimson(config): + # teuthworker uses ubuntu:ubuntu to access the test nodes + create_log_cmd = \ + f'sudo install -b -o ubuntu -g ubuntu /dev/null {log_path}' + return create_log_cmd, args + [run.Raw('2>>'), log_path] + else: + return None, args + + +@contextlib.contextmanager +def cluster(ctx, config): + """ + Handle the creation and removal of a ceph cluster. + + On startup: + Create directories needed for the cluster. + Create remote journals for all osds. + Create and set keyring. + Copy the monmap to the test systems. + Setup mon nodes. + Setup mds nodes. + Mkfs osd nodes. + Add keyring information to monmaps + Mkfs mon nodes. + + On exit: + If errors occurred, extract a failure message and store in ctx.summary. + Unmount all test files and temporary journaling files. + Save the monitor information and archive all ceph logs. + Cleanup the keyring setup, and remove all monitor map and data files left over. 
+ + :param ctx: Context + :param config: Configuration + """ + if ctx.config.get('use_existing_cluster', False) is True: + log.info("'use_existing_cluster' is true; skipping cluster creation") + yield + + testdir = teuthology.get_testdir(ctx) + cluster_name = config['cluster'] + data_dir = '{tdir}/{cluster}.data'.format(tdir=testdir, cluster=cluster_name) + log.info('Creating ceph cluster %s...', cluster_name) + log.info('config %s', config) + log.info('ctx.config %s', ctx.config) + run.wait( + ctx.cluster.run( + args=[ + 'install', '-d', '-m0755', '--', + data_dir, + ], + wait=False, + ) + ) + + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'install', '-d', '-m0777', '--', '/var/run/ceph', + ], + wait=False, + ) + ) + + devs_to_clean = {} + remote_to_roles_to_devs = {} + osds = ctx.cluster.only(teuthology.is_type('osd', cluster_name)) + for remote, roles_for_host in osds.remotes.items(): + devs = teuthology.get_scratch_devices(remote) + roles_to_devs = assign_devs( + teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name), devs + ) + devs_to_clean[remote] = [] + log.info('osd dev map: {}'.format(roles_to_devs)) + assert roles_to_devs, \ + "remote {} has osd roles, but no osd devices were specified!".format(remote.hostname) + remote_to_roles_to_devs[remote] = roles_to_devs + log.info("remote_to_roles_to_devs: {}".format(remote_to_roles_to_devs)) + for osd_role, dev_name in remote_to_roles_to_devs.items(): + assert dev_name, "{} has no associated device!".format(osd_role) + + log.info('Generating config...') + remotes_and_roles = ctx.cluster.remotes.items() + roles = [role_list for (remote, role_list) in remotes_and_roles] + ips = [host for (host, port) in + (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] + mons = get_mons( + roles, ips, cluster_name, + mon_bind_msgr2=config.get('mon_bind_msgr2'), + mon_bind_addrvec=config.get('mon_bind_addrvec'), + ) + conf = skeleton_config( + ctx, roles=roles, ips=ips, mons=mons, cluster=cluster_name, + ) + for section, keys in config['conf'].items(): + for key, value in keys.items(): + log.info("[%s] %s = %s" % (section, key, value)) + if section not in conf: + conf[section] = {} + conf[section][key] = value + + if not hasattr(ctx, 'ceph'): + ctx.ceph = {} + ctx.ceph[cluster_name] = argparse.Namespace() + ctx.ceph[cluster_name].conf = conf + ctx.ceph[cluster_name].mons = mons + + default_keyring = '/etc/ceph/{cluster}.keyring'.format(cluster=cluster_name) + keyring_path = config.get('keyring_path', default_keyring) + + coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) + + firstmon = teuthology.get_first_mon(ctx, config, cluster_name) + + log.info('Setting up %s...' 
% firstmon) + ctx.cluster.only(firstmon).run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--create-keyring', + keyring_path, + ], + ) + ctx.cluster.only(firstmon).run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--gen-key', + '--name=mon.', + keyring_path, + ], + ) + ctx.cluster.only(firstmon).run( + args=[ + 'sudo', + 'chmod', + '0644', + keyring_path, + ], + ) + (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() + monmap_path = '{tdir}/{cluster}.monmap'.format(tdir=testdir, + cluster=cluster_name) + fsid = create_simple_monmap( + ctx, + remote=mon0_remote, + conf=conf, + mons=mons, + path=monmap_path, + mon_bind_addrvec=config.get('mon_bind_addrvec'), + ) + ctx.ceph[cluster_name].fsid = fsid + if not 'global' in conf: + conf['global'] = {} + conf['global']['fsid'] = fsid + + default_conf_path = '/etc/ceph/{cluster}.conf'.format(cluster=cluster_name) + conf_path = config.get('conf_path', default_conf_path) + log.info('Writing %s for FSID %s...' % (conf_path, fsid)) + write_conf(ctx, conf_path, cluster_name) + + log.info('Creating admin key on %s...' % firstmon) + ctx.cluster.only(firstmon).run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--gen-key', + '--name=client.admin', + '--cap', 'mon', 'allow *', + '--cap', 'osd', 'allow *', + '--cap', 'mds', 'allow *', + '--cap', 'mgr', 'allow *', + keyring_path, + ], + ) + + log.info('Copying monmap to all nodes...') + keyring = mon0_remote.read_file(keyring_path) + monmap = mon0_remote.read_file(monmap_path) + + for rem in ctx.cluster.remotes.keys(): + # copy mon key and initial monmap + log.info('Sending monmap to node {remote}'.format(remote=rem)) + rem.write_file(keyring_path, keyring, mode='0644', sudo=True) + rem.write_file(monmap_path, monmap) + + log.info('Setting up mon nodes...') + mons = ctx.cluster.only(teuthology.is_type('mon', cluster_name)) + + if not config.get('skip_mgr_daemons', False): + log.info('Setting up mgr nodes...') + mgrs = ctx.cluster.only(teuthology.is_type('mgr', cluster_name)) + for remote, roles_for_host in mgrs.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'mgr', + cluster_name): + _, _, id_ = teuthology.split_role(role) + mgr_dir = DATA_PATH.format( + type_='mgr', cluster=cluster_name, id_=id_) + remote.run( + args=[ + 'sudo', + 'mkdir', + '-p', + mgr_dir, + run.Raw('&&'), + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--create-keyring', + '--gen-key', + '--name=mgr.{id}'.format(id=id_), + mgr_dir + '/keyring', + ], + ) + + log.info('Setting up mds nodes...') + mdss = ctx.cluster.only(teuthology.is_type('mds', cluster_name)) + for remote, roles_for_host in mdss.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'mds', + cluster_name): + _, _, id_ = teuthology.split_role(role) + mds_dir = DATA_PATH.format( + type_='mds', cluster=cluster_name, id_=id_) + remote.run( + args=[ + 'sudo', + 'mkdir', + '-p', + mds_dir, + run.Raw('&&'), + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--create-keyring', + '--gen-key', + '--name=mds.{id}'.format(id=id_), + mds_dir + '/keyring', + ], + ) + remote.run(args=[ + 'sudo', 'chown', '-R', 'ceph:ceph', mds_dir + ]) + + cclient.create_keyring(ctx, cluster_name) + log.info('Running mkfs on osd nodes...') + + if not hasattr(ctx, 'disk_config'): + ctx.disk_config = argparse.Namespace() 
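+ # ctx.disk_config caches the per-remote osd-role-to-device mapping (and, below, the mount options and fstype chosen for each device); later tasks such as blktrace read ctx.disk_config.remote_to_roles_to_dev to find the block device backing each osd role.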
+ if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev'): + ctx.disk_config.remote_to_roles_to_dev = {} + if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_mount_options'): + ctx.disk_config.remote_to_roles_to_dev_mount_options = {} + if not hasattr(ctx.disk_config, 'remote_to_roles_to_dev_fstype'): + ctx.disk_config.remote_to_roles_to_dev_fstype = {} + + teuthology.deep_merge(ctx.disk_config.remote_to_roles_to_dev, remote_to_roles_to_devs) + + log.info("ctx.disk_config.remote_to_roles_to_dev: {r}".format(r=str(ctx.disk_config.remote_to_roles_to_dev))) + + for remote, roles_for_host in osds.remotes.items(): + roles_to_devs = remote_to_roles_to_devs[remote] + + for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name): + _, _, id_ = teuthology.split_role(role) + mnt_point = DATA_PATH.format( + type_='osd', cluster=cluster_name, id_=id_) + remote.run( + args=[ + 'sudo', + 'mkdir', + '-p', + mnt_point, + ]) + log.info('roles_to_devs: {}'.format(roles_to_devs)) + log.info('role: {}'.format(role)) + if roles_to_devs.get(role): + dev = roles_to_devs[role] + fs = config.get('fs') + package = None + mkfs_options = config.get('mkfs_options') + mount_options = config.get('mount_options') + if fs == 'btrfs': + # package = 'btrfs-tools' + if mount_options is None: + mount_options = ['noatime', 'user_subvol_rm_allowed'] + if mkfs_options is None: + mkfs_options = ['-m', 'single', + '-l', '32768', + '-n', '32768'] + if fs == 'xfs': + # package = 'xfsprogs' + if mount_options is None: + mount_options = ['noatime'] + if mkfs_options is None: + mkfs_options = ['-f', '-i', 'size=2048'] + if fs == 'ext4' or fs == 'ext3': + if mount_options is None: + mount_options = ['noatime', 'user_xattr'] + + if mount_options is None: + mount_options = [] + if mkfs_options is None: + mkfs_options = [] + mkfs = ['mkfs.%s' % fs] + mkfs_options + log.info('%s on %s on %s' % (mkfs, dev, remote)) + if package is not None: + remote.sh('sudo apt-get install -y %s' % package) + + try: + remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) + except run.CommandFailedError: + # Newer btfs-tools doesn't prompt for overwrite, use -f + if '-f' not in mount_options: + mkfs_options.append('-f') + mkfs = ['mkfs.%s' % fs] + mkfs_options + log.info('%s on %s on %s' % (mkfs, dev, remote)) + remote.run(args=['yes', run.Raw('|')] + ['sudo'] + mkfs + [dev]) + + log.info('mount %s on %s -o %s' % (dev, remote, + ','.join(mount_options))) + remote.run( + args=[ + 'sudo', + 'mount', + '-t', fs, + '-o', ','.join(mount_options), + dev, + mnt_point, + ] + ) + remote.run( + args=[ + 'sudo', '/sbin/restorecon', mnt_point, + ], + check_status=False, + ) + if not remote in ctx.disk_config.remote_to_roles_to_dev_mount_options: + ctx.disk_config.remote_to_roles_to_dev_mount_options[remote] = {} + ctx.disk_config.remote_to_roles_to_dev_mount_options[remote][role] = mount_options + if not remote in ctx.disk_config.remote_to_roles_to_dev_fstype: + ctx.disk_config.remote_to_roles_to_dev_fstype[remote] = {} + ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role] = fs + devs_to_clean[remote].append(mnt_point) + + for role in teuthology.cluster_roles_of_type(roles_for_host, 'osd', cluster_name): + _, _, id_ = teuthology.split_role(role) + try: + args = ['sudo', + 'MALLOC_CHECK_=3', + 'adjust-ulimits', + 'ceph-coverage', coverage_dir, + 'ceph-osd', + '--no-mon-config', + '--cluster', cluster_name, + '--mkfs', + '--mkkey', + '-i', id_, + '--monmap', monmap_path] + log_path = 
f'/var/log/ceph/{cluster_name}-osd.{id_}.log' + create_log_cmd, args = \ + maybe_redirect_stderr(config, 'osd', args, log_path) + if create_log_cmd: + remote.sh(create_log_cmd) + remote.run(args=args) + except run.CommandFailedError: + # try without --no-mon-config.. this may be an upgrade test + remote.run( + args=[ + 'sudo', + 'MALLOC_CHECK_=3', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-osd', + '--cluster', + cluster_name, + '--mkfs', + '--mkkey', + '-i', id_, + '--monmap', monmap_path, + ], + ) + mnt_point = DATA_PATH.format( + type_='osd', cluster=cluster_name, id_=id_) + remote.run(args=[ + 'sudo', 'chown', '-R', 'ceph:ceph', mnt_point + ]) + + log.info('Reading keys from all nodes...') + keys_fp = BytesIO() + keys = [] + for remote, roles_for_host in ctx.cluster.remotes.items(): + for type_ in ['mgr', 'mds', 'osd']: + if type_ == 'mgr' and config.get('skip_mgr_daemons', False): + continue + for role in teuthology.cluster_roles_of_type(roles_for_host, type_, cluster_name): + _, _, id_ = teuthology.split_role(role) + data = remote.read_file( + os.path.join( + DATA_PATH.format( + type_=type_, id_=id_, cluster=cluster_name), + 'keyring', + ), + sudo=True, + ) + keys.append((type_, id_, data)) + keys_fp.write(data) + for remote, roles_for_host in ctx.cluster.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', cluster_name): + _, _, id_ = teuthology.split_role(role) + data = remote.read_file( + '/etc/ceph/{cluster}.client.{id}.keyring'.format(id=id_, cluster=cluster_name) + ) + keys.append(('client', id_, data)) + keys_fp.write(data) + + log.info('Adding keys to all mons...') + writes = mons.run( + args=[ + 'sudo', 'tee', '-a', + keyring_path, + ], + stdin=run.PIPE, + wait=False, + stdout=BytesIO(), + ) + keys_fp.seek(0) + teuthology.feed_many_stdins_and_close(keys_fp, writes) + run.wait(writes) + for type_, id_, data in keys: + run.wait( + mons.run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + keyring_path, + '--name={type}.{id}'.format( + type=type_, + id=id_, + ), + ] + list(generate_caps(type_)), + wait=False, + ), + ) + + log.info('Running mkfs on mon nodes...') + for remote, roles_for_host in mons.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'mon', cluster_name): + _, _, id_ = teuthology.split_role(role) + mnt_point = DATA_PATH.format( + type_='mon', id_=id_, cluster=cluster_name) + remote.run( + args=[ + 'sudo', + 'mkdir', + '-p', + mnt_point, + ], + ) + remote.run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-mon', + '--cluster', cluster_name, + '--mkfs', + '-i', id_, + '--monmap', monmap_path, + '--keyring', keyring_path, + ], + ) + remote.run(args=[ + 'sudo', 'chown', '-R', 'ceph:ceph', mnt_point + ]) + + run.wait( + mons.run( + args=[ + 'rm', + '--', + monmap_path, + ], + wait=False, + ), + ) + + try: + yield + except Exception: + # we need to know this below + ctx.summary['success'] = False + raise + finally: + (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() + + log.info('Checking cluster log for badness...') + + def first_in_ceph_log(pattern, excludes): + """ + Find the first occurrence of the pattern specified in the Ceph log, + Returns None if none found. + + :param pattern: Pattern scanned for. + :param excludes: Patterns to ignore. 
+ :return: First line of text (or None if not found) + """ + args = [ + 'sudo', + 'egrep', pattern, + '/var/log/ceph/{cluster}.log'.format(cluster=cluster_name), + ] + for exclude in excludes: + args.extend([run.Raw('|'), 'egrep', '-v', exclude]) + args.extend([ + run.Raw('|'), 'head', '-n', '1', + ]) + stdout = mon0_remote.sh(args) + return stdout or None + + if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', + config['log_ignorelist']) is not None: + log.warning('Found errors (ERR|WRN|SEC) in cluster log') + ctx.summary['success'] = False + # use the most severe problem as the failure reason + if 'failure_reason' not in ctx.summary: + for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: + match = first_in_ceph_log(pattern, config['log_ignorelist']) + if match is not None: + ctx.summary['failure_reason'] = \ + '"{match}" in cluster log'.format( + match=match.rstrip('\n'), + ) + break + + for remote, dirs in devs_to_clean.items(): + for dir_ in dirs: + log.info('Unmounting %s on %s' % (dir_, remote)) + try: + remote.run( + args=[ + 'sync', + run.Raw('&&'), + 'sudo', + 'umount', + '-f', + dir_ + ] + ) + except Exception as e: + remote.run(args=[ + 'sudo', + run.Raw('PATH=/usr/sbin:$PATH'), + 'lsof', + run.Raw(';'), + 'ps', 'auxf', + ]) + raise e + + if ctx.archive is not None and \ + not (ctx.config.get('archive-on-error') and ctx.summary['success']): + + # archive mon data, too + log.info('Archiving mon data...') + path = os.path.join(ctx.archive, 'data') + try: + os.makedirs(path) + except OSError as e: + if e.errno == errno.EEXIST: + pass + else: + raise + for remote, roles in mons.remotes.items(): + for role in roles: + is_mon = teuthology.is_type('mon', cluster_name) + if is_mon(role): + _, _, id_ = teuthology.split_role(role) + mon_dir = DATA_PATH.format( + type_='mon', id_=id_, cluster=cluster_name) + teuthology.pull_directory_tarball( + remote, + mon_dir, + path + '/' + role + '.tgz') + + log.info('Cleaning ceph cluster...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'rm', + '-rf', + '--', + conf_path, + keyring_path, + data_dir, + monmap_path, + run.Raw('{tdir}/../*.pid'.format(tdir=testdir)), + ], + wait=False, + ), + ) + + +def osd_scrub_pgs(ctx, config): + """ + Scrub pgs when we exit. + + First make sure all pgs are active and clean. + Next scrub all osds. + Then periodically check until all pgs have scrub time stamps that + indicate the last scrub completed. Time out if no progress is made + here after two minutes. 
+ """ + retries = 40 + delays = 20 + cluster_name = config['cluster'] + manager = ctx.managers[cluster_name] + for _ in range(retries): + stats = manager.get_pg_stats() + unclean = [stat['pgid'] for stat in stats if 'active+clean' not in stat['state']] + split_merge = [] + osd_dump = manager.get_osd_dump_json() + try: + split_merge = [i['pool_name'] for i in osd_dump['pools'] if i['pg_num'] != i['pg_num_target']] + except KeyError: + # we don't support pg_num_target before nautilus + pass + if not unclean and not split_merge: + break + waiting_on = [] + if unclean: + waiting_on.append(f'{unclean} to go clean') + if split_merge: + waiting_on.append(f'{split_merge} to split/merge') + waiting_on = ' and '.join(waiting_on) + log.info('Waiting for all PGs to be active+clean and split+merged, waiting on %s', waiting_on) + time.sleep(delays) + else: + raise RuntimeError("Scrubbing terminated -- not all pgs were active and clean.") + check_time_now = time.localtime() + time.sleep(1) + all_roles = teuthology.all_roles(ctx.cluster) + for role in teuthology.cluster_roles_of_type(all_roles, 'osd', cluster_name): + log.info("Scrubbing {osd}".format(osd=role)) + _, _, id_ = teuthology.split_role(role) + # allow this to fail; in certain cases the OSD might not be up + # at this point. we will catch all pgs below. + try: + manager.raw_cluster_cmd('tell', 'osd.' + id_, 'config', 'set', + 'osd_debug_deep_scrub_sleep', '0'); + manager.raw_cluster_cmd('osd', 'deep-scrub', id_) + except run.CommandFailedError: + pass + prev_good = 0 + gap_cnt = 0 + loop = True + while loop: + stats = manager.get_pg_stats() + timez = [(stat['pgid'],stat['last_scrub_stamp']) for stat in stats] + loop = False + thiscnt = 0 + re_scrub = [] + for (pgid, tmval) in timez: + t = tmval[0:tmval.find('.')].replace(' ', 'T') + pgtm = time.strptime(t, '%Y-%m-%dT%H:%M:%S') + if pgtm > check_time_now: + thiscnt += 1 + else: + log.info('pgid %s last_scrub_stamp %s %s <= %s', pgid, tmval, pgtm, check_time_now) + loop = True + re_scrub.append(pgid) + if thiscnt > prev_good: + prev_good = thiscnt + gap_cnt = 0 + else: + gap_cnt += 1 + if gap_cnt % 6 == 0: + for pgid in re_scrub: + # re-request scrub every so often in case the earlier + # request was missed. do not do it every time because + # the scrub may be in progress or not reported yet and + # we will starve progress. + manager.raw_cluster_cmd('pg', 'deep-scrub', pgid) + if gap_cnt > retries: + raise RuntimeError('Exiting scrub checking -- not all pgs scrubbed.') + if loop: + log.info('Still waiting for all pgs to be scrubbed.') + time.sleep(delays) + + +@contextlib.contextmanager +def run_daemon(ctx, config, type_): + """ + Run daemons for a role type. Handle the startup and termination of a a daemon. + On startup -- set coverages, cpu_profile, valgrind values for all remotes, + and a max_mds value for one mds. + On cleanup -- Stop all existing daemons of this type. + + :param ctx: Context + :param config: Configuration + :param type_: Role type + """ + cluster_name = config['cluster'] + log.info('Starting %s daemons in cluster %s...', type_, cluster_name) + testdir = teuthology.get_testdir(ctx) + daemons = ctx.cluster.only(teuthology.is_type(type_, cluster_name)) + + # check whether any daemons if this type are configured + if daemons is None: + return + coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) + + daemon_signal = 'kill' + if config.get('coverage') or config.get('valgrind') is not None: + daemon_signal = 'term' + + # create osds in order. 
(this only matters for pre-luminous, which might + # be jewel/hammer, which doesn't take an id_ argument to legacy 'osd create'). + osd_uuids = {} + for remote, roles_for_host in daemons.remotes.items(): + is_type_ = teuthology.is_type(type_, cluster_name) + for role in roles_for_host: + if not is_type_(role): + continue + _, _, id_ = teuthology.split_role(role) + + + if type_ == 'osd': + datadir='/var/lib/ceph/osd/{cluster}-{id}'.format( + cluster=cluster_name, id=id_) + osd_uuid = remote.read_file( + datadir + '/fsid', sudo=True).decode().strip() + osd_uuids[id_] = osd_uuid + for osd_id in range(len(osd_uuids)): + id_ = str(osd_id) + osd_uuid = osd_uuids.get(id_) + try: + remote.run( + args=[ + 'sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'new', osd_uuid, id_, + ] + ) + except: + # fallback to pre-luminous (jewel) + remote.run( + args=[ + 'sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'create', osd_uuid, + ] + ) + if config.get('add_osds_to_crush'): + remote.run( + args=[ + 'sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'crush', 'create-or-move', 'osd.' + id_, + '1.0', 'host=localhost', 'root=default', + ] + ) + + for remote, roles_for_host in daemons.remotes.items(): + is_type_ = teuthology.is_type(type_, cluster_name) + for role in roles_for_host: + if not is_type_(role): + continue + _, _, id_ = teuthology.split_role(role) + + run_cmd = [ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'daemon-helper', + daemon_signal, + ] + run_cmd_tail = [ + 'ceph-%s' % (type_), + '-f', + '--cluster', cluster_name, + '-i', id_] + + if type_ in config.get('cpu_profile', []): + profile_path = '/var/log/ceph/profiling-logger/%s.prof' % (role) + run_cmd.extend(['env', 'CPUPROFILE=%s' % profile_path]) + + vc = config.get('valgrind') + if vc is not None: + valgrind_args = None + if type_ in vc: + valgrind_args = vc[type_] + if role in vc: + valgrind_args = vc[role] + exit_on_first_error = vc.get('exit_on_first_error', True) + run_cmd = get_valgrind_args(testdir, role, run_cmd, valgrind_args, + exit_on_first_error=exit_on_first_error) + + run_cmd.extend(run_cmd_tail) + log_path = f'/var/log/ceph/{cluster_name}-{type_}.{id_}.log' + create_log_cmd, run_cmd = \ + maybe_redirect_stderr(config, type_, run_cmd, log_path) + if create_log_cmd: + remote.sh(create_log_cmd) + # always register mgr; don't necessarily start + ctx.daemons.register_daemon( + remote, type_, id_, + cluster=cluster_name, + args=run_cmd, + logger=log.getChild(role), + stdin=run.PIPE, + wait=False + ) + if type_ != 'mgr' or not config.get('skip_mgr_daemons', False): + role = cluster_name + '.' + type_ + ctx.daemons.get_daemon(type_, id_, cluster_name).restart() + + # kludge: run any pre-manager commands + if type_ == 'mon': + for cmd in config.get('pre-mgr-commands', []): + firstmon = teuthology.get_first_mon(ctx, config, cluster_name) + (remote,) = ctx.cluster.only(firstmon).remotes.keys() + remote.run(args=cmd.split(' ')) + + try: + yield + finally: + teuthology.stop_daemons_of_type(ctx, type_, cluster_name) + + +def healthy(ctx, config): + """ + Wait for all osd's to be up, and for the ceph health monitor to return HEALTH_OK. 
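+ In order, this waits for a mgr to become available, for all osds to come
+ up, for pgs to go active+clean, for the cluster to report HEALTH_OK
+ (unless 'wait-for-healthy' is false), and for the mds daemons of any
+ existing filesystems; failures in the mgr and pg-stat steps are tolerated
+ to keep upgrade tests working.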
+ + :param ctx: Context + :param config: Configuration + """ + config = config if isinstance(config, dict) else dict() + cluster_name = config.get('cluster', 'ceph') + log.info('Waiting until %s daemons up and pgs clean...', cluster_name) + manager = ctx.managers[cluster_name] + try: + manager.wait_for_mgr_available(timeout=30) + except (run.CommandFailedError, AssertionError) as e: + log.info('ignoring mgr wait error, probably testing upgrade: %s', e) + + manager.wait_for_all_osds_up(timeout=300) + + try: + manager.flush_all_pg_stats() + except (run.CommandFailedError, Exception) as e: + log.info('ignoring flush pg stats error, probably testing upgrade: %s', e) + manager.wait_for_clean() + + if config.get('wait-for-healthy', True): + log.info('Waiting until ceph cluster %s is healthy...', cluster_name) + manager.wait_until_healthy(timeout=300) + + if ctx.cluster.only(teuthology.is_type('mds', cluster_name)).remotes: + # Some MDSs exist, wait for them to be healthy + for fs in Filesystem.get_all_fs(ctx): + fs.wait_for_daemons(timeout=300) + +def wait_for_mon_quorum(ctx, config): + """ + Check renote ceph status until all monitors are up. + + :param ctx: Context + :param config: Configuration + """ + if isinstance(config, dict): + mons = config['daemons'] + cluster_name = config.get('cluster', 'ceph') + else: + assert isinstance(config, list) + mons = config + cluster_name = 'ceph' + firstmon = teuthology.get_first_mon(ctx, config, cluster_name) + (remote,) = ctx.cluster.only(firstmon).remotes.keys() + with contextutil.safe_while(sleep=10, tries=60, + action='wait for monitor quorum') as proceed: + while proceed(): + quorum_status = remote.sh('sudo ceph quorum_status', + logger=log.getChild('quorum_status')) + j = json.loads(quorum_status) + q = j.get('quorum_names', []) + log.debug('Quorum: %s', q) + if sorted(q) == sorted(mons): + break + + +def created_pool(ctx, config): + """ + Add new pools to the dictionary of pools that the ceph-manager + knows about. + """ + for new_pool in config: + if new_pool not in ctx.managers['ceph'].pools: + ctx.managers['ceph'].pools[new_pool] = ctx.managers['ceph'].get_pool_int_property( + new_pool, 'pg_num') + + +@contextlib.contextmanager +def suppress_mon_health_to_clog(ctx, config): + """ + set the option, and then restore it with its original value + + Note, due to the way how tasks are executed/nested, it's not suggested to + use this method as a standalone task. otherwise, it's likely that it will + restore the tweaked option at the /end/ of 'tasks' block. 
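+
+ The option is only suppressed when the caller's config carries the literal
+ string 'false' (quoted in YAML so it is not parsed as a boolean). A
+ hypothetical restart fragment that would trigger it::
+
+ tasks:
+ - ceph.restart:
+ daemons: [osd.0, mon.1]
+ mon-health-to-clog: "false"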
+ """ + if config.get('mon-health-to-clog', 'true') == 'false': + cluster = config.get('cluster', 'ceph') + manager = ctx.managers[cluster] + manager.raw_cluster_command( + 'config', 'set', 'mon', 'mon_health_to_clog', 'false' + ) + yield + manager.raw_cluster_command( + 'config', 'rm', 'mon', 'mon_health_to_clog' + ) + else: + yield + +@contextlib.contextmanager +def restart(ctx, config): + """ + restart ceph daemons + + For example:: + tasks: + - ceph.restart: [all] + + For example:: + tasks: + - ceph.restart: [osd.0, mon.1, mds.*] + + or:: + + tasks: + - ceph.restart: + daemons: [osd.0, mon.1] + wait-for-healthy: false + wait-for-osds-up: true + + :param ctx: Context + :param config: Configuration + """ + if config is None: + config = {} + elif isinstance(config, list): + config = {'daemons': config} + + daemons = ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES, True) + clusters = set() + + with suppress_mon_health_to_clog(ctx, config): + for role in daemons: + cluster, type_, id_ = teuthology.split_role(role) + ctx.daemons.get_daemon(type_, id_, cluster).stop() + if type_ == 'osd': + ctx.managers[cluster].mark_down_osd(id_) + ctx.daemons.get_daemon(type_, id_, cluster).restart() + clusters.add(cluster) + + if config.get('wait-for-healthy', True): + for cluster in clusters: + healthy(ctx=ctx, config=dict(cluster=cluster)) + if config.get('wait-for-osds-up', False): + for cluster in clusters: + ctx.managers[cluster].wait_for_all_osds_up() + if config.get('expected-failure') is not None: + log.info('Checking for expected-failure in osds logs after restart...') + expected_fail = config.get('expected-failure') + is_osd = teuthology.is_type('osd') + for role in daemons: + if not is_osd(role): + continue + (remote,) = ctx.cluster.only(role).remotes.keys() + cluster, type_, id_ = teuthology.split_role(role) + remote.run( + args = ['sudo', + 'egrep', expected_fail, + '/var/log/ceph/{cluster}-{type_}.{id_}.log'.format(cluster=cluster, type_=type_, id_=id_), + ]) + yield + + +@contextlib.contextmanager +def stop(ctx, config): + """ + Stop ceph daemons + + For example:: + tasks: + - ceph.stop: [mds.*] + + tasks: + - ceph.stop: [osd.0, osd.2] + + tasks: + - ceph.stop: + daemons: [osd.0, osd.2] + + """ + if config is None: + config = {} + elif isinstance(config, list): + config = {'daemons': config} + + daemons = ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES, True) + clusters = set() + + for role in daemons: + cluster, type_, id_ = teuthology.split_role(role) + ctx.daemons.get_daemon(type_, id_, cluster).stop() + clusters.add(cluster) + + + for cluster in clusters: + ctx.ceph[cluster].watchdog.stop() + ctx.ceph[cluster].watchdog.join() + + yield + + +@contextlib.contextmanager +def wait_for_failure(ctx, config): + """ + Wait for a failure of a ceph daemon + + For example:: + tasks: + - ceph.wait_for_failure: [mds.*] + + tasks: + - ceph.wait_for_failure: [osd.0, osd.2] + + tasks: + - ceph.wait_for_failure: + daemons: [osd.0, osd.2] + + """ + if config is None: + config = {} + elif isinstance(config, list): + config = {'daemons': config} + + daemons = ctx.daemons.resolve_role_list(config.get('daemons', None), CEPH_ROLE_TYPES, True) + for role in daemons: + cluster, type_, id_ = teuthology.split_role(role) + try: + ctx.daemons.get_daemon(type_, id_, cluster).wait() + except: + log.info('Saw expected daemon failure. 
Continuing.') + pass + else: + raise RuntimeError('daemon %s did not fail' % role) + + yield + + +def validate_config(ctx, config): + """ + Perform some simple validation on task configuration. + Raises exceptions.ConfigError if an error is found. + """ + # check for osds from multiple clusters on the same host + for remote, roles_for_host in ctx.cluster.remotes.items(): + last_cluster = None + last_role = None + for role in roles_for_host: + role_cluster, role_type, _ = teuthology.split_role(role) + if role_type != 'osd': + continue + if last_cluster and last_cluster != role_cluster: + msg = "Host should not have osds (%s and %s) from multiple clusters" % ( + last_role, role) + raise exceptions.ConfigError(msg) + last_cluster = role_cluster + last_role = role + + +@contextlib.contextmanager +def task(ctx, config): + """ + Set up and tear down a Ceph cluster. + + For example:: + + tasks: + - ceph: + - interactive: + + You can also specify what branch to run:: + + tasks: + - ceph: + branch: foo + + Or a tag:: + + tasks: + - ceph: + tag: v0.42.13 + + Or a sha1:: + + tasks: + - ceph: + sha1: 1376a5ab0c89780eab39ffbbe436f6a6092314ed + + Or a local source dir:: + + tasks: + - ceph: + path: /home/sage/ceph + + To capture code coverage data, use:: + + tasks: + - ceph: + coverage: true + + To use btrfs, ext4, or xfs on the target's scratch disks, use:: + + tasks: + - ceph: + fs: xfs + mkfs_options: [-b,size=65536,-l,logdev=/dev/sdc1] + mount_options: [nobarrier, inode64] + + To change the cephfs's default max_mds (1), use:: + + tasks: + - ceph: + cephfs: + max_mds: 2 + + To change the max_mds of a specific filesystem, use:: + + tasks: + - ceph: + cephfs: + max_mds: 2 + fs: + - name: a + max_mds: 3 + - name: b + + In the above example, filesystem 'a' will have 'max_mds' 3, + and filesystme 'b' will have 'max_mds' 2. + + To change the mdsmap's default session_timeout (60 seconds), use:: + + tasks: + - ceph: + cephfs: + session_timeout: 300 + + Note, this will cause the task to check the /scratch_devs file on each node + for available devices. If no such file is found, /dev/sdb will be used. + + To run some daemons under valgrind, include their names + and the tool/args to use in a valgrind section:: + + tasks: + - ceph: + valgrind: + mds.1: --tool=memcheck + osd.1: [--tool=memcheck, --leak-check=no] + + Those nodes which are using memcheck or valgrind will get + checked for bad results. + + To adjust or modify config options, use:: + + tasks: + - ceph: + conf: + section: + key: value + + For example:: + + tasks: + - ceph: + conf: + mds.0: + some option: value + other key: other value + client.0: + debug client: 10 + debug ms: 1 + + By default, the cluster log is checked for errors and warnings, + and the run marked failed if any appear. You can ignore log + entries by giving a list of egrep compatible regexes, i.e.: + + tasks: + - ceph: + log-ignorelist: ['foo.*bar', 'bad message'] + + To run multiple ceph clusters, use multiple ceph tasks, and roles + with a cluster name prefix, e.g. cluster1.client.0. Roles with no + cluster use the default cluster name, 'ceph'. OSDs from separate + clusters must be on separate hosts. Clients and non-osd daemons + from multiple clusters may be colocated. 
For each cluster, add an + instance of the ceph task with the cluster name specified, e.g.:: + + roles: + - [mon.a, osd.0, osd.1] + - [backup.mon.a, backup.osd.0, backup.osd.1] + - [client.0, backup.client.0] + tasks: + - ceph: + cluster: ceph + - ceph: + cluster: backup + + :param ctx: Context + :param config: Configuration + + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + "task ceph only supports a dictionary for configuration" + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph', {})) + + first_ceph_cluster = False + if not hasattr(ctx, 'daemons'): + first_ceph_cluster = True + ctx.daemons = DaemonGroup() + + testdir = teuthology.get_testdir(ctx) + if config.get('coverage'): + coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) + log.info('Creating coverage directory...') + run.wait( + ctx.cluster.run( + args=[ + 'install', '-d', '-m0755', '--', + coverage_dir, + ], + wait=False, + ) + ) + + if 'cluster' not in config: + config['cluster'] = 'ceph' + + validate_config(ctx, config) + + subtasks = [] + if first_ceph_cluster: + # these tasks handle general log setup and parsing on all hosts, + # so they should only be run once + subtasks = [ + lambda: ceph_log(ctx=ctx, config=None), + lambda: ceph_crash(ctx=ctx, config=None), + lambda: valgrind_post(ctx=ctx, config=config), + ] + + subtasks += [ + lambda: cluster(ctx=ctx, config=dict( + conf=config.get('conf', {}), + fs=config.get('fs', 'xfs'), + mkfs_options=config.get('mkfs_options', None), + mount_options=config.get('mount_options', None), + skip_mgr_daemons=config.get('skip_mgr_daemons', False), + log_ignorelist=config.get('log-ignorelist', []), + cpu_profile=set(config.get('cpu_profile', []),), + cluster=config['cluster'], + mon_bind_msgr2=config.get('mon_bind_msgr2', True), + mon_bind_addrvec=config.get('mon_bind_addrvec', True), + )), + lambda: run_daemon(ctx=ctx, config=config, type_='mon'), + lambda: run_daemon(ctx=ctx, config=config, type_='mgr'), + lambda: crush_setup(ctx=ctx, config=config), + lambda: check_enable_crimson(ctx=ctx, config=config), + lambda: run_daemon(ctx=ctx, config=config, type_='osd'), + lambda: setup_manager(ctx=ctx, config=config), + lambda: create_rbd_pool(ctx=ctx, config=config), + lambda: run_daemon(ctx=ctx, config=config, type_='mds'), + lambda: cephfs_setup(ctx=ctx, config=config), + lambda: watchdog_setup(ctx=ctx, config=config), + ] + + with contextutil.nested(*subtasks): + try: + if config.get('wait-for-healthy', True): + healthy(ctx=ctx, config=dict(cluster=config['cluster'])) + + yield + finally: + # set pg_num_targets back to actual pg_num, so we don't have to + # wait for pending merges (which can take a while!) + if not config.get('skip_stop_pg_num_changes', True): + ctx.managers[config['cluster']].stop_pg_num_changes() + + if config.get('wait-for-scrub', True): + # wait for pgs to become active+clean in case any + # recoveries were triggered since the last health check + ctx.managers[config['cluster']].wait_for_clean() + osd_scrub_pgs(ctx, config) + + # stop logging health to clog during shutdown, or else we generate + # a bunch of scary messages unrelated to our actual run. 
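+ # The command below runs with check_status=False, so a failure here (for
+ # example if the monitors are already gone) does not fail the teardown.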
+ firstmon = teuthology.get_first_mon(ctx, config, config['cluster']) + (mon0_remote,) = ctx.cluster.only(firstmon).remotes.keys() + mon0_remote.run( + args=[ + 'sudo', + 'ceph', + '--cluster', config['cluster'], + 'config', 'set', 'global', + 'mon_health_to_clog', 'false', + ], + check_status=False, + ) diff --git a/qa/tasks/ceph_client.py b/qa/tasks/ceph_client.py new file mode 100644 index 000000000..74e818f93 --- /dev/null +++ b/qa/tasks/ceph_client.py @@ -0,0 +1,42 @@ +""" +Set up client keyring +""" +import logging + +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def create_keyring(ctx, cluster_name): + """ + Set up key ring on remote sites + """ + log.info('Setting up client nodes...') + clients = ctx.cluster.only(teuthology.is_type('client', cluster_name)) + testdir = teuthology.get_testdir(ctx) + coverage_dir = '{tdir}/archive/coverage'.format(tdir=testdir) + for remote, roles_for_host in clients.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', + cluster_name): + name = teuthology.ceph_role(role) + client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name, name) + remote.run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + coverage_dir, + 'ceph-authtool', + '--create-keyring', + '--gen-key', + # TODO this --name= is not really obeyed, all unknown "types" are munged to "client" + '--name={name}'.format(name=name), + client_keyring, + run.Raw('&&'), + 'sudo', + 'chmod', + '0644', + client_keyring, + ], + ) diff --git a/qa/tasks/ceph_deploy.py b/qa/tasks/ceph_deploy.py new file mode 100644 index 000000000..99c8c1ffb --- /dev/null +++ b/qa/tasks/ceph_deploy.py @@ -0,0 +1,916 @@ +""" +Execute ceph-deploy as a task +""" + +import contextlib +import os +import time +import logging +import traceback + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.config import config as teuth_config +from teuthology.task import install as install_fn +from teuthology.orchestra import run +from tasks.cephfs.filesystem import Filesystem +from teuthology.misc import wait_until_healthy + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def download_ceph_deploy(ctx, config): + """ + Downloads ceph-deploy from the ceph.com git mirror and (by default) + switches to the master branch. If the `ceph-deploy-branch` is specified, it + will use that instead. The `bootstrap` script is ran, with the argument + obtained from `python_version`, if specified. 
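+
+ A hypothetical task fragment that pins both settings::
+
+ tasks:
+ - ceph-deploy:
+ ceph-deploy-branch: master
+ python_version: 3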
+ """ + # use mon.a for ceph_admin + (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys() + + try: + py_ver = str(config['python_version']) + except KeyError: + pass + else: + supported_versions = ['2', '3'] + if py_ver not in supported_versions: + raise ValueError("python_version must be: {}, not {}".format( + ' or '.join(supported_versions), py_ver + )) + + log.info("Installing Python") + system_type = teuthology.get_system_type(ceph_admin) + + if system_type == 'rpm': + package = 'python36' if py_ver == '3' else 'python' + ctx.cluster.run(args=[ + 'sudo', 'yum', '-y', 'install', + package, 'python-virtualenv' + ]) + else: + package = 'python3' if py_ver == '3' else 'python' + ctx.cluster.run(args=[ + 'sudo', 'apt-get', '-y', '--force-yes', 'install', + package, 'python-virtualenv' + ]) + + log.info('Downloading ceph-deploy...') + testdir = teuthology.get_testdir(ctx) + ceph_deploy_branch = config.get('ceph-deploy-branch', 'master') + + ceph_admin.run( + args=[ + 'git', 'clone', '-b', ceph_deploy_branch, + teuth_config.ceph_git_base_url + 'ceph-deploy.git', + '{tdir}/ceph-deploy'.format(tdir=testdir), + ], + ) + args = [ + 'cd', + '{tdir}/ceph-deploy'.format(tdir=testdir), + run.Raw('&&'), + './bootstrap', + ] + try: + args.append(str(config['python_version'])) + except KeyError: + pass + ceph_admin.run(args=args) + + try: + yield + finally: + log.info('Removing ceph-deploy ...') + ceph_admin.run( + args=[ + 'rm', + '-rf', + '{tdir}/ceph-deploy'.format(tdir=testdir), + ], + ) + + +def is_healthy(ctx, config): + """Wait until a Ceph cluster is healthy.""" + testdir = teuthology.get_testdir(ctx) + ceph_admin = teuthology.get_first_mon(ctx, config) + (remote,) = ctx.cluster.only(ceph_admin).remotes.keys() + max_tries = 90 # 90 tries * 10 secs --> 15 minutes + tries = 0 + while True: + tries += 1 + if tries >= max_tries: + msg = "ceph health was unable to get 'HEALTH_OK' after waiting 15 minutes" + remote.run( + args=[ + 'cd', + '{tdir}'.format(tdir=testdir), + run.Raw('&&'), + 'sudo', 'ceph', + 'report', + ], + ) + raise RuntimeError(msg) + + out = remote.sh( + [ + 'cd', + '{tdir}'.format(tdir=testdir), + run.Raw('&&'), + 'sudo', 'ceph', + 'health', + ], + logger=log.getChild('health'), + ) + log.info('Ceph health: %s', out.rstrip('\n')) + if out.split(None, 1)[0] == 'HEALTH_OK': + break + time.sleep(10) + + +def get_nodes_using_role(ctx, target_role): + """ + Extract the names of nodes that match a given role from a cluster, and modify the + cluster's service IDs to match the resulting node-based naming scheme that ceph-deploy + uses, such that if "mon.a" is on host "foo23", it'll be renamed to "mon.foo23". + """ + + # Nodes containing a service of the specified role + nodes_of_interest = [] + + # Prepare a modified version of cluster.remotes with ceph-deploy-ized names + modified_remotes = {} + ceph_deploy_mapped = dict() + for _remote, roles_for_host in ctx.cluster.remotes.items(): + modified_remotes[_remote] = [] + for svc_id in roles_for_host: + if svc_id.startswith("{0}.".format(target_role)): + fqdn = str(_remote).split('@')[-1] + nodename = str(str(_remote).split('.')[0]).split('@')[1] + if target_role == 'mon': + nodes_of_interest.append(fqdn) + else: + nodes_of_interest.append(nodename) + mapped_role = "{0}.{1}".format(target_role, nodename) + modified_remotes[_remote].append(mapped_role) + # keep dict of mapped role for later use by tasks + # eg. 
mon.a => mon.node1 + ceph_deploy_mapped[svc_id] = mapped_role + else: + modified_remotes[_remote].append(svc_id) + + ctx.cluster.remotes = modified_remotes + # since the function is called multiple times for target roles + # append new mapped roles + if not hasattr(ctx.cluster, 'mapped_role'): + ctx.cluster.mapped_role = ceph_deploy_mapped + else: + ctx.cluster.mapped_role.update(ceph_deploy_mapped) + log.info("New mapped_role={mr}".format(mr=ctx.cluster.mapped_role)) + return nodes_of_interest + + +def get_dev_for_osd(ctx, config): + """Get a list of all osd device names.""" + osd_devs = [] + for remote, roles_for_host in ctx.cluster.remotes.items(): + host = remote.name.split('@')[-1] + shortname = host.split('.')[0] + devs = teuthology.get_scratch_devices(remote) + num_osd_per_host = list( + teuthology.roles_of_type( + roles_for_host, 'osd')) + num_osds = len(num_osd_per_host) + if config.get('separate_journal_disk') is not None: + num_devs_reqd = 2 * num_osds + assert num_devs_reqd <= len( + devs), 'fewer data and journal disks than required ' + shortname + for dindex in range(0, num_devs_reqd, 2): + jd_index = dindex + 1 + dev_short = devs[dindex].split('/')[-1] + jdev_short = devs[jd_index].split('/')[-1] + osd_devs.append((shortname, dev_short, jdev_short)) + else: + assert num_osds <= len(devs), 'fewer disks than osds ' + shortname + for dev in devs[:num_osds]: + dev_short = dev.split('/')[-1] + osd_devs.append((shortname, dev_short)) + return osd_devs + + +def get_all_nodes(ctx, config): + """Return a string of node names separated by blanks""" + nodelist = [] + for t, k in ctx.config['targets'].items(): + host = t.split('@')[-1] + simple_host = host.split('.')[0] + nodelist.append(simple_host) + nodelist = " ".join(nodelist) + return nodelist + +@contextlib.contextmanager +def build_ceph_cluster(ctx, config): + """Build a ceph cluster""" + + # Expect to find ceph_admin on the first mon by ID, same place that the download task + # puts it. Remember this here, because subsequently IDs will change from those in + # the test config to those that ceph-deploy invents. 
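+ # For illustration (hypothetical hostname): if the test config places mon.a
+ # on ubuntu@smithi001.front.sepia.ceph.com, get_nodes_using_role() rewrites
+ # that role to mon.smithi001 and records the mapping in
+ # ctx.cluster.mapped_role, e.g. {'mon.a': 'mon.smithi001'}.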
+ + (ceph_admin,) = ctx.cluster.only('mon.a').remotes.keys() + + def execute_ceph_deploy(cmd): + """Remotely execute a ceph_deploy command""" + return ceph_admin.run( + args=[ + 'cd', + '{tdir}/ceph-deploy'.format(tdir=testdir), + run.Raw('&&'), + run.Raw(cmd), + ], + check_status=False, + ).exitstatus + + def ceph_disk_osd_create(ctx, config): + node_dev_list = get_dev_for_osd(ctx, config) + no_of_osds = 0 + for d in node_dev_list: + node = d[0] + for disk in d[1:]: + zap = './ceph-deploy disk zap ' + node + ' ' + disk + estatus = execute_ceph_deploy(zap) + if estatus != 0: + raise RuntimeError("ceph-deploy: Failed to zap osds") + osd_create_cmd = './ceph-deploy osd create ' + # first check for filestore, default is bluestore with ceph-deploy + if config.get('filestore') is not None: + osd_create_cmd += '--filestore ' + elif config.get('bluestore') is not None: + osd_create_cmd += '--bluestore ' + if config.get('dmcrypt') is not None: + osd_create_cmd += '--dmcrypt ' + osd_create_cmd += ":".join(d) + estatus_osd = execute_ceph_deploy(osd_create_cmd) + if estatus_osd == 0: + log.info('successfully created osd') + no_of_osds += 1 + else: + raise RuntimeError("ceph-deploy: Failed to create osds") + return no_of_osds + + def ceph_volume_osd_create(ctx, config): + osds = ctx.cluster.only(teuthology.is_type('osd')) + no_of_osds = 0 + for remote in osds.remotes.keys(): + # all devs should be lvm + osd_create_cmd = './ceph-deploy osd create --debug ' + remote.shortname + ' ' + # default is bluestore so we just need config item for filestore + roles = ctx.cluster.remotes[remote] + dev_needed = len([role for role in roles + if role.startswith('osd')]) + all_devs = teuthology.get_scratch_devices(remote) + log.info("node={n}, need_devs={d}, available={a}".format( + n=remote.shortname, + d=dev_needed, + a=all_devs, + )) + devs = all_devs[0:dev_needed] + # rest of the devices can be used for journal if required + jdevs = dev_needed + for device in devs: + device_split = device.split('/') + lv_device = device_split[-2] + '/' + device_split[-1] + if config.get('filestore') is not None: + osd_create_cmd += '--filestore --data ' + lv_device + ' ' + # filestore with ceph-volume also needs journal disk + try: + jdevice = all_devs.pop(jdevs) + except IndexError: + raise RuntimeError("No device available for \ + journal configuration") + jdevice_split = jdevice.split('/') + j_lv = jdevice_split[-2] + '/' + jdevice_split[-1] + osd_create_cmd += '--journal ' + j_lv + else: + osd_create_cmd += ' --data ' + lv_device + estatus_osd = execute_ceph_deploy(osd_create_cmd) + if estatus_osd == 0: + log.info('successfully created osd') + no_of_osds += 1 + else: + raise RuntimeError("ceph-deploy: Failed to create osds") + return no_of_osds + + try: + log.info('Building ceph cluster using ceph-deploy...') + testdir = teuthology.get_testdir(ctx) + ceph_branch = None + if config.get('branch') is not None: + cbranch = config.get('branch') + for var, val in cbranch.items(): + ceph_branch = '--{var}={val}'.format(var=var, val=val) + all_nodes = get_all_nodes(ctx, config) + mds_nodes = get_nodes_using_role(ctx, 'mds') + mds_nodes = " ".join(mds_nodes) + mon_node = get_nodes_using_role(ctx, 'mon') + mon_nodes = " ".join(mon_node) + # skip mgr based on config item + # this is needed when test uses latest code to install old ceph + # versions + skip_mgr = config.get('skip-mgr', False) + if not skip_mgr: + mgr_nodes = get_nodes_using_role(ctx, 'mgr') + mgr_nodes = " ".join(mgr_nodes) + new_mon = './ceph-deploy new' + " " + mon_nodes 
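+ # Rough sketch of the command strings assembled here and just below, for a
+ # hypothetical two-host layout (mon entries come back from
+ # get_nodes_using_role() as FQDNs, the others as short names):
+ #
+ # new_mon -> './ceph-deploy new node1.example.com node2.example.com'
+ # gather_keys -> './ceph-deploy gatherkeys node1.example.com'
+ # mgr_create -> './ceph-deploy mgr create node1 node2'
+ # deploy_mds -> './ceph-deploy mds create node1'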
+ if not skip_mgr: + mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes + mon_hostname = mon_nodes.split(' ')[0] + mon_hostname = str(mon_hostname) + gather_keys = './ceph-deploy gatherkeys' + " " + mon_hostname + deploy_mds = './ceph-deploy mds create' + " " + mds_nodes + + if mon_nodes is None: + raise RuntimeError("no monitor nodes in the config file") + + estatus_new = execute_ceph_deploy(new_mon) + if estatus_new != 0: + raise RuntimeError("ceph-deploy: new command failed") + + log.info('adding config inputs...') + testdir = teuthology.get_testdir(ctx) + conf_path = '{tdir}/ceph-deploy/ceph.conf'.format(tdir=testdir) + + if config.get('conf') is not None: + confp = config.get('conf') + for section, keys in confp.items(): + lines = '[{section}]\n'.format(section=section) + ceph_admin.sudo_write_file(conf_path, lines, append=True) + for key, value in keys.items(): + log.info("[%s] %s = %s" % (section, key, value)) + lines = '{key} = {value}\n'.format(key=key, value=value) + ceph_admin.sudo_write_file(conf_path, lines, append=True) + + # install ceph + dev_branch = ctx.config['branch'] + branch = '--dev={branch}'.format(branch=dev_branch) + if ceph_branch: + option = ceph_branch + else: + option = branch + install_nodes = './ceph-deploy install ' + option + " " + all_nodes + estatus_install = execute_ceph_deploy(install_nodes) + if estatus_install != 0: + raise RuntimeError("ceph-deploy: Failed to install ceph") + # install ceph-test package too + install_nodes2 = './ceph-deploy install --tests ' + option + \ + " " + all_nodes + estatus_install = execute_ceph_deploy(install_nodes2) + if estatus_install != 0: + raise RuntimeError("ceph-deploy: Failed to install ceph-test") + + mon_create_nodes = './ceph-deploy mon create-initial' + # If the following fails, it is OK, it might just be that the monitors + # are taking way more than a minute/monitor to form quorum, so lets + # try the next block which will wait up to 15 minutes to gatherkeys. 
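+ # execute_ceph_deploy() runs with check_status=False and returns the exit
+ # status, so the call below deliberately ignores a failure; the usual
+ # pattern elsewhere in this function is (hypothetical command shown):
+ #
+ # if execute_ceph_deploy('./ceph-deploy gatherkeys node1') != 0:
+ # raise RuntimeError("ceph-deploy: Failed during gather keys")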
+ execute_ceph_deploy(mon_create_nodes) + + estatus_gather = execute_ceph_deploy(gather_keys) + if estatus_gather != 0: + raise RuntimeError("ceph-deploy: Failed during gather keys") + + # install admin key on mons (ceph-create-keys doesn't do this any more) + mons = ctx.cluster.only(teuthology.is_type('mon')) + for remote in mons.remotes.keys(): + execute_ceph_deploy('./ceph-deploy admin ' + remote.shortname) + + # create osd's + if config.get('use-ceph-volume', False): + no_of_osds = ceph_volume_osd_create(ctx, config) + else: + # this method will only work with ceph-deploy v1.5.39 or older + no_of_osds = ceph_disk_osd_create(ctx, config) + + if not skip_mgr: + execute_ceph_deploy(mgr_create) + + if mds_nodes: + estatus_mds = execute_ceph_deploy(deploy_mds) + if estatus_mds != 0: + raise RuntimeError("ceph-deploy: Failed to deploy mds") + + if config.get('test_mon_destroy') is not None: + for d in range(1, len(mon_node)): + mon_destroy_nodes = './ceph-deploy mon destroy' + \ + " " + mon_node[d] + estatus_mon_d = execute_ceph_deploy(mon_destroy_nodes) + if estatus_mon_d != 0: + raise RuntimeError("ceph-deploy: Failed to delete monitor") + + + + if config.get('wait-for-healthy', True) and no_of_osds >= 2: + is_healthy(ctx=ctx, config=None) + + log.info('Setting up client nodes...') + conf_path = '/etc/ceph/ceph.conf' + admin_keyring_path = '/etc/ceph/ceph.client.admin.keyring' + first_mon = teuthology.get_first_mon(ctx, config) + (mon0_remote,) = ctx.cluster.only(first_mon).remotes.keys() + conf_data = mon0_remote.read_file(conf_path, sudo=True) + admin_keyring = mon0_remote.read_file(admin_keyring_path, sudo=True) + + clients = ctx.cluster.only(teuthology.is_type('client')) + for remote, roles_for_host in clients.remotes.items(): + for id_ in teuthology.roles_of_type(roles_for_host, 'client'): + client_keyring = \ + '/etc/ceph/ceph.client.{id}.keyring'.format(id=id_) + mon0_remote.run( + args=[ + 'cd', + '{tdir}'.format(tdir=testdir), + run.Raw('&&'), + 'sudo', 'bash', '-c', + run.Raw('"'), 'ceph', + 'auth', + 'get-or-create', + 'client.{id}'.format(id=id_), + 'mds', 'allow', + 'mon', 'allow *', + 'osd', 'allow *', + run.Raw('>'), + client_keyring, + run.Raw('"'), + ], + ) + key_data = mon0_remote.read_file( + path=client_keyring, + sudo=True, + ) + remote.sudo_write_file( + path=client_keyring, + data=key_data, + mode='0644' + ) + remote.sudo_write_file( + path=admin_keyring_path, + data=admin_keyring, + mode='0644' + ) + remote.sudo_write_file( + path=conf_path, + data=conf_data, + mode='0644' + ) + + if mds_nodes: + log.info('Configuring CephFS...') + Filesystem(ctx, create=True) + elif not config.get('only_mon'): + raise RuntimeError( + "The cluster is NOT operational due to insufficient OSDs") + # create rbd pool + ceph_admin.run( + args=[ + 'sudo', 'ceph', '--cluster', 'ceph', + 'osd', 'pool', 'create', 'rbd', '128', '128'], + check_status=False) + ceph_admin.run( + args=[ + 'sudo', 'ceph', '--cluster', 'ceph', + 'osd', 'pool', 'application', 'enable', + 'rbd', 'rbd', '--yes-i-really-mean-it' + ], + check_status=False) + yield + + except Exception: + log.info( + "Error encountered, logging exception before tearing down ceph-deploy") + log.info(traceback.format_exc()) + raise + finally: + if config.get('keep_running'): + return + log.info('Stopping ceph...') + ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'], + check_status=False) + time.sleep(4) + + # and now just check for the processes themselves, as if upstart/sysvinit + # is lying to us. 
Ignore errors if the grep fails + ctx.cluster.run(args=['sudo', 'ps', 'aux', run.Raw('|'), + 'grep', '-v', 'grep', run.Raw('|'), + 'grep', 'ceph'], check_status=False) + ctx.cluster.run(args=['sudo', 'systemctl', run.Raw('|'), + 'grep', 'ceph'], check_status=False) + + if ctx.archive is not None: + # archive mon data, too + log.info('Archiving mon data...') + path = os.path.join(ctx.archive, 'data') + os.makedirs(path) + mons = ctx.cluster.only(teuthology.is_type('mon')) + for remote, roles in mons.remotes.items(): + for role in roles: + if role.startswith('mon.'): + teuthology.pull_directory_tarball( + remote, + '/var/lib/ceph/mon', + path + '/' + role + '.tgz') + + log.info('Compressing logs...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'find', + '/var/log/ceph', + '-name', + '*.log', + '-print0', + run.Raw('|'), + 'sudo', + 'xargs', + '-0', + '--no-run-if-empty', + '--', + 'gzip', + '--', + ], + wait=False, + ), + ) + + log.info('Archiving logs...') + path = os.path.join(ctx.archive, 'remote') + os.makedirs(path) + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.shortname) + os.makedirs(sub) + teuthology.pull_directory(remote, '/var/log/ceph', + os.path.join(sub, 'log')) + + # Prevent these from being undefined if the try block fails + all_nodes = get_all_nodes(ctx, config) + purge_nodes = './ceph-deploy purge' + " " + all_nodes + purgedata_nodes = './ceph-deploy purgedata' + " " + all_nodes + + log.info('Purging package...') + execute_ceph_deploy(purge_nodes) + log.info('Purging data...') + execute_ceph_deploy(purgedata_nodes) + + +@contextlib.contextmanager +def cli_test(ctx, config): + """ + ceph-deploy cli to exercise most commonly use cli's and ensure + all commands works and also startup the init system. 
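+
+ This is normally driven through the ceph-deploy.single_node_test task
+ below rather than run on its own; it expects a single admin node and at
+ least three scratch devices on the osd host.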
+ + """ + log.info('Ceph-deploy Test') + if config is None: + config = {} + test_branch = '' + conf_dir = teuthology.get_testdir(ctx) + "/cdtest" + + def execute_cdeploy(admin, cmd, path): + """Execute ceph-deploy commands """ + """Either use git path or repo path """ + args = ['cd', conf_dir, run.Raw(';')] + if path: + args.append('{path}/ceph-deploy/ceph-deploy'.format(path=path)) + else: + args.append('ceph-deploy') + args.append(run.Raw(cmd)) + ec = admin.run(args=args, check_status=False).exitstatus + if ec != 0: + raise RuntimeError( + "failed during ceph-deploy cmd: {cmd} , ec={ec}".format(cmd=cmd, ec=ec)) + + if config.get('rhbuild'): + path = None + else: + path = teuthology.get_testdir(ctx) + # test on branch from config eg: wip-* , master or next etc + # packages for all distro's should exist for wip* + if ctx.config.get('branch'): + branch = ctx.config.get('branch') + test_branch = ' --dev={branch} '.format(branch=branch) + mons = ctx.cluster.only(teuthology.is_type('mon')) + for node, role in mons.remotes.items(): + admin = node + admin.run(args=['mkdir', conf_dir], check_status=False) + nodename = admin.shortname + system_type = teuthology.get_system_type(admin) + if config.get('rhbuild'): + admin.run(args=['sudo', 'yum', 'install', 'ceph-deploy', '-y']) + log.info('system type is %s', system_type) + osds = ctx.cluster.only(teuthology.is_type('osd')) + + for remote, roles in osds.remotes.items(): + devs = teuthology.get_scratch_devices(remote) + log.info("roles %s", roles) + if (len(devs) < 3): + log.error( + 'Test needs minimum of 3 devices, only found %s', + str(devs)) + raise RuntimeError("Needs minimum of 3 devices ") + + conf_path = '{conf_dir}/ceph.conf'.format(conf_dir=conf_dir) + new_cmd = 'new ' + nodename + execute_cdeploy(admin, new_cmd, path) + if config.get('conf') is not None: + confp = config.get('conf') + for section, keys in confp.items(): + lines = '[{section}]\n'.format(section=section) + admin.sudo_write_file(conf_path, lines, append=True) + for key, value in keys.items(): + log.info("[%s] %s = %s" % (section, key, value)) + lines = '{key} = {value}\n'.format(key=key, value=value) + admin.sudo_write_file(conf_path, lines, append=True) + new_mon_install = 'install {branch} --mon '.format( + branch=test_branch) + nodename + new_mgr_install = 'install {branch} --mgr '.format( + branch=test_branch) + nodename + new_osd_install = 'install {branch} --osd '.format( + branch=test_branch) + nodename + new_admin = 'install {branch} --cli '.format(branch=test_branch) + nodename + create_initial = 'mon create-initial ' + mgr_create = 'mgr create ' + nodename + # either use create-keys or push command + push_keys = 'admin ' + nodename + execute_cdeploy(admin, new_mon_install, path) + execute_cdeploy(admin, new_mgr_install, path) + execute_cdeploy(admin, new_osd_install, path) + execute_cdeploy(admin, new_admin, path) + execute_cdeploy(admin, create_initial, path) + execute_cdeploy(admin, mgr_create, path) + execute_cdeploy(admin, push_keys, path) + + for i in range(3): + zap_disk = 'disk zap ' + "{n}:{d}".format(n=nodename, d=devs[i]) + prepare = 'osd prepare ' + "{n}:{d}".format(n=nodename, d=devs[i]) + execute_cdeploy(admin, zap_disk, path) + execute_cdeploy(admin, prepare, path) + + log.info("list files for debugging purpose to check file permissions") + admin.run(args=['ls', run.Raw('-lt'), conf_dir]) + remote.run(args=['sudo', 'ceph', '-s'], check_status=False) + out = remote.sh('sudo ceph health') + log.info('Ceph health: %s', out.rstrip('\n')) + 
log.info("Waiting for cluster to become healthy") + with contextutil.safe_while(sleep=10, tries=6, + action='check health') as proceed: + while proceed(): + out = remote.sh('sudo ceph health') + if (out.split(None, 1)[0] == 'HEALTH_OK'): + break + rgw_install = 'install {branch} --rgw {node}'.format( + branch=test_branch, + node=nodename, + ) + rgw_create = 'rgw create ' + nodename + execute_cdeploy(admin, rgw_install, path) + execute_cdeploy(admin, rgw_create, path) + log.info('All ceph-deploy cli tests passed') + try: + yield + finally: + log.info("cleaning up") + ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target'], + check_status=False) + time.sleep(4) + for i in range(3): + umount_dev = "{d}1".format(d=devs[i]) + remote.run(args=['sudo', 'umount', run.Raw(umount_dev)]) + cmd = 'purge ' + nodename + execute_cdeploy(admin, cmd, path) + cmd = 'purgedata ' + nodename + execute_cdeploy(admin, cmd, path) + log.info("Removing temporary dir") + admin.run( + args=[ + 'rm', + run.Raw('-rf'), + run.Raw(conf_dir)], + check_status=False) + if config.get('rhbuild'): + admin.run(args=['sudo', 'yum', 'remove', 'ceph-deploy', '-y']) + + +@contextlib.contextmanager +def single_node_test(ctx, config): + """ + - ceph-deploy.single_node_test: null + + #rhbuild testing + - ceph-deploy.single_node_test: + rhbuild: 1.2.3 + + """ + log.info("Testing ceph-deploy on single node") + if config is None: + config = {} + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph-deploy', {})) + + if config.get('rhbuild'): + log.info("RH Build, Skip Download") + with contextutil.nested( + lambda: cli_test(ctx=ctx, config=config), + ): + yield + else: + with contextutil.nested( + lambda: install_fn.ship_utilities(ctx=ctx, config=None), + lambda: download_ceph_deploy(ctx=ctx, config=config), + lambda: cli_test(ctx=ctx, config=config), + ): + yield + + +@contextlib.contextmanager +def upgrade(ctx, config): + """ + Upgrade using ceph-deploy + eg: + ceph-deploy.upgrade: + # to upgrade to specific branch, use + branch: + stable: jewel + # to setup mgr node, use + setup-mgr-node: True + # to wait for cluster to be healthy after all upgrade, use + wait-for-healthy: True + role: (upgrades the below roles serially) + mon.a + mon.b + osd.0 + """ + roles = config.get('roles') + # get the roles that are mapped as per ceph-deploy + # roles are mapped for mon/mds eg: mon.a => mon.host_short_name + mapped_role = ctx.cluster.mapped_role + log.info("roles={r}, mapped_roles={mr}".format(r=roles, mr=mapped_role)) + if config.get('branch'): + branch = config.get('branch') + (var, val) = branch.items()[0] + ceph_branch = '--{var}={val}'.format(var=var, val=val) + else: + # default to wip-branch under test + dev_branch = ctx.config['branch'] + ceph_branch = '--dev={branch}'.format(branch=dev_branch) + # get the node used for initial deployment which is mon.a + mon_a = mapped_role.get('mon.a') + (ceph_admin,) = ctx.cluster.only(mon_a).remotes.keys() + testdir = teuthology.get_testdir(ctx) + cmd = './ceph-deploy install ' + ceph_branch + for role in roles: + # check if this role is mapped (mon or mds) + if mapped_role.get(role): + role = mapped_role.get(role) + remotes_and_roles = ctx.cluster.only(role).remotes + for remote, roles in remotes_and_roles.items(): + nodename = remote.shortname + cmd = cmd + ' ' + nodename + log.info("Upgrading ceph on %s", nodename) + ceph_admin.run( + args=[ + 'cd', + '{tdir}/ceph-deploy'.format(tdir=testdir), + run.Raw('&&'), + run.Raw(cmd), + ], + ) + # 
restart all ceph services, ideally upgrade should but it does not + remote.run( + args=[ + 'sudo', 'systemctl', 'restart', 'ceph.target' + ] + ) + ceph_admin.run(args=['sudo', 'ceph', '-s']) + + # workaround for http://tracker.ceph.com/issues/20950 + # write the correct mgr key to disk + if config.get('setup-mgr-node', None): + mons = ctx.cluster.only(teuthology.is_type('mon')) + for remote, roles in mons.remotes.items(): + remote.run( + args=[ + run.Raw('sudo ceph auth get client.bootstrap-mgr'), + run.Raw('|'), + run.Raw('sudo tee'), + run.Raw('/var/lib/ceph/bootstrap-mgr/ceph.keyring') + ] + ) + + if config.get('setup-mgr-node', None): + mgr_nodes = get_nodes_using_role(ctx, 'mgr') + mgr_nodes = " ".join(mgr_nodes) + mgr_install = './ceph-deploy install --mgr ' + ceph_branch + " " + mgr_nodes + mgr_create = './ceph-deploy mgr create' + " " + mgr_nodes + # install mgr + ceph_admin.run( + args=[ + 'cd', + '{tdir}/ceph-deploy'.format(tdir=testdir), + run.Raw('&&'), + run.Raw(mgr_install), + ], + ) + # create mgr + ceph_admin.run( + args=[ + 'cd', + '{tdir}/ceph-deploy'.format(tdir=testdir), + run.Raw('&&'), + run.Raw(mgr_create), + ], + ) + ceph_admin.run(args=['sudo', 'ceph', '-s']) + if config.get('wait-for-healthy', None): + wait_until_healthy(ctx, ceph_admin, use_sudo=True) + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Set up and tear down a Ceph cluster. + + For example:: + + tasks: + - install: + extras: yes + - ssh_keys: + - ceph-deploy: + branch: + stable: bobtail + mon_initial_members: 1 + ceph-deploy-branch: my-ceph-deploy-branch + only_mon: true + keep_running: true + # either choose bluestore or filestore, default is bluestore + bluestore: True + # or + filestore: True + # skip install of mgr for old release using below flag + skip-mgr: True ( default is False ) + # to use ceph-volume instead of ceph-disk + # ceph-disk can only be used with old ceph-deploy release from pypi + use-ceph-volume: true + + tasks: + - install: + extras: yes + - ssh_keys: + - ceph-deploy: + branch: + dev: master + conf: + mon: + debug mon = 20 + + tasks: + - install: + extras: yes + - ssh_keys: + - ceph-deploy: + branch: + testing: + dmcrypt: yes + separate_journal_disk: yes + + """ + if config is None: + config = {} + + assert isinstance(config, dict), \ + "task ceph-deploy only supports a dictionary for configuration" + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph-deploy', {})) + + if config.get('branch') is not None: + assert isinstance( + config['branch'], dict), 'branch must be a dictionary' + + log.info('task ceph-deploy with config ' + str(config)) + + # we need to use 1.5.39-stable for testing jewel or master branch with + # ceph-disk + if config.get('use-ceph-volume', False) is False: + # check we are not testing specific branch + if config.get('ceph-deploy-branch', False) is False: + config['ceph-deploy-branch'] = '1.5.39-stable' + + with contextutil.nested( + lambda: install_fn.ship_utilities(ctx=ctx, config=None), + lambda: download_ceph_deploy(ctx=ctx, config=config), + lambda: build_ceph_cluster(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/ceph_fuse.py b/qa/tasks/ceph_fuse.py new file mode 100644 index 000000000..70cf9bf83 --- /dev/null +++ b/qa/tasks/ceph_fuse.py @@ -0,0 +1,185 @@ +""" +Ceph FUSE client task +""" + +import contextlib +import logging + +from teuthology import misc +from tasks.cephfs.fuse_mount import FuseMount + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager 
+def task(ctx, config): + """ + Mount/unmount a ``ceph-fuse`` client. + + The config is optional and defaults to mounting on all clients. If + a config is given, it is expected to be a list of clients to do + this operation on. This lets you e.g. set up one client with + ``ceph-fuse`` and another with ``kclient``. + + ``brxnet`` should be a Private IPv4 Address range, default range is + [192.168.0.0/16] + + Example that mounts all clients:: + + tasks: + - ceph: + - ceph-fuse: + - interactive: + - brxnet: [192.168.0.0/16] + + Example that uses both ``kclient` and ``ceph-fuse``:: + + tasks: + - ceph: + - ceph-fuse: [client.0] + - kclient: [client.1] + - interactive: + + Example that enables valgrind: + + tasks: + - ceph: + - ceph-fuse: + client.0: + valgrind: [--tool=memcheck, --leak-check=full, --show-reachable=yes] + - interactive: + + Example that stops an already-mounted client: + + :: + + tasks: + - ceph: + - ceph-fuse: [client.0] + - ... do something that requires the FS mounted ... + - ceph-fuse: + client.0: + mounted: false + - ... do something that requires the FS unmounted ... + + Example that adds more generous wait time for mount (for virtual machines): + + tasks: + - ceph: + - ceph-fuse: + client.0: + mount_wait: 60 # default is 0, do not wait before checking /sys/ + mount_timeout: 120 # default is 30, give up if /sys/ is not populated + - interactive: + + Example that creates and mounts a subvol: + + overrides: + ceph: + subvols: + create: 2 + subvol_options: "--namespace-isolated --size 25000000000" + ceph-fuse: + client.0: + mount_subvol_num: 0 + kclient: + client.1: + mount_subvol_num: 1 + + :param ctx: Context + :param config: Configuration + """ + log.info('Running ceph_fuse task...') + + if config is None: + ids = misc.all_roles_of_type(ctx.cluster, 'client') + client_roles = [f'client.{id_}' for id_ in ids] + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, list): + client_roles = config + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, dict): + client_roles = filter(lambda x: 'client.' in x, config.keys()) + else: + raise ValueError(f"Invalid config object: {config} ({config.__class__})") + log.info(f"config is {config}") + + clients = list(misc.get_clients(ctx=ctx, roles=client_roles)) + testdir = misc.get_testdir(ctx) + all_mounts = getattr(ctx, 'mounts', {}) + mounted_by_me = {} + skipped = {} + remotes = set() + + brxnet = config.get("brxnet", None) + + # Construct any new FuseMount instances + overrides = ctx.config.get('overrides', {}).get('ceph-fuse', {}) + top_overrides = dict(filter(lambda x: 'client.' not in x[0], overrides.items())) + for id_, remote in clients: + entity = f"client.{id_}" + client_config = config.get(entity) + if client_config is None: + client_config = {} + # top level overrides + misc.deep_merge(client_config, top_overrides) + # mount specific overrides + client_config_overrides = overrides.get(entity) + misc.deep_merge(client_config, client_config_overrides) + log.info(f"{entity} config is {client_config}") + + remotes.add(remote) + auth_id = client_config.get("auth_id", id_) + cephfs_name = client_config.get("cephfs_name") + + skip = client_config.get("skip", False) + if skip: + skipped[id_] = skip + continue + + if id_ not in all_mounts: + fuse_mount = FuseMount(ctx=ctx, client_config=client_config, + test_dir=testdir, client_id=auth_id, + client_remote=remote, brxnet=brxnet, + cephfs_name=cephfs_name) + all_mounts[id_] = fuse_mount + else: + # Catch bad configs where someone has e.g. 
tried to use ceph-fuse and kcephfs for the same client + assert isinstance(all_mounts[id_], FuseMount) + + if not config.get("disabled", False) and client_config.get('mounted', True): + mounted_by_me[id_] = {"config": client_config, "mount": all_mounts[id_]} + + ctx.mounts = all_mounts + + # Umount any pre-existing clients that we have not been asked to mount + for client_id in set(all_mounts.keys()) - set(mounted_by_me.keys()) - set(skipped.keys()): + mount = all_mounts[client_id] + if mount.is_mounted(): + mount.umount_wait() + + for remote in remotes: + FuseMount.cleanup_stale_netnses_and_bridge(remote) + + # Mount any clients we have been asked to (default to mount all) + log.info('Mounting ceph-fuse clients...') + for info in mounted_by_me.values(): + config = info["config"] + mount_x = info['mount'] + mount_x.mount(mntopts=config.get('mntopts', []), mntargs=config.get('mntargs', [])) + + for info in mounted_by_me.values(): + info["mount"].wait_until_mounted() + + try: + yield all_mounts + finally: + log.info('Unmounting ceph-fuse clients...') + + for info in mounted_by_me.values(): + # Conditional because an inner context might have umounted it + mount = info["mount"] + if mount.is_mounted(): + mount.umount_wait() + for remote in remotes: + FuseMount.cleanup_stale_netnses_and_bridge(remote) diff --git a/qa/tasks/ceph_iscsi_client.py b/qa/tasks/ceph_iscsi_client.py new file mode 100644 index 000000000..189b7fa31 --- /dev/null +++ b/qa/tasks/ceph_iscsi_client.py @@ -0,0 +1,56 @@ +""" +Set up ceph-iscsi client. +""" +import logging +import contextlib +from textwrap import dedent + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Set up ceph-iscsi client. + + tasks: + ceph_iscsi_client: + clients: [client.1] + """ + log.info('Setting up ceph-iscsi client...') + for role in config['clients']: + (remote,) = (ctx.cluster.only(role).remotes.keys()) + + conf = dedent(''' + InitiatorName=iqn.1994-05.com.redhat:client + ''') + path = "/etc/iscsi/initiatorname.iscsi" + remote.sudo_write_file(path, conf, mkdir=True) + + # the restart is needed after the above change is applied + remote.run(args=['sudo', 'systemctl', 'restart', 'iscsid']) + + remote.run(args=['sudo', 'modprobe', 'dm_multipath']) + remote.run(args=['sudo', 'mpathconf', '--enable']) + conf = dedent(''' + devices { + device { + vendor "LIO-ORG" + product "TCMU device" + hardware_handler "1 alua" + path_grouping_policy "failover" + path_selector "queue-length 0" + failback 60 + path_checker tur + prio alua + prio_args exclusive_pref_bit + fast_io_fail_tmo 25 + no_path_retry queue + } + } + ''') + path = "/etc/multipath.conf" + remote.sudo_write_file(path, conf, append=True) + remote.run(args=['sudo', 'systemctl', 'start', 'multipathd']) + + yield diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py new file mode 100644 index 000000000..516c409e8 --- /dev/null +++ b/qa/tasks/ceph_manager.py @@ -0,0 +1,3235 @@ +""" +ceph manager -- Thrasher and CephManager objects +""" +from functools import wraps +import contextlib +import errno +import random +import signal +import time +import gevent +import base64 +import json +import logging +import threading +import traceback +import os +import shlex + +from io import BytesIO, StringIO +from subprocess import DEVNULL +from teuthology import misc as teuthology +from tasks.scrub import Scrubber +from tasks.util.rados import cmd_erasure_code_profile +from tasks.util import get_remote + +from teuthology.contextutil import safe_while +from 
teuthology.orchestra.remote import Remote +from teuthology.orchestra import run +from teuthology.parallel import parallel +from teuthology.exceptions import CommandFailedError +from tasks.thrasher import Thrasher + + +DEFAULT_CONF_PATH = '/etc/ceph/ceph.conf' + +log = logging.getLogger(__name__) + +# this is for cephadm clusters +def shell(ctx, cluster_name, remote, args, name=None, **kwargs): + extra_args = [] + if name: + extra_args = ['-n', name] + return remote.run( + args=[ + 'sudo', + ctx.cephadm, + '--image', ctx.ceph[cluster_name].image, + 'shell', + ] + extra_args + [ + '--fsid', ctx.ceph[cluster_name].fsid, + '--', + ] + args, + **kwargs + ) + +# this is for rook clusters +def toolbox(ctx, cluster_name, args, **kwargs): + return ctx.rook[cluster_name].remote.run( + args=[ + 'kubectl', + '-n', 'rook-ceph', + 'exec', + ctx.rook[cluster_name].toolbox, + '--', + ] + args, + **kwargs + ) + + +def write_conf(ctx, conf_path=DEFAULT_CONF_PATH, cluster='ceph'): + conf_fp = BytesIO() + ctx.ceph[cluster].conf.write(conf_fp) + conf_fp.seek(0) + writes = ctx.cluster.run( + args=[ + 'sudo', 'mkdir', '-p', '/etc/ceph', run.Raw('&&'), + 'sudo', 'chmod', '0755', '/etc/ceph', run.Raw('&&'), + 'sudo', 'tee', conf_path, run.Raw('&&'), + 'sudo', 'chmod', '0644', conf_path, + run.Raw('>'), '/dev/null', + + ], + stdin=run.PIPE, + wait=False) + teuthology.feed_many_stdins_and_close(conf_fp, writes) + run.wait(writes) + +def get_valgrind_args(testdir, name, preamble, v, exit_on_first_error=True, cd=True): + """ + Build a command line for running valgrind. + + testdir - test results directory + name - name of daemon (for naming hte log file) + preamble - stuff we should run before valgrind + v - valgrind arguments + """ + if v is None: + return preamble + if not isinstance(v, list): + v = [v] + + # https://tracker.ceph.com/issues/44362 + preamble.extend([ + 'env', 'OPENSSL_ia32cap=~0x1000000000000000', + ]) + + val_path = '/var/log/ceph/valgrind' + if '--tool=memcheck' in v or '--tool=helgrind' in v: + extra_args = [ + 'valgrind', + '--trace-children=no', + '--child-silent-after-fork=yes', + '--soname-synonyms=somalloc=*tcmalloc*', + '--num-callers=50', + '--suppressions={tdir}/valgrind.supp'.format(tdir=testdir), + '--xml=yes', + '--xml-file={vdir}/{n}.log'.format(vdir=val_path, n=name), + '--time-stamp=yes', + '--vgdb=yes', + ] + else: + extra_args = [ + 'valgrind', + '--trace-children=no', + '--child-silent-after-fork=yes', + '--soname-synonyms=somalloc=*tcmalloc*', + '--suppressions={tdir}/valgrind.supp'.format(tdir=testdir), + '--log-file={vdir}/{n}.log'.format(vdir=val_path, n=name), + '--time-stamp=yes', + '--vgdb=yes', + ] + if exit_on_first_error: + extra_args.extend([ + # at least Valgrind 3.14 is required + '--exit-on-first-error=yes', + '--error-exitcode=42', + ]) + args = [] + if cd: + args += ['cd', testdir, run.Raw('&&')] + args += preamble + extra_args + v + log.debug('running %s under valgrind with args %s', name, args) + return args + + +def mount_osd_data(ctx, remote, cluster, osd): + """ + Mount a remote OSD + + :param ctx: Context + :param remote: Remote site + :param cluster: name of ceph cluster + :param osd: Osd name + """ + log.debug('Mounting data for osd.{o} on {r}'.format(o=osd, r=remote)) + role = "{0}.osd.{1}".format(cluster, osd) + alt_role = role if cluster != 'ceph' else "osd.{0}".format(osd) + if remote in ctx.disk_config.remote_to_roles_to_dev: + if alt_role in ctx.disk_config.remote_to_roles_to_dev[remote]: + role = alt_role + if role not in 
ctx.disk_config.remote_to_roles_to_dev[remote]: + return + dev = ctx.disk_config.remote_to_roles_to_dev[remote][role] + mount_options = ctx.disk_config.\ + remote_to_roles_to_dev_mount_options[remote][role] + fstype = ctx.disk_config.remote_to_roles_to_dev_fstype[remote][role] + mnt = os.path.join('/var/lib/ceph/osd', '{0}-{1}'.format(cluster, osd)) + + log.info('Mounting osd.{o}: dev: {n}, cluster: {c}' + 'mountpoint: {p}, type: {t}, options: {v}'.format( + o=osd, n=remote.name, p=mnt, t=fstype, v=mount_options, + c=cluster)) + + remote.run( + args=[ + 'sudo', + 'mount', + '-t', fstype, + '-o', ','.join(mount_options), + dev, + mnt, + ] + ) + + +def log_exc(func): + @wraps(func) + def wrapper(self): + try: + return func(self) + except: + self.log(traceback.format_exc()) + raise + return wrapper + + +class PoolType: + REPLICATED = 1 + ERASURE_CODED = 3 + + +class OSDThrasher(Thrasher): + """ + Object used to thrash Ceph + """ + def __init__(self, manager, config, name, logger): + super(OSDThrasher, self).__init__() + + self.ceph_manager = manager + self.cluster = manager.cluster + self.ceph_manager.wait_for_clean() + osd_status = self.ceph_manager.get_osd_status() + self.in_osds = osd_status['in'] + self.live_osds = osd_status['live'] + self.out_osds = osd_status['out'] + self.dead_osds = osd_status['dead'] + self.stopping = False + self.logger = logger + self.config = config + self.name = name + self.revive_timeout = self.config.get("revive_timeout", 360) + self.pools_to_fix_pgp_num = set() + if self.config.get('powercycle'): + self.revive_timeout += 120 + self.clean_wait = self.config.get('clean_wait', 0) + self.minin = self.config.get("min_in", 4) + self.chance_move_pg = self.config.get('chance_move_pg', 1.0) + self.sighup_delay = self.config.get('sighup_delay') + self.optrack_toggle_delay = self.config.get('optrack_toggle_delay') + self.dump_ops_enable = self.config.get('dump_ops_enable') + self.noscrub_toggle_delay = self.config.get('noscrub_toggle_delay') + self.chance_thrash_cluster_full = self.config.get('chance_thrash_cluster_full', .05) + self.chance_thrash_pg_upmap = self.config.get('chance_thrash_pg_upmap', 1.0) + self.chance_thrash_pg_upmap_items = self.config.get('chance_thrash_pg_upmap', 1.0) + self.random_eio = self.config.get('random_eio') + self.chance_force_recovery = self.config.get('chance_force_recovery', 0.3) + + num_osds = self.in_osds + self.out_osds + self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * len(num_osds) + self.min_pgs = self.config.get("min_pgs_per_pool_osd", 1) * len(num_osds) + if self.config is None: + self.config = dict() + # prevent monitor from auto-marking things out while thrasher runs + # try both old and new tell syntax, in case we are testing old code + self.saved_options = [] + # assuming that the default settings do not vary from one daemon to + # another + first_mon = teuthology.get_first_mon(manager.ctx, self.config).split('.') + opts = [('mon', 'mon_osd_down_out_interval', 0)] + #why do we disable marking an OSD out automatically? 
:/ + for service, opt, new_value in opts: + old_value = manager.get_config(first_mon[0], + first_mon[1], + opt) + self.saved_options.append((service, opt, old_value)) + manager.inject_args(service, '*', opt, new_value) + # initialize ceph_objectstore_tool property - must be done before + # do_thrash is spawned - http://tracker.ceph.com/issues/18799 + if (self.config.get('powercycle') or + not self.cmd_exists_on_osds("ceph-objectstore-tool") or + self.config.get('disable_objectstore_tool_tests', False)): + self.ceph_objectstore_tool = False + if self.config.get('powercycle'): + self.log("Unable to test ceph-objectstore-tool, " + "powercycle testing") + else: + self.log("Unable to test ceph-objectstore-tool, " + "not available on all OSD nodes") + else: + self.ceph_objectstore_tool = \ + self.config.get('ceph_objectstore_tool', True) + # spawn do_thrash + self.thread = gevent.spawn(self.do_thrash) + if self.sighup_delay: + self.sighup_thread = gevent.spawn(self.do_sighup) + if self.optrack_toggle_delay: + self.optrack_toggle_thread = gevent.spawn(self.do_optrack_toggle) + if self.dump_ops_enable == "true": + self.dump_ops_thread = gevent.spawn(self.do_dump_ops) + if self.noscrub_toggle_delay: + self.noscrub_toggle_thread = gevent.spawn(self.do_noscrub_toggle) + + def log(self, msg, *args, **kwargs): + self.logger.info(msg, *args, **kwargs) + + def cmd_exists_on_osds(self, cmd): + if self.ceph_manager.cephadm or self.ceph_manager.rook: + return True + allremotes = self.ceph_manager.ctx.cluster.only(\ + teuthology.is_type('osd', self.cluster)).remotes.keys() + allremotes = list(set(allremotes)) + for remote in allremotes: + proc = remote.run(args=['type', cmd], wait=True, + check_status=False, stdout=BytesIO(), + stderr=BytesIO()) + if proc.exitstatus != 0: + return False; + return True; + + def run_ceph_objectstore_tool(self, remote, osd, cmd): + if self.ceph_manager.cephadm: + return shell( + self.ceph_manager.ctx, self.ceph_manager.cluster, remote, + args=['ceph-objectstore-tool', '--err-to-stderr'] + cmd, + name=osd, + wait=True, check_status=False, + stdout=StringIO(), + stderr=StringIO()) + elif self.ceph_manager.rook: + assert False, 'not implemented' + else: + return remote.run( + args=['sudo', 'adjust-ulimits', 'ceph-objectstore-tool', '--err-to-stderr'] + cmd, + wait=True, check_status=False, + stdout=StringIO(), + stderr=StringIO()) + + def run_ceph_bluestore_tool(self, remote, osd, cmd): + if self.ceph_manager.cephadm: + return shell( + self.ceph_manager.ctx, self.ceph_manager.cluster, remote, + args=['ceph-bluestore-tool', '--err-to-stderr'] + cmd, + name=osd, + wait=True, check_status=False, + stdout=StringIO(), + stderr=StringIO()) + elif self.ceph_manager.rook: + assert False, 'not implemented' + else: + return remote.run( + args=['sudo', 'ceph-bluestore-tool', '--err-to-stderr'] + cmd, + wait=True, check_status=False, + stdout=StringIO(), + stderr=StringIO()) + + def kill_osd(self, osd=None, mark_down=False, mark_out=False): + """ + :param osd: Osd to be killed. + :mark_down: Mark down if true. + :mark_out: Mark out if true. 
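The OSDThrasher constructor above pulls its thrashing knobs straight from the task config via config.get() with defaults. A rough, illustrative sketch of the kind of dictionary it expects follows; the key names are taken from the constructor, but the values here are made up for illustration, not recommended settings.

# Illustrative only: keys mirror the config.get() calls in OSDThrasher.__init__;
# the values are example numbers, not suite defaults.
thrasher_config = {
    'revive_timeout': 360,            # seconds to wait for a revived OSD
    'min_in': 4,                      # never thrash below this many "in" OSDs
    'chance_down': 0.4,               # weight for kill_osd in choose_action()
    'chance_move_pg': 1.0,            # move a PG with ceph-objectstore-tool
    'chance_thrash_cluster_full': 0.05,
    'chance_thrash_pg_upmap': 1.0,
    'chance_force_recovery': 0.3,
    'sighup_delay': 0.1,              # enables the do_sighup greenlet
    'noscrub_toggle_delay': 2.0,      # enables the do_noscrub_toggle greenlet
}

# The constructor pattern is simply config.get() with a default, e.g.:
revive_timeout = thrasher_config.get('revive_timeout', 360)
if thrasher_config.get('powercycle'):
    revive_timeout += 120             # powercycled OSDs get extra time
print(revive_timeout)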
+ """ + if osd is None: + osd = random.choice(self.live_osds) + self.log("Killing osd %s, live_osds are %s" % (str(osd), + str(self.live_osds))) + self.live_osds.remove(osd) + self.dead_osds.append(osd) + self.ceph_manager.kill_osd(osd) + if mark_down: + self.ceph_manager.mark_down_osd(osd) + if mark_out and osd in self.in_osds: + self.out_osd(osd) + if self.ceph_objectstore_tool: + self.log("Testing ceph-objectstore-tool on down osd.%s" % osd) + remote = self.ceph_manager.find_remote('osd', osd) + FSPATH = self.ceph_manager.get_filepath() + JPATH = os.path.join(FSPATH, "journal") + exp_osd = imp_osd = osd + self.log('remote for osd %s is %s' % (osd, remote)) + exp_remote = imp_remote = remote + # If an older osd is available we'll move a pg from there + if (len(self.dead_osds) > 1 and + random.random() < self.chance_move_pg): + exp_osd = random.choice(self.dead_osds[:-1]) + exp_remote = self.ceph_manager.find_remote('osd', exp_osd) + self.log('remote for exp osd %s is %s' % (exp_osd, exp_remote)) + prefix = [ + '--no-mon-config', + '--log-file=/var/log/ceph/objectstore_tool.$pid.log', + ] + + if self.ceph_manager.rook: + assert False, 'not implemented' + + if not self.ceph_manager.cephadm: + # ceph-objectstore-tool might be temporarily absent during an + # upgrade - see http://tracker.ceph.com/issues/18014 + with safe_while(sleep=15, tries=40, action="type ceph-objectstore-tool") as proceed: + while proceed(): + proc = exp_remote.run(args=['type', 'ceph-objectstore-tool'], + wait=True, check_status=False, stdout=BytesIO(), + stderr=BytesIO()) + if proc.exitstatus == 0: + break + log.debug("ceph-objectstore-tool binary not present, trying again") + + # ceph-objectstore-tool might bogusly fail with "OSD has the store locked" + # see http://tracker.ceph.com/issues/19556 + with safe_while(sleep=15, tries=40, action="ceph-objectstore-tool --op list-pgs") as proceed: + while proceed(): + proc = self.run_ceph_objectstore_tool( + exp_remote, 'osd.%s' % exp_osd, + prefix + [ + '--data-path', FSPATH.format(id=exp_osd), + '--journal-path', JPATH.format(id=exp_osd), + '--op', 'list-pgs', + ]) + if proc.exitstatus == 0: + break + elif (proc.exitstatus == 1 and + proc.stderr.getvalue() == "OSD has the store locked"): + continue + else: + raise Exception("ceph-objectstore-tool: " + "exp list-pgs failure with status {ret}". + format(ret=proc.exitstatus)) + + pgs = proc.stdout.getvalue().split('\n')[:-1] + if len(pgs) == 0: + self.log("No PGs found for osd.{osd}".format(osd=exp_osd)) + return + pg = random.choice(pgs) + #exp_path = teuthology.get_testdir(self.ceph_manager.ctx) + #exp_path = os.path.join(exp_path, '{0}.data'.format(self.cluster)) + exp_path = os.path.join('/var/log/ceph', # available inside 'shell' container + "exp.{pg}.{id}".format( + pg=pg, + id=exp_osd)) + if self.ceph_manager.cephadm: + exp_host_path = os.path.join( + '/var/log/ceph', + self.ceph_manager.ctx.ceph[self.ceph_manager.cluster].fsid, + "exp.{pg}.{id}".format( + pg=pg, + id=exp_osd)) + else: + exp_host_path = exp_path + + # export + # Can't use new export-remove op since this is part of upgrade testing + proc = self.run_ceph_objectstore_tool( + exp_remote, 'osd.%s' % exp_osd, + prefix + [ + '--data-path', FSPATH.format(id=exp_osd), + '--journal-path', JPATH.format(id=exp_osd), + '--op', 'export', + '--pgid', pg, + '--file', exp_path, + ]) + if proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "export failure with status {ret}". 
+ format(ret=proc.exitstatus)) + # remove + proc = self.run_ceph_objectstore_tool( + exp_remote, 'osd.%s' % exp_osd, + prefix + [ + '--data-path', FSPATH.format(id=exp_osd), + '--journal-path', JPATH.format(id=exp_osd), + '--force', + '--op', 'remove', + '--pgid', pg, + ]) + if proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "remove failure with status {ret}". + format(ret=proc.exitstatus)) + # If there are at least 2 dead osds we might move the pg + if exp_osd != imp_osd: + # If pg isn't already on this osd, then we will move it there + proc = self.run_ceph_objectstore_tool( + imp_remote, + 'osd.%s' % imp_osd, + prefix + [ + '--data-path', FSPATH.format(id=imp_osd), + '--journal-path', JPATH.format(id=imp_osd), + '--op', 'list-pgs', + ]) + if proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "imp list-pgs failure with status {ret}". + format(ret=proc.exitstatus)) + pgs = proc.stdout.getvalue().split('\n')[:-1] + if pg not in pgs: + self.log("Moving pg {pg} from osd.{fosd} to osd.{tosd}". + format(pg=pg, fosd=exp_osd, tosd=imp_osd)) + if imp_remote != exp_remote: + # Copy export file to the other machine + self.log("Transfer export file from {srem} to {trem}". + format(srem=exp_remote, trem=imp_remote)) + # just in case an upgrade make /var/log/ceph unreadable by non-root, + exp_remote.run(args=['sudo', 'chmod', '777', + '/var/log/ceph']) + imp_remote.run(args=['sudo', 'chmod', '777', + '/var/log/ceph']) + tmpexport = Remote.get_file(exp_remote, exp_host_path, + sudo=True) + if exp_host_path != exp_path: + # push to /var/log/ceph, then rename (we can't + # chmod 777 the /var/log/ceph/$fsid mountpoint) + Remote.put_file(imp_remote, tmpexport, exp_path) + imp_remote.run(args=[ + 'sudo', 'mv', exp_path, exp_host_path]) + else: + Remote.put_file(imp_remote, tmpexport, exp_host_path) + os.remove(tmpexport) + else: + # Can't move the pg after all + imp_osd = exp_osd + imp_remote = exp_remote + # import + proc = self.run_ceph_objectstore_tool( + imp_remote, 'osd.%s' % imp_osd, + [ + '--data-path', FSPATH.format(id=imp_osd), + '--journal-path', JPATH.format(id=imp_osd), + '--log-file=/var/log/ceph/objectstore_tool.$pid.log', + '--op', 'import', + '--file', exp_path, + ]) + if proc.exitstatus == 1: + bogosity = "The OSD you are using is older than the exported PG" + if bogosity in proc.stderr.getvalue(): + self.log("OSD older than exported PG" + "...ignored") + elif proc.exitstatus == 10: + self.log("Pool went away before processing an import" + "...ignored") + elif proc.exitstatus == 11: + self.log("Attempt to import an incompatible export" + "...ignored") + elif proc.exitstatus == 12: + # this should be safe to ignore because we only ever move 1 + # copy of the pg at a time, and merge is only initiated when + # all replicas are peered and happy. /me crosses fingers + self.log("PG merged on target" + "...ignored") + elif proc.exitstatus: + raise Exception("ceph-objectstore-tool: " + "import failure with status {ret}". + format(ret=proc.exitstatus)) + cmd = "sudo rm -f {file}".format(file=exp_host_path) + exp_remote.run(args=cmd) + if imp_remote != exp_remote: + imp_remote.run(args=cmd) + + def blackhole_kill_osd(self, osd=None): + """ + If all else fails, kill the osd. + :param osd: Osd to be killed. 
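kill_osd() exercises ceph-objectstore-tool by exporting a PG from one down OSD, removing it, and importing it on another. A self-contained sketch of the argument lists it assembles is below; the data/journal paths and IDs are placeholders and nothing is executed, this only shows the command shapes.

# Sketch of the three ceph-objectstore-tool invocations used when moving a PG
# between two down OSDs (export, remove, import).  Paths are placeholders.
def cot_cmds(pg, exp_osd, imp_osd,
             data_path='/var/lib/ceph/osd/ceph-{id}',
             journal_path='/var/lib/ceph/osd/ceph-{id}/journal',
             exp_file='/var/log/ceph/exp.{pg}.{id}'):
    common = ['--no-mon-config',
              '--log-file=/var/log/ceph/objectstore_tool.$pid.log']
    export_cmd = ['ceph-objectstore-tool'] + common + [
        '--data-path', data_path.format(id=exp_osd),
        '--journal-path', journal_path.format(id=exp_osd),
        '--op', 'export', '--pgid', pg,
        '--file', exp_file.format(pg=pg, id=exp_osd)]
    remove_cmd = ['ceph-objectstore-tool'] + common + [
        '--data-path', data_path.format(id=exp_osd),
        '--journal-path', journal_path.format(id=exp_osd),
        '--force', '--op', 'remove', '--pgid', pg]
    import_cmd = ['ceph-objectstore-tool',
        '--data-path', data_path.format(id=imp_osd),
        '--journal-path', journal_path.format(id=imp_osd),
        '--log-file=/var/log/ceph/objectstore_tool.$pid.log',
        '--op', 'import', '--file', exp_file.format(pg=pg, id=exp_osd)]
    return export_cmd, remove_cmd, import_cmd

for cmd in cot_cmds('1.2f', exp_osd=3, imp_osd=5):
    print(' '.join(cmd))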
+ """ + if osd is None: + osd = random.choice(self.live_osds) + self.log("Blackholing and then killing osd %s, live_osds are %s" % + (str(osd), str(self.live_osds))) + self.live_osds.remove(osd) + self.dead_osds.append(osd) + self.ceph_manager.blackhole_kill_osd(osd) + + def revive_osd(self, osd=None, skip_admin_check=False): + """ + Revive the osd. + :param osd: Osd to be revived. + """ + if osd is None: + osd = random.choice(self.dead_osds) + self.log("Reviving osd %s" % (str(osd),)) + self.ceph_manager.revive_osd( + osd, + self.revive_timeout, + skip_admin_check=skip_admin_check) + self.dead_osds.remove(osd) + self.live_osds.append(osd) + if self.random_eio > 0 and osd == self.rerrosd: + self.ceph_manager.set_config(self.rerrosd, + filestore_debug_random_read_err = self.random_eio) + self.ceph_manager.set_config(self.rerrosd, + bluestore_debug_random_read_err = self.random_eio) + + + def out_osd(self, osd=None): + """ + Mark the osd out + :param osd: Osd to be marked. + """ + if osd is None: + osd = random.choice(self.in_osds) + self.log("Removing osd %s, in_osds are: %s" % + (str(osd), str(self.in_osds))) + self.ceph_manager.mark_out_osd(osd) + self.in_osds.remove(osd) + self.out_osds.append(osd) + + def in_osd(self, osd=None): + """ + Mark the osd out + :param osd: Osd to be marked. + """ + if osd is None: + osd = random.choice(self.out_osds) + if osd in self.dead_osds: + return self.revive_osd(osd) + self.log("Adding osd %s" % (str(osd),)) + self.out_osds.remove(osd) + self.in_osds.append(osd) + self.ceph_manager.mark_in_osd(osd) + self.log("Added osd %s" % (str(osd),)) + + def reweight_osd_or_by_util(self, osd=None): + """ + Reweight an osd that is in + :param osd: Osd to be marked. + """ + if osd is not None or random.choice([True, False]): + if osd is None: + osd = random.choice(self.in_osds) + val = random.uniform(.1, 1.0) + self.log("Reweighting osd %s to %s" % (str(osd), str(val))) + self.ceph_manager.raw_cluster_cmd('osd', 'reweight', + str(osd), str(val)) + else: + # do it several times, the option space is large + for i in range(5): + options = { + 'max_change': random.choice(['0.05', '1.0', '3.0']), + 'overage': random.choice(['110', '1000']), + 'type': random.choice([ + 'reweight-by-utilization', + 'test-reweight-by-utilization']), + } + self.log("Reweighting by: %s"%(str(options),)) + self.ceph_manager.raw_cluster_cmd( + 'osd', + options['type'], + options['overage'], + options['max_change']) + + def primary_affinity(self, osd=None): + self.log("primary_affinity") + if osd is None: + osd = random.choice(self.in_osds) + if random.random() >= .5: + pa = random.random() + elif random.random() >= .5: + pa = 1 + else: + pa = 0 + self.log('Setting osd %s primary_affinity to %f' % (str(osd), pa)) + self.ceph_manager.raw_cluster_cmd('osd', 'primary-affinity', + str(osd), str(pa)) + + def thrash_cluster_full(self): + """ + Set and unset cluster full condition + """ + self.log('Setting full ratio to .001') + self.ceph_manager.raw_cluster_cmd('osd', 'set-full-ratio', '.001') + time.sleep(1) + self.log('Setting full ratio back to .95') + self.ceph_manager.raw_cluster_cmd('osd', 'set-full-ratio', '.95') + + def thrash_pg_upmap(self): + """ + Install or remove random pg_upmap entries in OSDMap + """ + self.log("thrash_pg_upmap") + from random import shuffle + out = self.ceph_manager.raw_cluster_cmd('osd', 'dump', '-f', 'json-pretty') + j = json.loads(out) + self.log('j is %s' % j) + try: + if random.random() >= .3: + pgs = self.ceph_manager.get_pg_stats() + if not pgs: + self.log('No 
pgs; doing nothing') + return + pg = random.choice(pgs) + pgid = str(pg['pgid']) + poolid = int(pgid.split('.')[0]) + sizes = [x['size'] for x in j['pools'] if x['pool'] == poolid] + if len(sizes) == 0: + self.log('No pools; doing nothing') + return + n = sizes[0] + osds = self.in_osds + self.out_osds + shuffle(osds) + osds = osds[0:n] + self.log('Setting %s to %s' % (pgid, osds)) + cmd = ['osd', 'pg-upmap', pgid] + [str(x) for x in osds] + self.log('cmd %s' % cmd) + self.ceph_manager.raw_cluster_cmd(*cmd) + else: + m = j['pg_upmap'] + if len(m) > 0: + shuffle(m) + pg = m[0]['pgid'] + self.log('Clearing pg_upmap on %s' % pg) + self.ceph_manager.raw_cluster_cmd( + 'osd', + 'rm-pg-upmap', + pg) + else: + self.log('No pg_upmap entries; doing nothing') + except CommandFailedError: + self.log('Failed to rm-pg-upmap, ignoring') + + def thrash_pg_upmap_items(self): + """ + Install or remove random pg_upmap_items entries in OSDMap + """ + self.log("thrash_pg_upmap_items") + from random import shuffle + out = self.ceph_manager.raw_cluster_cmd('osd', 'dump', '-f', 'json-pretty') + j = json.loads(out) + self.log('j is %s' % j) + try: + if random.random() >= .3: + pgs = self.ceph_manager.get_pg_stats() + if not pgs: + self.log('No pgs; doing nothing') + return + pg = random.choice(pgs) + pgid = str(pg['pgid']) + poolid = int(pgid.split('.')[0]) + sizes = [x['size'] for x in j['pools'] if x['pool'] == poolid] + if len(sizes) == 0: + self.log('No pools; doing nothing') + return + n = sizes[0] + osds = self.in_osds + self.out_osds + shuffle(osds) + osds = osds[0:n*2] + self.log('Setting %s to %s' % (pgid, osds)) + cmd = ['osd', 'pg-upmap-items', pgid] + [str(x) for x in osds] + self.log('cmd %s' % cmd) + self.ceph_manager.raw_cluster_cmd(*cmd) + else: + m = j['pg_upmap_items'] + if len(m) > 0: + shuffle(m) + pg = m[0]['pgid'] + self.log('Clearing pg_upmap on %s' % pg) + self.ceph_manager.raw_cluster_cmd( + 'osd', + 'rm-pg-upmap-items', + pg) + else: + self.log('No pg_upmap entries; doing nothing') + except CommandFailedError: + self.log('Failed to rm-pg-upmap-items, ignoring') + + def force_recovery(self): + """ + Force recovery on some of PGs + """ + backfill = random.random() >= 0.5 + j = self.ceph_manager.get_pgids_to_force(backfill) + if j: + try: + if backfill: + self.ceph_manager.raw_cluster_cmd('pg', 'force-backfill', *j) + else: + self.ceph_manager.raw_cluster_cmd('pg', 'force-recovery', *j) + except CommandFailedError: + self.log('Failed to force backfill|recovery, ignoring') + + + def cancel_force_recovery(self): + """ + Force recovery on some of PGs + """ + backfill = random.random() >= 0.5 + j = self.ceph_manager.get_pgids_to_cancel_force(backfill) + if j: + try: + if backfill: + self.ceph_manager.raw_cluster_cmd('pg', 'cancel-force-backfill', *j) + else: + self.ceph_manager.raw_cluster_cmd('pg', 'cancel-force-recovery', *j) + except CommandFailedError: + self.log('Failed to force backfill|recovery, ignoring') + + def force_cancel_recovery(self): + """ + Force or cancel forcing recovery + """ + if random.random() >= 0.4: + self.force_recovery() + else: + self.cancel_force_recovery() + + def all_up(self): + """ + Make sure all osds are up and not out. + """ + while len(self.dead_osds) > 0: + self.log("reviving osd") + self.revive_osd() + while len(self.out_osds) > 0: + self.log("inning osd") + self.in_osd() + + def all_up_in(self): + """ + Make sure all osds are up and fully in. 
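thrash_pg_upmap_items() samples twice the pool size worth of OSD ids because 'osd pg-upmap-items' takes (from_osd, to_osd) pairs. A minimal sketch of how that command line is assembled, with made-up ids:

# Sketch of the 'osd pg-upmap-items' command built above.  The pgid and OSD
# ids are made up; with pool size 3, six ids form three (from, to) mappings.
import random

pgid = '2.1a'
pool_size = 3
candidate_osds = [0, 1, 2, 3, 4, 5]
random.shuffle(candidate_osds)
mappings = candidate_osds[:pool_size * 2]

cmd = ['osd', 'pg-upmap-items', pgid] + [str(x) for x in mappings]
print(' '.join(cmd))    # e.g. osd pg-upmap-items 2.1a 4 0 5 2 1 3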
+ """ + self.all_up(); + for osd in self.live_osds: + self.ceph_manager.raw_cluster_cmd('osd', 'reweight', + str(osd), str(1)) + self.ceph_manager.raw_cluster_cmd('osd', 'primary-affinity', + str(osd), str(1)) + + def do_join(self): + """ + Break out of this Ceph loop + """ + self.stopping = True + self.thread.get() + if self.sighup_delay: + self.log("joining the do_sighup greenlet") + self.sighup_thread.get() + if self.optrack_toggle_delay: + self.log("joining the do_optrack_toggle greenlet") + self.optrack_toggle_thread.join() + if self.dump_ops_enable == "true": + self.log("joining the do_dump_ops greenlet") + self.dump_ops_thread.join() + if self.noscrub_toggle_delay: + self.log("joining the do_noscrub_toggle greenlet") + self.noscrub_toggle_thread.join() + + def grow_pool(self): + """ + Increase the size of the pool + """ + pool = self.ceph_manager.get_pool() + if pool is None: + return + self.log("Growing pool %s" % (pool,)) + if self.ceph_manager.expand_pool(pool, + self.config.get('pool_grow_by', 10), + self.max_pgs): + self.pools_to_fix_pgp_num.add(pool) + + def shrink_pool(self): + """ + Decrease the size of the pool + """ + pool = self.ceph_manager.get_pool() + if pool is None: + return + _ = self.ceph_manager.get_pool_pg_num(pool) + self.log("Shrinking pool %s" % (pool,)) + if self.ceph_manager.contract_pool( + pool, + self.config.get('pool_shrink_by', 10), + self.min_pgs): + self.pools_to_fix_pgp_num.add(pool) + + def fix_pgp_num(self, pool=None): + """ + Fix number of pgs in pool. + """ + if pool is None: + pool = self.ceph_manager.get_pool() + if not pool: + return + force = False + else: + force = True + self.log("fixing pg num pool %s" % (pool,)) + if self.ceph_manager.set_pool_pgpnum(pool, force): + self.pools_to_fix_pgp_num.discard(pool) + + def test_pool_min_size(self): + """ + Loop to selectively push PGs below their min_size and test that recovery + still occurs. + """ + self.log("test_pool_min_size") + self.all_up() + time.sleep(60) # buffer time for recovery to start. 
+ self.ceph_manager.wait_for_recovery( + timeout=self.config.get('timeout') + ) + minout = int(self.config.get("min_out", 1)) + minlive = int(self.config.get("min_live", 2)) + mindead = int(self.config.get("min_dead", 1)) + self.log("doing min_size thrashing") + self.ceph_manager.wait_for_clean(timeout=180) + assert self.ceph_manager.is_clean(), \ + 'not clean before minsize thrashing starts' + while not self.stopping: + # look up k and m from all the pools on each loop, in case it + # changes as the cluster runs + k = 0 + m = 99 + has_pools = False + pools_json = self.ceph_manager.get_osd_dump_json()['pools'] + + for pool_json in pools_json: + pool = pool_json['pool_name'] + has_pools = True + pool_type = pool_json['type'] # 1 for rep, 3 for ec + min_size = pool_json['min_size'] + self.log("pool {pool} min_size is {min_size}".format(pool=pool,min_size=min_size)) + try: + ec_profile = self.ceph_manager.get_pool_property(pool, 'erasure_code_profile') + if pool_type != PoolType.ERASURE_CODED: + continue + ec_profile = pool_json['erasure_code_profile'] + ec_profile_json = self.ceph_manager.raw_cluster_cmd( + 'osd', + 'erasure-code-profile', + 'get', + ec_profile, + '--format=json') + ec_json = json.loads(ec_profile_json) + local_k = int(ec_json['k']) + local_m = int(ec_json['m']) + self.log("pool {pool} local_k={k} local_m={m}".format(pool=pool, + k=local_k, m=local_m)) + if local_k > k: + self.log("setting k={local_k} from previous {k}".format(local_k=local_k, k=k)) + k = local_k + if local_m < m: + self.log("setting m={local_m} from previous {m}".format(local_m=local_m, m=m)) + m = local_m + except CommandFailedError: + self.log("failed to read erasure_code_profile. %s was likely removed", pool) + continue + + if has_pools : + self.log("using k={k}, m={m}".format(k=k,m=m)) + else: + self.log("No pools yet, waiting") + time.sleep(5) + continue + + if minout > len(self.out_osds): # kill OSDs and mark out + self.log("forced to out an osd") + self.kill_osd(mark_out=True) + continue + elif mindead > len(self.dead_osds): # kill OSDs but force timeout + self.log("forced to kill an osd") + self.kill_osd() + continue + else: # make mostly-random choice to kill or revive OSDs + minup = max(minlive, k) + rand_val = random.uniform(0, 1) + self.log("choosing based on number of live OSDs and rand val {rand}".\ + format(rand=rand_val)) + if len(self.live_osds) > minup+1 and rand_val < 0.5: + # chose to knock out as many OSDs as we can w/out downing PGs + + most_killable = min(len(self.live_osds) - minup, m) + self.log("chose to kill {n} OSDs".format(n=most_killable)) + for i in range(1, most_killable): + self.kill_osd(mark_out=True) + time.sleep(10) + # try a few times since there might be a concurrent pool + # creation or deletion + with safe_while( + sleep=25, tries=5, + action='check for active or peered') as proceed: + while proceed(): + if self.ceph_manager.all_active_or_peered(): + break + self.log('not all PGs are active or peered') + else: # chose to revive OSDs, bring up a random fraction of the dead ones + self.log("chose to revive osds") + for i in range(1, int(rand_val * len(self.dead_osds))): + self.revive_osd(i) + + # let PGs repair themselves or our next knockout might kill one + self.ceph_manager.wait_for_clean(timeout=self.config.get('timeout')) + + # / while not self.stopping + self.all_up_in() + + self.ceph_manager.wait_for_recovery( + timeout=self.config.get('timeout') + ) + + def inject_pause(self, conf_key, duration, check_after, should_be_down): + """ + Pause injection testing. 
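test_pool_min_size() reads k and m from each erasure-code profile and uses m to cap how many OSDs it will down at once while keeping PGs serviceable. A runnable sketch of that bookkeeping against canned profile JSON (the JSON is sample data shaped like 'ceph osd erasure-code-profile get <name> --format=json' output):

# Sketch of the k/m bookkeeping in test_pool_min_size(): m bounds how many
# OSDs the thrasher downs in one pass, and k raises the floor of live OSDs.
import json

profile_json = '{"k": "4", "m": "2", "plugin": "jerasure"}'   # canned sample
ec = json.loads(profile_json)
k, m = int(ec['k']), int(ec['m'])

live_osds = list(range(8))
minlive = 2
minup = max(minlive, k)
most_killable = min(len(live_osds) - minup, m)
print('can kill up to %d OSDs at once' % most_killable)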
Check for osd being down when finished. + """ + the_one = random.choice(self.live_osds) + self.log("inject_pause on osd.{osd}".format(osd=the_one)) + self.log( + "Testing {key} pause injection for duration {duration}".format( + key=conf_key, + duration=duration + )) + self.log( + "Checking after {after}, should_be_down={shouldbedown}".format( + after=check_after, + shouldbedown=should_be_down + )) + self.ceph_manager.set_config(the_one, **{conf_key: duration}) + if not should_be_down: + return + time.sleep(check_after) + status = self.ceph_manager.get_osd_status() + assert the_one in status['down'] + time.sleep(duration - check_after + 20) + status = self.ceph_manager.get_osd_status() + assert not the_one in status['down'] + + def test_backfill_full(self): + """ + Test backfills stopping when the replica fills up. + + First, use injectfull admin command to simulate a now full + osd by setting it to 0 on all of the OSDs. + + Second, on a random subset, set + osd_debug_skip_full_check_in_backfill_reservation to force + the more complicated check in do_scan to be exercised. + + Then, verify that all backfillings stop. + """ + self.log("injecting backfill full") + for i in self.live_osds: + self.ceph_manager.set_config( + i, + osd_debug_skip_full_check_in_backfill_reservation= + random.choice(['false', 'true'])) + self.ceph_manager.osd_admin_socket(i, command=['injectfull', 'backfillfull'], + check_status=True, timeout=30, stdout=DEVNULL) + for i in range(30): + status = self.ceph_manager.compile_pg_status() + if 'backfilling' not in status.keys(): + break + self.log( + "waiting for {still_going} backfillings".format( + still_going=status.get('backfilling'))) + time.sleep(1) + assert('backfilling' not in self.ceph_manager.compile_pg_status().keys()) + for i in self.live_osds: + self.ceph_manager.set_config( + i, + osd_debug_skip_full_check_in_backfill_reservation='false') + self.ceph_manager.osd_admin_socket(i, command=['injectfull', 'none'], + check_status=True, timeout=30, stdout=DEVNULL) + + + def generate_random_sharding(self): + prefixes = [ + 'm','O','P','L' + ] + new_sharding = '' + for prefix in prefixes: + choose = random.choice([False, True]) + if not choose: + continue + if new_sharding != '': + new_sharding = new_sharding + ' ' + columns = random.randint(1, 5) + do_hash = random.choice([False, True]) + if do_hash: + low_hash = random.choice([0, 5, 8]) + do_high_hash = random.choice([False, True]) + if do_high_hash: + high_hash = random.choice([8, 16, 30]) + low_hash + new_sharding = new_sharding + prefix + '(' + str(columns) + ',' + str(low_hash) + '-' + str(high_hash) + ')' + else: + new_sharding = new_sharding + prefix + '(' + str(columns) + ',' + str(low_hash) + '-)' + else: + if columns == 1: + new_sharding = new_sharding + prefix + else: + new_sharding = new_sharding + prefix + '(' + str(columns) + ')' + return new_sharding + + def test_bluestore_reshard_action(self): + """ + Test if resharding of bluestore works properly. + If bluestore is not used, or bluestore is in version that + does not support sharding, skip. 
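generate_random_sharding() emits a space-separated list of RocksDB column prefixes, optionally with a '(columns,low-high)' hash-range suffix, and that string is later handed to ceph-bluestore-tool. An illustrative spec and the reshard invocation it would feed; the concrete spec and the OSD path are placeholders, only the format comes from the generator above.

# Example sharding description in the format produced by
# generate_random_sharding(); the exact spec below is invented for illustration.
example_sharding = 'm(3) O(3,0-13) L P'

reshard_cmd = [
    'ceph-bluestore-tool',
    '--path', '/var/lib/ceph/osd/ceph-0',   # placeholder OSD path
    '--sharding', example_sharding,
    'reshard',
]
print(' '.join(reshard_cmd))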
+ """ + + osd = random.choice(self.dead_osds) + remote = self.ceph_manager.find_remote('osd', osd) + FSPATH = self.ceph_manager.get_filepath() + + prefix = [ + '--no-mon-config', + '--log-file=/var/log/ceph/bluestore_tool.$pid.log', + '--log-level=10', + '--path', FSPATH.format(id=osd) + ] + + # sanity check if bluestore-tool accessible + self.log('checking if target objectstore is bluestore on osd.%s' % osd) + cmd = prefix + [ + 'show-label' + ] + proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd) + if proc.exitstatus != 0: + raise Exception("ceph-bluestore-tool access failed.") + + # check if sharding is possible + self.log('checking if target bluestore supports sharding on osd.%s' % osd) + cmd = prefix + [ + 'show-sharding' + ] + proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd) + if proc.exitstatus != 0: + self.log("Unable to test resharding, " + "ceph-bluestore-tool does not support it.") + return + + # now go for reshard to something else + self.log('applying new sharding to bluestore on osd.%s' % osd) + new_sharding = self.config.get('bluestore_new_sharding','random') + + if new_sharding == 'random': + self.log('generate random sharding') + new_sharding = self.generate_random_sharding() + + self.log("applying new sharding: " + new_sharding) + cmd = prefix + [ + '--sharding', new_sharding, + 'reshard' + ] + proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd) + if proc.exitstatus != 0: + raise Exception("ceph-bluestore-tool resharding failed.") + + # now do fsck to + self.log('running fsck to verify new sharding on osd.%s' % osd) + cmd = prefix + [ + 'fsck' + ] + proc = self.run_ceph_bluestore_tool(remote, 'osd.%s' % osd, cmd) + if proc.exitstatus != 0: + raise Exception("ceph-bluestore-tool fsck failed.") + self.log('resharding successfully completed') + + def test_bluestore_reshard(self): + """ + 1) kills an osd + 2) reshards bluestore on killed osd + 3) revives the osd + """ + self.log('test_bluestore_reshard started') + self.kill_osd(mark_down=True, mark_out=True) + self.test_bluestore_reshard_action() + self.revive_osd() + self.log('test_bluestore_reshard completed') + + + def test_map_discontinuity(self): + """ + 1) Allows the osds to recover + 2) kills an osd + 3) allows the remaining osds to recover + 4) waits for some time + 5) revives the osd + This sequence should cause the revived osd to have to handle + a map gap since the mons would have trimmed + """ + self.log("test_map_discontinuity") + while len(self.in_osds) < (self.minin + 1): + self.in_osd() + self.log("Waiting for recovery") + self.ceph_manager.wait_for_all_osds_up( + timeout=self.config.get('timeout') + ) + # now we wait 20s for the pg status to change, if it takes longer, + # the test *should* fail! + time.sleep(20) + self.ceph_manager.wait_for_clean( + timeout=self.config.get('timeout') + ) + + # now we wait 20s for the backfill replicas to hear about the clean + time.sleep(20) + self.log("Recovered, killing an osd") + self.kill_osd(mark_down=True, mark_out=True) + self.log("Waiting for clean again") + self.ceph_manager.wait_for_clean( + timeout=self.config.get('timeout') + ) + self.log("Waiting for trim") + time.sleep(int(self.config.get("map_discontinuity_sleep_time", 40))) + self.revive_osd() + + def choose_action(self): + """ + Random action selector. 
+ """ + chance_down = self.config.get('chance_down', 0.4) + _ = self.config.get('chance_test_min_size', 0) + chance_test_backfill_full = \ + self.config.get('chance_test_backfill_full', 0) + if isinstance(chance_down, int): + chance_down = float(chance_down) / 100 + minin = self.minin + minout = int(self.config.get("min_out", 0)) + minlive = int(self.config.get("min_live", 2)) + mindead = int(self.config.get("min_dead", 0)) + + self.log('choose_action: min_in %d min_out ' + '%d min_live %d min_dead %d ' + 'chance_down %.2f' % + (minin, minout, minlive, mindead, chance_down)) + actions = [] + if len(self.in_osds) > minin: + actions.append((self.out_osd, 1.0,)) + if len(self.live_osds) > minlive and chance_down > 0: + actions.append((self.kill_osd, chance_down,)) + if len(self.out_osds) > minout: + actions.append((self.in_osd, 1.7,)) + if len(self.dead_osds) > mindead: + actions.append((self.revive_osd, 1.0,)) + if self.config.get('thrash_primary_affinity', True): + actions.append((self.primary_affinity, 1.0,)) + actions.append((self.reweight_osd_or_by_util, + self.config.get('reweight_osd', .5),)) + actions.append((self.grow_pool, + self.config.get('chance_pgnum_grow', 0),)) + actions.append((self.shrink_pool, + self.config.get('chance_pgnum_shrink', 0),)) + actions.append((self.fix_pgp_num, + self.config.get('chance_pgpnum_fix', 0),)) + actions.append((self.test_pool_min_size, + self.config.get('chance_test_min_size', 0),)) + actions.append((self.test_backfill_full, + chance_test_backfill_full,)) + if self.chance_thrash_cluster_full > 0: + actions.append((self.thrash_cluster_full, self.chance_thrash_cluster_full,)) + if self.chance_thrash_pg_upmap > 0: + actions.append((self.thrash_pg_upmap, self.chance_thrash_pg_upmap,)) + if self.chance_thrash_pg_upmap_items > 0: + actions.append((self.thrash_pg_upmap_items, self.chance_thrash_pg_upmap_items,)) + if self.chance_force_recovery > 0: + actions.append((self.force_cancel_recovery, self.chance_force_recovery)) + + for key in ['heartbeat_inject_failure', 'filestore_inject_stall']: + for scenario in [ + (lambda: + self.inject_pause(key, + self.config.get('pause_short', 3), + 0, + False), + self.config.get('chance_inject_pause_short', 1),), + (lambda: + self.inject_pause(key, + self.config.get('pause_long', 80), + self.config.get('pause_check_after', 70), + True), + self.config.get('chance_inject_pause_long', 0),)]: + actions.append(scenario) + + # only consider resharding if objectstore is bluestore + cluster_name = self.ceph_manager.cluster + cluster = self.ceph_manager.ctx.ceph[cluster_name] + if cluster.conf.get('osd', {}).get('osd objectstore', 'bluestore') == 'bluestore': + actions.append((self.test_bluestore_reshard, + self.config.get('chance_bluestore_reshard', 0),)) + + total = sum([y for (x, y) in actions]) + val = random.uniform(0, total) + for (action, prob) in actions: + if val < prob: + return action + val -= prob + return None + + def do_thrash(self): + """ + _do_thrash() wrapper. + """ + try: + self._do_thrash() + except Exception as e: + # See _run exception comment for MDSThrasher + self.set_thrasher_exception(e) + self.logger.exception("exception:") + # Allow successful completion so gevent doesn't see an exception. + # The DaemonWatchdog will observe the error and tear down the test. + + @log_exc + def do_sighup(self): + """ + Loops and sends signal.SIGHUP to a random live osd. + + Loop delay is controlled by the config value sighup_delay. 
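choose_action() ends with a single weighted random pick over the accumulated (action, weight) list. A minimal, self-contained sketch of that selection logic:

# Weighted pick as done at the end of choose_action(): one uniform draw over
# the total weight, then walk the list subtracting weights until it lands.
import random

def pick(actions):
    """actions: list of (callable, weight) pairs, as choose_action builds."""
    total = sum(weight for _, weight in actions)
    val = random.uniform(0, total)
    for action, weight in actions:
        if val < weight:
            return action
        val -= weight
    return None     # only if the draw hits the exact total, matching the original

actions = [(lambda: 'out_osd', 1.0),
           (lambda: 'kill_osd', 0.4),
           (lambda: 'revive_osd', 1.0)]
chosen = pick(actions)
print(chosen() if chosen else None)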
+ """ + delay = float(self.sighup_delay) + self.log("starting do_sighup with a delay of {0}".format(delay)) + while not self.stopping: + osd = random.choice(self.live_osds) + self.ceph_manager.signal_osd(osd, signal.SIGHUP, silent=True) + time.sleep(delay) + + @log_exc + def do_optrack_toggle(self): + """ + Loops and toggle op tracking to all osds. + + Loop delay is controlled by the config value optrack_toggle_delay. + """ + delay = float(self.optrack_toggle_delay) + osd_state = "true" + self.log("starting do_optrack_toggle with a delay of {0}".format(delay)) + while not self.stopping: + if osd_state == "true": + osd_state = "false" + else: + osd_state = "true" + try: + self.ceph_manager.inject_args('osd', '*', + 'osd_enable_op_tracker', + osd_state) + except CommandFailedError: + self.log('Failed to tell all osds, ignoring') + gevent.sleep(delay) + + @log_exc + def do_dump_ops(self): + """ + Loops and does op dumps on all osds + """ + self.log("starting do_dump_ops") + while not self.stopping: + for osd in self.live_osds: + # Ignore errors because live_osds is in flux + self.ceph_manager.osd_admin_socket(osd, command=['dump_ops_in_flight'], + check_status=False, timeout=30, stdout=DEVNULL) + self.ceph_manager.osd_admin_socket(osd, command=['dump_blocked_ops'], + check_status=False, timeout=30, stdout=DEVNULL) + self.ceph_manager.osd_admin_socket(osd, command=['dump_historic_ops'], + check_status=False, timeout=30, stdout=DEVNULL) + gevent.sleep(0) + + @log_exc + def do_noscrub_toggle(self): + """ + Loops and toggle noscrub flags + + Loop delay is controlled by the config value noscrub_toggle_delay. + """ + delay = float(self.noscrub_toggle_delay) + scrub_state = "none" + self.log("starting do_noscrub_toggle with a delay of {0}".format(delay)) + while not self.stopping: + if scrub_state == "none": + self.ceph_manager.raw_cluster_cmd('osd', 'set', 'noscrub') + scrub_state = "noscrub" + elif scrub_state == "noscrub": + self.ceph_manager.raw_cluster_cmd('osd', 'set', 'nodeep-scrub') + scrub_state = "both" + elif scrub_state == "both": + self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'noscrub') + scrub_state = "nodeep-scrub" + else: + self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'nodeep-scrub') + scrub_state = "none" + gevent.sleep(delay) + self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'noscrub') + self.ceph_manager.raw_cluster_cmd('osd', 'unset', 'nodeep-scrub') + + @log_exc + def _do_thrash(self): + """ + Loop to select random actions to thrash ceph manager with. 
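do_noscrub_toggle() cycles the cluster through four scrub-flag states so scrubbing is repeatedly disabled and re-enabled while thrashing runs. The same state machine written out as a table, with raw_cluster_cmd replaced by print() so a full cycle can be traced without a cluster:

# State machine mirrored from do_noscrub_toggle(): state -> (verb, flag, next).
def raw_cluster_cmd(*args):
    print('ceph ' + ' '.join(args))

transitions = {
    'none':         ('set',   'noscrub',      'noscrub'),
    'noscrub':      ('set',   'nodeep-scrub', 'both'),
    'both':         ('unset', 'noscrub',      'nodeep-scrub'),
    'nodeep-scrub': ('unset', 'nodeep-scrub', 'none'),
}

state = 'none'
for _ in range(4):                      # one full cycle of the four states
    verb, flag, state = transitions[state]
    raw_cluster_cmd('osd', verb, flag)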
+ """ + cleanint = self.config.get("clean_interval", 60) + scrubint = self.config.get("scrub_interval", -1) + maxdead = self.config.get("max_dead", 0) + delay = self.config.get("op_delay", 5) + self.rerrosd = self.live_osds[0] + if self.random_eio > 0: + self.ceph_manager.inject_args('osd', self.rerrosd, + 'filestore_debug_random_read_err', + self.random_eio) + self.ceph_manager.inject_args('osd', self.rerrosd, + 'bluestore_debug_random_read_err', + self.random_eio) + self.log("starting do_thrash") + while not self.stopping: + to_log = [str(x) for x in ["in_osds: ", self.in_osds, + "out_osds: ", self.out_osds, + "dead_osds: ", self.dead_osds, + "live_osds: ", self.live_osds]] + self.log(" ".join(to_log)) + if random.uniform(0, 1) < (float(delay) / cleanint): + while len(self.dead_osds) > maxdead: + self.revive_osd() + for osd in self.in_osds: + self.ceph_manager.raw_cluster_cmd('osd', 'reweight', + str(osd), str(1)) + if random.uniform(0, 1) < float( + self.config.get('chance_test_map_discontinuity', 0)) \ + and len(self.live_osds) > 5: # avoid m=2,k=2 stall, w/ some buffer for crush being picky + self.test_map_discontinuity() + else: + self.ceph_manager.wait_for_recovery( + timeout=self.config.get('timeout') + ) + time.sleep(self.clean_wait) + if scrubint > 0: + if random.uniform(0, 1) < (float(delay) / scrubint): + self.log('Scrubbing while thrashing being performed') + Scrubber(self.ceph_manager, self.config) + self.choose_action()() + time.sleep(delay) + self.all_up() + if self.random_eio > 0: + self.ceph_manager.inject_args('osd', self.rerrosd, + 'filestore_debug_random_read_err', '0.0') + self.ceph_manager.inject_args('osd', self.rerrosd, + 'bluestore_debug_random_read_err', '0.0') + for pool in list(self.pools_to_fix_pgp_num): + if self.ceph_manager.get_pool_pg_num(pool) > 0: + self.fix_pgp_num(pool) + self.pools_to_fix_pgp_num.clear() + for service, opt, saved_value in self.saved_options: + self.ceph_manager.inject_args(service, '*', opt, saved_value) + self.saved_options = [] + self.all_up_in() + + +class ObjectStoreTool: + + def __init__(self, manager, pool, **kwargs): + self.manager = manager + self.pool = pool + self.osd = kwargs.get('osd', None) + self.object_name = kwargs.get('object_name', None) + self.do_revive = kwargs.get('do_revive', True) + if self.osd and self.pool and self.object_name: + if self.osd == "primary": + self.osd = self.manager.get_object_primary(self.pool, + self.object_name) + assert self.osd is not None + if self.object_name: + self.pgid = self.manager.get_object_pg_with_shard(self.pool, + self.object_name, + self.osd) + self.remote = next(iter(self.manager.ctx.\ + cluster.only('osd.{o}'.format(o=self.osd)).remotes.keys())) + path = self.manager.get_filepath().format(id=self.osd) + self.paths = ("--data-path {path} --journal-path {path}/journal". + format(path=path)) + + def build_cmd(self, options, args, stdin): + lines = [] + if self.object_name: + lines.append("object=$(sudo adjust-ulimits ceph-objectstore-tool " + "{paths} --pgid {pgid} --op list |" + "grep '\"oid\":\"{name}\"')". + format(paths=self.paths, + pgid=self.pgid, + name=self.object_name)) + args = '"$object" ' + args + options += " --pgid {pgid}".format(pgid=self.pgid) + cmd = ("sudo adjust-ulimits ceph-objectstore-tool {paths} {options} {args}". + format(paths=self.paths, + args=args, + options=options)) + if stdin: + cmd = ("echo {payload} | base64 --decode | {cmd}". 
+ format(payload=base64.encode(stdin), + cmd=cmd)) + lines.append(cmd) + return "\n".join(lines) + + def run(self, options, args): + self.manager.kill_osd(self.osd) + cmd = self.build_cmd(options, args, None) + self.manager.log(cmd) + try: + proc = self.remote.run(args=['bash', '-e', '-x', '-c', cmd], + check_status=False, + stdout=BytesIO(), + stderr=BytesIO()) + proc.wait() + if proc.exitstatus != 0: + self.manager.log("failed with " + str(proc.exitstatus)) + error = proc.stdout.getvalue().decode() + " " + \ + proc.stderr.getvalue().decode() + raise Exception(error) + finally: + if self.do_revive: + self.manager.revive_osd(self.osd) + self.manager.wait_till_osd_is_up(self.osd, 300) + + +# XXX: this class has nothing to do with the Ceph daemon (ceph-mgr) of +# the same name. +class CephManager: + """ + Ceph manager object. + Contains several local functions that form a bulk of this module. + + :param controller: the remote machine where the Ceph commands should be + executed + :param ctx: the cluster context + :param config: path to Ceph config file + :param logger: for logging messages + :param cluster: name of the Ceph cluster + """ + + def __init__(self, controller, ctx=None, config=None, logger=None, + cluster='ceph', cephadm=False, rook=False) -> None: + self.lock = threading.RLock() + self.ctx = ctx + self.config = config + self.controller = controller + self.next_pool_id = 0 + self.cluster = cluster + + if (logger): + self.log = lambda x: logger.info(x) + else: + def tmp(x): + """ + implement log behavior. + """ + print(x) + self.log = tmp + + if self.config is None: + self.config = dict() + + # NOTE: These variables are meant to be overriden by vstart_runner.py. + self.rook = rook + self.cephadm = cephadm + self.testdir = teuthology.get_testdir(self.ctx) + # prefix args for ceph cmds to be executed + pre = ['adjust-ulimits', 'ceph-coverage', + f'{self.testdir}/archive/coverage'] + self.CEPH_CMD = ['sudo'] + pre + ['timeout', '120', 'ceph', + '--cluster', self.cluster] + self.RADOS_CMD = pre + ['rados', '--cluster', self.cluster] + self.run_ceph_w_prefix = ['sudo', 'daemon-helper', 'kill', 'ceph', + '--cluster', self.cluster] + + pools = self.list_pools() + self.pools = {} + for pool in pools: + # we may race with a pool deletion; ignore failures here + try: + self.pools[pool] = self.get_pool_int_property(pool, 'pg_num') + except CommandFailedError: + self.log('Failed to get pg_num from pool %s, ignoring' % pool) + + def ceph(self, cmd, **kwargs): + """ + Simple Ceph admin command wrapper around run_cluster_cmd. + """ + + kwargs.pop('args', None) + args = shlex.split(cmd) + stdout = kwargs.pop('stdout', StringIO()) + stderr = kwargs.pop('stderr', StringIO()) + return self.run_cluster_cmd(args=args, stdout=stdout, stderr=stderr, **kwargs) + + def run_cluster_cmd(self, **kwargs): + """ + Run a Ceph command and return the object representing the process + for the command. 
+ + Accepts arguments same as that of teuthology.orchestra.run.run() + """ + if isinstance(kwargs['args'], str): + kwargs['args'] = shlex.split(kwargs['args']) + elif isinstance(kwargs['args'], tuple): + kwargs['args'] = list(kwargs['args']) + + prefixcmd = [] + timeoutcmd = kwargs.pop('timeoutcmd', None) + if timeoutcmd is not None: + prefixcmd += ['timeout', str(timeoutcmd)] + + if self.cephadm: + prefixcmd += ['ceph'] + cmd = prefixcmd + list(kwargs['args']) + return shell(self.ctx, self.cluster, self.controller, + args=cmd, + stdout=StringIO(), + check_status=kwargs.get('check_status', True)) + elif self.rook: + prefixcmd += ['ceph'] + cmd = prefixcmd + list(kwargs['args']) + return toolbox(self.ctx, self.cluster, + args=cmd, + stdout=StringIO(), + check_status=kwargs.get('check_status', True)) + else: + kwargs['args'] = prefixcmd + self.CEPH_CMD + kwargs['args'] + return self.controller.run(**kwargs) + + def raw_cluster_cmd(self, *args, **kwargs) -> str: + """ + Start ceph on a raw cluster. Return count + """ + if kwargs.get('args') is None and args: + kwargs['args'] = args + kwargs['stdout'] = kwargs.pop('stdout', StringIO()) + return self.run_cluster_cmd(**kwargs).stdout.getvalue() + + def raw_cluster_cmd_result(self, *args, **kwargs): + """ + Start ceph on a cluster. Return success or failure information. + """ + if kwargs.get('args') is None and args: + kwargs['args'] = args + kwargs['check_status'] = False + return self.run_cluster_cmd(**kwargs).exitstatus + + def get_keyring(self, client_id): + """ + Return keyring for the given client. + + :param client_id: str + :return keyring: str + """ + if client_id.find('client.') != -1: + client_id = client_id.replace('client.', '') + + keyring = self.run_cluster_cmd(args=f'auth get client.{client_id}', + stdout=StringIO()).\ + stdout.getvalue().strip() + + assert isinstance(keyring, str) and keyring != '' + return keyring + + def run_ceph_w(self, watch_channel=None): + """ + Execute "ceph -w" in the background with stdout connected to a BytesIO, + and return the RemoteProcess. + + :param watch_channel: Specifies the channel to be watched. This can be + 'cluster', 'audit', ... + :type watch_channel: str + """ + args = self.run_ceph_w_prefix + ['-w'] + if watch_channel is not None: + args.append("--watch-channel") + args.append(watch_channel) + return self.controller.run(args=args, wait=False, stdout=StringIO(), stdin=run.PIPE) + + def get_mon_socks(self): + """ + Get monitor sockets. + + :return socks: tuple of strings; strings are individual sockets. + """ + from json import loads + + output = loads(self.raw_cluster_cmd(['--format=json', 'mon', 'dump'])) + socks = [] + for mon in output['mons']: + for addrvec_mem in mon['public_addrs']['addrvec']: + socks.append(addrvec_mem['addr']) + return tuple(socks) + + def get_msgrv1_mon_socks(self): + """ + Get monitor sockets that use msgrv1 to operate. + + :return socks: tuple of strings; strings are individual sockets. + """ + from json import loads + + output = loads(self.raw_cluster_cmd('--format=json', 'mon', 'dump')) + socks = [] + for mon in output['mons']: + for addrvec_mem in mon['public_addrs']['addrvec']: + if addrvec_mem['type'] == 'v1': + socks.append(addrvec_mem['addr']) + return tuple(socks) + + def get_msgrv2_mon_socks(self): + """ + Get monitor sockets that use msgrv2 to operate. + + :return socks: tuple of strings; strings are individual sockets. 
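Most callers of raw_cluster_cmd() ask for '--format=json' and parse the returned stdout string, as get_mon_socks() does above. A self-contained sketch of that pattern with a stand-in manager that returns canned monitor output so it runs anywhere:

# Stand-in for CephManager that returns canned 'mon dump' JSON; the parsing
# below mirrors get_mon_socks().
import json

class FakeManager:
    def raw_cluster_cmd(self, *args):
        return json.dumps({'mons': [
            {'public_addrs': {'addrvec': [
                {'type': 'v2', 'addr': '10.0.0.1:3300'},
                {'type': 'v1', 'addr': '10.0.0.1:6789'}]}}]})

def get_mon_socks(manager):
    output = json.loads(manager.raw_cluster_cmd('--format=json', 'mon', 'dump'))
    return tuple(a['addr']
                 for mon in output['mons']
                 for a in mon['public_addrs']['addrvec'])

print(get_mon_socks(FakeManager()))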
+ """ + from json import loads + + output = loads(self.raw_cluster_cmd('--format=json', 'mon', 'dump')) + socks = [] + for mon in output['mons']: + for addrvec_mem in mon['public_addrs']['addrvec']: + if addrvec_mem['type'] == 'v2': + socks.append(addrvec_mem['addr']) + return tuple(socks) + + def flush_pg_stats(self, osds, no_wait=None, wait_for_mon=300): + """ + Flush pg stats from a list of OSD ids, ensuring they are reflected + all the way to the monitor. Luminous and later only. + + :param osds: list of OSDs to flush + :param no_wait: list of OSDs not to wait for seq id. by default, we + wait for all specified osds, but some of them could be + moved out of osdmap, so we cannot get their updated + stat seq from monitor anymore. in that case, you need + to pass a blocklist. + :param wait_for_mon: wait for mon to be synced with mgr. 0 to disable + it. (5 min by default) + """ + if no_wait is None: + no_wait = [] + + def flush_one_osd(osd: int, wait_for_mon: int): + need = int(self.raw_cluster_cmd('tell', 'osd.%d' % osd, 'flush_pg_stats')) + if not wait_for_mon: + return + if osd in no_wait: + return + got = 0 + while wait_for_mon > 0: + got = int(self.raw_cluster_cmd('osd', 'last-stat-seq', 'osd.%d' % osd)) + self.log('need seq {need} got {got} for osd.{osd}'.format( + need=need, got=got, osd=osd)) + if got >= need: + break + A_WHILE = 1 + time.sleep(A_WHILE) + wait_for_mon -= A_WHILE + else: + raise Exception('timed out waiting for mon to be updated with ' + 'osd.{osd}: {got} < {need}'. + format(osd=osd, got=got, need=need)) + + with parallel() as p: + for osd in osds: + p.spawn(flush_one_osd, osd, wait_for_mon) + + def flush_all_pg_stats(self): + self.flush_pg_stats(range(len(self.get_osd_dump()))) + + def do_rados(self, cmd, pool=None, namespace=None, remote=None, **kwargs): + """ + Execute a remote rados command. + """ + if remote is None: + remote = self.controller + + pre = self.RADOS_CMD + [] # deep-copying! + if pool is not None: + pre += ['--pool', pool] + if namespace is not None: + pre += ['--namespace', namespace] + pre.extend(cmd) + proc = remote.run( + args=pre, + wait=True, + **kwargs + ) + return proc + + def rados_write_objects(self, pool, num_objects, size, + timelimit, threads, cleanup=False): + """ + Write rados objects + Threads not used yet. + """ + args = [ + '--num-objects', num_objects, + '-b', size, + 'bench', timelimit, + 'write' + ] + if not cleanup: + args.append('--no-cleanup') + return self.do_rados(map(str, args), pool=pool) + + def do_put(self, pool, obj, fname, namespace=None): + """ + Implement rados put operation + """ + args = ['put', obj, fname] + return self.do_rados( + args, + check_status=False, + pool=pool, + namespace=namespace + ).exitstatus + + def do_get(self, pool, obj, fname='/dev/null', namespace=None): + """ + Implement rados get operation + """ + args = ['get', obj, fname] + return self.do_rados( + args, + check_status=False, + pool=pool, + namespace=namespace, + ).exitstatus + + def do_rm(self, pool, obj, namespace=None): + """ + Implement rados rm operation + """ + args = ['rm', obj] + return self.do_rados( + args, + check_status=False, + pool=pool, + namespace=namespace + ).exitstatus + + def osd_admin_socket(self, osd_id, command, check_status=True, timeout=0, stdout=None): + if stdout is None: + stdout = StringIO() + return self.admin_socket('osd', osd_id, command, check_status, timeout, stdout) + + def find_remote(self, service_type, service_id): + """ + Get the Remote for the host where a particular service runs. 
+ + :param service_type: 'mds', 'osd', 'client' + :param service_id: The second part of a role, e.g. '0' for + the role 'client.0' + :return: a Remote instance for the host where the + requested role is placed + """ + return get_remote(self.ctx, self.cluster, + service_type, service_id) + + def admin_socket(self, service_type, service_id, + command, check_status=True, timeout=0, stdout=None): + """ + Remotely start up ceph specifying the admin socket + :param command: a list of words to use as the command + to the admin socket + """ + if stdout is None: + stdout = StringIO() + + remote = self.find_remote(service_type, service_id) + + if self.cephadm: + return shell( + self.ctx, self.cluster, remote, + args=[ + 'ceph', 'daemon', '%s.%s' % (service_type, service_id), + ] + command, + stdout=stdout, + wait=True, + check_status=check_status, + ) + if self.rook: + assert False, 'not implemented' + + args = [ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + f'{self.testdir}/archive/coverage', + 'timeout', + str(timeout), + 'ceph', + '--cluster', + self.cluster, + '--admin-daemon', + '/var/run/ceph/{cluster}-{type}.{id}.asok'.format( + cluster=self.cluster, + type=service_type, + id=service_id), + ] + args.extend(command) + return remote.run( + args=args, + stdout=stdout, + wait=True, + check_status=check_status + ) + + def objectstore_tool(self, pool, options, args, **kwargs): + return ObjectStoreTool(self, pool, **kwargs).run(options, args) + + def get_pgid(self, pool, pgnum): + """ + :param pool: pool name + :param pgnum: pg number + :returns: a string representing this pg. + """ + poolnum = self.get_pool_num(pool) + pg_str = "{poolnum}.{pgnum}".format( + poolnum=poolnum, + pgnum=pgnum) + return pg_str + + def get_pg_replica(self, pool, pgnum): + """ + get replica for pool, pgnum (e.g. (data, 0)->0 + """ + pg_str = self.get_pgid(pool, pgnum) + output = self.raw_cluster_cmd("pg", "map", pg_str, '--format=json') + j = json.loads('\n'.join(output.split('\n')[1:])) + return int(j['acting'][-1]) + assert False + + def wait_for_pg_stats(func): + # both osd_mon_report_interval and mgr_stats_period are 5 seconds + # by default, and take the faulty injection in ms into consideration, + # 12 seconds are more than enough + delays = [1, 1, 2, 3, 5, 8, 13, 0] + @wraps(func) + def wrapper(self, *args, **kwargs): + exc = None + for delay in delays: + try: + return func(self, *args, **kwargs) + except AssertionError as e: + time.sleep(delay) + exc = e + raise exc + return wrapper + + def get_pg_primary(self, pool, pgnum): + """ + get primary for pool, pgnum (e.g. (data, 0)->0 + """ + pg_str = self.get_pgid(pool, pgnum) + output = self.raw_cluster_cmd("pg", "map", pg_str, '--format=json') + j = json.loads('\n'.join(output.split('\n')[1:])) + return int(j['acting'][0]) + assert False + + def get_pool_num(self, pool): + """ + get number for pool (e.g., data -> 2) + """ + return int(self.get_pool_dump(pool)['pool']) + + def list_pools(self): + """ + list all pool names + """ + osd_dump = self.get_osd_dump_json() + self.log(osd_dump['pools']) + return [str(i['pool_name']) for i in osd_dump['pools']] + + def clear_pools(self): + """ + remove all pools + """ + [self.remove_pool(i) for i in self.list_pools()] + + def kick_recovery_wq(self, osdnum): + """ + Run kick_recovery_wq on cluster. 
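For the non-cephadm case, admin_socket() talks to the daemon socket at /var/run/ceph/<cluster>-<type>.<id>.asok. A simplified sketch of the argument list it builds; the coverage and ulimit wrappers used by the real helper are omitted here, and the values are placeholders.

# Simplified admin-socket command builder following the path convention above.
def admin_socket_args(cluster, service_type, service_id, command, timeout=0):
    sock = '/var/run/ceph/{cluster}-{type}.{id}.asok'.format(
        cluster=cluster, type=service_type, id=service_id)
    return (['sudo', 'timeout', str(timeout),
             'ceph', '--cluster', cluster,
             '--admin-daemon', sock] + list(command))

print(' '.join(admin_socket_args('ceph', 'osd', '0', ['dump_ops_in_flight'])))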
+ """ + return self.raw_cluster_cmd( + 'tell', "osd.%d" % (int(osdnum),), + 'debug', + 'kick_recovery_wq', + '0') + + def wait_run_admin_socket(self, service_type, + service_id, args=['version'], timeout=75, stdout=None): + """ + If osd_admin_socket call succeeds, return. Otherwise wait + five seconds and try again. + """ + if stdout is None: + stdout = StringIO() + tries = 0 + while True: + proc = self.admin_socket(service_type, service_id, + args, check_status=False, stdout=stdout) + if proc.exitstatus == 0: + return proc + else: + tries += 1 + if (tries * 5) > timeout: + raise Exception('timed out waiting for admin_socket ' + 'to appear after {type}.{id} restart'. + format(type=service_type, + id=service_id)) + self.log("waiting on admin_socket for {type}-{id}, " + "{command}".format(type=service_type, + id=service_id, + command=args)) + time.sleep(5) + + def get_pool_dump(self, pool): + """ + get the osd dump part of a pool + """ + osd_dump = self.get_osd_dump_json() + for i in osd_dump['pools']: + if i['pool_name'] == pool: + return i + assert False + + def get_config(self, service_type, service_id, name): + """ + :param node: like 'mon.a' + :param name: the option name + """ + proc = self.wait_run_admin_socket(service_type, service_id, + ['config', 'show']) + j = json.loads(proc.stdout.getvalue()) + return j[name] + + def inject_args(self, service_type, service_id, name, value): + whom = '{0}.{1}'.format(service_type, service_id) + if isinstance(value, bool): + value = 'true' if value else 'false' + opt_arg = '--{name}={value}'.format(name=name, value=value) + self.raw_cluster_cmd('--', 'tell', whom, 'injectargs', opt_arg) + + def set_config(self, osdnum, **argdict): + """ + :param osdnum: osd number + :param argdict: dictionary containing values to set. + """ + for k, v in argdict.items(): + self.wait_run_admin_socket( + 'osd', osdnum, + ['config', 'set', str(k), str(v)]) + + def raw_cluster_status(self): + """ + Get status from cluster + """ + status = self.raw_cluster_cmd('status', '--format=json') + return json.loads(status) + + def raw_osd_status(self): + """ + Get osd status from cluster + """ + return self.raw_cluster_cmd('osd', 'dump') + + def get_osd_status(self): + """ + Get osd statuses sorted by states that the osds are in. + """ + osd_lines = list(filter( + lambda x: x.startswith('osd.') and (("up" in x) or ("down" in x)), + self.raw_osd_status().split('\n'))) + self.log(osd_lines) + in_osds = [int(i[4:].split()[0]) + for i in filter(lambda x: " in " in x, osd_lines)] + out_osds = [int(i[4:].split()[0]) + for i in filter(lambda x: " out " in x, osd_lines)] + up_osds = [int(i[4:].split()[0]) + for i in filter(lambda x: " up " in x, osd_lines)] + down_osds = [int(i[4:].split()[0]) + for i in filter(lambda x: " down " in x, osd_lines)] + dead_osds = [int(x.id_) + for x in filter(lambda x: + not x.running(), + self.ctx.daemons. 
+ iter_daemons_of_role('osd', self.cluster))] + live_osds = [int(x.id_) for x in + filter(lambda x: + x.running(), + self.ctx.daemons.iter_daemons_of_role('osd', + self.cluster))] + return {'in': in_osds, 'out': out_osds, 'up': up_osds, + 'down': down_osds, 'dead': dead_osds, 'live': live_osds, + 'raw': osd_lines} + + def get_num_pgs(self): + """ + Check cluster status for the number of pgs + """ + status = self.raw_cluster_status() + self.log(status) + return status['pgmap']['num_pgs'] + + def create_erasure_code_profile(self, profile_name, profile): + """ + Create an erasure code profile name that can be used as a parameter + when creating an erasure coded pool. + """ + with self.lock: + args = cmd_erasure_code_profile(profile_name, profile) + self.raw_cluster_cmd(*args) + + def create_pool_with_unique_name(self, pg_num=16, + erasure_code_profile_name=None, + min_size=None, + erasure_code_use_overwrites=False): + """ + Create a pool named unique_pool_X where X is unique. + """ + name = "" + with self.lock: + name = "unique_pool_%s" % (str(self.next_pool_id),) + self.next_pool_id += 1 + self.create_pool( + name, + pg_num, + erasure_code_profile_name=erasure_code_profile_name, + min_size=min_size, + erasure_code_use_overwrites=erasure_code_use_overwrites) + return name + + @contextlib.contextmanager + def pool(self, pool_name, pg_num=16, erasure_code_profile_name=None): + self.create_pool(pool_name, pg_num, erasure_code_profile_name) + yield + self.remove_pool(pool_name) + + def create_pool(self, pool_name, pg_num=16, + erasure_code_profile_name=None, + min_size=None, + erasure_code_use_overwrites=False): + """ + Create a pool named from the pool_name parameter. + :param pool_name: name of the pool being created. + :param pg_num: initial number of pgs. 
+ :param erasure_code_profile_name: if set and !None create an + erasure coded pool using the profile + :param erasure_code_use_overwrites: if true, allow overwrites + """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(pg_num, int) + assert pool_name not in self.pools + self.log("creating pool_name %s" % (pool_name,)) + if erasure_code_profile_name: + self.raw_cluster_cmd('osd', 'pool', 'create', + pool_name, str(pg_num), str(pg_num), + 'erasure', erasure_code_profile_name) + else: + self.raw_cluster_cmd('osd', 'pool', 'create', + pool_name, str(pg_num)) + if min_size is not None: + self.raw_cluster_cmd( + 'osd', 'pool', 'set', pool_name, + 'min_size', + str(min_size)) + if erasure_code_use_overwrites: + self.raw_cluster_cmd( + 'osd', 'pool', 'set', pool_name, + 'allow_ec_overwrites', + 'true') + self.raw_cluster_cmd( + 'osd', 'pool', 'application', 'enable', + pool_name, 'rados', '--yes-i-really-mean-it', + run.Raw('||'), 'true') + self.pools[pool_name] = pg_num + time.sleep(1) + + def add_pool_snap(self, pool_name, snap_name): + """ + Add pool snapshot + :param pool_name: name of pool to snapshot + :param snap_name: name of snapshot to take + """ + self.raw_cluster_cmd('osd', 'pool', 'mksnap', + str(pool_name), str(snap_name)) + + def remove_pool_snap(self, pool_name, snap_name): + """ + Remove pool snapshot + :param pool_name: name of pool to snapshot + :param snap_name: name of snapshot to remove + """ + self.raw_cluster_cmd('osd', 'pool', 'rmsnap', + str(pool_name), str(snap_name)) + + def remove_pool(self, pool_name): + """ + Remove the indicated pool + :param pool_name: Pool to be removed + """ + with self.lock: + assert isinstance(pool_name, str) + assert pool_name in self.pools + self.log("removing pool_name %s" % (pool_name,)) + del self.pools[pool_name] + self.raw_cluster_cmd('osd', 'pool', 'rm', pool_name, pool_name, + "--yes-i-really-really-mean-it") + + def get_pool(self): + """ + Pick a random pool + """ + with self.lock: + if self.pools: + return random.sample(self.pools.keys(), 1)[0] + + def get_pool_pg_num(self, pool_name): + """ + Return the number of pgs in the pool specified. + """ + with self.lock: + assert isinstance(pool_name, str) + if pool_name in self.pools: + return self.pools[pool_name] + return 0 + + def get_pool_property(self, pool_name, prop): + """ + :param pool_name: pool + :param prop: property to be checked. + :returns: property as string + """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(prop, str) + output = self.raw_cluster_cmd( + 'osd', + 'pool', + 'get', + pool_name, + prop) + return output.split()[1] + + def get_pool_int_property(self, pool_name, prop): + return int(self.get_pool_property(pool_name, prop)) + + def set_pool_property(self, pool_name, prop, val): + """ + :param pool_name: pool + :param prop: property to be set. + :param val: value to set. + + This routine retries if set operation fails. 
+ """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(prop, str) + assert isinstance(val, int) + tries = 0 + while True: + r = self.raw_cluster_cmd_result( + 'osd', + 'pool', + 'set', + pool_name, + prop, + str(val)) + if r != 11: # EAGAIN + break + tries += 1 + if tries > 50: + raise Exception('timed out getting EAGAIN ' + 'when setting pool property %s %s = %s' % + (pool_name, prop, val)) + self.log('got EAGAIN setting pool property, ' + 'waiting a few seconds...') + time.sleep(2) + + def expand_pool(self, pool_name, by, max_pgs): + """ + Increase the number of pgs in a pool + """ + with self.lock: + assert isinstance(pool_name, str) + assert isinstance(by, int) + assert pool_name in self.pools + if self.get_num_creating() > 0: + return False + if (self.pools[pool_name] + by) > max_pgs: + return False + self.log("increase pool size by %d" % (by,)) + new_pg_num = self.pools[pool_name] + by + self.set_pool_property(pool_name, "pg_num", new_pg_num) + self.pools[pool_name] = new_pg_num + return True + + def contract_pool(self, pool_name, by, min_pgs): + """ + Decrease the number of pgs in a pool + """ + with self.lock: + self.log('contract_pool %s by %s min %s' % ( + pool_name, str(by), str(min_pgs))) + assert isinstance(pool_name, str) + assert isinstance(by, int) + assert pool_name in self.pools + if self.get_num_creating() > 0: + self.log('too many creating') + return False + proj = self.pools[pool_name] - by + if proj < min_pgs: + self.log('would drop below min_pgs, proj %d, currently %d' % (proj,self.pools[pool_name],)) + return False + self.log("decrease pool size by %d" % (by,)) + new_pg_num = self.pools[pool_name] - by + self.set_pool_property(pool_name, "pg_num", new_pg_num) + self.pools[pool_name] = new_pg_num + return True + + def stop_pg_num_changes(self): + """ + Reset all pg_num_targets back to pg_num, canceling splits and merges + """ + self.log('Canceling any pending splits or merges...') + osd_dump = self.get_osd_dump_json() + try: + for pool in osd_dump['pools']: + if pool['pg_num'] != pool['pg_num_target']: + self.log('Setting pool %s (%d) pg_num %d -> %d' % + (pool['pool_name'], pool['pool'], + pool['pg_num_target'], + pool['pg_num'])) + self.raw_cluster_cmd('osd', 'pool', 'set', pool['pool_name'], + 'pg_num', str(pool['pg_num'])) + except KeyError: + # we don't support pg_num_target before nautilus + pass + + def set_pool_pgpnum(self, pool_name, force): + """ + Set pgpnum property of pool_name pool. 
+ """ + with self.lock: + assert isinstance(pool_name, str) + assert pool_name in self.pools + if not force and self.get_num_creating() > 0: + return False + self.set_pool_property(pool_name, 'pgp_num', self.pools[pool_name]) + return True + + def list_pg_unfound(self, pgid): + """ + return list of unfound pgs with the id specified + """ + r = None + offset = {} + while True: + out = self.raw_cluster_cmd('--', 'pg', pgid, 'list_unfound', + json.dumps(offset)) + j = json.loads(out) + if r is None: + r = j + else: + r['objects'].extend(j['objects']) + if not 'more' in j: + break + if j['more'] == 0: + break + offset = j['objects'][-1]['oid'] + if 'more' in r: + del r['more'] + return r + + def get_pg_stats(self): + """ + Dump the cluster and get pg stats + """ + out = self.raw_cluster_cmd('pg', 'dump', '--format=json') + j = json.loads('\n'.join(out.split('\n')[1:])) + try: + return j['pg_map']['pg_stats'] + except KeyError: + return j['pg_stats'] + + def get_osd_df(self, osdid): + """ + Get the osd df stats + """ + out = self.raw_cluster_cmd('osd', 'df', 'name', 'osd.{}'.format(osdid), + '--format=json') + j = json.loads('\n'.join(out.split('\n')[1:])) + return j['nodes'][0] + + def get_pool_df(self, name): + """ + Get the pool df stats + """ + out = self.raw_cluster_cmd('df', 'detail', '--format=json') + j = json.loads('\n'.join(out.split('\n')[1:])) + return next((p['stats'] for p in j['pools'] if p['name'] == name), + None) + + def get_pgids_to_force(self, backfill): + """ + Return the randomized list of PGs that can have their recovery/backfill forced + """ + j = self.get_pg_stats(); + pgids = [] + if backfill: + wanted = ['degraded', 'backfilling', 'backfill_wait'] + else: + wanted = ['recovering', 'degraded', 'recovery_wait'] + for pg in j: + status = pg['state'].split('+') + for t in wanted: + if random.random() > 0.5 and not ('forced_backfill' in status or 'forced_recovery' in status) and t in status: + pgids.append(pg['pgid']) + break + return pgids + + def get_pgids_to_cancel_force(self, backfill): + """ + Return the randomized list of PGs whose recovery/backfill priority is forced + """ + j = self.get_pg_stats(); + pgids = [] + if backfill: + wanted = 'forced_backfill' + else: + wanted = 'forced_recovery' + for pg in j: + status = pg['state'].split('+') + if wanted in status and random.random() > 0.5: + pgids.append(pg['pgid']) + return pgids + + def compile_pg_status(self): + """ + Return a histogram of pg state values + """ + ret = {} + j = self.get_pg_stats() + for pg in j: + for status in pg['state'].split('+'): + if status not in ret: + ret[status] = 0 + ret[status] += 1 + return ret + + @wait_for_pg_stats # type: ignore + def with_pg_state(self, pool, pgnum, check): + pgstr = self.get_pgid(pool, pgnum) + stats = self.get_single_pg_stats(pgstr) + assert(check(stats['state'])) + + @wait_for_pg_stats # type: ignore + def with_pg(self, pool, pgnum, check): + pgstr = self.get_pgid(pool, pgnum) + stats = self.get_single_pg_stats(pgstr) + return check(stats) + + def get_last_scrub_stamp(self, pool, pgnum): + """ + Get the timestamp of the last scrub. 
+ """ + stats = self.get_single_pg_stats(self.get_pgid(pool, pgnum)) + return stats["last_scrub_stamp"] + + def do_pg_scrub(self, pool, pgnum, stype): + """ + Scrub pg and wait for scrubbing to finish + """ + init = self.get_last_scrub_stamp(pool, pgnum) + RESEND_TIMEOUT = 120 # Must be a multiple of SLEEP_TIME + FATAL_TIMEOUT = RESEND_TIMEOUT * 3 + SLEEP_TIME = 10 + timer = 0 + while init == self.get_last_scrub_stamp(pool, pgnum): + assert timer < FATAL_TIMEOUT, "fatal timeout trying to " + stype + self.log("waiting for scrub type %s" % (stype,)) + if (timer % RESEND_TIMEOUT) == 0: + self.raw_cluster_cmd('pg', stype, self.get_pgid(pool, pgnum)) + # The first time in this loop is the actual request + if timer != 0 and stype == "repair": + self.log("WARNING: Resubmitted a non-idempotent repair") + time.sleep(SLEEP_TIME) + timer += SLEEP_TIME + + def wait_snap_trimming_complete(self, pool): + """ + Wait for snap trimming on pool to end + """ + POLL_PERIOD = 10 + FATAL_TIMEOUT = 600 + start = time.time() + poolnum = self.get_pool_num(pool) + poolnumstr = "%s." % (poolnum,) + while (True): + now = time.time() + if (now - start) > FATAL_TIMEOUT: + assert (now - start) < FATAL_TIMEOUT, \ + 'failed to complete snap trimming before timeout' + all_stats = self.get_pg_stats() + trimming = False + for pg in all_stats: + if (poolnumstr in pg['pgid']) and ('snaptrim' in pg['state']): + self.log("pg {pg} in trimming, state: {state}".format( + pg=pg['pgid'], + state=pg['state'])) + trimming = True + if not trimming: + break + self.log("{pool} still trimming, waiting".format(pool=pool)) + time.sleep(POLL_PERIOD) + + def get_single_pg_stats(self, pgid): + """ + Return pg for the pgid specified. + """ + all_stats = self.get_pg_stats() + + for pg in all_stats: + if pg['pgid'] == pgid: + return pg + + return None + + def get_object_pg_with_shard(self, pool, name, osdid): + """ + """ + pool_dump = self.get_pool_dump(pool) + object_map = self.get_object_map(pool, name) + if pool_dump["type"] == PoolType.ERASURE_CODED: + shard = object_map['acting'].index(osdid) + return "{pgid}s{shard}".format(pgid=object_map['pgid'], + shard=shard) + else: + return object_map['pgid'] + + def get_object_primary(self, pool, name): + """ + """ + object_map = self.get_object_map(pool, name) + return object_map['acting_primary'] + + def get_object_map(self, pool, name): + """ + osd map --format=json converted to a python object + :returns: the python object + """ + out = self.raw_cluster_cmd('--format=json', 'osd', 'map', pool, name) + return json.loads('\n'.join(out.split('\n')[1:])) + + def get_osd_dump_json(self): + """ + osd dump --format=json converted to a python object + :returns: the python object + """ + out = self.raw_cluster_cmd('osd', 'dump', '--format=json') + return json.loads('\n'.join(out.split('\n')[1:])) + + def get_osd_dump(self): + """ + Dump osds + :returns: all osds + """ + return self.get_osd_dump_json()['osds'] + + def get_osd_metadata(self): + """ + osd metadata --format=json converted to a python object + :returns: the python object containing osd metadata information + """ + out = self.raw_cluster_cmd('osd', 'metadata', '--format=json') + return json.loads('\n'.join(out.split('\n')[1:])) + + def get_mgr_dump(self): + out = self.raw_cluster_cmd('mgr', 'dump', '--format=json') + return json.loads(out) + + def get_stuck_pgs(self, type_, threshold): + """ + :returns: stuck pg information from the cluster + """ + out = self.raw_cluster_cmd('pg', 'dump_stuck', type_, str(threshold), + '--format=json') + return 
json.loads(out).get('stuck_pg_stats',[]) + + def get_num_unfound_objects(self): + """ + Check cluster status to get the number of unfound objects + """ + status = self.raw_cluster_status() + self.log(status) + return status['pgmap'].get('unfound_objects', 0) + + def get_num_creating(self): + """ + Find the number of pgs in creating mode. + """ + pgs = self.get_pg_stats() + num = 0 + for pg in pgs: + if 'creating' in pg['state']: + num += 1 + return num + + def get_num_active_clean(self): + """ + Find the number of active and clean pgs. + """ + pgs = self.get_pg_stats() + return self._get_num_active_clean(pgs) + + def _get_num_active_clean(self, pgs): + num = 0 + for pg in pgs: + if (pg['state'].count('active') and + pg['state'].count('clean') and + not pg['state'].count('stale')): + num += 1 + return num + + def get_num_active_recovered(self): + """ + Find the number of active and recovered pgs. + """ + pgs = self.get_pg_stats() + return self._get_num_active_recovered(pgs) + + def _get_num_active_recovered(self, pgs): + num = 0 + for pg in pgs: + if (pg['state'].count('active') and + not pg['state'].count('recover') and + not pg['state'].count('backfilling') and + not pg['state'].count('stale')): + num += 1 + return num + + def get_is_making_recovery_progress(self): + """ + Return whether there is recovery progress discernable in the + raw cluster status + """ + status = self.raw_cluster_status() + kps = status['pgmap'].get('recovering_keys_per_sec', 0) + bps = status['pgmap'].get('recovering_bytes_per_sec', 0) + ops = status['pgmap'].get('recovering_objects_per_sec', 0) + return kps > 0 or bps > 0 or ops > 0 + + def get_num_active(self): + """ + Find the number of active pgs. + """ + pgs = self.get_pg_stats() + return self._get_num_active(pgs) + + def _get_num_active(self, pgs): + num = 0 + for pg in pgs: + if pg['state'].count('active') and not pg['state'].count('stale'): + num += 1 + return num + + def get_num_down(self): + """ + Find the number of pgs that are down. + """ + pgs = self.get_pg_stats() + num = 0 + for pg in pgs: + if ((pg['state'].count('down') and not + pg['state'].count('stale')) or + (pg['state'].count('incomplete') and not + pg['state'].count('stale'))): + num += 1 + return num + + def get_num_active_down(self): + """ + Find the number of pgs that are either active or down. 
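[The counters above all classify a PG by substring matches on its `state` field; a compact sketch of the same convention, assuming `pgs` is the list returned by `get_pg_stats()`.]

    def summarize_pg_states(pgs):
        # mirrors the active/clean/stale substring convention used above
        def has(pg, token):
            return pg['state'].count(token) > 0
        return {
            'active+clean': sum(1 for pg in pgs
                                if has(pg, 'active') and has(pg, 'clean')
                                and not has(pg, 'stale')),
            'down_or_incomplete': sum(1 for pg in pgs
                                      if (has(pg, 'down') or has(pg, 'incomplete'))
                                      and not has(pg, 'stale')),
        }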
+ """ + pgs = self.get_pg_stats() + return self._get_num_active_down(pgs) + + def _get_num_active_down(self, pgs): + num = 0 + for pg in pgs: + if ((pg['state'].count('active') and not + pg['state'].count('stale')) or + (pg['state'].count('down') and not + pg['state'].count('stale')) or + (pg['state'].count('incomplete') and not + pg['state'].count('stale'))): + num += 1 + return num + + def get_num_peered(self): + """ + Find the number of PGs that are peered + """ + pgs = self.get_pg_stats() + return self._get_num_peered(pgs) + + def _get_num_peered(self, pgs): + num = 0 + for pg in pgs: + if pg['state'].count('peered') and not pg['state'].count('stale'): + num += 1 + return num + + def is_clean(self): + """ + True if all pgs are clean + """ + pgs = self.get_pg_stats() + if self._get_num_active_clean(pgs) == len(pgs): + return True + else: + self.dump_pgs_not_active_clean() + return False + + def is_recovered(self): + """ + True if all pgs have recovered + """ + pgs = self.get_pg_stats() + return self._get_num_active_recovered(pgs) == len(pgs) + + def is_active_or_down(self): + """ + True if all pgs are active or down + """ + pgs = self.get_pg_stats() + return self._get_num_active_down(pgs) == len(pgs) + + def dump_pgs_not_active_clean(self): + """ + Dumps all pgs that are not active+clean + """ + pgs = self.get_pg_stats() + for pg in pgs: + if pg['state'] != 'active+clean': + self.log('PG %s is not active+clean' % pg['pgid']) + self.log(pg) + + def dump_pgs_not_active_down(self): + """ + Dumps all pgs that are not active or down + """ + pgs = self.get_pg_stats() + for pg in pgs: + if 'active' not in pg['state'] and 'down' not in pg['state']: + self.log('PG %s is not active or down' % pg['pgid']) + self.log(pg) + + def dump_pgs_not_active(self): + """ + Dumps all pgs that are not active + """ + pgs = self.get_pg_stats() + for pg in pgs: + if 'active' not in pg['state']: + self.log('PG %s is not active' % pg['pgid']) + self.log(pg) + + def dump_pgs_not_active_peered(self, pgs): + for pg in pgs: + if (not pg['state'].count('active')) and (not pg['state'].count('peered')): + self.log('PG %s is not active or peered' % pg['pgid']) + self.log(pg) + + def wait_for_clean(self, timeout=1200): + """ + Returns true when all pgs are clean. + """ + self.log("waiting for clean") + start = time.time() + num_active_clean = self.get_num_active_clean() + while not self.is_clean(): + if timeout is not None: + if self.get_is_making_recovery_progress(): + self.log("making progress, resetting timeout") + start = time.time() + else: + self.log("no progress seen, keeping timeout for now") + if time.time() - start >= timeout: + self.log('dumping pgs not clean') + self.dump_pgs_not_active_clean() + assert time.time() - start < timeout, \ + 'wait_for_clean: failed before timeout expired' + cur_active_clean = self.get_num_active_clean() + if cur_active_clean != num_active_clean: + start = time.time() + num_active_clean = cur_active_clean + time.sleep(3) + self.log("clean!") + + def are_all_osds_up(self): + """ + Returns true if all osds are up. + """ + x = self.get_osd_dump() + return (len(x) == sum([(y['up'] > 0) for y in x])) + + def wait_for_all_osds_up(self, timeout=None): + """ + When this exits, either the timeout has expired, or all + osds are up. 
+ """ + self.log("waiting for all up") + start = time.time() + while not self.are_all_osds_up(): + if timeout is not None: + assert time.time() - start < timeout, \ + 'timeout expired in wait_for_all_osds_up' + time.sleep(3) + self.log("all up!") + + def pool_exists(self, pool): + if pool in self.list_pools(): + return True + return False + + def wait_for_pool(self, pool, timeout=300): + """ + Wait for a pool to exist + """ + self.log('waiting for pool %s to exist' % pool) + start = time.time() + while not self.pool_exists(pool): + if timeout is not None: + assert time.time() - start < timeout, \ + 'timeout expired in wait_for_pool' + time.sleep(3) + + def wait_for_pools(self, pools): + for pool in pools: + self.wait_for_pool(pool) + + def is_mgr_available(self): + x = self.get_mgr_dump() + return x.get('available', False) + + def wait_for_mgr_available(self, timeout=None): + self.log("waiting for mgr available") + start = time.time() + while not self.is_mgr_available(): + if timeout is not None: + assert time.time() - start < timeout, \ + 'timeout expired in wait_for_mgr_available' + time.sleep(3) + self.log("mgr available!") + + def wait_for_recovery(self, timeout=None): + """ + Check peering. When this exists, we have recovered. + """ + self.log("waiting for recovery to complete") + start = time.time() + num_active_recovered = self.get_num_active_recovered() + while not self.is_recovered(): + now = time.time() + if timeout is not None: + if self.get_is_making_recovery_progress(): + self.log("making progress, resetting timeout") + start = time.time() + else: + self.log("no progress seen, keeping timeout for now") + if now - start >= timeout: + if self.is_recovered(): + break + self.log('dumping pgs not recovered yet') + self.dump_pgs_not_active_clean() + assert now - start < timeout, \ + 'wait_for_recovery: failed before timeout expired' + cur_active_recovered = self.get_num_active_recovered() + if cur_active_recovered != num_active_recovered: + start = time.time() + num_active_recovered = cur_active_recovered + time.sleep(3) + self.log("recovered!") + + def wait_for_active(self, timeout=None): + """ + Check peering. When this exists, we are definitely active + """ + self.log("waiting for peering to complete") + start = time.time() + num_active = self.get_num_active() + while not self.is_active(): + if timeout is not None: + if time.time() - start >= timeout: + self.log('dumping pgs not active') + self.dump_pgs_not_active() + assert time.time() - start < timeout, \ + 'wait_for_active: failed before timeout expired' + cur_active = self.get_num_active() + if cur_active != num_active: + start = time.time() + num_active = cur_active + time.sleep(3) + self.log("active!") + + def wait_for_active_or_down(self, timeout=None): + """ + Check peering. 
When this exists, we are definitely either + active or down + """ + self.log("waiting for peering to complete or become blocked") + start = time.time() + num_active_down = self.get_num_active_down() + while not self.is_active_or_down(): + if timeout is not None: + if time.time() - start >= timeout: + self.log('dumping pgs not active or down') + self.dump_pgs_not_active_down() + assert time.time() - start < timeout, \ + 'wait_for_active_or_down: failed before timeout expired' + cur_active_down = self.get_num_active_down() + if cur_active_down != num_active_down: + start = time.time() + num_active_down = cur_active_down + time.sleep(3) + self.log("active or down!") + + def osd_is_up(self, osd): + """ + Wrapper for osd check + """ + osds = self.get_osd_dump() + return osds[osd]['up'] > 0 + + def wait_till_osd_is_up(self, osd, timeout=None): + """ + Loop waiting for osd. + """ + self.log('waiting for osd.%d to be up' % osd) + start = time.time() + while not self.osd_is_up(osd): + if timeout is not None: + assert time.time() - start < timeout, \ + 'osd.%d failed to come up before timeout expired' % osd + time.sleep(3) + self.log('osd.%d is up' % osd) + + def is_active(self): + """ + Wrapper to check if all pgs are active + """ + return self.get_num_active() == self.get_num_pgs() + + def all_active_or_peered(self): + """ + Wrapper to check if all PGs are active or peered + """ + pgs = self.get_pg_stats() + if self._get_num_active(pgs) + self._get_num_peered(pgs) == len(pgs): + return True + else: + self.dump_pgs_not_active_peered(pgs) + return False + + def wait_till_active(self, timeout=None): + """ + Wait until all pgs are active. + """ + self.log("waiting till active") + start = time.time() + while not self.is_active(): + if timeout is not None: + if time.time() - start >= timeout: + self.log('dumping pgs not active') + self.dump_pgs_not_active() + assert time.time() - start < timeout, \ + 'wait_till_active: failed before timeout expired' + time.sleep(3) + self.log("active!") + + def wait_till_pg_convergence(self, timeout=None): + start = time.time() + old_stats = None + active_osds = [osd['osd'] for osd in self.get_osd_dump() + if osd['in'] and osd['up']] + while True: + # strictly speaking, no need to wait for mon. but due to the + # "ms inject socket failures" setting, the osdmap could be delayed, + # so mgr is likely to ignore the pg-stat messages with pgs serving + # newly created pools which is not yet known by mgr. so, to make sure + # the mgr is updated with the latest pg-stats, waiting for mon/mgr is + # necessary. + self.flush_pg_stats(active_osds) + new_stats = dict((stat['pgid'], stat['state']) + for stat in self.get_pg_stats()) + if old_stats == new_stats: + return old_stats + if timeout is not None: + assert time.time() - start < timeout, \ + 'failed to reach convergence before %d secs' % timeout + old_stats = new_stats + # longer than mgr_stats_period + time.sleep(5 + 1) + + def mark_out_osd(self, osd): + """ + Wrapper to mark osd out. + """ + self.raw_cluster_cmd('osd', 'out', str(osd)) + + def kill_osd(self, osd): + """ + Kill osds by either power cycling (if indicated by the config) + or by stopping. 
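[A round-trip sketch of the OSD membership helpers, assuming `manager` is the task's CephManager; `mark_in_osd()` is defined a little further down, and a real thrasher would randomize the osd id rather than use the fixed one shown here.]

    def out_in_osd_sketch(manager, osd=0):
        manager.mark_out_osd(osd)            # data rebalances away from the OSD
        manager.wait_for_clean(timeout=1200)
        manager.mark_in_osd(osd)             # and back again
        manager.wait_for_clean(timeout=1200)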
+ """ + if self.config.get('powercycle'): + remote = self.find_remote('osd', osd) + self.log('kill_osd on osd.{o} ' + 'doing powercycle of {s}'.format(o=osd, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_off() + elif self.config.get('bdev_inject_crash') and self.config.get('bdev_inject_crash_probability'): + if random.uniform(0, 1) < self.config.get('bdev_inject_crash_probability', .5): + self.inject_args( + 'osd', osd, + 'bdev-inject-crash', self.config.get('bdev_inject_crash')) + try: + self.ctx.daemons.get_daemon('osd', osd, self.cluster).wait() + except: + pass + else: + raise RuntimeError('osd.%s did not fail' % osd) + else: + self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop() + else: + self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop() + + @staticmethod + def _assert_ipmi(remote): + assert remote.console.has_ipmi_credentials, ( + "powercycling requested but RemoteConsole is not " + "initialized. Check ipmi config.") + + def blackhole_kill_osd(self, osd): + """ + Stop osd if nothing else works. + """ + self.inject_args('osd', osd, + 'objectstore-blackhole', True) + time.sleep(2) + self.ctx.daemons.get_daemon('osd', osd, self.cluster).stop() + + def revive_osd(self, osd, timeout=360, skip_admin_check=False): + """ + Revive osds by either power cycling (if indicated by the config) + or by restarting. + """ + if self.config.get('powercycle'): + remote = self.find_remote('osd', osd) + self.log('kill_osd on osd.{o} doing powercycle of {s}'. + format(o=osd, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_on() + if not remote.console.check_status(300): + raise Exception('Failed to revive osd.{o} via ipmi'. + format(o=osd)) + teuthology.reconnect(self.ctx, 60, [remote]) + mount_osd_data(self.ctx, remote, self.cluster, str(osd)) + self.make_admin_daemon_dir(remote) + self.ctx.daemons.get_daemon('osd', osd, self.cluster).reset() + self.ctx.daemons.get_daemon('osd', osd, self.cluster).restart() + + if not skip_admin_check: + # wait for dump_ops_in_flight; this command doesn't appear + # until after the signal handler is installed and it is safe + # to stop the osd again without making valgrind leak checks + # unhappy. see #5924. + self.wait_run_admin_socket('osd', osd, + args=['dump_ops_in_flight'], + timeout=timeout, stdout=DEVNULL) + + def mark_down_osd(self, osd): + """ + Cluster command wrapper + """ + self.raw_cluster_cmd('osd', 'down', str(osd)) + + def mark_in_osd(self, osd): + """ + Cluster command wrapper + """ + self.raw_cluster_cmd('osd', 'in', str(osd)) + + def signal_osd(self, osd, sig, silent=False): + """ + Wrapper to local get_daemon call which sends the given + signal to the given osd. + """ + self.ctx.daemons.get_daemon('osd', osd, + self.cluster).signal(sig, silent=silent) + + ## monitors + def signal_mon(self, mon, sig, silent=False): + """ + Wrapper to local get_daemon call + """ + self.ctx.daemons.get_daemon('mon', mon, + self.cluster).signal(sig, silent=silent) + + def kill_mon(self, mon): + """ + Kill the monitor by either power cycling (if the config says so), + or by doing a stop. + """ + if self.config.get('powercycle'): + remote = self.find_remote('mon', mon) + self.log('kill_mon on mon.{m} doing powercycle of {s}'. + format(m=mon, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_off() + else: + self.ctx.daemons.get_daemon('mon', mon, self.cluster).stop() + + def revive_mon(self, mon): + """ + Restart by either power cycling (if the config says so), + or by doing a normal restart. 
+ """ + if self.config.get('powercycle'): + remote = self.find_remote('mon', mon) + self.log('revive_mon on mon.{m} doing powercycle of {s}'. + format(m=mon, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_on() + self.make_admin_daemon_dir(remote) + self.ctx.daemons.get_daemon('mon', mon, self.cluster).restart() + + def revive_mgr(self, mgr): + """ + Restart by either power cycling (if the config says so), + or by doing a normal restart. + """ + if self.config.get('powercycle'): + remote = self.find_remote('mgr', mgr) + self.log('revive_mgr on mgr.{m} doing powercycle of {s}'. + format(m=mgr, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_on() + self.make_admin_daemon_dir(remote) + self.ctx.daemons.get_daemon('mgr', mgr, self.cluster).restart() + + def get_mon_status(self, mon): + """ + Extract all the monitor status information from the cluster + """ + out = self.raw_cluster_cmd('tell', 'mon.%s' % mon, 'mon_status') + return json.loads(out) + + def get_mon_quorum(self): + """ + Extract monitor quorum information from the cluster + """ + out = self.raw_cluster_cmd('quorum_status') + j = json.loads(out) + return j['quorum'] + + def wait_for_mon_quorum_size(self, size, timeout=300): + """ + Loop until quorum size is reached. + """ + self.log('waiting for quorum size %d' % size) + sleep = 3 + with safe_while(sleep=sleep, + tries=timeout // sleep, + action=f'wait for quorum size {size}') as proceed: + while proceed(): + try: + if len(self.get_mon_quorum()) == size: + break + except CommandFailedError as e: + # could fail instea4d of blocked if the rotating key of the + # connected monitor is not updated yet after they form the + # quorum + if e.exitstatus == errno.EACCES: + pass + else: + raise + self.log("quorum is size %d" % size) + + def get_mon_health(self, debug=False): + """ + Extract all the monitor health information. + """ + out = self.raw_cluster_cmd('health', '--format=json') + if debug: + self.log('health:\n{h}'.format(h=out)) + return json.loads(out) + + def wait_until_healthy(self, timeout=None): + self.log("wait_until_healthy") + start = time.time() + while self.get_mon_health()['status'] != 'HEALTH_OK': + if timeout is not None: + assert time.time() - start < timeout, \ + 'timeout expired in wait_until_healthy' + time.sleep(3) + self.log("wait_until_healthy done") + + def get_filepath(self): + """ + Return path to osd data with {id} needing to be replaced + """ + return '/var/lib/ceph/osd/' + self.cluster + '-{id}' + + def make_admin_daemon_dir(self, remote): + """ + Create /var/run/ceph directory on remote site. + + :param ctx: Context + :param remote: Remote site + """ + remote.run(args=['sudo', + 'install', '-d', '-m0777', '--', '/var/run/ceph', ], ) + + def get_service_task_status(self, service, status_key): + """ + Return daemon task status for a given ceph service. + + :param service: ceph service (mds, osd, etc...) 
+ :param status_key: matching task status key + """ + task_status = {} + status = self.raw_cluster_status() + try: + for k,v in status['servicemap']['services'][service]['daemons'].items(): + ts = dict(v).get('task_status', None) + if ts: + task_status[k] = ts[status_key] + except KeyError: # catches missing service and status key + return {} + self.log(task_status) + return task_status + +def utility_task(name): + """ + Generate ceph_manager subtask corresponding to ceph_manager + method name + """ + def task(ctx, config): + if config is None: + config = {} + args = config.get('args', []) + kwargs = config.get('kwargs', {}) + cluster = config.get('cluster', 'ceph') + fn = getattr(ctx.managers[cluster], name) + fn(*args, **kwargs) + return task + +revive_osd = utility_task("revive_osd") +revive_mon = utility_task("revive_mon") +kill_osd = utility_task("kill_osd") +kill_mon = utility_task("kill_mon") +create_pool = utility_task("create_pool") +remove_pool = utility_task("remove_pool") +wait_for_clean = utility_task("wait_for_clean") +flush_all_pg_stats = utility_task("flush_all_pg_stats") +set_pool_property = utility_task("set_pool_property") +do_pg_scrub = utility_task("do_pg_scrub") +wait_for_pool = utility_task("wait_for_pool") +wait_for_pools = utility_task("wait_for_pools") diff --git a/qa/tasks/ceph_objectstore_tool.py b/qa/tasks/ceph_objectstore_tool.py new file mode 100644 index 000000000..9c29d80b2 --- /dev/null +++ b/qa/tasks/ceph_objectstore_tool.py @@ -0,0 +1,662 @@ +""" +ceph_objectstore_tool - Simple test of ceph-objectstore-tool utility +""" +from io import BytesIO + +import contextlib +import json +import logging +import os +import sys +import tempfile +import time +from tasks import ceph_manager +from tasks.util.rados import (rados, create_replicated_pool, create_ec_pool) +from teuthology import misc as teuthology +from teuthology.orchestra import run + +from teuthology.exceptions import CommandFailedError + +# from util.rados import (rados, create_ec_pool, +# create_replicated_pool, +# create_cache_pool) + +log = logging.getLogger(__name__) + +# Should get cluster name "ceph" from somewhere +# and normal path from osd_data and osd_journal in conf +FSPATH = "/var/lib/ceph/osd/ceph-{id}" +JPATH = "/var/lib/ceph/osd/ceph-{id}/journal" + + +def cod_setup_local_data(log, ctx, NUM_OBJECTS, DATADIR, + BASE_NAME, DATALINECOUNT): + objects = range(1, NUM_OBJECTS + 1) + for i in objects: + NAME = BASE_NAME + "{num}".format(num=i) + LOCALNAME = os.path.join(DATADIR, NAME) + + dataline = range(DATALINECOUNT) + fd = open(LOCALNAME, "w") + data = "This is the data for " + NAME + "\n" + for _ in dataline: + fd.write(data) + fd.close() + + +def cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR, + BASE_NAME, DATALINECOUNT): + + objects = range(1, NUM_OBJECTS + 1) + for i in objects: + NAME = BASE_NAME + "{num}".format(num=i) + DDNAME = os.path.join(DATADIR, NAME) + + remote.run(args=['rm', '-f', DDNAME]) + + dataline = range(DATALINECOUNT) + data = "This is the data for " + NAME + "\n" + DATA = "" + for _ in dataline: + DATA += data + remote.write_file(DDNAME, DATA) + + +def cod_setup(log, ctx, remote, NUM_OBJECTS, DATADIR, + BASE_NAME, DATALINECOUNT, POOL, db, ec): + ERRORS = 0 + log.info("Creating {objs} objects in pool".format(objs=NUM_OBJECTS)) + + objects = range(1, NUM_OBJECTS + 1) + for i in objects: + NAME = BASE_NAME + "{num}".format(num=i) + DDNAME = os.path.join(DATADIR, NAME) + + proc = rados(ctx, remote, ['-p', POOL, 'put', NAME, DDNAME], + wait=False) + # proc = 
remote.run(args=['rados', '-p', POOL, 'put', NAME, DDNAME]) + ret = proc.wait() + if ret != 0: + log.critical("Rados put failed with status {ret}". + format(ret=proc.exitstatus)) + sys.exit(1) + + db[NAME] = {} + + keys = range(i) + db[NAME]["xattr"] = {} + for k in keys: + if k == 0: + continue + mykey = "key{i}-{k}".format(i=i, k=k) + myval = "val{i}-{k}".format(i=i, k=k) + proc = remote.run(args=['rados', '-p', POOL, 'setxattr', + NAME, mykey, myval]) + ret = proc.wait() + if ret != 0: + log.error("setxattr failed with {ret}".format(ret=ret)) + ERRORS += 1 + db[NAME]["xattr"][mykey] = myval + + # Erasure coded pools don't support omap + if ec: + continue + + # Create omap header in all objects but REPobject1 + if i != 1: + myhdr = "hdr{i}".format(i=i) + proc = remote.run(args=['rados', '-p', POOL, 'setomapheader', + NAME, myhdr]) + ret = proc.wait() + if ret != 0: + log.critical("setomapheader failed with {ret}".format(ret=ret)) + ERRORS += 1 + db[NAME]["omapheader"] = myhdr + + db[NAME]["omap"] = {} + for k in keys: + if k == 0: + continue + mykey = "okey{i}-{k}".format(i=i, k=k) + myval = "oval{i}-{k}".format(i=i, k=k) + proc = remote.run(args=['rados', '-p', POOL, 'setomapval', + NAME, mykey, myval]) + ret = proc.wait() + if ret != 0: + log.critical("setomapval failed with {ret}".format(ret=ret)) + db[NAME]["omap"][mykey] = myval + + return ERRORS + + +def get_lines(filename): + tmpfd = open(filename, "r") + line = True + lines = [] + while line: + line = tmpfd.readline().rstrip('\n') + if line: + lines += [line] + tmpfd.close() + os.unlink(filename) + return lines + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run ceph_objectstore_tool test + + The config should be as follows:: + + ceph_objectstore_tool: + objects: 20 # <number of objects> + pgnum: 12 + """ + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'ceph_objectstore_tool task only accepts a dict for configuration' + + log.info('Beginning ceph_objectstore_tool...') + + log.debug(config) + log.debug(ctx) + clients = ctx.cluster.only(teuthology.is_type('client')) + assert len(clients.remotes) > 0, 'Must specify at least 1 client' + (cli_remote, _) = clients.remotes.popitem() + log.debug(cli_remote) + + # clients = dict(teuthology.get_clients(ctx=ctx, roles=config.keys())) + # client = clients.popitem() + # log.info(client) + osds = ctx.cluster.only(teuthology.is_type('osd')) + log.info("OSDS") + log.info(osds) + log.info(osds.remotes) + + manager = ctx.managers['ceph'] + while (len(manager.get_osd_status()['up']) != + len(manager.get_osd_status()['raw'])): + time.sleep(10) + while (len(manager.get_osd_status()['in']) != + len(manager.get_osd_status()['up'])): + time.sleep(10) + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'nodown') + + PGNUM = config.get('pgnum', 12) + log.info("pgnum: {num}".format(num=PGNUM)) + + ERRORS = 0 + + REP_POOL = "rep_pool" + REP_NAME = "REPobject" + create_replicated_pool(cli_remote, REP_POOL, PGNUM) + ERRORS += test_objectstore(ctx, config, cli_remote, REP_POOL, REP_NAME) + + EC_POOL = "ec_pool" + EC_NAME = "ECobject" + create_ec_pool(cli_remote, EC_POOL, 'default', PGNUM) + ERRORS += test_objectstore(ctx, config, cli_remote, + EC_POOL, EC_NAME, ec=True) + + if ERRORS == 0: + log.info("TEST PASSED") + else: + log.error("TEST FAILED WITH {errcount} ERRORS".format(errcount=ERRORS)) + + assert ERRORS == 0 + + try: + yield + finally: + log.info('Ending ceph_objectstore_tool') + + +def test_objectstore(ctx, config, 
cli_remote, REP_POOL, REP_NAME, ec=False): + manager = ctx.managers['ceph'] + + osds = ctx.cluster.only(teuthology.is_type('osd')) + + TEUTHDIR = teuthology.get_testdir(ctx) + DATADIR = os.path.join(TEUTHDIR, "ceph.data") + DATALINECOUNT = 10000 + ERRORS = 0 + NUM_OBJECTS = config.get('objects', 10) + log.info("objects: {num}".format(num=NUM_OBJECTS)) + + pool_dump = manager.get_pool_dump(REP_POOL) + REPID = pool_dump['pool'] + + log.debug("repid={num}".format(num=REPID)) + + db = {} + + LOCALDIR = tempfile.mkdtemp("cod") + + cod_setup_local_data(log, ctx, NUM_OBJECTS, LOCALDIR, + REP_NAME, DATALINECOUNT) + allremote = [] + allremote.append(cli_remote) + allremote += list(osds.remotes.keys()) + allremote = list(set(allremote)) + for remote in allremote: + cod_setup_remote_data(log, ctx, remote, NUM_OBJECTS, DATADIR, + REP_NAME, DATALINECOUNT) + + ERRORS += cod_setup(log, ctx, cli_remote, NUM_OBJECTS, DATADIR, + REP_NAME, DATALINECOUNT, REP_POOL, db, ec) + + pgs = {} + for stats in manager.get_pg_stats(): + if stats["pgid"].find(str(REPID) + ".") != 0: + continue + if pool_dump["type"] == ceph_manager.PoolType.REPLICATED: + for osd in stats["acting"]: + pgs.setdefault(osd, []).append(stats["pgid"]) + elif pool_dump["type"] == ceph_manager.PoolType.ERASURE_CODED: + shard = 0 + for osd in stats["acting"]: + pgs.setdefault(osd, []).append("{pgid}s{shard}". + format(pgid=stats["pgid"], + shard=shard)) + shard += 1 + else: + raise Exception("{pool} has an unexpected type {type}". + format(pool=REP_POOL, type=pool_dump["type"])) + + log.info(pgs) + log.info(db) + + for osd in manager.get_osd_status()['up']: + manager.kill_osd(osd) + time.sleep(5) + + pgswithobjects = set() + objsinpg = {} + + # Test --op list and generate json for all objects + log.info("Test --op list by generating json for all objects") + prefix = ("sudo ceph-objectstore-tool " + "--data-path {fpath} " + "--journal-path {jpath} ").format(fpath=FSPATH, jpath=JPATH) + for remote in osds.remotes.keys(): + log.debug(remote) + log.debug(osds.remotes[remote]) + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + log.info("process osd.{id} on {remote}". + format(id=osdid, remote=remote)) + cmd = (prefix + "--op list").format(id=osdid) + try: + lines = remote.sh(cmd, check_status=False).splitlines() + for pgline in lines: + if not pgline: + continue + (pg, obj) = json.loads(pgline) + name = obj['oid'] + if name in db: + pgswithobjects.add(pg) + objsinpg.setdefault(pg, []).append(name) + db[name].setdefault("pg2json", + {})[pg] = json.dumps(obj) + except CommandFailedError as e: + log.error("Bad exit status {ret} from --op list request". + format(ret=e.exitstatus)) + ERRORS += 1 + + log.info(db) + log.info(pgswithobjects) + log.info(objsinpg) + + if pool_dump["type"] == ceph_manager.PoolType.REPLICATED: + # Test get-bytes + log.info("Test get-bytes and set-bytes") + for basename in db.keys(): + file = os.path.join(DATADIR, basename) + GETNAME = os.path.join(DATADIR, "get") + SETNAME = os.path.join(DATADIR, "set") + + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg, JSON in db[basename]["pg2json"].items(): + if pg in pgs[osdid]: + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("get-bytes {fname}". 
+ format(fname=GETNAME).split()) + proc = remote.run(args=cmd, check_status=False) + if proc.exitstatus != 0: + remote.run(args="rm -f {getfile}". + format(getfile=GETNAME).split()) + log.error("Bad exit status {ret}". + format(ret=proc.exitstatus)) + ERRORS += 1 + continue + cmd = ("diff -q {file} {getfile}". + format(file=file, getfile=GETNAME)) + proc = remote.run(args=cmd.split()) + if proc.exitstatus != 0: + log.error("Data from get-bytes differ") + # log.debug("Got:") + # cat_file(logging.DEBUG, GETNAME) + # log.debug("Expected:") + # cat_file(logging.DEBUG, file) + ERRORS += 1 + remote.run(args="rm -f {getfile}". + format(getfile=GETNAME).split()) + + data = ("put-bytes going into {file}\n". + format(file=file)) + remote.write_file(SETNAME, data) + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("set-bytes {fname}". + format(fname=SETNAME).split()) + proc = remote.run(args=cmd, check_status=False) + proc.wait() + if proc.exitstatus != 0: + log.info("set-bytes failed for object {obj} " + "in pg {pg} osd.{id} ret={ret}". + format(obj=basename, pg=pg, + id=osdid, ret=proc.exitstatus)) + ERRORS += 1 + + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += "get-bytes -".split() + try: + output = remote.sh(cmd, wait=True) + if data != output: + log.error("Data inconsistent after " + "set-bytes, got:") + log.error(output) + ERRORS += 1 + except CommandFailedError as e: + log.error("get-bytes after " + "set-bytes ret={ret}". + format(ret=e.exitstatus)) + ERRORS += 1 + + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("set-bytes {fname}". + format(fname=file).split()) + proc = remote.run(args=cmd, check_status=False) + proc.wait() + if proc.exitstatus != 0: + log.info("set-bytes failed for object {obj} " + "in pg {pg} osd.{id} ret={ret}". + format(obj=basename, pg=pg, + id=osdid, ret=proc.exitstatus)) + ERRORS += 1 + + log.info("Test list-attrs get-attr") + for basename in db.keys(): + file = os.path.join(DATADIR, basename) + GETNAME = os.path.join(DATADIR, "get") + SETNAME = os.path.join(DATADIR, "set") + + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg, JSON in db[basename]["pg2json"].items(): + if pg in pgs[osdid]: + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ["list-attrs"] + try: + keys = remote.sh(cmd, wait=True, stderr=BytesIO()).split() + except CommandFailedError as e: + log.error("Bad exit status {ret}". + format(ret=e.exitstatus)) + ERRORS += 1 + continue + values = dict(db[basename]["xattr"]) + + for key in keys: + if (key == "_" or + key == "snapset" or + key == "hinfo_key"): + continue + key = key.strip("_") + if key not in values: + log.error("The key {key} should be present". + format(key=key)) + ERRORS += 1 + continue + exp = values.pop(key) + cmd = ((prefix + "--pgid {pg}"). + format(id=osdid, pg=pg).split()) + cmd.append(run.Raw("'{json}'".format(json=JSON))) + cmd += ("get-attr {key}". + format(key="_" + key).split()) + try: + val = remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("get-attr failed with {ret}". 
+ format(ret=e.exitstatus)) + ERRORS += 1 + continue + if exp != val: + log.error("For key {key} got value {got} " + "instead of {expected}". + format(key=key, got=val, + expected=exp)) + ERRORS += 1 + if "hinfo_key" in keys: + cmd_prefix = prefix.format(id=osdid) + cmd = """ + expected=$({prefix} --pgid {pg} '{json}' get-attr {key} | base64) + echo placeholder | {prefix} --pgid {pg} '{json}' set-attr {key} - + test $({prefix} --pgid {pg} '{json}' get-attr {key}) = placeholder + echo $expected | base64 --decode | \ + {prefix} --pgid {pg} '{json}' set-attr {key} - + test $({prefix} --pgid {pg} '{json}' get-attr {key} | base64) = $expected + """.format(prefix=cmd_prefix, pg=pg, json=JSON, + key="hinfo_key") + log.debug(cmd) + proc = remote.run(args=['bash', '-e', '-x', + '-c', cmd], + check_status=False, + stdout=BytesIO(), + stderr=BytesIO()) + proc.wait() + if proc.exitstatus != 0: + log.error("failed with " + + str(proc.exitstatus)) + log.error(" ".join([ + proc.stdout.getvalue().decode(), + proc.stderr.getvalue().decode(), + ])) + ERRORS += 1 + + if len(values) != 0: + log.error("Not all keys found, remaining keys:") + log.error(values) + + log.info("Test pg info") + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + cmd = ((prefix + "--op info --pgid {pg}"). + format(id=osdid, pg=pg).split()) + try: + info = remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("Failure of --op info command with %s", + e.exitstatus) + ERRORS += 1 + continue + if not str(pg) in info: + log.error("Bad data from info: %s", info) + ERRORS += 1 + + log.info("Test pg logging") + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + cmd = ((prefix + "--op log --pgid {pg}"). + format(id=osdid, pg=pg).split()) + try: + output = remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("Getting log failed for pg {pg} " + "from osd.{id} with {ret}". + format(pg=pg, id=osdid, ret=e.exitstatus)) + ERRORS += 1 + continue + HASOBJ = pg in pgswithobjects + MODOBJ = "modify" in output + if HASOBJ != MODOBJ: + log.error("Bad log for pg {pg} from osd.{id}". + format(pg=pg, id=osdid)) + MSG = (HASOBJ and [""] or ["NOT "])[0] + log.error("Log should {msg}have a modify entry". + format(msg=MSG)) + ERRORS += 1 + + log.info("Test pg export") + EXP_ERRORS = 0 + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + fpath = os.path.join(DATADIR, "osd{id}.{pg}". + format(id=osdid, pg=pg)) + + cmd = ((prefix + "--op export --pgid {pg} --file {file}"). + format(id=osdid, pg=pg, file=fpath)) + try: + remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("Exporting failed for pg {pg} " + "on osd.{id} with {ret}". + format(pg=pg, id=osdid, ret=e.exitstatus)) + EXP_ERRORS += 1 + + ERRORS += EXP_ERRORS + + log.info("Test pg removal") + RM_ERRORS = 0 + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + cmd = ((prefix + "--force --op remove --pgid {pg}"). 
+ format(pg=pg, id=osdid)) + try: + remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("Removing failed for pg {pg} " + "on osd.{id} with {ret}". + format(pg=pg, id=osdid, ret=e.exitstatus)) + RM_ERRORS += 1 + + ERRORS += RM_ERRORS + + IMP_ERRORS = 0 + if EXP_ERRORS == 0 and RM_ERRORS == 0: + log.info("Test pg import") + + for remote in osds.remotes.keys(): + for role in osds.remotes[remote]: + if not role.startswith("osd."): + continue + osdid = int(role.split('.')[1]) + if osdid not in pgs: + continue + + for pg in pgs[osdid]: + fpath = os.path.join(DATADIR, "osd{id}.{pg}". + format(id=osdid, pg=pg)) + + cmd = ((prefix + "--op import --file {file}"). + format(id=osdid, file=fpath)) + try: + remote.sh(cmd, wait=True) + except CommandFailedError as e: + log.error("Import failed from {file} with {ret}". + format(file=fpath, ret=e.exitstatus)) + IMP_ERRORS += 1 + else: + log.warning("SKIPPING IMPORT TESTS DUE TO PREVIOUS FAILURES") + + ERRORS += IMP_ERRORS + + if EXP_ERRORS == 0 and RM_ERRORS == 0 and IMP_ERRORS == 0: + log.info("Restarting OSDs....") + # They are still look to be up because of setting nodown + for osd in manager.get_osd_status()['up']: + manager.revive_osd(osd) + # Wait for health? + time.sleep(5) + # Let scrub after test runs verify consistency of all copies + log.info("Verify replicated import data") + objects = range(1, NUM_OBJECTS + 1) + for i in objects: + NAME = REP_NAME + "{num}".format(num=i) + TESTNAME = os.path.join(DATADIR, "gettest") + REFNAME = os.path.join(DATADIR, NAME) + + proc = rados(ctx, cli_remote, + ['-p', REP_POOL, 'get', NAME, TESTNAME], wait=False) + + ret = proc.wait() + if ret != 0: + log.error("After import, rados get failed with {ret}". + format(ret=proc.exitstatus)) + ERRORS += 1 + continue + + cmd = "diff -q {gettest} {ref}".format(gettest=TESTNAME, + ref=REFNAME) + proc = cli_remote.run(args=cmd, check_status=False) + proc.wait() + if proc.exitstatus != 0: + log.error("Data comparison failed for {obj}".format(obj=NAME)) + ERRORS += 1 + + return ERRORS diff --git a/qa/tasks/ceph_test_case.py b/qa/tasks/ceph_test_case.py new file mode 100644 index 000000000..3f8a152d7 --- /dev/null +++ b/qa/tasks/ceph_test_case.py @@ -0,0 +1,224 @@ +from typing import Optional, TYPE_CHECKING +import unittest +import time +import logging + +from teuthology.exceptions import CommandFailedError + +if TYPE_CHECKING: + from tasks.mgr.mgr_test_case import MgrCluster + +log = logging.getLogger(__name__) + +class TestTimeoutError(RuntimeError): + pass + +class CephTestCase(unittest.TestCase): + """ + For test tasks that want to define a structured set of + tests implemented in python. Subclass this with appropriate + helpers for the subsystem you're testing. + """ + + # Environment references + mounts = None + fs = None + recovery_fs = None + backup_fs = None + ceph_cluster = None + mds_cluster = None + mgr_cluster: Optional['MgrCluster'] = None + ctx = None + + mon_manager = None + + # Declarative test requirements: subclasses should override these to indicate + # their special needs. If not met, tests will be skipped. 
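[A minimal, invented subclass showing how the declarative requirement attribute declared just below is meant to be overridden; the class name, test name and config option are the editor's illustration, not part of this change.]

    class TestNeedsMemstore(CephTestCase):
        # setUp() skips the whole test unless osd_objectstore == memstore
        REQUIRE_MEMSTORE = True

        def test_something_small(self):
            # tracked in _mon_configs_set and cleared again by tearDown()
            self.config_set('osd', 'osd_scrub_sleep', 0)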
+ REQUIRE_MEMSTORE = False + + def setUp(self): + self._mon_configs_set = set() + + self.ceph_cluster.mon_manager.raw_cluster_cmd("log", + "Starting test {0}".format(self.id())) + + if self.REQUIRE_MEMSTORE: + objectstore = self.ceph_cluster.get_config("osd_objectstore", "osd") + if objectstore != "memstore": + # You certainly *could* run this on a real OSD, but you don't want to sit + # here for hours waiting for the test to fill up a 1TB drive! + raise self.skipTest("Require `memstore` OSD backend (test " \ + "would take too long on full sized OSDs") + + def tearDown(self): + self.config_clear() + + self.ceph_cluster.mon_manager.raw_cluster_cmd("log", + "Ended test {0}".format(self.id())) + + def config_clear(self): + for section, key in self._mon_configs_set: + self.config_rm(section, key) + self._mon_configs_set.clear() + + def _fix_key(self, key): + return str(key).replace(' ', '_') + + def config_get(self, section, key): + key = self._fix_key(key) + return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "get", section, key).strip() + + def config_show(self, entity, key): + key = self._fix_key(key) + return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "show", entity, key).strip() + + def config_minimal(self): + return self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "generate-minimal-conf").strip() + + def config_rm(self, section, key): + key = self._fix_key(key) + self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "rm", section, key) + # simplification: skip removing from _mon_configs_set; + # let tearDown clear everything again + + def config_set(self, section, key, value): + key = self._fix_key(key) + self._mon_configs_set.add((section, key)) + self.ceph_cluster.mon_manager.raw_cluster_cmd("config", "set", section, key, str(value)) + + def cluster_cmd(self, command: str): + assert self.ceph_cluster is not None + return self.ceph_cluster.mon_manager.raw_cluster_cmd(*(command.split(" "))) + + + def assert_cluster_log(self, expected_pattern, invert_match=False, + timeout=10, watch_channel=None, present=True): + """ + Context manager. Assert that during execution, or up to 5 seconds later, + the Ceph cluster log emits a message matching the expected pattern. + + :param expected_pattern: A string that you expect to see in the log output + :type expected_pattern: str + :param watch_channel: Specifies the channel to be watched. This can be + 'cluster', 'audit', ... + :type watch_channel: str + :param present: Assert the log entry is present (default: True) or not (False). 
+ :type present: bool + """ + + ceph_manager = self.ceph_cluster.mon_manager + + class ContextManager(object): + def match(self): + found = expected_pattern in self.watcher_process.stdout.getvalue() + if invert_match: + return not found + + return found + + def __enter__(self): + self.watcher_process = ceph_manager.run_ceph_w(watch_channel) + + def __exit__(self, exc_type, exc_val, exc_tb): + fail = False + if not self.watcher_process.finished: + # Check if we got an early match, wait a bit if we didn't + if present and self.match(): + return + elif not present and self.match(): + fail = True + else: + log.debug("No log hits yet, waiting...") + # Default monc tick interval is 10s, so wait that long and + # then some grace + time.sleep(5 + timeout) + + self.watcher_process.stdin.close() + try: + self.watcher_process.wait() + except CommandFailedError: + pass + + if present and not self.match(): + log.error(f"Log output: \n{self.watcher_process.stdout.getvalue()}\n") + raise AssertionError(f"Expected log message found: '{expected_pattern}'") + elif fail or (not present and self.match()): + log.error(f"Log output: \n{self.watcher_process.stdout.getvalue()}\n") + raise AssertionError(f"Unexpected log message found: '{expected_pattern}'") + + return ContextManager() + + def wait_for_health(self, pattern, timeout): + """ + Wait until 'ceph health' contains messages matching the pattern + """ + def seen_health_warning(): + health = self.ceph_cluster.mon_manager.get_mon_health() + codes = [s for s in health['checks']] + summary_strings = [s[1]['summary']['message'] for s in health['checks'].items()] + if len(summary_strings) == 0: + log.debug("Not expected number of summary strings ({0})".format(summary_strings)) + return False + else: + for ss in summary_strings: + if pattern in ss: + return True + if pattern in codes: + return True + + log.debug("Not found expected summary strings yet ({0})".format(summary_strings)) + return False + + log.info(f"waiting {timeout}s for health warning matching {pattern}") + self.wait_until_true(seen_health_warning, timeout) + + def wait_for_health_clear(self, timeout): + """ + Wait until `ceph health` returns no messages + """ + def is_clear(): + health = self.ceph_cluster.mon_manager.get_mon_health() + return len(health['checks']) == 0 + + self.wait_until_true(is_clear, timeout) + + def wait_until_equal(self, get_fn, expect_val, timeout, reject_fn=None, period=5): + elapsed = 0 + while True: + val = get_fn() + if val == expect_val: + return + elif reject_fn and reject_fn(val): + raise RuntimeError("wait_until_equal: forbidden value {0} seen".format(val)) + else: + if elapsed >= timeout: + raise TestTimeoutError("Timed out after {0} seconds waiting for {1} (currently {2})".format( + elapsed, expect_val, val + )) + else: + log.debug("wait_until_equal: {0} != {1}, waiting (timeout={2})...".format(val, expect_val, timeout)) + time.sleep(period) + elapsed += period + + log.debug("wait_until_equal: success") + + @classmethod + def wait_until_true(cls, condition, timeout, check_fn=None, period=5): + elapsed = 0 + retry_count = 0 + while True: + if condition(): + log.debug("wait_until_true: success in {0}s and {1} retries".format(elapsed, retry_count)) + return + else: + if elapsed >= timeout: + if check_fn and check_fn() and retry_count < 5: + elapsed = 0 + retry_count += 1 + log.debug("wait_until_true: making progress, waiting (timeout={0} retry_count={1})...".format(timeout, retry_count)) + else: + raise TestTimeoutError("Timed out after {0}s and {1} 
retries".format(elapsed, retry_count)) + else: + log.debug("wait_until_true: waiting (timeout={0} retry_count={1})...".format(timeout, retry_count)) + time.sleep(period) + elapsed += period diff --git a/qa/tasks/cephadm.conf b/qa/tasks/cephadm.conf new file mode 100644 index 000000000..9ec08a346 --- /dev/null +++ b/qa/tasks/cephadm.conf @@ -0,0 +1,91 @@ +[global] +# make logging friendly to teuthology +log_to_file = true +log_to_stderr = false +log to journald = false +mon cluster log file level = debug + +mon clock drift allowed = 1.000 + +# replicate across OSDs, not hosts +osd crush chooseleaf type = 0 +#osd pool default size = 2 +osd pool default erasure code profile = "plugin=jerasure technique=reed_sol_van k=2 m=1 crush-failure-domain=osd" + +# enable some debugging +auth debug = true +ms die on old message = true +ms die on bug = true +debug asserts on shutdown = true + +# adjust warnings +mon max pg per osd = 10000 # >= luminous +mon pg warn max object skew = 0 +mon osd allow primary affinity = true +mon osd allow pg remap = true +mon warn on legacy crush tunables = false +mon warn on crush straw calc version zero = false +mon warn on no sortbitwise = false +mon warn on osd down out interval zero = false +mon warn on too few osds = false +mon_warn_on_pool_pg_num_not_power_of_two = false + +# disable pg_autoscaler by default for new pools +osd_pool_default_pg_autoscale_mode = off + +# tests delete pools +mon allow pool delete = true + +[osd] +osd scrub load threshold = 5.0 +osd scrub max interval = 600 +osd mclock profile = high_recovery_ops + +osd recover clone overlap = true +osd recovery max chunk = 1048576 + +osd deep scrub update digest min age = 30 + +osd map max advance = 10 + +osd memory target autotune = true + +# debugging +osd debug shutdown = true +osd debug op order = true +osd debug verify stray on activate = true +osd debug pg log writeout = true +osd debug verify cached snaps = true +osd debug verify missing on start = true +osd debug misdirected ops = true +osd op queue = debug_random +osd op queue cut off = debug_random +osd shutdown pgref assert = true +bdev debug aio = true +osd sloppy crc = true + +[mgr] +mon reweight min pgs per osd = 4 +mon reweight min bytes per osd = 10 +mgr/telemetry/nag = false + +[mon] +mon data avail warn = 5 +mon mgr mkfs grace = 240 +mon reweight min pgs per osd = 4 +mon osd reporter subtree level = osd +mon osd prime pg temp = true +mon reweight min bytes per osd = 10 + +# rotate auth tickets quickly to exercise renewal paths +auth mon ticket ttl = 660 # 11m +auth service ticket ttl = 240 # 4m + +# don't complain about global id reclaim +mon_warn_on_insecure_global_id_reclaim = false +mon_warn_on_insecure_global_id_reclaim_allowed = false + +[client.rgw] +rgw cache enabled = true +rgw enable ops log = true +rgw enable usage log = true diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py new file mode 100644 index 000000000..e9fc25d6a --- /dev/null +++ b/qa/tasks/cephadm.py @@ -0,0 +1,1754 @@ +""" +Ceph cluster task, deployed via cephadm orchestrator +""" +import argparse +import configobj +import contextlib +import logging +import os +import json +import re +import uuid +import yaml + +from copy import deepcopy +from io import BytesIO, StringIO +from tarfile import ReadError +from tasks.ceph_manager import CephManager +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology import packaging +from teuthology.orchestra import run +from teuthology.orchestra.daemon import DaemonGroup +from 
teuthology.config import config as teuth_config +from textwrap import dedent +from tasks.cephfs.filesystem import MDSCluster, Filesystem +from tasks.util import chacra + +# these items we use from ceph.py should probably eventually move elsewhere +from tasks.ceph import get_mons, healthy +from tasks.vip import subst_vip + +CEPH_ROLE_TYPES = ['mon', 'mgr', 'osd', 'mds', 'rgw', 'prometheus'] + +log = logging.getLogger(__name__) + + +def _shell(ctx, cluster_name, remote, args, extra_cephadm_args=[], **kwargs): + teuthology.get_testdir(ctx) + return remote.run( + args=[ + 'sudo', + ctx.cephadm, + '--image', ctx.ceph[cluster_name].image, + 'shell', + '-c', '/etc/ceph/{}.conf'.format(cluster_name), + '-k', '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + '--fsid', ctx.ceph[cluster_name].fsid, + ] + extra_cephadm_args + [ + '--', + ] + args, + **kwargs + ) + + +def build_initial_config(ctx, config): + cluster_name = config['cluster'] + + path = os.path.join(os.path.dirname(__file__), 'cephadm.conf') + conf = configobj.ConfigObj(path, file_error=True) + + conf.setdefault('global', {}) + conf['global']['fsid'] = ctx.ceph[cluster_name].fsid + + # overrides + for section, keys in config.get('conf',{}).items(): + for key, value in keys.items(): + log.info(" override: [%s] %s = %s" % (section, key, value)) + if section not in conf: + conf[section] = {} + conf[section][key] = value + + return conf + + +def distribute_iscsi_gateway_cfg(ctx, conf_data): + """ + Distribute common gateway config to get the IPs. + These will help in iscsi clients with finding trusted_ip_list. + """ + log.info('Distributing iscsi-gateway.cfg...') + for remote, roles in ctx.cluster.remotes.items(): + remote.write_file( + path='/etc/ceph/iscsi-gateway.cfg', + data=conf_data, + sudo=True) + +def update_archive_setting(ctx, key, value): + """ + Add logs directory to job's info log file + """ + if ctx.archive is None: + return + with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file: + info_yaml = yaml.safe_load(info_file) + info_file.seek(0) + if 'archive' in info_yaml: + info_yaml['archive'][key] = value + else: + info_yaml['archive'] = {key: value} + yaml.safe_dump(info_yaml, info_file, default_flow_style=False) + + +@contextlib.contextmanager +def normalize_hostnames(ctx): + """ + Ensure we have short hostnames throughout, for consistency between + remote.shortname and socket.gethostname() in cephadm. + """ + log.info('Normalizing hostnames...') + cluster = ctx.cluster.filter(lambda r: '.' 
in r.hostname) + cluster.run(args=[ + 'sudo', + 'hostname', + run.Raw('$(hostname -s)'), + ]) + + try: + yield + finally: + pass + + +@contextlib.contextmanager +def download_cephadm(ctx, config, ref): + cluster_name = config['cluster'] + + if config.get('cephadm_mode') != 'cephadm-package': + if ctx.config.get('redhat'): + _fetch_cephadm_from_rpm(ctx) + # TODO: come up with a sensible way to detect if we need an "old, uncompiled" + # cephadm + elif 'cephadm_git_url' in config and 'cephadm_branch' in config: + _fetch_cephadm_from_github(ctx, config, ref) + else: + _fetch_cephadm_from_chachra(ctx, config, cluster_name) + + try: + yield + finally: + _rm_cluster(ctx, cluster_name) + if config.get('cephadm_mode') == 'root': + _rm_cephadm(ctx) + + +def _fetch_cephadm_from_rpm(ctx): + log.info("Copying cephadm installed from an RPM package") + # cephadm already installed from redhat.install task + ctx.cluster.run( + args=[ + 'cp', + run.Raw('$(which cephadm)'), + ctx.cephadm, + run.Raw('&&'), + 'ls', '-l', + ctx.cephadm, + ] + ) + + +def _fetch_cephadm_from_github(ctx, config, ref): + ref = config.get('cephadm_branch', ref) + git_url = config.get('cephadm_git_url', teuth_config.get_ceph_git_url()) + log.info('Downloading cephadm (repo %s ref %s)...' % (git_url, ref)) + if git_url.startswith('https://github.com/'): + # git archive doesn't like https:// URLs, which we use with github. + rest = git_url.split('https://github.com/', 1)[1] + rest = re.sub(r'\.git/?$', '', rest).strip() # no .git suffix + ctx.cluster.run( + args=[ + 'curl', '--silent', + 'https://raw.githubusercontent.com/' + rest + '/' + ref + '/src/cephadm/cephadm', + run.Raw('>'), + ctx.cephadm, + run.Raw('&&'), + 'ls', '-l', + ctx.cephadm, + ], + ) + else: + ctx.cluster.run( + args=[ + 'git', 'clone', git_url, 'testrepo', + run.Raw('&&'), + 'cd', 'testrepo', + run.Raw('&&'), + 'git', 'show', f'{ref}:src/cephadm/cephadm', + run.Raw('>'), + ctx.cephadm, + run.Raw('&&'), + 'ls', '-l', ctx.cephadm, + ], + ) + # sanity-check the resulting file and set executable bit + cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm) + ctx.cluster.run( + args=[ + 'test', '-s', ctx.cephadm, + run.Raw('&&'), + 'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'), + run.Raw('&&'), + 'chmod', '+x', ctx.cephadm, + ], + ) + + +def _fetch_cephadm_from_chachra(ctx, config, cluster_name): + log.info('Downloading "compiled" cephadm from cachra') + bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote + bp = packaging.get_builder_project()( + config.get('project', 'ceph'), + config, + ctx=ctx, + remote=bootstrap_remote, + ) + log.info('builder_project result: %s' % (bp._result.json())) + + flavor = config.get('flavor', 'default') + branch = config.get('branch') + sha1 = config.get('sha1') + + # pull the cephadm binary from chacra + url = chacra.get_binary_url( + 'cephadm', + project=bp.project, + distro=bp.distro.split('/')[0], + release=bp.distro.split('/')[1], + arch=bp.arch, + flavor=flavor, + branch=branch, + sha1=sha1, + ) + log.info("Discovered cachra url: %s", url) + ctx.cluster.run( + args=[ + 'curl', '--silent', '-L', url, + run.Raw('>'), + ctx.cephadm, + run.Raw('&&'), + 'ls', '-l', + ctx.cephadm, + ], + ) + + # sanity-check the resulting file and set executable bit + cephadm_file_size = '$(stat -c%s {})'.format(ctx.cephadm) + ctx.cluster.run( + args=[ + 'test', '-s', ctx.cephadm, + run.Raw('&&'), + 'test', run.Raw(cephadm_file_size), "-gt", run.Raw('1000'), + run.Raw('&&'), + 'chmod', '+x', ctx.cephadm, + ], + ) + + +def 
_rm_cluster(ctx, cluster_name): + log.info('Removing cluster...') + ctx.cluster.run(args=[ + 'sudo', + ctx.cephadm, + 'rm-cluster', + '--fsid', ctx.ceph[cluster_name].fsid, + '--force', + ]) + + +def _rm_cephadm(ctx): + log.info('Removing cephadm ...') + ctx.cluster.run( + args=[ + 'rm', + '-rf', + ctx.cephadm, + ], + ) + + +@contextlib.contextmanager +def ceph_log(ctx, config): + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + update_archive_setting(ctx, 'log', '/var/log/ceph') + + + try: + yield + + except Exception: + # we need to know this below + ctx.summary['success'] = False + raise + + finally: + log.info('Checking cluster log for badness...') + def first_in_ceph_log(pattern, excludes): + """ + Find the first occurrence of the pattern specified in the Ceph log, + Returns None if none found. + + :param pattern: Pattern scanned for. + :param excludes: Patterns to ignore. + :return: First line of text (or None if not found) + """ + args = [ + 'sudo', + 'egrep', pattern, + '/var/log/ceph/{fsid}/ceph.log'.format( + fsid=fsid), + ] + if excludes: + for exclude in excludes: + args.extend([run.Raw('|'), 'egrep', '-v', exclude]) + args.extend([ + run.Raw('|'), 'head', '-n', '1', + ]) + r = ctx.ceph[cluster_name].bootstrap_remote.run( + stdout=StringIO(), + args=args, + ) + stdout = r.stdout.getvalue() + if stdout != '': + return stdout + return None + + if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', + config.get('log-ignorelist')) is not None: + log.warning('Found errors (ERR|WRN|SEC) in cluster log') + ctx.summary['success'] = False + # use the most severe problem as the failure reason + if 'failure_reason' not in ctx.summary: + for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: + match = first_in_ceph_log(pattern, config['log-ignorelist']) + if match is not None: + ctx.summary['failure_reason'] = \ + '"{match}" in cluster log'.format( + match=match.rstrip('\n'), + ) + break + + if ctx.archive is not None and \ + not (ctx.config.get('archive-on-error') and ctx.summary['success']): + # and logs + log.info('Compressing logs...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'find', + '/var/log/ceph', # all logs, not just for the cluster + '/var/log/rbd-target-api', # ceph-iscsi + '-name', + '*.log', + '-print0', + run.Raw('|'), + 'sudo', + 'xargs', + '-0', + '--no-run-if-empty', + '--', + 'gzip', + '--', + ], + wait=False, + ), + ) + + log.info('Archiving logs...') + path = os.path.join(ctx.archive, 'remote') + try: + os.makedirs(path) + except OSError: + pass + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.shortname) + try: + os.makedirs(sub) + except OSError: + pass + try: + teuthology.pull_directory(remote, '/var/log/ceph', # everything + os.path.join(sub, 'log')) + except ReadError: + pass + + +@contextlib.contextmanager +def ceph_crash(ctx, config): + """ + Gather crash dumps from /var/lib/ceph/$fsid/crash + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + update_archive_setting(ctx, 'crash', '/var/lib/ceph/crash') + + try: + yield + + finally: + if ctx.archive is not None: + log.info('Archiving crash dumps...') + path = os.path.join(ctx.archive, 'remote') + try: + os.makedirs(path) + except OSError: + pass + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.shortname) + try: + os.makedirs(sub) + except OSError: + pass + try: + teuthology.pull_directory(remote, + '/var/lib/ceph/%s/crash' % fsid, + os.path.join(sub, 'crash')) + except ReadError: + pass + + 
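The badness check in ceph_log() above shells out to an egrep pipeline on the bootstrap remote and then re-runs it once per severity marker to choose a failure reason. The snippet below is only a rough local sketch of that logic in plain Python, assuming a log path and ignorelist supplied by the caller rather than the remote /var/log/ceph/<fsid>/ceph.log pipeline the task actually runs:

    import re

    # severity markers, most severe first, mirroring the order used above
    SEVERITIES = [r'\[SEC\]', r'\[ERR\]', r'\[WRN\]']

    def first_match(lines, pattern, excludes=()):
        """Return the first line matching `pattern` and none of `excludes`."""
        for line in lines:
            if re.search(pattern, line) and \
                    not any(re.search(x, line) for x in excludes):
                return line
        return None

    def cluster_log_failure_reason(path, ignorelist=()):
        """Return a failure reason if any ERR/WRN/SEC line survives the
        ignorelist, picking the most severe surviving line; None if clean."""
        with open(path) as f:
            lines = f.readlines()
        if first_match(lines, r'\[ERR\]|\[WRN\]|\[SEC\]', ignorelist) is None:
            return None  # log is clean
        for sev in SEVERITIES:
            hit = first_match(lines, sev, ignorelist)
            if hit is not None:
                return '"%s" in cluster log' % hit.rstrip('\n')
        return None

    # illustrative call (hypothetical path and ignorelist entry):
    # cluster_log_failure_reason('/tmp/ceph.log', ignorelist=[r'slow request'])
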
+@contextlib.contextmanager +def pull_image(ctx, config): + cluster_name = config['cluster'] + log.info(f'Pulling image {ctx.ceph[cluster_name].image} on all hosts...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + ctx.cephadm, + '--image', ctx.ceph[cluster_name].image, + 'pull', + ], + wait=False, + ) + ) + + try: + yield + finally: + pass + +@contextlib.contextmanager +def setup_ca_signed_keys(ctx, config): + # generate our ca key + cluster_name = config['cluster'] + bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote + bootstrap_remote.run(args=[ + 'sudo', 'ssh-keygen', '-t', 'rsa', '-f', '/root/ca-key', '-N', '' + ]) + + # not using read_file here because it runs dd as a non-root + # user and would hit permission issues + r = bootstrap_remote.run(args=[ + 'sudo', 'cat', '/root/ca-key.pub' + ], stdout=StringIO()) + ca_key_pub_contents = r.stdout.getvalue() + + # make CA key accepted on each host + for remote in ctx.cluster.remotes.keys(): + # write key to each host's /etc/ssh dir + remote.run(args=[ + 'sudo', 'echo', ca_key_pub_contents, + run.Raw('|'), + 'sudo', 'tee', '-a', '/etc/ssh/ca-key.pub', + ]) + # make sshd accept the CA signed key + remote.run(args=[ + 'sudo', 'echo', 'TrustedUserCAKeys /etc/ssh/ca-key.pub', + run.Raw('|'), + 'sudo', 'tee', '-a', '/etc/ssh/sshd_config', + run.Raw('&&'), + 'sudo', 'systemctl', 'restart', 'sshd', + ]) + + # generate a new key pair and sign the pub key to make a cert + bootstrap_remote.run(args=[ + 'sudo', 'ssh-keygen', '-t', 'rsa', '-f', '/root/cephadm-ssh-key', '-N', '', + run.Raw('&&'), + 'sudo', 'ssh-keygen', '-s', '/root/ca-key', '-I', 'user_root', '-n', 'root', '-V', '+52w', '/root/cephadm-ssh-key', + ]) + + # for debugging, to make sure this setup has worked as intended + for remote in ctx.cluster.remotes.keys(): + remote.run(args=[ + 'sudo', 'cat', '/etc/ssh/ca-key.pub' + ]) + remote.run(args=[ + 'sudo', 'cat', '/etc/ssh/sshd_config', + run.Raw('|'), + 'grep', 'TrustedUserCAKeys' + ]) + bootstrap_remote.run(args=[ + 'sudo', 'ls', '/root/' + ]) + + ctx.ca_signed_key_info = {} + ctx.ca_signed_key_info['ca-key'] = '/root/ca-key' + ctx.ca_signed_key_info['ca-key-pub'] = '/root/ca-key.pub' + ctx.ca_signed_key_info['private-key'] = '/root/cephadm-ssh-key' + ctx.ca_signed_key_info['ca-signed-cert'] = '/root/cephadm-ssh-key-cert.pub' + + try: + yield + finally: + pass + +@contextlib.contextmanager +def ceph_bootstrap(ctx, config): + """ + Bootstrap ceph cluster. 
+ + :param ctx: the argparse.Namespace object + :param config: the config dict + """ + cluster_name = config['cluster'] + testdir = teuthology.get_testdir(ctx) + fsid = ctx.ceph[cluster_name].fsid + + bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote + first_mon = ctx.ceph[cluster_name].first_mon + first_mon_role = ctx.ceph[cluster_name].first_mon_role + mons = ctx.ceph[cluster_name].mons + + ctx.cluster.run(args=[ + 'sudo', 'mkdir', '-p', '/etc/ceph', + ]); + ctx.cluster.run(args=[ + 'sudo', 'chmod', '777', '/etc/ceph', + ]); + try: + # write seed config + log.info('Writing seed config...') + conf_fp = BytesIO() + seed_config = build_initial_config(ctx, config) + seed_config.write(conf_fp) + bootstrap_remote.write_file( + path='{}/seed.{}.conf'.format(testdir, cluster_name), + data=conf_fp.getvalue()) + log.debug('Final config:\n' + conf_fp.getvalue().decode()) + ctx.ceph[cluster_name].conf = seed_config + + # register initial daemons + ctx.daemons.register_daemon( + bootstrap_remote, 'mon', first_mon, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild('mon.' + first_mon), + wait=False, + started=True, + ) + if not ctx.ceph[cluster_name].roleless: + first_mgr = ctx.ceph[cluster_name].first_mgr + ctx.daemons.register_daemon( + bootstrap_remote, 'mgr', first_mgr, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild('mgr.' + first_mgr), + wait=False, + started=True, + ) + + # bootstrap + log.info('Bootstrapping...') + cmd = [ + 'sudo', + ctx.cephadm, + '--image', ctx.ceph[cluster_name].image, + '-v', + 'bootstrap', + '--fsid', fsid, + '--config', '{}/seed.{}.conf'.format(testdir, cluster_name), + '--output-config', '/etc/ceph/{}.conf'.format(cluster_name), + '--output-keyring', + '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + ] + + if not config.get("use-ca-signed-key", False): + cmd += ['--output-pub-ssh-key', '{}/{}.pub'.format(testdir, cluster_name)] + else: + # ctx.ca_signed_key_info should have been set up in + # setup_ca_signed_keys function which we expect to have + # run before bootstrap if use-ca-signed-key is true + signed_key_info = ctx.ca_signed_key_info + cmd += [ + "--ssh-private-key", signed_key_info['private-key'], + "--ssh-signed-cert", signed_key_info['ca-signed-cert'], + ] + + if config.get("no_cgroups_split") is True: + cmd.insert(cmd.index("bootstrap"), "--no-cgroups-split") + + if config.get('registry-login'): + registry = config['registry-login'] + cmd += [ + "--registry-url", registry['url'], + "--registry-username", registry['username'], + "--registry-password", registry['password'], + ] + + if not ctx.ceph[cluster_name].roleless: + cmd += [ + '--mon-id', first_mon, + '--mgr-id', first_mgr, + '--orphan-initial-daemons', # we will do it explicitly! 
+ '--skip-monitoring-stack', # we'll provision these explicitly + ] + + if mons[first_mon_role].startswith('['): + cmd += ['--mon-addrv', mons[first_mon_role]] + else: + cmd += ['--mon-ip', mons[first_mon_role]] + if config.get('skip_dashboard'): + cmd += ['--skip-dashboard'] + if config.get('skip_monitoring_stack'): + cmd += ['--skip-monitoring-stack'] + if config.get('single_host_defaults'): + cmd += ['--single-host-defaults'] + if not config.get('avoid_pacific_features', False): + cmd += ['--skip-admin-label'] + # bootstrap makes the keyring root 0600, so +r it for our purposes + cmd += [ + run.Raw('&&'), + 'sudo', 'chmod', '+r', + '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + ] + bootstrap_remote.run(args=cmd) + + # fetch keys and configs + log.info('Fetching config...') + ctx.ceph[cluster_name].config_file = \ + bootstrap_remote.read_file(f'/etc/ceph/{cluster_name}.conf') + log.info('Fetching client.admin keyring...') + ctx.ceph[cluster_name].admin_keyring = \ + bootstrap_remote.read_file(f'/etc/ceph/{cluster_name}.client.admin.keyring') + log.info('Fetching mon keyring...') + ctx.ceph[cluster_name].mon_keyring = \ + bootstrap_remote.read_file(f'/var/lib/ceph/{fsid}/mon.{first_mon}/keyring', sudo=True) + + if not config.get("use-ca-signed-key", False): + # fetch ssh key, distribute to additional nodes + log.info('Fetching pub ssh key...') + ssh_pub_key = bootstrap_remote.read_file( + f'{testdir}/{cluster_name}.pub').decode('ascii').strip() + + log.info('Installing pub ssh key for root users...') + ctx.cluster.run(args=[ + 'sudo', 'install', '-d', '-m', '0700', '/root/.ssh', + run.Raw('&&'), + 'echo', ssh_pub_key, + run.Raw('|'), + 'sudo', 'tee', '-a', '/root/.ssh/authorized_keys', + run.Raw('&&'), + 'sudo', 'chmod', '0600', '/root/.ssh/authorized_keys', + ]) + + # set options + if config.get('allow_ptrace', True): + _shell(ctx, cluster_name, bootstrap_remote, + ['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace', 'true']) + + if not config.get('avoid_pacific_features', False): + log.info('Distributing conf and client.admin keyring to all hosts + 0755') + _shell(ctx, cluster_name, bootstrap_remote, + ['ceph', 'orch', 'client-keyring', 'set', 'client.admin', + '*', '--mode', '0755'], + check_status=False) + + # add other hosts + for remote in ctx.cluster.remotes.keys(): + if remote == bootstrap_remote: + continue + + # note: this may be redundant (see above), but it avoids + # us having to wait for cephadm to do it. + log.info('Writing (initial) conf and keyring to %s' % remote.shortname) + remote.write_file( + path='/etc/ceph/{}.conf'.format(cluster_name), + data=ctx.ceph[cluster_name].config_file) + remote.write_file( + path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + data=ctx.ceph[cluster_name].admin_keyring) + + log.info('Adding host %s to orchestrator...' % remote.shortname) + _shell(ctx, cluster_name, bootstrap_remote, [ + 'ceph', 'orch', 'host', 'add', + remote.shortname + ]) + r = _shell(ctx, cluster_name, bootstrap_remote, + ['ceph', 'orch', 'host', 'ls', '--format=json'], + stdout=StringIO()) + hosts = [node['hostname'] for node in json.loads(r.stdout.getvalue())] + assert remote.shortname in hosts + + yield + + finally: + log.info('Cleaning up testdir ceph.* files...') + ctx.cluster.run(args=[ + 'rm', '-f', + '{}/seed.{}.conf'.format(testdir, cluster_name), + '{}/{}.pub'.format(testdir, cluster_name), + ]) + + log.info('Stopping all daemons...') + + # this doesn't block until they are all stopped... 
+ #ctx.cluster.run(args=['sudo', 'systemctl', 'stop', 'ceph.target']) + + # stop the daemons we know + for role in ctx.daemons.resolve_role_list(None, CEPH_ROLE_TYPES, True): + cluster, type_, id_ = teuthology.split_role(role) + try: + ctx.daemons.get_daemon(type_, id_, cluster).stop() + except Exception: + log.exception(f'Failed to stop "{role}"') + raise + + # tear down anything left (but leave the logs behind) + ctx.cluster.run( + args=[ + 'sudo', + ctx.cephadm, + 'rm-cluster', + '--fsid', fsid, + '--force', + '--keep-logs', + ], + check_status=False, # may fail if upgrading from old cephadm + ) + + # clean up /etc/ceph + ctx.cluster.run(args=[ + 'sudo', 'rm', '-f', + '/etc/ceph/{}.conf'.format(cluster_name), + '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + ]) + + +@contextlib.contextmanager +def ceph_mons(ctx, config): + """ + Deploy any additional mons + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + try: + daemons = {} + if config.get('add_mons_via_daemon_add'): + # This is the old way of adding mons that works with the (early) octopus + # cephadm scheduler. + num_mons = 1 + for remote, roles in ctx.cluster.remotes.items(): + for mon in [r for r in roles + if teuthology.is_type('mon', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(mon) + if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon: + continue + log.info('Adding %s on %s' % (mon, remote.shortname)) + num_mons += 1 + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'daemon', 'add', 'mon', + remote.shortname + ':' + ctx.ceph[cluster_name].mons[mon] + '=' + id_, + ]) + ctx.daemons.register_daemon( + remote, 'mon', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(mon), + wait=False, + started=True, + ) + daemons[mon] = (remote, id_) + + with contextutil.safe_while(sleep=1, tries=180) as proceed: + while proceed(): + log.info('Waiting for %d mons in monmap...' % (num_mons)) + r = _shell( + ctx=ctx, + cluster_name=cluster_name, + remote=remote, + args=[ + 'ceph', 'mon', 'dump', '-f', 'json', + ], + stdout=StringIO(), + ) + j = json.loads(r.stdout.getvalue()) + if len(j['mons']) == num_mons: + break + else: + nodes = [] + for remote, roles in ctx.cluster.remotes.items(): + for mon in [r for r in roles + if teuthology.is_type('mon', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(mon) + log.info('Adding %s on %s' % (mon, remote.shortname)) + nodes.append(remote.shortname + + ':' + ctx.ceph[cluster_name].mons[mon] + + '=' + id_) + if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mon: + continue + daemons[mon] = (remote, id_) + + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'mon', + str(len(nodes)) + ';' + ';'.join(nodes)] + ) + for mgr, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, 'mon', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(mon), + wait=False, + started=True, + ) + + with contextutil.safe_while(sleep=1, tries=180) as proceed: + while proceed(): + log.info('Waiting for %d mons in monmap...' 
% (len(nodes))) + r = _shell( + ctx=ctx, + cluster_name=cluster_name, + remote=remote, + args=[ + 'ceph', 'mon', 'dump', '-f', 'json', + ], + stdout=StringIO(), + ) + j = json.loads(r.stdout.getvalue()) + if len(j['mons']) == len(nodes): + break + + # refresh our (final) ceph.conf file + bootstrap_remote = ctx.ceph[cluster_name].bootstrap_remote + log.info('Generating final ceph.conf file...') + r = _shell( + ctx=ctx, + cluster_name=cluster_name, + remote=bootstrap_remote, + args=[ + 'ceph', 'config', 'generate-minimal-conf', + ], + stdout=StringIO(), + ) + ctx.ceph[cluster_name].config_file = r.stdout.getvalue() + + yield + + finally: + pass + + +@contextlib.contextmanager +def ceph_mgrs(ctx, config): + """ + Deploy any additional mgrs + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + try: + nodes = [] + daemons = {} + for remote, roles in ctx.cluster.remotes.items(): + for mgr in [r for r in roles + if teuthology.is_type('mgr', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(mgr) + log.info('Adding %s on %s' % (mgr, remote.shortname)) + nodes.append(remote.shortname + '=' + id_) + if c_ == cluster_name and id_ == ctx.ceph[cluster_name].first_mgr: + continue + daemons[mgr] = (remote, id_) + if nodes: + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'mgr', + str(len(nodes)) + ';' + ';'.join(nodes)] + ) + for mgr, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, 'mgr', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(mgr), + wait=False, + started=True, + ) + + yield + + finally: + pass + + +@contextlib.contextmanager +def ceph_osds(ctx, config): + """ + Deploy OSDs + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + try: + log.info('Deploying OSDs...') + + # provision OSDs in numeric order + id_to_remote = {} + devs_by_remote = {} + for remote, roles in ctx.cluster.remotes.items(): + devs_by_remote[remote] = teuthology.get_scratch_devices(remote) + for osd in [r for r in roles + if teuthology.is_type('osd', cluster_name)(r)]: + _, _, id_ = teuthology.split_role(osd) + id_to_remote[int(id_)] = (osd, remote) + + cur = 0 + for osd_id in sorted(id_to_remote.keys()): + osd, remote = id_to_remote[osd_id] + _, _, id_ = teuthology.split_role(osd) + assert int(id_) == cur + devs = devs_by_remote[remote] + assert devs ## FIXME ## + dev = devs.pop() + if all(_ in dev for _ in ('lv', 'vg')): + short_dev = dev.replace('/dev/', '') + else: + short_dev = dev + log.info('Deploying %s on %s with %s...' 
% ( + osd, remote.shortname, dev)) + _shell(ctx, cluster_name, remote, [ + 'ceph-volume', 'lvm', 'zap', dev]) + add_osd_args = ['ceph', 'orch', 'daemon', 'add', 'osd', + remote.shortname + ':' + short_dev] + osd_method = config.get('osd_method') + if osd_method: + add_osd_args.append(osd_method) + _shell(ctx, cluster_name, remote, add_osd_args) + ctx.daemons.register_daemon( + remote, 'osd', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(osd), + wait=False, + started=True, + ) + cur += 1 + + if cur == 0: + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'osd', '--all-available-devices', + ]) + # expect the number of scratch devs + num_osds = sum(map(len, devs_by_remote.values())) + assert num_osds + else: + # expect the number of OSDs we created + num_osds = cur + + log.info(f'Waiting for {num_osds} OSDs to come up...') + with contextutil.safe_while(sleep=1, tries=120) as proceed: + while proceed(): + p = _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + ['ceph', 'osd', 'stat', '-f', 'json'], stdout=StringIO()) + j = json.loads(p.stdout.getvalue()) + if int(j.get('num_up_osds', 0)) == num_osds: + break; + + if not hasattr(ctx, 'managers'): + ctx.managers = {} + ctx.managers[cluster_name] = CephManager( + ctx.ceph[cluster_name].bootstrap_remote, + ctx=ctx, + logger=log.getChild('ceph_manager.' + cluster_name), + cluster=cluster_name, + cephadm=True, + ) + + yield + finally: + pass + + +@contextlib.contextmanager +def ceph_mdss(ctx, config): + """ + Deploy MDSss + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + nodes = [] + daemons = {} + for remote, roles in ctx.cluster.remotes.items(): + for role in [r for r in roles + if teuthology.is_type('mds', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(role) + log.info('Adding %s on %s' % (role, remote.shortname)) + nodes.append(remote.shortname + '=' + id_) + daemons[role] = (remote, id_) + if nodes: + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'mds', + 'all', + str(len(nodes)) + ';' + ';'.join(nodes)] + ) + for role, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, 'mds', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(role), + wait=False, + started=True, + ) + + yield + +@contextlib.contextmanager +def cephfs_setup(ctx, config): + mdss = list(teuthology.all_roles_of_type(ctx.cluster, 'mds')) + + # If there are any MDSs, then create a filesystem for them to use + # Do this last because requires mon cluster to be up and running + if len(mdss) > 0: + log.info('Setting up CephFS filesystem(s)...') + cephfs_config = config.get('cephfs', {}) + fs_configs = cephfs_config.pop('fs', [{'name': 'cephfs'}]) + set_allow_multifs = len(fs_configs) > 1 + + # wait for standbys to become available (slow due to valgrind, perhaps) + mdsc = MDSCluster(ctx) + with contextutil.safe_while(sleep=2,tries=150) as proceed: + while proceed(): + if len(mdsc.get_standby_daemons()) >= len(mdss): + break + + fss = [] + for fs_config in fs_configs: + assert isinstance(fs_config, dict) + name = fs_config.pop('name') + temp = deepcopy(cephfs_config) + teuthology.deep_merge(temp, fs_config) + subvols = config.get('subvols', None) + if subvols: + teuthology.deep_merge(temp, {'subvols': subvols}) + fs = Filesystem(ctx, fs_config=temp, name=name, create=True) + if set_allow_multifs: + fs.set_allow_multifs() + set_allow_multifs = False + fss.append(fs) + + yield + + for fs in fss: + fs.destroy() + else: + yield + 
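The service deployers here (mgr, mds, and the monitoring stack below) all hand `ceph orch apply` the same placement argument: a daemon count, a ';', then ';'-joined host=id entries, with the mon variant additionally embedding the mon address as host:addr=id. A minimal sketch of that string construction, using a simplified '<type>.<id>' role format in place of teuthology.split_role:

    def build_placement(remotes_and_roles, daemon_type):
        """Build the '<count>;host1=id1;host2=id2' placement string passed
        to `ceph orch apply <daemon_type>`.

        remotes_and_roles: [(shortname, ['<type>.<id>', ...]), ...] -- a
        simplification of teuthology's remote/role bookkeeping."""
        nodes = []
        for shortname, roles in remotes_and_roles:
            for role in roles:
                type_, _, id_ = role.partition('.')
                if type_ == daemon_type:
                    nodes.append('%s=%s' % (shortname, id_))
        return '%d;%s' % (len(nodes), ';'.join(nodes))

    # example with hypothetical hosts:
    #   build_placement([('smithi001', ['mon.a', 'mgr.x']),
    #                    ('smithi002', ['mon.b', 'mgr.y'])], 'mgr')
    #   -> '2;smithi001=x;smithi002=y'
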
+@contextlib.contextmanager +def ceph_monitoring(daemon_type, ctx, config): + """ + Deploy prometheus, node-exporter, etc. + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + nodes = [] + daemons = {} + for remote, roles in ctx.cluster.remotes.items(): + for role in [r for r in roles + if teuthology.is_type(daemon_type, cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(role) + log.info('Adding %s on %s' % (role, remote.shortname)) + nodes.append(remote.shortname + '=' + id_) + daemons[role] = (remote, id_) + if nodes: + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', daemon_type, + str(len(nodes)) + ';' + ';'.join(nodes)] + ) + for role, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, daemon_type, id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(role), + wait=False, + started=True, + ) + + yield + + +@contextlib.contextmanager +def ceph_rgw(ctx, config): + """ + Deploy rgw + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + nodes = {} + daemons = {} + for remote, roles in ctx.cluster.remotes.items(): + for role in [r for r in roles + if teuthology.is_type('rgw', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(role) + log.info('Adding %s on %s' % (role, remote.shortname)) + svc = '.'.join(id_.split('.')[0:2]) + if svc not in nodes: + nodes[svc] = [] + nodes[svc].append(remote.shortname + '=' + id_) + daemons[role] = (remote, id_) + + for svc, nodes in nodes.items(): + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'rgw', svc, + '--placement', + str(len(nodes)) + ';' + ';'.join(nodes)] + ) + for role, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, 'rgw', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(role), + wait=False, + started=True, + ) + + yield + + +@contextlib.contextmanager +def ceph_iscsi(ctx, config): + """ + Deploy iSCSIs + """ + cluster_name = config['cluster'] + fsid = ctx.ceph[cluster_name].fsid + + nodes = [] + daemons = {} + ips = [] + + for remote, roles in ctx.cluster.remotes.items(): + for role in [r for r in roles + if teuthology.is_type('iscsi', cluster_name)(r)]: + c_, _, id_ = teuthology.split_role(role) + log.info('Adding %s on %s' % (role, remote.shortname)) + nodes.append(remote.shortname + '=' + id_) + daemons[role] = (remote, id_) + ips.append(remote.ip_address) + trusted_ip_list = ','.join(ips) + if nodes: + poolname = 'datapool' + # ceph osd pool create datapool 3 3 replicated + _shell(ctx, cluster_name, remote, [ + 'ceph', 'osd', 'pool', 'create', + poolname, '3', '3', 'replicated'] + ) + + _shell(ctx, cluster_name, remote, [ + 'rbd', 'pool', 'init', poolname] + ) + + # ceph orch apply iscsi datapool (admin)user (admin)password + _shell(ctx, cluster_name, remote, [ + 'ceph', 'orch', 'apply', 'iscsi', + poolname, 'admin', 'admin', + '--trusted_ip_list', trusted_ip_list, + '--placement', str(len(nodes)) + ';' + ';'.join(nodes)] + ) + + # used by iscsi client to identify valid gateway ip's + conf_data = dedent(f""" + [config] + trusted_ip_list = {trusted_ip_list} + """) + distribute_iscsi_gateway_cfg(ctx, conf_data) + + for role, i in daemons.items(): + remote, id_ = i + ctx.daemons.register_daemon( + remote, 'iscsi', id_, + cluster=cluster_name, + fsid=fsid, + logger=log.getChild(role), + wait=False, + started=True, + ) + + yield + + +@contextlib.contextmanager +def ceph_clients(ctx, config): + cluster_name = config['cluster'] + + log.info('Setting up client 
nodes...') + clients = ctx.cluster.only(teuthology.is_type('client', cluster_name)) + for remote, roles_for_host in clients.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', + cluster_name): + name = teuthology.ceph_role(role) + client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name, + name) + r = _shell( + ctx=ctx, + cluster_name=cluster_name, + remote=remote, + args=[ + 'ceph', 'auth', + 'get-or-create', name, + 'mon', 'allow *', + 'osd', 'allow *', + 'mds', 'allow *', + 'mgr', 'allow *', + ], + stdout=StringIO(), + ) + keyring = r.stdout.getvalue() + remote.sudo_write_file(client_keyring, keyring, mode='0644') + yield + + +@contextlib.contextmanager +def ceph_initial(): + try: + yield + finally: + log.info('Teardown complete') + + +## public methods +@contextlib.contextmanager +def stop(ctx, config): + """ + Stop ceph daemons + + For example:: + tasks: + - ceph.stop: [mds.*] + + tasks: + - ceph.stop: [osd.0, osd.2] + + tasks: + - ceph.stop: + daemons: [osd.0, osd.2] + + """ + if config is None: + config = {} + elif isinstance(config, list): + config = {'daemons': config} + + daemons = ctx.daemons.resolve_role_list( + config.get('daemons', None), CEPH_ROLE_TYPES, True) + clusters = set() + + for role in daemons: + cluster, type_, id_ = teuthology.split_role(role) + ctx.daemons.get_daemon(type_, id_, cluster).stop() + clusters.add(cluster) + +# for cluster in clusters: +# ctx.ceph[cluster].watchdog.stop() +# ctx.ceph[cluster].watchdog.join() + + yield + + +def shell(ctx, config): + """ + Execute (shell) commands + """ + cluster_name = config.get('cluster', 'ceph') + + args = [] + for k in config.pop('env', []): + args.extend(['-e', k + '=' + ctx.config.get(k, '')]) + for k in config.pop('volumes', []): + args.extend(['-v', k]) + + if 'all-roles' in config and len(config) == 1: + a = config['all-roles'] + roles = teuthology.all_roles(ctx.cluster) + config = dict((id_, a) for id_ in roles if not id_.startswith('host.')) + elif 'all-hosts' in config and len(config) == 1: + a = config['all-hosts'] + roles = teuthology.all_roles(ctx.cluster) + config = dict((id_, a) for id_ in roles if id_.startswith('host.')) + + for role, cmd in config.items(): + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Running commands on role %s host %s', role, remote.name) + if isinstance(cmd, list): + for c in cmd: + _shell(ctx, cluster_name, remote, + ['bash', '-c', subst_vip(ctx, c)], + extra_cephadm_args=args) + else: + assert isinstance(cmd, str) + _shell(ctx, cluster_name, remote, + ['bash', '-ex', '-c', subst_vip(ctx, cmd)], + extra_cephadm_args=args) + + +def apply(ctx, config): + """ + Apply spec + + tasks: + - cephadm.apply: + specs: + - service_type: rgw + service_id: foo + spec: + rgw_frontend_port: 8000 + - service_type: rgw + service_id: bar + spec: + rgw_frontend_port: 9000 + zone: bar + realm: asdf + + """ + cluster_name = config.get('cluster', 'ceph') + + specs = config.get('specs', []) + y = subst_vip(ctx, yaml.dump_all(specs)) + + log.info(f'Applying spec(s):\n{y}') + _shell( + ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + ['ceph', 'orch', 'apply', '-i', '-'], + stdin=y, + ) + + +def wait_for_service(ctx, config): + """ + Wait for a service to be fully started + + tasks: + - cephadm.wait_for_service: + service: rgw.foo + timeout: 60 # defaults to 300 + + """ + cluster_name = config.get('cluster', 'ceph') + timeout = config.get('timeout', 300) + service = config.get('service') + assert service + + log.info( + f'Waiting 
for {cluster_name} service {service} to start (timeout {timeout})...' + ) + with contextutil.safe_while(sleep=1, tries=timeout) as proceed: + while proceed(): + r = _shell( + ctx=ctx, + cluster_name=cluster_name, + remote=ctx.ceph[cluster_name].bootstrap_remote, + args=[ + 'ceph', 'orch', 'ls', '-f', 'json', + ], + stdout=StringIO(), + ) + j = json.loads(r.stdout.getvalue()) + svc = None + for s in j: + if s['service_name'] == service: + svc = s + break + if svc: + log.info( + f"{service} has {s['status']['running']}/{s['status']['size']}" + ) + if s['status']['running'] == s['status']['size']: + break + + +@contextlib.contextmanager +def tweaked_option(ctx, config): + """ + set an option, and then restore it with its original value + + Note, due to the way how tasks are executed/nested, it's not suggested to + use this method as a standalone task. otherwise, it's likely that it will + restore the tweaked option at the /end/ of 'tasks' block. + """ + saved_options = {} + # we can complicate this when necessary + options = ['mon-health-to-clog'] + type_, id_ = 'mon', '*' + cluster = config.get('cluster', 'ceph') + manager = ctx.managers[cluster] + if id_ == '*': + get_from = next(teuthology.all_roles_of_type(ctx.cluster, type_)) + else: + get_from = id_ + for option in options: + if option not in config: + continue + value = 'true' if config[option] else 'false' + option = option.replace('-', '_') + old_value = manager.get_config(type_, get_from, option) + if value != old_value: + saved_options[option] = old_value + manager.inject_args(type_, id_, option, value) + yield + for option, value in saved_options.items(): + manager.inject_args(type_, id_, option, value) + + +@contextlib.contextmanager +def restart(ctx, config): + """ + restart ceph daemons + + For example:: + tasks: + - ceph.restart: [all] + + For example:: + tasks: + - ceph.restart: [osd.0, mon.1, mds.*] + + or:: + + tasks: + - ceph.restart: + daemons: [osd.0, mon.1] + wait-for-healthy: false + wait-for-osds-up: true + + :param ctx: Context + :param config: Configuration + """ + if config is None: + config = {} + elif isinstance(config, list): + config = {'daemons': config} + + daemons = ctx.daemons.resolve_role_list( + config.get('daemons', None), CEPH_ROLE_TYPES, True) + clusters = set() + + log.info('daemons %s' % daemons) + with tweaked_option(ctx, config): + for role in daemons: + cluster, type_, id_ = teuthology.split_role(role) + d = ctx.daemons.get_daemon(type_, id_, cluster) + assert d, 'daemon %s does not exist' % role + d.stop() + if type_ == 'osd': + ctx.managers[cluster].mark_down_osd(id_) + d.restart() + clusters.add(cluster) + + if config.get('wait-for-healthy', True): + for cluster in clusters: + healthy(ctx=ctx, config=dict(cluster=cluster)) + if config.get('wait-for-osds-up', False): + for cluster in clusters: + ctx.managers[cluster].wait_for_all_osds_up() + yield + + +@contextlib.contextmanager +def distribute_config_and_admin_keyring(ctx, config): + """ + Distribute a sufficient config and keyring for clients + """ + cluster_name = config['cluster'] + log.info('Distributing (final) config and client.admin keyring...') + for remote, roles in ctx.cluster.remotes.items(): + remote.write_file( + '/etc/ceph/{}.conf'.format(cluster_name), + ctx.ceph[cluster_name].config_file, + sudo=True) + remote.write_file( + path='/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + data=ctx.ceph[cluster_name].admin_keyring, + sudo=True) + try: + yield + finally: + ctx.cluster.run(args=[ + 'sudo', 'rm', '-f', + 
'/etc/ceph/{}.conf'.format(cluster_name), + '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + ]) + + +@contextlib.contextmanager +def crush_setup(ctx, config): + cluster_name = config['cluster'] + + profile = config.get('crush_tunables', 'default') + log.info('Setting crush tunables to %s', profile) + _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + args=['ceph', 'osd', 'crush', 'tunables', profile]) + yield + + +@contextlib.contextmanager +def create_rbd_pool(ctx, config): + if config.get('create_rbd_pool', False): + cluster_name = config['cluster'] + log.info('Waiting for OSDs to come up') + teuthology.wait_until_osds_up( + ctx, + cluster=ctx.cluster, + remote=ctx.ceph[cluster_name].bootstrap_remote, + ceph_cluster=cluster_name, + ) + log.info('Creating RBD pool') + _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + args=['sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'pool', 'create', 'rbd', '8']) + _shell(ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + args=['sudo', 'ceph', '--cluster', cluster_name, + 'osd', 'pool', 'application', 'enable', + 'rbd', 'rbd', '--yes-i-really-mean-it' + ]) + yield + + +@contextlib.contextmanager +def _bypass(): + yield + + +@contextlib.contextmanager +def initialize_config(ctx, config): + cluster_name = config['cluster'] + testdir = teuthology.get_testdir(ctx) + + ctx.ceph[cluster_name].thrashers = [] + # fixme: setup watchdog, ala ceph.py + + ctx.ceph[cluster_name].roleless = False # see below + + first_ceph_cluster = False + if not hasattr(ctx, 'daemons'): + first_ceph_cluster = True + + # cephadm mode? + if 'cephadm_mode' not in config: + config['cephadm_mode'] = 'root' + assert config['cephadm_mode'] in ['root', 'cephadm-package'] + if config['cephadm_mode'] == 'root': + ctx.cephadm = testdir + '/cephadm' + else: + ctx.cephadm = 'cephadm' # in the path + + if first_ceph_cluster: + # FIXME: this is global for all clusters + ctx.daemons = DaemonGroup( + use_cephadm=ctx.cephadm) + + # uuid + fsid = str(uuid.uuid1()) + log.info('Cluster fsid is %s' % fsid) + ctx.ceph[cluster_name].fsid = fsid + + # mon ips + log.info('Choosing monitor IPs and ports...') + remotes_and_roles = ctx.cluster.remotes.items() + ips = [host for (host, port) in + (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] + + if config.get('roleless', False): + # mons will be named after hosts + first_mon = None + max_mons = config.get('max_mons', 5) + for remote, _ in remotes_and_roles: + ctx.cluster.remotes[remote].append('mon.' + remote.shortname) + if not first_mon: + first_mon = remote.shortname + bootstrap_remote = remote + max_mons -= 1 + if not max_mons: + break + log.info('No mon roles; fabricating mons') + + roles = [role_list for (remote, role_list) in ctx.cluster.remotes.items()] + + ctx.ceph[cluster_name].mons = get_mons( + roles, ips, cluster_name, + mon_bind_msgr2=config.get('mon_bind_msgr2', True), + mon_bind_addrvec=config.get('mon_bind_addrvec', True), + ) + log.info('Monitor IPs: %s' % ctx.ceph[cluster_name].mons) + + if config.get('roleless', False): + ctx.ceph[cluster_name].roleless = True + ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote + ctx.ceph[cluster_name].first_mon = first_mon + ctx.ceph[cluster_name].first_mon_role = 'mon.' 
+ first_mon + else: + first_mon_role = sorted(ctx.ceph[cluster_name].mons.keys())[0] + _, _, first_mon = teuthology.split_role(first_mon_role) + (bootstrap_remote,) = ctx.cluster.only(first_mon_role).remotes.keys() + log.info('First mon is mon.%s on %s' % (first_mon, + bootstrap_remote.shortname)) + ctx.ceph[cluster_name].bootstrap_remote = bootstrap_remote + ctx.ceph[cluster_name].first_mon = first_mon + ctx.ceph[cluster_name].first_mon_role = first_mon_role + + others = ctx.cluster.remotes[bootstrap_remote] + mgrs = sorted([r for r in others + if teuthology.is_type('mgr', cluster_name)(r)]) + if not mgrs: + raise RuntimeError('no mgrs on the same host as first mon %s' % first_mon) + _, _, first_mgr = teuthology.split_role(mgrs[0]) + log.info('First mgr is %s' % (first_mgr)) + ctx.ceph[cluster_name].first_mgr = first_mgr + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy ceph cluster using cephadm + + For example, teuthology.yaml can contain the 'defaults' section: + + defaults: + cephadm: + containers: + image: 'quay.io/ceph-ci/ceph' + + Using overrides makes it possible to customize it per run. + The equivalent 'overrides' section looks like: + + overrides: + cephadm: + containers: + image: 'quay.io/ceph-ci/ceph' + registry-login: + url: registry-url + username: registry-user + password: registry-password + + :param ctx: the argparse.Namespace object + :param config: the config dict + """ + if config is None: + config = {} + + assert isinstance(config, dict), \ + "task only supports a dictionary for configuration" + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph', {})) + teuthology.deep_merge(config, overrides.get('cephadm', {})) + log.info('Config: ' + str(config)) + + # set up cluster context + if not hasattr(ctx, 'ceph'): + ctx.ceph = {} + if 'cluster' not in config: + config['cluster'] = 'ceph' + cluster_name = config['cluster'] + if cluster_name not in ctx.ceph: + ctx.ceph[cluster_name] = argparse.Namespace() + ctx.ceph[cluster_name].bootstrapped = False + + # image + teuth_defaults = teuth_config.get('defaults', {}) + cephadm_defaults = teuth_defaults.get('cephadm', {}) + containers_defaults = cephadm_defaults.get('containers', {}) + container_image_name = containers_defaults.get('image', None) + + containers = config.get('containers', {}) + container_image_name = containers.get('image', container_image_name) + + if not hasattr(ctx.ceph[cluster_name], 'image'): + ctx.ceph[cluster_name].image = config.get('image') + ref = ctx.config.get("branch", "main") + if not ctx.ceph[cluster_name].image: + if not container_image_name: + raise Exception("Configuration error occurred. " + "The 'image' value is undefined for 'cephadm' task. 
" + "Please provide corresponding options in the task's " + "config, task 'overrides', or teuthology 'defaults' " + "section.") + sha1 = config.get('sha1') + flavor = config.get('flavor', 'default') + + if sha1: + if flavor == "crimson": + ctx.ceph[cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor + else: + ctx.ceph[cluster_name].image = container_image_name + ':' + sha1 + ref = sha1 + else: + # fall back to using the branch value + ctx.ceph[cluster_name].image = container_image_name + ':' + ref + log.info('Cluster image is %s' % ctx.ceph[cluster_name].image) + + + with contextutil.nested( + #if the cluster is already bootstrapped bypass corresponding methods + lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped) \ + else initialize_config(ctx=ctx, config=config), + lambda: ceph_initial(), + lambda: normalize_hostnames(ctx=ctx), + lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped) \ + else download_cephadm(ctx=ctx, config=config, ref=ref), + lambda: ceph_log(ctx=ctx, config=config), + lambda: ceph_crash(ctx=ctx, config=config), + lambda: pull_image(ctx=ctx, config=config), + lambda: _bypass() if not (config.get('use-ca-signed-key', False)) \ + else setup_ca_signed_keys(ctx, config), + lambda: _bypass() if (ctx.ceph[cluster_name].bootstrapped) \ + else ceph_bootstrap(ctx, config), + lambda: crush_setup(ctx=ctx, config=config), + lambda: ceph_mons(ctx=ctx, config=config), + lambda: distribute_config_and_admin_keyring(ctx=ctx, config=config), + lambda: ceph_mgrs(ctx=ctx, config=config), + lambda: ceph_osds(ctx=ctx, config=config), + lambda: ceph_mdss(ctx=ctx, config=config), + lambda: cephfs_setup(ctx=ctx, config=config), + lambda: ceph_rgw(ctx=ctx, config=config), + lambda: ceph_iscsi(ctx=ctx, config=config), + lambda: ceph_monitoring('prometheus', ctx=ctx, config=config), + lambda: ceph_monitoring('node-exporter', ctx=ctx, config=config), + lambda: ceph_monitoring('alertmanager', ctx=ctx, config=config), + lambda: ceph_monitoring('grafana', ctx=ctx, config=config), + lambda: ceph_clients(ctx=ctx, config=config), + lambda: create_rbd_pool(ctx=ctx, config=config), + ): + try: + if config.get('wait-for-healthy', True): + healthy(ctx=ctx, config=config) + + log.info('Setup complete, yielding') + yield + + finally: + log.info('Teardown begin') + diff --git a/qa/tasks/cephadm_cases/__init__.py b/qa/tasks/cephadm_cases/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/tasks/cephadm_cases/__init__.py diff --git a/qa/tasks/cephadm_cases/test_cli.py b/qa/tasks/cephadm_cases/test_cli.py new file mode 100644 index 000000000..ca40a8cdb --- /dev/null +++ b/qa/tasks/cephadm_cases/test_cli.py @@ -0,0 +1,73 @@ +import json +import logging +import time + +from tasks.mgr.mgr_test_case import MgrTestCase +from teuthology.contextutil import safe_while + +log = logging.getLogger(__name__) + + +class TestCephadmCLI(MgrTestCase): + def _cmd(self, *args) -> str: + assert self.mgr_cluster is not None + return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + + def _orch_cmd(self, *args) -> str: + return self._cmd("orch", *args) + + def setUp(self): + super(TestCephadmCLI, self).setUp() + + def test_yaml(self): + """ + to prevent oddities like + + >>> import yaml + ... from collections import OrderedDict + ... 
assert yaml.dump(OrderedDict()) == '!!python/object/apply:collections.OrderedDict\\n- []\\n' + """ + out = self._orch_cmd('device', 'ls', '--format', 'yaml') + self.assertNotIn('!!python', out) + + out = self._orch_cmd('host', 'ls', '--format', 'yaml') + self.assertNotIn('!!python', out) + + out = self._orch_cmd('ls', '--format', 'yaml') + self.assertNotIn('!!python', out) + + out = self._orch_cmd('ps', '--format', 'yaml') + self.assertNotIn('!!python', out) + + out = self._orch_cmd('status', '--format', 'yaml') + self.assertNotIn('!!python', out) + + def test_pause(self): + self._orch_cmd('pause') + self.wait_for_health('CEPHADM_PAUSED', 60) + self._orch_cmd('resume') + self.wait_for_health_clear(60) + + def test_daemon_restart(self): + self._orch_cmd('daemon', 'stop', 'osd.0') + self.wait_for_health('OSD_DOWN', 60) + with safe_while(sleep=2, tries=30) as proceed: + while proceed(): + j = json.loads(self._orch_cmd('ps', '--format', 'json')) + d = {d['daemon_name']: d for d in j} + if d['osd.0']['status_desc'] != 'running': + break + time.sleep(5) + self._orch_cmd('daemon', 'start', 'osd.0') + self.wait_for_health_clear(120) + self._orch_cmd('daemon', 'restart', 'osd.0') + + def test_device_ls_wide(self): + self._orch_cmd('device', 'ls', '--wide') + + def test_cephfs_mirror(self): + self._orch_cmd('apply', 'cephfs-mirror') + self.wait_until_true(lambda: 'cephfs-mirror' in self._orch_cmd('ps'), 60) + self.wait_for_health_clear(60) + self._orch_cmd('rm', 'cephfs-mirror') + self.wait_until_true(lambda: 'cephfs-mirror' not in self._orch_cmd('ps'), 60) diff --git a/qa/tasks/cephadm_cases/test_cli_mon.py b/qa/tasks/cephadm_cases/test_cli_mon.py new file mode 100644 index 000000000..72aee094e --- /dev/null +++ b/qa/tasks/cephadm_cases/test_cli_mon.py @@ -0,0 +1,71 @@ +import json +import logging + +from tasks.mgr.mgr_test_case import MgrTestCase + +log = logging.getLogger(__name__) + + +class TestCephadmCLI(MgrTestCase): + + APPLY_MON_PERIOD = 60 + + def _cmd(self, *args) -> str: + assert self.mgr_cluster is not None + return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + + def _orch_cmd(self, *args) -> str: + return self._cmd("orch", *args) + + def setUp(self): + super(TestCephadmCLI, self).setUp() + + def _create_and_write_pool(self, pool_name): + # Create new pool and write to it, simulating a small workload. + self.mgr_cluster.mon_manager.create_pool(pool_name) + args = [ + "rados", "-p", pool_name, "bench", "30", "write", "-t", "16"] + self.mgr_cluster.admin_remote.run(args=args, wait=True) + + def _get_quorum_size(self) -> int: + # Evaluate if the quorum size of the cluster is correct. + # log the quorum_status before reducing the monitors + retstr = self._cmd('quorum_status') + log.info("test_apply_mon._check_quorum_size: %s" % json.dumps(retstr, indent=2)) + quorum_size = len(json.loads(retstr)['quorum']) # get quorum size + return quorum_size + + def _check_no_crashes(self): + # Evaluate if there are no crashes + # log the crash + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls', + ) + log.info("test_apply_mon._check_no_crashes: %s" % retstr) + self.assertEqual(0, len(retstr)) # check if there are no crashes + + def test_apply_mon_three(self): + # Evaluating the process of reducing the number of + # monitors from 5 to 3 and increasing the number of + # monitors from 3 to 5, using the `ceph orch apply mon <num>` command. 
+ + self.wait_until_equal(lambda : self._get_quorum_size(), 5, + timeout=self.APPLY_MON_PERIOD, period=10) + + self._orch_cmd('apply', 'mon', '3') # reduce the monitors from 5 -> 3 + + self._create_and_write_pool('test_pool1') + + self.wait_until_equal(lambda : self._get_quorum_size(), 3, + timeout=self.APPLY_MON_PERIOD, period=10) + + self._check_no_crashes() + + self._orch_cmd('apply', 'mon', '5') # increase the monitors from 3 -> 5 + + self._create_and_write_pool('test_pool2') + + self.wait_until_equal(lambda : self._get_quorum_size(), 5, + timeout=self.APPLY_MON_PERIOD, period=10) + + self._check_no_crashes()
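test_apply_mon_three drives the whole check through _get_quorum_size() plus the wait_until_equal() polling helper. A self-contained sketch of that shape, using an illustrative canned quorum_status document and timeout rather than real monitor output:

    import json
    import time

    def get_quorum_size(quorum_status_json):
        """Count the ranks listed under 'quorum' in `ceph quorum_status` output."""
        return len(json.loads(quorum_status_json)['quorum'])

    def wait_until_equal(get_fn, expect_val, timeout, period=10):
        """Poll get_fn() until it returns expect_val or the timeout elapses."""
        elapsed = 0
        while get_fn() != expect_val:
            if elapsed >= timeout:
                raise TimeoutError('still %r after %ds, wanted %r'
                                   % (get_fn(), elapsed, expect_val))
            time.sleep(period)
            elapsed += period

    # illustrative use with a canned status document:
    #   status = '{"quorum": [0, 1, 2]}'
    #   assert get_quorum_size(status) == 3
    #   wait_until_equal(lambda: get_quorum_size(status), 3, timeout=60)
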
\ No newline at end of file diff --git a/qa/tasks/cephfs/__init__.py b/qa/tasks/cephfs/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/tasks/cephfs/__init__.py diff --git a/qa/tasks/cephfs/caps_helper.py b/qa/tasks/cephfs/caps_helper.py new file mode 100644 index 000000000..ac9bc4401 --- /dev/null +++ b/qa/tasks/cephfs/caps_helper.py @@ -0,0 +1,195 @@ +""" +Helper methods to test that MON and MDS caps are enforced properly. +""" +from os.path import join as os_path_join +from logging import getLogger + +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +from teuthology.orchestra.run import Raw + + +log = getLogger(__name__) + + +class CapTester(CephFSTestCase): + """ + Test that MON and MDS caps are enforced. + + MDS caps are tested by exercising read-write permissions and MON caps are + tested using output of command "ceph fs ls". Besides, it provides + write_test_files() which creates test files at the given path on CephFS + mounts passed to it. + + USAGE: Call write_test_files() method at the beginning of the test and + once the caps that needs to be tested are assigned to the client and + CephFS be remount for caps to effective, call run_cap_tests(), + run_mon_cap_tests() or run_mds_cap_tests() as per the need. + """ + + def write_test_files(self, mounts, testpath=''): + """ + Exercising 'r' and 'w' access levels on a file on CephFS mount is + pretty routine across all tests for caps. Adding to method to write + that file will reduce clutter in these tests. + + This methods writes a fixed data in a file with a fixed name located + at the path passed in testpath for the given list of mounts. If + testpath is empty, the file is created at the root of the CephFS. + """ + dirname, filename = 'testdir', 'testfile' + self.test_set = [] + # XXX: The reason behind testpath[1:] below is that the testpath is + # supposed to contain a path inside CephFS (which might be passed as + # an absolute path). os.path.join() deletes all previous path + # components when it encounters a path component starting with '/'. + # Deleting the first '/' from the string in testpath ensures that + # previous path components are not deleted by os.path.join(). + if testpath: + testpath = testpath[1:] if testpath[0] == '/' else testpath + # XXX: passing just '/' screw up os.path.join() ahead. + if testpath == '/': + testpath = '' + + for mount_x in mounts: + log.info(f'creating test file on FS {mount_x.cephfs_name} ' + f'mounted at {mount_x.mountpoint}...') + dirpath = os_path_join(mount_x.hostfs_mntpt, testpath, dirname) + mount_x.run_shell(f'mkdir {dirpath}') + filepath = os_path_join(dirpath, filename) + # XXX: the reason behind adding filepathm, cephfs_name and both + # mntpts is to avoid a test bug where we mount cephfs1 but what + # ends up being mounted cephfs2. since filepath and filedata are + # identical, how would tests figure otherwise that they are + # accessing the right filename but on wrong CephFS. 
+ filedata = (f'filepath = {filepath}\n' + f'cephfs_name = {mount_x.cephfs_name}\n' + f'cephfs_mntpt = {mount_x.cephfs_mntpt}\n' + f'hostfs_mntpt = {mount_x.hostfs_mntpt}') + mount_x.write_file(filepath, filedata) + self.test_set.append((mount_x, filepath, filedata)) + log.info('test file created at {path} with data "{data}.') + + def run_cap_tests(self, perm, mntpt=None): + # TODO + #self.run_mon_cap_tests() + self.run_mds_cap_tests(perm, mntpt=mntpt) + + def _get_fsnames_from_moncap(self, moncap): + fsnames = [] + while moncap.find('fsname=') != -1: + fsname_first_char = moncap.index('fsname=') + len('fsname=') + + if ',' in moncap: + last = moncap.index(',') + fsname = moncap[fsname_first_char : last] + moncap = moncap.replace(moncap[0 : last+1], '') + else: + fsname = moncap[fsname_first_char : ] + moncap = moncap.replace(moncap[0 : ], '') + + fsnames.append(fsname) + + return fsnames + + def run_mon_cap_tests(self, def_fs, client_id): + """ + Check that MON cap is enforced for a client by searching for a Ceph + FS name in output of cmd "fs ls" executed with that client's caps. + + def_fs stands for default FS on Ceph cluster. + """ + get_cluster_cmd_op = def_fs.mon_manager.raw_cluster_cmd + + keyring = get_cluster_cmd_op(args=f'auth get client.{client_id}') + + moncap = None + for line in keyring.split('\n'): + if 'caps mon' in line: + moncap = line[line.find(' = "') + 4 : -1] + break + else: + raise RuntimeError('run_mon_cap_tests(): mon cap not found in ' + 'keyring. keyring -\n' + keyring) + + keyring_path = def_fs.admin_remote.mktemp(data=keyring) + + fsls = get_cluster_cmd_op( + args=f'fs ls --id {client_id} -k {keyring_path}') + log.info(f'output of fs ls cmd run by client.{client_id} -\n{fsls}') + + if 'fsname=' not in moncap: + log.info('no FS name is mentioned in moncap, client has ' + 'permission to list all files. moncap -\n{moncap}') + log.info('testing for presence of all FS names in output of ' + '"fs ls" command run by client.') + + fsls_admin = get_cluster_cmd_op(args='fs ls') + log.info('output of fs ls cmd run by admin -\n{fsls_admin}') + + self.assertEqual(fsls, fsls_admin) + return + + log.info('FS names are mentioned in moncap. moncap -\n{moncap}') + log.info('testing for presence of these FS names in output of ' + '"fs ls" command run by client.') + for fsname in self._get_fsnames_from_moncap(moncap): + self.assertIn('name: ' + fsname, fsls) + + def run_mds_cap_tests(self, perm, mntpt=None): + """ + Run test for read perm and, for write perm, run positive test if it + is present and run negative test if not. + """ + # XXX: mntpt is path inside cephfs that serves as root for current + # mount. Therefore, this path must me deleted from self.filepaths. 
+ # Example - + # orignal path: /mnt/cephfs_x/dir1/dir2/testdir + # cephfs dir serving as root for current mnt: /dir1/dir2 + # therefore, final path: /mnt/cephfs_x//testdir + if mntpt: + self.test_set = [(x, y.replace(mntpt, ''), z) for x, y, z in \ + self.test_set] + + self.conduct_pos_test_for_read_caps() + + if perm == 'rw': + self.conduct_pos_test_for_write_caps() + elif perm == 'r': + self.conduct_neg_test_for_write_caps() + else: + raise RuntimeError(f'perm = {perm}\nIt should be "r" or "rw".') + + def conduct_pos_test_for_read_caps(self): + for mount, path, data in self.test_set: + log.info(f'test read perm: read file {path} and expect data ' + f'"{data}"') + contents = mount.read_file(path) + self.assertEqual(data, contents) + log.info(f'read perm was tested successfully: "{data}" was ' + f'successfully read from path {path}') + + def conduct_pos_test_for_write_caps(self): + for mount, path, data in self.test_set: + log.info(f'test write perm: try writing data "{data}" to ' + f'file {path}.') + mount.write_file(path=path, data=data) + contents = mount.read_file(path=path) + self.assertEqual(data, contents) + log.info(f'write perm was tested was successfully: data ' + f'"{data}" was successfully written to file "{path}".') + + def conduct_neg_test_for_write_caps(self, sudo_write=False): + possible_errmsgs = ('permission denied', 'operation not permitted') + cmdargs = ['echo', 'some random data', Raw('|')] + cmdargs += ['sudo', 'tee'] if sudo_write else ['tee'] + + # don't use data, cmd args to write are set already above. + for mount, path, data in self.test_set: + log.info('test absence of write perm: expect failure ' + f'writing data to file {path}.') + cmdargs.append(path) + mount.negtestcmd(args=cmdargs, retval=1, errmsgs=possible_errmsgs) + cmdargs.pop(-1) + log.info('absence of write perm was tested successfully: ' + f'failed to be write data to file {path}.') diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py new file mode 100644 index 000000000..d2688929c --- /dev/null +++ b/qa/tasks/cephfs/cephfs_test_case.py @@ -0,0 +1,442 @@ +import json +import logging +import os +import re + +from shlex import split as shlex_split + +from tasks.ceph_test_case import CephTestCase + +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + +def classhook(m): + def dec(cls): + getattr(cls, m)() + return cls + return dec + +def for_teuthology(f): + """ + Decorator that adds an "is_for_teuthology" attribute to the wrapped function + """ + f.is_for_teuthology = True + return f + + +def needs_trimming(f): + """ + Mark fn as requiring a client capable of trimming its cache (i.e. 
for ceph-fuse + this means it needs to be able to run as root, currently) + """ + f.needs_trimming = True + return f + + +class MountDetails(): + + def __init__(self, mntobj): + self.client_id = mntobj.client_id + self.client_keyring_path = mntobj.client_keyring_path + self.client_remote = mntobj.client_remote + self.cephfs_name = mntobj.cephfs_name + self.cephfs_mntpt = mntobj.cephfs_mntpt + self.hostfs_mntpt = mntobj.hostfs_mntpt + + def restore(self, mntobj): + mntobj.client_id = self.client_id + mntobj.client_keyring_path = self.client_keyring_path + mntobj.client_remote = self.client_remote + mntobj.cephfs_name = self.cephfs_name + mntobj.cephfs_mntpt = self.cephfs_mntpt + mntobj.hostfs_mntpt = self.hostfs_mntpt + + +class CephFSTestCase(CephTestCase): + """ + Test case for Ceph FS, requires caller to populate Filesystem and Mounts, + into the fs, mount_a, mount_b class attributes (setting mount_b is optional) + + Handles resetting the cluster under test between tests. + """ + + # FIXME weird explicit naming + mount_a = None + mount_b = None + recovery_mount = None + + # Declarative test requirements: subclasses should override these to indicate + # their special needs. If not met, tests will be skipped. + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + REQUIRE_ONE_CLIENT_REMOTE = False + + # Whether to create the default filesystem during setUp + REQUIRE_FILESYSTEM = True + + # create a backup filesystem if required. + # required REQUIRE_FILESYSTEM enabled + REQUIRE_BACKUP_FILESYSTEM = False + + LOAD_SETTINGS = [] # type: ignore + + def _save_mount_details(self): + """ + XXX: Tests may change details of mount objects, so let's stash them so + that these details are restored later to ensure smooth setUps and + tearDowns for upcoming tests. + """ + self._orig_mount_details = [MountDetails(m) for m in self.mounts] + log.info(self._orig_mount_details) + + def _remove_blocklist(self): + # In case anything is in the OSD blocklist list, clear it out. This is to avoid + # the OSD map changing in the background (due to blocklist expiry) while tests run. + try: + self.mds_cluster.mon_manager.run_cluster_cmd(args="osd blocklist clear") + except CommandFailedError: + # Fallback for older Ceph cluster + try: + blocklist = json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd("osd", + "dump", "--format=json-pretty"))['blocklist'] + log.info(f"Removing {len(blocklist)} blocklist entries") + for addr, blocklisted_at in blocklist.items(): + self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blocklist", "rm", addr) + except KeyError: + # Fallback for more older Ceph clusters, who will use 'blacklist' instead. 
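                # (such clusters report the entries under the 'blacklist' key
                # of "osd dump" and accept "osd blacklist rm" instead)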
+ blacklist = json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd("osd", + "dump", "--format=json-pretty"))['blacklist'] + log.info(f"Removing {len(blacklist)} blacklist entries") + for addr, blocklisted_at in blacklist.items(): + self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blacklist", "rm", addr) + + def setUp(self): + super(CephFSTestCase, self).setUp() + + self.config_set('mon', 'mon_allow_pool_delete', True) + + if len(self.mds_cluster.mds_ids) < self.MDSS_REQUIRED: + self.skipTest("Only have {0} MDSs, require {1}".format( + len(self.mds_cluster.mds_ids), self.MDSS_REQUIRED + )) + + if len(self.mounts) < self.CLIENTS_REQUIRED: + self.skipTest("Only have {0} clients, require {1}".format( + len(self.mounts), self.CLIENTS_REQUIRED + )) + + if self.REQUIRE_ONE_CLIENT_REMOTE: + if self.mounts[0].client_remote.hostname in self.mds_cluster.get_mds_hostnames(): + self.skipTest("Require first client to be on separate server from MDSs") + + # Create friendly mount_a, mount_b attrs + for i in range(0, self.CLIENTS_REQUIRED): + setattr(self, "mount_{0}".format(chr(ord('a') + i)), self.mounts[i]) + + self.mds_cluster.clear_firewall() + + # Unmount all clients, we are about to blow away the filesystem + for mount in self.mounts: + if mount.is_mounted(): + mount.umount_wait(force=True) + self._save_mount_details() + + # To avoid any issues with e.g. unlink bugs, we destroy and recreate + # the filesystem rather than just doing a rm -rf of files + self.mds_cluster.delete_all_filesystems() + self.mds_cluster.mds_restart() # to reset any run-time configs, etc. + self.fs = None # is now invalid! + self.backup_fs = None + self.recovery_fs = None + + self._remove_blocklist() + + client_mount_ids = [m.client_id for m in self.mounts] + # In case there were any extra auth identities around from a previous + # test, delete them + for entry in self.auth_list(): + ent_type, ent_id = entry['entity'].split(".") + if ent_type == "client" and ent_id not in client_mount_ids and not (ent_id == "admin" or ent_id[:6] == 'mirror'): + self.mds_cluster.mon_manager.raw_cluster_cmd("auth", "del", entry['entity']) + + if self.REQUIRE_FILESYSTEM: + self.fs = self.mds_cluster.newfs(create=True) + + # In case some test messed with auth caps, reset them + for client_id in client_mount_ids: + cmd = ['auth', 'caps', f'client.{client_id}', 'mon','allow r', + 'osd', f'allow rw tag cephfs data={self.fs.name}', + 'mds', 'allow'] + + if self.run_cluster_cmd_result(cmd) == 0: + break + + cmd[1] = 'add' + if self.run_cluster_cmd_result(cmd) != 0: + raise RuntimeError(f'Failed to create new client {cmd[2]}') + + # wait for ranks to become active + self.fs.wait_for_daemons() + + # Mount the requested number of clients + for i in range(0, self.CLIENTS_REQUIRED): + self.mounts[i].mount_wait() + + if self.REQUIRE_BACKUP_FILESYSTEM: + if not self.REQUIRE_FILESYSTEM: + self.skipTest("backup filesystem requires a primary filesystem as well") + self.fs.mon_manager.raw_cluster_cmd('fs', 'flag', 'set', + 'enable_multiple', 'true', + '--yes-i-really-mean-it') + self.backup_fs = self.mds_cluster.newfs(name="backup_fs") + self.backup_fs.wait_for_daemons() + + # Load an config settings of interest + for setting in self.LOAD_SETTINGS: + setattr(self, setting, float(self.fs.mds_asok( + ['config', 'get', setting], list(self.mds_cluster.mds_ids)[0] + )[setting])) + + self.configs_set = set() + + def tearDown(self): + self.mds_cluster.clear_firewall() + for m in self.mounts: + m.teardown() + + # To prevent failover messages during Unwind of ceph 
task + self.mds_cluster.delete_all_filesystems() + + for m, md in zip(self.mounts, self._orig_mount_details): + md.restore(m) + + for subsys, key in self.configs_set: + self.mds_cluster.clear_ceph_conf(subsys, key) + + return super(CephFSTestCase, self).tearDown() + + def set_conf(self, subsys, key, value): + self.configs_set.add((subsys, key)) + self.mds_cluster.set_ceph_conf(subsys, key, value) + + def auth_list(self): + """ + Convenience wrapper on "ceph auth ls" + """ + return json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd( + "auth", "ls", "--format=json-pretty" + ))['auth_dump'] + + def assert_session_count(self, expected, ls_data=None, mds_id=None): + if ls_data is None: + ls_data = self.fs.mds_asok(['session', 'ls'], mds_id=mds_id) + + alive_count = len([s for s in ls_data if s['state'] != 'killing']) + + self.assertEqual(expected, alive_count, "Expected {0} sessions, found {1}".format( + expected, alive_count + )) + + def assert_session_state(self, client_id, expected_state): + self.assertEqual( + self._session_by_id( + self.fs.mds_asok(['session', 'ls'])).get(client_id, {'state': None})['state'], + expected_state) + + def get_session_data(self, client_id): + return self._session_by_id(client_id) + + def _session_list(self): + ls_data = self.fs.mds_asok(['session', 'ls']) + ls_data = [s for s in ls_data if s['state'] not in ['stale', 'closed']] + return ls_data + + def get_session(self, client_id, session_ls=None): + if session_ls is None: + session_ls = self.fs.mds_asok(['session', 'ls']) + + return self._session_by_id(session_ls)[client_id] + + def _session_by_id(self, session_ls): + return dict([(s['id'], s) for s in session_ls]) + + def perf_dump(self, rank=None, status=None): + return self.fs.rank_asok(['perf', 'dump'], rank=rank, status=status) + + def wait_until_evicted(self, client_id, timeout=30): + def is_client_evicted(): + ls = self._session_list() + for s in ls: + if s['id'] == client_id: + return False + return True + self.wait_until_true(is_client_evicted, timeout) + + def wait_for_daemon_start(self, daemon_ids=None): + """ + Wait until all the daemons appear in the FSMap, either assigned + MDS ranks or in the list of standbys + """ + def get_daemon_names(): + return [info['name'] for info in self.mds_cluster.status().get_all()] + + if daemon_ids is None: + daemon_ids = self.mds_cluster.mds_ids + + try: + self.wait_until_true( + lambda: set(daemon_ids) & set(get_daemon_names()) == set(daemon_ids), + timeout=30 + ) + except RuntimeError: + log.warning("Timeout waiting for daemons {0}, while we have {1}".format( + daemon_ids, get_daemon_names() + )) + raise + + def delete_mds_coredump(self, daemon_id): + # delete coredump file, otherwise teuthology.internal.coredump will + # catch it later and treat it as a failure. 
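        # kernel.core_pattern is either a path template (for example
        # "/var/crash/%e.%p.core"), from which the directory is derived below,
        # or it starts with '|' (cores piped to a program), in which case the
        # cleanup is skipped. A rough local equivalent of the lookup, assuming
        # a plain shell rather than a teuthology remote, would be:
        #
        #     import os, subprocess
        #     pattern = subprocess.check_output(
        #         ["sysctl", "-n", "kernel.core_pattern"], text=True).strip()
        #     core_dir = "" if pattern.startswith("|") else os.path.dirname(pattern)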
+ core_pattern = self.mds_cluster.mds_daemons[daemon_id].remote.sh( + "sudo sysctl -n kernel.core_pattern") + core_dir = os.path.dirname(core_pattern.strip()) + if core_dir: # Non-default core_pattern with a directory in it + # We have seen a core_pattern that looks like it's from teuthology's coredump + # task, so proceed to clear out the core file + if core_dir[0] == '|': + log.info("Piped core dumps to program {0}, skip cleaning".format(core_dir[1:])) + return; + + log.info("Clearing core from directory: {0}".format(core_dir)) + + # Verify that we see the expected single coredump + ls_output = self.mds_cluster.mds_daemons[daemon_id].remote.sh([ + "cd", core_dir, run.Raw('&&'), + "sudo", "ls", run.Raw('|'), "sudo", "xargs", "file" + ]) + cores = [l.partition(":")[0] + for l in ls_output.strip().split("\n") + if re.match(r'.*ceph-mds.* -i +{0}'.format(daemon_id), l)] + + log.info("Enumerated cores: {0}".format(cores)) + self.assertEqual(len(cores), 1) + + log.info("Found core file {0}, deleting it".format(cores[0])) + + self.mds_cluster.mds_daemons[daemon_id].remote.run(args=[ + "cd", core_dir, run.Raw('&&'), "sudo", "rm", "-f", cores[0] + ]) + else: + log.info("No core_pattern directory set, nothing to clear (internal.coredump not enabled?)") + + def _get_subtrees(self, status=None, rank=None, path=None): + if path is None: + path = "/" + try: + with contextutil.safe_while(sleep=1, tries=3) as proceed: + while proceed(): + try: + if rank == "all": + subtrees = [] + for r in self.fs.get_ranks(status=status): + s = self.fs.rank_asok(["get", "subtrees"], status=status, rank=r['rank']) + s = filter(lambda s: s['auth_first'] == r['rank'] and s['auth_second'] == -2, s) + subtrees += s + else: + subtrees = self.fs.rank_asok(["get", "subtrees"], status=status, rank=rank) + subtrees = filter(lambda s: s['dir']['path'].startswith(path), subtrees) + return list(subtrees) + except CommandFailedError as e: + # Sometimes we get transient errors + if e.exitstatus == 22: + pass + else: + raise + except contextutil.MaxWhileTries as e: + raise RuntimeError(f"could not get subtree state from rank {rank}") from e + + def _wait_subtrees(self, test, status=None, rank=None, timeout=30, sleep=2, action=None, path=None): + test = sorted(test) + try: + with contextutil.safe_while(sleep=sleep, tries=timeout//sleep) as proceed: + while proceed(): + subtrees = self._get_subtrees(status=status, rank=rank, path=path) + filtered = sorted([(s['dir']['path'], s['auth_first']) for s in subtrees]) + log.info("%s =?= %s", filtered, test) + if filtered == test: + # Confirm export_pin in output is correct: + for s in subtrees: + if s['export_pin_target'] >= 0: + self.assertTrue(s['export_pin_target'] == s['auth_first']) + return subtrees + if action is not None: + action() + except contextutil.MaxWhileTries as e: + raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e + + def _wait_until_scrub_complete(self, path="/", recursive=True, timeout=100): + out_json = self.fs.run_scrub(["start", path] + ["recursive"] if recursive else []) + if not self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"], + sleep=10, timeout=timeout): + log.info("timed out waiting for scrub to complete") + + def _wait_distributed_subtrees(self, count, status=None, rank=None, path=None): + try: + with contextutil.safe_while(sleep=5, tries=20) as proceed: + while proceed(): + subtrees = self._get_subtrees(status=status, rank=rank, path=path) + subtrees = list(filter(lambda s: s['distributed_ephemeral_pin'] == True 
and + s['auth_first'] == s['export_pin_target'], + subtrees)) + log.info(f"len={len(subtrees)} {subtrees}") + if len(subtrees) >= count: + return subtrees + except contextutil.MaxWhileTries as e: + raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e + + def _wait_random_subtrees(self, count, status=None, rank=None, path=None): + try: + with contextutil.safe_while(sleep=5, tries=20) as proceed: + while proceed(): + subtrees = self._get_subtrees(status=status, rank=rank, path=path) + subtrees = list(filter(lambda s: s['random_ephemeral_pin'] == True and + s['auth_first'] == s['export_pin_target'], + subtrees)) + log.info(f"len={len(subtrees)} {subtrees}") + if len(subtrees) >= count: + return subtrees + except contextutil.MaxWhileTries as e: + raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e + + def run_cluster_cmd(self, cmd): + if isinstance(cmd, str): + cmd = shlex_split(cmd) + return self.fs.mon_manager.raw_cluster_cmd(*cmd) + + def run_cluster_cmd_result(self, cmd): + if isinstance(cmd, str): + cmd = shlex_split(cmd) + return self.fs.mon_manager.raw_cluster_cmd_result(*cmd) + + def create_client(self, client_id, moncap=None, osdcap=None, mdscap=None): + if not (moncap or osdcap or mdscap): + if self.fs: + return self.fs.authorize(client_id, ('/', 'rw')) + else: + raise RuntimeError('no caps were passed and the default FS ' + 'is not created yet to allow client auth ' + 'for it.') + + cmd = ['auth', 'add', f'client.{client_id}'] + if moncap: + cmd += ['mon', moncap] + if osdcap: + cmd += ['osd', osdcap] + if mdscap: + cmd += ['mds', mdscap] + + self.run_cluster_cmd(cmd) + return self.run_cluster_cmd(f'auth get {self.client_name}') diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py new file mode 100644 index 000000000..777ba8249 --- /dev/null +++ b/qa/tasks/cephfs/filesystem.py @@ -0,0 +1,1712 @@ + +import json +import logging +from gevent import Greenlet +import os +import time +import datetime +import re +import errno +import random + +from io import BytesIO, StringIO +from errno import EBUSY + +from teuthology.exceptions import CommandFailedError +from teuthology import misc +from teuthology.nuke import clear_firewall +from teuthology.parallel import parallel +from teuthology import contextutil +from tasks.ceph_manager import write_conf +from tasks import ceph_manager + + +log = logging.getLogger(__name__) + + +DAEMON_WAIT_TIMEOUT = 120 +ROOT_INO = 1 + +class FileLayout(object): + def __init__(self, pool=None, pool_namespace=None, stripe_unit=None, stripe_count=None, object_size=None): + self.pool = pool + self.pool_namespace = pool_namespace + self.stripe_unit = stripe_unit + self.stripe_count = stripe_count + self.object_size = object_size + + @classmethod + def load_from_ceph(layout_str): + # TODO + pass + + def items(self): + if self.pool is not None: + yield ("pool", self.pool) + if self.pool_namespace: + yield ("pool_namespace", self.pool_namespace) + if self.stripe_unit is not None: + yield ("stripe_unit", self.stripe_unit) + if self.stripe_count is not None: + yield ("stripe_count", self.stripe_count) + if self.object_size is not None: + yield ("object_size", self.stripe_size) + +class ObjectNotFound(Exception): + def __init__(self, object_name): + self._object_name = object_name + + def __str__(self): + return "Object not found: '{0}'".format(self._object_name) + +class FSMissing(Exception): + def __init__(self, ident): + self.ident = ident + + def __str__(self): + return f"File 
system {self.ident} does not exist in the map" + +class FSStatus(object): + """ + Operations on a snapshot of the FSMap. + """ + def __init__(self, mon_manager, epoch=None): + self.mon = mon_manager + cmd = ["fs", "dump", "--format=json"] + if epoch is not None: + cmd.append(str(epoch)) + self.map = json.loads(self.mon.raw_cluster_cmd(*cmd)) + + def __str__(self): + return json.dumps(self.map, indent = 2, sort_keys = True) + + # Expose the fsmap for manual inspection. + def __getitem__(self, key): + """ + Get a field from the fsmap. + """ + return self.map[key] + + def get_filesystems(self): + """ + Iterator for all filesystems. + """ + for fs in self.map['filesystems']: + yield fs + + def get_all(self): + """ + Iterator for all the mds_info components in the FSMap. + """ + for info in self.map['standbys']: + yield info + for fs in self.map['filesystems']: + for info in fs['mdsmap']['info'].values(): + yield info + + def get_standbys(self): + """ + Iterator for all standbys. + """ + for info in self.map['standbys']: + yield info + + def get_fsmap(self, fscid): + """ + Get the fsmap for the given FSCID. + """ + for fs in self.map['filesystems']: + if fscid is None or fs['id'] == fscid: + return fs + raise FSMissing(fscid) + + def get_fsmap_byname(self, name): + """ + Get the fsmap for the given file system name. + """ + for fs in self.map['filesystems']: + if name is None or fs['mdsmap']['fs_name'] == name: + return fs + raise FSMissing(name) + + def get_replays(self, fscid): + """ + Get the standby:replay MDS for the given FSCID. + """ + fs = self.get_fsmap(fscid) + for info in fs['mdsmap']['info'].values(): + if info['state'] == 'up:standby-replay': + yield info + + def get_ranks(self, fscid): + """ + Get the ranks for the given FSCID. + """ + fs = self.get_fsmap(fscid) + for info in fs['mdsmap']['info'].values(): + if info['rank'] >= 0 and info['state'] != 'up:standby-replay': + yield info + + def get_damaged(self, fscid): + """ + Get the damaged ranks for the given FSCID. + """ + fs = self.get_fsmap(fscid) + return fs['mdsmap']['damaged'] + + def get_rank(self, fscid, rank): + """ + Get the rank for the given FSCID. + """ + for info in self.get_ranks(fscid): + if info['rank'] == rank: + return info + raise RuntimeError("FSCID {0} has no rank {1}".format(fscid, rank)) + + def get_mds(self, name): + """ + Get the info for the given MDS name. + """ + for info in self.get_all(): + if info['name'] == name: + return info + return None + + def get_mds_addr(self, name): + """ + Return the instance addr as a string, like "10.214.133.138:6807\/10825" + """ + info = self.get_mds(name) + if info: + return info['addr'] + else: + log.warning(json.dumps(list(self.get_all()), indent=2)) # dump for debugging + raise RuntimeError("MDS id '{0}' not found in map".format(name)) + + def get_mds_addrs(self, name): + """ + Return the instance addr as a string, like "[10.214.133.138:6807 10.214.133.138:6808]" + """ + info = self.get_mds(name) + if info: + return [e['addr'] for e in info['addrs']['addrvec']] + else: + log.warn(json.dumps(list(self.get_all()), indent=2)) # dump for debugging + raise RuntimeError("MDS id '{0}' not found in map".format(name)) + + def get_mds_gid(self, gid): + """ + Get the info for the given MDS gid. + """ + for info in self.get_all(): + if info['gid'] == gid: + return info + return None + + def hadfailover(self, status): + """ + Compares two statuses for mds failovers. + Returns True if there is a failover. 
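        That is, some MDS gid present in the given (newer) status either does
        not appear in this (older) status or appears with a different
        incarnation.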
+ """ + for fs in status.map['filesystems']: + for info in fs['mdsmap']['info'].values(): + oldinfo = self.get_mds_gid(info['gid']) + if oldinfo is None or oldinfo['incarnation'] != info['incarnation']: + return True + #all matching + return False + +class CephCluster(object): + @property + def admin_remote(self): + first_mon = misc.get_first_mon(self._ctx, None) + (result,) = self._ctx.cluster.only(first_mon).remotes.keys() + return result + + def __init__(self, ctx) -> None: + self._ctx = ctx + self.mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager')) + + def get_config(self, key, service_type=None): + """ + Get config from mon by default, or a specific service if caller asks for it + """ + if service_type is None: + service_type = 'mon' + + service_id = sorted(misc.all_roles_of_type(self._ctx.cluster, service_type))[0] + return self.json_asok(['config', 'get', key], service_type, service_id)[key] + + def set_ceph_conf(self, subsys, key, value): + if subsys not in self._ctx.ceph['ceph'].conf: + self._ctx.ceph['ceph'].conf[subsys] = {} + self._ctx.ceph['ceph'].conf[subsys][key] = value + write_conf(self._ctx) # XXX because we don't have the ceph task's config object, if they + # used a different config path this won't work. + + def clear_ceph_conf(self, subsys, key): + del self._ctx.ceph['ceph'].conf[subsys][key] + write_conf(self._ctx) + + def json_asok(self, command, service_type, service_id, timeout=None): + if timeout is None: + timeout = 300 + command.insert(0, '--format=json') + proc = self.mon_manager.admin_socket(service_type, service_id, command, timeout=timeout) + response_data = proc.stdout.getvalue().strip() + if len(response_data) > 0: + + def get_nonnumeric_values(value): + c = {"NaN": float("nan"), "Infinity": float("inf"), + "-Infinity": -float("inf")} + return c[value] + + j = json.loads(response_data.replace('inf', 'Infinity'), + parse_constant=get_nonnumeric_values) + pretty = json.dumps(j, sort_keys=True, indent=2) + log.debug(f"_json_asok output\n{pretty}") + return j + else: + log.debug("_json_asok output empty") + return None + + def is_addr_blocklisted(self, addr): + blocklist = json.loads(self.mon_manager.raw_cluster_cmd( + "osd", "dump", "--format=json"))['blocklist'] + if addr in blocklist: + return True + log.warn(f'The address {addr} is not blocklisted') + return False + + +class MDSCluster(CephCluster): + """ + Collective operations on all the MDS daemons in the Ceph cluster. These + daemons may be in use by various Filesystems. + + For the benefit of pre-multi-filesystem tests, this class is also + a parent of Filesystem. The correct way to use MDSCluster going forward is + as a separate instance outside of your (multiple) Filesystem instances. + """ + + def __init__(self, ctx): + super(MDSCluster, self).__init__(ctx) + + @property + def mds_ids(self): + # do this dynamically because the list of ids may change periodically with cephadm + return list(misc.all_roles_of_type(self._ctx.cluster, 'mds')) + + @property + def mds_daemons(self): + return dict([(mds_id, self._ctx.daemons.get_daemon('mds', mds_id)) for mds_id in self.mds_ids]) + + def _one_or_all(self, mds_id, cb, in_parallel=True): + """ + Call a callback for a single named MDS, or for all. + + Note that the parallelism here isn't for performance, it's to avoid being overly kind + to the cluster by waiting a graceful ssh-latency of time between doing things, and to + avoid being overly kind by executing them in a particular order. 
However, some actions + don't cope with being done in parallel, so it's optional (`in_parallel`) + + :param mds_id: MDS daemon name, or None + :param cb: Callback taking single argument of MDS daemon name + :param in_parallel: whether to invoke callbacks concurrently (else one after the other) + """ + + if mds_id is None: + if in_parallel: + with parallel() as p: + for mds_id in self.mds_ids: + p.spawn(cb, mds_id) + else: + for mds_id in self.mds_ids: + cb(mds_id) + else: + cb(mds_id) + + def get_config(self, key, service_type=None): + """ + get_config specialization of service_type="mds" + """ + if service_type != "mds": + return super(MDSCluster, self).get_config(key, service_type) + + # Some tests stop MDS daemons, don't send commands to a dead one: + running_daemons = [i for i, mds in self.mds_daemons.items() if mds.running()] + service_id = random.sample(running_daemons, 1)[0] + return self.json_asok(['config', 'get', key], service_type, service_id)[key] + + def mds_stop(self, mds_id=None): + """ + Stop the MDS daemon process(se). If it held a rank, that rank + will eventually go laggy. + """ + self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].stop()) + + def mds_fail(self, mds_id=None): + """ + Inform MDSMonitor of the death of the daemon process(es). If it held + a rank, that rank will be relinquished. + """ + self._one_or_all(mds_id, lambda id_: self.mon_manager.raw_cluster_cmd("mds", "fail", id_)) + + def mds_restart(self, mds_id=None): + self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].restart()) + + def mds_fail_restart(self, mds_id=None): + """ + Variation on restart that includes marking MDSs as failed, so that doing this + operation followed by waiting for healthy daemon states guarantees that they + have gone down and come up, rather than potentially seeing the healthy states + that existed before the restart. + """ + def _fail_restart(id_): + self.mds_daemons[id_].stop() + self.mon_manager.raw_cluster_cmd("mds", "fail", id_) + self.mds_daemons[id_].restart() + + self._one_or_all(mds_id, _fail_restart) + + def mds_signal(self, mds_id, sig, silent=False): + """ + signal a MDS daemon + """ + self.mds_daemons[mds_id].signal(sig, silent); + + def mds_is_running(self, mds_id): + return self.mds_daemons[mds_id].running() + + def newfs(self, name='cephfs', create=True): + return Filesystem(self._ctx, name=name, create=create) + + def status(self, epoch=None): + return FSStatus(self.mon_manager, epoch) + + def get_standby_daemons(self): + return set([s['name'] for s in self.status().get_standbys()]) + + def get_mds_hostnames(self): + result = set() + for mds_id in self.mds_ids: + mds_remote = self.mon_manager.find_remote('mds', mds_id) + result.add(mds_remote.hostname) + + return list(result) + + def set_clients_block(self, blocked, mds_id=None): + """ + Block (using iptables) client communications to this MDS. Be careful: if + other services are running on this MDS, or other MDSs try to talk to this + MDS, their communications may also be blocked as collatoral damage. 
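        For example, for an MDS whose reported addr were "10.0.0.5:6807/12345",
        blocking would run roughly

            sudo iptables -A OUTPUT -p tcp --sport 6807 -j REJECT -m comment --comment teuthology
            sudo iptables -A INPUT -p tcp --dport 6807 -j REJECT -m comment --comment teuthology

        on that host, and unblocking repeats the same rules with -D.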
+ + :param mds_id: Optional ID of MDS to block, default to all + :return: + """ + da_flag = "-A" if blocked else "-D" + + def set_block(_mds_id): + remote = self.mon_manager.find_remote('mds', _mds_id) + status = self.status() + + addr = status.get_mds_addr(_mds_id) + ip_str, port_str, inst_str = re.match("(.+):(.+)/(.+)", addr).groups() + + remote.run( + args=["sudo", "iptables", da_flag, "OUTPUT", "-p", "tcp", "--sport", port_str, "-j", "REJECT", "-m", + "comment", "--comment", "teuthology"]) + remote.run( + args=["sudo", "iptables", da_flag, "INPUT", "-p", "tcp", "--dport", port_str, "-j", "REJECT", "-m", + "comment", "--comment", "teuthology"]) + + self._one_or_all(mds_id, set_block, in_parallel=False) + + def set_inter_mds_block(self, blocked, mds_rank_1, mds_rank_2): + """ + Block (using iptables) communications from a provided MDS to other MDSs. + Block all ports that an MDS uses for communication. + + :param blocked: True to block the MDS, False otherwise + :param mds_rank_1: MDS rank + :param mds_rank_2: MDS rank + :return: + """ + da_flag = "-A" if blocked else "-D" + + def set_block(mds_ids): + status = self.status() + + mds = mds_ids[0] + remote = self.mon_manager.find_remote('mds', mds) + addrs = status.get_mds_addrs(mds) + for addr in addrs: + ip_str, port_str = re.match("(.+):(.+)", addr).groups() + remote.run( + args=["sudo", "iptables", da_flag, "INPUT", "-p", "tcp", "--dport", port_str, "-j", "REJECT", "-m", + "comment", "--comment", "teuthology"], omit_sudo=False) + + + mds = mds_ids[1] + remote = self.mon_manager.find_remote('mds', mds) + addrs = status.get_mds_addrs(mds) + for addr in addrs: + ip_str, port_str = re.match("(.+):(.+)", addr).groups() + remote.run( + args=["sudo", "iptables", da_flag, "OUTPUT", "-p", "tcp", "--sport", port_str, "-j", "REJECT", "-m", + "comment", "--comment", "teuthology"], omit_sudo=False) + remote.run( + args=["sudo", "iptables", da_flag, "INPUT", "-p", "tcp", "--dport", port_str, "-j", "REJECT", "-m", + "comment", "--comment", "teuthology"], omit_sudo=False) + + self._one_or_all((mds_rank_1, mds_rank_2), set_block, in_parallel=False) + + def clear_firewall(self): + clear_firewall(self._ctx) + + def get_mds_info(self, mds_id): + return FSStatus(self.mon_manager).get_mds(mds_id) + + def is_pool_full(self, pool_name): + pools = json.loads(self.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools'] + for pool in pools: + if pool['pool_name'] == pool_name: + return 'full' in pool['flags_names'].split(",") + + raise RuntimeError("Pool not found '{0}'".format(pool_name)) + + def delete_all_filesystems(self): + """ + Remove all filesystems that exist, and any pools in use by them. + """ + for fs in self.status().get_filesystems(): + Filesystem(ctx=self._ctx, fscid=fs['id']).destroy() + + @property + def beacon_timeout(self): + """ + Generate an acceptable timeout for the mons to drive some MDSMap change + because of missed beacons from some MDS. This involves looking up the + grace period in use by the mons and adding an acceptable buffer. + """ + + grace = float(self.get_config("mds_beacon_grace", service_type="mon")) + return grace*2+15 + + +class Filesystem(MDSCluster): + + """ + Generator for all Filesystems in the cluster. + """ + @classmethod + def get_all_fs(cls, ctx): + mdsc = MDSCluster(ctx) + status = mdsc.status() + for fs in status.get_filesystems(): + yield cls(ctx, fscid=fs['id']) + + """ + This object is for driving a CephFS filesystem. 
The MDS daemons driven by + MDSCluster may be shared with other Filesystems. + """ + def __init__(self, ctx, fs_config={}, fscid=None, name=None, create=False): + super(Filesystem, self).__init__(ctx) + + self.name = name + self.id = None + self.metadata_pool_name = None + self.data_pool_name = None + self.data_pools = None + self.fs_config = fs_config + self.ec_profile = fs_config.get('ec_profile') + + client_list = list(misc.all_roles_of_type(self._ctx.cluster, 'client')) + self.client_id = client_list[0] + self.client_remote = list(misc.get_clients(ctx=ctx, roles=["client.{0}".format(self.client_id)]))[0][1] + + if name is not None: + if fscid is not None: + raise RuntimeError("cannot specify fscid when creating fs") + if create and not self.legacy_configured(): + self.create() + else: + if fscid is not None: + self.id = fscid + self.getinfo(refresh = True) + + # Stash a reference to the first created filesystem on ctx, so + # that if someone drops to the interactive shell they can easily + # poke our methods. + if not hasattr(self._ctx, "filesystem"): + self._ctx.filesystem = self + + def dead(self): + try: + return not bool(self.get_mds_map()) + except FSMissing: + return True + + def get_task_status(self, status_key): + return self.mon_manager.get_service_task_status("mds", status_key) + + def getinfo(self, refresh = False): + status = self.status() + if self.id is not None: + fsmap = status.get_fsmap(self.id) + elif self.name is not None: + fsmap = status.get_fsmap_byname(self.name) + else: + fss = [fs for fs in status.get_filesystems()] + if len(fss) == 1: + fsmap = fss[0] + elif len(fss) == 0: + raise RuntimeError("no file system available") + else: + raise RuntimeError("more than one file system available") + self.id = fsmap['id'] + self.name = fsmap['mdsmap']['fs_name'] + self.get_pool_names(status = status, refresh = refresh) + return status + + def reach_max_mds(self): + status = self.wait_for_daemons() + mds_map = self.get_mds_map(status=status) + assert(mds_map['in'] == list(range(0, mds_map['max_mds']))) + + def reset(self): + self.mon_manager.raw_cluster_cmd("fs", "reset", str(self.name), '--yes-i-really-mean-it') + + def fail(self): + self.mon_manager.raw_cluster_cmd("fs", "fail", str(self.name)) + + def set_flag(self, var, *args): + a = map(lambda x: str(x).lower(), args) + self.mon_manager.raw_cluster_cmd("fs", "flag", "set", var, *a) + + def set_allow_multifs(self, yes=True): + self.set_flag("enable_multiple", yes) + + def set_var(self, var, *args): + a = map(lambda x: str(x).lower(), args) + self.mon_manager.raw_cluster_cmd("fs", "set", self.name, var, *a) + + def set_down(self, down=True): + self.set_var("down", str(down).lower()) + + def set_joinable(self, joinable=True): + self.set_var("joinable", joinable) + + def set_max_mds(self, max_mds): + self.set_var("max_mds", "%d" % max_mds) + + def set_session_timeout(self, timeout): + self.set_var("session_timeout", "%d" % timeout) + + def set_allow_standby_replay(self, yes): + self.set_var("allow_standby_replay", yes) + + def set_allow_new_snaps(self, yes): + self.set_var("allow_new_snaps", yes, '--yes-i-really-mean-it') + + def set_bal_rank_mask(self, bal_rank_mask): + self.set_var("bal_rank_mask", bal_rank_mask) + + def set_refuse_client_session(self, yes): + self.set_var("refuse_client_session", yes) + + def compat(self, *args): + a = map(lambda x: str(x).lower(), args) + self.mon_manager.raw_cluster_cmd("fs", "compat", self.name, *a) + + def add_compat(self, *args): + self.compat("add_compat", *args) + + def 
add_incompat(self, *args): + self.compat("add_incompat", *args) + + def rm_compat(self, *args): + self.compat("rm_compat", *args) + + def rm_incompat(self, *args): + self.compat("rm_incompat", *args) + + def required_client_features(self, *args, **kwargs): + c = ["fs", "required_client_features", self.name, *args] + return self.mon_manager.run_cluster_cmd(args=c, **kwargs) + + # Since v15.1.0 the pg autoscale mode has been enabled as default, + # will let the pg autoscale mode to calculate the pg_num as needed. + # We set the pg_num_min to 64 to make sure that pg autoscale mode + # won't set the pg_num to low to fix Tracker#45434. + pg_num = 64 + pg_num_min = 64 + target_size_ratio = 0.9 + target_size_ratio_ec = 0.9 + + def create(self, recover=False, metadata_overlay=False): + if self.name is None: + self.name = "cephfs" + if self.metadata_pool_name is None: + self.metadata_pool_name = "{0}_metadata".format(self.name) + if self.data_pool_name is None: + data_pool_name = "{0}_data".format(self.name) + else: + data_pool_name = self.data_pool_name + + # will use the ec pool to store the data and a small amount of + # metadata still goes to the primary data pool for all files. + if not metadata_overlay and self.ec_profile and 'disabled' not in self.ec_profile: + self.target_size_ratio = 0.05 + + log.debug("Creating filesystem '{0}'".format(self.name)) + + try: + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + self.metadata_pool_name, + '--pg_num_min', str(self.pg_num_min)) + + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + data_pool_name, str(self.pg_num), + '--pg_num_min', str(self.pg_num_min), + '--target_size_ratio', + str(self.target_size_ratio)) + except CommandFailedError as e: + if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + self.metadata_pool_name, + str(self.pg_num_min)) + + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + data_pool_name, str(self.pg_num), + str(self.pg_num_min)) + else: + raise + + args = ["fs", "new", self.name, self.metadata_pool_name, data_pool_name] + if recover: + args.append('--recover') + if metadata_overlay: + args.append('--allow-dangerous-metadata-overlay') + self.mon_manager.raw_cluster_cmd(*args) + + if not recover: + if self.ec_profile and 'disabled' not in self.ec_profile: + ec_data_pool_name = data_pool_name + "_ec" + log.debug("EC profile is %s", self.ec_profile) + cmd = ['osd', 'erasure-code-profile', 'set', ec_data_pool_name] + cmd.extend(self.ec_profile) + self.mon_manager.raw_cluster_cmd(*cmd) + try: + self.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'create', ec_data_pool_name, + 'erasure', ec_data_pool_name, + '--pg_num_min', str(self.pg_num_min), + '--target_size_ratio', str(self.target_size_ratio_ec)) + except CommandFailedError as e: + if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option + self.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'create', ec_data_pool_name, + str(self.pg_num_min), 'erasure', ec_data_pool_name) + else: + raise + self.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'set', + ec_data_pool_name, 'allow_ec_overwrites', 'true') + self.add_data_pool(ec_data_pool_name, create=False) + self.check_pool_application(ec_data_pool_name) + + self.run_client_payload(f"setfattr -n ceph.dir.layout.pool -v {ec_data_pool_name} . 
&& getfattr -n ceph.dir.layout .") + + self.check_pool_application(self.metadata_pool_name) + self.check_pool_application(data_pool_name) + + # Turn off spurious standby count warnings from modifying max_mds in tests. + try: + self.mon_manager.raw_cluster_cmd('fs', 'set', self.name, 'standby_count_wanted', '0') + except CommandFailedError as e: + if e.exitstatus == 22: + # standby_count_wanted not available prior to luminous (upgrade tests would fail otherwise) + pass + else: + raise + + if self.fs_config is not None: + log.debug(f"fs_config: {self.fs_config}") + max_mds = self.fs_config.get('max_mds', 1) + if max_mds > 1: + self.set_max_mds(max_mds) + + standby_replay = self.fs_config.get('standby_replay', False) + self.set_allow_standby_replay(standby_replay) + + # If absent will use the default value (60 seconds) + session_timeout = self.fs_config.get('session_timeout', 60) + if session_timeout != 60: + self.set_session_timeout(session_timeout) + + if self.fs_config.get('subvols', None) is not None: + log.debug(f"Creating {self.fs_config.get('subvols')} subvols " + f"for filesystem '{self.name}'") + if not hasattr(self._ctx, "created_subvols"): + self._ctx.created_subvols = dict() + + subvols = self.fs_config.get('subvols') + assert(isinstance(subvols, dict)) + assert(isinstance(subvols['create'], int)) + assert(subvols['create'] > 0) + + for sv in range(0, subvols['create']): + sv_name = f'sv_{sv}' + self.mon_manager.raw_cluster_cmd( + 'fs', 'subvolume', 'create', self.name, sv_name, + self.fs_config.get('subvol_options', '')) + + if self.name not in self._ctx.created_subvols: + self._ctx.created_subvols[self.name] = [] + + subvol_path = self.mon_manager.raw_cluster_cmd( + 'fs', 'subvolume', 'getpath', self.name, sv_name) + subvol_path = subvol_path.strip() + self._ctx.created_subvols[self.name].append(subvol_path) + else: + log.debug(f"Not Creating any subvols for filesystem '{self.name}'") + + + self.getinfo(refresh = True) + + # wait pgs to be clean + self.mon_manager.wait_for_clean() + + def run_client_payload(self, cmd): + # avoid circular dep by importing here: + from tasks.cephfs.fuse_mount import FuseMount + + # Wait for at MDS daemons to be ready before mounting the + # ceph-fuse client in run_client_payload() + self.wait_for_daemons() + + d = misc.get_testdir(self._ctx) + m = FuseMount(self._ctx, d, "admin", self.client_remote, cephfs_name=self.name) + m.mount_wait() + m.run_shell_payload(cmd) + m.umount_wait(require_clean=True) + + def _remove_pool(self, name, **kwargs): + c = f'osd pool rm {name} {name} --yes-i-really-really-mean-it' + return self.mon_manager.ceph(c, **kwargs) + + def rm(self, **kwargs): + c = f'fs rm {self.name} --yes-i-really-mean-it' + return self.mon_manager.ceph(c, **kwargs) + + def remove_pools(self, data_pools): + self._remove_pool(self.get_metadata_pool_name()) + for poolname in data_pools: + try: + self._remove_pool(poolname) + except CommandFailedError as e: + # EBUSY, this data pool is used by two metadata pools, let the + # 2nd pass delete it + if e.exitstatus == EBUSY: + pass + else: + raise + + def destroy(self, reset_obj_attrs=True): + log.info(f'Destroying file system {self.name} and related pools') + + if self.dead(): + log.debug('already dead...') + return + + data_pools = self.get_data_pool_names(refresh=True) + + # make sure no MDSs are attached to given FS. 
+ self.fail() + self.rm() + + self.remove_pools(data_pools) + + if reset_obj_attrs: + self.id = None + self.name = None + self.metadata_pool_name = None + self.data_pool_name = None + self.data_pools = None + + def recreate(self): + self.destroy() + + self.create() + self.getinfo(refresh=True) + + def check_pool_application(self, pool_name): + osd_map = self.mon_manager.get_osd_dump_json() + for pool in osd_map['pools']: + if pool['pool_name'] == pool_name: + if "application_metadata" in pool: + if not "cephfs" in pool['application_metadata']: + raise RuntimeError("Pool {pool_name} does not name cephfs as application!".\ + format(pool_name=pool_name)) + + def __del__(self): + if getattr(self._ctx, "filesystem", None) == self: + delattr(self._ctx, "filesystem") + + def exists(self): + """ + Whether a filesystem exists in the mon's filesystem list + """ + fs_list = json.loads(self.mon_manager.raw_cluster_cmd('fs', 'ls', '--format=json-pretty')) + return self.name in [fs['name'] for fs in fs_list] + + def legacy_configured(self): + """ + Check if a legacy (i.e. pre "fs new") filesystem configuration is present. If this is + the case, the caller should avoid using Filesystem.create + """ + try: + out_text = self.mon_manager.raw_cluster_cmd('--format=json-pretty', 'osd', 'lspools') + pools = json.loads(out_text) + metadata_pool_exists = 'metadata' in [p['poolname'] for p in pools] + if metadata_pool_exists: + self.metadata_pool_name = 'metadata' + except CommandFailedError as e: + # For use in upgrade tests, Ceph cuttlefish and earlier don't support + # structured output (--format) from the CLI. + if e.exitstatus == 22: + metadata_pool_exists = True + else: + raise + + return metadata_pool_exists + + def _df(self): + return json.loads(self.mon_manager.raw_cluster_cmd("df", "--format=json-pretty")) + + # may raise FSMissing + def get_mds_map(self, status=None): + if status is None: + status = self.status() + return status.get_fsmap(self.id)['mdsmap'] + + def get_var(self, var, status=None): + return self.get_mds_map(status=status)[var] + + def set_dir_layout(self, mount, path, layout): + for name, value in layout.items(): + mount.run_shell(args=["setfattr", "-n", "ceph.dir.layout."+name, "-v", str(value), path]) + + def add_data_pool(self, name, create=True): + if create: + try: + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name, + '--pg_num_min', str(self.pg_num_min)) + except CommandFailedError as e: + if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option + self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name, + str(self.pg_num_min)) + else: + raise + self.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', self.name, name) + self.get_pool_names(refresh = True) + for poolid, fs_name in self.data_pools.items(): + if name == fs_name: + return poolid + raise RuntimeError("could not get just created pool '{0}'".format(name)) + + def get_pool_names(self, refresh = False, status = None): + if refresh or self.metadata_pool_name is None or self.data_pools is None: + if status is None: + status = self.status() + fsmap = status.get_fsmap(self.id) + + osd_map = self.mon_manager.get_osd_dump_json() + id_to_name = {} + for p in osd_map['pools']: + id_to_name[p['pool']] = p['pool_name'] + + self.metadata_pool_name = id_to_name[fsmap['mdsmap']['metadata_pool']] + self.data_pools = {} + for data_pool in fsmap['mdsmap']['data_pools']: + self.data_pools[data_pool] = id_to_name[data_pool] + + def get_data_pool_name(self, refresh = False): + if refresh or self.data_pools is 
None: + self.get_pool_names(refresh = True) + assert(len(self.data_pools) == 1) + return next(iter(self.data_pools.values())) + + def get_data_pool_id(self, refresh = False): + """ + Don't call this if you have multiple data pools + :return: integer + """ + if refresh or self.data_pools is None: + self.get_pool_names(refresh = True) + assert(len(self.data_pools) == 1) + return next(iter(self.data_pools.keys())) + + def get_data_pool_names(self, refresh = False): + if refresh or self.data_pools is None: + self.get_pool_names(refresh = True) + return list(self.data_pools.values()) + + def get_metadata_pool_name(self): + return self.metadata_pool_name + + def set_data_pool_name(self, name): + if self.id is not None: + raise RuntimeError("can't set filesystem name if its fscid is set") + self.data_pool_name = name + + def get_pool_pg_num(self, pool_name): + pgs = json.loads(self.mon_manager.raw_cluster_cmd('osd', 'pool', 'get', + pool_name, 'pg_num', + '--format=json-pretty')) + return int(pgs['pg_num']) + + def get_namespace_id(self): + return self.id + + def get_pool_df(self, pool_name): + """ + Return a dict like: + {u'bytes_used': 0, u'max_avail': 83848701, u'objects': 0, u'kb_used': 0} + """ + for pool_df in self._df()['pools']: + if pool_df['name'] == pool_name: + return pool_df['stats'] + + raise RuntimeError("Pool name '{0}' not found".format(pool_name)) + + def get_usage(self): + return self._df()['stats']['total_used_bytes'] + + def are_daemons_healthy(self, status=None, skip_max_mds_check=False): + """ + Return true if all daemons are in one of active, standby, standby-replay, and + at least max_mds daemons are in 'active'. + + Unlike most of Filesystem, this function is tolerant of new-style `fs` + commands being missing, because we are part of the ceph installation + process during upgrade suites, so must fall back to old style commands + when we get an EINVAL on a new style command. 
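        In practice this check is usually polled via wait_for_daemons(), which
        retries once per second up to DAEMON_WAIT_TIMEOUT (120 seconds by
        default).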
+ + :return: + """ + # First, check to see that processes haven't exited with an error code + for mds in self._ctx.daemons.iter_daemons_of_role('mds'): + mds.check_status() + + active_count = 0 + mds_map = self.get_mds_map(status=status) + + log.debug("are_daemons_healthy: mds map: {0}".format(mds_map)) + + for mds_id, mds_status in mds_map['info'].items(): + if mds_status['state'] not in ["up:active", "up:standby", "up:standby-replay"]: + log.warning("Unhealthy mds state {0}:{1}".format(mds_id, mds_status['state'])) + return False + elif mds_status['state'] == 'up:active': + active_count += 1 + + log.debug("are_daemons_healthy: {0}/{1}".format( + active_count, mds_map['max_mds'] + )) + + if not skip_max_mds_check: + if active_count > mds_map['max_mds']: + log.debug("are_daemons_healthy: number of actives is greater than max_mds: {0}".format(mds_map)) + return False + elif active_count == mds_map['max_mds']: + # The MDSMap says these guys are active, but let's check they really are + for mds_id, mds_status in mds_map['info'].items(): + if mds_status['state'] == 'up:active': + try: + daemon_status = self.mds_tell(["status"], mds_id=mds_status['name']) + except CommandFailedError as cfe: + if cfe.exitstatus == errno.EINVAL: + # Old version, can't do this check + continue + else: + # MDS not even running + return False + + if daemon_status['state'] != 'up:active': + # MDS hasn't taken the latest map yet + return False + + return True + else: + return False + else: + log.debug("are_daemons_healthy: skipping max_mds check") + return True + + def get_daemon_names(self, state=None, status=None): + """ + Return MDS daemon names of those daemons in the given state + :param state: + :return: + """ + mdsmap = self.get_mds_map(status) + result = [] + for mds_status in sorted(mdsmap['info'].values(), + key=lambda _: _['rank']): + if mds_status['state'] == state or state is None: + result.append(mds_status['name']) + + return result + + def get_active_names(self, status=None): + """ + Return MDS daemon names of those daemons holding ranks + in state up:active + + :return: list of strings like ['a', 'b'], sorted by rank + """ + return self.get_daemon_names("up:active", status=status) + + def get_all_mds_rank(self, status=None): + mdsmap = self.get_mds_map(status) + result = [] + for mds_status in sorted(mdsmap['info'].values(), + key=lambda _: _['rank']): + if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay': + result.append(mds_status['rank']) + + return result + + def get_rank(self, rank=None, status=None): + if status is None: + status = self.getinfo() + if rank is None: + rank = 0 + return status.get_rank(self.id, rank) + + def rank_restart(self, rank=0, status=None): + name = self.get_rank(rank=rank, status=status)['name'] + self.mds_restart(mds_id=name) + + def rank_signal(self, signal, rank=0, status=None): + name = self.get_rank(rank=rank, status=status)['name'] + self.mds_signal(name, signal) + + def rank_freeze(self, yes, rank=0): + self.mon_manager.raw_cluster_cmd("mds", "freeze", "{}:{}".format(self.id, rank), str(yes).lower()) + + def rank_repaired(self, rank): + self.mon_manager.raw_cluster_cmd("mds", "repaired", "{}:{}".format(self.id, rank)) + + def rank_fail(self, rank=0): + self.mon_manager.raw_cluster_cmd("mds", "fail", "{}:{}".format(self.id, rank)) + + def rank_is_running(self, rank=0, status=None): + name = self.get_rank(rank=rank, status=status)['name'] + return self.mds_is_running(name) + + def get_ranks(self, status=None): + if status is None: + status = 
self.getinfo() + return status.get_ranks(self.id) + + def get_damaged(self, status=None): + if status is None: + status = self.getinfo() + return status.get_damaged(self.id) + + def get_replays(self, status=None): + if status is None: + status = self.getinfo() + return status.get_replays(self.id) + + def get_replay(self, rank=0, status=None): + for replay in self.get_replays(status=status): + if replay['rank'] == rank: + return replay + return None + + def get_rank_names(self, status=None): + """ + Return MDS daemon names of those daemons holding a rank, + sorted by rank. This includes e.g. up:replay/reconnect + as well as active, but does not include standby or + standby-replay. + """ + mdsmap = self.get_mds_map(status) + result = [] + for mds_status in sorted(mdsmap['info'].values(), + key=lambda _: _['rank']): + if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay': + result.append(mds_status['name']) + + return result + + def wait_for_daemons(self, timeout=None, skip_max_mds_check=False, status=None): + """ + Wait until all daemons are healthy + :return: + """ + + if timeout is None: + timeout = DAEMON_WAIT_TIMEOUT + + if self.id is None: + status = self.getinfo(refresh=True) + + if status is None: + status = self.status() + + elapsed = 0 + while True: + if self.are_daemons_healthy(status=status, skip_max_mds_check=skip_max_mds_check): + return status + else: + time.sleep(1) + elapsed += 1 + + if elapsed > timeout: + log.debug("status = {0}".format(status)) + raise RuntimeError("Timed out waiting for MDS daemons to become healthy") + + status = self.status() + + def dencoder(self, obj_type, obj_blob): + args = [os.path.join(self._prefix, "ceph-dencoder"), 'type', obj_type, 'import', '-', 'decode', 'dump_json'] + p = self.mon_manager.controller.run(args=args, stdin=BytesIO(obj_blob), stdout=BytesIO()) + return p.stdout.getvalue() + + def rados(self, *args, **kwargs): + """ + Callout to rados CLI. + """ + + return self.mon_manager.do_rados(*args, **kwargs) + + def radosm(self, *args, **kwargs): + """ + Interact with the metadata pool via rados CLI. + """ + + return self.rados(*args, **kwargs, pool=self.get_metadata_pool_name()) + + def radosmo(self, *args, stdout=BytesIO(), **kwargs): + """ + Interact with the metadata pool via rados CLI. Get the stdout. + """ + + return self.radosm(*args, **kwargs, stdout=stdout).stdout.getvalue() + + def get_metadata_object(self, object_type, object_id): + """ + Retrieve an object from the metadata pool, pass it through + ceph-dencoder to dump it to JSON, and return the decoded object. + """ + + o = self.radosmo(['get', object_id, '-']) + j = self.dencoder(object_type, o) + try: + return json.loads(j) + except (TypeError, ValueError): + log.error("Failed to decode JSON: '{0}'".format(j)) + raise + + def get_journal_version(self): + """ + Read the JournalPointer and Journal::Header objects to learn the version of + encoding in use. 
+ """ + journal_pointer_object = '400.00000000' + journal_pointer_dump = self.get_metadata_object("JournalPointer", journal_pointer_object) + journal_ino = journal_pointer_dump['journal_pointer']['front'] + + journal_header_object = "{0:x}.00000000".format(journal_ino) + journal_header_dump = self.get_metadata_object('Journaler::Header', journal_header_object) + + version = journal_header_dump['journal_header']['stream_format'] + log.debug("Read journal version {0}".format(version)) + + return version + + def mds_asok(self, command, mds_id=None, timeout=None): + if mds_id is None: + return self.rank_asok(command, timeout=timeout) + + return self.json_asok(command, 'mds', mds_id, timeout=timeout) + + def mds_tell(self, command, mds_id=None): + if mds_id is None: + return self.rank_tell(command) + + return json.loads(self.mon_manager.raw_cluster_cmd("tell", f"mds.{mds_id}", *command)) + + def rank_asok(self, command, rank=0, status=None, timeout=None): + info = self.get_rank(rank=rank, status=status) + return self.json_asok(command, 'mds', info['name'], timeout=timeout) + + def rank_tell(self, command, rank=0, status=None): + try: + out = self.mon_manager.raw_cluster_cmd("tell", f"mds.{self.id}:{rank}", *command) + return json.loads(out) + except json.decoder.JSONDecodeError: + log.error("could not decode: {}".format(out)) + raise + + def ranks_tell(self, command, status=None): + if status is None: + status = self.status() + out = [] + for r in status.get_ranks(self.id): + result = self.rank_tell(command, rank=r['rank'], status=status) + out.append((r['rank'], result)) + return sorted(out) + + def ranks_perf(self, f, status=None): + perf = self.ranks_tell(["perf", "dump"], status=status) + out = [] + for rank, perf in perf: + out.append((rank, f(perf))) + return out + + def read_cache(self, path, depth=None, rank=None): + cmd = ["dump", "tree", path] + if depth is not None: + cmd.append(depth.__str__()) + result = self.rank_asok(cmd, rank=rank) + if result is None or len(result) == 0: + raise RuntimeError("Path not found in cache: {0}".format(path)) + + return result + + def wait_for_state(self, goal_state, reject=None, timeout=None, mds_id=None, rank=None): + """ + Block until the MDS reaches a particular state, or a failure condition + is met. + + When there are multiple MDSs, succeed when exaclty one MDS is in the + goal state, or fail when any MDS is in the reject state. 
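        For example, assuming `fs` is a Filesystem instance, a call like

            fs.wait_for_state('up:active', rank=0, timeout=60)

        would block until rank 0 is active again, giving up after a minute.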
+ + :param goal_state: Return once the MDS is in this state + :param reject: Fail if the MDS enters this state before the goal state + :param timeout: Fail if this many seconds pass before reaching goal + :return: number of seconds waited, rounded down to integer + """ + + started_at = time.time() + while True: + status = self.status() + if rank is not None: + try: + mds_info = status.get_rank(self.id, rank) + current_state = mds_info['state'] if mds_info else None + log.debug("Looked up MDS state for mds.{0}: {1}".format(rank, current_state)) + except: + mdsmap = self.get_mds_map(status=status) + if rank in mdsmap['failed']: + log.debug("Waiting for rank {0} to come back.".format(rank)) + current_state = None + else: + raise + elif mds_id is not None: + # mds_info is None if no daemon with this ID exists in the map + mds_info = status.get_mds(mds_id) + current_state = mds_info['state'] if mds_info else None + log.debug("Looked up MDS state for {0}: {1}".format(mds_id, current_state)) + else: + # In general, look for a single MDS + states = [m['state'] for m in status.get_ranks(self.id)] + if [s for s in states if s == goal_state] == [goal_state]: + current_state = goal_state + elif reject in states: + current_state = reject + else: + current_state = None + log.debug("mapped states {0} to {1}".format(states, current_state)) + + elapsed = time.time() - started_at + if current_state == goal_state: + log.debug("reached state '{0}' in {1}s".format(current_state, elapsed)) + return elapsed + elif reject is not None and current_state == reject: + raise RuntimeError("MDS in reject state {0}".format(current_state)) + elif timeout is not None and elapsed > timeout: + log.error("MDS status at timeout: {0}".format(status.get_fsmap(self.id))) + raise RuntimeError( + "Reached timeout after {0} seconds waiting for state {1}, while in state {2}".format( + elapsed, goal_state, current_state + )) + else: + time.sleep(1) + + def _read_data_xattr(self, ino_no, xattr_name, obj_type, pool): + if pool is None: + pool = self.get_data_pool_name() + + obj_name = "{0:x}.00000000".format(ino_no) + + args = ["getxattr", obj_name, xattr_name] + try: + proc = self.rados(args, pool=pool, stdout=BytesIO()) + except CommandFailedError as e: + log.error(e.__str__()) + raise ObjectNotFound(obj_name) + + obj_blob = proc.stdout.getvalue() + return json.loads(self.dencoder(obj_type, obj_blob).strip()) + + def _write_data_xattr(self, ino_no, xattr_name, data, pool=None): + """ + Write to an xattr of the 0th data object of an inode. Will + succeed whether the object and/or xattr already exist or not. 
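+
+        The manual equivalent is roughly (pool name, inode number and value
+        are illustrative)::
+
+            rados -p cephfs_data setxattr 10000000002.00000000 parent <value>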
+ + :param ino_no: integer inode number + :param xattr_name: string name of the xattr + :param data: byte array data to write to the xattr + :param pool: name of data pool or None to use primary data pool + :return: None + """ + if pool is None: + pool = self.get_data_pool_name() + + obj_name = "{0:x}.00000000".format(ino_no) + args = ["setxattr", obj_name, xattr_name, data] + self.rados(args, pool=pool) + + def read_symlink(self, ino_no, pool=None): + return self._read_data_xattr(ino_no, "symlink", "string_wrapper", pool) + + def read_backtrace(self, ino_no, pool=None): + """ + Read the backtrace from the data pool, return a dict in the format + given by inode_backtrace_t::dump, which is something like: + + :: + + rados -p cephfs_data getxattr 10000000002.00000000 parent > out.bin + ceph-dencoder type inode_backtrace_t import out.bin decode dump_json + + { "ino": 1099511627778, + "ancestors": [ + { "dirino": 1, + "dname": "blah", + "version": 11}], + "pool": 1, + "old_pools": []} + + :param pool: name of pool to read backtrace from. If omitted, FS must have only + one data pool and that will be used. + """ + return self._read_data_xattr(ino_no, "parent", "inode_backtrace_t", pool) + + def read_layout(self, ino_no, pool=None): + """ + Read 'layout' xattr of an inode and parse the result, returning a dict like: + :: + { + "stripe_unit": 4194304, + "stripe_count": 1, + "object_size": 4194304, + "pool_id": 1, + "pool_ns": "", + } + + :param pool: name of pool to read backtrace from. If omitted, FS must have only + one data pool and that will be used. + """ + return self._read_data_xattr(ino_no, "layout", "file_layout_t", pool) + + def _enumerate_data_objects(self, ino, size): + """ + Get the list of expected data objects for a range, and the list of objects + that really exist. 
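+
+        As a worked example (inode number illustrative): with the default
+        4 MiB object size, an inode 0x10000000000 of size 9 MiB is expected
+        to map to three objects::
+
+            10000000000.00000000  10000000000.00000001  10000000000.00000002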
+ + :return a tuple of two lists of strings (expected, actual) + """ + stripe_size = 1024 * 1024 * 4 + + size = max(stripe_size, size) + + want_objects = [ + "{0:x}.{1:08x}".format(ino, n) + for n in range(0, ((size - 1) // stripe_size) + 1) + ] + + exist_objects = self.rados(["ls"], pool=self.get_data_pool_name(), stdout=StringIO()).stdout.getvalue().split("\n") + + return want_objects, exist_objects + + def data_objects_present(self, ino, size): + """ + Check that *all* the expected data objects for an inode are present in the data pool + """ + + want_objects, exist_objects = self._enumerate_data_objects(ino, size) + missing = set(want_objects) - set(exist_objects) + + if missing: + log.debug("Objects missing (ino {0}, size {1}): {2}".format( + ino, size, missing + )) + return False + else: + log.debug("All objects for ino {0} size {1} found".format(ino, size)) + return True + + def data_objects_absent(self, ino, size): + want_objects, exist_objects = self._enumerate_data_objects(ino, size) + present = set(want_objects) & set(exist_objects) + + if present: + log.debug("Objects not absent (ino {0}, size {1}): {2}".format( + ino, size, present + )) + return False + else: + log.debug("All objects for ino {0} size {1} are absent".format(ino, size)) + return True + + def dirfrag_exists(self, ino, frag): + try: + self.radosm(["stat", "{0:x}.{1:08x}".format(ino, frag)]) + except CommandFailedError: + return False + else: + return True + + def list_dirfrag(self, dir_ino): + """ + Read the named object and return the list of omap keys + + :return a list of 0 or more strings + """ + + dirfrag_obj_name = "{0:x}.00000000".format(dir_ino) + + try: + key_list_str = self.radosmo(["listomapkeys", dirfrag_obj_name], stdout=StringIO()) + except CommandFailedError as e: + log.error(e.__str__()) + raise ObjectNotFound(dirfrag_obj_name) + + return key_list_str.strip().split("\n") if key_list_str else [] + + def get_meta_of_fs_file(self, dir_ino, obj_name, out): + """ + get metadata from parent to verify the correctness of the data format encoded by the tool, cephfs-meta-injection. + warning : The splitting of directory is not considered here. + """ + + dirfrag_obj_name = "{0:x}.00000000".format(dir_ino) + try: + self.radosm(["getomapval", dirfrag_obj_name, obj_name+"_head", out]) + except CommandFailedError as e: + log.error(e.__str__()) + raise ObjectNotFound(dir_ino) + + def erase_metadata_objects(self, prefix): + """ + For all objects in the metadata pool matching the prefix, + erase them. + + This O(N) with the number of objects in the pool, so only suitable + for use on toy test filesystems. + """ + all_objects = self.radosmo(["ls"], stdout=StringIO()).strip().split("\n") + matching_objects = [o for o in all_objects if o.startswith(prefix)] + for o in matching_objects: + self.radosm(["rm", o]) + + def erase_mds_objects(self, rank): + """ + Erase all the per-MDS objects for a particular rank. This includes + inotable, sessiontable, journal + """ + + def obj_prefix(multiplier): + """ + MDS object naming conventions like rank 1's + journal is at 201.*** + """ + return "%x." 
% (multiplier * 0x100 + rank) + + # MDS_INO_LOG_OFFSET + self.erase_metadata_objects(obj_prefix(2)) + # MDS_INO_LOG_BACKUP_OFFSET + self.erase_metadata_objects(obj_prefix(3)) + # MDS_INO_LOG_POINTER_OFFSET + self.erase_metadata_objects(obj_prefix(4)) + # MDSTables & SessionMap + self.erase_metadata_objects("mds{rank:d}_".format(rank=rank)) + + @property + def _prefix(self): + """ + Override this to set a different + """ + return "" + + def _make_rank(self, rank): + return "{}:{}".format(self.name, rank) + + def _run_tool(self, tool, args, rank=None, quiet=False): + # Tests frequently have [client] configuration that jacks up + # the objecter log level (unlikely to be interesting here) + # and does not set the mds log level (very interesting here) + if quiet: + base_args = [os.path.join(self._prefix, tool), '--debug-mds=1', '--debug-objecter=1'] + else: + base_args = [os.path.join(self._prefix, tool), '--debug-mds=20', '--debug-ms=1', '--debug-objecter=1'] + + if rank is not None: + base_args.extend(["--rank", "%s" % str(rank)]) + + t1 = datetime.datetime.now() + r = self.tool_remote.sh(script=base_args + args, stdout=StringIO()).strip() + duration = datetime.datetime.now() - t1 + log.debug("Ran {0} in time {1}, result:\n{2}".format( + base_args + args, duration, r + )) + return r + + @property + def tool_remote(self): + """ + An arbitrary remote to use when invoking recovery tools. Use an MDS host because + it'll definitely have keys with perms to access cephfs metadata pool. This is public + so that tests can use this remote to go get locally written output files from the tools. + """ + return self.mon_manager.controller + + def journal_tool(self, args, rank, quiet=False): + """ + Invoke cephfs-journal-tool with the passed arguments for a rank, and return its stdout + """ + fs_rank = self._make_rank(rank) + return self._run_tool("cephfs-journal-tool", args, fs_rank, quiet) + + def meta_tool(self, args, rank, quiet=False): + """ + Invoke cephfs-meta-injection with the passed arguments for a rank, and return its stdout + """ + fs_rank = self._make_rank(rank) + return self._run_tool("cephfs-meta-injection", args, fs_rank, quiet) + + def table_tool(self, args, quiet=False): + """ + Invoke cephfs-table-tool with the passed arguments, and return its stdout + """ + return self._run_tool("cephfs-table-tool", args, None, quiet) + + def data_scan(self, args, quiet=False, worker_count=1): + """ + Invoke cephfs-data-scan with the passed arguments, and return its stdout + + :param worker_count: if greater than 1, multiple workers will be run + in parallel and the return value will be None + """ + + workers = [] + + for n in range(0, worker_count): + if worker_count > 1: + # data-scan args first token is a command, followed by args to it. + # insert worker arguments after the command. + cmd = args[0] + worker_args = [cmd] + ["--worker_n", n.__str__(), "--worker_m", worker_count.__str__()] + args[1:] + else: + worker_args = args + + workers.append(Greenlet.spawn(lambda wargs=worker_args: + self._run_tool("cephfs-data-scan", wargs, None, quiet))) + + for w in workers: + w.get() + + if worker_count == 1: + return workers[0].value + else: + return None + + def is_full(self): + return self.is_pool_full(self.get_data_pool_name()) + + def authorize(self, client_id, caps=('/', 'rw')): + """ + Run "ceph fs authorize" and run "ceph auth get" to get and returnt the + keyring. + + client_id: client id that will be authorized + caps: tuple containing the path and permission (can be r or rw) + respectively. 
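+
+        A usage sketch (client ids and paths are illustrative)::
+
+            fs.authorize('alice')                                # rw on /
+            fs.authorize('bob', (('/dir1', 'r'), ('/dir2', 'rw')))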
+ """ + if isinstance(caps[0], (tuple, list)): + x = [] + for c in caps: + x.extend(c) + caps = tuple(x) + + client_name = 'client.' + client_id + return self.mon_manager.raw_cluster_cmd('fs', 'authorize', self.name, + client_name, *caps) + + def grow(self, new_max_mds, status=None): + oldmax = self.get_var('max_mds', status=status) + assert(new_max_mds > oldmax) + self.set_max_mds(new_max_mds) + return self.wait_for_daemons() + + def shrink(self, new_max_mds, status=None): + oldmax = self.get_var('max_mds', status=status) + assert(new_max_mds < oldmax) + self.set_max_mds(new_max_mds) + return self.wait_for_daemons() + + def run_scrub(self, cmd, rank=0): + return self.rank_tell(["scrub"] + cmd, rank) + + def get_scrub_status(self, rank=0): + return self.run_scrub(["status"], rank) + + def flush(self, rank=0): + return self.rank_tell(["flush", "journal"], rank=rank) + + def wait_until_scrub_complete(self, result=None, tag=None, rank=0, sleep=30, + timeout=300, reverse=False): + # time out after "timeout" seconds and assume as done + if result is None: + result = "no active scrubs running" + with contextutil.safe_while(sleep=sleep, tries=timeout//sleep) as proceed: + while proceed(): + out_json = self.rank_tell(["scrub", "status"], rank=rank) + assert out_json is not None + if not reverse: + if result in out_json['status']: + log.info("all active scrubs completed") + return True + else: + if result not in out_json['status']: + log.info("all active scrubs completed") + return True + + if tag is not None: + status = out_json['scrubs'][tag] + if status is not None: + log.info(f"scrub status for tag:{tag} - {status}") + else: + log.info(f"scrub has completed for tag:{tag}") + return True + + # timed out waiting for scrub to complete + return False + + def get_damage(self, rank=None): + if rank is None: + result = {} + for info in self.get_ranks(): + rank = info['rank'] + result[rank] = self.get_damage(rank=rank) + return result + else: + return self.rank_tell(['damage', 'ls'], rank=rank) diff --git a/qa/tasks/cephfs/fuse_mount.py b/qa/tasks/cephfs/fuse_mount.py new file mode 100644 index 000000000..0b9b17403 --- /dev/null +++ b/qa/tasks/cephfs/fuse_mount.py @@ -0,0 +1,533 @@ +import json +import time +import logging + +from io import StringIO +from textwrap import dedent + +from teuthology.contextutil import MaxWhileTries +from teuthology.contextutil import safe_while +from teuthology.orchestra import run +from teuthology.exceptions import CommandFailedError +from tasks.ceph_manager import get_valgrind_args +from tasks.cephfs.mount import CephFSMount, UMOUNT_TIMEOUT + +log = logging.getLogger(__name__) + +# Refer mount.py for docstrings. 
+class FuseMount(CephFSMount): + def __init__(self, ctx, test_dir, client_id, client_remote, + client_keyring_path=None, cephfs_name=None, + cephfs_mntpt=None, hostfs_mntpt=None, brxnet=None, + client_config={}): + super(FuseMount, self).__init__(ctx=ctx, test_dir=test_dir, + client_id=client_id, client_remote=client_remote, + client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt, + cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet, + client_config=client_config) + + self.fuse_daemon = None + self._fuse_conn = None + self.id = None + self.inst = None + self.addr = None + self.mount_timeout = int(self.client_config.get('mount_timeout', 30)) + + self._mount_bin = [ + 'ceph-fuse', "-f", + "--admin-socket", "/var/run/ceph/$cluster-$name.$pid.asok"] + self._mount_cmd_cwd = self.test_dir + if self.client_config.get('valgrind') is not None: + self.cwd = None # get_valgrind_args chdir for us + self._mount_cmd_logger = log.getChild('ceph-fuse.{id}'.format(id=self.client_id)) + self._mount_cmd_stdin = run.PIPE + + def mount(self, mntopts=None, check_status=True, mntargs=None, **kwargs): + self.update_attrs(**kwargs) + self.assert_and_log_minimum_mount_details() + + self.setup_netns() + + try: + return self._mount(mntopts, mntargs, check_status) + except RuntimeError: + # Catch exceptions by the mount() logic (i.e. not remote command + # failures) and ensure the mount is not left half-up. + # Otherwise we might leave a zombie mount point that causes + # anyone traversing cephtest/ to get hung up on. + log.warning("Trying to clean up after failed mount") + self.umount_wait(force=True) + raise + + def _mount(self, mntopts, mntargs, check_status): + log.info("Client client.%s config is %s" % (self.client_id, + self.client_config)) + + self._create_mntpt() + + retval = self._run_mount_cmd(mntopts, mntargs, check_status) + if retval: + return retval + + self.gather_mount_info() + + def _run_mount_cmd(self, mntopts, mntargs, check_status): + mount_cmd = self._get_mount_cmd(mntopts, mntargs) + mountcmd_stdout, mountcmd_stderr = StringIO(), StringIO() + + # Before starting ceph-fuse process, note the contents of + # /sys/fs/fuse/connections + pre_mount_conns = self._list_fuse_conns() + log.info("Pre-mount connections: {0}".format(pre_mount_conns)) + + self.fuse_daemon = self.client_remote.run( + args=mount_cmd, + cwd=self._mount_cmd_cwd, + logger=self._mount_cmd_logger, + stdin=self._mount_cmd_stdin, + stdout=mountcmd_stdout, + stderr=mountcmd_stderr, + wait=False + ) + + return self._wait_and_record_our_fuse_conn( + check_status, pre_mount_conns, mountcmd_stdout, mountcmd_stderr) + + def _get_mount_cmd(self, mntopts, mntargs): + daemon_signal = 'kill' + if self.client_config.get('coverage') or \ + self.client_config.get('valgrind') is not None: + daemon_signal = 'term' + + mount_cmd = ['sudo', 'adjust-ulimits', 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=self.test_dir), + 'daemon-helper', daemon_signal] + + mount_cmd = self._add_valgrind_args(mount_cmd) + mount_cmd = ['sudo'] + self._nsenter_args + mount_cmd + + mount_cmd += self._mount_bin + [self.hostfs_mntpt] + if self.client_id: + mount_cmd += ['--id', self.client_id] + if self.client_keyring_path and self.client_id: + mount_cmd += ['-k', self.client_keyring_path] + + self.validate_subvol_options() + + if self.cephfs_mntpt: + mount_cmd += ["--client_mountpoint=" + self.cephfs_mntpt] + + if self.cephfs_name: + mount_cmd += ["--client_fs=" + self.cephfs_name] + if mntopts: + mount_cmd.extend(('-o', 
','.join(mntopts))) + if mntargs: + mount_cmd.extend(mntargs) + + return mount_cmd + + def _add_valgrind_args(self, mount_cmd): + if self.client_config.get('valgrind') is not None: + mount_cmd = get_valgrind_args( + self.test_dir, + 'client.{id}'.format(id=self.client_id), + mount_cmd, + self.client_config.get('valgrind'), + cd=False + ) + + return mount_cmd + + def _list_fuse_conns(self): + conn_dir = "/sys/fs/fuse/connections" + + self.client_remote.run(args=['sudo', 'modprobe', 'fuse'], + check_status=False) + self.client_remote.run( + args=["sudo", "mount", "-t", "fusectl", conn_dir, conn_dir], + check_status=False, timeout=(30)) + + try: + ls_str = self.client_remote.sh("ls " + conn_dir, + stdout=StringIO(), + timeout=300).strip() + except CommandFailedError: + return [] + + if ls_str: + return [int(n) for n in ls_str.split("\n")] + else: + return [] + + def _wait_and_record_our_fuse_conn(self, check_status, pre_mount_conns, + mountcmd_stdout, mountcmd_stderr): + """ + Wait for the connection reference to appear in /sys + """ + waited = 0 + + post_mount_conns = self._list_fuse_conns() + while len(post_mount_conns) <= len(pre_mount_conns): + if self.fuse_daemon.finished: + # Did mount fail? Raise the CommandFailedError instead of + # hitting the "failed to populate /sys/" timeout + try: + self.fuse_daemon.wait() + except CommandFailedError as e: + log.info('mount command failed.') + if check_status: + raise + else: + return (e, mountcmd_stdout.getvalue(), + mountcmd_stderr.getvalue()) + time.sleep(1) + waited += 1 + if waited > self._fuse_conn_check_timeout: + raise RuntimeError( + "Fuse mount failed to populate/sys/ after {} " + "seconds".format(waited)) + else: + post_mount_conns = self._list_fuse_conns() + + log.info("Post-mount connections: {0}".format(post_mount_conns)) + + self._record_our_fuse_conn(pre_mount_conns, post_mount_conns) + + @property + def _fuse_conn_check_timeout(self): + mount_wait = self.client_config.get('mount_wait', 0) + if mount_wait > 0: + log.info("Fuse mount waits {0} seconds before checking /sys/".format(mount_wait)) + time.sleep(mount_wait) + timeout = int(self.client_config.get('mount_timeout', 30)) + return timeout + + def _record_our_fuse_conn(self, pre_mount_conns, post_mount_conns): + """ + Record our fuse connection number so that we can use it when forcing + an unmount. 
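+
+        For example (numbers illustrative): if /sys/fs/fuse/connections held
+        {38, 40} before mounting and {38, 40, 41} afterwards, the new
+        connection is 41, and a later forced unmount can abort it by writing
+        '1' to /sys/fs/fuse/connections/41/abort.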
+ """ + new_conns = list(set(post_mount_conns) - set(pre_mount_conns)) + if len(new_conns) == 0: + raise RuntimeError("New fuse connection directory not found ({0})".format(new_conns)) + elif len(new_conns) > 1: + raise RuntimeError("Unexpectedly numerous fuse connections {0}".format(new_conns)) + else: + self._fuse_conn = new_conns[0] + + def gather_mount_info(self): + status = self.admin_socket(['status']) + self.id = status['id'] + self.client_pid = status['metadata']['pid'] + try: + self.inst = status['inst_str'] + self.addr = status['addr_str'] + except KeyError: + sessions = self.fs.rank_asok(['session', 'ls']) + for s in sessions: + if s['id'] == self.id: + self.inst = s['inst'] + self.addr = self.inst.split()[1] + if self.inst is None: + raise RuntimeError("cannot find client session") + + def check_mounted_state(self): + proc = self.client_remote.run( + args=[ + 'stat', + '--file-system', + '--printf=%T\n', + '--', + self.hostfs_mntpt, + ], + stdout=StringIO(), + stderr=StringIO(), + wait=False, + timeout=300 + ) + try: + proc.wait() + except CommandFailedError: + error = proc.stderr.getvalue() + if ("endpoint is not connected" in error + or "Software caused connection abort" in error): + # This happens is fuse is killed without unmount + log.warning("Found stale mount point at {0}".format(self.hostfs_mntpt)) + return True + else: + # This happens if the mount directory doesn't exist + log.info('mount point does not exist: %s', self.hostfs_mntpt) + return False + + fstype = proc.stdout.getvalue().rstrip('\n') + if fstype == 'fuseblk': + log.info('ceph-fuse is mounted on %s', self.hostfs_mntpt) + return True + else: + log.debug('ceph-fuse not mounted, got fs type {fstype!r}'.format( + fstype=fstype)) + return False + + def wait_until_mounted(self): + """ + Check to make sure that fuse is mounted on mountpoint. If not, + sleep for 5 seconds and check again. + """ + + while not self.check_mounted_state(): + # Even if it's not mounted, it should at least + # be running: catch simple failures where it has terminated. + assert not self.fuse_daemon.poll() + + time.sleep(5) + + # Now that we're mounted, set permissions so that the rest of the test + # will have unrestricted access to the filesystem mount. + for retry in range(10): + try: + stderr = StringIO() + self.client_remote.run(args=['sudo', 'chmod', '1777', + self.hostfs_mntpt], + timeout=300, + stderr=stderr, omit_sudo=False) + break + except run.CommandFailedError: + stderr = stderr.getvalue().lower() + if "read-only file system" in stderr: + break + elif "permission denied" in stderr: + time.sleep(5) + else: + raise + + def _mountpoint_exists(self): + return self.client_remote.run(args=["ls", "-d", self.hostfs_mntpt], + check_status=False, + timeout=300).exitstatus == 0 + + def umount(self, cleanup=True): + """ + umount() must not run cleanup() when it's called by umount_wait() + since "run.wait([self.fuse_daemon], timeout)" would hang otherwise. 
+ """ + if not self.is_mounted(): + if cleanup: + self.cleanup() + return + if self.is_blocked(): + self._run_umount_lf() + if cleanup: + self.cleanup() + return + + try: + log.info('Running fusermount -u on {name}...'.format(name=self.client_remote.name)) + stderr = StringIO() + self.client_remote.run( + args=['sudo', 'fusermount', '-u', self.hostfs_mntpt], + stderr=stderr, timeout=UMOUNT_TIMEOUT, omit_sudo=False) + except run.CommandFailedError: + if "mountpoint not found" in stderr.getvalue(): + # This happens if the mount directory doesn't exist + log.info('mount point does not exist: %s', self.mountpoint) + elif "not mounted" in stderr.getvalue(): + # This happens if the mount directory already unmouted + log.info('mount point not mounted: %s', self.mountpoint) + else: + log.info('Failed to unmount ceph-fuse on {name}, aborting...'.format(name=self.client_remote.name)) + + self.client_remote.run( + args=['sudo', run.Raw('PATH=/usr/sbin:$PATH'), 'lsof', + run.Raw(';'), 'ps', 'auxf'], + timeout=UMOUNT_TIMEOUT, omit_sudo=False) + + # abort the fuse mount, killing all hung processes + if self._fuse_conn: + self.run_python(dedent(""" + import os + path = "/sys/fs/fuse/connections/{0}/abort" + if os.path.exists(path): + open(path, "w").write("1") + """).format(self._fuse_conn)) + self._fuse_conn = None + + # make sure its unmounted + self._run_umount_lf() + + self._fuse_conn = None + self.id = None + self.inst = None + self.addr = None + if cleanup: + self.cleanup() + + def umount_wait(self, force=False, require_clean=False, + timeout=UMOUNT_TIMEOUT): + """ + :param force: Complete cleanly even if the MDS is offline + """ + if not (self.is_mounted() and self.fuse_daemon): + log.debug('ceph-fuse client.{id} is not mounted at {remote} ' + '{mnt}'.format(id=self.client_id, + remote=self.client_remote, + mnt=self.hostfs_mntpt)) + self.cleanup() + return + + if force: + assert not require_clean # mutually exclusive + + # When we expect to be forcing, kill the ceph-fuse process directly. + # This should avoid hitting the more aggressive fallback killing + # in umount() which can affect other mounts too. + self.fuse_daemon.stdin.close() + + # However, we will still hit the aggressive wait if there is an ongoing + # mount -o remount (especially if the remount is stuck because MDSs + # are unavailable) + + if self.is_blocked(): + self._run_umount_lf() + self.cleanup() + return + + # cleanup is set to to fail since clieanup must happen after umount is + # complete; otherwise following call to run.wait hangs. + self.umount(cleanup=False) + + try: + # Permit a timeout, so that we do not block forever + run.wait([self.fuse_daemon], timeout) + + except MaxWhileTries: + log.error("process failed to terminate after unmount. This probably" + " indicates a bug within ceph-fuse.") + raise + except CommandFailedError: + if require_clean: + raise + + self.cleanup() + + def teardown(self): + """ + Whatever the state of the mount, get it gone. 
+ """ + super(FuseMount, self).teardown() + + self.umount() + + if self.fuse_daemon and not self.fuse_daemon.finished: + self.fuse_daemon.stdin.close() + try: + self.fuse_daemon.wait() + except CommandFailedError: + pass + + def _asok_path(self): + return "/var/run/ceph/ceph-client.{0}.*.asok".format(self.client_id) + + @property + def _prefix(self): + return "" + + def find_admin_socket(self): + pyscript = """ +import glob +import re +import os +import subprocess + +def _find_admin_socket(client_name): + asok_path = "{asok_path}" + files = glob.glob(asok_path) + mountpoint = "{mountpoint}" + + # Given a non-glob path, it better be there + if "*" not in asok_path: + assert(len(files) == 1) + return files[0] + + for f in files: + pid = re.match(".*\.(\d+)\.asok$", f).group(1) + if os.path.exists("/proc/{{0}}".format(pid)): + with open("/proc/{{0}}/cmdline".format(pid), 'r') as proc_f: + contents = proc_f.read() + if mountpoint in contents: + return f + raise RuntimeError("Client socket {{0}} not found".format(client_name)) + +print(_find_admin_socket("{client_name}")) +""".format( + asok_path=self._asok_path(), + client_name="client.{0}".format(self.client_id), + mountpoint=self.mountpoint) + + asok_path = self.run_python(pyscript, sudo=True) + log.info("Found client admin socket at {0}".format(asok_path)) + return asok_path + + def admin_socket(self, args): + asok_path = self.find_admin_socket() + + # Query client ID from admin socket, wait 2 seconds + # and retry 10 times if it is not ready + with safe_while(sleep=2, tries=10) as proceed: + while proceed(): + try: + p = self.client_remote.run(args= + ['sudo', self._prefix + 'ceph', '--admin-daemon', asok_path] + args, + stdout=StringIO(), stderr=StringIO(), wait=False, + timeout=300) + p.wait() + break + except CommandFailedError: + if "connection refused" in p.stderr.getvalue().lower(): + pass + + return json.loads(p.stdout.getvalue().strip()) + + def get_global_id(self): + """ + Look up the CephFS client ID for this mount + """ + return self.admin_socket(['mds_sessions'])['id'] + + def get_global_inst(self): + """ + Look up the CephFS client instance for this mount + """ + return self.inst + + def get_global_addr(self): + """ + Look up the CephFS client addr for this mount + """ + return self.addr + + def get_client_pid(self): + """ + return pid of ceph-fuse process + """ + status = self.admin_socket(['status']) + return status['metadata']['pid'] + + def get_osd_epoch(self): + """ + Return 2-tuple of osd_epoch, osd_epoch_barrier + """ + status = self.admin_socket(['status']) + return status['osd_epoch'], status['osd_epoch_barrier'] + + def get_dentry_count(self): + """ + Return 2-tuple of dentry_count, dentry_pinned_count + """ + status = self.admin_socket(['status']) + return status['dentry_count'], status['dentry_pinned_count'] + + def set_cache_size(self, size): + return self.admin_socket(['config', 'set', 'client_cache_size', str(size)]) + + def get_op_read_count(self): + return self.admin_socket(['perf', 'dump', 'objecter'])['objecter']['osdop_read'] diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py new file mode 100644 index 000000000..89f6b6639 --- /dev/null +++ b/qa/tasks/cephfs/kernel_mount.py @@ -0,0 +1,394 @@ +import errno +import json +import logging +import os +import re + +from io import StringIO +from textwrap import dedent + +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra import run +from teuthology.contextutil import MaxWhileTries + +from tasks.cephfs.mount 
import CephFSMount, UMOUNT_TIMEOUT + +log = logging.getLogger(__name__) + + +# internal metadata directory +DEBUGFS_META_DIR = 'meta' + +class KernelMount(CephFSMount): + def __init__(self, ctx, test_dir, client_id, client_remote, + client_keyring_path=None, hostfs_mntpt=None, + cephfs_name=None, cephfs_mntpt=None, brxnet=None, + client_config={}): + super(KernelMount, self).__init__(ctx=ctx, test_dir=test_dir, + client_id=client_id, client_remote=client_remote, + client_keyring_path=client_keyring_path, hostfs_mntpt=hostfs_mntpt, + cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet, + client_config=client_config) + + if client_config.get('debug', False): + self.client_remote.run(args=["sudo", "bash", "-c", "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control"]) + self.client_remote.run(args=["sudo", "bash", "-c", "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control"]) + + self.dynamic_debug = self.client_config.get('dynamic_debug', False) + self.rbytes = self.client_config.get('rbytes', False) + self.snapdirname = client_config.get('snapdirname', '.snap') + self.syntax_style = self.client_config.get('syntax', 'v2') + self.inst = None + self.addr = None + self._mount_bin = ['adjust-ulimits', 'ceph-coverage', self.test_dir +\ + '/archive/coverage', '/bin/mount', '-t', 'ceph'] + + def mount(self, mntopts=None, check_status=True, **kwargs): + self.update_attrs(**kwargs) + self.assert_and_log_minimum_mount_details() + + self.setup_netns() + + if not self.cephfs_mntpt: + self.cephfs_mntpt = '/' + if not self.cephfs_name: + self.cephfs_name = 'cephfs' + + self._create_mntpt() + + retval = self._run_mount_cmd(mntopts, check_status) + if retval: + return retval + + self._set_filemode_on_mntpt() + + if self.dynamic_debug: + kmount_count = self.ctx.get(f'kmount_count.{self.client_remote.hostname}', 0) + if kmount_count == 0: + self.enable_dynamic_debug() + self.ctx[f'kmount_count.{self.client_remote.hostname}'] = kmount_count + 1 + + try: + self.gather_mount_info() + except: + log.warn('failed to fetch mount info - tests depending on mount addr/inst may fail!') + + def gather_mount_info(self): + self.id = self._get_global_id() + self.get_global_inst() + self.get_global_addr() + + def _run_mount_cmd(self, mntopts, check_status): + mount_cmd = self._get_mount_cmd(mntopts) + mountcmd_stdout, mountcmd_stderr = StringIO(), StringIO() + + try: + self.client_remote.run(args=mount_cmd, timeout=300, + stdout=mountcmd_stdout, + stderr=mountcmd_stderr, omit_sudo=False) + except CommandFailedError as e: + log.info('mount command failed') + if check_status: + raise + else: + return (e, mountcmd_stdout.getvalue(), + mountcmd_stderr.getvalue()) + log.info('mount command passed') + + def _make_mount_cmd_old_or_new_style(self): + optd = {} + mnt_stx = '' + + self.validate_subvol_options() + + assert(self.cephfs_mntpt) + if self.syntax_style == 'v1': + mnt_stx = f':{self.cephfs_mntpt}' + if self.client_id: + optd['name'] = self.client_id + if self.cephfs_name: + optd['mds_namespace'] = self.cephfs_name + elif self.syntax_style == 'v2': + mnt_stx = f'{self.client_id}@.{self.cephfs_name}={self.cephfs_mntpt}' + else: + assert 0, f'invalid syntax style: {self.syntax_style}' + return (mnt_stx, optd) + + def _get_mount_cmd(self, mntopts): + opts = 'norequire_active_mds' + if self.client_keyring_path and self.client_id: + opts += ',secret=' + self.get_key_from_keyfile() + if self.config_path: + opts += ',conf=' + self.config_path + if self.rbytes: + opts += ",rbytes" + else: + opts 
+= ",norbytes" + if self.snapdirname != '.snap': + opts += f',snapdirname={self.snapdirname}' + + mount_cmd = ['sudo'] + self._nsenter_args + stx_opt = self._make_mount_cmd_old_or_new_style() + for opt_name, opt_val in stx_opt[1].items(): + opts += f',{opt_name}={opt_val}' + if mntopts: + opts += ',' + ','.join(mntopts) + log.info(f'mounting using device: {stx_opt[0]}') + # do not fall-back to old-style mount (catch new-style + # mount syntax bugs in the kernel). exclude this config + # when using v1-style syntax, since old mount helpers + # (pre-quincy) would pass this option to the kernel. + if self.syntax_style != 'v1': + opts += ",nofallback" + mount_cmd += self._mount_bin + [stx_opt[0], self.hostfs_mntpt, '-v', + '-o', opts] + return mount_cmd + + def umount(self, force=False): + if not self.is_mounted(): + self.cleanup() + return + + if self.is_blocked(): + self._run_umount_lf() + self.cleanup() + return + + log.debug('Unmounting client client.{id}...'.format(id=self.client_id)) + + try: + cmd=['sudo', 'umount', self.hostfs_mntpt] + if force: + cmd.append('-f') + self.client_remote.run(args=cmd, timeout=UMOUNT_TIMEOUT, omit_sudo=False) + except Exception as e: + log.debug('Killing processes on client.{id}...'.format(id=self.client_id)) + self.client_remote.run( + args=['sudo', run.Raw('PATH=/usr/sbin:$PATH'), 'lsof', + run.Raw(';'), 'ps', 'auxf'], + timeout=UMOUNT_TIMEOUT, omit_sudo=False) + raise e + + if self.dynamic_debug: + kmount_count = self.ctx.get(f'kmount_count.{self.client_remote.hostname}') + assert kmount_count + if kmount_count == 1: + self.disable_dynamic_debug() + self.ctx[f'kmount_count.{self.client_remote.hostname}'] = kmount_count - 1 + + self.cleanup() + + def umount_wait(self, force=False, require_clean=False, + timeout=UMOUNT_TIMEOUT): + """ + Unlike the fuse client, the kernel client's umount is immediate + """ + if not self.is_mounted(): + self.cleanup() + return + + try: + self.umount(force) + except (CommandFailedError, MaxWhileTries): + if not force: + raise + + # force delete the netns and umount + self._run_umount_lf() + self.cleanup() + + def wait_until_mounted(self): + """ + Unlike the fuse client, the kernel client is up and running as soon + as the initial mount() function returns. + """ + assert self.is_mounted() + + def teardown(self): + super(KernelMount, self).teardown() + if self.is_mounted(): + self.umount() + + def _get_debug_dir(self): + """ + Get the debugfs folder for this mount + """ + + cluster_name = 'ceph' + fsid = self.ctx.ceph[cluster_name].fsid + + global_id = self._get_global_id() + + return os.path.join("/sys/kernel/debug/ceph/", f"{fsid}.client{global_id}") + + def read_debug_file(self, filename): + """ + Read the debug file "filename", return None if the file doesn't exist. 
+ """ + + path = os.path.join(self._get_debug_dir(), filename) + + stdout = StringIO() + stderr = StringIO() + try: + self.run_shell_payload(f"sudo dd if={path}", timeout=(5 * 60), + stdout=stdout, stderr=stderr) + return stdout.getvalue() + except CommandFailedError: + if 'no such file or directory' in stderr.getvalue().lower(): + return errno.ENOENT + elif 'not a directory' in stderr.getvalue().lower(): + return errno.ENOTDIR + elif 'permission denied' in stderr.getvalue().lower(): + return errno.EACCES + raise + + def _get_global_id(self): + try: + p = self.run_shell_payload("getfattr --only-values -n ceph.client_id .", stdout=StringIO()) + v = p.stdout.getvalue() + prefix = "client" + assert v.startswith(prefix) + return int(v[len(prefix):]) + except CommandFailedError: + # Probably this fallback can be deleted in a few releases when the kernel xattr is widely available. + log.debug("Falling back to messy global_id lookup via /sys...") + + pyscript = dedent(""" + import glob + import os + import json + + def get_id_to_dir(): + result = {} + for dir in glob.glob("/sys/kernel/debug/ceph/*"): + if os.path.basename(dir) == DEBUGFS_META_DIR: + continue + mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines() + global_id = mds_sessions_lines[0].split()[1].strip('"') + client_id = mds_sessions_lines[1].split()[1].strip('"') + result[client_id] = global_id + return result + print(json.dumps(get_id_to_dir())) + """) + + output = self.client_remote.sh([ + 'sudo', 'python3', '-c', pyscript + ], timeout=(5*60)) + client_id_to_global_id = json.loads(output) + + try: + return client_id_to_global_id[self.client_id] + except KeyError: + log.error("Client id '{0}' debug dir not found (clients seen were: {1})".format( + self.client_id, ",".join(client_id_to_global_id.keys()) + )) + raise + + def _dynamic_debug_control(self, enable): + """ + Write to dynamic debug control file. + """ + if enable: + fdata = "module ceph +p" + else: + fdata = "module ceph -p" + + self.run_shell_payload(f""" +sudo modprobe ceph +echo '{fdata}' | sudo tee /sys/kernel/debug/dynamic_debug/control +""") + + def enable_dynamic_debug(self): + """ + Enable the dynamic debug. + """ + self._dynamic_debug_control(True) + + def disable_dynamic_debug(self): + """ + Disable the dynamic debug. + """ + self._dynamic_debug_control(False) + + def get_global_id(self): + """ + Look up the CephFS client ID for this mount, using debugfs. 
+ """ + + assert self.is_mounted() + + return self._get_global_id() + + @property + def _global_addr(self): + if self.addr is not None: + return self.addr + + # The first line of the "status" file's output will be something + # like: + # "instance: client.4297 (0)10.72.47.117:0/1148470933" + # What we need here is only the string "10.72.47.117:0/1148470933" + status = self.read_debug_file("status") + if status is None: + return None + + instance = re.findall(r'instance:.*', status)[0] + self.addr = instance.split()[2].split(')')[1] + return self.addr; + + @property + def _global_inst(self): + if self.inst is not None: + return self.inst + + client_gid = "client%d" % self.get_global_id() + self.inst = " ".join([client_gid, self._global_addr]) + return self.inst + + def get_global_inst(self): + """ + Look up the CephFS client instance for this mount + """ + return self._global_inst + + def get_global_addr(self): + """ + Look up the CephFS client addr for this mount + """ + return self._global_addr + + def get_osd_epoch(self): + """ + Return 2-tuple of osd_epoch, osd_epoch_barrier + """ + osd_map = self.read_debug_file("osdmap") + assert osd_map + + lines = osd_map.split("\n") + first_line_tokens = lines[0].split() + epoch, barrier = int(first_line_tokens[1]), int(first_line_tokens[3]) + + return epoch, barrier + + def get_op_read_count(self): + stdout = StringIO() + stderr = StringIO() + try: + path = os.path.join(self._get_debug_dir(), "metrics/size") + self.run_shell(f"sudo stat {path}", stdout=stdout, + stderr=stderr, cwd=None) + buf = self.read_debug_file("metrics/size") + except CommandFailedError: + if 'no such file or directory' in stderr.getvalue().lower() \ + or 'not a directory' in stderr.getvalue().lower(): + try: + path = os.path.join(self._get_debug_dir(), "metrics") + self.run_shell(f"sudo stat {path}", stdout=stdout, + stderr=stderr, cwd=None) + buf = self.read_debug_file("metrics") + except CommandFailedError: + return errno.ENOENT + else: + return 0 + return int(re.findall(r'read.*', buf)[0].split()[1]) diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py new file mode 100644 index 000000000..4a8187406 --- /dev/null +++ b/qa/tasks/cephfs/mount.py @@ -0,0 +1,1570 @@ +import hashlib +import json +import logging +import datetime +import os +import re +import time + +from io import StringIO +from contextlib import contextmanager +from textwrap import dedent +from IPy import IP + +from teuthology.contextutil import safe_while +from teuthology.misc import get_file, write_file +from teuthology.orchestra import run +from teuthology.orchestra.run import Raw +from teuthology.exceptions import CommandFailedError, ConnectionLostError + +from tasks.cephfs.filesystem import Filesystem + +log = logging.getLogger(__name__) + + +UMOUNT_TIMEOUT = 300 + + +class CephFSMount(object): + def __init__(self, ctx, test_dir, client_id, client_remote, + client_keyring_path=None, hostfs_mntpt=None, + cephfs_name=None, cephfs_mntpt=None, brxnet=None, + client_config=None): + """ + :param test_dir: Global teuthology test dir + :param client_id: Client ID, the 'foo' in client.foo + :param client_keyring_path: path to keyring for given client_id + :param client_remote: Remote instance for the host where client will + run + :param hostfs_mntpt: Path to directory on the FS on which Ceph FS will + be mounted + :param cephfs_name: Name of Ceph FS to be mounted + :param cephfs_mntpt: Path to directory inside Ceph FS that will be + mounted as root + """ + self.ctx = ctx + self.test_dir = test_dir + + 
self._verify_attrs(client_id=client_id, + client_keyring_path=client_keyring_path, + hostfs_mntpt=hostfs_mntpt, cephfs_name=cephfs_name, + cephfs_mntpt=cephfs_mntpt) + + if client_config is None: + client_config = {} + self.client_config = client_config + + self.cephfs_name = cephfs_name + self.client_id = client_id + self.client_keyring_path = client_keyring_path + self.client_remote = client_remote + self.cluster_name = 'ceph' # TODO: use config['cluster'] + self.fs = None + + if cephfs_mntpt is None and client_config.get("mount_path"): + self.cephfs_mntpt = client_config.get("mount_path") + log.info(f"using client_config[\"cephfs_mntpt\"] = {self.cephfs_mntpt}") + else: + self.cephfs_mntpt = cephfs_mntpt + log.info(f"cephfs_mntpt = {self.cephfs_mntpt}") + + if hostfs_mntpt is None and client_config.get("mountpoint"): + self.hostfs_mntpt = client_config.get("mountpoint") + log.info(f"using client_config[\"hostfs_mntpt\"] = {self.hostfs_mntpt}") + elif hostfs_mntpt is not None: + self.hostfs_mntpt = hostfs_mntpt + else: + self.hostfs_mntpt = os.path.join(self.test_dir, f'mnt.{self.client_id}') + self.hostfs_mntpt_dirname = os.path.basename(self.hostfs_mntpt) + log.info(f"hostfs_mntpt = {self.hostfs_mntpt}") + + self._netns_name = None + self.nsid = -1 + if brxnet is None: + self.ceph_brx_net = '192.168.0.0/16' + else: + self.ceph_brx_net = brxnet + + self.test_files = ['a', 'b', 'c'] + + self.background_procs = [] + + # This will cleanup the stale netnses, which are from the + # last failed test cases. + @staticmethod + def cleanup_stale_netnses_and_bridge(remote): + p = remote.run(args=['ip', 'netns', 'list'], + stdout=StringIO(), timeout=(5*60)) + p = p.stdout.getvalue().strip() + + # Get the netns name list + netns_list = re.findall(r'ceph-ns-[^()\s][-.\w]+[^():\s]', p) + + # Remove the stale netnses + for ns in netns_list: + ns_name = ns.split()[0] + args = ['sudo', 'ip', 'netns', 'delete', '{0}'.format(ns_name)] + try: + remote.run(args=args, timeout=(5*60), omit_sudo=False) + except Exception: + pass + + # Remove the stale 'ceph-brx' + try: + args = ['sudo', 'ip', 'link', 'delete', 'ceph-brx'] + remote.run(args=args, timeout=(5*60), omit_sudo=False) + except Exception: + pass + + def _parse_netns_name(self): + self._netns_name = '-'.join(["ceph-ns", + re.sub(r'/+', "-", self.mountpoint)]) + + @property + def mountpoint(self): + if self.hostfs_mntpt is None: + self.hostfs_mntpt = os.path.join(self.test_dir, + self.hostfs_mntpt_dirname) + return self.hostfs_mntpt + + @mountpoint.setter + def mountpoint(self, path): + if not isinstance(path, str): + raise RuntimeError('path should be of str type.') + self._mountpoint = self.hostfs_mntpt = path + + @property + def netns_name(self): + if self._netns_name == None: + self._parse_netns_name() + return self._netns_name + + @netns_name.setter + def netns_name(self, name): + self._netns_name = name + + def assert_that_ceph_fs_exists(self): + output = self.ctx.managers[self.cluster_name].raw_cluster_cmd("fs", "ls") + if self.cephfs_name: + assert self.cephfs_name in output, \ + 'expected ceph fs is not present on the cluster' + log.info(f'Mounting Ceph FS {self.cephfs_name}; just confirmed its presence on cluster') + else: + assert 'No filesystems enabled' not in output, \ + 'ceph cluster has no ceph fs, not even the default ceph fs' + log.info('Mounting default Ceph FS; just confirmed its presence on cluster') + + def assert_and_log_minimum_mount_details(self): + """ + Make sure we have minimum details required for mounting. 
Ideally, this + method should be called at the beginning of the mount method. + """ + if not self.client_id or not self.client_remote or \ + not self.hostfs_mntpt: + log.error(f"self.client_id = {self.client_id}") + log.error(f"self.client_remote = {self.client_remote}") + log.error(f"self.hostfs_mntpt = {self.hostfs_mntpt}") + errmsg = ('Mounting CephFS requires that at least following ' + 'details to be provided -\n' + '1. the client ID,\n2. the mountpoint and\n' + '3. the remote machine where CephFS will be mounted.\n') + raise RuntimeError(errmsg) + + self.assert_that_ceph_fs_exists() + + log.info('Mounting Ceph FS. Following are details of mount; remember ' + '"None" represents Python type None -') + log.info(f'self.client_remote.hostname = {self.client_remote.hostname}') + log.info(f'self.client.name = client.{self.client_id}') + log.info(f'self.hostfs_mntpt = {self.hostfs_mntpt}') + log.info(f'self.cephfs_name = {self.cephfs_name}') + log.info(f'self.cephfs_mntpt = {self.cephfs_mntpt}') + log.info(f'self.client_keyring_path = {self.client_keyring_path}') + if self.client_keyring_path: + log.info('keyring content -\n' + + get_file(self.client_remote, self.client_keyring_path, + sudo=True).decode()) + + def is_blocked(self): + if not self.addr: + # can't infer if our addr is blocklisted - let the caller try to + # umount without lazy/force. If the client was blocklisted, then + # the umount would be stuck and the test would fail on timeout. + # happens only with Ubuntu 20.04 (missing kclient patches :/). + return False + self.fs = Filesystem(self.ctx, name=self.cephfs_name) + + try: + output = self.fs.mon_manager.raw_cluster_cmd(args='osd blocklist ls') + except CommandFailedError: + # Fallback for older Ceph cluster + output = self.fs.mon_manager.raw_cluster_cmd(args='osd blacklist ls') + + return self.addr in output + + def is_stuck(self): + """ + Check if mount is stuck/in a hanged state. + """ + if not self.is_mounted(): + return False + + retval = self.client_remote.run(args=f'sudo stat {self.hostfs_mntpt}', + omit_sudo=False, wait=False).returncode + if retval == 0: + return False + + time.sleep(10) + proc = self.client_remote.run(args='ps -ef', stdout=StringIO()) + # if proc was running even after 10 seconds, it has to be stuck. + if f'stat {self.hostfs_mntpt}' in proc.stdout.getvalue(): + log.critical('client mounted at self.hostfs_mntpt is stuck!') + return True + return False + + def is_mounted(self): + file = self.client_remote.read_file('/proc/self/mounts',stdout=StringIO()) + if self.hostfs_mntpt in file: + return True + else: + log.debug(f"not mounted; /proc/self/mounts is:\n{file}") + return False + + def setupfs(self, name=None): + if name is None and self.fs is not None: + # Previous mount existed, reuse the old name + name = self.fs.name + self.fs = Filesystem(self.ctx, name=name) + log.info('Wait for MDS to reach steady state...') + self.fs.wait_for_daemons() + log.info('Ready to start {}...'.format(type(self).__name__)) + + def _create_mntpt(self): + self.client_remote.run(args=f'mkdir -p -v {self.hostfs_mntpt}', + timeout=60) + # Use 0000 mode to prevent undesired modifications to the mountpoint on + # the local file system. 
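+        # (Note: root can still write through mode 0000, but unprivileged test
+        # helpers cannot; once the Ceph FS is mounted on top, the mounted root
+        # is chmod'ed to 1777 so the rest of the test has unrestricted access.)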
+ self.client_remote.run(args=f'chmod 0000 {self.hostfs_mntpt}', + timeout=60) + + @property + def _nsenter_args(self): + return ['nsenter', f'--net=/var/run/netns/{self.netns_name}'] + + def _set_filemode_on_mntpt(self): + stderr = StringIO() + try: + self.client_remote.run( + args=['sudo', 'chmod', '1777', self.hostfs_mntpt], + stderr=stderr, timeout=(5*60)) + except CommandFailedError: + # the client does not have write permissions in the caps it holds + # for the Ceph FS that was just mounted. + if 'permission denied' in stderr.getvalue().lower(): + pass + + def _setup_brx_and_nat(self): + # The ip for ceph-brx should be + ip = IP(self.ceph_brx_net)[-2] + mask = self.ceph_brx_net.split('/')[1] + brd = IP(self.ceph_brx_net).broadcast() + + brx = self.client_remote.run(args=['ip', 'addr'], stderr=StringIO(), + stdout=StringIO(), timeout=(5*60)) + brx = re.findall(r'inet .* ceph-brx', brx.stdout.getvalue()) + if brx: + # If the 'ceph-brx' already exists, then check whether + # the new net is conflicting with it + _ip, _mask = brx[0].split()[1].split('/', 1) + if _ip != "{}".format(ip) or _mask != mask: + raise RuntimeError("Conflict with existing ceph-brx {0}, new {1}/{2}".format(brx[0].split()[1], ip, mask)) + + # Setup the ceph-brx and always use the last valid IP + if not brx: + log.info("Setuping the 'ceph-brx' with {0}/{1}".format(ip, mask)) + + self.run_shell_payload(f""" + set -e + sudo ip link add name ceph-brx type bridge + sudo ip addr flush dev ceph-brx + sudo ip link set ceph-brx up + sudo ip addr add {ip}/{mask} brd {brd} dev ceph-brx + """, timeout=(5*60), omit_sudo=False, cwd='/') + + args = "echo 1 | sudo tee /proc/sys/net/ipv4/ip_forward" + self.client_remote.run(args=args, timeout=(5*60), omit_sudo=False) + + # Setup the NAT + p = self.client_remote.run(args=['route'], stderr=StringIO(), + stdout=StringIO(), timeout=(5*60)) + p = re.findall(r'default .*', p.stdout.getvalue()) + if p == False: + raise RuntimeError("No default gw found") + gw = p[0].split()[7] + + self.run_shell_payload(f""" + set -e + sudo iptables -A FORWARD -o {gw} -i ceph-brx -j ACCEPT + sudo iptables -A FORWARD -i {gw} -o ceph-brx -j ACCEPT + sudo iptables -t nat -A POSTROUTING -s {ip}/{mask} -o {gw} -j MASQUERADE + """, timeout=(5*60), omit_sudo=False, cwd='/') + + def _setup_netns(self): + p = self.client_remote.run(args=['ip', 'netns', 'list'], + stderr=StringIO(), stdout=StringIO(), + timeout=(5*60)).stdout.getvalue().strip() + + # Get the netns name list + netns_list = re.findall(r'[^()\s][-.\w]+[^():\s]', p) + + out = re.search(r"{0}".format(self.netns_name), p) + if out is None: + # Get an uniq nsid for the new netns + nsid = 0 + p = self.client_remote.run(args=['ip', 'netns', 'list-id'], + stderr=StringIO(), stdout=StringIO(), + timeout=(5*60)).stdout.getvalue() + while True: + out = re.search(r"nsid {} ".format(nsid), p) + if out is None: + break + + nsid += 1 + + # Add one new netns and set it id + self.run_shell_payload(f""" + set -e + sudo ip netns add {self.netns_name} + sudo ip netns set {self.netns_name} {nsid} + """, timeout=(5*60), omit_sudo=False, cwd='/') + self.nsid = nsid; + else: + # The netns already exists and maybe suspended by self.kill() + self.resume_netns(); + + nsid = int(re.search(r"{0} \(id: (\d+)\)".format(self.netns_name), p).group(1)) + self.nsid = nsid; + return + + # Get one ip address for netns + ips = IP(self.ceph_brx_net) + for ip in ips: + found = False + if ip == ips[0]: + continue + if ip == ips[-2]: + raise RuntimeError("we have ran out of the ip addresses") 
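+            # 'ip' is now a candidate address from ceph_brx_net (the network
+            # address and the bridge address at [-2] are never handed out);
+            # the loop below asks every existing netns for its addresses so we
+            # only claim an IP no other mount's veth pair is already using.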
+ + for ns in netns_list: + ns_name = ns.split()[0] + args = ['sudo', 'ip', 'netns', 'exec', '{0}'.format(ns_name), 'ip', 'addr'] + try: + p = self.client_remote.run(args=args, stderr=StringIO(), + stdout=StringIO(), timeout=(5*60), + omit_sudo=False) + q = re.search("{0}".format(ip), p.stdout.getvalue()) + if q is not None: + found = True + break + except CommandFailedError: + if "No such file or directory" in p.stderr.getvalue(): + pass + if "Invalid argument" in p.stderr.getvalue(): + pass + + if found == False: + break + + mask = self.ceph_brx_net.split('/')[1] + brd = IP(self.ceph_brx_net).broadcast() + + log.info("Setuping the netns '{0}' with {1}/{2}".format(self.netns_name, ip, mask)) + + # Setup the veth interfaces + brxip = IP(self.ceph_brx_net)[-2] + self.run_shell_payload(f""" + set -e + sudo ip link add veth0 netns {self.netns_name} type veth peer name brx.{nsid} + sudo ip netns exec {self.netns_name} ip addr add {ip}/{mask} brd {brd} dev veth0 + sudo ip netns exec {self.netns_name} ip link set veth0 up + sudo ip netns exec {self.netns_name} ip link set lo up + sudo ip netns exec {self.netns_name} ip route add default via {brxip} + """, timeout=(5*60), omit_sudo=False, cwd='/') + + # Bring up the brx interface and join it to 'ceph-brx' + self.run_shell_payload(f""" + set -e + sudo ip link set brx.{nsid} up + sudo ip link set dev brx.{nsid} master ceph-brx + """, timeout=(5*60), omit_sudo=False, cwd='/') + + def _cleanup_netns(self): + if self.nsid == -1: + return + log.info("Removing the netns '{0}'".format(self.netns_name)) + + # Delete the netns and the peer veth interface + self.run_shell_payload(f""" + set -e + sudo ip link set brx.{self.nsid} down + sudo ip link delete dev brx.{self.nsid} + sudo ip netns delete {self.netns_name} + """, timeout=(5*60), omit_sudo=False, cwd='/') + + self.nsid = -1 + + def _cleanup_brx_and_nat(self): + brx = self.client_remote.run(args=['ip', 'addr'], stderr=StringIO(), + stdout=StringIO(), timeout=(5*60)) + brx = re.findall(r'inet .* ceph-brx', brx.stdout.getvalue()) + if not brx: + return + + # If we are the last netns, will delete the ceph-brx + args = ['sudo', 'ip', 'link', 'show'] + p = self.client_remote.run(args=args, stdout=StringIO(), + timeout=(5*60), omit_sudo=False) + _list = re.findall(r'brx\.', p.stdout.getvalue().strip()) + if len(_list) != 0: + return + + log.info("Removing the 'ceph-brx'") + + self.run_shell_payload(""" + set -e + sudo ip link set ceph-brx down + sudo ip link delete ceph-brx + """, timeout=(5*60), omit_sudo=False, cwd='/') + + # Drop the iptables NAT rules + ip = IP(self.ceph_brx_net)[-2] + mask = self.ceph_brx_net.split('/')[1] + + p = self.client_remote.run(args=['route'], stderr=StringIO(), + stdout=StringIO(), timeout=(5*60)) + p = re.findall(r'default .*', p.stdout.getvalue()) + if p == False: + raise RuntimeError("No default gw found") + gw = p[0].split()[7] + self.run_shell_payload(f""" + set -e + sudo iptables -D FORWARD -o {gw} -i ceph-brx -j ACCEPT + sudo iptables -D FORWARD -i {gw} -o ceph-brx -j ACCEPT + sudo iptables -t nat -D POSTROUTING -s {ip}/{mask} -o {gw} -j MASQUERADE + """, timeout=(5*60), omit_sudo=False, cwd='/') + + def setup_netns(self): + """ + Setup the netns for the mountpoint. + """ + log.info("Setting the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint)) + self._setup_brx_and_nat() + self._setup_netns() + + def cleanup_netns(self): + """ + Cleanup the netns for the mountpoint. 
+ """ + # We will defer cleaning the netnses and bridge until the last + # mountpoint is unmounted, this will be a temporary work around + # for issue#46282. + + # log.info("Cleaning the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint)) + # self._cleanup_netns() + # self._cleanup_brx_and_nat() + + def suspend_netns(self): + """ + Suspend the netns veth interface. + """ + if self.nsid == -1: + return + + log.info("Suspending the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint)) + + args = ['sudo', 'ip', 'link', 'set', 'brx.{0}'.format(self.nsid), 'down'] + self.client_remote.run(args=args, timeout=(5*60), omit_sudo=False) + + def resume_netns(self): + """ + Resume the netns veth interface. + """ + if self.nsid == -1: + return + + log.info("Resuming the '{0}' netns for '{1}'".format(self._netns_name, self.mountpoint)) + + args = ['sudo', 'ip', 'link', 'set', 'brx.{0}'.format(self.nsid), 'up'] + self.client_remote.run(args=args, timeout=(5*60), omit_sudo=False) + + def mount(self, mntopts=[], check_status=True, **kwargs): + """ + kwargs expects its members to be same as the arguments accepted by + self.update_attrs(). + """ + raise NotImplementedError() + + def mount_wait(self, **kwargs): + """ + Accepts arguments same as self.mount(). + """ + self.mount(**kwargs) + self.wait_until_mounted() + + def _run_umount_lf(self): + log.debug(f'Force/lazy unmounting on client.{self.client_id}') + + try: + proc = self.client_remote.run( + args=f'sudo umount --lazy --force {self.hostfs_mntpt}', + timeout=UMOUNT_TIMEOUT, omit_sudo=False) + except CommandFailedError: + if self.is_mounted(): + raise + + return proc + + def umount(self): + raise NotImplementedError() + + def umount_wait(self, force=False, require_clean=False, + timeout=UMOUNT_TIMEOUT): + """ + + :param force: Expect that the mount will not shutdown cleanly: kill + it hard. + :param require_clean: Wait for the Ceph client associated with the + mount (e.g. ceph-fuse) to terminate, and + raise if it doesn't do so cleanly. + :param timeout: amount of time to be waited for umount command to finish + :return: + """ + raise NotImplementedError() + + def _verify_attrs(self, **kwargs): + """ + Verify that client_id, client_keyring_path, client_remote, hostfs_mntpt, + cephfs_name, cephfs_mntpt are either type str or None. + """ + for k, v in kwargs.items(): + if v is not None and not isinstance(v, str): + raise RuntimeError('value of attributes should be either str ' + f'or None. {k} - {v}') + + def update_attrs(self, client_id=None, client_keyring_path=None, + client_remote=None, hostfs_mntpt=None, cephfs_name=None, + cephfs_mntpt=None): + if not (client_id or client_keyring_path or client_remote or + cephfs_name or cephfs_mntpt or hostfs_mntpt): + return + + self._verify_attrs(client_id=client_id, + client_keyring_path=client_keyring_path, + hostfs_mntpt=hostfs_mntpt, cephfs_name=cephfs_name, + cephfs_mntpt=cephfs_mntpt) + + if client_id: + self.client_id = client_id + if client_keyring_path: + self.client_keyring_path = client_keyring_path + if client_remote: + self.client_remote = client_remote + if hostfs_mntpt: + self.hostfs_mntpt = hostfs_mntpt + if cephfs_name: + self.cephfs_name = cephfs_name + if cephfs_mntpt: + self.cephfs_mntpt = cephfs_mntpt + + def remount(self, **kwargs): + """ + Update mount object's attributes and attempt remount with these + new values for these attrbiutes. + + 1. Run umount_wait(). + 2. Run update_attrs(). + 3. Run mount(). 
+ + Accepts arguments of self.mount() and self.update_attrs() with 1 + exception: wait accepted too which can be True or False. + """ + self.umount_wait() + assert not self.is_mounted() + + mntopts = kwargs.pop('mntopts', []) + check_status = kwargs.pop('check_status', True) + wait = kwargs.pop('wait', True) + + self.update_attrs(**kwargs) + + retval = self.mount(mntopts=mntopts, check_status=check_status) + # avoid this scenario (again): mount command might've failed and + # check_status might have silenced the exception, yet we attempt to + # wait which might lead to an error. + if retval is None and wait: + self.wait_until_mounted() + + return retval + + def kill(self): + """ + Suspend the netns veth interface to make the client disconnected + from the ceph cluster + """ + log.info('Killing connection on {0}...'.format(self.client_remote.name)) + self.suspend_netns() + + def kill_cleanup(self): + """ + Follow up ``kill`` to get to a clean unmounted state. + """ + log.info('Cleaning up killed connection on {0}'.format(self.client_remote.name)) + self.umount_wait(force=True) + + def cleanup(self): + """ + Remove the mount point. + + Prerequisite: the client is not mounted. + """ + log.info('Cleaning up mount {0}'.format(self.client_remote.name)) + stderr = StringIO() + try: + self.client_remote.run(args=['rmdir', '--', self.mountpoint], + cwd=self.test_dir, stderr=stderr, + timeout=(60*5), check_status=False) + except CommandFailedError: + if "no such file or directory" not in stderr.getvalue().lower(): + raise + + self.cleanup_netns() + + def wait_until_mounted(self): + raise NotImplementedError() + + def get_keyring_path(self): + # N.B.: default keyring is /etc/ceph/ceph.keyring; see ceph.py and generate_caps + return '/etc/ceph/ceph.client.{id}.keyring'.format(id=self.client_id) + + def get_key_from_keyfile(self): + # XXX: don't call run_shell(), since CephFS might be unmounted. + keyring = self.client_remote.read_file(self.client_keyring_path).\ + decode() + + for line in keyring.split('\n'): + if line.find('key') != -1: + return line[line.find('=') + 1 : ].strip() + + raise RuntimeError('Key not found in keyring file ' + f'{self.client_keyring_path}. Its contents are -\n' + f'{keyring}') + + @property + def config_path(self): + """ + Path to ceph.conf: override this if you're not a normal systemwide ceph install + :return: stringv + """ + return "/etc/ceph/ceph.conf" + + @contextmanager + def mounted_wait(self): + """ + A context manager, from an initially unmounted state, to mount + this, yield, and then unmount and clean up. 
+ """ + self.mount() + self.wait_until_mounted() + try: + yield + finally: + self.umount_wait() + + def create_file(self, filename='testfile', dirname=None, user=None, + check_status=True): + assert(self.is_mounted()) + + if not os.path.isabs(filename): + if dirname: + if os.path.isabs(dirname): + path = os.path.join(dirname, filename) + else: + path = os.path.join(self.hostfs_mntpt, dirname, filename) + else: + path = os.path.join(self.hostfs_mntpt, filename) + else: + path = filename + + if user: + args = ['sudo', '-u', user, '-s', '/bin/bash', '-c', 'touch ' + path] + else: + args = 'touch ' + path + + return self.client_remote.run(args=args, check_status=check_status) + + def create_files(self): + assert(self.is_mounted()) + + for suffix in self.test_files: + log.info("Creating file {0}".format(suffix)) + self.client_remote.run(args=[ + 'touch', os.path.join(self.hostfs_mntpt, suffix) + ]) + + def test_create_file(self, filename='testfile', dirname=None, user=None, + check_status=True): + return self.create_file(filename=filename, dirname=dirname, user=user, + check_status=False) + + def check_files(self): + assert(self.is_mounted()) + + for suffix in self.test_files: + log.info("Checking file {0}".format(suffix)) + r = self.client_remote.run(args=[ + 'ls', os.path.join(self.hostfs_mntpt, suffix) + ], check_status=False) + if r.exitstatus != 0: + raise RuntimeError("Expected file {0} not found".format(suffix)) + + def write_file(self, path, data, perms=None): + """ + Write the given data at the given path and set the given perms to the + file on the path. + """ + if path.find(self.hostfs_mntpt) == -1: + path = os.path.join(self.hostfs_mntpt, path) + + write_file(self.client_remote, path, data) + + if perms: + self.run_shell(args=f'chmod {perms} {path}') + + def read_file(self, path): + """ + Return the data from the file on given path. 
+ """ + if path.find(self.hostfs_mntpt) == -1: + path = os.path.join(self.hostfs_mntpt, path) + + return self.run_shell(args=['cat', path]).\ + stdout.getvalue().strip() + + def create_destroy(self): + assert(self.is_mounted()) + + filename = "{0} {1}".format(datetime.datetime.now(), self.client_id) + log.debug("Creating test file {0}".format(filename)) + self.client_remote.run(args=[ + 'touch', os.path.join(self.hostfs_mntpt, filename) + ]) + log.debug("Deleting test file {0}".format(filename)) + self.client_remote.run(args=[ + 'rm', '-f', os.path.join(self.hostfs_mntpt, filename) + ]) + + def _run_python(self, pyscript, py_version='python3', sudo=False): + args, omit_sudo = [], True + if sudo: + args.append('sudo') + omit_sudo = False + args += ['adjust-ulimits', 'daemon-helper', 'kill', py_version, '-c', pyscript] + return self.client_remote.run(args=args, wait=False, stdin=run.PIPE, + stdout=StringIO(), omit_sudo=omit_sudo) + + def run_python(self, pyscript, py_version='python3', sudo=False): + p = self._run_python(pyscript, py_version, sudo=sudo) + p.wait() + return p.stdout.getvalue().strip() + + def run_shell(self, args, timeout=300, **kwargs): + omit_sudo = kwargs.pop('omit_sudo', False) + cwd = kwargs.pop('cwd', self.mountpoint) + stdout = kwargs.pop('stdout', StringIO()) + stderr = kwargs.pop('stderr', StringIO()) + + return self.client_remote.run(args=args, cwd=cwd, timeout=timeout, + stdout=stdout, stderr=stderr, + omit_sudo=omit_sudo, **kwargs) + + def run_shell_payload(self, payload, **kwargs): + kwargs['args'] = ["bash", "-c", Raw(f"'{payload}'")] + if kwargs.pop('sudo', False): + kwargs['args'].insert(0, 'sudo') + kwargs['omit_sudo'] = False + return self.run_shell(**kwargs) + + def run_as_user(self, **kwargs): + """ + Besides the arguments defined for run_shell() this method also + accepts argument 'user'. + """ + args = kwargs.pop('args') + user = kwargs.pop('user') + if isinstance(args, str): + args = ['sudo', '-u', user, '-s', '/bin/bash', '-c', args] + elif isinstance(args, list): + cmdlist = args + cmd = '' + for i in cmdlist: + cmd = cmd + i + ' ' + # get rid of extra space at the end. + cmd = cmd[:-1] + + args = ['sudo', '-u', user, '-s', '/bin/bash', '-c', cmd] + + kwargs['args'] = args + kwargs['omit_sudo'] = False + return self.run_shell(**kwargs) + + def run_as_root(self, **kwargs): + """ + Accepts same arguments as run_shell(). + """ + kwargs['user'] = 'root' + return self.run_as_user(**kwargs) + + def assert_retval(self, proc_retval, exp_retval): + msg = (f'expected return value: {exp_retval}\n' + f'received return value: {proc_retval}\n') + assert proc_retval == exp_retval, msg + + def _verify(self, proc, exp_retval=None, exp_errmsgs=None): + if exp_retval is None and exp_errmsgs is None: + raise RuntimeError('Method didn\'t get enough parameters. Pass ' + 'return value or error message expected from ' + 'the command/process.') + + if exp_retval is not None: + self.assert_retval(proc.returncode, exp_retval) + if exp_errmsgs is None: + return + + if isinstance(exp_errmsgs, str): + exp_errmsgs = (exp_errmsgs, ) + + proc_stderr = proc.stderr.getvalue().lower() + msg = ('didn\'t find any of the expected string in stderr.\n' + f'expected string: {exp_errmsgs}\n' + f'received error message: {proc_stderr}\n' + 'note: received error message is converted to lowercase') + for e in exp_errmsgs: + if e in proc_stderr: + break + # this else is meant for for loop. 
+ else: + assert False, msg + + def negtestcmd(self, args, retval=None, errmsgs=None, stdin=None, + cwd=None, wait=True): + """ + Conduct a negative test for the given command. + + retval and errmsgs are parameters to confirm the cause of command + failure. + + Note: errmsgs is expected to be a tuple, but in case there's only + error message, it can also be a string. This method will handle + that internally. + """ + proc = self.run_shell(args=args, wait=wait, stdin=stdin, cwd=cwd, + check_status=False) + self._verify(proc, retval, errmsgs) + return proc + + def negtestcmd_as_user(self, args, user, retval=None, errmsgs=None, + stdin=None, cwd=None, wait=True): + proc = self.run_as_user(args=args, user=user, wait=wait, stdin=stdin, + cwd=cwd, check_status=False) + self._verify(proc, retval, errmsgs) + return proc + + def negtestcmd_as_root(self, args, retval=None, errmsgs=None, stdin=None, + cwd=None, wait=True): + proc = self.run_as_root(args=args, wait=wait, stdin=stdin, cwd=cwd, + check_status=False) + self._verify(proc, retval, errmsgs) + return proc + + def open_for_reading(self, basename): + """ + Open a file for reading only. + """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + return self._run_python(dedent( + """ + import os + mode = os.O_RDONLY + fd = os.open("{path}", mode) + os.close(fd) + """.format(path=path) + )) + + def open_for_writing(self, basename, creat=True, trunc=True, excl=False): + """ + Open a file for writing only. + """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + return self._run_python(dedent( + """ + import os + mode = os.O_WRONLY + if {creat}: + mode |= os.O_CREAT + if {trunc}: + mode |= os.O_TRUNC + if {excl}: + mode |= os.O_EXCL + fd = os.open("{path}", mode) + os.close(fd) + """.format(path=path, creat=creat, trunc=trunc, excl=excl) + )) + + def open_no_data(self, basename): + """ + A pure metadata operation + """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + p = self._run_python(dedent( + """ + f = open("{path}", 'w') + """.format(path=path) + )) + p.wait() + + def open_background(self, basename="background_file", write=True, content="content"): + """ + Open a file for writing, then block such that the client + will hold a capability. + + Don't return until the remote process has got as far as opening + the file, then return the RemoteProcess instance. + """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + if write: + pyscript = dedent(""" + import time + + with open("{path}", 'w') as f: + f.write("{content}") + f.flush() + while True: + time.sleep(1) + """).format(path=path, content=content) + else: + pyscript = dedent(""" + import time + + with open("{path}", 'r') as f: + while True: + time.sleep(1) + """).format(path=path) + + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + + # This wait would not be sufficient if the file had already + # existed, but it's simple and in practice users of open_background + # are not using it on existing files. + if write: + self.wait_for_visible(basename, size=len(content)) + else: + self.wait_for_visible(basename) + + return rproc + + def open_dir_background(self, basename): + """ + Create and hold a capability to a directory. 
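A hypothetical call to the negative-test helper defined above, asserting that removing a non-existent directory fails; the exit status and message are what coreutils `rmdir` typically reports, not something the qa code guarantees:

    proc = mount_a.negtestcmd(args=['rmdir', 'no-such-dir'],
                              retval=1,
                              errmsgs='no such file or directory')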
+ """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + pyscript = dedent(""" + import time + import os + + os.mkdir("{path}") + fd = os.open("{path}", os.O_RDONLY) + while True: + time.sleep(1) + """).format(path=path) + + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + + self.wait_for_visible(basename) + + return rproc + + def wait_for_dir_empty(self, dirname, timeout=30): + dirpath = os.path.join(self.hostfs_mntpt, dirname) + with safe_while(sleep=5, tries=(timeout//5)) as proceed: + while proceed(): + p = self.run_shell_payload(f"stat -c %h {dirpath}") + nr_links = int(p.stdout.getvalue().strip()) + if nr_links == 2: + return + + def wait_for_visible(self, basename="background_file", size=None, timeout=30): + i = 0 + args = ['stat'] + if size is not None: + args += ['--printf=%s'] + args += [os.path.join(self.hostfs_mntpt, basename)] + while i < timeout: + p = self.client_remote.run(args=args, stdout=StringIO(), check_status=False) + if p.exitstatus == 0: + if size is not None: + s = p.stdout.getvalue().strip() + if int(s) == size: + log.info(f"File {basename} became visible with size {size} from {self.client_id} after {i}s") + return + else: + log.error(f"File {basename} became visible but with size {int(s)} not {size}") + else: + log.info(f"File {basename} became visible from {self.client_id} after {i}s") + return + time.sleep(1) + i += 1 + + raise RuntimeError("Timed out after {0}s waiting for {1} to become visible from {2}".format( + i, basename, self.client_id)) + + def lock_background(self, basename="background_file", do_flock=True): + """ + Open and lock a files for writing, hold the lock in a background process + """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + script_builder = """ + import time + import fcntl + import struct""" + if do_flock: + script_builder += """ + f1 = open("{path}-1", 'w') + fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB)""" + script_builder += """ + f2 = open("{path}-2", 'w') + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0) + fcntl.fcntl(f2, fcntl.F_SETLK, lockdata) + while True: + time.sleep(1) + """ + + pyscript = dedent(script_builder).format(path=path) + + log.info("lock_background file {0}".format(basename)) + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + return rproc + + def lock_and_release(self, basename="background_file"): + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + script = """ + import time + import fcntl + import struct + f1 = open("{path}-1", 'w') + fcntl.flock(f1, fcntl.LOCK_EX) + f2 = open("{path}-2", 'w') + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0) + fcntl.fcntl(f2, fcntl.F_SETLK, lockdata) + """ + pyscript = dedent(script).format(path=path) + + log.info("lock_and_release file {0}".format(basename)) + return self._run_python(pyscript) + + def check_filelock(self, basename="background_file", do_flock=True): + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + script_builder = """ + import fcntl + import errno + import struct""" + if do_flock: + script_builder += """ + f1 = open("{path}-1", 'r') + try: + fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB) + except IOError as e: + if e.errno == errno.EAGAIN: + pass + else: + raise RuntimeError("flock on file {path}-1 not found")""" + script_builder += """ + f2 = open("{path}-2", 'r') + try: + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0) + 
fcntl.fcntl(f2, fcntl.F_SETLK, lockdata) + except IOError as e: + if e.errno == errno.EAGAIN: + pass + else: + raise RuntimeError("posix lock on file {path}-2 not found") + """ + pyscript = dedent(script_builder).format(path=path) + + log.info("check lock on file {0}".format(basename)) + self.client_remote.run(args=[ + 'python3', '-c', pyscript + ]) + + def write_background(self, basename="background_file", loop=False): + """ + Open a file for writing, complete as soon as you can + :param basename: + :return: + """ + assert(self.is_mounted()) + + path = os.path.join(self.hostfs_mntpt, basename) + + pyscript = dedent(""" + import os + import time + + fd = os.open("{path}", os.O_RDWR | os.O_CREAT, 0o644) + try: + while True: + os.write(fd, b'content') + time.sleep(1) + if not {loop}: + break + except IOError as e: + pass + os.close(fd) + """).format(path=path, loop=str(loop)) + + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + return rproc + + def write_n_mb(self, filename, n_mb, seek=0, wait=True): + """ + Write the requested number of megabytes to a file + """ + assert(self.is_mounted()) + + return self.run_shell(["dd", "if=/dev/urandom", "of={0}".format(filename), + "bs=1M", "conv=fdatasync", + "count={0}".format(int(n_mb)), + "seek={0}".format(int(seek)) + ], wait=wait) + + def write_test_pattern(self, filename, size): + log.info("Writing {0} bytes to {1}".format(size, filename)) + return self.run_python(dedent(""" + import zlib + path = "{path}" + with open(path, 'w') as f: + for i in range(0, {size}): + val = zlib.crc32(str(i).encode('utf-8')) & 7 + f.write(chr(val)) + """.format( + path=os.path.join(self.hostfs_mntpt, filename), + size=size + ))) + + def validate_test_pattern(self, filename, size): + log.info("Validating {0} bytes from {1}".format(size, filename)) + # Use sudo because cephfs-data-scan may recreate the file with owner==root + return self.run_python(dedent(""" + import zlib + path = "{path}" + with open(path, 'r') as f: + bytes = f.read() + if len(bytes) != {size}: + raise RuntimeError("Bad length {{0}} vs. expected {{1}}".format( + len(bytes), {size} + )) + for i, b in enumerate(bytes): + val = zlib.crc32(str(i).encode('utf-8')) & 7 + if b != chr(val): + raise RuntimeError("Bad data at offset {{0}}".format(i)) + """.format( + path=os.path.join(self.hostfs_mntpt, filename), + size=size + )), sudo=True) + + def open_n_background(self, fs_path, count): + """ + Open N files for writing, hold them open in a background process + + :param fs_path: Path relative to CephFS root, e.g. "foo/bar" + :return: a RemoteProcess + """ + assert(self.is_mounted()) + + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + + pyscript = dedent(""" + import sys + import time + import os + + n = {count} + abs_path = "{abs_path}" + + if not os.path.exists(abs_path): + os.makedirs(abs_path) + + handles = [] + for i in range(0, n): + fname = "file_"+str(i) + path = os.path.join(abs_path, fname) + handles.append(open(path, 'w')) + + while True: + time.sleep(1) + """).format(abs_path=abs_path, count=count) + + rproc = self._run_python(pyscript) + self.background_procs.append(rproc) + return rproc + + def create_n_files(self, fs_path, count, sync=False, dirsync=False, + unlink=False, finaldirsync=False, hard_links=0): + """ + Create n files. 
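A minimal standalone sketch (Linux only, throwaway temp files) of the two lock flavours exercised by lock_background() and check_filelock() above: a BSD-style flock() on one file and a POSIX fcntl() write lock on the other:

    import fcntl
    import struct
    import tempfile

    with tempfile.NamedTemporaryFile() as f1, tempfile.NamedTemporaryFile() as f2:
        # Whole-file advisory lock, non-blocking.
        fcntl.flock(f1, fcntl.LOCK_EX | fcntl.LOCK_NB)
        # 'hhllhh' approximates struct flock (l_type, l_whence, l_start,
        # l_len plus two trailing fields); F_WRLCK with zero start/len
        # write-locks the whole file.
        lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0)
        fcntl.fcntl(f2, fcntl.F_SETLK, lockdata)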
+ + :param sync: sync the file after writing + :param dirsync: sync the containing directory after closing the file + :param unlink: unlink the file after closing + :param finaldirsync: sync the containing directory after closing the last file + :param hard_links: create given number of hard link(s) for each file + """ + + assert(self.is_mounted()) + + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + + pyscript = dedent(f""" + import os + import uuid + + n = {count} + create_hard_links = False + if {hard_links} > 0: + create_hard_links = True + path = "{abs_path}" + + dpath = os.path.dirname(path) + fnameprefix = os.path.basename(path) + os.makedirs(dpath, exist_ok=True) + + try: + dirfd = os.open(dpath, os.O_DIRECTORY) + + for i in range(n): + fpath = os.path.join(dpath, f"{{fnameprefix}}_{{i}}") + with open(fpath, 'w') as f: + f.write(f"{{i}}") + if {sync}: + f.flush() + os.fsync(f.fileno()) + if {unlink}: + os.unlink(fpath) + if {dirsync}: + os.fsync(dirfd) + if create_hard_links: + for j in range({hard_links}): + os.system(f"ln {{fpath}} {{dpath}}/{{fnameprefix}}_{{i}}_{{uuid.uuid4()}}") + if {finaldirsync}: + os.fsync(dirfd) + finally: + os.close(dirfd) + """) + + self.run_python(pyscript) + + def teardown(self): + for p in self.background_procs: + log.info("Terminating background process") + self._kill_background(p) + + self.background_procs = [] + + def _kill_background(self, p): + if p.stdin: + p.stdin.close() + try: + p.wait() + except (CommandFailedError, ConnectionLostError): + pass + + def kill_background(self, p): + """ + For a process that was returned by one of the _background member functions, + kill it hard. + """ + self._kill_background(p) + self.background_procs.remove(p) + + def send_signal(self, signal): + signal = signal.lower() + if signal.lower() not in ['sigstop', 'sigcont', 'sigterm', 'sigkill']: + raise NotImplementedError + + self.client_remote.run(args=['sudo', 'kill', '-{0}'.format(signal), + self.client_pid], omit_sudo=False) + + def get_global_id(self): + raise NotImplementedError() + + def get_global_inst(self): + raise NotImplementedError() + + def get_global_addr(self): + raise NotImplementedError() + + def get_osd_epoch(self): + raise NotImplementedError() + + def get_op_read_count(self): + raise NotImplementedError() + + def readlink(self, fs_path): + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + + pyscript = dedent(""" + import os + + print(os.readlink("{path}")) + """).format(path=abs_path) + + proc = self._run_python(pyscript) + proc.wait() + return str(proc.stdout.getvalue().strip()) + + + def lstat(self, fs_path, follow_symlinks=False, wait=True): + return self.stat(fs_path, follow_symlinks=False, wait=True) + + def stat(self, fs_path, follow_symlinks=True, wait=True, **kwargs): + """ + stat a file, and return the result as a dictionary like this: + { + "st_ctime": 1414161137.0, + "st_mtime": 1414161137.0, + "st_nlink": 33, + "st_gid": 0, + "st_dev": 16777218, + "st_size": 1190, + "st_ino": 2, + "st_uid": 0, + "st_mode": 16877, + "st_atime": 1431520593.0 + } + + Raises exception on absent file. 
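A hypothetical sequence combining create_n_files() with the stat() helper described above: create ten small files with one extra hard link each and confirm the resulting link count; the path prefix is an example and `mount_a` is a concrete mount from the fixture:

    mount_a.create_n_files('testdir/file', 10, finaldirsync=True, hard_links=1)
    st = mount_a.stat('testdir/file_0')
    assert st['st_nlink'] == 2   # the original name plus one hard link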
+ """ + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + if follow_symlinks: + stat_call = "os.stat('" + abs_path + "')" + else: + stat_call = "os.lstat('" + abs_path + "')" + + pyscript = dedent(""" + import os + import stat + import json + import sys + + try: + s = {stat_call} + except OSError as e: + sys.exit(e.errno) + + attrs = ["st_mode", "st_ino", "st_dev", "st_nlink", "st_uid", "st_gid", "st_size", "st_atime", "st_mtime", "st_ctime"] + print(json.dumps( + dict([(a, getattr(s, a)) for a in attrs]), + indent=2)) + """).format(stat_call=stat_call) + proc = self._run_python(pyscript, **kwargs) + if wait: + proc.wait() + return json.loads(proc.stdout.getvalue().strip()) + else: + return proc + + def touch(self, fs_path): + """ + Create a dentry if it doesn't already exist. This python + implementation exists because the usual command line tool doesn't + pass through error codes like EIO. + + :param fs_path: + :return: + """ + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + pyscript = dedent(""" + import sys + import errno + + try: + f = open("{path}", "w") + f.close() + except IOError as e: + sys.exit(errno.EIO) + """).format(path=abs_path) + proc = self._run_python(pyscript) + proc.wait() + + def path_to_ino(self, fs_path, follow_symlinks=True): + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + + if follow_symlinks: + pyscript = dedent(""" + import os + import stat + + print(os.stat("{path}").st_ino) + """).format(path=abs_path) + else: + pyscript = dedent(""" + import os + import stat + + print(os.lstat("{path}").st_ino) + """).format(path=abs_path) + + proc = self._run_python(pyscript) + proc.wait() + return int(proc.stdout.getvalue().strip()) + + def path_to_nlink(self, fs_path): + abs_path = os.path.join(self.hostfs_mntpt, fs_path) + + pyscript = dedent(""" + import os + import stat + + print(os.stat("{path}").st_nlink) + """).format(path=abs_path) + + proc = self._run_python(pyscript) + proc.wait() + return int(proc.stdout.getvalue().strip()) + + def ls(self, path=None, **kwargs): + """ + Wrap ls: return a list of strings + """ + kwargs['args'] = ["ls"] + if path: + kwargs['args'].append(path) + if kwargs.pop('sudo', False): + kwargs['args'].insert(0, 'sudo') + kwargs['omit_sudo'] = False + ls_text = self.run_shell(**kwargs).stdout.getvalue().strip() + + if ls_text: + return ls_text.split("\n") + else: + # Special case because otherwise split on empty string + # gives you [''] instead of [] + return [] + + def setfattr(self, path, key, val, **kwargs): + """ + Wrap setfattr. + + :param path: relative to mount point + :param key: xattr name + :param val: xattr value + :return: None + """ + kwargs['args'] = ["setfattr", "-n", key, "-v", val, path] + if kwargs.pop('sudo', False): + kwargs['args'].insert(0, 'sudo') + kwargs['omit_sudo'] = False + self.run_shell(**kwargs) + + def getfattr(self, path, attr, **kwargs): + """ + Wrap getfattr: return the values of a named xattr on one file, or + None if the attribute is not found. 
+ + :return: a string + """ + kwargs['args'] = ["getfattr", "--only-values", "-n", attr, path] + if kwargs.pop('sudo', False): + kwargs['args'].insert(0, 'sudo') + kwargs['omit_sudo'] = False + kwargs['wait'] = False + p = self.run_shell(**kwargs) + try: + p.wait() + except CommandFailedError as e: + if e.exitstatus == 1 and "No such attribute" in p.stderr.getvalue(): + return None + else: + raise + + return str(p.stdout.getvalue()) + + def df(self): + """ + Wrap df: return a dict of usage fields in bytes + """ + + p = self.run_shell(["df", "-B1", "."]) + lines = p.stdout.getvalue().strip().split("\n") + fs, total, used, avail = lines[1].split()[:4] + log.warning(lines) + + return { + "total": int(total), + "used": int(used), + "available": int(avail) + } + + def dir_checksum(self, path=None, follow_symlinks=False): + cmd = ["find"] + if follow_symlinks: + cmd.append("-L") + if path: + cmd.append(path) + cmd.extend(["-type", "f", "-exec", "md5sum", "{}", "+"]) + checksum_text = self.run_shell(cmd).stdout.getvalue().strip() + checksum_sorted = sorted(checksum_text.split('\n'), key=lambda v: v.split()[1]) + return hashlib.md5(('\n'.join(checksum_sorted)).encode('utf-8')).hexdigest() + + def validate_subvol_options(self): + mount_subvol_num = self.client_config.get('mount_subvol_num', None) + if self.cephfs_mntpt and mount_subvol_num is not None: + log.warning("You cannot specify both: cephfs_mntpt and mount_subvol_num") + log.info(f"Mounting subvol {mount_subvol_num} for now") + + if mount_subvol_num is not None: + # mount_subvol must be an index into the subvol path array for the fs + if not self.cephfs_name: + self.cephfs_name = 'cephfs' + assert(hasattr(self.ctx, "created_subvols")) + # mount_subvol must be specified under client.[0-9] yaml section + subvol_paths = self.ctx.created_subvols[self.cephfs_name] + path_to_mount = subvol_paths[mount_subvol_num] + self.cephfs_mntpt = path_to_mount diff --git a/qa/tasks/cephfs/test_acls.py b/qa/tasks/cephfs/test_acls.py new file mode 100644 index 000000000..48160dd8b --- /dev/null +++ b/qa/tasks/cephfs/test_acls.py @@ -0,0 +1,39 @@ +from logging import getLogger + +from io import StringIO +from tasks.cephfs.xfstests_dev import XFSTestsDev + + +log = getLogger(__name__) + + +class TestACLs(XFSTestsDev): + + def test_acls(self): + from tasks.cephfs.fuse_mount import FuseMount + from tasks.cephfs.kernel_mount import KernelMount + + if isinstance(self.mount_a, FuseMount): + log.info('client is fuse mounted') + elif isinstance(self.mount_a, KernelMount): + log.info('client is kernel mounted') + + # XXX: check_status is set to False so that we can check for command's + # failure on our own (since this command doesn't set right error code + # and error message in some cases) and print custom log messages + # accordingly. 
+ proc = self.mount_a.client_remote.run(args=['sudo', 'env', 'DIFF_LENGTH=0', + './check', 'generic/099'], cwd=self.xfstests_repo_path, stdout=StringIO(), + stderr=StringIO(), timeout=30, check_status=False,omit_sudo=False, + label='running tests for ACLs from xfstests-dev') + + if proc.returncode != 0: + log.info('Command failed.') + log.info(f'Command return value: {proc.returncode}') + stdout, stderr = proc.stdout.getvalue(), proc.stderr.getvalue() + log.info(f'Command stdout -\n{stdout}') + log.info(f'Command stderr -\n{stderr}') + + self.assertEqual(proc.returncode, 0) + success_line = 'Passed all 1 tests' + self.assertIn(success_line, stdout) diff --git a/qa/tasks/cephfs/test_admin.py b/qa/tasks/cephfs/test_admin.py new file mode 100644 index 000000000..9890381c6 --- /dev/null +++ b/qa/tasks/cephfs/test_admin.py @@ -0,0 +1,1494 @@ +import errno +import json +import logging +import time +import uuid +from io import StringIO +from os.path import join as os_path_join + +from teuthology.exceptions import CommandFailedError + +from tasks.cephfs.cephfs_test_case import CephFSTestCase, classhook +from tasks.cephfs.filesystem import FileLayout, FSMissing +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.caps_helper import CapTester + +log = logging.getLogger(__name__) + +class TestAdminCommands(CephFSTestCase): + """ + Tests for administration command. + """ + + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def check_pool_application_metadata_key_value(self, pool, app, key, value): + output = self.fs.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'application', 'get', pool, app, key) + self.assertEqual(str(output.strip()), value) + + def setup_ec_pools(self, n, metadata=True, overwrites=True): + if metadata: + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', n+"-meta", "8") + cmd = ['osd', 'erasure-code-profile', 'set', n+"-profile", "m=2", "k=2", "crush-failure-domain=osd"] + self.fs.mon_manager.raw_cluster_cmd(*cmd) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', n+"-data", "8", "erasure", n+"-profile") + if overwrites: + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'set', n+"-data", 'allow_ec_overwrites', 'true') + +@classhook('_add_valid_tell') +class TestValidTell(TestAdminCommands): + @classmethod + def _add_valid_tell(cls): + tells = [ + ['cache', 'status'], + ['damage', 'ls'], + ['dump_blocked_ops'], + ['dump_blocked_ops_count'], + ['dump_historic_ops'], + ['dump_historic_ops_by_duration'], + ['dump_mempools'], + ['dump_ops_in_flight'], + ['flush', 'journal'], + ['get', 'subtrees'], + ['ops', 'locks'], + ['ops'], + ['status'], + ['version'], + ] + def test(c): + def f(self): + J = self.fs.rank_tell(c) + json.dumps(J) + log.debug("dumped:\n%s", str(J)) + return f + for c in tells: + setattr(cls, 'test_valid_' + '_'.join(c), test(c)) + +class TestFsStatus(TestAdminCommands): + """ + Test "ceph fs status subcommand. + """ + + def test_fs_status(self): + """ + That `ceph fs status` command functions. + """ + + s = self.fs.mon_manager.raw_cluster_cmd("fs", "status") + self.assertTrue("active" in s) + + mdsmap = json.loads(self.fs.mon_manager.raw_cluster_cmd("fs", "status", "--format=json-pretty"))["mdsmap"] + self.assertEqual(mdsmap[0]["state"], "active") + + mdsmap = json.loads(self.fs.mon_manager.raw_cluster_cmd("fs", "status", "--format=json"))["mdsmap"] + self.assertEqual(mdsmap[0]["state"], "active") + + +class TestAddDataPool(TestAdminCommands): + """ + Test "ceph fs add_data_pool" subcommand. 
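A minimal standalone sketch of the test-name generation performed by the _add_valid_tell() class hook above, where each tell command list becomes one generated test method:

    tells = [['cache', 'status'], ['damage', 'ls'], ['flush', 'journal']]
    for c in tells:
        print('test_valid_' + '_'.join(c))
    # -> test_valid_cache_status, test_valid_damage_ls, test_valid_flush_journal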
+ """ + + def test_add_data_pool_root(self): + """ + That a new data pool can be added and used for the root directory. + """ + + p = self.fs.add_data_pool("foo") + self.fs.set_dir_layout(self.mount_a, ".", FileLayout(pool=p)) + + def test_add_data_pool_application_metadata(self): + """ + That the application metadata set on a newly added data pool is as expected. + """ + pool_name = "foo" + mon_cmd = self.fs.mon_manager.raw_cluster_cmd + mon_cmd('osd', 'pool', 'create', pool_name, '--pg_num_min', + str(self.fs.pg_num_min)) + # Check whether https://tracker.ceph.com/issues/43061 is fixed + mon_cmd('osd', 'pool', 'application', 'enable', pool_name, 'cephfs') + self.fs.add_data_pool(pool_name, create=False) + self.check_pool_application_metadata_key_value( + pool_name, 'cephfs', 'data', self.fs.name) + + def test_add_data_pool_subdir(self): + """ + That a new data pool can be added and used for a sub-directory. + """ + + p = self.fs.add_data_pool("foo") + self.mount_a.run_shell("mkdir subdir") + self.fs.set_dir_layout(self.mount_a, "subdir", FileLayout(pool=p)) + + def test_add_data_pool_non_alphamueric_name_as_subdir(self): + """ + That a new data pool with non-alphanumeric name can be added and used for a sub-directory. + """ + p = self.fs.add_data_pool("I-am-data_pool00.") + self.mount_a.run_shell("mkdir subdir") + self.fs.set_dir_layout(self.mount_a, "subdir", FileLayout(pool=p)) + + def test_add_data_pool_ec(self): + """ + That a new EC data pool can be added. + """ + + n = "test_add_data_pool_ec" + self.setup_ec_pools(n, metadata=False) + self.fs.add_data_pool(n+"-data", create=False) + + def test_add_already_in_use_data_pool(self): + """ + That command try to add data pool which is already in use with another fs. + """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + # create second data pool, metadata pool and add with filesystem + second_fs = "second_fs" + second_metadata_pool = "second_metadata_pool" + second_data_pool = "second_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + + # try to add 'first_data_pool' with 'second_fs' + # Expecting EINVAL exit status because 'first_data_pool' is already in use with 'first_fs' + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', second_fs, first_data_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because data pool is already in use as data pool for first_fs") + + def test_add_already_in_use_metadata_pool(self): + """ + That command try to add metadata pool which is already in use with another fs. 
+ """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + # create second data pool, metadata pool and add with filesystem + second_fs = "second_fs" + second_metadata_pool = "second_metadata_pool" + second_data_pool = "second_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + + # try to add 'second_metadata_pool' with 'first_fs' as a data pool + # Expecting EINVAL exit status because 'second_metadata_pool' + # is already in use with 'second_fs' as a metadata pool + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', first_fs, second_metadata_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because data pool is already in use as metadata pool for 'second_fs'") + +class TestFsNew(TestAdminCommands): + """ + Test "ceph fs new" subcommand. + """ + MDSS_REQUIRED = 3 + + def test_fsnames_can_only_by_goodchars(self): + n = 'test_fsnames_can_only_by_goodchars' + metapoolname, datapoolname = n+'-testmetapool', n+'-testdatapool' + badname = n+'badname@#' + + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + n+metapoolname) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + n+datapoolname) + + # test that fsname not with "goodchars" fails + args = ['fs', 'new', badname, metapoolname, datapoolname] + proc = self.fs.mon_manager.run_cluster_cmd(args=args,stderr=StringIO(), + check_status=False) + self.assertIn('invalid chars', proc.stderr.getvalue().lower()) + + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'rm', metapoolname, + metapoolname, + '--yes-i-really-really-mean-it-not-faking') + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'rm', datapoolname, + datapoolname, + '--yes-i-really-really-mean-it-not-faking') + + def test_new_default_ec(self): + """ + That a new file system warns/fails with an EC default data pool. + """ + + self.mount_a.umount_wait(require_clean=True) + self.mds_cluster.delete_all_filesystems() + n = "test_new_default_ec" + self.setup_ec_pools(n) + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data") + except CommandFailedError as e: + if e.exitstatus == 22: + pass + else: + raise + else: + raise RuntimeError("expected failure") + + def test_new_default_ec_force(self): + """ + That a new file system succeeds with an EC default data pool with --force. + """ + + self.mount_a.umount_wait(require_clean=True) + self.mds_cluster.delete_all_filesystems() + n = "test_new_default_ec_force" + self.setup_ec_pools(n) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force") + + def test_new_default_ec_no_overwrite(self): + """ + That a new file system fails with an EC default data pool without overwrite. 
+ """ + + self.mount_a.umount_wait(require_clean=True) + self.mds_cluster.delete_all_filesystems() + n = "test_new_default_ec_no_overwrite" + self.setup_ec_pools(n, overwrites=False) + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data") + except CommandFailedError as e: + if e.exitstatus == 22: + pass + else: + raise + else: + raise RuntimeError("expected failure") + # and even with --force ! + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force") + except CommandFailedError as e: + if e.exitstatus == 22: + pass + else: + raise + else: + raise RuntimeError("expected failure") + + def test_fs_new_pool_application_metadata(self): + """ + That the application metadata set on the pools of a newly created filesystem are as expected. + """ + self.mount_a.umount_wait(require_clean=True) + self.mds_cluster.delete_all_filesystems() + fs_name = "test_fs_new_pool_application" + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + mon_cmd = self.fs.mon_manager.raw_cluster_cmd + for p in pool_names: + mon_cmd('osd', 'pool', 'create', p, '--pg_num_min', str(self.fs.pg_num_min)) + mon_cmd('osd', 'pool', 'application', 'enable', p, 'cephfs') + mon_cmd('fs', 'new', fs_name, pool_names[0], pool_names[1]) + for i in range(2): + self.check_pool_application_metadata_key_value( + pool_names[i], 'cephfs', keys[i], fs_name) + + def test_fs_new_with_specific_id(self): + """ + That a file system can be created with a specific ID. + """ + fs_name = "test_fs_specific_id" + fscid = 100 + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.fs.status().get_fsmap(fscid) + for i in range(2): + self.check_pool_application_metadata_key_value(pool_names[i], 'cephfs', keys[i], fs_name) + + def test_fs_new_with_specific_id_idempotency(self): + """ + That command to create file system with specific ID is idempotent. + """ + fs_name = "test_fs_specific_id" + fscid = 100 + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.fs.status().get_fsmap(fscid) + + def test_fs_new_with_specific_id_fails_without_force_flag(self): + """ + That command to create file system with specific ID fails without '--force' flag. + """ + fs_name = "test_fs_specific_id" + fscid = 100 + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + try: + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid}') + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a file system with specifc ID without --force flag") + else: + self.fail("expected creating file system with specific ID without '--force' flag to fail") + + def test_fs_new_with_specific_id_fails_already_in_use(self): + """ + That creating file system with ID already in use fails. 
+ """ + fs_name = "test_fs_specific_id" + # file system ID already in use + fscid = self.fs.status().map['filesystems'][0]['id'] + keys = ['metadata', 'data'] + pool_names = [fs_name+'-'+key for key in keys] + for p in pool_names: + self.run_cluster_cmd(f'osd pool create {p}') + try: + self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a file system with specifc ID that is already in use") + else: + self.fail("expected creating file system with ID already in use to fail") + + def test_fs_new_metadata_pool_already_in_use(self): + """ + That creating file system with metadata pool already in use. + """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + second_fs = "second_fs" + second_data_pool = "second_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) + + # try to create new fs 'second_fs' with following configuration + # metadata pool -> 'first_metadata_pool' + # data pool -> 'second_data_pool' + # Expecting EINVAL exit status because 'first_metadata_pool' + # is already in use with 'first_fs' + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, first_metadata_pool, second_data_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because metadata pool is already in use for 'first_fs'") + + def test_fs_new_data_pool_already_in_use(self): + """ + That creating file system with data pool already in use. + """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + second_fs = "second_fs" + second_metadata_pool = "second_metadata_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) + + # try to create new fs 'second_fs' with following configuration + # metadata pool -> 'second_metadata_pool' + # data pool -> 'first_data_pool' + # Expecting EINVAL exit status because 'first_data_pool' + # is already in use with 'first_fs' + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, first_data_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because data pool is already in use for 'first_fs'") + + def test_fs_new_metadata_and_data_pool_in_use_by_another_same_fs(self): + """ + That creating file system with metadata and data pool which is already in use by another same fs. 
+ """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + second_fs = "second_fs" + + # try to create new fs 'second_fs' with following configuration + # metadata pool -> 'first_metadata_pool' + # data pool -> 'first_data_pool' + # Expecting EINVAL exit status because 'first_metadata_pool' and 'first_data_pool' + # is already in use with 'first_fs' + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, first_metadata_pool, first_data_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because metadata and data pool is already in use for 'first_fs'") + + def test_fs_new_metadata_and_data_pool_in_use_by_different_fs(self): + """ + That creating file system with metadata and data pool which is already in use by different fs. + """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + # create second data pool, metadata pool and add with filesystem + second_fs = "second_fs" + second_metadata_pool = "second_metadata_pool" + second_data_pool = "second_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + + third_fs = "third_fs" + + # try to create new fs 'third_fs' with following configuration + # metadata pool -> 'first_metadata_pool' + # data pool -> 'second_data_pool' + # Expecting EINVAL exit status because 'first_metadata_pool' and 'second_data_pool' + # is already in use with 'first_fs' and 'second_fs' + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', third_fs, first_metadata_pool, second_data_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because metadata and data pool is already in use for 'first_fs' and 'second_fs'") + + def test_fs_new_interchange_already_in_use_metadata_and_data_pool_of_same_fs(self): + """ + That creating file system with interchanging metadata and data pool which is already in use by same fs. 
+ """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + second_fs = "second_fs" + + # try to create new fs 'second_fs' with following configuration + # metadata pool -> 'first_data_pool' (already used as data pool for 'first_fs') + # data pool -> 'first_metadata_pool' (already used as metadata pool for 'first_fs') + # Expecting EINVAL exit status because 'first_data_pool' and 'first_metadata_pool' + # is already in use with 'first_fs' + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, first_data_pool, first_metadata_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because metadata and data pool is already in use for 'first_fs'") + + def test_fs_new_interchange_already_in_use_metadata_and_data_pool_of_different_fs(self): + """ + That creating file system with interchanging metadata and data pool which is already in use by defferent fs. + """ + + # create first data pool, metadata pool and add with filesystem + first_fs = "first_fs" + first_metadata_pool = "first_metadata_pool" + first_data_pool = "first_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + + # create second data pool, metadata pool and add with filesystem + second_fs = "second_fs" + second_metadata_pool = "second_metadata_pool" + second_data_pool = "second_data_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + + third_fs = "third_fs" + + # try to create new fs 'third_fs' with following configuration + # metadata pool -> 'first_data_pool' (already used as data pool for 'first_fs') + # data pool -> 'second_metadata_pool' (already used as metadata pool for 'second_fs') + # Expecting EINVAL exit status because 'first_data_pool' and 'second_metadata_pool' + # is already in use with 'first_fs' and 'second_fs' + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', third_fs, first_data_pool, second_metadata_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because metadata and data pool is already in use for 'first_fs' and 'second_fs'") + + def test_fs_new_metadata_pool_already_in_use_with_rbd(self): + """ + That creating new file system with metadata pool already used by rbd. 
+ """ + + # create pool and initialise with rbd + new_pool = "new_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_pool) + self.ctx.cluster.run(args=['rbd', 'pool', 'init', new_pool]) + + new_fs = "new_fs" + new_data_pool = "new_data_pool" + + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_data_pool) + + # try to create new fs 'new_fs' with following configuration + # metadata pool -> 'new_pool' (already used by rbd app) + # data pool -> 'new_data_pool' + # Expecting EINVAL exit status because 'new_pool' is already in use with 'rbd' app + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', new_fs, new_pool, new_data_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because metadata pool is already in use for rbd") + + def test_fs_new_data_pool_already_in_use_with_rbd(self): + """ + That creating new file system with data pool already used by rbd. + """ + + # create pool and initialise with rbd + new_pool = "new_pool" + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_pool) + self.ctx.cluster.run(args=['rbd', 'pool', 'init', new_pool]) + + new_fs = "new_fs" + new_metadata_pool = "new_metadata_pool" + + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_metadata_pool) + + # try to create new fs 'new_fs' with following configuration + # metadata pool -> 'new_metadata_pool' + # data pool -> 'new_pool' (already used by rbd app) + # Expecting EINVAL exit status because 'new_pool' is already in use with 'rbd' app + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', new_fs, new_metadata_pool, new_pool) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + self.fail("Expected EINVAL because data pool is already in use for rbd") + +class TestRenameCommand(TestAdminCommands): + """ + Tests for rename command. + """ + + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 2 + + def test_fs_rename(self): + """ + That the file system can be renamed, and the application metadata set on its pools are as expected. + """ + # Renaming the file system breaks this mount as the client uses + # file system specific authorization. The client cannot read + # or write even if the client's cephx ID caps are updated to access + # the new file system name without the client being unmounted and + # re-mounted. + self.mount_a.umount_wait(require_clean=True) + orig_fs_name = self.fs.name + new_fs_name = 'new_cephfs' + client_id = 'test_new_cephfs' + + self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + + # authorize a cephx ID access to the renamed file system. + # use the ID to write to the file system. 
+ self.fs.name = new_fs_name + keyring = self.fs.authorize(client_id, ('/', 'rw')) + keyring_path = self.mount_a.client_remote.mktemp(data=keyring) + self.mount_a.remount(client_id=client_id, + client_keyring_path=keyring_path, + cephfs_mntpt='/', + cephfs_name=self.fs.name) + filedata, filename = 'some data on fs', 'file_on_fs' + filepath = os_path_join(self.mount_a.hostfs_mntpt, filename) + self.mount_a.write_file(filepath, filedata) + self.check_pool_application_metadata_key_value( + self.fs.get_data_pool_name(), 'cephfs', 'data', new_fs_name) + self.check_pool_application_metadata_key_value( + self.fs.get_metadata_pool_name(), 'cephfs', 'metadata', new_fs_name) + + # cleanup + self.mount_a.umount_wait() + self.run_cluster_cmd(f'auth rm client.{client_id}') + + def test_fs_rename_idempotency(self): + """ + That the file system rename operation is idempotent. + """ + # Renaming the file system breaks this mount as the client uses + # file system specific authorization. + self.mount_a.umount_wait(require_clean=True) + orig_fs_name = self.fs.name + new_fs_name = 'new_cephfs' + + self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + + # original file system name does not appear in `fs ls` command + self.assertFalse(self.fs.exists()) + self.fs.name = new_fs_name + self.assertTrue(self.fs.exists()) + + def test_fs_rename_fs_new_fails_with_old_fsname_existing_pools(self): + """ + That after renaming a file system, creating a file system with + old name and existing FS pools fails. + """ + # Renaming the file system breaks this mount as the client uses + # file system specific authorization. + self.mount_a.umount_wait(require_clean=True) + orig_fs_name = self.fs.name + new_fs_name = 'new_cephfs' + data_pool = self.fs.get_data_pool_name() + metadata_pool = self.fs.get_metadata_pool_name() + self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + + try: + self.run_cluster_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool}") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a new file system with old " + "name and existing pools.") + else: + self.fail("expected creating new file system with old name and " + "existing pools to fail.") + + try: + self.run_cluster_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool} --force") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a new file system with old " + "name, existing pools and --force flag.") + else: + self.fail("expected creating new file system with old name, " + "existing pools, and --force flag to fail.") + + try: + self.run_cluster_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool} " + "--allow-dangerous-metadata-overlay") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on creating a new file system with old name, " + "existing pools and --allow-dangerous-metadata-overlay flag.") + else: + self.fail("expected creating new file system with old name, " + "existing pools, and --allow-dangerous-metadata-overlay flag to fail.") + + def test_fs_rename_fails_without_yes_i_really_mean_it_flag(self): + """ + That renaming a file system without '--yes-i-really-mean-it' flag fails. 
+ """ + try: + self.run_cluster_cmd(f"fs rename {self.fs.name} new_fs") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, + "invalid error code on renaming a file system without the " + "'--yes-i-really-mean-it' flag") + else: + self.fail("expected renaming of file system without the " + "'--yes-i-really-mean-it' flag to fail ") + + def test_fs_rename_fails_for_non_existent_fs(self): + """ + That renaming a non-existent file system fails. + """ + try: + self.run_cluster_cmd("fs rename non_existent_fs new_fs --yes-i-really-mean-it") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on renaming a non-existent fs") + else: + self.fail("expected renaming of a non-existent file system to fail") + + def test_fs_rename_fails_new_name_already_in_use(self): + """ + That renaming a file system fails if the new name refers to an existing file system. + """ + self.fs2 = self.mds_cluster.newfs(name='cephfs2', create=True) + + try: + self.run_cluster_cmd(f"fs rename {self.fs.name} {self.fs2.name} --yes-i-really-mean-it") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on renaming to a fs name that is already in use") + else: + self.fail("expected renaming to a new file system name that is already in use to fail.") + + def test_fs_rename_fails_with_mirroring_enabled(self): + """ + That renaming a file system fails if mirroring is enabled on it. + """ + orig_fs_name = self.fs.name + new_fs_name = 'new_cephfs' + + self.run_cluster_cmd(f'fs mirror enable {orig_fs_name}') + try: + self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, "invalid error code on renaming a mirrored file system") + else: + self.fail("expected renaming of a mirrored file system to fail") + self.run_cluster_cmd(f'fs mirror disable {orig_fs_name}') + + +class TestDump(CephFSTestCase): + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 1 + + def test_fs_dump_epoch(self): + """ + That dumping a specific epoch works. + """ + + status1 = self.fs.status() + status2 = self.fs.status(epoch=status1["epoch"]-1) + self.assertEqual(status1["epoch"], status2["epoch"]+1) + + def test_fsmap_trim(self): + """ + That the fsmap is trimmed normally. + """ + + paxos_service_trim_min = 25 + self.config_set('mon', 'paxos_service_trim_min', paxos_service_trim_min) + mon_max_mdsmap_epochs = 20 + self.config_set('mon', 'mon_max_mdsmap_epochs', mon_max_mdsmap_epochs) + + status = self.fs.status() + epoch = status["epoch"] + + # for N mutations + mutations = paxos_service_trim_min + mon_max_mdsmap_epochs + b = False + for i in range(mutations): + self.fs.set_joinable(b) + b = not b + + time.sleep(10) # for tick/compaction + + try: + self.fs.status(epoch=epoch) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT, "invalid error code when trying to fetch FSMap that was trimmed") + else: + self.fail("trimming did not occur as expected") + + def test_fsmap_force_trim(self): + """ + That the fsmap is trimmed forcefully. 
+ """ + + status = self.fs.status() + epoch = status["epoch"] + + paxos_service_trim_min = 1 + self.config_set('mon', 'paxos_service_trim_min', paxos_service_trim_min) + mon_mds_force_trim_to = epoch+1 + self.config_set('mon', 'mon_mds_force_trim_to', mon_mds_force_trim_to) + + # force a new fsmap + self.fs.set_joinable(False) + time.sleep(10) # for tick/compaction + + status = self.fs.status() + log.debug(f"new epoch is {status['epoch']}") + self.fs.status(epoch=epoch+1) # epoch+1 is not trimmed, may not == status["epoch"] + + try: + self.fs.status(epoch=epoch) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT, "invalid error code when trying to fetch FSMap that was trimmed") + else: + self.fail("trimming did not occur as expected") + + +class TestRequiredClientFeatures(CephFSTestCase): + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 1 + + def test_required_client_features(self): + """ + That `ceph fs required_client_features` command functions. + """ + + def is_required(index): + out = self.fs.mon_manager.raw_cluster_cmd('fs', 'get', self.fs.name, '--format=json-pretty') + features = json.loads(out)['mdsmap']['required_client_features'] + if "feature_{0}".format(index) in features: + return True; + return False; + + features = json.loads(self.fs.mon_manager.raw_cluster_cmd('fs', 'feature', 'ls', '--format=json-pretty')) + self.assertGreater(len(features), 0); + + for f in features: + self.fs.required_client_features('rm', str(f['index'])) + + for f in features: + index = f['index'] + feature = f['name'] + if feature == 'reserved': + feature = str(index) + + if index % 3 == 0: + continue; + self.fs.required_client_features('add', feature) + self.assertTrue(is_required(index)) + + if index % 2 == 0: + continue; + self.fs.required_client_features('rm', feature) + self.assertFalse(is_required(index)) + + def test_required_client_feature_add_reserved(self): + """ + That `ceph fs required_client_features X add reserved` fails. + """ + + p = self.fs.required_client_features('add', 'reserved', check_status=False, stderr=StringIO()) + self.assertIn('Invalid feature name', p.stderr.getvalue()) + + def test_required_client_feature_rm_reserved(self): + """ + That `ceph fs required_client_features X rm reserved` fails. + """ + + p = self.fs.required_client_features('rm', 'reserved', check_status=False, stderr=StringIO()) + self.assertIn('Invalid feature name', p.stderr.getvalue()) + + def test_required_client_feature_add_reserved_bit(self): + """ + That `ceph fs required_client_features X add <reserved_bit>` passes. + """ + + p = self.fs.required_client_features('add', '1', stderr=StringIO()) + self.assertIn("added feature 'reserved' to required_client_features", p.stderr.getvalue()) + + def test_required_client_feature_rm_reserved_bit(self): + """ + That `ceph fs required_client_features X rm <reserved_bit>` passes. + """ + + self.fs.required_client_features('add', '1') + p = self.fs.required_client_features('rm', '1', stderr=StringIO()) + self.assertIn("removed feature 'reserved' from required_client_features", p.stderr.getvalue()) + +class TestCompatCommands(CephFSTestCase): + """ + """ + + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 3 + + def test_add_compat(self): + """ + Test adding a compat. + """ + + self.fs.fail() + self.fs.add_compat(63, 'placeholder') + mdsmap = self.fs.get_mds_map() + self.assertIn("feature_63", mdsmap['compat']['compat']) + + def test_add_incompat(self): + """ + Test adding an incompat. 
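Note: a rough standalone equivalent of the JSON plumbing that is_required() above performs, written against the plain ceph CLI. The helper names are illustrative only.

    import json
    import subprocess

    def ceph_json(*args):
        # hypothetical thin wrapper around the `ceph` CLI
        out = subprocess.check_output(('ceph',) + args + ('--format=json',))
        return json.loads(out)

    def feature_is_required(fs_name, index):
        # `ceph fs get <fs>` exposes the MDSMap; required client features are
        # keyed as "feature_<index>", exactly as is_required() checks above
        mdsmap = ceph_json('fs', 'get', fs_name)['mdsmap']
        return 'feature_{}'.format(index) in mdsmap['required_client_features']

    def list_client_features():
        # returns a list of {'index': ..., 'name': ...} entries,
        # mirroring what the test iterates over
        return ceph_json('fs', 'feature', 'ls')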
+ """ + + self.fs.fail() + self.fs.add_incompat(63, 'placeholder') + mdsmap = self.fs.get_mds_map() + log.info(f"{mdsmap}") + self.assertIn("feature_63", mdsmap['compat']['incompat']) + + def test_rm_compat(self): + """ + Test removing a compat. + """ + + self.fs.fail() + self.fs.add_compat(63, 'placeholder') + self.fs.rm_compat(63) + mdsmap = self.fs.get_mds_map() + self.assertNotIn("feature_63", mdsmap['compat']['compat']) + + def test_rm_incompat(self): + """ + Test removing an incompat. + """ + + self.fs.fail() + self.fs.add_incompat(63, 'placeholder') + self.fs.rm_incompat(63) + mdsmap = self.fs.get_mds_map() + self.assertNotIn("feature_63", mdsmap['compat']['incompat']) + + def test_standby_compat(self): + """ + That adding a compat does not prevent standbys from joining. + """ + + self.fs.fail() + self.fs.add_compat(63, "placeholder") + self.fs.set_joinable() + self.fs.wait_for_daemons() + mdsmap = self.fs.get_mds_map() + self.assertIn("feature_63", mdsmap['compat']['compat']) + + def test_standby_incompat_reject(self): + """ + That adding an incompat feature prevents incompatible daemons from joining. + """ + + self.fs.fail() + self.fs.add_incompat(63, "placeholder") + self.fs.set_joinable() + try: + self.fs.wait_for_daemons(timeout=60) + except RuntimeError as e: + if "Timed out waiting for MDS daemons to become healthy" in str(e): + pass + else: + raise + else: + self.fail() + + def test_standby_incompat_upgrade(self): + """ + That an MDS can upgrade the compat of a fs. + """ + + self.fs.fail() + self.fs.rm_incompat(1) + self.fs.set_joinable() + self.fs.wait_for_daemons() + mdsmap = self.fs.get_mds_map() + self.assertIn("feature_1", mdsmap['compat']['incompat']) + + def test_standby_replay_not_upgradeable(self): + """ + That the mons will not upgrade the MDSMap compat if standby-replay is + enabled. + """ + + self.fs.fail() + self.fs.rm_incompat(1) + self.fs.set_allow_standby_replay(True) + self.fs.set_joinable() + try: + self.fs.wait_for_daemons(timeout=60) + except RuntimeError as e: + if "Timed out waiting for MDS daemons to become healthy" in str(e): + pass + else: + raise + else: + self.fail() + + def test_standby_incompat_reject_multifs(self): + """ + Like test_standby_incompat_reject but with a second fs. + """ + + fs2 = self.mds_cluster.newfs(name="cephfs2", create=True) + fs2.fail() + fs2.add_incompat(63, 'placeholder') + fs2.set_joinable() + try: + fs2.wait_for_daemons(timeout=60) + except RuntimeError as e: + if "Timed out waiting for MDS daemons to become healthy" in str(e): + pass + else: + raise + else: + self.fail() + # did self.fs lose MDS or standbys suicide? + self.fs.wait_for_daemons() + mdsmap = fs2.get_mds_map() + self.assertIn("feature_63", mdsmap['compat']['incompat']) + +class TestConfigCommands(CephFSTestCase): + """ + Test that daemons and clients respond to the otherwise rarely-used + runtime config modification operations. + """ + + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def test_ceph_config_show(self): + """ + That I can successfully show MDS configuration. 
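Note: the standby tests below hinge on the usual compat-set rule: features recorded as 'compat' may be unknown to a daemon and are simply ignored, while any 'incompat' feature the daemon does not implement prevents it from using the map. A toy model of that rule (not Ceph's actual CompatSet code, feature numbers mirror the tests):

    def daemon_can_join(map_incompat_features, daemon_supported_features):
        # only "incompat" features decide whether a daemon may use the map;
        # unknown "compat" features are harmless
        return map_incompat_features <= daemon_supported_features

    supported = {1, 2, 3}                            # what the daemon implements
    assert daemon_can_join({1}, supported)           # cf. test_standby_compat
    assert not daemon_can_join({1, 63}, supported)   # cf. test_standby_incompat_reject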
+ """ + + names = self.fs.get_rank_names() + for n in names: + s = self.fs.mon_manager.raw_cluster_cmd("config", "show", "mds."+n) + self.assertTrue("NAME" in s) + self.assertTrue("mon_host" in s) + + + def test_client_config(self): + """ + That I can successfully issue asok "config set" commands + + :return: + """ + + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Test only applies to FUSE clients") + + test_key = "client_cache_size" + test_val = "123" + self.mount_a.admin_socket(['config', 'set', test_key, test_val]) + out = self.mount_a.admin_socket(['config', 'get', test_key]) + self.assertEqual(out[test_key], test_val) + + + def test_mds_config_asok(self): + test_key = "mds_max_purge_ops" + test_val = "123" + self.fs.mds_asok(['config', 'set', test_key, test_val]) + out = self.fs.mds_asok(['config', 'get', test_key]) + self.assertEqual(out[test_key], test_val) + + def test_mds_dump_cache_asok(self): + cache_file = "cache_file" + timeout = "1" + self.fs.rank_asok(['dump', 'cache', cache_file, timeout]) + + def test_mds_config_tell(self): + test_key = "mds_max_purge_ops" + test_val = "123" + + self.fs.rank_tell(['injectargs', "--{0}={1}".format(test_key, test_val)]) + + # Read it back with asok because there is no `tell` equivalent + out = self.fs.rank_tell(['config', 'get', test_key]) + self.assertEqual(out[test_key], test_val) + + +class TestMirroringCommands(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def _enable_mirroring(self, fs_name): + self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", fs_name) + + def _disable_mirroring(self, fs_name): + self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", fs_name) + + def _add_peer(self, fs_name, peer_spec, remote_fs_name): + peer_uuid = str(uuid.uuid4()) + self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "peer_add", fs_name, peer_uuid, peer_spec, remote_fs_name) + + def _remove_peer(self, fs_name, peer_uuid): + self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "peer_remove", fs_name, peer_uuid) + + def _verify_mirroring(self, fs_name, flag_str): + status = self.fs.status() + fs_map = status.get_fsmap_byname(fs_name) + if flag_str == 'enabled': + self.assertTrue('mirror_info' in fs_map) + elif flag_str == 'disabled': + self.assertTrue('mirror_info' not in fs_map) + else: + raise RuntimeError(f'invalid flag_str {flag_str}') + + def _get_peer_uuid(self, fs_name, peer_spec): + status = self.fs.status() + fs_map = status.get_fsmap_byname(fs_name) + mirror_info = fs_map.get('mirror_info', None) + self.assertTrue(mirror_info is not None) + for peer_uuid, remote in mirror_info['peers'].items(): + client_name = remote['remote']['client_name'] + cluster_name = remote['remote']['cluster_name'] + spec = f'{client_name}@{cluster_name}' + if spec == peer_spec: + return peer_uuid + return None + + def test_mirroring_command(self): + """basic mirroring command test -- enable, disable mirroring on a + filesystem""" + self._enable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "enabled") + self._disable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "disabled") + + def test_mirroring_peer_commands(self): + """test adding and removing peers to a mirror enabled filesystem""" + self._enable_mirroring(self.fs.name) + self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b") + self._add_peer(self.fs.name, "client.site-c@site-c", "fs_c") + self._verify_mirroring(self.fs.name, "enabled") + uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + uuid_peer_c 
= self._get_peer_uuid(self.fs.name, "client.site-c@site-c") + self.assertTrue(uuid_peer_b is not None) + self.assertTrue(uuid_peer_c is not None) + self._remove_peer(self.fs.name, uuid_peer_b) + self._remove_peer(self.fs.name, uuid_peer_c) + self._disable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "disabled") + + def test_mirroring_command_idempotency(self): + """test to check idempotency of mirroring family of commands """ + self._enable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "enabled") + self._enable_mirroring(self.fs.name) + # add peer + self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b") + uuid_peer_b1 = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b1 is not None) + # adding the peer again should be idempotent + self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b") + uuid_peer_b2 = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b2 is not None) + self.assertTrue(uuid_peer_b1 == uuid_peer_b2) + # remove peer + self._remove_peer(self.fs.name, uuid_peer_b1) + uuid_peer_b3 = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b3 is None) + # removing the peer again should be idempotent + self._remove_peer(self.fs.name, uuid_peer_b1) + self._disable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "disabled") + self._disable_mirroring(self.fs.name) + + def test_mirroring_disable_with_peers(self): + """test disabling mirroring for a filesystem with active peers""" + self._enable_mirroring(self.fs.name) + self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b") + self._verify_mirroring(self.fs.name, "enabled") + uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b is not None) + self._disable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "disabled") + # enable mirroring to check old peers + self._enable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "enabled") + # peer should be gone + uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b is None) + self._disable_mirroring(self.fs.name) + self._verify_mirroring(self.fs.name, "disabled") + + def test_mirroring_with_filesystem_reset(self): + """test to verify mirroring state post filesystem reset""" + self._enable_mirroring(self.fs.name) + self._add_peer(self.fs.name, "client.site-b@site-b", "fs_b") + self._verify_mirroring(self.fs.name, "enabled") + uuid_peer_b = self._get_peer_uuid(self.fs.name, "client.site-b@site-b") + self.assertTrue(uuid_peer_b is not None) + # reset filesystem + self.fs.fail() + self.fs.reset() + self.fs.wait_for_daemons() + self._verify_mirroring(self.fs.name, "disabled") + + +class TestFsAuthorize(CephFSTestCase): + client_id = 'testuser' + client_name = 'client.' 
+ client_id + + def test_single_path_r(self): + PERM = 'r' + FS_AUTH_CAPS = (('/', PERM),) + self.captester = CapTester() + self.setup_test_env(FS_AUTH_CAPS) + + self.captester.run_mon_cap_tests(self.fs, self.client_id) + self.captester.run_mds_cap_tests(PERM) + + def test_single_path_rw(self): + PERM = 'rw' + FS_AUTH_CAPS = (('/', PERM),) + self.captester = CapTester() + self.setup_test_env(FS_AUTH_CAPS) + + self.captester.run_mon_cap_tests(self.fs, self.client_id) + self.captester.run_mds_cap_tests(PERM) + + def test_single_path_rootsquash(self): + PERM = 'rw' + FS_AUTH_CAPS = (('/', PERM, 'root_squash'),) + self.captester = CapTester() + self.setup_test_env(FS_AUTH_CAPS) + + # testing MDS caps... + # Since root_squash is set in client caps, client can read but not + # write even thought access level is set to "rw". + self.captester.conduct_pos_test_for_read_caps() + self.captester.conduct_neg_test_for_write_caps(sudo_write=True) + + def test_single_path_authorize_on_nonalphanumeric_fsname(self): + """ + That fs authorize command works on filesystems with names having [_.-] + characters + """ + self.mount_a.umount_wait(require_clean=True) + self.mds_cluster.delete_all_filesystems() + fs_name = "cephfs-_." + self.fs = self.mds_cluster.newfs(name=fs_name) + self.fs.wait_for_daemons() + self.run_cluster_cmd(f'auth caps client.{self.mount_a.client_id} ' + f'mon "allow r" ' + f'osd "allow rw pool={self.fs.get_data_pool_name()}" ' + f'mds allow') + self.mount_a.remount(cephfs_name=self.fs.name) + PERM = 'rw' + FS_AUTH_CAPS = (('/', PERM),) + self.captester = CapTester() + self.setup_test_env(FS_AUTH_CAPS) + self.captester.run_mds_cap_tests(PERM) + + def test_multiple_path_r(self): + PERM = 'r' + FS_AUTH_CAPS = (('/dir1/dir12', PERM), ('/dir2/dir22', PERM)) + for c in FS_AUTH_CAPS: + self.mount_a.run_shell(f'mkdir -p .{c[0]}') + self.captesters = (CapTester(), CapTester()) + self.setup_test_env(FS_AUTH_CAPS) + + self.run_cap_test_one_by_one(FS_AUTH_CAPS) + + def test_multiple_path_rw(self): + PERM = 'rw' + FS_AUTH_CAPS = (('/dir1/dir12', PERM), ('/dir2/dir22', PERM)) + for c in FS_AUTH_CAPS: + self.mount_a.run_shell(f'mkdir -p .{c[0]}') + self.captesters = (CapTester(), CapTester()) + self.setup_test_env(FS_AUTH_CAPS) + + self.run_cap_test_one_by_one(FS_AUTH_CAPS) + + def run_cap_test_one_by_one(self, fs_auth_caps): + keyring = self.run_cluster_cmd(f'auth get {self.client_name}') + for i, c in enumerate(fs_auth_caps): + self.assertIn(i, (0, 1)) + PATH = c[0] + PERM = c[1] + self._remount(keyring, PATH) + # actual tests... 
+ self.captesters[i].run_mon_cap_tests(self.fs, self.client_id) + self.captesters[i].run_mds_cap_tests(PERM, PATH) + + def tearDown(self): + self.mount_a.umount_wait() + self.run_cluster_cmd(f'auth rm {self.client_name}') + + super(type(self), self).tearDown() + + def _remount(self, keyring, path='/'): + keyring_path = self.mount_a.client_remote.mktemp(data=keyring) + self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_mntpt=path) + + def setup_for_single_path(self, fs_auth_caps): + self.captester.write_test_files((self.mount_a,), '/') + keyring = self.fs.authorize(self.client_id, fs_auth_caps) + self._remount(keyring) + + def setup_for_multiple_paths(self, fs_auth_caps): + for i, c in enumerate(fs_auth_caps): + PATH = c[0] + self.captesters[i].write_test_files((self.mount_a,), PATH) + + self.fs.authorize(self.client_id, fs_auth_caps) + + def setup_test_env(self, fs_auth_caps): + if len(fs_auth_caps) == 1: + self.setup_for_single_path(fs_auth_caps[0]) + else: + self.setup_for_multiple_paths(fs_auth_caps) + + +class TestAdminCommandIdempotency(CephFSTestCase): + """ + Tests for administration command idempotency. + """ + + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 1 + + def test_rm_idempotency(self): + """ + That a removing a fs twice is idempotent. + """ + + data_pools = self.fs.get_data_pool_names(refresh=True) + self.fs.fail() + self.fs.rm() + try: + self.fs.get_mds_map() + except FSMissing: + pass + else: + self.fail("get_mds_map should raise") + p = self.fs.rm() + self.assertIn("does not exist", p.stderr.getvalue()) + self.fs.remove_pools(data_pools) + + +class TestAdminCommandDumpTree(CephFSTestCase): + """ + Tests for administration command subtrees. + """ + + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 1 + + def test_dump_subtrees(self): + """ + Dump all the subtrees to make sure the MDS daemon won't crash. + """ + + subtrees = self.fs.mds_asok(['get', 'subtrees']) + log.info(f"dumping {len(subtrees)} subtrees:") + for subtree in subtrees: + log.info(f" subtree: '{subtree['dir']['path']}'") + self.fs.mds_asok(['dump', 'tree', subtree['dir']['path']]) + + log.info("dumping 2 special subtrees:") + log.info(" subtree: '/'") + self.fs.mds_asok(['dump', 'tree', '/']) + log.info(" subtree: '~mdsdir'") + self.fs.mds_asok(['dump', 'tree', '~mdsdir']) + +class TestAdminCommandDumpLoads(CephFSTestCase): + """ + Tests for administration command dump loads. + """ + + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 1 + + def test_dump_loads(self): + """ + make sure depth limit param is considered when dump loads for a MDS daemon. + """ + + log.info("dumping loads") + loads = self.fs.mds_asok(['dump', 'loads', '1']) + self.assertIsNotNone(loads) + self.assertIn("dirfrags", loads) + for d in loads["dirfrags"]: + self.assertLessEqual(d["path"].count("/"), 1) + +class TestFsBalRankMask(CephFSTestCase): + """ + Tests ceph fs set <fs_name> bal_rank_mask + """ + + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 2 + + def test_bal_rank_mask(self): + """ + check whether a specified bal_rank_mask value is valid or not. 
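Note: a rough approximation of what the checks in the test body that follows treat as a valid bal_rank_mask: the special values '-1' and 'all', or a hex mask (with or without a 0x prefix) no wider than MAX_MDS/4 = 64 hex digits; the empty string and over-long masks are rejected with EINVAL. This sketch is illustrative and is not the monitor's actual parser.

    MAX_MDS = 256

    def bal_rank_mask_looks_valid(mask):
        if mask in ('-1', 'all'):
            return True                       # shorthand for "all active ranks"
        digits = mask[2:] if mask.startswith('0x') else mask
        if not digits:
            return False                      # '' is rejected (EINVAL)
        try:
            int(digits, 16)
        except ValueError:
            return False
        return len(digits) <= MAX_MDS // 4    # wider masks are rejected (EINVAL)

    for good in ('0x0', '0', '-1', 'all', '0x1', 'f0', 'ab', '0xfff0', '0x' + 'f' * 64):
        assert bal_rank_mask_looks_valid(good)
    for bad in ('', '0x1' + 'f' * 64):
        assert not bal_rank_mask_looks_valid(bad)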
+ """ + bal_rank_mask = '0x0' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = '0' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = '-1' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = 'all' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = '0x1' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = '1' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = 'f0' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = 'ab' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = '0xfff0' + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + MAX_MDS = 256 + bal_rank_mask = '0x' + 'f' * int(MAX_MDS / 4) + log.info(f"set bal_rank_mask {bal_rank_mask}") + self.fs.set_bal_rank_mask(bal_rank_mask) + self.assertEqual(bal_rank_mask, self.fs.get_var('bal_rank_mask')) + + bal_rank_mask = '' + log.info("set bal_rank_mask to empty string") + try: + self.fs.set_bal_rank_mask(bal_rank_mask) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + + bal_rank_mask = '0x1' + 'f' * int(MAX_MDS / 4) + log.info(f"set bal_rank_mask {bal_rank_mask}") + try: + self.fs.set_bal_rank_mask(bal_rank_mask) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) diff --git a/qa/tasks/cephfs/test_auto_repair.py b/qa/tasks/cephfs/test_auto_repair.py new file mode 100644 index 000000000..e6f0a8f0b --- /dev/null +++ b/qa/tasks/cephfs/test_auto_repair.py @@ -0,0 +1,88 @@ + +""" +Exercise the MDS's auto repair functions +""" + +import logging +import time + +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase + + +log = logging.getLogger(__name__) + + +# Arbitrary timeouts for operations involving restarting +# an MDS or waiting for it to come up +MDS_RESTART_GRACE = 60 + + +class TestMDSAutoRepair(CephFSTestCase): + def test_backtrace_repair(self): + """ + MDS should verify/fix backtrace on fetch dirfrag + """ + + self.mount_a.run_shell(["mkdir", "testdir1"]) + self.mount_a.run_shell(["touch", "testdir1/testfile"]) + dir_objname = "{:x}.00000000".format(self.mount_a.path_to_ino("testdir1")) + + # drop inodes caps + self.mount_a.umount_wait() + + # flush journal entries to dirfrag objects, and expire journal + self.fs.mds_asok(['flush', 'journal']) + + # Restart the MDS to drop the metadata cache (because we expired the journal, + # nothing gets replayed into cache on restart) + self.fs.rank_fail() + 
self.fs.wait_for_daemons()
+
+        # remove testdir1's backtrace
+        self.fs.radosm(["rmxattr", dir_objname, "parent"])
+
+        # readdir (fetch dirfrag) should fix testdir1's backtrace
+        self.mount_a.mount_wait()
+        self.mount_a.run_shell(["ls", "testdir1"])
+
+        # flush journal entries to dirfrag objects
+        self.fs.mds_asok(['flush', 'journal'])
+
+        # check if backtrace exists
+        self.fs.radosm(["getxattr", dir_objname, "parent"])
+
+    def test_mds_readonly(self):
+        """
+        Test that the MDS behaves correctly when it is read-only.
+        """
+        # operations should succeed while the MDS is not read-only
+        self.mount_a.run_shell(["touch", "test_file1"])
+        writer = self.mount_a.write_background(loop=True)
+
+        time.sleep(10)
+        self.assertFalse(writer.finished)
+
+        # force MDS to read-only mode
+        self.fs.mds_asok(['force_readonly'])
+        time.sleep(10)
+
+        # touching the test file should now fail
+        try:
+            self.mount_a.run_shell(["touch", "test_file1"])
+        except CommandFailedError:
+            pass
+        else:
+            self.assertTrue(False)
+
+        # the background writer should also fail
+        self.assertTrue(writer.finished)
+
+        # The MDS should report its read-only health state to the mon
+        self.wait_for_health("MDS_READ_ONLY", timeout=30)
+
+        # restart the MDS to make it writable again
+        self.fs.mds_fail_restart()
+        self.fs.wait_for_daemons()
+
+        self.wait_for_health_clear(timeout=30)
diff --git a/qa/tasks/cephfs/test_backtrace.py b/qa/tasks/cephfs/test_backtrace.py
new file mode 100644
index 000000000..6b094569b
--- /dev/null
+++ b/qa/tasks/cephfs/test_backtrace.py
@@ -0,0 +1,102 @@
+
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from tasks.cephfs.filesystem import ObjectNotFound
+
+class TestBacktrace(CephFSTestCase):
+    def test_backtrace(self):
+        """
+        That the 'parent', 'layout' and 'symlink' xattrs on the head objects of files
+        are updated correctly.
+ """ + + old_data_pool_name = self.fs.get_data_pool_name() + old_pool_id = self.fs.get_data_pool_id() + + # Not enabling symlink recovery option should not store symlink xattr + self.config_set('mds', 'mds_symlink_recovery', 'false') + self.mount_a.run_shell(["mkdir", "sym_dir0"]) + self.mount_a.run_shell(["touch", "sym_dir0/file1"]) + self.mount_a.run_shell(["ln", "-s", "sym_dir0/file1", "sym_dir0/symlink_file1"]) + file_ino = self.mount_a.path_to_ino("sym_dir0/symlink_file1", follow_symlinks=False) + + self.fs.mds_asok(["flush", "journal"]) + with self.assertRaises(ObjectNotFound): + self.fs.read_symlink(file_ino) + + # Enabling symlink recovery option should store symlink xattr for symlinks + self.config_set('mds', 'mds_symlink_recovery', 'true') + self.mount_a.run_shell(["mkdir", "sym_dir"]) + self.mount_a.run_shell(["touch", "sym_dir/file1"]) + self.mount_a.run_shell(["ln", "-s", "./file1", "sym_dir/symlink_file1"]) + file_ino = self.mount_a.path_to_ino("sym_dir/symlink_file1", follow_symlinks=False) + + self.fs.mds_asok(["flush", "journal"]) + symlink = self.fs.read_symlink(file_ino) + self.assertEqual(symlink, { + "s" : "./file1", + }) + + # Create a file for subsequent checks + self.mount_a.run_shell(["mkdir", "parent_a"]) + self.mount_a.run_shell(["touch", "parent_a/alpha"]) + file_ino = self.mount_a.path_to_ino("parent_a/alpha") + + # That backtrace and layout are written after initial flush + self.fs.mds_asok(["flush", "journal"]) + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'parent_a'], [a['dname'] for a in backtrace['ancestors']]) + layout = self.fs.read_layout(file_ino) + self.assertDictEqual(layout, { + "stripe_unit": 4194304, + "stripe_count": 1, + "object_size": 4194304, + "pool_id": old_pool_id, + "pool_ns": "", + }) + self.assertEqual(backtrace['pool'], old_pool_id) + + # That backtrace is written after parentage changes + self.mount_a.run_shell(["mkdir", "parent_b"]) + self.mount_a.run_shell(["mv", "parent_a/alpha", "parent_b/alpha"]) + + self.fs.mds_asok(["flush", "journal"]) + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'parent_b'], [a['dname'] for a in backtrace['ancestors']]) + + # Create a new data pool + new_pool_name = "data_new" + new_pool_id = self.fs.add_data_pool(new_pool_name) + + # That an object which has switched pools gets its backtrace updated + self.mount_a.setfattr("./parent_b/alpha", + "ceph.file.layout.pool", new_pool_name) + self.fs.mds_asok(["flush", "journal"]) + backtrace_old_pool = self.fs.read_backtrace(file_ino, pool=old_data_pool_name) + self.assertEqual(backtrace_old_pool['pool'], new_pool_id) + backtrace_new_pool = self.fs.read_backtrace(file_ino, pool=new_pool_name) + self.assertEqual(backtrace_new_pool['pool'], new_pool_id) + new_pool_layout = self.fs.read_layout(file_ino, pool=new_pool_name) + self.assertEqual(new_pool_layout['pool_id'], new_pool_id) + self.assertEqual(new_pool_layout['pool_ns'], '') + + # That subsequent linkage changes are only written to new pool backtrace + self.mount_a.run_shell(["mkdir", "parent_c"]) + self.mount_a.run_shell(["mv", "parent_b/alpha", "parent_c/alpha"]) + self.fs.mds_asok(["flush", "journal"]) + backtrace_old_pool = self.fs.read_backtrace(file_ino, pool=old_data_pool_name) + self.assertEqual(['alpha', 'parent_b'], [a['dname'] for a in backtrace_old_pool['ancestors']]) + backtrace_new_pool = self.fs.read_backtrace(file_ino, pool=new_pool_name) + self.assertEqual(['alpha', 'parent_c'], [a['dname'] for a in backtrace_new_pool['ancestors']]) 
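Note: read_backtrace() and read_layout() used above ultimately inspect xattrs on the file's head object in a data pool. A sketch of the equivalent manual inspection is shown below, assuming the usual rados getxattr plus ceph-dencoder pattern; it is illustrative and not lifted from the test framework.

    import subprocess
    import tempfile

    def head_object_name(ino):
        # the first (head) object of a file is named "<inode in hex>.00000000"
        return '{:x}.00000000'.format(ino)

    def dump_backtrace(pool, ino):
        # the backtrace lives in the "parent" xattr of the head object
        raw = subprocess.check_output(
            ['rados', '-p', pool, 'getxattr', head_object_name(ino), 'parent'])
        with tempfile.NamedTemporaryFile() as f:
            f.write(raw)
            f.flush()
            # decode the inode_backtrace_t blob into JSON for inspection
            out = subprocess.check_output(
                ['ceph-dencoder', 'type', 'inode_backtrace_t',
                 'import', f.name, 'decode', 'dump_json'])
        return out.decode()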
+ + # That layout is written to new pool after change to other field in layout + self.mount_a.setfattr("./parent_c/alpha", + "ceph.file.layout.object_size", "8388608") + + self.fs.mds_asok(["flush", "journal"]) + new_pool_layout = self.fs.read_layout(file_ino, pool=new_pool_name) + self.assertEqual(new_pool_layout['object_size'], 8388608) + + # ...but not to the old pool: the old pool's backtrace points to the new pool, and that's enough, + # we don't update the layout in all the old pools whenever it changes + old_pool_layout = self.fs.read_layout(file_ino, pool=old_data_pool_name) + self.assertEqual(old_pool_layout['object_size'], 4194304) diff --git a/qa/tasks/cephfs/test_cap_flush.py b/qa/tasks/cephfs/test_cap_flush.py new file mode 100644 index 000000000..70fdc3893 --- /dev/null +++ b/qa/tasks/cephfs/test_cap_flush.py @@ -0,0 +1,58 @@ + +import os +import time +from textwrap import dedent +from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology + +class TestCapFlush(CephFSTestCase): + @for_teuthology + def test_replay_create(self): + """ + MDS starts to handle client caps when it enters clientreplay stage. + When handling a client cap in clientreplay stage, it's possible that + corresponding inode does not exist because the client request which + creates inode hasn't been replayed. + """ + + dir_path = os.path.join(self.mount_a.mountpoint, "testdir") + py_script = dedent(""" + import os + os.mkdir("{0}") + fd = os.open("{0}", os.O_RDONLY) + os.fchmod(fd, 0o777) + os.fsync(fd) + """).format(dir_path) + self.mount_a.run_python(py_script) + + self.fs.mds_asok(["flush", "journal"]) + + # client will only get unsafe replay + self.fs.mds_asok(["config", "set", "mds_log_pause", "1"]) + + file_name = "testfile" + file_path = dir_path + "/" + file_name + + # Create a file and modify its mode. ceph-fuse will mark Ax cap dirty + py_script = dedent(""" + import os + os.chdir("{0}") + os.setgid(65534) + os.setuid(65534) + fd = os.open("{1}", os.O_CREAT | os.O_RDWR, 0o644) + os.fchmod(fd, 0o640) + """).format(dir_path, file_name) + self.mount_a.run_python(py_script, sudo=True) + + # Modify file mode by different user. ceph-fuse will send a setattr request + self.mount_a.run_shell(["sudo", "chmod", "600", file_path], wait=False, omit_sudo=False) + + time.sleep(10) + + # Restart mds. Client will re-send the unsafe request and cap flush + self.fs.rank_fail() + self.fs.wait_for_daemons() + + mode = self.mount_a.run_shell(['stat', '-c' '%a', file_path]).stdout.getvalue().strip() + # If the cap flush get dropped, mode should be 0644. + # (Ax cap stays in dirty state, which prevents setattr reply from updating file mode) + self.assertEqual(mode, "600") diff --git a/qa/tasks/cephfs/test_cephfs_shell.py b/qa/tasks/cephfs/test_cephfs_shell.py new file mode 100644 index 000000000..9f7434762 --- /dev/null +++ b/qa/tasks/cephfs/test_cephfs_shell.py @@ -0,0 +1,1167 @@ +""" +NOTE: For running this tests locally (using vstart_runner.py), export the +path to src/tools/cephfs/shell/cephfs-shell module to $PATH. Running +"export PATH=$PATH:$(cd ../src/tools/cephfs/shell && pwd)" from the build dir +will update the environment without hassles of typing the path correctly. 
+""" +from io import StringIO +from os import path +import crypt +import logging +from tempfile import mkstemp as tempfile_mkstemp +import math +from time import sleep +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError +from textwrap import dedent + +log = logging.getLogger(__name__) + + +def humansize(nbytes): + suffixes = ['B', 'K', 'M', 'G', 'T', 'P'] + i = 0 + while nbytes >= 1024 and i < len(suffixes) - 1: + nbytes /= 1024. + i += 1 + nbytes = math.ceil(nbytes) + f = ('%d' % nbytes).rstrip('.') + return '%s%s' % (f, suffixes[i]) + + +def ensure_str(s): + if isinstance(s, str): + return s + if isinstance(s, bytes): + return s.decode() + raise TypeError("not expecting type '%s'" % type(s)) + + +class TestCephFSShell(CephFSTestCase): + CLIENTS_REQUIRED = 1 + + def setUp(self): + super(TestCephFSShell, self).setUp() + + conf_contents = "[cephfs-shell]\ncolors = False\ndebug = True\n" + confpath = self.mount_a.client_remote.sh('mktemp').strip() + self.mount_a.client_remote.write_file(confpath, conf_contents) + self.default_shell_conf_path = confpath + + def run_cephfs_shell_cmd(self, cmd, mount_x=None, shell_conf_path=None, + opts=None, stdout=None, stderr=None, stdin=None, + check_status=True): + stdout = stdout or StringIO() + stderr = stderr or StringIO() + if mount_x is None: + mount_x = self.mount_a + if isinstance(cmd, list): + cmd = " ".join(cmd) + if not shell_conf_path: + shell_conf_path = self.default_shell_conf_path + + args = ["cephfs-shell", "-c", shell_conf_path] + if opts: + args += opts + args.extend(("--", cmd)) + + log.info("Running command: {}".format(" ".join(args))) + return mount_x.client_remote.run(args=args, stdout=stdout, + stderr=stderr, stdin=stdin, + check_status=check_status) + + def negtest_cephfs_shell_cmd(self, **kwargs): + """ + This method verifies that cephfs shell command fails with expected + return value and/or error message. + + kwargs is expected to hold the arguments same as + run_cephfs_shell_cmd() with the following exceptions - + * It should not contain check_status (since commands are expected + to fail, check_status is hardcoded to False). + * It is optional to set expected error message and return value to + dict members 'errmsg' and 'retval' respectively. 
+
+        This method serves as shorthand for code blocks like -
+
+        try:
+            proc = self.run_cephfs_shell_cmd(args=['some', 'cmd'],
+                                             check_status=False,
+                                             stdout=stdout)
+        except CommandFailedError as e:
+            self.assertNotIn('some error message',
+                             proc.stderr.getvalue().lower())
+
+
+        try:
+            proc = self.run_cephfs_shell_cmd(args=['some', 'cmd'],
+                                             check_status=False,
+                                             stdout=stdout)
+        except CommandFailedError as e:
+            self.assertNotEqual(1, proc.returncode)
+        """
+        retval = kwargs.pop('retval', None)
+        errmsg = kwargs.pop('errmsg', None)
+        kwargs['check_status'] = False
+
+        proc = self.run_cephfs_shell_cmd(**kwargs)
+        if retval:
+            self.assertEqual(proc.returncode, retval)
+        else:
+            self.assertNotEqual(proc.returncode, 0)
+        if errmsg:
+            self.assertIn(errmsg, proc.stderr.getvalue().lower())
+
+        return proc
+
+    def get_cephfs_shell_cmd_output(self, cmd, mount_x=None,
+                                    shell_conf_path=None, opts=None,
+                                    stdout=None, stdin=None,
+                                    check_status=True):
+        return ensure_str(self.run_cephfs_shell_cmd(
+            cmd=cmd, mount_x=mount_x, shell_conf_path=shell_conf_path,
+            opts=opts, stdout=stdout, stdin=stdin,
+            check_status=check_status).stdout.getvalue().strip())
+
+    def get_cephfs_shell_cmd_error(self, cmd, mount_x=None,
+                                   shell_conf_path=None, opts=None,
+                                   stderr=None, stdin=None, check_status=True):
+        return ensure_str(self.run_cephfs_shell_cmd(
+            cmd=cmd, mount_x=mount_x, shell_conf_path=shell_conf_path,
+            opts=opts, stderr=stderr, stdin=stdin,
+            check_status=check_status).stderr.getvalue().strip())
+
+    def run_cephfs_shell_script(self, script, mount_x=None,
+                                shell_conf_path=None, opts=None, stdout=None,
+                                stderr=None, stdin=None, check_status=True):
+        stdout = stdout or StringIO()
+        stderr = stderr or StringIO()
+        if mount_x is None:
+            mount_x = self.mount_a
+
+        scriptpath = tempfile_mkstemp(prefix='test-cephfs', text=True)[1]
+        with open(scriptpath, 'w') as scriptfile:
+            scriptfile.write(script)
+        # copy script to the machine running cephfs-shell.
+ mount_x.client_remote.put_file(scriptpath, scriptpath) + mount_x.run_shell_payload(f"chmod 755 {scriptpath}") + + args = ["cephfs-shell", '-b', scriptpath] + if shell_conf_path: + args[1:1] = ["-c", shell_conf_path] + log.info('Running script \"' + scriptpath + '\"') + return mount_x.client_remote.run(args=args, stdout=stdout, + stderr=stderr, stdin=stdin, + check_status=True) + + def get_cephfs_shell_script_output(self, script, mount_x=None, + shell_conf_path=None, opts=None, + stdout=None, stdin=None, + check_status=True): + return ensure_str(self.run_cephfs_shell_script( + script=script, mount_x=mount_x, shell_conf_path=shell_conf_path, + opts=opts, stdout=stdout, stdin=stdin, + check_status=check_status).stdout.getvalue().strip()) + + +class TestGeneric(TestCephFSShell): + + def test_mistyped_cmd(self): + with self.assertRaises(CommandFailedError) as cm: + self.run_cephfs_shell_cmd('lsx') + self.assertEqual(cm.exception.exitstatus, 127) + + +class TestMkdir(TestCephFSShell): + def test_mkdir(self): + """ + Test that mkdir creates directory + """ + o = self.get_cephfs_shell_cmd_output("mkdir d1") + log.info("cephfs-shell output:\n{}".format(o)) + + o = self.mount_a.stat('d1') + log.info("mount_a output:\n{}".format(o)) + + def test_mkdir_with_070000_octal_mode(self): + """ + Test that mkdir fails with octal mode greater than 07777 + """ + self.negtest_cephfs_shell_cmd(cmd="mkdir -m 070000 d2") + try: + self.mount_a.stat('d2') + except CommandFailedError: + pass + + def test_mkdir_with_negative_octal_mode(self): + """ + Test that mkdir fails with negative octal mode + """ + self.negtest_cephfs_shell_cmd(cmd="mkdir -m -0755 d3") + try: + self.mount_a.stat('d3') + except CommandFailedError: + pass + + def test_mkdir_with_non_octal_mode(self): + """ + Test that mkdir passes with non-octal mode + """ + o = self.get_cephfs_shell_cmd_output("mkdir -m u=rwx d4") + log.info("cephfs-shell output:\n{}".format(o)) + + # mkdir d4 should pass + o = self.mount_a.stat('d4') + assert ((o['st_mode'] & 0o700) == 0o700) + + def test_mkdir_with_bad_non_octal_mode(self): + """ + Test that mkdir failes with bad non-octal mode + """ + self.negtest_cephfs_shell_cmd(cmd="mkdir -m ugx=0755 d5") + try: + self.mount_a.stat('d5') + except CommandFailedError: + pass + + def test_mkdir_path_without_path_option(self): + """ + Test that mkdir fails without path option for creating path + """ + self.negtest_cephfs_shell_cmd(cmd="mkdir d5/d6/d7") + try: + self.mount_a.stat('d5/d6/d7') + except CommandFailedError: + pass + + def test_mkdir_path_with_path_option(self): + """ + Test that mkdir passes with path option for creating path + """ + o = self.get_cephfs_shell_cmd_output("mkdir -p d5/d6/d7") + log.info("cephfs-shell output:\n{}".format(o)) + + # mkdir d5/d6/d7 should pass + o = self.mount_a.stat('d5/d6/d7') + log.info("mount_a output:\n{}".format(o)) + + +class TestRmdir(TestCephFSShell): + dir_name = "test_dir" + + def dir_does_not_exists(self): + """ + Tests that directory does not exists + """ + try: + self.mount_a.stat(self.dir_name) + except CommandFailedError as e: + if e.exitstatus == 2: + return 0 + raise + + def test_rmdir(self): + """ + Test that rmdir deletes directory + """ + self.run_cephfs_shell_cmd("mkdir " + self.dir_name) + self.run_cephfs_shell_cmd("rmdir " + self.dir_name) + self.dir_does_not_exists() + + def test_rmdir_non_existing_dir(self): + """ + Test that rmdir does not delete a non existing directory + """ + self.negtest_cephfs_shell_cmd(cmd="rmdir test_dir") + self.dir_does_not_exists() + 
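Note: a rough sketch (not cephfs-shell's actual parser) of the mode handling the mkdir tests above expect: plain octal strings must parse in base 8 and fall within 0..07777, a small symbolic form such as u=rwx is accepted, and malformed modes such as ugx=0755 or negative octal strings are rejected. parse_mkdir_mode is a hypothetical name.

    import re

    def parse_mkdir_mode(mode):
        if re.fullmatch(r'[0-7]+', mode):
            value = int(mode, 8)
            if not 0 <= value <= 0o7777:
                raise ValueError('octal mode out of range: ' + mode)
            return value
        # very small symbolic subset, just for illustration
        m = re.fullmatch(r'(?P<who>[ugoa]+)=(?P<perm>[rwx]+)', mode)
        if not m:
            raise ValueError('bad mode: ' + mode)
        bits = sum({'r': 4, 'w': 2, 'x': 1}[c] for c in set(m.group('perm')))
        value = 0
        for who, shift in (('u', 6), ('g', 3), ('o', 0)):
            if who in m.group('who') or 'a' in m.group('who'):
                value |= bits << shift
        return value

    assert parse_mkdir_mode('0755') == 0o755
    assert parse_mkdir_mode('u=rwx') & 0o700 == 0o700   # cf. test_mkdir_with_non_octal_mode
    for bad in ('070000', '-0755', 'ugx=0755'):
        try:
            parse_mkdir_mode(bad)
        except ValueError:
            pass
        else:
            raise AssertionError(bad)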
+ def test_rmdir_dir_with_file(self): + """ + Test that rmdir does not delete directory containing file + """ + self.run_cephfs_shell_cmd("mkdir " + self.dir_name) + + self.run_cephfs_shell_cmd("put - test_dir/dumpfile", stdin="Valid File") + # see comment below + # with self.assertRaises(CommandFailedError) as cm: + with self.assertRaises(CommandFailedError): + self.run_cephfs_shell_cmd("rmdir " + self.dir_name) + # TODO: we need to check for exit code and error message as well. + # skipping it for not since error codes used by cephfs-shell are not + # standard and they may change soon. + # self.assertEqual(cm.exception.exitcode, 39) + self.mount_a.stat(self.dir_name) + + def test_rmdir_existing_file(self): + """ + Test that rmdir does not delete a file + """ + self.run_cephfs_shell_cmd("put - dumpfile", stdin="Valid File") + self.negtest_cephfs_shell_cmd(cmd="rmdir dumpfile") + self.mount_a.stat("dumpfile") + + def test_rmdir_p(self): + """ + Test that rmdir -p deletes all empty directories in the root + directory passed + """ + self.run_cephfs_shell_cmd("mkdir -p test_dir/t1/t2/t3") + self.run_cephfs_shell_cmd("rmdir -p " + self.dir_name) + self.dir_does_not_exists() + + def test_rmdir_p_valid_path(self): + """ + Test that rmdir -p deletes all empty directories in the path passed + """ + self.run_cephfs_shell_cmd("mkdir -p test_dir/t1/t2/t3") + self.run_cephfs_shell_cmd("rmdir -p test_dir/t1/t2/t3") + self.dir_does_not_exists() + + def test_rmdir_p_non_existing_dir(self): + """ + Test that rmdir -p does not delete an invalid directory + """ + self.negtest_cephfs_shell_cmd(cmd="rmdir -p test_dir") + self.dir_does_not_exists() + + def test_rmdir_p_dir_with_file(self): + """ + Test that rmdir -p does not delete the directory containing a file + """ + self.run_cephfs_shell_cmd("mkdir " + self.dir_name) + self.run_cephfs_shell_cmd("put - test_dir/dumpfile", + stdin="Valid File") + self.run_cephfs_shell_cmd("rmdir -p " + self.dir_name) + self.mount_a.stat(self.dir_name) + + +class TestLn(TestCephFSShell): + dir1 = 'test_dir1' + dir2 = 'test_dir2' + dump_id = 11 + s = 'somedata' + dump_file = 'dump11' + + def test_soft_link_without_link_name(self): + self.run_cephfs_shell_cmd(f'mkdir -p {self.dir1}/{self.dir2}') + self.mount_a.write_file(path=f'{self.dir1}/{self.dump_file}', + data=self.s) + self.run_cephfs_shell_script(script=dedent(f''' + cd /{self.dir1}/{self.dir2} + ln -s ../{self.dump_file}''')) + o = self.get_cephfs_shell_cmd_output(f'cat /{self.dir1}/{self.dir2}' + f'/{self.dump_file}') + self.assertEqual(self.s, o) + + def test_soft_link_with_link_name(self): + self.run_cephfs_shell_cmd(f'mkdir -p {self.dir1}/{self.dir2}') + self.mount_a.write_file(path=f'{self.dir1}/{self.dump_file}', + data=self.s) + self.run_cephfs_shell_cmd(f'ln -s /{self.dir1}/{self.dump_file} ' + f'/{self.dir1}/{self.dir2}/') + o = self.get_cephfs_shell_cmd_output(f'cat /{self.dir1}/{self.dir2}' + f'/{self.dump_file}') + self.assertEqual(self.s, o) + + def test_hard_link_without_link_name(self): + self.run_cephfs_shell_cmd(f'mkdir -p {self.dir1}/{self.dir2}') + self.mount_a.write_file(path=f'{self.dir1}/{self.dump_file}', + data=self.s) + self.run_cephfs_shell_script(script=dedent(f''' + cd /{self.dir1}/{self.dir2} + ln ../{self.dump_file}''')) + o = self.get_cephfs_shell_cmd_output(f'cat /{self.dir1}/{self.dir2}' + f'/{self.dump_file}') + self.assertEqual(self.s, o) + + def test_hard_link_with_link_name(self): + self.run_cephfs_shell_cmd(f'mkdir -p {self.dir1}/{self.dir2}') + 
self.mount_a.write_file(path=f'{self.dir1}/{self.dump_file}', + data=self.s) + self.run_cephfs_shell_cmd(f'ln /{self.dir1}/{self.dump_file} ' + f'/{self.dir1}/{self.dir2}/') + o = self.get_cephfs_shell_cmd_output(f'cat /{self.dir1}/{self.dir2}' + f'/{self.dump_file}') + self.assertEqual(self.s, o) + + def test_hard_link_to_dir_not_allowed(self): + self.run_cephfs_shell_cmd(f'mkdir {self.dir1}') + self.run_cephfs_shell_cmd(f'mkdir {self.dir2}') + r = self.run_cephfs_shell_cmd(f'ln /{self.dir1} /{self.dir2}/', + check_status=False) + self.assertEqual(r.returncode, 3) + + def test_target_exists_in_dir(self): + self.mount_a.write_file(path=f'{self.dump_file}', data=self.s) + r = self.run_cephfs_shell_cmd(f'ln {self.dump_file} {self.dump_file}', + check_status=False) + self.assertEqual(r.returncode, 1) + + def test_incorrect_dir(self): + self.mount_a.write_file(path=f'{self.dump_file}', data=self.s) + r = self.run_cephfs_shell_cmd(f'ln {self.dump_file} /dir1/', + check_status=False) + self.assertEqual(r.returncode, 5) + + +class TestGetAndPut(TestCephFSShell): + def test_get_with_target_name(self): + """ + Test that get passes with target name + """ + s = 'C' * 1024 + s_hash = crypt.crypt(s, '.A') + o = self.get_cephfs_shell_cmd_output("put - dump4", stdin=s) + log.info("cephfs-shell output:\n{}".format(o)) + + # put - dump4 should pass + o = self.mount_a.stat('dump4') + log.info("mount_a output:\n{}".format(o)) + + o = self.get_cephfs_shell_cmd_output("get dump4 ./dump4") + log.info("cephfs-shell output:\n{}".format(o)) + + # NOTE: cwd=None because we want to run it at CWD, not at cephfs mntpt. + o = self.mount_a.run_shell('cat dump4', cwd=None).stdout.getvalue(). \ + strip() + o_hash = crypt.crypt(o, '.A') + + # s_hash must be equal to o_hash + log.info("s_hash:{}".format(s_hash)) + log.info("o_hash:{}".format(o_hash)) + assert (s_hash == o_hash) + + # cleanup + self.mount_a.run_shell("rm dump4", cwd=None, check_status=False) + + def test_get_without_target_name(self): + """ + Test that get should fail when there is no target name + """ + s = 'Somedata' + # put - dump5 should pass + self.get_cephfs_shell_cmd_output("put - dump5", stdin=s) + + self.mount_a.stat('dump5') + + # get dump5 should fail as there is no local_path mentioned + with self.assertRaises(CommandFailedError): + self.get_cephfs_shell_cmd_output("get dump5") + + # stat dump would return non-zero exit code as get dump failed + # cwd=None because we want to run it at CWD, not at cephfs mntpt. + r = self.mount_a.run_shell('stat dump5', cwd=None, + check_status=False).returncode + self.assertEqual(r, 1) + + def test_get_doesnt_create_dir(self): + # if get cmd is creating subdirs on its own then dump7 will be + # stored as ./dump7/tmp/dump7 and not ./dump7, therefore + # if doing `cat ./dump7` returns non-zero exit code(i.e. 
1) then + # it implies that no such file exists at that location + dir_abspath = path.join(self.mount_a.mountpoint, 'tmp') + self.mount_a.run_shell_payload(f"mkdir {dir_abspath}") + self.mount_a.client_remote.write_file(path.join(dir_abspath, 'dump7'), + 'somedata') + self.get_cephfs_shell_cmd_output("get /tmp/dump7 ./dump7") + # test that dump7 exists + self.mount_a.run_shell("cat ./dump7", cwd=None) + + # cleanup + self.mount_a.run_shell(args='rm dump7', cwd=None, check_status=False) + + def test_get_to_console(self): + """ + Test that get passes with target name + """ + s = 'E' * 1024 + s_hash = crypt.crypt(s, '.A') + o = self.get_cephfs_shell_cmd_output("put - dump6", stdin=s) + log.info("cephfs-shell output:\n{}".format(o)) + + # put - dump6 should pass + o = self.mount_a.stat('dump6') + log.info("mount_a output:\n{}".format(o)) + + # get dump6 - should pass + o = self.get_cephfs_shell_cmd_output("get dump6 -") + o_hash = crypt.crypt(o, '.A') + log.info("cephfs-shell output:\n{}".format(o)) + + # s_hash must be equal to o_hash + log.info("s_hash:{}".format(s_hash)) + log.info("o_hash:{}".format(o_hash)) + assert (s_hash == o_hash) + + + def test_put_without_target_name(self): + """ + put - should fail as the cmd expects both arguments are mandatory. + """ + with self.assertRaises(CommandFailedError): + self.get_cephfs_shell_cmd_output("put -") + + def test_put_validate_local_path(self): + """ + This test is intended to make sure local_path is validated before + trying to put the file from local fs to cephfs and the command + put ./dumpXYZ dump8 would fail as dumpXYX doesn't exist. + """ + with self.assertRaises(CommandFailedError): + o = self.get_cephfs_shell_cmd_output("put ./dumpXYZ dump8") + log.info("cephfs-shell output:\n{}".format(o)) + +class TestSnapshots(TestCephFSShell): + def test_snap(self): + """ + Test that snapshot creation and deletion work + """ + sd = self.fs.get_config('client_snapdir') + sdn = "data_dir/{}/snap1".format(sd) + + # create a data dir and dump some files into it + self.get_cephfs_shell_cmd_output("mkdir data_dir") + s = 'A' * 10240 + o = self.get_cephfs_shell_cmd_output("put - data_dir/data_a", stdin=s) + s = 'B' * 10240 + o = self.get_cephfs_shell_cmd_output("put - data_dir/data_b", stdin=s) + s = 'C' * 10240 + o = self.get_cephfs_shell_cmd_output("put - data_dir/data_c", stdin=s) + s = 'D' * 10240 + o = self.get_cephfs_shell_cmd_output("put - data_dir/data_d", stdin=s) + s = 'E' * 10240 + o = self.get_cephfs_shell_cmd_output("put - data_dir/data_e", stdin=s) + + o = self.get_cephfs_shell_cmd_output("ls -l /data_dir") + log.info("cephfs-shell output:\n{}".format(o)) + + # create the snapshot - must pass + o = self.get_cephfs_shell_cmd_output("snap create snap1 /data_dir") + log.info("cephfs-shell output:\n{}".format(o)) + self.assertEqual("", o) + o = self.mount_a.stat(sdn) + log.info("mount_a output:\n{}".format(o)) + self.assertIn('st_mode', o) + + # create the same snapshot again - must fail with an error message + self.negtest_cephfs_shell_cmd(cmd="snap create snap1 /data_dir", + errmsg="snapshot 'snap1' already exists") + o = self.mount_a.stat(sdn) + log.info("mount_a output:\n{}".format(o)) + self.assertIn('st_mode', o) + + # delete the snapshot - must pass + o = self.get_cephfs_shell_cmd_output("snap delete snap1 /data_dir") + log.info("cephfs-shell output:\n{}".format(o)) + self.assertEqual("", o) + try: + o = self.mount_a.stat(sdn) + except CommandFailedError: + # snap dir should not exist anymore + pass + log.info("mount_a 
output:\n{}".format(o)) + self.assertNotIn('st_mode', o) + + # delete the same snapshot again - must fail with an error message + self.negtest_cephfs_shell_cmd(cmd="snap delete snap1 /data_dir", + errmsg="'snap1': no such snapshot") + try: + o = self.mount_a.stat(sdn) + except CommandFailedError: + pass + log.info("mount_a output:\n{}".format(o)) + self.assertNotIn('st_mode', o) + + +class TestCD(TestCephFSShell): + CLIENTS_REQUIRED = 1 + + def test_cd_with_no_args(self): + """ + Test that when cd is issued without any arguments, CWD is changed + to root directory. + """ + path = 'dir1/dir2/dir3' + self.mount_a.run_shell_payload(f"mkdir -p {path}") + expected_cwd = '/' + + script = 'cd {}\ncd\ncwd\n'.format(path) + output = self.get_cephfs_shell_script_output(script) + self.assertEqual(output, expected_cwd) + + def test_cd_with_args(self): + """ + Test that when cd is issued with an argument, CWD is changed + to the path passed in the argument. + """ + path = 'dir1/dir2/dir3' + self.mount_a.run_shell_payload(f"mkdir -p {path}") + expected_cwd = '/dir1/dir2/dir3' + + script = 'cd {}\ncwd\n'.format(path) + output = self.get_cephfs_shell_script_output(script) + self.assertEqual(output, expected_cwd) + + +class TestDU(TestCephFSShell): + CLIENTS_REQUIRED = 1 + + def test_du_works_for_regfiles(self): + regfilename = 'some_regfile' + regfile_abspath = path.join(self.mount_a.mountpoint, regfilename) + self.mount_a.client_remote.write_file(regfile_abspath, 'somedata') + + size = humansize(self.mount_a.stat(regfile_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", regfilename) + + du_output = self.get_cephfs_shell_cmd_output('du ' + regfilename) + self.assertRegex(du_output, expected_output) + + def test_du_works_for_non_empty_dirs(self): + dirname = 'some_directory' + dir_abspath = path.join(self.mount_a.mountpoint, dirname) + regfilename = 'some_regfile' + regfile_abspath = path.join(dir_abspath, regfilename) + self.mount_a.run_shell_payload(f"mkdir {dir_abspath}") + self.mount_a.client_remote.write_file(regfile_abspath, 'somedata') + + # XXX: we stat `regfile_abspath` here because ceph du reports + # a non-empty + # directory's size as sum of sizes of all files under it. 
+ size = humansize(self.mount_a.stat(regfile_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", dirname) + + sleep(10) + du_output = self.get_cephfs_shell_cmd_output('du ' + dirname) + self.assertRegex(du_output, expected_output) + + def test_du_works_for_empty_dirs(self): + dirname = 'some_directory' + dir_abspath = path.join(self.mount_a.mountpoint, dirname) + self.mount_a.run_shell_payload(f"mkdir {dir_abspath}") + + size = humansize(self.mount_a.stat(dir_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", dirname) + + du_output = self.get_cephfs_shell_cmd_output('du ' + dirname) + self.assertRegex(du_output, expected_output) + + def test_du_works_for_hardlinks(self): + regfilename = 'some_regfile' + regfile_abspath = path.join(self.mount_a.mountpoint, regfilename) + self.mount_a.client_remote.write_file(regfile_abspath, 'somedata') + hlinkname = 'some_hardlink' + hlink_abspath = path.join(self.mount_a.mountpoint, hlinkname) + self.mount_a.run_shell_payload(f"ln {regfile_abspath} {hlink_abspath}") + + size = humansize(self.mount_a.stat(hlink_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", hlinkname) + + du_output = self.get_cephfs_shell_cmd_output('du ' + hlinkname) + self.assertRegex(du_output, expected_output) + + def test_du_works_for_softlinks_to_files(self): + regfilename = 'some_regfile' + regfile_abspath = path.join(self.mount_a.mountpoint, regfilename) + self.mount_a.client_remote.write_file(regfile_abspath, 'somedata') + slinkname = 'some_softlink' + slink_abspath = path.join(self.mount_a.mountpoint, slinkname) + self.mount_a.run_shell_payload( + f"ln -s {regfile_abspath} {slink_abspath}") + + size = humansize(self.mount_a.lstat(slink_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", slinkname) + + du_output = self.get_cephfs_shell_cmd_output('du ' + slinkname) + self.assertRegex(du_output, expected_output) + + def test_du_works_for_softlinks_to_dirs(self): + dirname = 'some_directory' + dir_abspath = path.join(self.mount_a.mountpoint, dirname) + self.mount_a.run_shell_payload(f"mkdir {dir_abspath}") + slinkname = 'some_softlink' + slink_abspath = path.join(self.mount_a.mountpoint, slinkname) + self.mount_a.run_shell_payload(f"ln -s {dir_abspath} {slink_abspath}") + + size = humansize(self.mount_a.lstat(slink_abspath)['st_size']) + expected_output = r'{}{}{}'.format(size, " +", slinkname) + + du_output = self.get_cephfs_shell_cmd_output('du ' + slinkname) + self.assertRegex(du_output, expected_output) + + # NOTE: tests using these are pretty slow since to this methods sleeps for + # 15 seconds + def _setup_files(self, return_path_to_files=False, path_prefix='./'): + dirname = 'dir1' + regfilename = 'regfile' + hlinkname = 'hlink' + slinkname = 'slink1' + slink2name = 'slink2' + + dir_abspath = path.join(self.mount_a.mountpoint, dirname) + regfile_abspath = path.join(self.mount_a.mountpoint, regfilename) + hlink_abspath = path.join(self.mount_a.mountpoint, hlinkname) + slink_abspath = path.join(self.mount_a.mountpoint, slinkname) + slink2_abspath = path.join(self.mount_a.mountpoint, slink2name) + + self.mount_a.run_shell_payload(f"mkdir {dir_abspath}") + self.mount_a.run_shell_payload(f"touch {regfile_abspath}") + self.mount_a.run_shell_payload(f"ln {regfile_abspath} {hlink_abspath}") + self.mount_a.run_shell_payload( + f"ln -s {regfile_abspath} {slink_abspath}") + self.mount_a.run_shell_payload(f"ln -s {dir_abspath} {slink2_abspath}") + + dir2_name = 'dir2' + dir21_name = 'dir21' + regfile121_name = 
'regfile121' + dir2_abspath = path.join(self.mount_a.mountpoint, dir2_name) + dir21_abspath = path.join(dir2_abspath, dir21_name) + regfile121_abspath = path.join(dir21_abspath, regfile121_name) + self.mount_a.run_shell_payload(f"mkdir -p {dir21_abspath}") + self.mount_a.run_shell_payload(f"touch {regfile121_abspath}") + + self.mount_a.client_remote.write_file(regfile_abspath, 'somedata') + self.mount_a.client_remote.write_file(regfile121_abspath, + 'somemoredata') + + # TODO: is there a way to trigger/force update ceph.dir.rbytes? + # wait so that attr ceph.dir.rbytes gets a chance to be updated. + sleep(20) + + expected_patterns = [] + path_to_files = [] + + def append_expected_output_pattern(f): + if f == '/': + expected_patterns.append(r'{}{}{}'.format(size, " +", '.' + f)) + else: + expected_patterns.append(r'{}{}{}'.format( + size, " +", + path_prefix + path.relpath(f, self.mount_a.mountpoint))) + + for f in [dir_abspath, regfile_abspath, regfile121_abspath, + hlink_abspath, slink_abspath, slink2_abspath]: + size = humansize(self.mount_a.stat( + f, follow_symlinks=False)['st_size']) + append_expected_output_pattern(f) + + # get size for directories containig regfiles within + for f in [dir2_abspath, dir21_abspath]: + size = humansize(self.mount_a.stat(regfile121_abspath, + follow_symlinks=False)[ + 'st_size']) + append_expected_output_pattern(f) + + # get size for CephFS root + size = 0 + for f in [regfile_abspath, regfile121_abspath, slink_abspath, + slink2_abspath]: + size += self.mount_a.stat(f, follow_symlinks=False)['st_size'] + size = humansize(size) + append_expected_output_pattern('/') + + if return_path_to_files: + for p in [dir_abspath, regfile_abspath, dir2_abspath, + dir21_abspath, regfile121_abspath, hlink_abspath, + slink_abspath, slink2_abspath]: + path_to_files.append(path.relpath(p, self.mount_a.mountpoint)) + + return expected_patterns, path_to_files + else: + return expected_patterns + + def test_du_works_recursively_with_no_path_in_args(self): + expected_patterns_in_output = self._setup_files() + du_output = self.get_cephfs_shell_cmd_output('du -r') + + for expected_output in expected_patterns_in_output: + self.assertRegex(du_output, expected_output) + + def test_du_with_path_in_args(self): + expected_patterns_in_output, path_to_files = self._setup_files( + True, path_prefix='') + + args = ['du', '/'] + for p in path_to_files: + args.append(p) + du_output = self.get_cephfs_shell_cmd_output(args) + + for expected_output in expected_patterns_in_output: + self.assertRegex(du_output, expected_output) + + def test_du_with_no_args(self): + expected_patterns_in_output = self._setup_files() + + du_output = self.get_cephfs_shell_cmd_output('du') + + for expected_output in expected_patterns_in_output: + # Since CWD is CephFS root and being non-recursive expect only + # CWD in DU report. 
+ if expected_output.find('/') == len(expected_output) - 1: + self.assertRegex(du_output, expected_output) + + +class TestDF(TestCephFSShell): + def validate_df(self, filename): + df_output = self.get_cephfs_shell_cmd_output('df ' + filename) + log.info("cephfs-shell df output:\n{}".format(df_output)) + + shell_df = df_output.splitlines()[1].split() + + block_size = int(self.mount_a.df()["total"]) // 1024 + log.info("cephfs df block size output:{}\n".format(block_size)) + + st_size = int(self.mount_a.stat(filename)["st_size"]) + log.info("cephfs stat used output:{}".format(st_size)) + log.info("cephfs available:{}\n".format(block_size - st_size)) + + self.assertTupleEqual((block_size, st_size, block_size - st_size), + (int(shell_df[0]), int(shell_df[1]), + int(shell_df[2]))) + + def test_df_with_no_args(self): + expected_output = '' + df_output = self.get_cephfs_shell_cmd_output('df') + assert df_output == expected_output + + def test_df_for_valid_directory(self): + dir_name = 'dir1' + mount_output = self.mount_a.run_shell_payload(f"mkdir {dir_name}") + log.info("cephfs-shell mount output:\n{}".format(mount_output)) + self.validate_df(dir_name) + + def test_df_for_invalid_directory(self): + dir_abspath = path.join(self.mount_a.mountpoint, 'non-existent-dir') + self.negtest_cephfs_shell_cmd(cmd='df ' + dir_abspath, + errmsg='error in stat') + + def test_df_for_valid_file(self): + s = 'df test' * 14145016 + o = self.get_cephfs_shell_cmd_output("put - dumpfile", stdin=s) + log.info("cephfs-shell output:\n{}".format(o)) + self.validate_df("dumpfile") + + +class TestQuota(TestCephFSShell): + dir_name = 'testdir' + + def create_dir(self): + mount_output = self.get_cephfs_shell_cmd_output( + 'mkdir ' + self.dir_name) + log.info("cephfs-shell mount output:\n{}".format(mount_output)) + + def set_and_get_quota_vals(self, input_val, check_status=True): + self.run_cephfs_shell_cmd(['quota', 'set', '--max_bytes', + input_val[0], '--max_files', input_val[1], + self.dir_name], check_status=check_status) + + quota_output = self.get_cephfs_shell_cmd_output( + ['quota', 'get', self.dir_name], + check_status=check_status) + + quota_output = quota_output.split() + return quota_output[1], quota_output[3] + + def test_set(self): + self.create_dir() + set_values = ('6', '2') + self.assertTupleEqual(self.set_and_get_quota_vals(set_values), + set_values) + + def test_replace_values(self): + self.test_set() + set_values = ('20', '4') + self.assertTupleEqual(self.set_and_get_quota_vals(set_values), + set_values) + + def test_set_invalid_dir(self): + set_values = ('5', '5') + try: + self.assertTupleEqual(self.set_and_get_quota_vals( + set_values, False), set_values) + raise Exception( + "Something went wrong!! Values set for non existing directory") + except IndexError: + # Test should pass as values cannot be set for non + # existing directory + pass + + def test_set_invalid_values(self): + self.create_dir() + set_values = ('-6', '-5') + try: + self.assertTupleEqual(self.set_and_get_quota_vals(set_values, + False), + set_values) + raise Exception("Something went wrong!! Invalid values set") + except IndexError: + # Test should pass as invalid values cannot be set + pass + + def test_exceed_file_limit(self): + self.test_set() + dir_abspath = path.join(self.mount_a.mountpoint, self.dir_name) + self.mount_a.run_shell_payload(f"touch {dir_abspath}/file1") + file2 = path.join(dir_abspath, "file2") + try: + self.mount_a.run_shell_payload(f"touch {file2}") + raise Exception( + "Something went wrong!! 
File creation should have failed") + except CommandFailedError: + # Test should pass as file quota set to 2 + # Additional condition to confirm file creation failure + if not path.exists(file2): + return 0 + raise + + def test_exceed_write_limit(self): + self.test_set() + dir_abspath = path.join(self.mount_a.mountpoint, self.dir_name) + filename = 'test_file' + file_abspath = path.join(dir_abspath, filename) + try: + # Write should fail as bytes quota is set to 6 + self.mount_a.client_remote.write_file(file_abspath, + 'Disk raise Exception') + raise Exception("Write should have failed") + except CommandFailedError: + # Test should pass only when write command fails + path_exists = path.exists(file_abspath) + if not path_exists: + # Testing with teuthology: No file is created. + return 0 + elif path_exists and not path.getsize(file_abspath): + # Testing on Fedora 30: When write fails, empty + # file gets created. + return 0 + else: + raise + + +class TestXattr(TestCephFSShell): + dir_name = 'testdir' + + def create_dir(self): + self.run_cephfs_shell_cmd('mkdir ' + self.dir_name) + + def set_get_list_xattr_vals(self, input_val, negtest=False): + setxattr_output = self.get_cephfs_shell_cmd_output( + ['setxattr', self.dir_name, input_val[0], input_val[1]]) + log.info("cephfs-shell setxattr output:\n{}".format(setxattr_output)) + + getxattr_output = self.get_cephfs_shell_cmd_output( + ['getxattr', self.dir_name, input_val[0]]) + log.info("cephfs-shell getxattr output:\n{}".format(getxattr_output)) + + listxattr_output = self.get_cephfs_shell_cmd_output( + ['listxattr', self.dir_name]) + log.info("cephfs-shell listxattr output:\n{}".format(listxattr_output)) + + return listxattr_output, getxattr_output + + def test_set(self): + self.create_dir() + set_values = ('user.key', '2') + self.assertTupleEqual(self.set_get_list_xattr_vals(set_values), + set_values) + + def test_reset(self): + self.test_set() + set_values = ('user.key', '4') + self.assertTupleEqual(self.set_get_list_xattr_vals(set_values), + set_values) + + def test_non_existing_dir(self): + input_val = ('user.key', '9') + self.negtest_cephfs_shell_cmd( + cmd=['setxattr', self.dir_name, input_val[0], + input_val[1]]) + self.negtest_cephfs_shell_cmd( + cmd=['getxattr', self.dir_name, input_val[0]]) + self.negtest_cephfs_shell_cmd(cmd=['listxattr', self.dir_name]) + + +class TestLS(TestCephFSShell): + dir_name = 'test_dir' + hidden_dir_name = '.test_hidden_dir' + + def test_ls(self): + """ Test that ls prints files in CWD. 
""" + self.run_cephfs_shell_cmd(f'mkdir {self.dir_name}') + + ls_output = self.get_cephfs_shell_cmd_output("ls") + log.info(f"output of ls command:\n{ls_output}") + + self.assertIn(self.dir_name, ls_output) + + def test_ls_a(self): + """ Test ls -a prints hidden files in CWD.""" + + self.run_cephfs_shell_cmd(f'mkdir {self.hidden_dir_name}') + + ls_a_output = self.get_cephfs_shell_cmd_output(['ls', '-a']) + log.info(f"output of ls -a command:\n{ls_a_output}") + + self.assertIn(self.hidden_dir_name, ls_a_output) + + def test_ls_does_not_print_hidden_dir(self): + """ Test ls command does not print hidden directory """ + + self.run_cephfs_shell_cmd(f'mkdir {self.hidden_dir_name}') + + ls_output = self.get_cephfs_shell_cmd_output("ls") + log.info(f"output of ls command:\n{ls_output}") + + self.assertNotIn(self.hidden_dir_name, ls_output) + + def test_ls_a_prints_non_hidden_dir(self): + """ Test ls -a command prints non hidden directory """ + + self.run_cephfs_shell_cmd( + f'mkdir {self.hidden_dir_name} {self.dir_name}') + + ls_a_output = self.get_cephfs_shell_cmd_output(['ls', '-a']) + log.info(f"output of ls -a command:\n{ls_a_output}") + + self.assertIn(self.dir_name, ls_a_output) + + def test_ls_H_prints_human_readable_file_size(self): + """ Test "ls -lH" prints human readable file size.""" + + file_sizes = ['1', '1K', '1M', '1G'] + file_names = ['dump1', 'dump2', 'dump3', 'dump4'] + + for (file_size, file_name) in zip(file_sizes, file_names): + temp_file = self.mount_a.client_remote.mktemp(file_name) + self.mount_a.run_shell_payload( + f"fallocate -l {file_size} {temp_file}") + self.mount_a.run_shell_payload(f'mv {temp_file} ./') + + ls_H_output = self.get_cephfs_shell_cmd_output(['ls', '-lH']) + + ls_H_file_size = set() + for line in ls_H_output.split('\n'): + ls_H_file_size.add(line.split()[1]) + + # test that file sizes are in human readable format + self.assertEqual({'1B', '1K', '1M', '1G'}, ls_H_file_size) + + def test_ls_s_sort_by_size(self): + """ Test "ls -S" sorts file listing by file_size """ + test_file1 = "test_file1.txt" + test_file2 = "test_file2.txt" + file1_content = 'A' * 102 + file2_content = 'B' * 10 + + self.run_cephfs_shell_cmd(f"write {test_file1}", stdin=file1_content) + self.run_cephfs_shell_cmd(f"write {test_file2}", stdin=file2_content) + + ls_s_output = self.get_cephfs_shell_cmd_output(['ls', '-lS']) + + file_sizes = [] + for line in ls_s_output.split('\n'): + file_sizes.append(line.split()[1]) + + # test that file size are in ascending order + self.assertEqual(file_sizes, sorted(file_sizes)) + + +class TestMisc(TestCephFSShell): + def test_issue_cephfs_shell_cmd_at_invocation(self): + """ + Test that `cephfs-shell -c conf cmd` works. + """ + # choosing a long name since short ones have a higher probability + # of getting matched by coincidence. + dirname = 'somedirectory' + self.run_cephfs_shell_cmd(['mkdir', dirname]) + + output = self.mount_a.client_remote.sh(['cephfs-shell', 'ls']). \ + strip() + + self.assertRegex(output, dirname) + + def test_help(self): + """ + Test that help outputs commands. + """ + o = self.get_cephfs_shell_cmd_output("help all") + log.info("output:\n{}".format(o)) + + + def test_chmod(self): + """Test chmod is allowed above o0777 """ + + test_file1 = "test_file2.txt" + file1_content = 'A' * 102 + self.run_cephfs_shell_cmd(f"write {test_file1}", stdin=file1_content) + self.run_cephfs_shell_cmd(f"chmod 01777 {test_file1}") + +class TestShellOpts(TestCephFSShell): + """ + Contains tests for shell options from conf file and shell prompt. 
+ """ + + def setUp(self): + super(type(self), self).setUp() + + # output of following command - + # editor - was: 'vim' + # now: '?' + # editor: '?' + self.editor_val = self.get_cephfs_shell_cmd_output( + 'set editor ?, set editor').split('\n')[2] + self.editor_val = self.editor_val.split(':')[1]. \ + replace("'", "", 2).strip() + + def write_tempconf(self, confcontents): + self.tempconfpath = self.mount_a.client_remote.mktemp( + suffix='cephfs-shell.conf') + self.mount_a.client_remote.write_file(self.tempconfpath, + confcontents) + + def test_reading_conf(self): + self.write_tempconf("[cephfs-shell]\neditor = ???") + + # output of following command - + # CephFS:~/>>> set editor + # editor: 'vim' + final_editor_val = self.get_cephfs_shell_cmd_output( + cmd='set editor', shell_conf_path=self.tempconfpath) + final_editor_val = final_editor_val.split(': ')[1] + final_editor_val = final_editor_val.replace("'", "", 2) + + self.assertNotEqual(self.editor_val, final_editor_val) + + def test_reading_conf_with_dup_opt(self): + """ + Read conf without duplicate sections/options. + """ + self.write_tempconf("[cephfs-shell]\neditor = ???\neditor = " + + self.editor_val) + + # output of following command - + # CephFS:~/>>> set editor + # editor: 'vim' + final_editor_val = self.get_cephfs_shell_cmd_output( + cmd='set editor', shell_conf_path=self.tempconfpath) + final_editor_val = final_editor_val.split(': ')[1] + final_editor_val = final_editor_val.replace("'", "", 2) + + self.assertEqual(self.editor_val, final_editor_val) + + def test_setting_opt_after_reading_conf(self): + self.write_tempconf("[cephfs-shell]\neditor = ???") + + # output of following command - + # editor - was: vim + # now: vim + # editor: vim + final_editor_val = self.get_cephfs_shell_cmd_output( + cmd='set editor %s, set editor' % self.editor_val, + shell_conf_path=self.tempconfpath) + final_editor_val = final_editor_val.split('\n')[2] + final_editor_val = final_editor_val.split(': ')[1] + final_editor_val = final_editor_val.replace("'", "", 2) + + self.assertEqual(self.editor_val, final_editor_val) diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py new file mode 100644 index 000000000..c4215df33 --- /dev/null +++ b/qa/tasks/cephfs/test_client_limits.py @@ -0,0 +1,397 @@ + +""" +Exercise the MDS's behaviour when clients and the MDCache reach or +exceed the limits of how many caps/inodes they should hold. +""" + +import logging +from textwrap import dedent +from tasks.ceph_test_case import TestTimeoutError +from tasks.cephfs.cephfs_test_case import CephFSTestCase, needs_trimming +from tasks.cephfs.fuse_mount import FuseMount +from teuthology.exceptions import CommandFailedError +import os +from io import StringIO + + +log = logging.getLogger(__name__) + + +# Arbitrary timeouts for operations involving restarting +# an MDS or waiting for it to come up +MDS_RESTART_GRACE = 60 + +# Hardcoded values from Server::recall_client_state +CAP_RECALL_RATIO = 0.8 +CAP_RECALL_MIN = 100 + + +class TestClientLimits(CephFSTestCase): + CLIENTS_REQUIRED = 2 + + def _test_client_pin(self, use_subdir, open_files): + """ + When a client pins an inode in its cache, for example because the file is held open, + it should reject requests from the MDS to trim these caps. The MDS should complain + to the user that it is unable to enforce its cache size limits because of this + objectionable client. 
+ + :param use_subdir: whether to put test files in a subdir or use root + """ + + # Set MDS cache memory limit to a low value that will make the MDS to + # ask the client to trim the caps. + cache_memory_limit = "1K" + + self.config_set('mds', 'mds_cache_memory_limit', cache_memory_limit) + self.config_set('mds', 'mds_recall_max_caps', int(open_files/2)) + self.config_set('mds', 'mds_recall_warning_threshold', open_files) + + mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client")) + self.config_set('mds', 'mds_min_caps_working_set', mds_min_caps_per_client) + mds_max_caps_per_client = int(self.config_get('mds', "mds_max_caps_per_client")) + mds_recall_warning_decay_rate = float(self.config_get('mds', "mds_recall_warning_decay_rate")) + self.assertGreaterEqual(open_files, mds_min_caps_per_client) + + mount_a_client_id = self.mount_a.get_global_id() + path = "subdir" if use_subdir else "." + open_proc = self.mount_a.open_n_background(path, open_files) + + # Client should now hold: + # `open_files` caps for the open files + # 1 cap for root + # 1 cap for subdir + self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'], + open_files + (2 if use_subdir else 1), + timeout=600, + reject_fn=lambda x: x > open_files + 2) + + # MDS should not be happy about that, as the client is failing to comply + # with the SESSION_RECALL messages it is being sent + self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2) + + # We can also test that the MDS health warning for oversized + # cache is functioning as intended. + self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2) + + # When the client closes the files, it should retain only as many caps as allowed + # under the SESSION_RECALL policy + log.info("Terminating process holding files open") + self.mount_a._kill_background(open_proc) + + # The remaining caps should comply with the numbers sent from MDS in SESSION_RECALL message, + # which depend on the caps outstanding, cache size and overall ratio + def expected_caps(): + num_caps = self.get_session(mount_a_client_id)['num_caps'] + if num_caps <= mds_min_caps_per_client: + return True + elif num_caps <= mds_max_caps_per_client: + return True + else: + return False + + self.wait_until_true(expected_caps, timeout=60) + + @needs_trimming + def test_client_pin_root(self): + self._test_client_pin(False, 400) + + @needs_trimming + def test_client_pin(self): + self._test_client_pin(True, 800) + + @needs_trimming + def test_client_pin_mincaps(self): + self._test_client_pin(True, 200) + + def test_client_min_caps_working_set(self): + """ + When a client has inodes pinned in its cache (open files), that the MDS + will not warn about the client not responding to cache pressure when + the number of caps is below mds_min_caps_working_set. + """ + + # Set MDS cache memory limit to a low value that will make the MDS to + # ask the client to trim the caps. 
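The per-session cap count polled above via get_session()/wait_until_equal is also visible outside teuthology through the MDS tell interface. A rough sketch, assuming rank 0 can be addressed as mds.0 (a daemon name works too) and the usual 'id'/'num_caps' fields in the session ls JSON:

    import json
    import subprocess
    import time

    def wait_for_num_caps(client_id, expected, timeout=600, interval=5):
        # poll `ceph tell mds.0 session ls` until the client holds the expected caps
        deadline = time.time() + timeout
        while time.time() < deadline:
            out = subprocess.check_output(['ceph', 'tell', 'mds.0', 'session', 'ls'])
            for session in json.loads(out):
                if session.get('id') == client_id and session.get('num_caps') == expected:
                    return session
            time.sleep(interval)
        raise TimeoutError('client %s never reached %s caps' % (client_id, expected))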
+ cache_memory_limit = "1K" + open_files = 400 + + self.config_set('mds', 'mds_cache_memory_limit', cache_memory_limit) + self.config_set('mds', 'mds_recall_max_caps', int(open_files/2)) + self.config_set('mds', 'mds_recall_warning_threshold', open_files) + self.config_set('mds', 'mds_min_caps_working_set', open_files*2) + + mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client")) + mds_recall_warning_decay_rate = float(self.config_get('mds', "mds_recall_warning_decay_rate")) + self.assertGreaterEqual(open_files, mds_min_caps_per_client) + + mount_a_client_id = self.mount_a.get_global_id() + self.mount_a.open_n_background("subdir", open_files) + + # Client should now hold: + # `open_files` caps for the open files + # 1 cap for root + # 1 cap for subdir + self.wait_until_equal(lambda: self.get_session(mount_a_client_id)['num_caps'], + open_files + 2, + timeout=600, + reject_fn=lambda x: x > open_files + 2) + + # We can also test that the MDS health warning for oversized + # cache is functioning as intended. + self.wait_for_health("MDS_CACHE_OVERSIZED", mds_recall_warning_decay_rate*2) + + try: + # MDS should not be happy about that but it's not sending + # MDS_CLIENT_RECALL warnings because the client's caps are below + # mds_min_caps_working_set. + self.wait_for_health("MDS_CLIENT_RECALL", mds_recall_warning_decay_rate*2) + except TestTimeoutError: + pass + else: + raise RuntimeError("expected no client recall warning") + + def test_cap_acquisition_throttle_readdir(self): + """ + Mostly readdir acquires caps faster than the mds recalls, so the cap + acquisition via readdir is throttled by retrying the readdir after + a fraction of second (0.5) by default when throttling condition is met. + """ + + subdir_count = 4 + files_per_dir = 25 + + # throttle in a way so that two dir reads are already hitting it. + throttle_value = (files_per_dir * 3) // 2 + + # activate throttling logic by setting max per client to a low value + self.config_set('mds', 'mds_max_caps_per_client', 1) + self.config_set('mds', 'mds_session_cap_acquisition_throttle', throttle_value) + + # Create files split across {subdir_count} directories, {per_dir_count} in each dir + for i in range(1, subdir_count+1): + self.mount_a.create_n_files("dir{0}/file".format(i), files_per_dir, sync=True) + + mount_a_client_id = self.mount_a.get_global_id() + + # recursive readdir. macOs wants an explicit directory for `find`. + proc = self.mount_a.run_shell_payload("find . | wc", stderr=StringIO()) + # return code may be None if the command got interrupted + self.assertTrue(proc.returncode is None or proc.returncode == 0, proc.stderr.getvalue()) + + # validate the throttle condition to be hit atleast once + cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle'] + self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1) + + # validate cap_acquisition decay counter after readdir to NOT exceed the throttle value + # plus one batch that could have been taken immediately before querying + # assuming the batch is equal to the per dir file count. 
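The cap_acquisition_throttle counter checked above sits in the mds_server section of the MDS perf dump, so the same number can be read straight off the admin socket. Sketch only, with the socket path as an assumption:

    import json
    import subprocess

    def cap_acquisition_throttle_hits(asok='/var/run/ceph/ceph-mds.a.asok'):
        # `perf dump` over the admin socket returns the counters self.perf_dump() wraps
        out = subprocess.check_output(['ceph', '--admin-daemon', asok, 'perf', 'dump'])
        return json.loads(out)['mds_server']['cap_acquisition_throttle']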
+ cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value'] + self.assertLessEqual(cap_acquisition_value, files_per_dir + throttle_value) + + # make sure that the throttle was reported in the events + def historic_ops_have_event(expected_event): + ops_dump = self.fs.rank_tell(['dump_historic_ops']) + # reverse the events and the ops assuming that later ops would be throttled + for op in reversed(ops_dump['ops']): + for ev in reversed(op.get('type_data', {}).get('events', [])): + if ev['event'] == expected_event: + return True + return False + + self.assertTrue(historic_ops_have_event('cap_acquisition_throttle')) + + def test_client_release_bug(self): + """ + When a client has a bug (which we will simulate) preventing it from releasing caps, + the MDS should notice that releases are not being sent promptly, and generate a health + metric to that effect. + """ + + # The debug hook to inject the failure only exists in the fuse client + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to inject client release failure") + + self.set_conf('client.{0}'.format(self.mount_a.client_id), 'client inject release failure', 'true') + self.mount_a.teardown() + self.mount_a.mount_wait() + mount_a_client_id = self.mount_a.get_global_id() + + # Client A creates a file. He will hold the write caps on the file, and later (simulated bug) fail + # to comply with the MDSs request to release that cap + self.mount_a.run_shell(["touch", "file1"]) + + # Client B tries to stat the file that client A created + rproc = self.mount_b.write_background("file1") + + # After session_timeout, we should see a health warning (extra lag from + # MDS beacon period) + session_timeout = self.fs.get_var("session_timeout") + self.wait_for_health("MDS_CLIENT_LATE_RELEASE", session_timeout + 10) + + # Client B should still be stuck + self.assertFalse(rproc.finished) + + # Kill client A + self.mount_a.kill() + self.mount_a.kill_cleanup() + + # Client B should complete + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + rproc.wait() + + def test_client_blocklisted_oldest_tid(self): + """ + that a client is blocklisted when its encoded session metadata exceeds the + configured threshold (due to ever growing `completed_requests` caused due + to an unidentified bug (in the client or the MDS)). + """ + + # num of requests client issues + max_requests = 10000 + + # The debug hook to inject the failure only exists in the fuse client + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to inject client release failure") + + self.config_set('client', 'client inject fixed oldest tid', 'true') + self.mount_a.teardown() + self.mount_a.mount_wait() + + self.config_set('mds', 'mds_max_completed_requests', max_requests); + + # Create lots of files + self.mount_a.create_n_files("testdir/file1", max_requests + 100) + + # Create a few files synchronously. This makes sure previous requests are completed + self.mount_a.create_n_files("testdir/file2", 5, True) + + # Wait for the health warnings. Assume mds can handle 10 request per second at least + self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10, check_in_detail=str(self.mount_a.client_id)) + + # set the threshold low so that it has a high probability of + # hitting. + self.config_set('mds', 'mds_session_metadata_threshold', 5000); + + # Create lot many files synchronously. This would hit the session metadata threshold + # causing the client to get blocklisted. 
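The blocklisting asserted just below can also be confirmed from the CLI with `ceph osd blocklist ls`. A sketch; the exact address format (v1:/v2: prefixes, nonce) varies between releases:

    import subprocess

    def is_blocklisted(addr):
        # one "addr ... expires ..." entry per line of output
        out = subprocess.check_output(['ceph', 'osd', 'blocklist', 'ls']).decode()
        return any(addr in line for line in out.splitlines())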
+ with self.assertRaises(CommandFailedError): + self.mount_a.create_n_files("testdir/file2", 100000, True) + + self.mds_cluster.is_addr_blocklisted(self.mount_a.get_global_addr()) + # the mds should bump up the relevant perf counter + pd = self.perf_dump() + self.assertGreater(pd['mds_sessions']['mdthresh_evicted'], 0) + + # reset the config + self.config_set('client', 'client inject fixed oldest tid', 'false') + + self.mount_a.kill_cleanup() + self.mount_a.mount_wait() + + def test_client_oldest_tid(self): + """ + When a client does not advance its oldest tid, the MDS should notice that + and generate health warnings. + """ + + # num of requests client issues + max_requests = 1000 + + # The debug hook to inject the failure only exists in the fuse client + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to inject client release failure") + + self.set_conf('client', 'client inject fixed oldest tid', 'true') + self.mount_a.teardown() + self.mount_a.mount_wait() + + self.fs.mds_asok(['config', 'set', 'mds_max_completed_requests', '{0}'.format(max_requests)]) + + # Create lots of files + self.mount_a.create_n_files("testdir/file1", max_requests + 100) + + # Create a few files synchronously. This makes sure previous requests are completed + self.mount_a.create_n_files("testdir/file2", 5, True) + + # Wait for the health warnings. Assume mds can handle 10 request per second at least + self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10) + + def _test_client_cache_size(self, mount_subdir): + """ + check if client invalidate kernel dcache according to its cache size config + """ + + # The debug hook to inject the failure only exists in the fuse client + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to inject client release failure") + + if mount_subdir: + # fuse assigns a fix inode number (1) to root inode. But in mounting into + # subdir case, the actual inode number of root is not 1. This mismatch + # confuses fuse_lowlevel_notify_inval_entry() when invalidating dentries + # in root directory. + self.mount_a.run_shell(["mkdir", "subdir"]) + self.mount_a.umount_wait() + self.set_conf('client', 'client mountpoint', '/subdir') + self.mount_a.mount_wait() + root_ino = self.mount_a.path_to_ino(".") + self.assertEqual(root_ino, 1); + + dir_path = os.path.join(self.mount_a.mountpoint, "testdir") + + mkdir_script = dedent(""" + import os + os.mkdir("{path}") + for n in range(0, {num_dirs}): + os.mkdir("{path}/dir{{0}}".format(n)) + """) + + num_dirs = 1000 + self.mount_a.run_python(mkdir_script.format(path=dir_path, num_dirs=num_dirs)) + self.mount_a.run_shell(["sync"]) + + dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count() + self.assertGreaterEqual(dentry_count, num_dirs) + self.assertGreaterEqual(dentry_pinned_count, num_dirs) + + cache_size = num_dirs // 10 + self.mount_a.set_cache_size(cache_size) + + def trimmed(): + dentry_count, dentry_pinned_count = self.mount_a.get_dentry_count() + log.info("waiting, dentry_count, dentry_pinned_count: {0}, {1}".format( + dentry_count, dentry_pinned_count + )) + if dentry_count > cache_size or dentry_pinned_count > cache_size: + return False + + return True + + self.wait_until_true(trimmed, 30) + + @needs_trimming + def test_client_cache_size(self): + self._test_client_cache_size(False) + self._test_client_cache_size(True) + + def test_client_max_caps(self): + """ + That the MDS will not let a client sit above mds_max_caps_per_client caps. 
+ """ + + mds_min_caps_per_client = int(self.config_get('mds', "mds_min_caps_per_client")) + mds_max_caps_per_client = 2*mds_min_caps_per_client + self.config_set('mds', 'mds_max_caps_per_client', mds_max_caps_per_client) + + self.mount_a.create_n_files("foo/", 3*mds_max_caps_per_client, sync=True) + + mount_a_client_id = self.mount_a.get_global_id() + def expected_caps(): + num_caps = self.get_session(mount_a_client_id)['num_caps'] + if num_caps <= mds_max_caps_per_client: + return True + else: + return False + + self.wait_until_true(expected_caps, timeout=60) diff --git a/qa/tasks/cephfs/test_client_recovery.py b/qa/tasks/cephfs/test_client_recovery.py new file mode 100644 index 000000000..1bd6884a9 --- /dev/null +++ b/qa/tasks/cephfs/test_client_recovery.py @@ -0,0 +1,757 @@ + +""" +Teuthology task for exercising CephFS client recovery +""" + +import logging +from textwrap import dedent +import time +import distutils.version as version +import random +import re +import string +import os + +from teuthology.orchestra import run +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.packaging import get_package_version + +log = logging.getLogger(__name__) + + +# Arbitrary timeouts for operations involving restarting +# an MDS or waiting for it to come up +MDS_RESTART_GRACE = 60 + + +class TestClientNetworkRecovery(CephFSTestCase): + REQUIRE_ONE_CLIENT_REMOTE = True + CLIENTS_REQUIRED = 2 + + LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"] + + # Environment references + mds_reconnect_timeout = None + ms_max_backoff = None + + def test_network_death(self): + """ + Simulate software freeze or temporary network failure. + + Check that the client blocks I/O during failure, and completes + I/O after failure. 
+ """ + + session_timeout = self.fs.get_var("session_timeout") + self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false']) + + # We only need one client + self.mount_b.umount_wait() + + # Initially our one client session should be visible + client_id = self.mount_a.get_global_id() + ls_data = self._session_list() + self.assert_session_count(1, ls_data) + self.assertEqual(ls_data[0]['id'], client_id) + self.assert_session_state(client_id, "open") + + # ...and capable of doing I/O without blocking + self.mount_a.create_files() + + # ...but if we turn off the network + self.fs.set_clients_block(True) + + # ...and try and start an I/O + write_blocked = self.mount_a.write_background() + + # ...then it should block + self.assertFalse(write_blocked.finished) + self.assert_session_state(client_id, "open") + time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale + self.assertFalse(write_blocked.finished) + self.assert_session_state(client_id, "stale") + + # ...until we re-enable I/O + self.fs.set_clients_block(False) + + # ...when it should complete promptly + a = time.time() + self.wait_until_true(lambda: write_blocked.finished, self.ms_max_backoff * 2) + write_blocked.wait() # Already know we're finished, wait() to raise exception on errors + recovery_time = time.time() - a + log.info("recovery time: {0}".format(recovery_time)) + self.assert_session_state(client_id, "open") + + +class TestClientRecovery(CephFSTestCase): + CLIENTS_REQUIRED = 2 + + LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"] + + # Environment references + mds_reconnect_timeout = None + ms_max_backoff = None + + def test_basic(self): + # Check that two clients come up healthy and see each others' files + # ===================================================== + self.mount_a.create_files() + self.mount_a.check_files() + self.mount_a.umount_wait() + + self.mount_b.check_files() + + self.mount_a.mount_wait() + + # Check that the admin socket interface is correctly reporting + # two sessions + # ===================================================== + ls_data = self._session_list() + self.assert_session_count(2, ls_data) + + self.assertSetEqual( + set([l['id'] for l in ls_data]), + {self.mount_a.get_global_id(), self.mount_b.get_global_id()} + ) + + def test_restart(self): + # Check that after an MDS restart both clients reconnect and continue + # to handle I/O + # ===================================================== + self.fs.mds_fail_restart() + self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + + self.mount_a.create_destroy() + self.mount_b.create_destroy() + + def _session_num_caps(self, client_id): + ls_data = self.fs.mds_asok(['session', 'ls']) + return int(self._session_by_id(ls_data).get(client_id, {'num_caps': None})['num_caps']) + + def test_reconnect_timeout(self): + # Reconnect timeout + # ================= + # Check that if I stop an MDS and a client goes away, the MDS waits + # for the reconnect period + + mount_a_client_id = self.mount_a.get_global_id() + + self.fs.fail() + + self.mount_a.umount_wait(force=True) + + self.fs.set_joinable() + + self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE) + # Check that the MDS locally reports its state correctly + status = self.fs.mds_asok(['status']) + self.assertIn("reconnect_status", status) + + ls_data = self._session_list() + self.assert_session_count(2, ls_data) + + # The session for the dead client should have the 'reconnect' flag set + 
self.assertTrue(self.get_session(mount_a_client_id)['reconnecting']) + + # Wait for the reconnect state to clear, this should take the + # reconnect timeout period. + in_reconnect_for = self.fs.wait_for_state('up:active', timeout=self.mds_reconnect_timeout * 2) + # Check that the period we waited to enter active is within a factor + # of two of the reconnect timeout. + self.assertGreater(in_reconnect_for, self.mds_reconnect_timeout // 2, + "Should have been in reconnect phase for {0} but only took {1}".format( + self.mds_reconnect_timeout, in_reconnect_for + )) + + self.assert_session_count(1) + + # Check that the client that timed out during reconnect can + # mount again and do I/O + self.mount_a.mount_wait() + self.mount_a.create_destroy() + + self.assert_session_count(2) + + def test_reconnect_eviction(self): + # Eviction during reconnect + # ========================= + mount_a_client_id = self.mount_a.get_global_id() + + self.fs.fail() + + # The mount goes away while the MDS is offline + self.mount_a.kill() + + # wait for it to die + time.sleep(5) + + self.fs.set_joinable() + + # Enter reconnect phase + self.fs.wait_for_state('up:reconnect', reject='up:active', timeout=MDS_RESTART_GRACE) + self.assert_session_count(2) + + # Evict the stuck client + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + self.assert_session_count(1) + + # Observe that we proceed to active phase without waiting full reconnect timeout + evict_til_active = self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + # Once we evict the troublemaker, the reconnect phase should complete + # in well under the reconnect timeout. + self.assertLess(evict_til_active, self.mds_reconnect_timeout * 0.5, + "reconnect did not complete soon enough after eviction, took {0}".format( + evict_til_active + )) + + # We killed earlier so must clean up before trying to use again + self.mount_a.kill_cleanup() + + # Bring the client back + self.mount_a.mount_wait() + self.mount_a.create_destroy() + + def _test_stale_caps(self, write): + session_timeout = self.fs.get_var("session_timeout") + + # Capability release from stale session + # ===================================== + if write: + content = ''.join(random.choices(string.ascii_uppercase + string.digits, k=16)) + cap_holder = self.mount_a.open_background(content=content) + else: + content = '' + self.mount_a.run_shell(["touch", "background_file"]) + self.mount_a.umount_wait() + self.mount_a.mount_wait() + cap_holder = self.mount_a.open_background(write=False) + + self.assert_session_count(2) + mount_a_gid = self.mount_a.get_global_id() + + # Wait for the file to be visible from another client, indicating + # that mount_a has completed its network ops + self.mount_b.wait_for_visible(size=len(content)) + + # Simulate client death + self.mount_a.suspend_netns() + + # wait for it to die so it doesn't voluntarily release buffer cap + time.sleep(5) + + try: + # Now, after session_timeout seconds, the waiter should + # complete their operation when the MDS marks the holder's + # session stale. 
+ cap_waiter = self.mount_b.write_background() + a = time.time() + cap_waiter.wait() + b = time.time() + + # Should have succeeded + self.assertEqual(cap_waiter.exitstatus, 0) + + if write: + self.assert_session_count(1) + else: + self.assert_session_state(mount_a_gid, "stale") + + cap_waited = b - a + log.info("cap_waiter waited {0}s".format(cap_waited)) + self.assertTrue(session_timeout / 2.0 <= cap_waited <= session_timeout * 2.0, + "Capability handover took {0}, expected approx {1}".format( + cap_waited, session_timeout + )) + finally: + self.mount_a.resume_netns() # allow the mount to recover otherwise background proc is unkillable + self.mount_a._kill_background(cap_holder) + + def test_stale_read_caps(self): + self._test_stale_caps(False) + + def test_stale_write_caps(self): + self._test_stale_caps(True) + + def test_evicted_caps(self): + # Eviction while holding a capability + # =================================== + + session_timeout = self.fs.get_var("session_timeout") + + # Take out a write capability on a file on client A, + # and then immediately kill it. + cap_holder = self.mount_a.open_background() + mount_a_client_id = self.mount_a.get_global_id() + + # Wait for the file to be visible from another client, indicating + # that mount_a has completed its network ops + self.mount_b.wait_for_visible() + + # Simulate client death + self.mount_a.suspend_netns() + + # wait for it to die so it doesn't voluntarily release buffer cap + time.sleep(5) + + try: + # The waiter should get stuck waiting for the capability + # held on the MDS by the now-dead client A + cap_waiter = self.mount_b.write_background() + time.sleep(5) + self.assertFalse(cap_waiter.finished) + + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + # Now, because I evicted the old holder of the capability, it should + # immediately get handed over to the waiter + a = time.time() + cap_waiter.wait() + b = time.time() + cap_waited = b - a + log.info("cap_waiter waited {0}s".format(cap_waited)) + # This is the check that it happened 'now' rather than waiting + # for the session timeout + self.assertLess(cap_waited, session_timeout / 2.0, + "Capability handover took {0}, expected less than {1}".format( + cap_waited, session_timeout / 2.0 + )) + + finally: + self.mount_a.resume_netns() # allow the mount to recover otherwise background proc is unkillable + self.mount_a._kill_background(cap_holder) + + def test_trim_caps(self): + # Trim capability when reconnecting MDS + # =================================== + + count = 500 + # Create lots of files + for i in range(count): + self.mount_a.run_shell(["touch", "f{0}".format(i)]) + + # Populate mount_b's cache + self.mount_b.run_shell(["ls", "-l"]) + + client_id = self.mount_b.get_global_id() + num_caps = self._session_num_caps(client_id) + self.assertGreaterEqual(num_caps, count) + + # Restart MDS. 
client should trim its cache when reconnecting to the MDS + self.fs.mds_fail_restart() + self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + + num_caps = self._session_num_caps(client_id) + self.assertLess(num_caps, count, + "should have less than {0} capabilities, have {1}".format( + count, num_caps + )) + + def _is_flockable(self): + a_version_str = get_package_version(self.mount_a.client_remote, "fuse") + b_version_str = get_package_version(self.mount_b.client_remote, "fuse") + flock_version_str = "2.9" + + version_regex = re.compile(r"[0-9\.]+") + a_result = version_regex.match(a_version_str) + self.assertTrue(a_result) + b_result = version_regex.match(b_version_str) + self.assertTrue(b_result) + a_version = version.StrictVersion(a_result.group()) + b_version = version.StrictVersion(b_result.group()) + flock_version=version.StrictVersion(flock_version_str) + + if (a_version >= flock_version and b_version >= flock_version): + log.info("flock locks are available") + return True + else: + log.info("not testing flock locks, machines have versions {av} and {bv}".format( + av=a_version_str,bv=b_version_str)) + return False + + def test_filelock(self): + """ + Check that file lock doesn't get lost after an MDS restart + """ + + flockable = self._is_flockable() + lock_holder = self.mount_a.lock_background(do_flock=flockable) + + self.mount_b.wait_for_visible("background_file-2") + self.mount_b.check_filelock(do_flock=flockable) + + self.fs.mds_fail_restart() + self.fs.wait_for_state('up:active', timeout=MDS_RESTART_GRACE) + + self.mount_b.check_filelock(do_flock=flockable) + + self.mount_a._kill_background(lock_holder) + + def test_filelock_eviction(self): + """ + Check that file lock held by evicted client is given to + waiting client. + """ + if not self._is_flockable(): + self.skipTest("flock is not available") + + lock_holder = self.mount_a.lock_background() + self.mount_b.wait_for_visible("background_file-2") + self.mount_b.check_filelock() + + lock_taker = self.mount_b.lock_and_release() + # Check the taker is waiting (doesn't get it immediately) + time.sleep(2) + self.assertFalse(lock_holder.finished) + self.assertFalse(lock_taker.finished) + + try: + mount_a_client_id = self.mount_a.get_global_id() + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + + # Evicting mount_a should let mount_b's attempt to take the lock + # succeed + self.wait_until_true(lambda: lock_taker.finished, timeout=10) + finally: + self.mount_a._kill_background(lock_holder) + + # teardown() doesn't quite handle this case cleanly, so help it out + self.mount_a.kill() + self.mount_a.kill_cleanup() + + # Bring the client back + self.mount_a.mount_wait() + + def test_dir_fsync(self): + self._test_fsync(True); + + def test_create_fsync(self): + self._test_fsync(False); + + def _test_fsync(self, dirfsync): + """ + That calls to fsync guarantee visibility of metadata to another + client immediately after the fsyncing client dies. 
+ """ + + # Leave this guy out until he's needed + self.mount_b.umount_wait() + + # Create dir + child dentry on client A, and fsync the dir + path = os.path.join(self.mount_a.mountpoint, "subdir") + self.mount_a.run_python( + dedent(""" + import os + import time + + path = "{path}" + + print("Starting creation...") + start = time.time() + + os.mkdir(path) + dfd = os.open(path, os.O_DIRECTORY) + + fd = open(os.path.join(path, "childfile"), "w") + print("Finished creation in {{0}}s".format(time.time() - start)) + + print("Starting fsync...") + start = time.time() + if {dirfsync}: + os.fsync(dfd) + else: + os.fsync(fd) + print("Finished fsync in {{0}}s".format(time.time() - start)) + """.format(path=path,dirfsync=str(dirfsync))) + ) + + # Immediately kill the MDS and then client A + self.fs.fail() + self.mount_a.kill() + self.mount_a.kill_cleanup() + + # Restart the MDS. Wait for it to come up, it'll have to time out in clientreplay + self.fs.set_joinable() + log.info("Waiting for reconnect...") + self.fs.wait_for_state("up:reconnect") + log.info("Waiting for active...") + self.fs.wait_for_state("up:active", timeout=MDS_RESTART_GRACE + self.mds_reconnect_timeout) + log.info("Reached active...") + + # Is the child dentry visible from mount B? + self.mount_b.mount_wait() + self.mount_b.run_shell(["ls", "subdir/childfile"]) + + def test_unmount_for_evicted_client(self): + """Test if client hangs on unmount after evicting the client.""" + mount_a_client_id = self.mount_a.get_global_id() + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + + self.mount_a.umount_wait(require_clean=True, timeout=30) + + def test_mount_after_evicted_client(self): + """Test if a new mount of same fs works after client eviction.""" + + # trash this : we need it to use same remote as mount_a + self.mount_b.umount_wait() + + cl = self.mount_a.__class__ + + # create a new instance of mount_a's class with most of the + # same settings, but mounted on mount_b's mountpoint. 
+ m = cl(ctx=self.mount_a.ctx, + client_config=self.mount_a.client_config, + test_dir=self.mount_a.test_dir, + client_id=self.mount_a.client_id, + client_remote=self.mount_a.client_remote, + client_keyring_path=self.mount_a.client_keyring_path, + cephfs_name=self.mount_a.cephfs_name, + cephfs_mntpt= self.mount_a.cephfs_mntpt, + hostfs_mntpt=self.mount_b.hostfs_mntpt, + brxnet=self.mount_a.ceph_brx_net) + + # evict mount_a + mount_a_client_id = self.mount_a.get_global_id() + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + + m.mount_wait() + m.create_files() + m.check_files() + m.umount_wait(require_clean=True) + + def test_stale_renew(self): + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to handle signal STOP/CONT") + + session_timeout = self.fs.get_var("session_timeout") + + self.mount_a.run_shell(["mkdir", "testdir"]) + self.mount_a.run_shell(["touch", "testdir/file1"]) + # populate readdir cache + self.mount_a.run_shell(["ls", "testdir"]) + self.mount_b.run_shell(["ls", "testdir"]) + + # check if readdir cache is effective + initial_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency']) + self.mount_b.run_shell(["ls", "testdir"]) + current_readdirs = self.fs.mds_asok(['perf', 'dump', 'mds_server', 'req_readdir_latency']) + self.assertEqual(current_readdirs, initial_readdirs); + + mount_b_gid = self.mount_b.get_global_id() + # stop ceph-fuse process of mount_b + self.mount_b.suspend_netns() + + self.assert_session_state(mount_b_gid, "open") + time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale + + self.mount_a.run_shell(["touch", "testdir/file2"]) + self.assert_session_state(mount_b_gid, "stale") + + # resume ceph-fuse process of mount_b + self.mount_b.resume_netns() + # Is the new file visible from mount_b? (caps become invalid after session stale) + self.mount_b.run_shell(["ls", "testdir/file2"]) + + def test_abort_conn(self): + """ + Check that abort_conn() skips closing mds sessions. + """ + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Testing libcephfs function") + + self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false']) + session_timeout = self.fs.get_var("session_timeout") + + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + gid_str = self.mount_a.run_python(dedent(""" + import cephfs as libcephfs + cephfs = libcephfs.LibCephFS(conffile='') + cephfs.mount() + client_id = cephfs.get_instance_id() + cephfs.abort_conn() + print(client_id) + """) + ) + gid = int(gid_str); + + self.assert_session_state(gid, "open") + time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale + self.assert_session_state(gid, "stale") + + def test_dont_mark_unresponsive_client_stale(self): + """ + Test that an unresponsive client holding caps is not marked stale or + evicted unless another clients wants its caps. + """ + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to handle signal STOP/CONT") + + # XXX: To conduct this test we need at least two clients since a + # single client is never evcited by MDS. 
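send_signal('sigstop')/('sigcont') used just below freezes the ceph-fuse process to fake an unresponsive client; at the OS level this is nothing more than SIGSTOP/SIGCONT on the client pid (the pid argument is whatever your mount helper reports):

    import os
    import signal
    import time

    def freeze_process(pid, seconds):
        # stop the process, hold it long enough for the MDS to notice, then resume it
        os.kill(pid, signal.SIGSTOP)
        try:
            time.sleep(seconds)
        finally:
            os.kill(pid, signal.SIGCONT)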
+ SESSION_TIMEOUT = 30 + SESSION_AUTOCLOSE = 50 + time_at_beg = time.time() + mount_a_gid = self.mount_a.get_global_id() + _ = self.mount_a.client_pid + self.fs.set_var('session_timeout', SESSION_TIMEOUT) + self.fs.set_var('session_autoclose', SESSION_AUTOCLOSE) + self.assert_session_count(2, self.fs.mds_asok(['session', 'ls'])) + + # test that client holding cap not required by any other client is not + # marked stale when it becomes unresponsive. + self.mount_a.run_shell(['mkdir', 'dir']) + self.mount_a.send_signal('sigstop') + time.sleep(SESSION_TIMEOUT + 2) + self.assert_session_state(mount_a_gid, "open") + + # test that other clients have to wait to get the caps from + # unresponsive client until session_autoclose. + self.mount_b.run_shell(['stat', 'dir']) + self.assert_session_count(1, self.fs.mds_asok(['session', 'ls'])) + self.assertLess(time.time(), time_at_beg + SESSION_AUTOCLOSE) + + self.mount_a.send_signal('sigcont') + + def test_config_session_timeout(self): + self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false']) + session_timeout = self.fs.get_var("session_timeout") + mount_a_gid = self.mount_a.get_global_id() + + self.fs.mds_asok(['session', 'config', '%s' % mount_a_gid, 'timeout', '%s' % (session_timeout * 2)]) + + self.mount_a.kill(); + + self.assert_session_count(2) + + time.sleep(session_timeout * 1.5) + self.assert_session_state(mount_a_gid, "open") + + time.sleep(session_timeout) + self.assert_session_count(1) + + self.mount_a.kill_cleanup() + + def test_reconnect_after_blocklisted(self): + """ + Test reconnect after blocklisted. + - writing to a fd that was opened before blocklist should return -EBADF + - reading/writing to a file with lost file locks should return -EIO + - readonly fd should continue to work + """ + + self.mount_a.umount_wait() + + if isinstance(self.mount_a, FuseMount): + self.mount_a.mount_wait(mntargs=['--client_reconnect_stale=1', '--fuse_disable_pagecache=1']) + else: + try: + self.mount_a.mount_wait(mntopts=['recover_session=clean']) + except CommandFailedError: + self.mount_a.kill_cleanup() + self.skipTest("Not implemented in current kernel") + + self.mount_a.wait_until_mounted() + + path = os.path.join(self.mount_a.mountpoint, 'testfile_reconnect_after_blocklisted') + pyscript = dedent(""" + import os + import sys + import fcntl + import errno + import time + + fd1 = os.open("{path}.1", os.O_RDWR | os.O_CREAT, 0O666) + fd2 = os.open("{path}.1", os.O_RDONLY) + fd3 = os.open("{path}.2", os.O_RDWR | os.O_CREAT, 0O666) + fd4 = os.open("{path}.2", os.O_RDONLY) + + os.write(fd1, b'content') + os.read(fd2, 1); + + os.write(fd3, b'content') + os.read(fd4, 1); + fcntl.flock(fd4, fcntl.LOCK_SH | fcntl.LOCK_NB) + + print("blocklist") + sys.stdout.flush() + + sys.stdin.readline() + + # wait for mds to close session + time.sleep(10); + + # trigger 'open session' message. 
kclient relies on 'session reject' message + # to detect if itself is blocklisted + try: + os.stat("{path}.1") + except: + pass + + # wait for auto reconnect + time.sleep(10); + + try: + os.write(fd1, b'content') + except OSError as e: + if e.errno != errno.EBADF: + raise + else: + raise RuntimeError("write() failed to raise error") + + os.read(fd2, 1); + + try: + os.read(fd4, 1) + except OSError as e: + if e.errno != errno.EIO: + raise + else: + raise RuntimeError("read() failed to raise error") + """).format(path=path) + rproc = self.mount_a.client_remote.run( + args=['python3', '-c', pyscript], + wait=False, stdin=run.PIPE, stdout=run.PIPE) + + rproc.stdout.readline() + + mount_a_client_id = self.mount_a.get_global_id() + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + + rproc.stdin.writelines(['done\n']) + rproc.stdin.flush() + + rproc.wait() + self.assertEqual(rproc.exitstatus, 0) + + def test_refuse_client_session(self): + """ + Test that client cannot start session when file system flag + refuse_client_session is set + """ + + self.mount_a.umount_wait() + self.fs.set_refuse_client_session(True) + with self.assertRaises(CommandFailedError): + self.mount_a.mount_wait() + + def test_refuse_client_session_on_reconnect(self): + """ + Test that client cannot reconnect when filesystem comes online and + file system flag refuse_client_session is set + """ + + self.mount_a.create_files() + self.mount_a.check_files() + + self.fs.fail() + self.fs.set_refuse_client_session(True) + self.fs.set_joinable() + with self.assert_cluster_log('client could not reconnect as' + ' file system flag' + ' refuse_client_session is set'): + time.sleep(self.fs.get_var("session_timeout") * 1.5) + self.assertEqual(len(self.fs.mds_tell(["session", "ls"])), 0) + self.mount_a.umount_wait(force=True) + diff --git a/qa/tasks/cephfs/test_damage.py b/qa/tasks/cephfs/test_damage.py new file mode 100644 index 000000000..bfaa23453 --- /dev/null +++ b/qa/tasks/cephfs/test_damage.py @@ -0,0 +1,663 @@ +from io import BytesIO, StringIO +import json +import logging +import errno +import re +import time +from teuthology.contextutil import MaxWhileTries +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra.run import wait +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology + +DAMAGED_ON_START = "damaged_on_start" +DAMAGED_ON_LS = "damaged_on_ls" +CRASHED = "server crashed" +NO_DAMAGE = "no damage" +READONLY = "readonly" +FAILED_CLIENT = "client failed" +FAILED_SERVER = "server failed" + +# An EIO in response to a stat from the client +EIO_ON_LS = "eio" + +# An EIO, but nothing in damage table (not ever what we expect) +EIO_NO_DAMAGE = "eio without damage entry" + + +log = logging.getLogger(__name__) + + +class TestDamage(CephFSTestCase): + def _simple_workload_write(self): + self.mount_a.run_shell(["mkdir", "subdir"]) + self.mount_a.write_n_mb("subdir/sixmegs", 6) + return self.mount_a.stat("subdir/sixmegs") + + def is_marked_damaged(self, rank): + mds_map = self.fs.get_mds_map() + return rank in mds_map['damaged'] + + @for_teuthology #459s + def test_object_deletion(self): + """ + That the MDS has a clean 'damaged' response to loss of any single metadata object + """ + + self._simple_workload_write() + + # Hmm, actually it would be nice to permute whether the metadata pool + # state contains sessions or not, but for the moment close this session + # to avoid waiting through reconnect on every MDS start. 
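The radosmo(['export', '-'])/radosm(['import', '-']) round trip used just below snapshots and restores the whole metadata pool between mutations. Roughly equivalent rados CLI usage, with the pool name as an assumption (check `ceph fs ls` for the real one):

    import subprocess

    METADATA_POOL = 'cephfs_metadata'  # assumption

    def snapshot_pool(pool=METADATA_POOL):
        # serialize every object in the pool to a blob on stdout
        return subprocess.check_output(['rados', '-p', pool, 'export', '-'])

    def restore_pool(blob, pool=METADATA_POOL):
        subprocess.run(['rados', '-p', pool, 'import', '-'], input=blob, check=True)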
+ self.mount_a.umount_wait() + for mds_name in self.fs.get_active_names(): + self.fs.mds_asok(["flush", "journal"], mds_name) + + self.fs.fail() + + serialized = self.fs.radosmo(['export', '-']) + + def is_ignored(obj_id, dentry=None): + """ + A filter to avoid redundantly mutating many similar objects (e.g. + stray dirfrags) or similar dentries (e.g. stray dir dentries) + """ + if re.match("60.\.00000000", obj_id) and obj_id != "600.00000000": + return True + + if dentry and obj_id == "100.00000000": + if re.match("stray.+_head", dentry) and dentry != "stray0_head": + return True + + return False + + def get_path(obj_id, dentry=None): + """ + What filesystem path does this object or dentry correspond to? i.e. + what should I poke to see EIO after damaging it? + """ + + if obj_id == "1.00000000" and dentry == "subdir_head": + return "./subdir" + elif obj_id == "10000000000.00000000" and dentry == "sixmegs_head": + return "./subdir/sixmegs" + + # None means ls will do an "ls -R" in hope of seeing some errors + return None + + objects = self.fs.radosmo(["ls"], stdout=StringIO()).strip().split("\n") + objects = [o for o in objects if not is_ignored(o)] + + # Find all objects with an OMAP header + omap_header_objs = [] + for o in objects: + header = self.fs.radosmo(["getomapheader", o], stdout=StringIO()) + # The rados CLI wraps the header output in a hex-printed style + header_bytes = int(re.match("header \((.+) bytes\)", header).group(1)) + if header_bytes > 0: + omap_header_objs.append(o) + + # Find all OMAP key/vals + omap_keys = [] + for o in objects: + keys_str = self.fs.radosmo(["listomapkeys", o], stdout=StringIO()) + if keys_str: + for key in keys_str.strip().split("\n"): + if not is_ignored(o, key): + omap_keys.append((o, key)) + + # Find objects that have data in their bodies + data_objects = [] + for obj_id in objects: + stat_out = self.fs.radosmo(["stat", obj_id], stdout=StringIO()) + size = int(re.match(".+, size (.+)$", stat_out).group(1)) + if size > 0: + data_objects.append(obj_id) + + # Define the various forms of damage we will inflict + class MetadataMutation(object): + def __init__(self, obj_id_, desc_, mutate_fn_, expectation_, ls_path=None): + self.obj_id = obj_id_ + self.desc = desc_ + self.mutate_fn = mutate_fn_ + self.expectation = expectation_ + if ls_path is None: + self.ls_path = "." 
+ else: + self.ls_path = ls_path + + def __eq__(self, other): + return self.desc == other.desc + + def __hash__(self): + return hash(self.desc) + + junk = "deadbeef" * 10 + mutations = [] + + # Removals + for o in objects: + if o in [ + # JournalPointers are auto-replaced if missing (same path as upgrade) + "400.00000000", + # Missing dirfrags for non-system dirs result in empty directory + "10000000000.00000000", + # PurgeQueue is auto-created if not found on startup + "500.00000000", + # open file table is auto-created if not found on startup + "mds0_openfiles.0" + ]: + expectation = NO_DAMAGE + else: + expectation = DAMAGED_ON_START + + log.info("Expectation on rm '{0}' will be '{1}'".format( + o, expectation + )) + + mutations.append(MetadataMutation( + o, + "Delete {0}".format(o), + lambda o=o: self.fs.radosm(["rm", o]), + expectation + )) + + # Blatant corruptions + for obj_id in data_objects: + if obj_id == "500.00000000": + # purge queue corruption results in read-only FS + mutations.append(MetadataMutation( + obj_id, + "Corrupt {0}".format(obj_id), + lambda o=obj_id: self.fs.radosm(["put", o, "-"], stdin=StringIO(junk)), + READONLY + )) + else: + mutations.append(MetadataMutation( + obj_id, + "Corrupt {0}".format(obj_id), + lambda o=obj_id: self.fs.radosm(["put", o, "-"], stdin=StringIO(junk)), + DAMAGED_ON_START + )) + + # Truncations + for o in data_objects: + if o == "500.00000000": + # The PurgeQueue is allowed to be empty: Journaler interprets + # an empty header object as an empty journal. + expectation = NO_DAMAGE + else: + expectation = DAMAGED_ON_START + + mutations.append( + MetadataMutation( + o, + "Truncate {0}".format(o), + lambda o=o: self.fs.radosm(["truncate", o, "0"]), + expectation + )) + + # OMAP value corruptions + for o, k in omap_keys: + if o.startswith("100."): + # Anything in rank 0's 'mydir' + expectation = DAMAGED_ON_START + else: + expectation = EIO_ON_LS + + mutations.append( + MetadataMutation( + o, + "Corrupt omap key {0}:{1}".format(o, k), + lambda o=o,k=k: self.fs.radosm(["setomapval", o, k, junk]), + expectation, + get_path(o, k) + ) + ) + + # OMAP header corruptions + for o in omap_header_objs: + if re.match("60.\.00000000", o) \ + or o in ["1.00000000", "100.00000000", "mds0_sessionmap"]: + expectation = DAMAGED_ON_START + else: + expectation = NO_DAMAGE + + log.info("Expectation on corrupt header '{0}' will be '{1}'".format( + o, expectation + )) + + mutations.append( + MetadataMutation( + o, + "Corrupt omap header on {0}".format(o), + lambda o=o: self.fs.radosm(["setomapheader", o, junk]), + expectation + ) + ) + + results = {} + + for mutation in mutations: + log.info("Applying mutation '{0}'".format(mutation.desc)) + + # Reset MDS state + self.mount_a.umount_wait(force=True) + self.fs.fail() + self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0') + + # Reset RADOS pool state + self.fs.radosm(['import', '-'], stdin=BytesIO(serialized)) + + # Inject the mutation + mutation.mutate_fn() + + # Try starting the MDS + self.fs.set_joinable() + + # How long we'll wait between starting a daemon and expecting + # it to make it through startup, and potentially declare itself + # damaged to the mon cluster. 
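The mutation lambdas above bind the loop variable through a default argument (lambda o=o: ...); without that, every closure would see only the last object of the loop. A standalone illustration of the difference:

    objs = ['a', 'b', 'c']

    late = [lambda: o for o in objs]        # every call returns 'c'
    bound = [lambda o=o: o for o in objs]   # each call returns its own object

    assert [f() for f in late] == ['c', 'c', 'c']
    assert [f() for f in bound] == ['a', 'b', 'c']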
+ startup_timeout = 60 + + if mutation.expectation not in (EIO_ON_LS, DAMAGED_ON_LS, NO_DAMAGE): + if mutation.expectation == DAMAGED_ON_START: + # The MDS may pass through active before making it to damaged + try: + self.wait_until_true(lambda: self.is_marked_damaged(0), startup_timeout) + except RuntimeError: + pass + + # Wait for MDS to either come up or go into damaged state + try: + self.wait_until_true(lambda: self.is_marked_damaged(0) or self.fs.are_daemons_healthy(), startup_timeout) + except RuntimeError: + crashed = False + # Didn't make it to healthy or damaged, did it crash? + for daemon_id, daemon in self.fs.mds_daemons.items(): + if daemon.proc and daemon.proc.finished: + crashed = True + log.error("Daemon {0} crashed!".format(daemon_id)) + daemon.proc = None # So that subsequent stop() doesn't raise error + if not crashed: + # Didn't go healthy, didn't go damaged, didn't crash, so what? + raise + else: + log.info("Result: Mutation '{0}' led to crash".format(mutation.desc)) + results[mutation] = CRASHED + continue + if self.is_marked_damaged(0): + log.info("Result: Mutation '{0}' led to DAMAGED state".format(mutation.desc)) + results[mutation] = DAMAGED_ON_START + continue + else: + log.info("Mutation '{0}' did not prevent MDS startup, attempting ls...".format(mutation.desc)) + else: + try: + self.wait_until_true(self.fs.are_daemons_healthy, 60) + except RuntimeError: + log.info("Result: Mutation '{0}' should have left us healthy, actually not.".format(mutation.desc)) + if self.is_marked_damaged(0): + results[mutation] = DAMAGED_ON_START + else: + results[mutation] = FAILED_SERVER + continue + log.info("Daemons came up after mutation '{0}', proceeding to ls".format(mutation.desc)) + + # MDS is up, should go damaged on ls or client mount + self.mount_a.mount_wait() + if mutation.ls_path == ".": + proc = self.mount_a.run_shell(["ls", "-R", mutation.ls_path], wait=False) + else: + proc = self.mount_a.stat(mutation.ls_path, wait=False) + + if mutation.expectation == DAMAGED_ON_LS: + try: + self.wait_until_true(lambda: self.is_marked_damaged(0), 60) + log.info("Result: Mutation '{0}' led to DAMAGED state after ls".format(mutation.desc)) + results[mutation] = DAMAGED_ON_LS + except RuntimeError: + if self.fs.are_daemons_healthy(): + log.error("Result: Failed to go damaged on mutation '{0}', actually went active".format( + mutation.desc)) + results[mutation] = NO_DAMAGE + else: + log.error("Result: Failed to go damaged on mutation '{0}'".format(mutation.desc)) + results[mutation] = FAILED_SERVER + elif mutation.expectation == READONLY: + proc = self.mount_a.run_shell(["mkdir", "foo"], wait=False) + try: + proc.wait() + except CommandFailedError: + stderr = proc.stderr.getvalue() + log.info(stderr) + if "Read-only file system".lower() in stderr.lower(): + pass + else: + raise + else: + try: + wait([proc], 20) + log.info("Result: Mutation '{0}' did not cause DAMAGED state".format(mutation.desc)) + results[mutation] = NO_DAMAGE + except MaxWhileTries: + log.info("Result: Failed to complete client IO on mutation '{0}'".format(mutation.desc)) + results[mutation] = FAILED_CLIENT + except CommandFailedError as e: + if e.exitstatus == errno.EIO: + log.info("Result: EIO on client") + results[mutation] = EIO_ON_LS + else: + log.info("Result: unexpected error {0} on client".format(e)) + results[mutation] = FAILED_CLIENT + + if mutation.expectation == EIO_ON_LS: + # EIOs mean something handled by DamageTable: assert that it has + # been populated + damage = json.loads( +
self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty')) + if len(damage) == 0: + results[mutation] = EIO_NO_DAMAGE + + failures = [(mutation, result) for (mutation, result) in results.items() if mutation.expectation != result] + if failures: + log.error("{0} mutations had unexpected outcomes:".format(len(failures))) + for mutation, result in failures: + log.error(" Expected '{0}' actually '{1}' from '{2}'".format( + mutation.expectation, result, mutation.desc + )) + raise RuntimeError("{0} mutations had unexpected outcomes".format(len(failures))) + else: + log.info("All {0} mutations had expected outcomes".format(len(mutations))) + + def test_damaged_dentry(self): + # Damage to dentries is interesting because it leaves the + # directory's `complete` flag in a subtle state where + # we have marked the dir complete in order that folks + # can access it, but in actual fact there is a dentry + # missing + self.mount_a.run_shell(["mkdir", "subdir/"]) + + self.mount_a.run_shell(["touch", "subdir/file_undamaged"]) + self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"]) + + subdir_ino = self.mount_a.path_to_ino("subdir") + + self.mount_a.umount_wait() + for mds_name in self.fs.get_active_names(): + self.fs.mds_asok(["flush", "journal"], mds_name) + + self.fs.fail() + + # Corrupt a dentry + junk = "deadbeef" * 10 + dirfrag_obj = "{0:x}.00000000".format(subdir_ino) + self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk]) + + # Start up and try to list it + self.fs.set_joinable() + self.fs.wait_for_daemons() + + self.mount_a.mount_wait() + dentries = self.mount_a.ls("subdir/") + + # The damaged guy should have disappeared + self.assertEqual(dentries, ["file_undamaged"]) + + # I should get ENOENT if I try to read it normally, because + # the dir is considered complete + try: + self.mount_a.stat("subdir/file_to_be_damaged", wait=True) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + raise AssertionError("Expected ENOENT") + + # The fact that there is damage should have been recorded + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 1) + damage_id = damage[0]['id'] + + # If I try to create a dentry with the same name as the damaged guy + # then that should be forbidden + try: + self.mount_a.touch("subdir/file_to_be_damaged") + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EIO) + else: + raise AssertionError("Expected EIO") + + # Attempting that touch will clear the client's complete flag; now + # when I stat it I'll get EIO instead of ENOENT + try: + self.mount_a.stat("subdir/file_to_be_damaged", wait=True) + except CommandFailedError as e: + if isinstance(self.mount_a, FuseMount): + self.assertEqual(e.exitstatus, errno.EIO) + else: + # Old kernel client handles this case differently + self.assertIn(e.exitstatus, [errno.ENOENT, errno.EIO]) + else: + raise AssertionError("Expected EIO") + + nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files") + self.assertEqual(nfiles, "2") + + self.mount_a.umount_wait() + + # Now repair the stats + scrub_json = self.fs.run_scrub(["start", "/subdir", "repair"]) + log.info(json.dumps(scrub_json, indent=2)) + + self.assertNotEqual(scrub_json, None) + self.assertEqual(scrub_json["return_code"], 0) +
self.assertEqual(self.fs.wait_until_scrub_complete(tag=scrub_json["scrub_tag"]), True) + + # Check that the file count is now correct + self.mount_a.mount_wait() + nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files") + self.assertEqual(nfiles, "1") + + # Clean up the omap object + self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk]) + + # Clean up the damagetable entry + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "rm", "{did}".format(did=damage_id)) + + # Now I should be able to create a file with the same name as the + # damaged guy if I want. + self.mount_a.touch("subdir/file_to_be_damaged") + + def test_open_ino_errors(self): + """ + That errors encountered during opening inos are properly propagated + """ + + self.mount_a.run_shell(["mkdir", "dir1"]) + self.mount_a.run_shell(["touch", "dir1/file1"]) + self.mount_a.run_shell(["mkdir", "dir2"]) + self.mount_a.run_shell(["touch", "dir2/file2"]) + self.mount_a.run_shell(["mkdir", "testdir"]) + self.mount_a.run_shell(["ln", "dir1/file1", "testdir/hardlink1"]) + self.mount_a.run_shell(["ln", "dir2/file2", "testdir/hardlink2"]) + + file1_ino = self.mount_a.path_to_ino("dir1/file1") + file2_ino = self.mount_a.path_to_ino("dir2/file2") + dir2_ino = self.mount_a.path_to_ino("dir2") + + # Ensure everything is written to backing store + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + + # Drop everything from the MDS cache + self.fs.fail() + self.fs.journal_tool(['journal', 'reset'], 0) + self.fs.set_joinable() + self.fs.wait_for_daemons() + + self.mount_a.mount_wait() + + # Case 1: un-decodable backtrace + + # Validate that the backtrace is present and decodable + self.fs.read_backtrace(file1_ino) + # Go corrupt the backtrace of dir1/file1 (used for resolving + # testdir/hardlink1).
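+ # (The backtrace is stored in the "parent" xattr of the inode's first data-pool object, which is what _write_data_xattr overwrites with junk here.)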
+ self.fs._write_data_xattr(file1_ino, "parent", "rhubarb") + + # Check that touching the hardlink gives EIO + ran = self.mount_a.run_shell(["stat", "testdir/hardlink1"], wait=False) + try: + ran.wait() + except CommandFailedError: + self.assertTrue("Input/output error" in ran.stderr.getvalue()) + + # Check that an entry is created in the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 1) + self.assertEqual(damage[0]['damage_type'], "backtrace") + self.assertEqual(damage[0]['ino'], file1_ino) + + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "rm", str(damage[0]['id'])) + + + # Case 2: missing dirfrag for the target inode + + self.fs.radosm(["rm", "{0:x}.00000000".format(dir2_ino)]) + + # Check that touching the hardlink gives EIO + ran = self.mount_a.run_shell(["stat", "testdir/hardlink2"], wait=False) + try: + ran.wait() + except CommandFailedError: + self.assertTrue("Input/output error" in ran.stderr.getvalue()) + + # Check that an entry is created in the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 2) + if damage[0]['damage_type'] == "backtrace" : + self.assertEqual(damage[0]['ino'], file2_ino) + self.assertEqual(damage[1]['damage_type'], "dir_frag") + self.assertEqual(damage[1]['ino'], dir2_ino) + else: + self.assertEqual(damage[0]['damage_type'], "dir_frag") + self.assertEqual(damage[0]['ino'], dir2_ino) + self.assertEqual(damage[1]['damage_type'], "backtrace") + self.assertEqual(damage[1]['ino'], file2_ino) + + for entry in damage: + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "rm", str(entry['id'])) + + def test_dentry_first_existing(self): + """ + That the MDS won't abort when the dentry is already known to be damaged. + """ + + def verify_corrupt(): + info = self.fs.read_cache("/a", 0) + log.debug('%s', info) + self.assertEqual(len(info), 1) + dirfrags = info[0]['dirfrags'] + self.assertEqual(len(dirfrags), 1) + dentries = dirfrags[0]['dentries'] + self.assertEqual([dn['path'] for dn in dentries if dn['is_primary']], ['a/c']) + self.assertEqual(dentries[0]['snap_first'], 18446744073709551606) # SNAP_HEAD + + self.mount_a.run_shell_payload("mkdir -p a/b") + self.fs.flush() + self.config_set("mds", "mds_abort_on_newly_corrupt_dentry", False) + self.config_set("mds", "mds_inject_rename_corrupt_dentry_first", "1.0") + time.sleep(5) # for conf to percolate + self.mount_a.run_shell_payload("mv a/b a/c; sync .") + self.mount_a.umount() + verify_corrupt() + self.fs.fail() + self.config_rm("mds", "mds_inject_rename_corrupt_dentry_first") + self.config_set("mds", "mds_abort_on_newly_corrupt_dentry", False) + self.fs.set_joinable() + status = self.fs.status() + self.fs.flush() + self.assertFalse(self.fs.status().hadfailover(status)) + verify_corrupt() + + def test_dentry_first_preflush(self): + """ + That the MDS won't write a dentry with new damage to CDentry::first + to the journal. 
+ """ + + rank0 = self.fs.get_rank() + self.fs.rank_freeze(True, rank=0) + self.mount_a.run_shell_payload("mkdir -p a/{b,c}/d") + self.fs.flush() + self.config_set("mds", "mds_inject_rename_corrupt_dentry_first", "1.0") + time.sleep(5) # for conf to percolate + with self.assert_cluster_log("MDS abort because newly corrupt dentry"): + p = self.mount_a.run_shell_payload("timeout 60 mv a/b a/z", wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(), timeout=self.fs.beacon_timeout) + self.config_rm("mds", "mds_inject_rename_corrupt_dentry_first") + self.fs.rank_freeze(False, rank=0) + self.delete_mds_coredump(rank0['name']) + self.fs.mds_restart(rank0['name']) + self.fs.wait_for_daemons() + p.wait() + self.mount_a.run_shell_payload("stat a/ && find a/") + self.fs.flush() + + def test_dentry_first_precommit(self): + """ + That the MDS won't write a dentry with new damage to CDentry::first + to the directory object. + """ + + fscid = self.fs.id + self.mount_a.run_shell_payload("mkdir -p a/{b,c}/d; sync .") + self.mount_a.umount() # allow immediate scatter write back + self.fs.flush() + # now just twiddle some inode metadata on a regular file + self.mount_a.mount_wait() + self.mount_a.run_shell_payload("chmod 711 a/b/d; sync .") + self.mount_a.umount() # avoid journaling session related things + # okay, now cause the dentry to get damaged after loading from the journal + self.fs.fail() + self.config_set("mds", "mds_inject_journal_corrupt_dentry_first", "1.0") + time.sleep(5) # for conf to percolate + self.fs.set_joinable() + self.fs.wait_for_daemons() + rank0 = self.fs.get_rank() + self.fs.rank_freeze(True, rank=0) + # so now we want to trigger commit but this will crash, so: + with self.assert_cluster_log("MDS abort because newly corrupt dentry"): + c = ['--connect-timeout=60', 'tell', f"mds.{fscid}:0", "flush", "journal"] + p = self.ceph_cluster.mon_manager.run_cluster_cmd(args=c, wait=False, timeoutcmd=30) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(), timeout=self.fs.beacon_timeout) + self.config_rm("mds", "mds_inject_journal_corrupt_dentry_first") + self.fs.rank_freeze(False, rank=0) + self.delete_mds_coredump(rank0['name']) + self.fs.mds_restart(rank0['name']) + self.fs.wait_for_daemons() + try: + p.wait() + except CommandFailedError as e: + print(e) + else: + self.fail("flush journal should fail!") + self.mount_a.mount_wait() + self.mount_a.run_shell_payload("stat a/ && find a/") + self.fs.flush() diff --git a/qa/tasks/cephfs/test_data_scan.py b/qa/tasks/cephfs/test_data_scan.py new file mode 100644 index 000000000..9a93bd622 --- /dev/null +++ b/qa/tasks/cephfs/test_data_scan.py @@ -0,0 +1,796 @@ + +""" +Test our tools for recovering metadata from the data pool +""" +import json + +import logging +import os +import time +import traceback +import stat + +from io import BytesIO, StringIO +from collections import namedtuple, defaultdict +from textwrap import dedent + +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology + +log = logging.getLogger(__name__) + + +ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) + + +class Workload(object): + def __init__(self, filesystem, mount): + self._mount = mount + self._filesystem = filesystem + self._initial_state = None + + # Accumulate backtraces for every failed validation, and return them. 
Backtraces + # are rather verbose, but we only see them when something breaks, and they + # let us see which check failed without having to decorate each check with + # a string + self._errors = [] + + def assert_equal(self, a, b): + try: + if a != b: + raise AssertionError("{0} != {1}".format(a, b)) + except AssertionError as e: + self._errors.append( + ValidationError(e, traceback.format_exc(3)) + ) + + def assert_not_equal(self, a, b): + try: + if a == b: + raise AssertionError("{0} == {1}".format(a, b)) + except AssertionError as e: + self._errors.append( + ValidationError(e, traceback.format_exc(3)) + ) + + def assert_true(self, a): + try: + if not a: + raise AssertionError("{0} is not true".format(a)) + except AssertionError as e: + self._errors.append( + ValidationError(e, traceback.format_exc(3)) + ) + + def write(self): + """ + Write the workload files to the mount + """ + raise NotImplementedError() + + def validate(self): + """ + Read from the mount and validate that the workload files are present (i.e. have + survived or been reconstructed from the test scenario) + """ + raise NotImplementedError() + + def damage(self): + """ + Damage the filesystem pools in ways that will be interesting to recover from. By + default just wipe everything in the metadata pool + """ + # Delete every object in the metadata pool + pool = self._filesystem.get_metadata_pool_name() + self._filesystem.rados(["purge", pool, '--yes-i-really-really-mean-it']) + + def flush(self): + """ + Called after client unmount, after write: flush whatever you want + """ + self._filesystem.mds_asok(["flush", "journal"]) + + def scrub(self): + """ + Called as a final step post recovery before verification. Right now, this + doesn't bother if errors are found in scrub - just that the MDS doesn't + crash and burn during scrub. 
+ """ + out_json = self._filesystem.run_scrub(["start", "/", "repair,recursive"]) + self.assert_not_equal(out_json, None) + self.assert_equal(out_json["return_code"], 0) + self.assert_equal(self._filesystem.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + +class SimpleWorkload(Workload): + """ + Single file, single directory, check that it gets recovered and so does its size + """ + def write(self): + self._mount.run_shell(["mkdir", "subdir"]) + self._mount.write_n_mb("subdir/sixmegs", 6) + self._initial_state = self._mount.stat("subdir/sixmegs") + + def validate(self): + self._mount.run_shell(["sudo", "ls", "subdir"], omit_sudo=False) + st = self._mount.stat("subdir/sixmegs", sudo=True) + self.assert_equal(st['st_size'], self._initial_state['st_size']) + return self._errors + + +class SymlinkWorkload(Workload): + """ + Symlink file, check that it gets recovered as symlink + """ + def write(self): + self._mount.run_shell(["mkdir", "symdir"]) + self._mount.write_n_mb("symdir/onemegs", 1) + self._mount.run_shell(["ln", "-s", "onemegs", "symdir/symlink_onemegs"]) + self._mount.run_shell(["ln", "-s", "symdir/onemegs", "symlink1_onemegs"]) + + def validate(self): + self._mount.run_shell(["sudo", "ls", "symdir"], omit_sudo=False) + st = self._mount.lstat("symdir/symlink_onemegs") + self.assert_true(stat.S_ISLNK(st['st_mode'])) + target = self._mount.readlink("symdir/symlink_onemegs") + self.assert_equal(target, "onemegs") + + st = self._mount.lstat("symlink1_onemegs") + self.assert_true(stat.S_ISLNK(st['st_mode'])) + target = self._mount.readlink("symlink1_onemegs") + self.assert_equal(target, "symdir/onemegs") + return self._errors + + +class MovedFile(Workload): + def write(self): + # Create a file whose backtrace disagrees with his eventual position + # in the metadata. We will see that he gets reconstructed in his + # original position according to his backtrace. 
+ self._mount.run_shell(["mkdir", "subdir_alpha"]) + self._mount.run_shell(["mkdir", "subdir_bravo"]) + self._mount.write_n_mb("subdir_alpha/sixmegs", 6) + self._filesystem.mds_asok(["flush", "journal"]) + self._mount.run_shell(["mv", "subdir_alpha/sixmegs", "subdir_bravo/sixmegs"]) + self._initial_state = self._mount.stat("subdir_bravo/sixmegs") + + def flush(self): + pass + + def validate(self): + self.assert_equal(self._mount.ls(sudo=True), ["subdir_alpha"]) + st = self._mount.stat("subdir_alpha/sixmegs", sudo=True) + self.assert_equal(st['st_size'], self._initial_state['st_size']) + return self._errors + + +class BacktracelessFile(Workload): + def write(self): + self._mount.run_shell(["mkdir", "subdir"]) + self._mount.write_n_mb("subdir/sixmegs", 6) + self._initial_state = self._mount.stat("subdir/sixmegs") + + def flush(self): + # Never flush metadata, so backtrace won't be written + pass + + def validate(self): + ino_name = "%x" % self._initial_state["st_ino"] + + # The inode should be linked into lost+found because we had no path for it + self.assert_equal(self._mount.ls(sudo=True), ["lost+found"]) + self.assert_equal(self._mount.ls("lost+found", sudo=True), [ino_name]) + st = self._mount.stat(f"lost+found/{ino_name}", sudo=True) + + # We might not have got the name or path, but we should still get the size + self.assert_equal(st['st_size'], self._initial_state['st_size']) + + # remove the entry from lost+found directory + self._mount.run_shell(["sudo", "rm", "-f", f'lost+found/{ino_name}'], omit_sudo=False) + self.assert_equal(self._mount.ls("lost+found", sudo=True), []) + + return self._errors + + +class StripedStashedLayout(Workload): + def __init__(self, fs, m, pool=None): + super(StripedStashedLayout, self).__init__(fs, m) + + # Nice small stripes so we can quickly do our writes+validates + self.sc = 4 + self.ss = 65536 + self.os = 262144 + self.pool = pool and pool or self._filesystem.get_data_pool_name() + + self.interesting_sizes = [ + # Exactly stripe_count objects will exist + self.os * self.sc, + # Fewer than stripe_count objects will exist + self.os * self.sc // 2, + self.os * (self.sc - 1) + self.os // 2, + self.os * (self.sc - 1) + self.os // 2 - 1, + self.os * (self.sc + 1) + self.os // 2, + self.os * (self.sc + 1) + self.os // 2 + 1, + # More than stripe_count objects will exist + self.os * self.sc + self.os * self.sc // 2 + ] + + def write(self): + # Create a dir with a striped layout set on it + self._mount.run_shell(["mkdir", "stripey"]) + + self._mount.setfattr("./stripey", "ceph.dir.layout", + "stripe_unit={ss} stripe_count={sc} object_size={os} pool={pool}".format( + ss=self.ss, os=self.os, sc=self.sc, pool=self.pool + )) + + # Write files, then flush metadata so that its layout gets written into an xattr + for i, n_bytes in enumerate(self.interesting_sizes): + self._mount.write_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes) + # This is really just validating the validator + self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes) + self._filesystem.mds_asok(["flush", "journal"]) + + # Write another file in the same way, but this time don't flush the metadata, + # so that it won't have the layout xattr + self._mount.write_test_pattern("stripey/unflushed_file", 1024 * 512) + self._mount.validate_test_pattern("stripey/unflushed_file", 1024 * 512) + + self._initial_state = { + "unflushed_ino": self._mount.path_to_ino("stripey/unflushed_file") + } + + def flush(self): + # Pass because we already selectively flushed during write 
+ pass + + def validate(self): + # The first files should have been recovered into its original location + # with the correct layout: read back correct data + for i, n_bytes in enumerate(self.interesting_sizes): + try: + self._mount.validate_test_pattern("stripey/flushed_file_{0}".format(i), n_bytes) + except CommandFailedError as e: + self._errors.append( + ValidationError("File {0} (size {1}): {2}".format(i, n_bytes, e), traceback.format_exc(3)) + ) + + # The unflushed file should have been recovered into lost+found without + # the correct layout: read back junk + ino_name = "%x" % self._initial_state["unflushed_ino"] + self.assert_equal(self._mount.ls("lost+found", sudo=True), [ino_name]) + try: + self._mount.validate_test_pattern(os.path.join("lost+found", ino_name), 1024 * 512) + except CommandFailedError: + pass + else: + self._errors.append( + ValidationError("Unexpectedly valid data in unflushed striped file", "") + ) + + return self._errors + + +class ManyFilesWorkload(Workload): + def __init__(self, filesystem, mount, file_count): + super(ManyFilesWorkload, self).__init__(filesystem, mount) + self.file_count = file_count + + def write(self): + self._mount.run_shell(["mkdir", "subdir"]) + for n in range(0, self.file_count): + self._mount.write_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024) + + def validate(self): + for n in range(0, self.file_count): + try: + self._mount.validate_test_pattern("subdir/{0}".format(n), 6 * 1024 * 1024) + except CommandFailedError as e: + self._errors.append( + ValidationError("File {0}: {1}".format(n, e), traceback.format_exc(3)) + ) + + return self._errors + + +class MovedDir(Workload): + def write(self): + # Create a nested dir that we will then move. Two files with two different + # backtraces referring to the moved dir, claiming two different locations for + # it. We will see that only one backtrace wins and the dir ends up with + # single linkage. 
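+ # (validate() below accepts either grandparent as the winner; it only requires that exactly one of the two claimed locations survives recovery.)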
+ self._mount.run_shell(["mkdir", "-p", "grandmother/parent"]) + self._mount.write_n_mb("grandmother/parent/orig_pos_file", 1) + self._filesystem.mds_asok(["flush", "journal"]) + self._mount.run_shell(["mkdir", "grandfather"]) + self._mount.run_shell(["mv", "grandmother/parent", "grandfather"]) + self._mount.write_n_mb("grandfather/parent/new_pos_file", 2) + self._filesystem.mds_asok(["flush", "journal"]) + + self._initial_state = ( + self._mount.stat("grandfather/parent/orig_pos_file"), + self._mount.stat("grandfather/parent/new_pos_file") + ) + + def validate(self): + root_files = self._mount.ls() + self.assert_equal(len(root_files), 1) + self.assert_equal(root_files[0] in ["grandfather", "grandmother"], True) + winner = root_files[0] + st_opf = self._mount.stat(f"{winner}/parent/orig_pos_file", sudo=True) + st_npf = self._mount.stat(f"{winner}/parent/new_pos_file", sudo=True) + + self.assert_equal(st_opf['st_size'], self._initial_state[0]['st_size']) + self.assert_equal(st_npf['st_size'], self._initial_state[1]['st_size']) + + +class MissingZerothObject(Workload): + def write(self): + self._mount.run_shell(["mkdir", "subdir"]) + self._mount.write_n_mb("subdir/sixmegs", 6) + self._initial_state = self._mount.stat("subdir/sixmegs") + + def damage(self): + super(MissingZerothObject, self).damage() + zeroth_id = "{0:x}.00000000".format(self._initial_state['st_ino']) + self._filesystem.rados(["rm", zeroth_id], pool=self._filesystem.get_data_pool_name()) + + def validate(self): + ino = self._initial_state['st_ino'] + st = self._mount.stat(f"lost+found/{ino:x}", sudo=True) + self.assert_equal(st['st_size'], self._initial_state['st_size']) + + +class NonDefaultLayout(Workload): + """ + Check that the reconstruction copes with files that have a different + object size in their layout + """ + def write(self): + self._mount.run_shell(["touch", "datafile"]) + self._mount.setfattr("./datafile", "ceph.file.layout.object_size", "8388608") + self._mount.run_shell(["dd", "if=/dev/urandom", "of=./datafile", "bs=1M", "count=32"]) + self._initial_state = self._mount.stat("datafile") + + def validate(self): + # Check we got the layout reconstructed properly + object_size = int(self._mount.getfattr("./datafile", "ceph.file.layout.object_size", sudo=True)) + self.assert_equal(object_size, 8388608) + + # Check we got the file size reconstructed properly + st = self._mount.stat("datafile", sudo=True) + self.assert_equal(st['st_size'], self._initial_state['st_size']) + + +class TestDataScan(CephFSTestCase): + MDSS_REQUIRED = 2 + + def is_marked_damaged(self, rank): + mds_map = self.fs.get_mds_map() + return rank in mds_map['damaged'] + + def _rebuild_metadata(self, workload, workers=1): + """ + That when all objects in metadata pool are removed, we can rebuild a metadata pool + based on the contents of a data pool, and a client can see and read our files. 
+ """ + + # First, inject some files + + workload.write() + + # Unmount the client and flush the journal: the tool should also cope with + # situations where there is dirty metadata, but we'll test that separately + self.mount_a.umount_wait() + workload.flush() + + # Stop the MDS + self.fs.fail() + + # After recovery, we need the MDS to not be strict about stats (in production these options + # are off by default, but in QA we need to explicitly disable them) + self.fs.set_ceph_conf('mds', 'mds verify scatter', False) + self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False) + + # Apply any data damage the workload wants + workload.damage() + + # Reset the MDS map in case multiple ranks were in play: recovery procedure + # only understands how to rebuild metadata under rank 0 + self.fs.reset() + + self.fs.set_joinable() # redundant with reset + + def get_state(mds_id): + info = self.mds_cluster.get_mds_info(mds_id) + return info['state'] if info is not None else None + + self.wait_until_true(lambda: self.is_marked_damaged(0), 60) + for mds_id in self.fs.mds_ids: + self.wait_until_equal( + lambda: get_state(mds_id), + "up:standby", + timeout=60) + + self.fs.table_tool([self.fs.name + ":0", "reset", "session"]) + self.fs.table_tool([self.fs.name + ":0", "reset", "snap"]) + self.fs.table_tool([self.fs.name + ":0", "reset", "inode"]) + + # Run the recovery procedure + if False: + with self.assertRaises(CommandFailedError): + # Normal reset should fail when no objects are present, we'll use --force instead + self.fs.journal_tool(["journal", "reset"], 0) + + self.fs.journal_tool(["journal", "reset", "--force"], 0) + self.fs.data_scan(["init"]) + self.fs.data_scan(["scan_extents"], worker_count=workers) + self.fs.data_scan(["scan_inodes"], worker_count=workers) + self.fs.data_scan(["scan_links"]) + + # Mark the MDS repaired + self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0') + + # Start the MDS + self.fs.mds_restart() + self.fs.wait_for_daemons() + log.info(str(self.mds_cluster.status())) + + # Mount a client + self.mount_a.mount_wait() + + # run scrub as it is recommended post recovery for most + # (if not all) recovery mechanisms. 
+ workload.scrub() + + # See that the files are present and correct + errors = workload.validate() + if errors: + log.error("Validation errors found: {0}".format(len(errors))) + for e in errors: + log.error(e.exception) + log.error(e.backtrace) + raise AssertionError("Validation failed, first error: {0}\n{1}".format( + errors[0].exception, errors[0].backtrace + )) + + def test_rebuild_simple(self): + self._rebuild_metadata(SimpleWorkload(self.fs, self.mount_a)) + + def test_rebuild_symlink(self): + self._rebuild_metadata(SymlinkWorkload(self.fs, self.mount_a)) + + def test_rebuild_moved_file(self): + self._rebuild_metadata(MovedFile(self.fs, self.mount_a)) + + def test_rebuild_backtraceless(self): + self._rebuild_metadata(BacktracelessFile(self.fs, self.mount_a)) + + def test_rebuild_moved_dir(self): + self._rebuild_metadata(MovedDir(self.fs, self.mount_a)) + + def test_rebuild_missing_zeroth(self): + self._rebuild_metadata(MissingZerothObject(self.fs, self.mount_a)) + + def test_rebuild_nondefault_layout(self): + self._rebuild_metadata(NonDefaultLayout(self.fs, self.mount_a)) + + def test_stashed_layout(self): + self._rebuild_metadata(StripedStashedLayout(self.fs, self.mount_a)) + + def _dirfrag_keys(self, object_id): + keys_str = self.fs.radosmo(["listomapkeys", object_id], stdout=StringIO()) + if keys_str: + return keys_str.strip().split("\n") + else: + return [] + + def test_fragmented_injection(self): + """ + That when injecting a dentry into a fragmented directory, we put it in the right fragment. + """ + + file_count = 100 + file_names = ["%s" % n for n in range(0, file_count)] + + # Make sure and disable dirfrag auto merging and splitting + self.fs.set_ceph_conf('mds', 'mds bal merge size', 0) + self.fs.set_ceph_conf('mds', 'mds bal split size', 100 * file_count) + + # Create a directory of `file_count` files, each named after its + # decimal number and containing the string of its decimal number + self.mount_a.run_python(dedent(""" + import os + path = os.path.join("{path}", "subdir") + os.mkdir(path) + for n in range(0, {file_count}): + open(os.path.join(path, "%s" % n), 'w').write("%s" % n) + """.format( + path=self.mount_a.mountpoint, + file_count=file_count + ))) + + dir_ino = self.mount_a.path_to_ino("subdir") + + # Only one MDS should be active! 
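+ # (the dirfrag split below is sent over the admin socket to that single active MDS, addressed by name)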
+ self.assertEqual(len(self.fs.get_active_names()), 1) + + # Ensure that one directory is fragmented + mds_id = self.fs.get_active_names()[0] + self.fs.mds_asok(["dirfrag", "split", "/subdir", "0/0", "1"], mds_id) + + # Flush journal and stop MDS + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"], mds_id) + self.fs.fail() + + # Pick a dentry and wipe out its key + # Because I did a 1 bit split, I know one frag will be named <inode>.01000000 + frag_obj_id = "{0:x}.01000000".format(dir_ino) + keys = self._dirfrag_keys(frag_obj_id) + victim_key = keys[7] # arbitrary choice + log.info("victim_key={0}".format(victim_key)) + victim_dentry = victim_key.split("_head")[0] + self.fs.radosm(["rmomapkey", frag_obj_id, victim_key]) + + # Start filesystem back up, observe that the file appears to be gone in an `ls` + self.fs.set_joinable() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + files = self.mount_a.run_shell(["ls", "subdir/"]).stdout.getvalue().strip().split("\n") + self.assertListEqual(sorted(files), sorted(list(set(file_names) - set([victim_dentry])))) + + # Stop the filesystem + self.mount_a.umount_wait() + self.fs.fail() + + # Run data-scan, observe that it inserts our dentry back into the correct fragment + # by checking the omap now has the dentry's key again + self.fs.data_scan(["scan_extents"]) + self.fs.data_scan(["scan_inodes"]) + self.fs.data_scan(["scan_links"]) + self.assertIn(victim_key, self._dirfrag_keys(frag_obj_id)) + + # Start the filesystem and check that the dentry we deleted is now once again visible + # and points to the correct file data. + self.fs.set_joinable() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + self.mount_a.run_shell(["ls", "-l", "subdir/"]) # debugging + # Use sudo because cephfs-data-scan will reinsert the dentry with root ownership, it can't know the real owner. 
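+ # (each file was created containing its own name, so reading the name back proves the reinserted dentry points at the original data object)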
+ out = self.mount_a.run_shell_payload(f"sudo cat subdir/{victim_dentry}", omit_sudo=False).stdout.getvalue().strip() + self.assertEqual(out, victim_dentry) + + # Finally, close the loop by checking our injected dentry survives a merge + mds_id = self.fs.get_active_names()[0] + self.mount_a.ls("subdir") # Do an ls to ensure both frags are in cache so the merge will work + self.fs.mds_asok(["dirfrag", "merge", "/subdir", "0/0"], mds_id) + self.fs.mds_asok(["flush", "journal"], mds_id) + frag_obj_id = "{0:x}.00000000".format(dir_ino) + keys = self._dirfrag_keys(frag_obj_id) + self.assertListEqual(sorted(keys), sorted(["%s_head" % f for f in file_names])) + + # run scrub to update and make sure rstat.rbytes info in subdir inode and dirfrag + # are matched + out_json = self.fs.run_scrub(["start", "/subdir", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # Remove the whole 'sudbdir' directory + self.mount_a.run_shell(["rm", "-rf", "subdir/"]) + + @for_teuthology + def test_parallel_execution(self): + self._rebuild_metadata(ManyFilesWorkload(self.fs, self.mount_a, 25), workers=7) + + def test_pg_files(self): + """ + That the pg files command tells us which files are associated with + a particular PG + """ + file_count = 20 + self.mount_a.run_shell(["mkdir", "mydir"]) + self.mount_a.create_n_files("mydir/myfile", file_count) + + # Some files elsewhere in the system that we will ignore + # to check that the tool is filtering properly + self.mount_a.run_shell(["mkdir", "otherdir"]) + self.mount_a.create_n_files("otherdir/otherfile", file_count) + + pgs_to_files = defaultdict(list) + # Rough (slow) reimplementation of the logic + for i in range(0, file_count): + file_path = "mydir/myfile_{0}".format(i) + ino = self.mount_a.path_to_ino(file_path) + obj = "{0:x}.{1:08x}".format(ino, 0) + pgid = json.loads(self.fs.mon_manager.raw_cluster_cmd( + "osd", "map", self.fs.get_data_pool_name(), obj, + "--format=json-pretty" + ))['pgid'] + pgs_to_files[pgid].append(file_path) + log.info("{0}: {1}".format(file_path, pgid)) + + pg_count = self.fs.get_pool_pg_num(self.fs.get_data_pool_name()) + for pg_n in range(0, pg_count): + pg_str = "{0}.{1:x}".format(self.fs.get_data_pool_id(), pg_n) + out = self.fs.data_scan(["pg_files", "mydir", pg_str]) + lines = [l for l in out.split("\n") if l] + log.info("{0}: {1}".format(pg_str, lines)) + self.assertSetEqual(set(lines), set(pgs_to_files[pg_str])) + + def test_rebuild_linkage(self): + """ + The scan_links command fixes linkage errors + """ + self.mount_a.run_shell(["mkdir", "testdir1"]) + self.mount_a.run_shell(["mkdir", "testdir2"]) + dir1_ino = self.mount_a.path_to_ino("testdir1") + dir2_ino = self.mount_a.path_to_ino("testdir2") + dirfrag1_oid = "{0:x}.00000000".format(dir1_ino) + dirfrag2_oid = "{0:x}.00000000".format(dir2_ino) + + self.mount_a.run_shell(["touch", "testdir1/file1"]) + self.mount_a.run_shell(["ln", "testdir1/file1", "testdir1/link1"]) + self.mount_a.run_shell(["ln", "testdir1/file1", "testdir2/link2"]) + + mds_id = self.fs.get_active_names()[0] + self.fs.mds_asok(["flush", "journal"], mds_id) + + dirfrag1_keys = self._dirfrag_keys(dirfrag1_oid) + + # introduce duplicated primary link + file1_key = "file1_head" + self.assertIn(file1_key, dirfrag1_keys) + file1_omap_data = self.fs.radosmo(["getomapval", dirfrag1_oid, file1_key, '-']) + self.fs.radosm(["setomapval", dirfrag2_oid, file1_key], 
stdin=BytesIO(file1_omap_data)) + self.assertIn(file1_key, self._dirfrag_keys(dirfrag2_oid)) + + # remove a remote link, make inode link count incorrect + link1_key = 'link1_head' + self.assertIn(link1_key, dirfrag1_keys) + self.fs.radosm(["rmomapkey", dirfrag1_oid, link1_key]) + + # increase good primary link's version + self.mount_a.run_shell(["touch", "testdir1/file1"]) + self.mount_a.umount_wait() + + self.fs.mds_asok(["flush", "journal"], mds_id) + self.fs.fail() + + # repair linkage errors + self.fs.data_scan(["scan_links"]) + + # primary link in testdir2 was deleted? + self.assertNotIn(file1_key, self._dirfrag_keys(dirfrag2_oid)) + + self.fs.set_joinable() + self.fs.wait_for_daemons() + + self.mount_a.mount_wait() + + # link count was adjusted? + file1_nlink = self.mount_a.path_to_nlink("testdir1/file1") + self.assertEqual(file1_nlink, 2) + + out_json = self.fs.run_scrub(["start", "/testdir1", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + def test_rebuild_inotable(self): + """ + The scan_links command repair inotables + """ + self.fs.set_max_mds(2) + self.fs.wait_for_daemons() + + active_mds_names = self.fs.get_active_names() + mds0_id = active_mds_names[0] + mds1_id = active_mds_names[1] + + self.mount_a.run_shell(["mkdir", "dir1"]) + dir_ino = self.mount_a.path_to_ino("dir1") + self.mount_a.setfattr("dir1", "ceph.dir.pin", "1") + # wait for subtree migration + + file_ino = 0; + while True: + time.sleep(1) + # allocate an inode from mds.1 + self.mount_a.run_shell(["touch", "dir1/file1"]) + file_ino = self.mount_a.path_to_ino("dir1/file1") + if file_ino >= (2 << 40): + break + self.mount_a.run_shell(["rm", "-f", "dir1/file1"]) + + self.mount_a.umount_wait() + + self.fs.mds_asok(["flush", "journal"], mds0_id) + self.fs.mds_asok(["flush", "journal"], mds1_id) + self.fs.fail() + + self.fs.radosm(["rm", "mds0_inotable"]) + self.fs.radosm(["rm", "mds1_inotable"]) + + self.fs.data_scan(["scan_links", "--filesystem", self.fs.name]) + + mds0_inotable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "inode"])) + self.assertGreaterEqual( + mds0_inotable['0']['data']['inotable']['free'][0]['start'], dir_ino) + + mds1_inotable = json.loads(self.fs.table_tool([self.fs.name + ":1", "show", "inode"])) + self.assertGreaterEqual( + mds1_inotable['1']['data']['inotable']['free'][0]['start'], file_ino) + + self.fs.set_joinable() + self.fs.wait_for_daemons() + + out_json = self.fs.run_scrub(["start", "/dir1", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + def test_rebuild_snaptable(self): + """ + The scan_links command repair snaptable + """ + self.fs.set_allow_new_snaps(True) + + self.mount_a.run_shell(["mkdir", "dir1"]) + self.mount_a.run_shell(["mkdir", "dir1/.snap/s1"]) + self.mount_a.run_shell(["mkdir", "dir1/.snap/s2"]) + self.mount_a.run_shell(["rmdir", "dir1/.snap/s2"]) + + self.mount_a.umount_wait() + + mds0_id = self.fs.get_active_names()[0] + self.fs.mds_asok(["flush", "journal"], mds0_id) + + # wait for mds to update removed snaps + time.sleep(10) + + old_snaptable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "snap"])) + # stamps may have minor difference + for item in old_snaptable['snapserver']['snaps']: + del item['stamp'] + + self.fs.radosm(["rm", 
"mds_snaptable"]) + self.fs.data_scan(["scan_links", "--filesystem", self.fs.name]) + + new_snaptable = json.loads(self.fs.table_tool([self.fs.name + ":0", "show", "snap"])) + for item in new_snaptable['snapserver']['snaps']: + del item['stamp'] + self.assertGreaterEqual( + new_snaptable['snapserver']['last_snap'], old_snaptable['snapserver']['last_snap']) + self.assertEqual( + new_snaptable['snapserver']['snaps'], old_snaptable['snapserver']['snaps']) + + out_json = self.fs.run_scrub(["start", "/dir1", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + def _prepare_extra_data_pool(self, set_root_layout=True): + extra_data_pool_name = self.fs.get_data_pool_name() + '_extra' + self.fs.add_data_pool(extra_data_pool_name) + if set_root_layout: + self.mount_a.setfattr(".", "ceph.dir.layout.pool", + extra_data_pool_name) + return extra_data_pool_name + + def test_extra_data_pool_rebuild_simple(self): + self._prepare_extra_data_pool() + self._rebuild_metadata(SimpleWorkload(self.fs, self.mount_a)) + + def test_extra_data_pool_rebuild_few_files(self): + self._prepare_extra_data_pool() + self._rebuild_metadata(ManyFilesWorkload(self.fs, self.mount_a, 5), workers=1) + + @for_teuthology + def test_extra_data_pool_rebuild_many_files_many_workers(self): + self._prepare_extra_data_pool() + self._rebuild_metadata(ManyFilesWorkload(self.fs, self.mount_a, 25), workers=7) + + def test_extra_data_pool_stashed_layout(self): + pool_name = self._prepare_extra_data_pool(False) + self._rebuild_metadata(StripedStashedLayout(self.fs, self.mount_a, pool_name)) diff --git a/qa/tasks/cephfs/test_dump_tree.py b/qa/tasks/cephfs/test_dump_tree.py new file mode 100644 index 000000000..48a2c6f00 --- /dev/null +++ b/qa/tasks/cephfs/test_dump_tree.py @@ -0,0 +1,66 @@ +from tasks.cephfs.cephfs_test_case import CephFSTestCase +import random +import os + +class TestDumpTree(CephFSTestCase): + def get_paths_to_ino(self): + inos = {} + p = self.mount_a.run_shell(["find", "./"]) + paths = p.stdout.getvalue().strip().split() + for path in paths: + inos[path] = self.mount_a.path_to_ino(path, False) + + return inos + + def populate(self): + self.mount_a.run_shell(["git", "clone", + "https://github.com/ceph/ceph-qa-suite"]) + + def test_basic(self): + self.mount_a.run_shell(["mkdir", "parent"]) + self.mount_a.run_shell(["mkdir", "parent/child"]) + self.mount_a.run_shell(["touch", "parent/child/file"]) + self.mount_a.run_shell(["mkdir", "parent/child/grandchild"]) + self.mount_a.run_shell(["touch", "parent/child/grandchild/file"]) + + inos = self.get_paths_to_ino() + tree = self.fs.mds_asok(["dump", "tree", "/parent/child", "1"]) + + target_inos = [inos["./parent/child"], inos["./parent/child/file"], + inos["./parent/child/grandchild"]] + + for ino in tree: + del target_inos[target_inos.index(ino['ino'])] # don't catch! + + assert(len(target_inos) == 0) + + def test_random(self): + random.seed(0) + + self.populate() + inos = self.get_paths_to_ino() + target = random.sample(inos.keys(), 1)[0] + + if target != "./": + target = os.path.dirname(target) + + subtree = [path for path in inos.keys() if path.startswith(target)] + target_inos = [inos[path] for path in subtree] + tree = self.fs.mds_asok(["dump", "tree", target[1:]]) + + for ino in tree: + del target_inos[target_inos.index(ino['ino'])] # don't catch! 
+ + assert(len(target_inos) == 0) + + target_depth = target.count('/') + maxdepth = max([path.count('/') for path in subtree]) - target_depth + depth = random.randint(0, maxdepth) + target_inos = [inos[path] for path in subtree \ + if path.count('/') <= depth + target_depth] + tree = self.fs.mds_asok(["dump", "tree", target[1:], str(depth)]) + + for ino in tree: + del target_inos[target_inos.index(ino['ino'])] # don't catch! + + assert(len(target_inos) == 0) diff --git a/qa/tasks/cephfs/test_exports.py b/qa/tasks/cephfs/test_exports.py new file mode 100644 index 000000000..4b7e884ec --- /dev/null +++ b/qa/tasks/cephfs/test_exports.py @@ -0,0 +1,582 @@ +import logging +import random +import time +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + +class TestExports(CephFSTestCase): + MDSS_REQUIRED = 2 + CLIENTS_REQUIRED = 2 + + def test_session_race(self): + """ + Test session creation race. + + See: https://tracker.ceph.com/issues/24072#change-113056 + """ + + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + rank1 = self.fs.get_rank(rank=1, status=status) + + # Create a directory that is pre-exported to rank 1 + self.mount_a.run_shell(["mkdir", "-p", "a/aa"]) + self.mount_a.setfattr("a", "ceph.dir.pin", "1") + self._wait_subtrees([('/a', 1)], status=status, rank=1) + + # Now set the mds config to allow the race + self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "true"], rank=1) + + # Now create another directory and try to export it + self.mount_b.run_shell(["mkdir", "-p", "b/bb"]) + self.mount_b.setfattr("b", "ceph.dir.pin", "1") + + time.sleep(5) + + # Now turn off the race so that it doesn't wait again + self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "false"], rank=1) + + # Now try to create a session with rank 1 by accessing a dir known to + # be there; if buggy, this should cause rank 1 to crash: + self.mount_b.run_shell(["ls", "a"]) + + # Check if rank1 changed (standby took over?)
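+ # (an unchanged GID means the original rank 1 daemon survived, i.e. it did not crash and get replaced by a standby)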
+ new_rank1 = self.fs.get_rank(rank=1) + self.assertEqual(rank1['gid'], new_rank1['gid']) + +class TestExportPin(CephFSTestCase): + MDSS_REQUIRED = 3 + CLIENTS_REQUIRED = 1 + + def setUp(self): + CephFSTestCase.setUp(self) + + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() + + self.mount_a.run_shell_payload("mkdir -p 1/2/3/4") + + def test_noop(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "-1") + time.sleep(30) # for something to not happen + self._wait_subtrees([], status=self.status) + + def test_negative(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "-2341") + time.sleep(30) # for something to not happen + self._wait_subtrees([], status=self.status) + + def test_empty_pin(self): + self.mount_a.setfattr("1/2/3/4", "ceph.dir.pin", "1") + time.sleep(30) # for something to not happen + self._wait_subtrees([], status=self.status) + + def test_trivial(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1)], status=self.status, rank=1) + + def test_export_targets(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1)], status=self.status, rank=1) + self.status = self.fs.status() + r0 = self.status.get_rank(self.fs.id, 0) + self.assertTrue(sorted(r0['export_targets']) == [1]) + + def test_redundant(self): + # redundant pin /1/2 to rank 1 + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1)], status=self.status, rank=1) + self.mount_a.setfattr("1/2", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 1), ('/1/2', 1)], status=self.status, rank=1) + + def test_reassignment(self): + self.mount_a.setfattr("1/2", "ceph.dir.pin", "1") + self._wait_subtrees([('/1/2', 1)], status=self.status, rank=1) + self.mount_a.setfattr("1/2", "ceph.dir.pin", "0") + self._wait_subtrees([('/1/2', 0)], status=self.status, rank=0) + + def test_phantom_rank(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "0") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "10") + time.sleep(30) # wait for nothing weird to happen + self._wait_subtrees([('/1', 0)], status=self.status) + + def test_nested(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "0") + self.mount_a.setfattr("1/2/3", "ceph.dir.pin", "2") + self._wait_subtrees([('/1', 1), ('/1/2', 0), ('/1/2/3', 2)], status=self.status, rank=2) + + def test_nested_unset(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "2") + self._wait_subtrees([('/1', 1), ('/1/2', 2)], status=self.status, rank=1) + self.mount_a.setfattr("1/2", "ceph.dir.pin", "-1") + self._wait_subtrees([('/1', 1)], status=self.status, rank=1) + + def test_rename(self): + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self.mount_a.run_shell_payload("mkdir -p 9/8/7") + self.mount_a.setfattr("9/8", "ceph.dir.pin", "0") + self._wait_subtrees([('/1', 1), ("/9/8", 0)], status=self.status, rank=0) + self.mount_a.run_shell_payload("mv 9/8 1/2") + self._wait_subtrees([('/1', 1), ("/1/2/8", 0)], status=self.status, rank=0) + + def test_getfattr(self): + # pin /1 to rank 1 and /1/2 to rank 0 + self.mount_a.setfattr("1", "ceph.dir.pin", "1") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "0") + self._wait_subtrees([('/1', 1), ('/1/2', 0)], status=self.status, rank=1) + + if not isinstance(self.mount_a, FuseMount): + p = self.mount_a.client_remote.sh('uname -r', wait=True) + dir_pin = self.mount_a.getfattr("1", "ceph.dir.pin") + log.debug("mount.getfattr('1','ceph.dir.pin'): %s " % dir_pin) + if str(p)
< "5" and not(dir_pin): + self.skipTest("Kernel does not support getting the extended attribute ceph.dir.pin") + self.assertEqual(self.mount_a.getfattr("1", "ceph.dir.pin"), '1') + self.assertEqual(self.mount_a.getfattr("1/2", "ceph.dir.pin"), '0') + + def test_export_pin_cache_drop(self): + """ + That the export pin does not prevent empty (nothing in cache) subtree merging. + """ + + self.mount_a.setfattr("1", "ceph.dir.pin", "0") + self.mount_a.setfattr("1/2", "ceph.dir.pin", "1") + self._wait_subtrees([('/1', 0), ('/1/2', 1)], status=self.status) + self.mount_a.umount_wait() # release all caps + def _drop(): + self.fs.ranks_tell(["cache", "drop"], status=self.status) + # drop cache multiple times to clear replica pins + self._wait_subtrees([], status=self.status, action=_drop) + + def test_open_file(self): + """ + Test opening a file via a hard link that is not in the same mds as the inode. + + See https://tracker.ceph.com/issues/58411 + """ + + self.mount_a.run_shell_payload("mkdir -p target link") + self.mount_a.touch("target/test.txt") + self.mount_a.run_shell_payload("ln target/test.txt link/test.txt") + self.mount_a.setfattr("target", "ceph.dir.pin", "0") + self.mount_a.setfattr("link", "ceph.dir.pin", "1") + self._wait_subtrees([("/target", 0), ("/link", 1)], status=self.status) + + # Release client cache, otherwise the bug may not be triggered even if buggy. + self.mount_a.remount() + + # Open the file with access mode(O_CREAT|O_WRONLY|O_TRUNC), + # this should cause the rank 1 to crash if buggy. + # It's OK to use 'truncate -s 0 link/test.txt' here, + # its access mode is (O_CREAT|O_WRONLY), it can also trigger this bug. + log.info("test open mode (O_CREAT|O_WRONLY|O_TRUNC)") + proc = self.mount_a.open_for_writing("link/test.txt") + time.sleep(1) + success = proc.finished and self.fs.rank_is_running(rank=1) + + # Test other write modes too. + if success: + self.mount_a.remount() + log.info("test open mode (O_WRONLY|O_TRUNC)") + proc = self.mount_a.open_for_writing("link/test.txt", creat=False) + time.sleep(1) + success = proc.finished and self.fs.rank_is_running(rank=1) + if success: + self.mount_a.remount() + log.info("test open mode (O_CREAT|O_WRONLY)") + proc = self.mount_a.open_for_writing("link/test.txt", trunc=False) + time.sleep(1) + success = proc.finished and self.fs.rank_is_running(rank=1) + + # Test open modes too. + if success: + self.mount_a.remount() + log.info("test open mode (O_RDONLY)") + proc = self.mount_a.open_for_reading("link/test.txt") + time.sleep(1) + success = proc.finished and self.fs.rank_is_running(rank=1) + + if success: + # All tests done, rank 1 didn't crash. + return + + if not proc.finished: + log.warning("open operation is blocked, kill it") + proc.kill() + + if not self.fs.rank_is_running(rank=1): + log.warning("rank 1 crashed") + + self.mount_a.umount_wait(force=True) + + self.assertTrue(success, "open operation failed") + +class TestEphemeralPins(CephFSTestCase): + MDSS_REQUIRED = 3 + CLIENTS_REQUIRED = 1 + + def setUp(self): + CephFSTestCase.setUp(self) + + self.config_set('mds', 'mds_export_ephemeral_random', True) + self.config_set('mds', 'mds_export_ephemeral_distributed', True) + self.config_set('mds', 'mds_export_ephemeral_random_max', 1.0) + + self.mount_a.run_shell_payload(""" +set -e + +# Use up a random number of inode numbers so the ephemeral pinning is not the same every test. 
+mkdir .inode_number_thrash +count=$((RANDOM % 1024)) +for ((i = 0; i < count; i++)); do touch .inode_number_thrash/$i; done +rm -rf .inode_number_thrash +""") + + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() + + def _setup_tree(self, path="tree", export=-1, distributed=False, random=0.0, count=100, wait=True): + return self.mount_a.run_shell_payload(f""" +set -ex +mkdir -p {path} +{f"setfattr -n ceph.dir.pin -v {export} {path}" if export >= 0 else ""} +{f"setfattr -n ceph.dir.pin.distributed -v 1 {path}" if distributed else ""} +{f"setfattr -n ceph.dir.pin.random -v {random} {path}" if random > 0.0 else ""} +for ((i = 0; i < {count}; i++)); do + mkdir -p "{path}/$i" + echo file > "{path}/$i/file" +done +""", wait=wait) + + def test_ephemeral_pin_dist_override(self): + """ + That an ephemeral distributed pin overrides a normal export pin. + """ + + self._setup_tree(distributed=True) + subtrees = self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + for s in subtrees: + path = s['dir']['path'] + if path == '/tree': + self.assertTrue(s['distributed_ephemeral_pin']) + + def test_ephemeral_pin_dist_override_pin(self): + """ + That an export pin overrides an ephemerally pinned directory. + """ + + self._setup_tree(distributed=True) + subtrees = self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + self.mount_a.setfattr("tree", "ceph.dir.pin", "0") + time.sleep(15) + subtrees = self._get_subtrees(status=self.status, rank=0) + for s in subtrees: + path = s['dir']['path'] + if path == '/tree': + self.assertEqual(s['auth_first'], 0) + self.assertFalse(s['distributed_ephemeral_pin']) + # it has been merged into /tree + + def test_ephemeral_pin_dist_off(self): + """ + That turning off ephemeral distributed pin merges subtrees. + """ + + self._setup_tree(distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "0") + time.sleep(15) + subtrees = self._get_subtrees(status=self.status, rank=0) + for s in subtrees: + path = s['dir']['path'] + if path == '/tree': + self.assertFalse(s['distributed_ephemeral_pin']) + + + def test_ephemeral_pin_dist_conf_off(self): + """ + That turning off ephemeral distributed pin config prevents distribution. + """ + + self._setup_tree() + self.config_set('mds', 'mds_export_ephemeral_distributed', False) + self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1") + time.sleep(15) + subtrees = self._get_subtrees(status=self.status, rank=0) + for s in subtrees: + path = s['dir']['path'] + if path == '/tree': + self.assertFalse(s['distributed_ephemeral_pin']) + + def _test_ephemeral_pin_dist_conf_off_merge(self): + """ + That turning off ephemeral distributed pin config merges subtrees. + FIXME: who triggers the merge? + """ + + self._setup_tree(distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + self.config_set('mds', 'mds_export_ephemeral_distributed', False) + self._wait_subtrees([('/tree', 0)], timeout=60, status=self.status) + + def test_ephemeral_pin_dist_override_before(self): + """ + That a conventional export pin overrides the distributed policy _before_ distributed policy is set. 
+ """ + + count = 10 + self._setup_tree(count=count) + test = [] + for i in range(count): + path = f"tree/{i}" + self.mount_a.setfattr(path, "ceph.dir.pin", "1") + test.append(("/"+path, 1)) + self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1") + time.sleep(15) # for something to not happen... + self._wait_subtrees(test, timeout=60, status=self.status, rank="all", path="/tree/") + + def test_ephemeral_pin_dist_override_after(self): + """ + That a conventional export pin overrides the distributed policy _after_ distributed policy is set. + """ + + self._setup_tree(distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + test = [] + for i in range(10): + path = f"tree/{i}" + self.mount_a.setfattr(path, "ceph.dir.pin", "1") + test.append(("/"+path, 1)) + self._wait_subtrees(test, timeout=60, status=self.status, rank="all", path="/tree/") + + def test_ephemeral_pin_dist_failover(self): + """ + That MDS failover does not cause unnecessary migrations. + """ + + # pin /tree so it does not export during failover + self._setup_tree(distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status, rank="all") + #test = [(s['dir']['path'], s['auth_first']) for s in subtrees] + before = self.fs.ranks_perf(lambda p: p['mds']['exported']) + log.info(f"export stats: {before}") + self.fs.rank_fail(rank=1) + self.status = self.fs.wait_for_daemons() + time.sleep(10) # waiting for something to not happen + after = self.fs.ranks_perf(lambda p: p['mds']['exported']) + log.info(f"export stats: {after}") + self.assertEqual(before, after) + + def test_ephemeral_pin_distribution(self): + """ + That ephemerally pinned subtrees are somewhat evenly distributed. + """ + + max_mds = 3 + frags = 128 + + self.fs.set_max_mds(max_mds) + self.status = self.fs.wait_for_daemons() + + self.config_set('mds', 'mds_export_ephemeral_distributed_factor', (frags-1) / max_mds) + self._setup_tree(count=1000, distributed=True) + + subtrees = self._wait_distributed_subtrees(frags, status=self.status, rank="all") + nsubtrees = len(subtrees) + + # Check if distribution is uniform + rank0 = list(filter(lambda x: x['auth_first'] == 0, subtrees)) + rank1 = list(filter(lambda x: x['auth_first'] == 1, subtrees)) + rank2 = list(filter(lambda x: x['auth_first'] == 2, subtrees)) + self.assertGreaterEqual(len(rank0)/nsubtrees, 0.15) + self.assertGreaterEqual(len(rank1)/nsubtrees, 0.15) + self.assertGreaterEqual(len(rank2)/nsubtrees, 0.15) + + + def test_ephemeral_random(self): + """ + That 100% randomness causes all children to be pinned. + """ + self._setup_tree(random=1.0) + self._wait_random_subtrees(100, status=self.status, rank="all") + + def test_ephemeral_random_max(self): + """ + That the config mds_export_ephemeral_random_max is not exceeded. + """ + + r = 0.5 + count = 1000 + self._setup_tree(count=count, random=r) + subtrees = self._wait_random_subtrees(int(r*count*.75), status=self.status, rank="all") + self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01) + self._setup_tree(path="tree/new", count=count) + time.sleep(30) # for something not to happen... + subtrees = self._get_subtrees(status=self.status, rank="all", path="tree/new/") + self.assertLessEqual(len(subtrees), int(.01*count*1.25)) + + def test_ephemeral_random_max_config(self): + """ + That the config mds_export_ephemeral_random_max config rejects new OOB policies. 
+ """ + + self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01) + try: + p = self._setup_tree(count=1, random=0.02, wait=False) + p.wait() + except CommandFailedError as e: + log.info(f"{e}") + self.assertIn("Invalid", p.stderr.getvalue()) + else: + raise RuntimeError("mds_export_ephemeral_random_max ignored!") + + def test_ephemeral_random_dist(self): + """ + That ephemeral distributed pin overrides ephemeral random pin + """ + + self._setup_tree(random=1.0, distributed=True) + self._wait_distributed_subtrees(3 * 2, status=self.status) + + time.sleep(15) + subtrees = self._get_subtrees(status=self.status, rank=0) + for s in subtrees: + path = s['dir']['path'] + if path.startswith('/tree'): + self.assertFalse(s['random_ephemeral_pin']) + + def test_ephemeral_random_pin_override_before(self): + """ + That a conventional export pin overrides the random policy before creating new directories. + """ + + self._setup_tree(count=0, random=1.0) + self._setup_tree(path="tree/pin", count=10, export=1) + self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin") + + def test_ephemeral_random_pin_override_after(self): + """ + That a conventional export pin overrides the random policy after creating new directories. + """ + + count = 10 + self._setup_tree(count=0, random=1.0) + self._setup_tree(path="tree/pin", count=count) + self._wait_random_subtrees(count+1, status=self.status, rank="all") + self.mount_a.setfattr("tree/pin", "ceph.dir.pin", "1") + self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin") + + def test_ephemeral_randomness(self): + """ + That the randomness is reasonable. + """ + + r = random.uniform(0.25, 0.75) # ratios don't work for small r! + count = 1000 + self._setup_tree(count=count, random=r) + subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all") + time.sleep(30) # for max to not be exceeded + subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all") + self.assertLessEqual(len(subtrees), int(r*count*1.50)) + + def test_ephemeral_random_cache_drop(self): + """ + That the random ephemeral pin does not prevent empty (nothing in cache) subtree merging. + """ + + count = 100 + self._setup_tree(count=count, random=1.0) + self._wait_random_subtrees(count, status=self.status, rank="all") + self.mount_a.umount_wait() # release all caps + def _drop(): + self.fs.ranks_tell(["cache", "drop"], status=self.status) + self._wait_subtrees([], status=self.status, action=_drop) + + def test_ephemeral_random_failover(self): + """ + That the random ephemeral pins stay pinned across MDS failover. + """ + + count = 100 + r = 0.5 + self._setup_tree(count=count, random=r) + # wait for all random subtrees to be created, not a specific count + time.sleep(30) + subtrees = self._wait_random_subtrees(1, status=self.status, rank=1) + before = [(s['dir']['path'], s['auth_first']) for s in subtrees] + before.sort(); + + self.fs.rank_fail(rank=1) + self.status = self.fs.wait_for_daemons() + + time.sleep(30) # waiting for something to not happen + subtrees = self._wait_random_subtrees(1, status=self.status, rank=1) + after = [(s['dir']['path'], s['auth_first']) for s in subtrees] + after.sort(); + log.info(f"subtrees before: {before}") + log.info(f"subtrees after: {after}") + + self.assertEqual(before, after) + + def test_ephemeral_pin_grow_mds(self): + """ + That consistent hashing works to reduce the number of migrations. 
+ """ + + self.fs.set_max_mds(2) + self.status = self.fs.wait_for_daemons() + + self._setup_tree(random=1.0) + subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all") + + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() + + # Sleeping for a while to allow the ephemeral pin migrations to complete + time.sleep(30) + + subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all") + count = 0 + for old_subtree in subtrees_old: + for new_subtree in subtrees_new: + if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']): + count = count + 1 + break + + log.info("{0} migrations have occured due to the cluster resizing".format(count)) + # ~50% of subtrees from the two rank will migrate to another rank + self.assertLessEqual((count/len(subtrees_old)), (0.5)*1.25) # with 25% overbudget + + def test_ephemeral_pin_shrink_mds(self): + """ + That consistent hashing works to reduce the number of migrations. + """ + + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() + + self._setup_tree(random=1.0) + subtrees_old = self._wait_random_subtrees(100, status=self.status, rank="all") + + self.fs.set_max_mds(2) + self.status = self.fs.wait_for_daemons() + time.sleep(30) + + subtrees_new = self._wait_random_subtrees(100, status=self.status, rank="all") + count = 0 + for old_subtree in subtrees_old: + for new_subtree in subtrees_new: + if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']): + count = count + 1 + break + + log.info("{0} migrations have occured due to the cluster resizing".format(count)) + # rebalancing from 3 -> 2 may cause half of rank 0/1 to move and all of rank 2 + self.assertLessEqual((count/len(subtrees_old)), (1.0/3.0/2.0 + 1.0/3.0/2.0 + 1.0/3.0)*1.25) # aka .66 with 25% overbudget diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py new file mode 100644 index 000000000..ddcc58ccc --- /dev/null +++ b/qa/tasks/cephfs/test_failover.py @@ -0,0 +1,819 @@ +import time +import signal +import logging +import operator +from random import randint, choice + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.fuse_mount import FuseMount + +log = logging.getLogger(__name__) + +class TestClusterAffinity(CephFSTestCase): + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 4 + + def _verify_join_fs(self, target, status=None, fs=None): + fs_select = fs + if fs_select is None: + fs_select = self.fs + if status is None: + status = fs_select.wait_for_daemons(timeout=30) + log.debug("%s", status) + target = sorted(target, key=operator.itemgetter('name')) + log.info("target = %s", target) + current = list(status.get_all()) + current = sorted(current, key=operator.itemgetter('name')) + log.info("current = %s", current) + self.assertEqual(len(current), len(target)) + for i in range(len(current)): + for attr in target[i]: + self.assertIn(attr, current[i]) + self.assertEqual(target[i][attr], current[i][attr]) + + def _change_target_state(self, state, name, changes): + for entity in state: + if entity['name'] == name: + for k, v in changes.items(): + entity[k] = v + return + self.fail("no entity") + + def _verify_init(self, fs=None): + fs_select = fs + if fs_select is None: + fs_select = self.fs + status = fs_select.status() + log.info("status = {0}".format(status)) + target = [{'join_fscid': -1, 'name': info['name']} 
for info in status.get_all()] + self._verify_join_fs(target, status=status, fs=fs_select) + return (status, target) + + def _reach_target(self, target): + def takeover(): + try: + self._verify_join_fs(target) + return True + except AssertionError as e: + log.debug("%s", e) + return False + self.wait_until_true(takeover, 30) + + def test_join_fs_runtime(self): + """ + That setting mds_join_fs at runtime affects the cluster layout. + """ + status, target = self._verify_init() + standbys = list(status.get_standbys()) + self.config_set('mds.'+standbys[0]['name'], 'mds_join_fs', 'cephfs') + self._change_target_state(target, standbys[0]['name'], {'join_fscid': self.fs.id, 'state': 'up:active'}) + self._reach_target(target) + + def test_join_fs_unset(self): + """ + That unsetting mds_join_fs will cause failover if another high-affinity standby exists. + """ + status, target = self._verify_init() + standbys = list(status.get_standbys()) + names = (standbys[0]['name'], standbys[1]['name']) + self.config_set('mds.'+names[0], 'mds_join_fs', 'cephfs') + self.config_set('mds.'+names[1], 'mds_join_fs', 'cephfs') + self._change_target_state(target, names[0], {'join_fscid': self.fs.id}) + self._change_target_state(target, names[1], {'join_fscid': self.fs.id}) + self._reach_target(target) + time.sleep(5) # MDSMonitor tick + status = self.fs.wait_for_daemons() + active = self.fs.get_active_names(status=status)[0] + self.assertIn(active, names) + self.config_rm('mds.'+active, 'mds_join_fs') + self._change_target_state(target, active, {'join_fscid': -1}) + new_active = (set(names) - set((active,))).pop() + self._change_target_state(target, new_active, {'state': 'up:active'}) + self._reach_target(target) + + def test_join_fs_drop(self): + """ + That unsetting mds_join_fs will not cause failover if no high-affinity standby exists. + """ + status, target = self._verify_init() + standbys = list(status.get_standbys()) + active = standbys[0]['name'] + self.config_set('mds.'+active, 'mds_join_fs', 'cephfs') + self._change_target_state(target, active, {'join_fscid': self.fs.id, 'state': 'up:active'}) + self._reach_target(target) + self.config_rm('mds.'+active, 'mds_join_fs') + self._change_target_state(target, active, {'join_fscid': -1}) + self._reach_target(target) + + def test_join_fs_vanilla(self): + """ + That a vanilla standby is preferred over others with mds_join_fs set to another fs. + """ + fs2 = self.mds_cluster.newfs(name="cephfs2") + status, target = self._verify_init() + active = self.fs.get_active_names(status=status)[0] + status2, _ = self._verify_init(fs=fs2) + active2 = fs2.get_active_names(status=status2)[0] + standbys = [info['name'] for info in status.get_standbys()] + victim = standbys.pop() + # Set a bogus fs on the others + for mds in standbys: + self.config_set('mds.'+mds, 'mds_join_fs', 'cephfs2') + self._change_target_state(target, mds, {'join_fscid': fs2.id}) + # The active MDS for cephfs2 will be replaced by the MDS for which + # file system affinity has been set. Also, set the affinity for + # the earlier active MDS so that it is not chosen by the monitors + # as an active MDS for the existing file system. 
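
The affinity checks in this class all funnel through _verify_join_fs, which does a partial-dict match: each target entry only has to agree on the attributes it names. A standalone restatement of that comparison (matches_target is illustrative and not part of the suite; the daemon records in the example are made up):

from operator import itemgetter

def matches_target(target, current):
    """Return True if every daemon in `current` satisfies the partial
    description at the same position in `target` (both sorted by name)."""
    target = sorted(target, key=itemgetter('name'))
    current = sorted(current, key=itemgetter('name'))
    if len(target) != len(current):
        return False
    for want, have in zip(target, current):
        for attr, value in want.items():
            if attr not in have or have[attr] != value:
                return False
    return True

# Example: one standby given affinity for fscid 1, one left vanilla.
assert matches_target(
    [{'name': 'a', 'join_fscid': 1}, {'name': 'b', 'join_fscid': -1}],
    [{'name': 'b', 'join_fscid': -1, 'state': 'up:standby'},
     {'name': 'a', 'join_fscid': 1, 'state': 'up:active'}])
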
+ log.info(f'assigning affinity to cephfs2 for active mds (mds.{active2})') + self.config_set(f'mds.{active2}', 'mds_join_fs', 'cephfs2') + self._change_target_state(target, active2, {'join_fscid': fs2.id}) + self.fs.rank_fail() + self._change_target_state(target, victim, {'state': 'up:active'}) + self._reach_target(target) + status = self.fs.status() + active = self.fs.get_active_names(status=status)[0] + self.assertEqual(active, victim) + + def test_join_fs_last_resort(self): + """ + That a standby with mds_join_fs set to another fs is still used if necessary. + """ + status, target = self._verify_init() + standbys = [info['name'] for info in status.get_standbys()] + for mds in standbys: + self.config_set('mds.'+mds, 'mds_join_fs', 'cephfs2') + fs2 = self.mds_cluster.newfs(name="cephfs2") + for mds in standbys: + self._change_target_state(target, mds, {'join_fscid': fs2.id}) + self.fs.rank_fail() + status = self.fs.status() + ranks = list(self.fs.get_ranks(status=status)) + self.assertEqual(len(ranks), 1) + self.assertIn(ranks[0]['name'], standbys) + # Note that we would expect the former active to reclaim its spot, but + # we're not testing that here. + + def test_join_fs_steady(self): + """ + That a sole MDS with mds_join_fs set will come back as active eventually even after failover. + """ + status, target = self._verify_init() + active = self.fs.get_active_names(status=status)[0] + self.config_set('mds.'+active, 'mds_join_fs', 'cephfs') + self._change_target_state(target, active, {'join_fscid': self.fs.id}) + self._reach_target(target) + self.fs.rank_fail() + self._reach_target(target) + + def test_join_fs_standby_replay(self): + """ + That a standby-replay daemon with weak affinity is replaced by a stronger one. + """ + status, target = self._verify_init() + standbys = [info['name'] for info in status.get_standbys()] + self.config_set('mds.'+standbys[0], 'mds_join_fs', 'cephfs') + self._change_target_state(target, standbys[0], {'join_fscid': self.fs.id, 'state': 'up:active'}) + self._reach_target(target) + self.fs.set_allow_standby_replay(True) + status = self.fs.status() + standbys = [info['name'] for info in status.get_standbys()] + self.config_set('mds.'+standbys[0], 'mds_join_fs', 'cephfs') + self._change_target_state(target, standbys[0], {'join_fscid': self.fs.id, 'state': 'up:standby-replay'}) + self._reach_target(target) + +class TestClusterResize(CephFSTestCase): + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 3 + + def test_grow(self): + """ + That the MDS cluster grows after increasing max_mds. + """ + + # Need all my standbys up as well as the active daemons + # self.wait_for_daemon_start() necessary? + + self.fs.grow(2) + self.fs.grow(3) + + + def test_shrink(self): + """ + That the MDS cluster shrinks automatically after decreasing max_mds. + """ + + self.fs.grow(3) + self.fs.shrink(1) + + def test_up_less_than_max(self): + """ + That a health warning is generated when max_mds is greater than active count. 
+ """ + + status = self.fs.status() + mdss = [info['gid'] for info in status.get_all()] + self.fs.set_max_mds(len(mdss)+1) + self.wait_for_health("MDS_UP_LESS_THAN_MAX", 30) + self.fs.shrink(2) + self.wait_for_health_clear(30) + + def test_down_health(self): + """ + That marking a FS down does not generate a health warning + """ + + self.fs.set_down() + try: + self.wait_for_health("", 30) + raise RuntimeError("got health warning?") + except RuntimeError as e: + if "Timed out after" in str(e): + pass + else: + raise + + def test_down_twice(self): + """ + That marking a FS down twice does not wipe old_max_mds. + """ + + self.fs.grow(2) + self.fs.set_down() + self.fs.wait_for_daemons() + self.fs.set_down(False) + self.assertEqual(self.fs.get_var("max_mds"), 2) + self.fs.wait_for_daemons(timeout=60) + + def test_down_grow(self): + """ + That setting max_mds undoes down. + """ + + self.fs.set_down() + self.fs.wait_for_daemons() + self.fs.grow(2) + self.fs.wait_for_daemons() + + def test_down(self): + """ + That down setting toggles and sets max_mds appropriately. + """ + + self.fs.set_down() + self.fs.wait_for_daemons() + self.assertEqual(self.fs.get_var("max_mds"), 0) + self.fs.set_down(False) + self.assertEqual(self.fs.get_var("max_mds"), 1) + self.fs.wait_for_daemons() + self.assertEqual(self.fs.get_var("max_mds"), 1) + + def test_hole(self): + """ + Test that a hole cannot be created in the FS ranks. + """ + + fscid = self.fs.id + + self.fs.grow(2) + + # Now add a delay which should slow down how quickly rank 1 stops + self.config_set('mds', 'ms_inject_delay_max', '5.0') + self.config_set('mds', 'ms_inject_delay_probability', '1.0') + self.fs.set_max_mds(1) + log.info("status = {0}".format(self.fs.status())) + + # Don't wait for rank 1 to stop + self.fs.set_max_mds(3) + log.info("status = {0}".format(self.fs.status())) + + # Now check that the mons didn't try to promote a standby to rank 2 + self.fs.set_max_mds(2) + status = self.fs.status() + try: + status = self.fs.wait_for_daemons(timeout=90) + ranks = set([info['rank'] for info in status.get_ranks(fscid)]) + self.assertEqual(ranks, set([0, 1])) + finally: + log.info("status = {0}".format(status)) + + def test_thrash(self): + """ + Test that thrashing max_mds does not fail. + """ + + max_mds = 2 + for i in range(0, 100): + self.fs.set_max_mds(max_mds) + max_mds = (max_mds+1)%3+1 + + self.fs.wait_for_daemons(timeout=90) + +class TestFailover(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 2 + + def test_repeated_boot(self): + """ + That multiple boot messages do not result in the MDS getting evicted. + """ + + interval = 10 + self.config_set("mon", "paxos_propose_interval", interval) + + mds = choice(list(self.fs.status().get_all())) + + with self.assert_cluster_log(f"daemon mds.{mds['name']} restarted", present=False): + # Avoid a beacon to the monitors with down:dne by restarting: + self.fs.mds_fail(mds_id=mds['name']) + # `ceph mds fail` won't return until the FSMap is committed, double-check: + self.assertIsNone(self.fs.status().get_mds_gid(mds['gid'])) + time.sleep(2) # for mds to restart and accept asok commands + status1 = self.fs.mds_asok(['status'], mds_id=mds['name']) + time.sleep(interval*1.5) + status2 = self.fs.mds_asok(['status'], mds_id=mds['name']) + self.assertEqual(status1['id'], status2['id']) + + def test_simple(self): + """ + That when the active MDS is killed, a standby MDS is promoted into + its rank after the grace period. 
+ + This is just a simple unit test, the harder cases are covered + in thrashing tests. + """ + + (original_active, ) = self.fs.get_active_names() + original_standbys = self.mds_cluster.get_standby_daemons() + + # Kill the rank 0 daemon's physical process + self.fs.mds_stop(original_active) + + # Wait until the monitor promotes his replacement + def promoted(): + ranks = list(self.fs.get_ranks()) + return len(ranks) > 0 and ranks[0]['name'] in original_standbys + + log.info("Waiting for promotion of one of the original standbys {0}".format( + original_standbys)) + self.wait_until_true(promoted, timeout=self.fs.beacon_timeout) + + # Start the original rank 0 daemon up again, see that he becomes a standby + self.fs.mds_restart(original_active) + self.wait_until_true( + lambda: original_active in self.mds_cluster.get_standby_daemons(), + timeout=60 # Approximately long enough for MDS to start and mon to notice + ) + + def test_client_abort(self): + """ + That a client will respect fuse_require_active_mds and error out + when the cluster appears to be unavailable. + """ + + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Requires FUSE client to inject client metadata") + + require_active = self.fs.get_config("fuse_require_active_mds", service_type="mon").lower() == "true" + if not require_active: + self.skipTest("fuse_require_active_mds is not set") + + # Check it's not laggy to begin with + (original_active, ) = self.fs.get_active_names() + self.assertNotIn("laggy_since", self.fs.status().get_mds(original_active)) + + self.mounts[0].umount_wait() + + # Control: that we can mount and unmount usually, while the cluster is healthy + self.mounts[0].mount_wait() + self.mounts[0].umount_wait() + + # Stop the daemon processes + self.fs.mds_stop() + + # Wait for everyone to go laggy + def laggy(): + mdsmap = self.fs.get_mds_map() + for info in mdsmap['info'].values(): + if "laggy_since" not in info: + return False + + return True + + self.wait_until_true(laggy, self.fs.beacon_timeout) + with self.assertRaises(CommandFailedError): + self.mounts[0].mount_wait() + + def test_standby_count_wanted(self): + """ + That cluster health warnings are generated by insufficient standbys available. 
+ """ + + # Need all my standbys up as well as the active daemons + self.wait_for_daemon_start() + + standbys = self.mds_cluster.get_standby_daemons() + self.assertGreaterEqual(len(standbys), 1) + self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys))) + + # Kill a standby and check for warning + victim = standbys.pop() + self.fs.mds_stop(victim) + self.wait_for_health("MDS_INSUFFICIENT_STANDBY", self.fs.beacon_timeout) + + # restart the standby, see that he becomes a standby, check health clears + self.fs.mds_restart(victim) + self.wait_until_true( + lambda: victim in self.mds_cluster.get_standby_daemons(), + timeout=60 # Approximately long enough for MDS to start and mon to notice + ) + self.wait_for_health_clear(timeout=30) + + # Set it one greater than standbys ever seen + standbys = self.mds_cluster.get_standby_daemons() + self.assertGreaterEqual(len(standbys), 1) + self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1)) + self.wait_for_health("MDS_INSUFFICIENT_STANDBY", self.fs.beacon_timeout) + + # Set it to 0 + self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0') + self.wait_for_health_clear(timeout=30) + + def test_discontinuous_mdsmap(self): + """ + That discontinuous mdsmap does not affect failover. + See http://tracker.ceph.com/issues/24856. + """ + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + self.mount_a.umount_wait() + + monc_timeout = float(self.fs.get_config("mon_client_ping_timeout", service_type="mds")) + + mds_0 = self.fs.get_rank(rank=0, status=status) + self.fs.rank_freeze(True, rank=0) # prevent failover + self.fs.rank_signal(signal.SIGSTOP, rank=0, status=status) + self.wait_until_true( + lambda: "laggy_since" in self.fs.get_rank(), + timeout=self.fs.beacon_timeout + ) + + self.fs.rank_fail(rank=1) + self.fs.wait_for_state('up:resolve', rank=1, timeout=30) + + # Make sure of mds_0's monitor connection gets reset + time.sleep(monc_timeout * 2) + + # Continue rank 0, it will get discontinuous mdsmap + self.fs.rank_signal(signal.SIGCONT, rank=0) + self.wait_until_true( + lambda: "laggy_since" not in self.fs.get_rank(rank=0), + timeout=self.fs.beacon_timeout + ) + + # mds.b will be stuck at 'reconnect' state if snapserver gets confused + # by discontinuous mdsmap + self.fs.wait_for_state('up:active', rank=1, timeout=30) + self.assertEqual(mds_0['gid'], self.fs.get_rank(rank=0)['gid']) + self.fs.rank_freeze(False, rank=0) + + def test_connect_bootstrapping(self): + self.config_set("mds", "mds_sleep_rank_change", 10000000.0) + self.config_set("mds", "mds_connect_bootstrapping", True) + self.fs.set_max_mds(2) + self.fs.wait_for_daemons() + self.fs.rank_fail(rank=0) + # rank 0 will get stuck in up:resolve, see https://tracker.ceph.com/issues/53194 + self.fs.wait_for_daemons() + + +class TestStandbyReplay(CephFSTestCase): + CLIENTS_REQUIRED = 0 + MDSS_REQUIRED = 4 + + def _confirm_no_replay(self): + status = self.fs.status() + _ = len(list(status.get_standbys())) + self.assertEqual(0, len(list(self.fs.get_replays(status=status)))) + return status + + def _confirm_single_replay(self, full=True, status=None, retries=3): + status = self.fs.wait_for_daemons(status=status) + ranks = sorted(self.fs.get_mds_map(status=status)['in']) + replays = list(self.fs.get_replays(status=status)) + checked_replays = set() + for rank in ranks: + has_replay = False + for replay in replays: + if replay['rank'] == rank: + 
self.assertFalse(has_replay) + has_replay = True + checked_replays.add(replay['gid']) + if full and not has_replay: + if retries <= 0: + raise RuntimeError("rank "+str(rank)+" has no standby-replay follower") + else: + retries = retries-1 + time.sleep(2) + self.assertEqual(checked_replays, set(info['gid'] for info in replays)) + return status + + def _check_replay_takeover(self, status, rank=0): + replay = self.fs.get_replay(rank=rank, status=status) + new_status = self.fs.wait_for_daemons() + new_active = self.fs.get_rank(rank=rank, status=new_status) + if replay: + self.assertEqual(replay['gid'], new_active['gid']) + else: + # double check takeover came from a standby (or some new daemon via restart) + found = False + for info in status.get_standbys(): + if info['gid'] == new_active['gid']: + found = True + break + if not found: + for info in status.get_all(): + self.assertNotEqual(info['gid'], new_active['gid']) + return new_status + + def test_standby_replay_singleton(self): + """ + That only one MDS becomes standby-replay. + """ + + self._confirm_no_replay() + self.fs.set_allow_standby_replay(True) + time.sleep(30) + self._confirm_single_replay() + + def test_standby_replay_damaged(self): + """ + That a standby-replay daemon can cause the rank to go damaged correctly. + """ + + self._confirm_no_replay() + self.config_set("mds", "mds_standby_replay_damaged", True) + self.fs.set_allow_standby_replay(True) + self.wait_until_true( + lambda: len(self.fs.get_damaged()) > 0, + timeout=30 + ) + status = self.fs.status() + self.assertListEqual([], list(self.fs.get_ranks(status=status))) + self.assertListEqual([0], self.fs.get_damaged(status=status)) + + def test_standby_replay_disable(self): + """ + That turning off allow_standby_replay fails all standby-replay daemons. + """ + + self._confirm_no_replay() + self.fs.set_allow_standby_replay(True) + time.sleep(30) + self._confirm_single_replay() + self.fs.set_allow_standby_replay(False) + self._confirm_no_replay() + + def test_standby_replay_singleton_fail(self): + """ + That failures don't violate singleton constraint. + """ + + self._confirm_no_replay() + self.fs.set_allow_standby_replay(True) + status = self._confirm_single_replay() + + for i in range(10): + time.sleep(randint(1, 5)) + self.fs.rank_restart(status=status) + status = self._check_replay_takeover(status) + status = self._confirm_single_replay(status=status) + + for i in range(10): + time.sleep(randint(1, 5)) + self.fs.rank_fail() + status = self._check_replay_takeover(status) + status = self._confirm_single_replay(status=status) + + def test_standby_replay_singleton_fail_multimds(self): + """ + That failures don't violate singleton constraint with multiple actives. + """ + + status = self._confirm_no_replay() + new_max_mds = randint(2, len(list(status.get_standbys()))) + self.fs.set_max_mds(new_max_mds) + self.fs.wait_for_daemons() # wait for actives to come online! 
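
The restart/fail loops below keep re-asserting the same invariant that _confirm_single_replay encodes: every active rank has at most one standby-replay follower, and exactly one when a full check is requested. A distilled sketch of that invariant (check_singleton_replay is illustrative; the gid values in the example are arbitrary sample data):

from collections import Counter

def check_singleton_replay(ranks, replays, full=True):
    """`replays` is assumed to be a list of dicts with 'rank' and 'gid'
    keys, as returned by get_replays() above."""
    per_rank = Counter(r['rank'] for r in replays)
    for rank, count in per_rank.items():
        if count > 1:
            raise AssertionError(f"rank {rank} has {count} standby-replay daemons")
    if full:
        missing = [rank for rank in ranks if per_rank[rank] == 0]
        if missing:
            raise AssertionError(f"ranks {missing} have no standby-replay follower")

# Two actives, each with one follower: passes.
check_singleton_replay([0, 1], [{'rank': 0, 'gid': 4101}, {'rank': 1, 'gid': 4102}])
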
+ self.fs.set_allow_standby_replay(True) + status = self._confirm_single_replay(full=False) + + for i in range(10): + time.sleep(randint(1, 5)) + victim = randint(0, new_max_mds-1) + self.fs.rank_restart(rank=victim, status=status) + status = self._check_replay_takeover(status, rank=victim) + status = self._confirm_single_replay(status=status, full=False) + + for i in range(10): + time.sleep(randint(1, 5)) + victim = randint(0, new_max_mds-1) + self.fs.rank_fail(rank=victim) + status = self._check_replay_takeover(status, rank=victim) + status = self._confirm_single_replay(status=status, full=False) + + def test_standby_replay_failure(self): + """ + That the failure of a standby-replay daemon happens cleanly + and doesn't interrupt anything else. + """ + + status = self._confirm_no_replay() + self.fs.set_max_mds(1) + self.fs.set_allow_standby_replay(True) + status = self._confirm_single_replay() + + for i in range(10): + time.sleep(randint(1, 5)) + victim = self.fs.get_replay(status=status) + self.fs.mds_restart(mds_id=victim['name']) + status = self._confirm_single_replay(status=status) + + def test_standby_replay_prepare_beacon(self): + """ + That a MDSMonitor::prepare_beacon handles standby-replay daemons + correctly without removing the standby. (Note, usually a standby-replay + beacon will just be replied to by MDSMonitor::preprocess_beacon.) + """ + + status = self._confirm_no_replay() + self.fs.set_max_mds(1) + self.fs.set_allow_standby_replay(True) + status = self._confirm_single_replay() + replays = list(status.get_replays(self.fs.id)) + self.assertEqual(len(replays), 1) + self.config_set('mds.'+replays[0]['name'], 'mds_inject_health_dummy', True) + time.sleep(10) # for something not to happen... + status = self._confirm_single_replay() + replays2 = list(status.get_replays(self.fs.id)) + self.assertEqual(replays[0]['gid'], replays2[0]['gid']) + + def test_rank_stopped(self): + """ + That when a rank is STOPPED, standby replays for + that rank get torn down + """ + + status = self._confirm_no_replay() + standby_count = len(list(status.get_standbys())) + self.fs.set_max_mds(2) + self.fs.set_allow_standby_replay(True) + status = self._confirm_single_replay() + + self.fs.set_max_mds(1) # stop rank 1 + + status = self._confirm_single_replay() + self.assertTrue(standby_count, len(list(status.get_standbys()))) + + +class TestMultiFilesystems(CephFSTestCase): + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 4 + + # We'll create our own filesystems and start our own daemons + REQUIRE_FILESYSTEM = False + + def setUp(self): + super(TestMultiFilesystems, self).setUp() + self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", + "enable_multiple", "true", + "--yes-i-really-mean-it") + + def _setup_two(self): + fs_a = self.mds_cluster.newfs(name="alpha") + fs_b = self.mds_cluster.newfs(name="bravo") + + self.mds_cluster.mds_restart() + + # Wait for both filesystems to go healthy + fs_a.wait_for_daemons() + fs_b.wait_for_daemons() + + # Reconfigure client auth caps + for mount in self.mounts: + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(mount.client_id), + 'mds', 'allow', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + fs_a.get_data_pool_name(), fs_b.get_data_pool_name())) + + return fs_a, fs_b + + def test_clients(self): + fs_a, fs_b = self._setup_two() + + # Mount a client on fs_a + self.mount_a.mount_wait(cephfs_name=fs_a.name) + self.mount_a.write_n_mb("pad.bin", 1) + self.mount_a.write_n_mb("test.bin", 2) + 
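
The data_objects_present() checks further down boil down to looking in each pool for RADOS objects named after the file's inode, using the same "{ino:x}.00000000" scheme seen elsewhere in these suites. A rough sketch of that naming, assuming the default 4 MiB object size (data_object_names is illustrative only):

def data_object_names(ino, size_bytes, object_size=4 * 1024 * 1024):
    """Yield the RADOS object names backing a file of `size_bytes` bytes."""
    nobjects = max(1, (size_bytes + object_size - 1) // object_size)
    for i in range(nobjects):
        yield "{0:x}.{1:08x}".format(ino, i)

# A 2 MiB file occupies a single object, e.g. "10000000001.00000000".
print(list(data_object_names(0x10000000001, 2 * 1024 * 1024)))
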
a_created_ino = self.mount_a.path_to_ino("test.bin") + self.mount_a.create_files() + + # Mount a client on fs_b + self.mount_b.mount_wait(cephfs_name=fs_b.name) + self.mount_b.write_n_mb("test.bin", 1) + b_created_ino = self.mount_b.path_to_ino("test.bin") + self.mount_b.create_files() + + # Check that a non-default filesystem mount survives an MDS + # failover (i.e. that map subscription is continuous, not + # just the first time), reproduces #16022 + old_fs_b_mds = fs_b.get_active_names()[0] + self.mds_cluster.mds_stop(old_fs_b_mds) + self.mds_cluster.mds_fail(old_fs_b_mds) + fs_b.wait_for_daemons() + background = self.mount_b.write_background() + # Raise exception if the write doesn't finish (i.e. if client + # has not kept up with MDS failure) + try: + self.wait_until_true(lambda: background.finished, timeout=30) + except RuntimeError: + # The mount is stuck, we'll have to force it to fail cleanly + background.stdin.close() + self.mount_b.umount_wait(force=True) + raise + + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + # See that the client's files went into the correct pool + self.assertTrue(fs_a.data_objects_present(a_created_ino, 1024 * 1024)) + self.assertTrue(fs_b.data_objects_present(b_created_ino, 1024 * 1024)) + + def test_standby(self): + fs_a, fs_b = self._setup_two() + + # Assert that the remaining two MDS daemons are now standbys + a_daemons = fs_a.get_active_names() + b_daemons = fs_b.get_active_names() + self.assertEqual(len(a_daemons), 1) + self.assertEqual(len(b_daemons), 1) + original_a = a_daemons[0] + original_b = b_daemons[0] + expect_standby_daemons = set(self.mds_cluster.mds_ids) - (set(a_daemons) | set(b_daemons)) + + # Need all my standbys up as well as the active daemons + self.wait_for_daemon_start() + self.assertEqual(expect_standby_daemons, self.mds_cluster.get_standby_daemons()) + + # Kill fs_a's active MDS, see a standby take over + self.mds_cluster.mds_stop(original_a) + self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_a) + self.wait_until_equal(lambda: len(fs_a.get_active_names()), 1, 30, + reject_fn=lambda v: v > 1) + # Assert that it's a *different* daemon that has now appeared in the map for fs_a + self.assertNotEqual(fs_a.get_active_names()[0], original_a) + + # Kill fs_b's active MDS, see a standby take over + self.mds_cluster.mds_stop(original_b) + self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_b) + self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30, + reject_fn=lambda v: v > 1) + # Assert that it's a *different* daemon that has now appeared in the map for fs_a + self.assertNotEqual(fs_b.get_active_names()[0], original_b) + + # Both of the original active daemons should be gone, and all standbys used up + self.assertEqual(self.mds_cluster.get_standby_daemons(), set()) + + # Restart the ones I killed, see them reappear as standbys + self.mds_cluster.mds_restart(original_a) + self.mds_cluster.mds_restart(original_b) + self.wait_until_true( + lambda: {original_a, original_b} == self.mds_cluster.get_standby_daemons(), + timeout=30 + ) + + def test_grow_shrink(self): + # Usual setup... 
+ fs_a, fs_b = self._setup_two() + + # Increase max_mds on fs_b, see a standby take up the role + fs_b.set_max_mds(2) + self.wait_until_equal(lambda: len(fs_b.get_active_names()), 2, 30, + reject_fn=lambda v: v > 2 or v < 1) + + # Increase max_mds on fs_a, see a standby take up the role + fs_a.set_max_mds(2) + self.wait_until_equal(lambda: len(fs_a.get_active_names()), 2, 30, + reject_fn=lambda v: v > 2 or v < 1) + + # Shrink fs_b back to 1, see a daemon go back to standby + fs_b.set_max_mds(1) + self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30, + reject_fn=lambda v: v > 2 or v < 1) + + # Grow fs_a up to 3, see the former fs_b daemon join it. + fs_a.set_max_mds(3) + self.wait_until_equal(lambda: len(fs_a.get_active_names()), 3, 60, + reject_fn=lambda v: v > 3 or v < 2) diff --git a/qa/tasks/cephfs/test_flush.py b/qa/tasks/cephfs/test_flush.py new file mode 100644 index 000000000..17cb84970 --- /dev/null +++ b/qa/tasks/cephfs/test_flush.py @@ -0,0 +1,112 @@ + +from textwrap import dedent +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO + + +class TestFlush(CephFSTestCase): + def test_flush(self): + self.mount_a.run_shell(["mkdir", "mydir"]) + self.mount_a.run_shell(["touch", "mydir/alpha"]) + dir_ino = self.mount_a.path_to_ino("mydir") + file_ino = self.mount_a.path_to_ino("mydir/alpha") + + # Unmount the client so that it isn't still holding caps + self.mount_a.umount_wait() + + # Before flush, the dirfrag object does not exist + with self.assertRaises(ObjectNotFound): + self.fs.list_dirfrag(dir_ino) + + # Before flush, the file's backtrace has not been written + with self.assertRaises(ObjectNotFound): + self.fs.read_backtrace(file_ino) + + # Before flush, there are no dentries in the root + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) + + # Execute flush + flush_data = self.fs.mds_asok(["flush", "journal"]) + self.assertEqual(flush_data['return_code'], 0) + + # After flush, the dirfrag object has been created + dir_list = self.fs.list_dirfrag(dir_ino) + self.assertEqual(dir_list, ["alpha_head"]) + + # And the 'mydir' dentry is in the root + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), ['mydir_head']) + + # ...and the data object has its backtrace + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'mydir'], [a['dname'] for a in backtrace['ancestors']]) + self.assertEqual([dir_ino, 1], [a['dirino'] for a in backtrace['ancestors']]) + self.assertEqual(file_ino, backtrace['ino']) + + # ...and the journal is truncated to just a single subtreemap from the + # newly created segment + summary_output = self.fs.journal_tool(["event", "get", "summary"], 0) + try: + self.assertEqual(summary_output, + dedent( + """ + Events by type: + SUBTREEMAP: 1 + Errors: 0 + """ + ).strip()) + except AssertionError: + # In some states, flushing the journal will leave you + # an extra event from locks a client held. This is + # correct behaviour: the MDS is flushing the journal, + # it's just that new events are getting added too. + # In this case, we should nevertheless see a fully + # empty journal after a second flush. 
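
The journal summaries compared in this test are plain text. When a comparison fails it can be easier to reason about the counts than the raw string; a minimal sketch that parses the "Events by type:" layout shown here into a dict (parse_event_summary is illustrative, not part of the suite):

def parse_event_summary(text):
    counts = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.endswith(':'):
            continue  # skip the "Events by type:" header
        event, _, count = line.partition(':')
        counts[event.strip()] = int(count)
    return counts

# -> {'SUBTREEMAP': 1, 'UPDATE': 1, 'Errors': 0}
print(parse_event_summary("Events by type:\n  SUBTREEMAP: 1\n  UPDATE: 1\nErrors: 0"))
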
+ self.assertEqual(summary_output, + dedent( + """ + Events by type: + SUBTREEMAP: 1 + UPDATE: 1 + Errors: 0 + """ + ).strip()) + flush_data = self.fs.mds_asok(["flush", "journal"]) + self.assertEqual(flush_data['return_code'], 0) + self.assertEqual(self.fs.journal_tool(["event", "get", "summary"], 0), + dedent( + """ + Events by type: + SUBTREEMAP: 1 + Errors: 0 + """ + ).strip()) + + # Now for deletion! + # We will count the RADOS deletions and MDS file purges, to verify that + # the expected behaviour is happening as a result of the purge + initial_dels = self.fs.mds_asok(['perf', 'dump', 'objecter'])['objecter']['osdop_delete'] + initial_purges = self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['strays_enqueued'] + + # Use a client to delete a file + self.mount_a.mount_wait() + self.mount_a.run_shell(["rm", "-rf", "mydir"]) + + # Flush the journal so that the directory inode can be purged + flush_data = self.fs.mds_asok(["flush", "journal"]) + self.assertEqual(flush_data['return_code'], 0) + + # We expect to see a single file purge + self.wait_until_true( + lambda: self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['strays_enqueued'] - initial_purges >= 2, + 60) + + # We expect two deletions, one of the dirfrag and one of the backtrace + self.wait_until_true( + lambda: self.fs.mds_asok(['perf', 'dump', 'objecter'])['objecter']['osdop_delete'] - initial_dels >= 2, + 60) # timeout is fairly long to allow for tick+rados latencies + + with self.assertRaises(ObjectNotFound): + self.fs.list_dirfrag(dir_ino) + with self.assertRaises(ObjectNotFound): + self.fs.read_backtrace(file_ino) + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) diff --git a/qa/tasks/cephfs/test_forward_scrub.py b/qa/tasks/cephfs/test_forward_scrub.py new file mode 100644 index 000000000..f3cec881b --- /dev/null +++ b/qa/tasks/cephfs/test_forward_scrub.py @@ -0,0 +1,307 @@ + +""" +Test that the forward scrub functionality can traverse metadata and apply +requested tags, on well formed metadata. + +This is *not* the real testing for forward scrub, which will need to test +how the functionality responds to damaged metadata. 
+ +""" +import logging +import json + +from collections import namedtuple +from io import BytesIO +from textwrap import dedent + +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +import struct + +log = logging.getLogger(__name__) + + +ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) + + +class TestForwardScrub(CephFSTestCase): + MDSS_REQUIRED = 1 + + def _read_str_xattr(self, pool, obj, attr): + """ + Read a ceph-encoded string from a rados xattr + """ + output = self.fs.mon_manager.do_rados(["getxattr", obj, attr], pool=pool, + stdout=BytesIO()).stdout.getvalue() + strlen = struct.unpack('i', output[0:4])[0] + return output[4:(4 + strlen)].decode(encoding='ascii') + + def _get_paths_to_ino(self): + inos = {} + p = self.mount_a.run_shell(["find", "./"]) + paths = p.stdout.getvalue().strip().split() + for path in paths: + inos[path] = self.mount_a.path_to_ino(path) + + return inos + + def test_apply_tag(self): + self.mount_a.run_shell(["mkdir", "parentdir"]) + self.mount_a.run_shell(["mkdir", "parentdir/childdir"]) + self.mount_a.run_shell(["touch", "rfile"]) + self.mount_a.run_shell(["touch", "parentdir/pfile"]) + self.mount_a.run_shell(["touch", "parentdir/childdir/cfile"]) + + # Build a structure mapping path to inode, as we will later want + # to check object by object and objects are named after ino number + inos = self._get_paths_to_ino() + + # Flush metadata: this is a friendly test of forward scrub so we're skipping + # the part where it's meant to cope with dirty metadata + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + + tag = "mytag" + + # Execute tagging forward scrub + self.fs.mds_asok(["tag", "path", "/parentdir", tag]) + # Wait for completion + import time + time.sleep(10) + # FIXME watching clog isn't a nice mechanism for this, once we have a ScrubMap we'll + # watch that instead + + # Check that dirs were tagged + for dirpath in ["./parentdir", "./parentdir/childdir"]: + self.assertTagged(inos[dirpath], tag, self.fs.get_metadata_pool_name()) + + # Check that files were tagged + for filepath in ["./parentdir/pfile", "./parentdir/childdir/cfile"]: + self.assertTagged(inos[filepath], tag, self.fs.get_data_pool_name()) + + # This guy wasn't in the tag path, shouldn't have been tagged + self.assertUntagged(inos["./rfile"]) + + def assertUntagged(self, ino): + file_obj_name = "{0:x}.00000000".format(ino) + with self.assertRaises(CommandFailedError): + self._read_str_xattr( + self.fs.get_data_pool_name(), + file_obj_name, + "scrub_tag" + ) + + def assertTagged(self, ino, tag, pool): + file_obj_name = "{0:x}.00000000".format(ino) + wrote = self._read_str_xattr( + pool, + file_obj_name, + "scrub_tag" + ) + self.assertEqual(wrote, tag) + + def _validate_linkage(self, expected): + inos = self._get_paths_to_ino() + try: + self.assertDictEqual(inos, expected) + except AssertionError: + log.error("Expected: {0}".format(json.dumps(expected, indent=2))) + log.error("Actual: {0}".format(json.dumps(inos, indent=2))) + raise + + def test_orphan_scan(self): + # Create some files whose metadata we will flush + self.mount_a.run_python(dedent(""" + import os + mount_point = "{mount_point}" + parent = os.path.join(mount_point, "parent") + os.mkdir(parent) + flushed = os.path.join(parent, "flushed") + os.mkdir(flushed) + for f in ["alpha", "bravo", "charlie"]: + open(os.path.join(flushed, f), 'w').write(f) + """.format(mount_point=self.mount_a.mountpoint))) + + inos = 
self._get_paths_to_ino() + + # Flush journal + # Umount before flush to avoid cap releases putting + # things we don't want in the journal later. + self.mount_a.umount_wait() + self.fs.flush() + + # Create a new inode that's just in the log, i.e. would + # look orphaned to backward scan if backward scan wisnae + # respectin' tha scrub_tag xattr. + self.mount_a.mount_wait() + self.mount_a.run_shell(["mkdir", "parent/unflushed"]) + self.mount_a.run_shell(["dd", "if=/dev/urandom", + "of=./parent/unflushed/jfile", + "bs=1M", "count=8"]) + inos["./parent/unflushed"] = self.mount_a.path_to_ino("./parent/unflushed") + inos["./parent/unflushed/jfile"] = self.mount_a.path_to_ino("./parent/unflushed/jfile") + self.mount_a.umount_wait() + + # Orphan an inode by deleting its dentry + # Our victim will be.... bravo. + self.mount_a.umount_wait() + self.fs.fail() + self.fs.set_ceph_conf('mds', 'mds verify scatter', False) + self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False) + frag_obj_id = "{0:x}.00000000".format(inos["./parent/flushed"]) + self.fs.radosm(["rmomapkey", frag_obj_id, "bravo_head"]) + + self.fs.set_joinable() + self.fs.wait_for_daemons() + + # See that the orphaned file is indeed missing from a client's POV + self.mount_a.mount_wait() + damaged_state = self._get_paths_to_ino() + self.assertNotIn("./parent/flushed/bravo", damaged_state) + self.mount_a.umount_wait() + + # Run a tagging forward scrub + tag = "mytag123" + self.fs.rank_asok(["tag", "path", "/parent", tag]) + + # See that the orphan wisnae tagged + self.assertUntagged(inos['./parent/flushed/bravo']) + + # See that the flushed-metadata-and-still-present files are tagged + self.assertTagged(inos['./parent/flushed/alpha'], tag, self.fs.get_data_pool_name()) + self.assertTagged(inos['./parent/flushed/charlie'], tag, self.fs.get_data_pool_name()) + + # See that journalled-but-not-flushed file *was* tagged + self.assertTagged(inos['./parent/unflushed/jfile'], tag, self.fs.get_data_pool_name()) + + # okay, now we are going to run cephfs-data-scan. It's necessary to + # have a clean journal otherwise replay will blowup on mismatched + # inotable versions (due to scan_links) + self.fs.flush() + self.fs.fail() + self.fs.journal_tool(["journal", "reset", "--force"], 0) + + # Run cephfs-data-scan targeting only orphans + self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()]) + self.fs.data_scan([ + "scan_inodes", + "--filter-tag", tag, + self.fs.get_data_pool_name() + ]) + self.fs.data_scan(["scan_links"]) + + # After in-place injection stats should be kosher again + self.fs.set_ceph_conf('mds', 'mds verify scatter', True) + self.fs.set_ceph_conf('mds', 'mds debug scatterstat', True) + + # And we should have all the same linkage we started with, + # and no lost+found, and no extra inodes! 
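
The assertTagged/assertUntagged checks in this test rely on _read_str_xattr, which decodes the scrub_tag xattr as a 32-bit length prefix followed by the string bytes. A round-trip sketch of that encoding, using the same native-endian 'i' prefix the helper assumes (encode/decode names are illustrative):

import struct

def encode_ceph_string(s):
    data = s.encode('ascii')
    return struct.pack('i', len(data)) + data

def decode_ceph_string(buf):
    strlen = struct.unpack('i', buf[0:4])[0]
    return buf[4:4 + strlen].decode('ascii')

assert decode_ceph_string(encode_ceph_string("mytag123")) == "mytag123"
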
+ self.fs.set_joinable() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + self._validate_linkage(inos) + + def _stash_inotable(self): + # Get all active ranks + ranks = self.fs.get_all_mds_rank() + + inotable_dict = {} + for rank in ranks: + inotable_oid = "mds{rank:d}_".format(rank=rank) + "inotable" + print("Trying to fetch inotable object: " + inotable_oid) + + #self.fs.get_metadata_object("InoTable", "mds0_inotable") + inotable_raw = self.fs.radosmo(['get', inotable_oid, '-']) + inotable_dict[inotable_oid] = inotable_raw + return inotable_dict + + def test_inotable_sync(self): + self.mount_a.write_n_mb("file1_sixmegs", 6) + + # Flush journal + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + + inotable_copy = self._stash_inotable() + + self.mount_a.mount_wait() + + self.mount_a.write_n_mb("file2_sixmegs", 6) + self.mount_a.write_n_mb("file3_sixmegs", 6) + + inos = self._get_paths_to_ino() + + # Flush journal + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + + self.mount_a.umount_wait() + + with self.assert_cluster_log("inode table repaired", invert_match=True): + out_json = self.fs.run_scrub(["start", "/", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + self.fs.fail() + + # Truncate the journal (to ensure the inotable on disk + # is all that will be in the InoTable in memory) + + self.fs.journal_tool(["event", "splice", + "--inode={0}".format(inos["./file2_sixmegs"]), "summary"], 0) + + self.fs.journal_tool(["event", "splice", + "--inode={0}".format(inos["./file3_sixmegs"]), "summary"], 0) + + # Revert to old inotable. + for key, value in inotable_copy.items(): + self.fs.radosm(["put", key, "-"], stdin=BytesIO(value)) + + self.fs.set_joinable() + self.fs.wait_for_daemons() + + with self.assert_cluster_log("inode table repaired"): + out_json = self.fs.run_scrub(["start", "/", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + self.fs.fail() + table_text = self.fs.table_tool(["0", "show", "inode"]) + table = json.loads(table_text) + self.assertGreater( + table['0']['data']['inotable']['free'][0]['start'], + inos['./file3_sixmegs']) + + def test_backtrace_repair(self): + """ + That the MDS can repair an inodes backtrace in the data pool + if it is found to be damaged. 
+ """ + # Create a file for subsequent checks + self.mount_a.run_shell(["mkdir", "parent_a"]) + self.mount_a.run_shell(["touch", "parent_a/alpha"]) + file_ino = self.mount_a.path_to_ino("parent_a/alpha") + + # That backtrace and layout are written after initial flush + self.fs.mds_asok(["flush", "journal"]) + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'parent_a'], + [a['dname'] for a in backtrace['ancestors']]) + + # Go corrupt the backtrace + self.fs._write_data_xattr(file_ino, "parent", + "oh i'm sorry did i overwrite your xattr?") + + with self.assert_cluster_log("bad backtrace on inode"): + out_json = self.fs.run_scrub(["start", "/", "repair,recursive"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + self.fs.mds_asok(["flush", "journal"]) + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['alpha', 'parent_a'], + [a['dname'] for a in backtrace['ancestors']]) diff --git a/qa/tasks/cephfs/test_fragment.py b/qa/tasks/cephfs/test_fragment.py new file mode 100644 index 000000000..7d35ec0df --- /dev/null +++ b/qa/tasks/cephfs/test_fragment.py @@ -0,0 +1,359 @@ +from io import StringIO + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.orchestra import run + +import os +import time +import logging +log = logging.getLogger(__name__) + + +class TestFragmentation(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def get_splits(self): + return self.fs.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_split'] + + def get_merges(self): + return self.fs.mds_asok(['perf', 'dump', 'mds'])['mds']['dir_merge'] + + def get_dir_ino(self, path): + dir_cache = self.fs.read_cache(path, 0) + dir_ino = None + dir_inono = self.mount_a.path_to_ino(path.strip("/")) + for ino in dir_cache: + if ino['ino'] == dir_inono: + dir_ino = ino + break + self.assertIsNotNone(dir_ino) + return dir_ino + + def _configure(self, **kwargs): + """ + Apply kwargs as MDS configuration settings, enable dirfrags + and restart the MDSs. + """ + + for k, v in kwargs.items(): + self.ceph_cluster.set_ceph_conf("mds", k, v.__str__()) + + self.mds_cluster.mds_fail_restart() + self.fs.wait_for_daemons() + + def test_oversize(self): + """ + That a directory is split when it becomes too large. + """ + + split_size = 20 + merge_size = 5 + + self._configure( + mds_bal_split_size=split_size, + mds_bal_merge_size=merge_size, + mds_bal_split_bits=1 + ) + + self.assertEqual(self.get_splits(), 0) + + self.mount_a.create_n_files("splitdir/file", split_size + 1) + + self.wait_until_true( + lambda: self.get_splits() == 1, + timeout=30 + ) + + frags = self.get_dir_ino("/splitdir")['dirfrags'] + self.assertEqual(len(frags), 2) + self.assertEqual(frags[0]['dirfrag'], "0x10000000000.0*") + self.assertEqual(frags[1]['dirfrag'], "0x10000000000.1*") + self.assertEqual( + sum([len(f['dentries']) for f in frags]), + split_size + 1 + ) + + self.assertEqual(self.get_merges(), 0) + + self.mount_a.run_shell(["rm", "-f", run.Raw("splitdir/file*")]) + + self.wait_until_true( + lambda: self.get_merges() == 1, + timeout=30 + ) + + self.assertEqual(len(self.get_dir_ino("/splitdir")["dirfrags"]), 1) + + def test_rapid_creation(self): + """ + That the fast-splitting limit of 1.5x normal limit is + applied when creating dentries quickly. 
+ """ + + split_size = 100 + merge_size = 1 + + self._configure( + mds_bal_split_size=split_size, + mds_bal_merge_size=merge_size, + mds_bal_split_bits=3, + mds_bal_fragment_size_max=int(split_size * 1.5 + 2) + ) + + # We test this only at a single split level. If a client was sending + # IO so fast that it hit a second split before the first split + # was complete, it could violate mds_bal_fragment_size_max -- there + # is a window where the child dirfrags of a split are unfrozen + # (so they can grow), but still have STATE_FRAGMENTING (so they + # can't be split). + + # By writing 4x the split size when the split bits are set + # to 3 (i.e. 4-ways), I am reasonably sure to see precisely + # one split. The test is to check whether that split + # happens soon enough that the client doesn't exceed + # 2x the split_size (the "immediate" split mode should + # kick in at 1.5x the split size). + + self.assertEqual(self.get_splits(), 0) + self.mount_a.create_n_files("splitdir/file", split_size * 4) + self.wait_until_equal( + self.get_splits, + 1, + reject_fn=lambda s: s > 1, + timeout=30 + ) + + def test_deep_split(self): + """ + That when the directory grows many times larger than split size, + the fragments get split again. + """ + + split_size = 100 + merge_size = 1 # i.e. don't merge frag unless its empty + split_bits = 1 + + branch_factor = 2**split_bits + + # Arbitrary: how many levels shall we try fragmenting before + # ending the test? + max_depth = 5 + + self._configure( + mds_bal_split_size=split_size, + mds_bal_merge_size=merge_size, + mds_bal_split_bits=split_bits + ) + + # Each iteration we will create another level of fragments. The + # placement of dentries into fragments is by hashes (i.e. pseudo + # random), so we rely on statistics to get the behaviour that + # by writing about 1.5x as many dentries as the split_size times + # the number of frags, we will get them all to exceed their + # split size and trigger a split. + depth = 0 + files_written = 0 + splits_expected = 0 + while depth < max_depth: + log.info("Writing files for depth {0}".format(depth)) + target_files = branch_factor**depth * int(split_size * 1.5) + create_files = target_files - files_written + + self.ceph_cluster.mon_manager.raw_cluster_cmd("log", + "{0} Writing {1} files (depth={2})".format( + self.__class__.__name__, create_files, depth + )) + self.mount_a.create_n_files("splitdir/file_{0}".format(depth), + create_files) + self.ceph_cluster.mon_manager.raw_cluster_cmd("log", + "{0} Done".format(self.__class__.__name__)) + + files_written += create_files + log.info("Now have {0} files".format(files_written)) + + splits_expected += branch_factor**depth + log.info("Waiting to see {0} splits".format(splits_expected)) + try: + self.wait_until_equal( + self.get_splits, + splits_expected, + timeout=30, + reject_fn=lambda x: x > splits_expected + ) + + frags = self.get_dir_ino("/splitdir")['dirfrags'] + self.assertEqual(len(frags), branch_factor**(depth+1)) + self.assertEqual( + sum([len(f['dentries']) for f in frags]), + target_files + ) + except: + # On failures, log what fragmentation we actually ended + # up with. This block is just for logging, at the end + # we raise the exception again. 
+ frags = self.get_dir_ino("/splitdir")['dirfrags'] + log.info("depth={0} splits_expected={1} files_written={2}".format( + depth, splits_expected, files_written + )) + log.info("Dirfrags:") + for f in frags: + log.info("{0}: {1}".format( + f['dirfrag'], len(f['dentries']) + )) + raise + + depth += 1 + + # Remember the inode number because we will be checking for + # objects later. + dir_inode_no = self.mount_a.path_to_ino("splitdir") + + self.mount_a.run_shell(["rm", "-rf", "splitdir/"]) + self.mount_a.umount_wait() + + self.fs.mds_asok(['flush', 'journal']) + + def _check_pq_finished(): + num_strays = self.fs.mds_asok(['perf', 'dump', 'mds_cache'])['mds_cache']['num_strays'] + pq_ops = self.fs.mds_asok(['perf', 'dump', 'purge_queue'])['purge_queue']['pq_executing'] + return num_strays == 0 and pq_ops == 0 + + # Wait for all strays to purge + self.wait_until_true( + lambda: _check_pq_finished(), + timeout=1200 + ) + # Check that the metadata pool objects for all the myriad + # child fragments are gone + metadata_objs = self.fs.radosmo(["ls"], stdout=StringIO()).strip() + frag_objs = [] + for o in metadata_objs.split("\n"): + if o.startswith("{0:x}.".format(dir_inode_no)): + frag_objs.append(o) + self.assertListEqual(frag_objs, []) + + def test_split_straydir(self): + """ + That stray dir is split when it becomes too large. + """ + def _count_fragmented(): + mdsdir_cache = self.fs.read_cache("~mdsdir", 1) + num = 0 + for ino in mdsdir_cache: + if ino["ino"] == 0x100: + continue + if len(ino["dirfrags"]) > 1: + log.info("straydir 0x{:X} is fragmented".format(ino["ino"])) + num += 1; + return num + + split_size = 50 + merge_size = 5 + split_bits = 1 + + self._configure( + mds_bal_split_size=split_size, + mds_bal_merge_size=merge_size, + mds_bal_split_bits=split_bits, + mds_bal_fragment_size_max=(split_size * 100) + ) + + # manually split/merge + self.assertEqual(_count_fragmented(), 0) + self.fs.mds_asok(["dirfrag", "split", "~mdsdir/stray8", "0/0", "1"]) + self.fs.mds_asok(["dirfrag", "split", "~mdsdir/stray9", "0/0", "1"]) + self.wait_until_true( + lambda: _count_fragmented() == 2, + timeout=30 + ) + + time.sleep(30) + + self.fs.mds_asok(["dirfrag", "merge", "~mdsdir/stray8", "0/0"]) + self.wait_until_true( + lambda: _count_fragmented() == 1, + timeout=30 + ) + + time.sleep(30) + + # auto merge + + # merging stray dirs is driven by MDCache::advance_stray() + # advance stray dir 10 times + for _ in range(10): + self.fs.mds_asok(['flush', 'journal']) + + self.wait_until_true( + lambda: _count_fragmented() == 0, + timeout=30 + ) + + # auto split + + # there are 10 stray dirs. advance stray dir 20 times + self.mount_a.create_n_files("testdir1/file", split_size * 20) + self.mount_a.run_shell(["mkdir", "testdir2"]) + testdir1_path = os.path.join(self.mount_a.mountpoint, "testdir1") + for i in self.mount_a.ls(testdir1_path): + self.mount_a.run_shell(["ln", "testdir1/{0}".format(i), "testdir2/"]) + + self.mount_a.umount_wait() + self.mount_a.mount_wait() + self.mount_a.wait_until_mounted() + + # flush journal and restart mds. after restart, testdir2 is not in mds' cache + self.fs.mds_asok(['flush', 'journal']) + self.mds_cluster.mds_fail_restart() + self.fs.wait_for_daemons() + # splitting stray dirs is driven by MDCache::advance_stray() + # advance stray dir after unlink 'split_size' files. 
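
The checks that follow (and most of this file) lean on wait_until_true(), which is essentially poll-with-timeout. A minimal sketch of that pattern; the real helper lives in the CephFSTestCase base class and may differ in its retry interval and error reporting:

import time

def wait_until_true(condition, timeout, period=1):
    """Poll `condition` every `period` seconds until it returns True,
    raising RuntimeError if `timeout` seconds elapse first."""
    elapsed = 0
    while not condition():
        if elapsed >= timeout:
            raise RuntimeError(f"timed out after {elapsed}s waiting for condition")
        time.sleep(period)
        elapsed += period
    return True

# e.g. wait_until_true(lambda: _count_fragmented() > 0, timeout=30)
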
+ self.fs.mds_asok(['config', 'set', 'mds_log_events_per_segment', str(split_size)]) + + self.assertEqual(_count_fragmented(), 0) + self.mount_a.run_shell(["rm", "-rf", "testdir1"]) + self.wait_until_true( + lambda: _count_fragmented() > 0, + timeout=30 + ) + + def test_dir_merge_with_snap_items(self): + """ + That directory remain fragmented when snapshot items are taken into account. + """ + split_size = 1000 + merge_size = 100 + self._configure( + mds_bal_split_size=split_size, + mds_bal_merge_size=merge_size, + mds_bal_split_bits=1 + ) + + # split the dir + create_files = split_size + 50 + self.mount_a.create_n_files("splitdir/file_", create_files) + + self.wait_until_true( + lambda: self.get_splits() == 1, + timeout=30 + ) + + frags = self.get_dir_ino("/splitdir")['dirfrags'] + self.assertEqual(len(frags), 2) + self.assertEqual(frags[0]['dirfrag'], "0x10000000000.0*") + self.assertEqual(frags[1]['dirfrag'], "0x10000000000.1*") + self.assertEqual( + sum([len(f['dentries']) for f in frags]), create_files + ) + + self.assertEqual(self.get_merges(), 0) + + self.mount_a.run_shell(["mkdir", "splitdir/.snap/snap_a"]) + self.mount_a.run_shell(["mkdir", "splitdir/.snap/snap_b"]) + self.mount_a.run_shell(["rm", "-f", run.Raw("splitdir/file*")]) + + time.sleep(30) + + self.assertEqual(self.get_merges(), 0) + self.assertEqual(len(self.get_dir_ino("/splitdir")["dirfrags"]), 2) diff --git a/qa/tasks/cephfs/test_fscrypt.py b/qa/tasks/cephfs/test_fscrypt.py new file mode 100644 index 000000000..11dd2038f --- /dev/null +++ b/qa/tasks/cephfs/test_fscrypt.py @@ -0,0 +1,77 @@ +from logging import getLogger + +from io import StringIO +from tasks.cephfs.xfstests_dev import XFSTestsDev + + +log = getLogger(__name__) + + +class TestFscrypt(XFSTestsDev): + + def setup_xfsprogs_devs(self): + self.install_xfsprogs = True + + def require_kernel_mount(self): + from tasks.cephfs.fuse_mount import FuseMount + from tasks.cephfs.kernel_mount import KernelMount + + # TODO: make xfstests-dev compatible with ceph-fuse. xfstests-dev + # remounts CephFS before running tests using kernel, so ceph-fuse + # mounts are never actually tested. + if isinstance(self.mount_a, FuseMount): + self.skipTest('Requires kernel client; xfstests-dev not '\ + 'compatible with ceph-fuse ATM.') + elif isinstance(self.mount_a, KernelMount): + log.info('client is kernel mounted') + + def test_fscrypt_encrypt(self): + self.require_kernel_mount() + + # XXX: check_status is set to False so that we can check for command's + # failure on our own (since this command doesn't set right error code + # and error message in some cases) and print custom log messages + # accordingly. + proc = self.mount_a.client_remote.run(args=['sudo', 'env', 'DIFF_LENGTH=0', + './check', '-g', 'encrypt'], cwd=self.xfstests_repo_path, stdout=StringIO(), + stderr=StringIO(), timeout=900, check_status=False, omit_sudo=False, + label='running tests for encrypt from xfstests-dev') + + if proc.returncode != 0: + log.info('Command failed.') + log.info(f'Command return value: {proc.returncode}') + stdout, stderr = proc.stdout.getvalue(), proc.stderr.getvalue() + log.info(f'Command stdout -\n{stdout}') + log.info(f'Command stderr -\n{stderr}') + + # Currently only the 395,396,397,421,429,435,440,580,593,595 and 598 + # of the 26 test cases will be actually ran, all the others will be + # skipped for now because of not supporting features in kernel or kceph. 
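+        # Note on the assertions below: the xfstests summary line apparently
+        # counts skipped cases as well, so a clean run is still expected to
+        # report "Passed all 26 tests" even though only the subset listed
+        # above actually executes.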
+ self.assertEqual(proc.returncode, 0) + self.assertIn('Passed all 26 tests', stdout) + + def test_fscrypt_dummy_encryption_with_quick_group(self): + self.require_kernel_mount() + + self.write_local_config('test_dummy_encryption') + + # XXX: check_status is set to False so that we can check for command's + # failure on our own (since this command doesn't set right error code + # and error message in some cases) and print custom log messages + # accordingly. This will take a long time and set the timeout to 3 hours. + proc = self.mount_a.client_remote.run(args=['sudo', 'env', 'DIFF_LENGTH=0', + './check', '-g', 'quick', '-E', './ceph.exclude'], cwd=self.xfstests_repo_path, + stdout=StringIO(), stderr=StringIO(), timeout=10800, check_status=False, + omit_sudo=False, label='running tests for dummy_encryption from xfstests-dev') + + if proc.returncode != 0: + log.info('Command failed.') + log.info(f'Command return value: {proc.returncode}') + stdout, stderr = proc.stdout.getvalue(), proc.stderr.getvalue() + log.info(f'Command stdout -\n{stdout}') + log.info(f'Command stderr -\n{stderr}') + + # Currently, many test cases will be skipped due to unsupported features, + # but still will be marked as successful. + self.assertEqual(proc.returncode, 0) + self.assertIn('Passed all ', stdout) diff --git a/qa/tasks/cephfs/test_fstop.py b/qa/tasks/cephfs/test_fstop.py new file mode 100644 index 000000000..ed76eaac2 --- /dev/null +++ b/qa/tasks/cephfs/test_fstop.py @@ -0,0 +1,114 @@ +import logging +import json + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError +from teuthology.contextutil import safe_while + +log = logging.getLogger(__name__) + + +class TestFSTop(CephFSTestCase): + CLIENTS_REQUIRED = 2 + + def setUp(self): + super(TestFSTop, self).setUp() + self._enable_mgr_stats_plugin() + + def tearDown(self): + self._disable_mgr_stats_plugin() + super(TestFSTop, self).tearDown() + + def _enable_mgr_stats_plugin(self): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "stats") + + def _disable_mgr_stats_plugin(self): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "stats") + + def _fstop_dump(self, *args): + return self.mount_a.run_shell(['cephfs-top', + '--id=admin', + *args]).stdout.getvalue() + + def _get_metrics(self, verifier_callback, trials, *args): + metrics = None + done = False + with safe_while(sleep=1, tries=trials, action='wait for metrics') as proceed: + while proceed(): + metrics = json.loads(self._fstop_dump(*args)) + done = verifier_callback(metrics) + if done: + break + return done, metrics + + # TESTS + def test_fstop_non_existent_cluster(self): + try: + self.mount_a.run_shell(['cephfs-top', + '--cluster=hpec', + '--id=admin', + '--selftest']) + except CommandFailedError: + pass + else: + raise RuntimeError('expected cephfs-top command to fail.') + + def test_fstop(self): + try: + self.mount_a.run_shell(['cephfs-top', + '--id=admin', + '--selftest']) + except CommandFailedError: + raise RuntimeError('cephfs-top --selftest failed') + + def test_dump(self): + """ + Tests 'cephfs-top --dump' output is valid + """ + def verify_fstop_metrics(metrics): + clients = metrics.get(self.fs.name, {}) + if str(self.mount_a.get_global_id()) in clients and \ + str(self.mount_b.get_global_id()) in clients: + return True + return False + + # validate + valid, metrics = self._get_metrics(verify_fstop_metrics, 30, '--dump') + log.debug("metrics={0}".format(metrics)) + 
self.assertTrue(valid) + + def test_dumpfs(self): + """ + Tests 'cephfs-top --dumpfs' output is valid + """ + newfs_name = "cephfs_b" + + def verify_fstop_metrics(metrics): + clients = metrics.get(newfs_name, {}) + if self.fs.name not in metrics and \ + str(self.mount_b.get_global_id()) in clients: + return True + return False + + # umount mount_b, mount another filesystem on it and use --dumpfs filter + self.mount_b.umount_wait() + + self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", "enable_multiple", "true", + "--yes-i-really-mean-it") + + # create a new filesystem + fs_b = self.mds_cluster.newfs(name=newfs_name) + + # mount cephfs_b on mount_b + self.mount_b.mount_wait(cephfs_name=fs_b.name) + + # validate + valid, metrics = self._get_metrics(verify_fstop_metrics, 30, + '--dumpfs={}'.format(newfs_name)) + log.debug("metrics={0}".format(metrics)) + + # restore mount_b + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.fs.name) + + self.assertTrue(valid) diff --git a/qa/tasks/cephfs/test_full.py b/qa/tasks/cephfs/test_full.py new file mode 100644 index 000000000..2b3a7d5f9 --- /dev/null +++ b/qa/tasks/cephfs/test_full.py @@ -0,0 +1,398 @@ +import json +import logging +import os +from textwrap import dedent +from typing import Optional +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase + + +log = logging.getLogger(__name__) + + +class FullnessTestCase(CephFSTestCase): + CLIENTS_REQUIRED = 2 + + # Subclasses define whether they're filling whole cluster or just data pool + data_only = False + + # Subclasses define how many bytes should be written to achieve fullness + pool_capacity: Optional[int] = None + fill_mb = None + + def is_full(self): + return self.fs.is_full() + + def setUp(self): + CephFSTestCase.setUp(self) + + mds_status = self.fs.rank_asok(["status"]) + + # Capture the initial OSD map epoch for later use + self.initial_osd_epoch = mds_status['osdmap_epoch_barrier'] + + def test_barrier(self): + """ + That when an OSD epoch barrier is set on an MDS, subsequently + issued capabilities cause clients to update their OSD map to that + epoch. + """ + + # script that sync up client with MDS OSD map barrier. The barrier should + # be updated by cap flush ack message. + pyscript = dedent(""" + import os + fd = os.open("{path}", os.O_CREAT | os.O_RDWR, 0O600) + os.fchmod(fd, 0O666) + os.fsync(fd) + os.close(fd) + """) + + # Sync up client with initial MDS OSD map barrier. + path = os.path.join(self.mount_a.mountpoint, "foo") + self.mount_a.run_python(pyscript.format(path=path)) + + # Grab mounts' initial OSD epochs: later we will check that + # it hasn't advanced beyond this point. 
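+        # get_osd_epoch() is assumed here to return an (epoch, barrier) pair
+        # as seen from the client's side; the barrier is the epoch the MDS
+        # has told the client it must reach before using newly issued caps.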
+ mount_a_initial_epoch, mount_a_initial_barrier = self.mount_a.get_osd_epoch() + + # Freshly mounted at start of test, should be up to date with OSD map + self.assertGreaterEqual(mount_a_initial_epoch, self.initial_osd_epoch) + + # Set and unset a flag to cause OSD epoch to increment + self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause") + self.fs.mon_manager.raw_cluster_cmd("osd", "unset", "pause") + + out = self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip() + new_epoch = json.loads(out)['epoch'] + self.assertNotEqual(self.initial_osd_epoch, new_epoch) + + # Do a metadata operation on clients, witness that they end up with + # the old OSD map from startup time (nothing has prompted client + # to update its map) + path = os.path.join(self.mount_a.mountpoint, "foo") + self.mount_a.run_python(pyscript.format(path=path)) + mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch() + self.assertEqual(mount_a_epoch, mount_a_initial_epoch) + self.assertEqual(mount_a_barrier, mount_a_initial_barrier) + + # Set a barrier on the MDS + self.fs.rank_asok(["osdmap", "barrier", new_epoch.__str__()]) + + # Sync up client with new MDS OSD map barrier + path = os.path.join(self.mount_a.mountpoint, "baz") + self.mount_a.run_python(pyscript.format(path=path)) + mount_a_epoch, mount_a_barrier = self.mount_a.get_osd_epoch() + self.assertEqual(mount_a_barrier, new_epoch) + + # Some time passes here because the metadata part of the operation + # completes immediately, while the resulting OSD map update happens + # asynchronously (it's an Objecter::_maybe_request_map) as a result + # of seeing the new epoch barrier. + self.wait_until_true( + lambda: self.mount_a.get_osd_epoch()[0] >= new_epoch, + timeout=30) + + def _data_pool_name(self): + data_pool_names = self.fs.get_data_pool_names() + if len(data_pool_names) > 1: + raise RuntimeError("This test can't handle multiple data pools") + else: + return data_pool_names[0] + + def _test_full(self, easy_case): + """ + - That a client trying to write data to a file is prevented + from doing so with an -EFULL result + - That they are also prevented from creating new files by the MDS. + - That they may delete another file to get the system healthy again + + :param easy_case: if true, delete a successfully written file to + free up space. else, delete the file that experienced + the failed write. + """ + + osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd')) + + log.info("Writing {0}MB should fill this cluster".format(self.fill_mb)) + + # Fill up the cluster. 
This dd may or may not fail, as it depends on + # how soon the cluster recognises its own fullness + self.mount_a.write_n_mb("large_file_a", self.fill_mb // 2) + try: + self.mount_a.write_n_mb("large_file_b", (self.fill_mb * 1.1) // 2) + except CommandFailedError: + log.info("Writing file B failed (full status happened already)") + assert self.is_full() + else: + log.info("Writing file B succeeded (full status will happen soon)") + self.wait_until_true(lambda: self.is_full(), + timeout=osd_mon_report_interval * 120) + + # Attempting to write more data should give me ENOSPC + with self.assertRaises(CommandFailedError) as ar: + self.mount_a.write_n_mb("large_file_b", 50, seek=self.fill_mb // 2) + self.assertEqual(ar.exception.exitstatus, 1) # dd returns 1 on "No space" + + # Wait for the MDS to see the latest OSD map so that it will reliably + # be applying the policy of rejecting non-deletion metadata operations + # while in the full state. + osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] + self.wait_until_true( + lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch, + timeout=10) + + if not self.data_only: + with self.assertRaises(CommandFailedError): + self.mount_a.write_n_mb("small_file_1", 0) + + # Clear out some space + if easy_case: + self.mount_a.run_shell(['rm', '-f', 'large_file_a']) + self.mount_a.run_shell(['rm', '-f', 'large_file_b']) + else: + # In the hard case it is the file that filled the system. + # Before the new #7317 (ENOSPC, epoch barrier) changes, this + # would fail because the last objects written would be + # stuck in the client cache as objecter operations. + self.mount_a.run_shell(['rm', '-f', 'large_file_b']) + self.mount_a.run_shell(['rm', '-f', 'large_file_a']) + + # Here we are waiting for two things to happen: + # * The MDS to purge the stray folder and execute object deletions + # * The OSDs to inform the mon that they are no longer full + self.wait_until_true(lambda: not self.is_full(), + timeout=osd_mon_report_interval * 120) + + # Wait for the MDS to see the latest OSD map so that it will reliably + # be applying the free space policy + osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] + self.wait_until_true( + lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch, + timeout=10) + + # Now I should be able to write again + self.mount_a.write_n_mb("large_file", 50, seek=0) + + # Ensure that the MDS keeps its OSD epoch barrier across a restart + + def test_full_different_file(self): + self._test_full(True) + + def test_full_same_file(self): + self._test_full(False) + + def _remote_write_test(self, template): + """ + Run some remote python in a way that's useful for + testing free space behaviour (see test_* methods using this) + """ + file_path = os.path.join(self.mount_a.mountpoint, "full_test_file") + + # Enough to trip the full flag + osd_mon_report_interval = int(self.fs.get_config("osd_mon_report_interval", service_type='osd')) + mon_tick_interval = int(self.fs.get_config("mon_tick_interval", service_type="mon")) + + # Sufficient data to cause RADOS cluster to go 'full' + log.info("pool capacity {0}, {1}MB should be enough to fill it".format(self.pool_capacity, self.fill_mb)) + + # Long enough for RADOS cluster to notice it is full and set flag on mons + # (report_interval for mon to learn PG stats, tick interval for it to update OSD map, + # factor of 1.5 for I/O + network latency in committing OSD map and 
distributing it + # to the OSDs) + full_wait = (osd_mon_report_interval + mon_tick_interval) * 1.5 + + # Configs for this test should bring this setting down in order to + # run reasonably quickly + if osd_mon_report_interval > 10: + log.warning("This test may run rather slowly unless you decrease" + "osd_mon_report_interval (5 is a good setting)!") + + # set the object_size to 1MB to make the objects destributed more evenly + # among the OSDs to fix Tracker#45434 + file_layout = "stripe_unit=1048576 stripe_count=1 object_size=1048576" + self.mount_a.run_python(template.format( + fill_mb=self.fill_mb, + file_path=file_path, + file_layout=file_layout, + full_wait=full_wait, + is_fuse=isinstance(self.mount_a, FuseMount) + )) + + def test_full_fclose(self): + # A remote script which opens a file handle, fills up the filesystem, and then + # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync + remote_script = dedent(""" + import time + import datetime + import subprocess + import os + + # Write some buffered data through before going full, all should be well + print("writing some data through which we expect to succeed") + bytes = 0 + f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT) + os.setxattr("{file_path}", 'ceph.file.layout', b'{file_layout}') + bytes += os.write(f, b'a' * 512 * 1024) + os.fsync(f) + print("fsync'ed data successfully, will now attempt to fill fs") + + # Okay, now we're going to fill up the filesystem, and then keep + # writing until we see an error from fsync. As long as we're doing + # buffered IO, the error should always only appear from fsync and not + # from write + full = False + + for n in range(0, int({fill_mb} * 0.9)): + bytes += os.write(f, b'x' * 1024 * 1024) + print("wrote {{0}} bytes via buffered write, may repeat".format(bytes)) + print("done writing {{0}} bytes".format(bytes)) + + # OK, now we should sneak in under the full condition + # due to the time it takes the OSDs to report to the + # mons, and get a successful fsync on our full-making data + os.fsync(f) + print("successfully fsync'ed prior to getting full state reported") + + # buffered write, add more dirty data to the buffer + print("starting buffered write") + try: + for n in range(0, int({fill_mb} * 0.2)): + bytes += os.write(f, b'x' * 1024 * 1024) + print("sleeping a bit as we've exceeded 90% of our expected full ratio") + time.sleep({full_wait}) + except OSError: + pass; + + print("wrote, now waiting 30s and then doing a close we expect to fail") + + # Wait long enough for a background flush that should fail + time.sleep(30) + + if {is_fuse}: + # ...and check that the failed background flush is reflected in fclose + try: + os.close(f) + except OSError: + print("close() returned an error as expected") + else: + raise RuntimeError("close() failed to raise error") + else: + # The kernel cephfs client does not raise errors on fclose + os.close(f) + + os.unlink("{file_path}") + """) + self._remote_write_test(remote_script) + + def test_full_fsync(self): + """ + That when the full flag is encountered during asynchronous + flushes, such that an fwrite() succeeds but an fsync/fclose() + should return the ENOSPC error. 
+ """ + + # A remote script which opens a file handle, fills up the filesystem, and then + # checks that ENOSPC errors on buffered writes appear correctly as errors in fsync + remote_script = dedent(""" + import time + import datetime + import subprocess + import os + + # Write some buffered data through before going full, all should be well + print("writing some data through which we expect to succeed") + bytes = 0 + f = os.open("{file_path}", os.O_WRONLY | os.O_CREAT) + os.setxattr("{file_path}", 'ceph.file.layout', b'{file_layout}') + bytes += os.write(f, b'a' * 4096) + os.fsync(f) + print("fsync'ed data successfully, will now attempt to fill fs") + + # Okay, now we're going to fill up the filesystem, and then keep + # writing until we see an error from fsync. As long as we're doing + # buffered IO, the error should always only appear from fsync and not + # from write + full = False + + for n in range(0, int({fill_mb} * 1.1)): + try: + bytes += os.write(f, b'x' * 1024 * 1024) + print("wrote bytes via buffered write, moving on to fsync") + except OSError as e: + if {is_fuse}: + print("Unexpected error %s from write() instead of fsync()" % e) + raise + else: + print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0))) + full = True + break + + try: + os.fsync(f) + print("fsync'ed successfully") + except OSError as e: + print("Reached fullness after %.2f MB" % (bytes / (1024.0 * 1024.0))) + full = True + break + else: + print("Not full yet after %.2f MB" % (bytes / (1024.0 * 1024.0))) + + if n > {fill_mb} * 0.9: + # Be cautious in the last region where we expect to hit + # the full condition, so that we don't overshoot too dramatically + print("sleeping a bit as we've exceeded 90% of our expected full ratio") + time.sleep({full_wait}) + + if not full: + raise RuntimeError("Failed to reach fullness after writing %d bytes" % bytes) + + # close() should not raise an error because we already caught it in + # fsync. There shouldn't have been any more writeback errors + # since then because all IOs got cancelled on the full flag. + print("calling close") + os.close(f) + print("close() did not raise error") + + os.unlink("{file_path}") + """) + + self._remote_write_test(remote_script) + + +class TestQuotaFull(FullnessTestCase): + """ + Test per-pool fullness, which indicates quota limits exceeded + """ + pool_capacity = 1024 * 1024 * 32 # arbitrary low-ish limit + fill_mb = pool_capacity // (1024 * 1024) # type: ignore + + # We are only testing quota handling on the data pool, not the metadata + # pool. + data_only = True + + def setUp(self): + super(TestQuotaFull, self).setUp() + + pool_name = self.fs.get_data_pool_name() + self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", pool_name, + "max_bytes", "{0}".format(self.pool_capacity)) + + +class TestClusterFull(FullnessTestCase): + """ + Test data pool fullness, which indicates that an OSD has become too full + """ + pool_capacity = None + REQUIRE_MEMSTORE = True + + def setUp(self): + super(TestClusterFull, self).setUp() + + if self.pool_capacity is None: + TestClusterFull.pool_capacity = self.fs.get_pool_df(self._data_pool_name())['max_avail'] + TestClusterFull.fill_mb = (self.pool_capacity // (1024 * 1024)) + +# Hide the parent class so that unittest.loader doesn't try to run it. 
+del globals()['FullnessTestCase'] diff --git a/qa/tasks/cephfs/test_journal_migration.py b/qa/tasks/cephfs/test_journal_migration.py new file mode 100644 index 000000000..67b514c22 --- /dev/null +++ b/qa/tasks/cephfs/test_journal_migration.py @@ -0,0 +1,100 @@ + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from tasks.workunit import task as workunit + +JOURNAL_FORMAT_LEGACY = 0 +JOURNAL_FORMAT_RESILIENT = 1 + + +class TestJournalMigration(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 2 + + def test_journal_migration(self): + old_journal_version = JOURNAL_FORMAT_LEGACY + new_journal_version = JOURNAL_FORMAT_RESILIENT + + self.mount_a.umount_wait() + self.fs.mds_stop() + + # Create a filesystem using the older journal format. + self.fs.set_ceph_conf('mds', 'mds journal format', old_journal_version) + self.fs.mds_restart() + self.fs.recreate() + + # Enable standby replay, to cover the bug case #8811 where + # a standby replay might mistakenly end up trying to rewrite + # the journal at the same time as an active daemon. + self.fs.set_allow_standby_replay(True) + + status = self.fs.wait_for_daemons() + + self.assertTrue(self.fs.get_replay(status=status) is not None) + + # Do some client work so that the log is populated with something. + with self.mount_a.mounted_wait(): + self.mount_a.create_files() + self.mount_a.check_files() # sanity, this should always pass + + # Run a more substantial workunit so that the length of the log to be + # coverted is going span at least a few segments + workunit(self.ctx, { + 'clients': { + "client.{0}".format(self.mount_a.client_id): ["suites/fsstress.sh"], + }, + "timeout": "3h" + }) + + # Modify the ceph.conf to ask the MDS to use the new journal format. + self.fs.set_ceph_conf('mds', 'mds journal format', new_journal_version) + + # Restart the MDS. + self.fs.mds_fail_restart() + + # This ensures that all daemons come up into a valid state + status = self.fs.wait_for_daemons() + + # Check that files created in the initial client workload are still visible + # in a client mount. + with self.mount_a.mounted_wait(): + self.mount_a.check_files() + + # Verify that the journal really has been rewritten. 
+        journal_version = self.fs.get_journal_version()
+        if journal_version != new_journal_version:
+            raise RuntimeError("Journal was not upgraded, version should be {0} but is {1}".format(
+                new_journal_version, journal_version
+            ))
+
+        # Verify that cephfs-journal-tool can now read the rewritten journal
+        inspect_out = self.fs.journal_tool(["journal", "inspect"], 0)
+        if not inspect_out.endswith(": OK"):
+            raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
+                inspect_out
+            ))
+
+        self.fs.journal_tool(["event", "get", "json",
+                              "--path", "/tmp/journal.json"], 0)
+        p = self.fs.tool_remote.sh([
+            "python3",
+            "-c",
+            "import json; print(len(json.load(open('/tmp/journal.json'))))"
+        ])
+        event_count = int(p.strip())
+        if event_count < 1000:
+            # Approximate value of "lots", expected from having run fsstress
+            raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))
+
+        # Do some client work to check that writing the log is still working
+        with self.mount_a.mounted_wait():
+            workunit(self.ctx, {
+                'clients': {
+                    "client.{0}".format(self.mount_a.client_id): ["fs/misc/trivial_sync.sh"],
+                },
+                "timeout": "3h"
+            })
+
+        # Check that both an active and a standby replay are still up
+        status = self.fs.status()
+        self.assertEqual(len(list(self.fs.get_replays(status=status))), 1)
+        self.assertEqual(len(list(self.fs.get_ranks(status=status))), 1)
diff --git a/qa/tasks/cephfs/test_journal_repair.py b/qa/tasks/cephfs/test_journal_repair.py
new file mode 100644
index 000000000..c5769784d
--- /dev/null
+++ b/qa/tasks/cephfs/test_journal_repair.py
@@ -0,0 +1,405 @@
+
+"""
+Test our tools for recovering the content of damaged journals
+"""
+
+import json
+import logging
+from textwrap import dedent
+import time
+
+from teuthology.exceptions import CommandFailedError, ConnectionLostError
+from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO
+from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
+from tasks.workunit import task as workunit
+
+log = logging.getLogger(__name__)
+
+
+class TestJournalRepair(CephFSTestCase):
+    MDSS_REQUIRED = 2
+
+    def test_inject_to_empty(self):
+        """
+        That when some dentries are in the journal but nothing is in
+        the backing store, we correctly populate the backing store
+        from the journalled dentries.
+ """ + + # Inject metadata operations + self.mount_a.run_shell(["touch", "rootfile"]) + self.mount_a.run_shell(["mkdir", "subdir"]) + self.mount_a.run_shell(["touch", "subdir/subdirfile"]) + # There are several different paths for handling hardlinks, depending + # on whether an existing dentry (being overwritten) is also a hardlink + self.mount_a.run_shell(["mkdir", "linkdir"]) + + # Test inode -> remote transition for a dentry + self.mount_a.run_shell(["touch", "linkdir/link0"]) + self.mount_a.run_shell(["rm", "-f", "linkdir/link0"]) + self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link0"]) + + # Test nothing -> remote transition + self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link1"]) + + # Test remote -> inode transition + self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link2"]) + self.mount_a.run_shell(["rm", "-f", "linkdir/link2"]) + self.mount_a.run_shell(["touch", "linkdir/link2"]) + + # Test remote -> diff remote transition + self.mount_a.run_shell(["ln", "subdir/subdirfile", "linkdir/link3"]) + self.mount_a.run_shell(["rm", "-f", "linkdir/link3"]) + self.mount_a.run_shell(["ln", "rootfile", "linkdir/link3"]) + + # Test an empty directory + self.mount_a.run_shell(["mkdir", "subdir/subsubdir"]) + self.mount_a.run_shell(["sync"]) + + # Before we unmount, make a note of the inode numbers, later we will + # check that they match what we recover from the journal + rootfile_ino = self.mount_a.path_to_ino("rootfile") + subdir_ino = self.mount_a.path_to_ino("subdir") + linkdir_ino = self.mount_a.path_to_ino("linkdir") + subdirfile_ino = self.mount_a.path_to_ino("subdir/subdirfile") + subsubdir_ino = self.mount_a.path_to_ino("subdir/subsubdir") + + self.mount_a.umount_wait() + + # Stop the MDS + self.fs.fail() + + # Now, the journal should contain the operations, but the backing + # store shouldn't + with self.assertRaises(ObjectNotFound): + self.fs.list_dirfrag(subdir_ino) + self.assertEqual(self.fs.list_dirfrag(ROOT_INO), []) + + # Execute the dentry recovery, this should populate the backing store + self.fs.journal_tool(['event', 'recover_dentries', 'list'], 0) + + # Dentries in ROOT_INO are present + self.assertEqual(sorted(self.fs.list_dirfrag(ROOT_INO)), sorted(['rootfile_head', 'subdir_head', 'linkdir_head'])) + self.assertEqual(self.fs.list_dirfrag(subdir_ino), ['subdirfile_head', 'subsubdir_head']) + self.assertEqual(sorted(self.fs.list_dirfrag(linkdir_ino)), + sorted(['link0_head', 'link1_head', 'link2_head', 'link3_head'])) + + # Now check the MDS can read what we wrote: truncate the journal + # and start the mds. + self.fs.journal_tool(['journal', 'reset'], 0) + self.fs.set_joinable() + self.fs.wait_for_daemons() + + # List files + self.mount_a.mount_wait() + + # First ls -R to populate MDCache, such that hardlinks will + # resolve properly (recover_dentries does not create backtraces, + # so ordinarily hardlinks to inodes that happen not to have backtraces + # will be invisible in readdir). 
+ # FIXME: hook in forward scrub here to regenerate backtraces + proc = self.mount_a.run_shell(['ls', '-R']) + self.mount_a.umount_wait() # remount to clear client cache before our second ls + self.mount_a.mount_wait() + + proc = self.mount_a.run_shell(['ls', '-R']) + self.assertEqual(proc.stdout.getvalue().strip(), + dedent(""" + .: + linkdir + rootfile + subdir + + ./linkdir: + link0 + link1 + link2 + link3 + + ./subdir: + subdirfile + subsubdir + + ./subdir/subsubdir: + """).strip()) + + # Check the correct inos were preserved by path + self.assertEqual(rootfile_ino, self.mount_a.path_to_ino("rootfile")) + self.assertEqual(subdir_ino, self.mount_a.path_to_ino("subdir")) + self.assertEqual(subdirfile_ino, self.mount_a.path_to_ino("subdir/subdirfile")) + self.assertEqual(subsubdir_ino, self.mount_a.path_to_ino("subdir/subsubdir")) + + # Check that the hard link handling came out correctly + self.assertEqual(self.mount_a.path_to_ino("linkdir/link0"), subdirfile_ino) + self.assertEqual(self.mount_a.path_to_ino("linkdir/link1"), subdirfile_ino) + self.assertNotEqual(self.mount_a.path_to_ino("linkdir/link2"), subdirfile_ino) + self.assertEqual(self.mount_a.path_to_ino("linkdir/link3"), rootfile_ino) + + # Create a new file, ensure it is not issued the same ino as one of the + # recovered ones + self.mount_a.run_shell(["touch", "afterwards"]) + new_ino = self.mount_a.path_to_ino("afterwards") + self.assertNotIn(new_ino, [rootfile_ino, subdir_ino, subdirfile_ino]) + + # Check that we can do metadata ops in the recovered directory + self.mount_a.run_shell(["touch", "subdir/subsubdir/subsubdirfile"]) + + @for_teuthology # 308s + def test_reset(self): + """ + That after forcibly modifying the backing store, we can get back into + a good state by resetting the MDSMap. + + The scenario is that we have two active MDSs, and we lose the journals. Once + we have completely lost confidence in the integrity of the metadata, we want to + return the system to a single-MDS state to go into a scrub to recover what we + can. + """ + + # Set max_mds to 2 + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + rank0_gid = self.fs.get_rank(rank=0, status=status)['gid'] + self.fs.set_joinable(False) # no unintended failover + + # Create a dir on each rank + self.mount_a.run_shell_payload("mkdir {alpha,bravo} && touch {alpha,bravo}/file") + self.mount_a.setfattr("alpha/", "ceph.dir.pin", "0") + self.mount_a.setfattr("bravo/", "ceph.dir.pin", "1") + + # Ensure the pinning has taken effect and the /bravo dir is now + # migrated to rank 1. + self._wait_subtrees([('/bravo', 1), ('/alpha', 0)], rank=0, status=status) + + # Do some IO (this should be split across ranks according to + # the rank-pinned dirs) + self.mount_a.create_n_files("alpha/file", 1000) + self.mount_a.create_n_files("bravo/file", 1000) + + # Flush the journals so that we have some backing store data + # belonging to one MDS, and some to the other MDS. 
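+        # (The flush is presumably what pushes each rank's dirty metadata
+        # out to the metadata pool, so both /alpha and /bravo have some
+        # persistent backing-store objects before rank 1 is broken below.)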
+ self.fs.rank_asok(["flush", "journal"], rank=0) + self.fs.rank_asok(["flush", "journal"], rank=1) + + # Stop (hard) the second MDS daemon + self.fs.rank_fail(rank=1) + + # Wipe out the tables for MDS rank 1 so that it is broken and can't start + # (this is the simulated failure that we will demonstrate that the disaster + # recovery tools can get us back from) + self.fs.erase_metadata_objects(prefix="mds1_") + + # Try to access files from the client + blocked_ls = self.mount_a.run_shell(["ls", "-R"], wait=False) + + # Check that this "ls -R" blocked rather than completing: indicates + # it got stuck trying to access subtrees which were on the now-dead MDS. + log.info("Sleeping to check ls is blocked...") + time.sleep(60) + self.assertFalse(blocked_ls.finished) + + # This mount is now useless because it will depend on MDS rank 1, and MDS rank 1 + # is not coming back. Kill it. + log.info("Killing mount, it's blocked on the MDS we killed") + self.mount_a.kill() + self.mount_a.kill_cleanup() + try: + # Now that the mount is dead, the ls -R should error out. + blocked_ls.wait() + except (CommandFailedError, ConnectionLostError): + # The ConnectionLostError case is for kernel client, where + # killing the mount also means killing the node. + pass + + # See that the second MDS will crash when it starts and tries to + # acquire rank 1 + self.fs.set_joinable(True) + + # The daemon taking the damaged rank should start starting, then + # restart back into standby after asking the mon to mark the rank + # damaged. + def is_marked_damaged(): + mds_map = self.fs.get_mds_map() + return 1 in mds_map['damaged'] + + self.wait_until_true(is_marked_damaged, 60) + self.assertEqual(rank0_gid, self.fs.get_rank(rank=0)['gid']) + + # Now give up and go through a disaster recovery procedure + self.fs.fail() + # Invoke recover_dentries quietly, because otherwise log spews millions of lines + self.fs.journal_tool(["event", "recover_dentries", "summary"], 0, quiet=True) + self.fs.journal_tool(["event", "recover_dentries", "summary"], 1, quiet=True) + self.fs.table_tool(["0", "reset", "session"]) + self.fs.journal_tool(["journal", "reset"], 0) + self.fs.erase_mds_objects(1) + self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name, + '--yes-i-really-mean-it') + + # Bring an MDS back online, mount a client, and see that we can walk the full + # filesystem tree again + self.fs.set_joinable(True) # redundant with `fs reset` + status = self.fs.wait_for_daemons() + self.assertEqual(len(list(self.fs.get_ranks(status=status))), 1) + self.mount_a.mount_wait() + self.mount_a.run_shell(["ls", "-R"], wait=True) + + def test_table_tool(self): + self.mount_a.run_shell(["touch", "foo"]) + self.fs.rank_asok(["flush", "journal"]) + + log.info(self.fs.table_tool(["all", "show", "inode"])) + log.info(self.fs.table_tool(["all", "show", "snap"])) + log.info(self.fs.table_tool(["all", "show", "session"])) + + # Inode table should always be the same because initial state + # and choice of inode are deterministic. 
+ # Should see one inode consumed + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "inode"])), + {"0": { + "data": { + "version": 2, + "inotable": { + "projected_free": [ + {"start": 1099511628777, + "len": 1099511626775}], + "free": [ + {"start": 1099511628777, + "len": 1099511626775}]}}, + "result": 0}} + + ) + + # Should see one session + session_data = json.loads(self.fs.table_tool( + ["all", "show", "session"])) + self.assertEqual(len(session_data["0"]["data"]["sessions"]), 1) + self.assertEqual(session_data["0"]["result"], 0) + + # Should see no snaps + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "snap"])), + {"version": 1, + "snapserver": {"last_snap": 1, + "last_created": 1, + "last_destroyed": 1, + "pending_noop": [], + "snaps": [], + "need_to_purge": {}, + "pending_update": [], + "pending_destroy": []}, + "result": 0} + ) + + # Reset everything + for table in ["session", "inode", "snap"]: + self.fs.table_tool(["all", "reset", table]) + + log.info(self.fs.table_tool(["all", "show", "inode"])) + log.info(self.fs.table_tool(["all", "show", "snap"])) + log.info(self.fs.table_tool(["all", "show", "session"])) + + # Should see 0 sessions + session_data = json.loads(self.fs.table_tool( + ["all", "show", "session"])) + self.assertEqual(len(session_data["0"]["data"]["sessions"]), 0) + self.assertEqual(session_data["0"]["result"], 0) + + # Should see entire inode range now marked free + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "inode"])), + {"0": {"data": {"version": 1, + "inotable": {"projected_free": [ + {"start": 1099511627776, + "len": 1099511627776}], + "free": [ + {"start": 1099511627776, + "len": 1099511627776}]}}, + "result": 0}} + ) + + # Should see no snaps + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "snap"])), + {"version": 1, + "snapserver": {"last_snap": 1, + "last_created": 1, + "last_destroyed": 1, + "pending_noop": [], + "snaps": [], + "need_to_purge": {}, + "pending_update": [], + "pending_destroy": []}, + "result": 0} + ) + + def test_table_tool_take_inos(self): + initial_range_start = 1099511627776 + initial_range_len = 1099511627776 + # Initially a completely clear range + self.assertEqual( + json.loads(self.fs.table_tool(["all", "show", "inode"])), + {"0": {"data": {"version": 0, + "inotable": {"projected_free": [ + {"start": initial_range_start, + "len": initial_range_len}], + "free": [ + {"start": initial_range_start, + "len": initial_range_len}]}}, + "result": 0}} + ) + + # Remove some + self.assertEqual( + json.loads(self.fs.table_tool(["all", "take_inos", "{0}".format(initial_range_start + 100)])), + {"0": {"data": {"version": 1, + "inotable": {"projected_free": [ + {"start": initial_range_start + 101, + "len": initial_range_len - 101}], + "free": [ + {"start": initial_range_start + 101, + "len": initial_range_len - 101}]}}, + "result": 0}} + ) + + @for_teuthology # Hack: "for_teuthology" because .sh doesn't work outside teuth + def test_journal_smoke(self): + workunit(self.ctx, { + 'clients': { + "client.{0}".format(self.mount_a.client_id): [ + "fs/misc/trivial_sync.sh"], + }, + "timeout": "1h" + }) + + for mount in self.mounts: + mount.umount_wait() + + self.fs.fail() + + # journal tool smoke + workunit(self.ctx, { + 'clients': { + "client.{0}".format(self.mount_a.client_id): [ + "suites/cephfs_journal_tool_smoke.sh"], + }, + "timeout": "1h" + }) + + + + self.fs.set_joinable() + self.fs.wait_for_daemons() + + self.mount_a.mount_wait() + + # trivial sync moutn a + 
workunit(self.ctx, { + 'clients': { + "client.{0}".format(self.mount_a.client_id): [ + "fs/misc/trivial_sync.sh"], + }, + "timeout": "1h" + }) + diff --git a/qa/tasks/cephfs/test_mantle.py b/qa/tasks/cephfs/test_mantle.py new file mode 100644 index 000000000..746c2ffe3 --- /dev/null +++ b/qa/tasks/cephfs/test_mantle.py @@ -0,0 +1,111 @@ +from io import StringIO + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +import json +import logging + +log = logging.getLogger(__name__) +failure = "using old balancer; mantle failed for balancer=" +success = "mantle balancer version changed: " + +class TestMantle(CephFSTestCase): + def start_mantle(self): + self.wait_for_health_clear(timeout=30) + self.fs.set_max_mds(2) + self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30, + reject_fn=lambda v: v > 2 or v < 1) + + for m in self.fs.get_active_names(): + self.fs.mds_asok(['config', 'set', 'debug_objecter', '20'], mds_id=m) + self.fs.mds_asok(['config', 'set', 'debug_ms', '0'], mds_id=m) + self.fs.mds_asok(['config', 'set', 'debug_mds', '0'], mds_id=m) + self.fs.mds_asok(['config', 'set', 'debug_mds_balancer', '5'], mds_id=m) + + def push_balancer(self, obj, lua_code, expect): + self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', obj) + self.fs.radosm(["put", obj, "-"], stdin=StringIO(lua_code)) + with self.assert_cluster_log(failure + obj + " " + expect): + log.info("run a " + obj + " balancer that expects=" + expect) + + def test_version_empty(self): + self.start_mantle() + expect = " : (2) No such file or directory" + + ret = self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer') + assert(ret == 22) # EINVAL + + self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', " ") + with self.assert_cluster_log(failure + " " + expect): pass + + def test_version_not_in_rados(self): + self.start_mantle() + expect = failure + "ghost.lua : (2) No such file or directory" + self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "ghost.lua") + with self.assert_cluster_log(expect): pass + + def test_balancer_invalid(self): + self.start_mantle() + expect = ": (22) Invalid argument" + + lua_code = "this is invalid lua code!" 
+ self.push_balancer("invalid.lua", lua_code, expect) + + lua_code = "BAL_LOG()" + self.push_balancer("invalid_log.lua", lua_code, expect) + + lua_code = "BAL_LOG(0)" + self.push_balancer("invalid_log_again.lua", lua_code, expect) + + def test_balancer_valid(self): + self.start_mantle() + lua_code = "BAL_LOG(0, \"test\")\nreturn {3, 4}" + self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") + self.fs.radosm(["put", "valid.lua", "-"], stdin=StringIO(lua_code)) + with self.assert_cluster_log(success + "valid.lua"): + log.info("run a valid.lua balancer") + + def test_return_invalid(self): + self.start_mantle() + expect = ": (22) Invalid argument" + + lua_code = "return \"hello\"" + self.push_balancer("string.lua", lua_code, expect) + + lua_code = "return 3" + self.push_balancer("number.lua", lua_code, expect) + + lua_code = "return {}" + self.push_balancer("dict_empty.lua", lua_code, expect) + + lua_code = "return {\"this\", \"is\", \"a\", \"test\"}" + self.push_balancer("dict_of_strings.lua", lua_code, expect) + + lua_code = "return {3, \"test\"}" + self.push_balancer("dict_of_mixed.lua", lua_code, expect) + + lua_code = "return {3}" + self.push_balancer("not_enough_numbers.lua", lua_code, expect) + + lua_code = "return {3, 4, 5, 6, 7, 8, 9}" + self.push_balancer("too_many_numbers.lua", lua_code, expect) + + def test_dead_osd(self): + self.start_mantle() + expect = " : (110) Connection timed out" + + # kill the OSDs so that the balancer pull from RADOS times out + osd_map = json.loads(self.fs.mon_manager.raw_cluster_cmd('osd', 'dump', '--format=json-pretty')) + for i in range(0, len(osd_map['osds'])): + self.fs.mon_manager.raw_cluster_cmd_result('osd', 'down', str(i)) + self.fs.mon_manager.raw_cluster_cmd_result('osd', 'out', str(i)) + + # trigger a pull from RADOS + self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") + + # make the timeout a little longer since dead OSDs spam ceph -w + with self.assert_cluster_log(failure + "valid.lua" + expect, timeout=30): + log.info("run a balancer that should timeout") + + # cleanup + for i in range(0, len(osd_map['osds'])): + self.fs.mon_manager.raw_cluster_cmd_result('osd', 'in', str(i)) diff --git a/qa/tasks/cephfs/test_mds_metrics.py b/qa/tasks/cephfs/test_mds_metrics.py new file mode 100644 index 000000000..ad877f622 --- /dev/null +++ b/qa/tasks/cephfs/test_mds_metrics.py @@ -0,0 +1,643 @@ +import os +import json +import time +import random +import logging +import errno + +from teuthology.contextutil import safe_while, MaxWhileTries +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +class TestMDSMetrics(CephFSTestCase): + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 3 + + TEST_DIR_PERFIX = "test_mds_metrics" + + def setUp(self): + super(TestMDSMetrics, self).setUp() + self._start_with_single_active_mds() + self._enable_mgr_stats_plugin() + + def tearDown(self): + self._disable_mgr_stats_plugin() + super(TestMDSMetrics, self).tearDown() + + def _start_with_single_active_mds(self): + curr_max_mds = self.fs.get_var('max_mds') + if curr_max_mds > 1: + self.fs.shrink(1) + + def verify_mds_metrics(self, active_mds_count=1, client_count=1, ranks=[], mul_fs=[]): + def verify_metrics_cbk(metrics): + mds_metrics = metrics['metrics'] + if not len(mds_metrics) == active_mds_count + 1: # n active mdss + delayed set + return False + fs_status = self.fs.status() + nonlocal ranks, mul_fs 
+ if not ranks: + if not mul_fs: + mul_fs = [self.fs.id] + for filesystem in mul_fs: + ranks = set([info['rank'] for info in fs_status.get_ranks(filesystem)]) + for rank in ranks: + r = mds_metrics.get("mds.{}".format(rank), None) + if not r or not len(mds_metrics['delayed_ranks']) == 0: + return False + for item in mul_fs: + key = fs_status.get_fsmap(item)['mdsmap']['fs_name'] + global_metrics = metrics['global_metrics'].get(key, {}) + client_metadata = metrics['client_metadata'].get(key, {}) + if not len(global_metrics) >= client_count or not len(client_metadata) >= client_count: + return False + return True + return verify_metrics_cbk + + def _fs_perf_stats(self, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", *args) + + def _enable_mgr_stats_plugin(self): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "stats") + + def _disable_mgr_stats_plugin(self): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "stats") + + def _spread_directory_on_all_ranks(self, fscid): + fs_status = self.fs.status() + ranks = set([info['rank'] for info in fs_status.get_ranks(fscid)]) + # create a per-rank pinned directory + for rank in ranks: + dirname = "{0}_{1}".format(TestMDSMetrics.TEST_DIR_PERFIX, rank) + self.mount_a.run_shell(["mkdir", dirname]) + self.mount_a.setfattr(dirname, "ceph.dir.pin", str(rank)) + log.info("pinning directory {0} to rank {1}".format(dirname, rank)) + for i in range(16): + filename = "{0}.{1}".format("test", i) + self.mount_a.write_n_mb(os.path.join(dirname, filename), 1) + + def _do_spread_io(self, fscid): + # spread readdir I/O + self.mount_b.run_shell(["find", "."]) + + def _do_spread_io_all_clients(self, fscid): + # spread readdir I/O + self.mount_a.run_shell(["find", "."]) + self.mount_b.run_shell(["find", "."]) + + def _cleanup_test_dirs(self): + dirnames = self.mount_a.run_shell(["ls"]).stdout.getvalue() + for dirname in dirnames.split("\n"): + if dirname.startswith(TestMDSMetrics.TEST_DIR_PERFIX): + log.info("cleaning directory {}".format(dirname)) + self.mount_a.run_shell(["rm", "-rf", dirname]) + + def _get_metrics(self, verifier_callback, trials, *args): + metrics = None + done = False + with safe_while(sleep=1, tries=trials, action='wait for metrics') as proceed: + while proceed(): + metrics = json.loads(self._fs_perf_stats(*args)) + done = verifier_callback(metrics) + if done: + break + return done, metrics + + def _setup_fs(self, fs_name): + fs_a = self.mds_cluster.newfs(name=fs_name) + + self.mds_cluster.mds_restart() + + # Wait for filesystem to go healthy + fs_a.wait_for_daemons() + + # Reconfigure client auth caps + for mount in self.mounts: + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', f"client.{mount.client_id}", + 'mds', 'allow', + 'mon', 'allow r', + 'osd', f'allow rw pool={fs_a.get_data_pool_name()}') + + return fs_a + + # basic check to verify if we get back metrics from each active mds rank + + def test_metrics_from_rank(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + def test_metrics_post_client_disconnection(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + self.mount_a.umount_wait() + + valid, metrics = 
self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED - 1), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + def test_metrics_mds_grow(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # grow the mds cluster + self.fs.grow(2) + + fscid = self.fs.id + # spread directory per rank + self._spread_directory_on_all_ranks(fscid) + + # spread some I/O + self._do_spread_io(fscid) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + valid, metrics = self._get_metrics(self.verify_mds_metrics( + active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED) , 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # cleanup test directories + self._cleanup_test_dirs() + + def test_metrics_mds_grow_and_shrink(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # grow the mds cluster + self.fs.grow(2) + + fscid = self.fs.id + # spread directory per rank + self._spread_directory_on_all_ranks(fscid) + + # spread some I/O + self._do_spread_io(fscid) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # shrink mds cluster + self.fs.shrink(1) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # cleanup test directories + self._cleanup_test_dirs() + + def test_delayed_metrics(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # grow the mds cluster + self.fs.grow(2) + + fscid = self.fs.id + # spread directory per rank + self._spread_directory_on_all_ranks(fscid) + + # spread some I/O + self._do_spread_io(fscid) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # do not give this mds any chance + delayed_rank = 1 + mds_id_rank0 = self.fs.get_rank(rank=0)['name'] + mds_id_rank1 = self.fs.get_rank(rank=1)['name'] + + self.fs.set_inter_mds_block(True, mds_id_rank0, mds_id_rank1) + + def verify_delayed_metrics(metrics): + mds_metrics = metrics['metrics'] + r = mds_metrics.get("mds.{}".format(delayed_rank), None) + if not r or not delayed_rank in mds_metrics['delayed_ranks']: + return False + return True + # validate + valid, metrics = self._get_metrics(verify_delayed_metrics, 30) + log.debug("metrics={0}".format(metrics)) + + self.assertTrue(valid) + self.fs.set_inter_mds_block(False, mds_id_rank0, mds_id_rank1) + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + 
log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # cleanup test directories + self._cleanup_test_dirs() + + def test_query_mds_filter(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # grow the mds cluster + self.fs.grow(2) + + fscid = self.fs.id + # spread directory per rank + self._spread_directory_on_all_ranks(fscid) + + # spread some I/O + self._do_spread_io(fscid) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + filtered_mds = 1 + def verify_filtered_mds_rank_metrics(metrics): + # checks if the metrics has only client_metadata and + # global_metrics filtered using --mds_rank=1 + global_metrics = metrics['global_metrics'].get(self.fs.name, {}) + client_metadata = metrics['client_metadata'].get(self.fs.name, {}) + mds_metrics = metrics['metrics'] + if len(mds_metrics) != 2 or f"mds.{filtered_mds}" not in mds_metrics: + return False + if len(global_metrics) > TestMDSMetrics.CLIENTS_REQUIRED or\ + len(client_metadata) > TestMDSMetrics.CLIENTS_REQUIRED: + return False + if len(set(global_metrics) - set(mds_metrics[f"mds.{filtered_mds}"])) or\ + len(set(client_metadata) - set(mds_metrics[f"mds.{filtered_mds}"])): + return False + return True + # initiate a new query with `--mds_rank` filter and validate if + # we get metrics *only* from that mds. + valid, metrics = self._get_metrics(verify_filtered_mds_rank_metrics, 30, + f'--mds_rank={filtered_mds}') + log.debug(f"metrics={metrics}") + self.assertTrue(valid, "Incorrect 'ceph fs perf stats' output" + f" with filter '--mds_rank={filtered_mds}'") + + def test_query_client_filter(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + mds_metrics = metrics['metrics'] + # pick an random client + client = random.choice(list(mds_metrics['mds.0'].keys())) + # could have used regex to extract client id + client_id = (client.split(' ')[0]).split('.')[-1] + + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=1), 30, '--client_id={}'.format(client_id)) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + def test_query_client_ip_filter(self): + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + client_matadata = metrics['client_metadata'][self.fs.name] + # pick an random client + client = random.choice(list(client_matadata.keys())) + # get IP of client to use in filter + client_ip = client_matadata[client]['IP'] + + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=1), 30, '--client_ip={}'.format(client_ip)) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # verify IP from output with filter IP + for i in metrics['client_metadata'][self.fs.name]: + self.assertEqual(client_ip, metrics['client_metadata'][self.fs.name][i]['IP']) + + def test_query_mds_and_client_filter(self): + # validate + valid, metrics = self._get_metrics( + 
self.verify_mds_metrics(client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + # grow the mds cluster + self.fs.grow(2) + + fscid = self.fs.id + # spread directory per rank + self._spread_directory_on_all_ranks(fscid) + + # spread some I/O + self._do_spread_io_all_clients(fscid) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(active_mds_count=2, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + mds_metrics = metrics['metrics'] + + # pick an random client + client = random.choice(list(mds_metrics['mds.1'].keys())) + # could have used regex to extract client id + client_id = (client.split(' ')[0]).split('.')[-1] + filtered_mds = 1 + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=1, ranks=[filtered_mds]), + 30, '--mds_rank={}'.format(filtered_mds), '--client_id={}'.format(client_id)) + log.debug("metrics={0}".format(metrics)) + self.assertTrue(valid) + + def test_for_invalid_mds_rank(self): + invalid_mds_rank = "1," + # try, 'fs perf stat' command with invalid mds_rank + try: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", "--mds_rank", invalid_mds_rank) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise + else: + raise RuntimeError("expected the 'fs perf stat' command to fail for invalid mds_rank") + + def test_for_invalid_client_id(self): + invalid_client_id = "abcd" + # try, 'fs perf stat' command with invalid client_id + try: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", "--client_id", invalid_client_id) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise + else: + raise RuntimeError("expected the 'fs perf stat' command to fail for invalid client_id") + + def test_for_invalid_client_ip(self): + invalid_client_ip = "1.2.3" + # try, 'fs perf stat' command with invalid client_ip + try: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", "--client_ip", invalid_client_ip) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise + else: + raise RuntimeError("expected the 'fs perf stat' command to fail for invalid client_ip") + + def test_perf_stats_stale_metrics(self): + """ + That `ceph fs perf stats` doesn't output stale metrics after the rank0 MDS failover + """ + # validate + valid, metrics = self._get_metrics(self.verify_mds_metrics( + active_mds_count=1, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug(f'metrics={metrics}') + self.assertTrue(valid) + + # mount_a and mount_b are the clients mounted for TestMDSMetrics. So get their + # entries from the global_metrics. 
+ client_a_name = f'client.{self.mount_a.get_global_id()}' + client_b_name = f'client.{self.mount_b.get_global_id()}' + + global_metrics = metrics['global_metrics'] + client_a_metrics = global_metrics[self.fs.name][client_a_name] + client_b_metrics = global_metrics[self.fs.name][client_b_name] + + # fail rank0 mds + self.fs.rank_fail(rank=0) + + # Wait for rank0 up:active state + self.fs.wait_for_state('up:active', rank=0, timeout=30) + + fscid = self.fs.id + + # spread directory per rank + self._spread_directory_on_all_ranks(fscid) + + # spread some I/O + self._do_spread_io_all_clients(fscid) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + try: + valid, metrics_new = self._get_metrics(self.verify_mds_metrics( + active_mds_count=1, client_count=TestMDSMetrics.CLIENTS_REQUIRED), 30) + log.debug(f'metrics={metrics_new}') + self.assertTrue(valid) + + client_metadata = metrics_new['client_metadata'] + client_a_metadata = client_metadata.get(self.fs.name, {}).get(client_a_name, {}) + client_b_metadata = client_metadata.get(self.fs.name, {}).get(client_b_name, {}) + + global_metrics = metrics_new['global_metrics'] + client_a_metrics_new = global_metrics.get(self.fs.name, {}).get(client_a_name, {}) + client_b_metrics_new = global_metrics.get(self.fs.name, {}).get(client_b_name, {}) + + # the metrics should be different for the test to succeed. + self.assertTrue(client_a_metadata and client_b_metadata and + client_a_metrics_new and client_b_metrics_new and + (client_a_metrics_new != client_a_metrics) and + (client_b_metrics_new != client_b_metrics), + "Invalid 'ceph fs perf stats' metrics after rank0 mds failover") + except MaxWhileTries: + raise RuntimeError("Failed to fetch 'ceph fs perf stats' metrics") + finally: + # cleanup test directories + self._cleanup_test_dirs() + + def test_client_metrics_and_metadata(self): + self.mount_a.umount_wait() + self.mount_b.umount_wait() + self.fs.delete_all_filesystems() + + self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", + "enable_multiple", "true", "--yes-i-really-mean-it") + + # creating filesystem + fs_a = self._setup_fs(fs_name="fs1") + + # Mount a client on fs_a + self.mount_a.mount_wait(cephfs_name=fs_a.name) + self.mount_a.write_n_mb("pad.bin", 1) + self.mount_a.write_n_mb("test.bin", 2) + self.mount_a.path_to_ino("test.bin") + self.mount_a.create_files() + + # creating another filesystem + fs_b = self._setup_fs(fs_name="fs2") + + # Mount a client on fs_b + self.mount_b.mount_wait(cephfs_name=fs_b.name) + self.mount_b.write_n_mb("test.bin", 1) + self.mount_b.path_to_ino("test.bin") + self.mount_b.create_files() + + fscid_list = [fs_a.id, fs_b.id] + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=1, mul_fs=fscid_list), 30) + log.debug(f"metrics={metrics}") + self.assertTrue(valid) + + client_metadata_a = metrics['client_metadata']['fs1'] + client_metadata_b = metrics['client_metadata']['fs2'] + + for i in client_metadata_a: + if not (client_metadata_a[i]['hostname']): + raise RuntimeError("hostname of fs1 not found!") + if not (client_metadata_a[i]['valid_metrics']): + raise RuntimeError("valid_metrics of fs1 not found!") + + for i in client_metadata_b: + if not (client_metadata_b[i]['hostname']): + raise RuntimeError("hostname of fs2 not found!") + if not (client_metadata_b[i]['valid_metrics']): + raise RuntimeError("valid_metrics of fs2 not found!") + + def test_non_existing_mds_rank(self): + def verify_filtered_metrics(metrics): + # checks if the metrics 
has non empty client_metadata and global_metrics + if metrics['client_metadata'].get(self.fs.name, {})\ + or metrics['global_metrics'].get(self.fs.name, {}): + return True + return False + + try: + # validate + filter_rank = random.randint(1, 10) + valid, metrics = self._get_metrics(verify_filtered_metrics, 30, + '--mds_rank={}'.format(filter_rank)) + log.info(f'metrics={metrics}') + self.assertFalse(valid, "Fetched 'ceph fs perf stats' metrics using nonexistent MDS rank") + except MaxWhileTries: + # success + pass + + def test_perf_stats_stale_metrics_with_multiple_filesystem(self): + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", + "enable_multiple", "true", "--yes-i-really-mean-it") + + # creating filesystem + fs_b = self._setup_fs(fs_name="fs2") + + # Mount a client on fs_b + self.mount_b.mount_wait(cephfs_name=fs_b.name) + self.mount_b.write_n_mb("test.bin", 1) + self.mount_b.path_to_ino("test.bin") + self.mount_b.create_files() + + # creating another filesystem + fs_a = self._setup_fs(fs_name="fs1") + + # Mount a client on fs_a + self.mount_a.mount_wait(cephfs_name=fs_a.name) + self.mount_a.write_n_mb("pad.bin", 1) + self.mount_a.write_n_mb("test.bin", 2) + self.mount_a.path_to_ino("test.bin") + self.mount_a.create_files() + + # validate + valid, metrics = self._get_metrics( + self.verify_mds_metrics(client_count=1, mul_fs=[fs_a.id, fs_b.id]), 30) + log.debug(f"metrics={metrics}") + self.assertTrue(valid) + + # get mounted client's entries from the global_metrics. + client_a_name = f'client.{self.mount_a.get_global_id()}' + + global_metrics = metrics['global_metrics'] + client_a_metrics = global_metrics.get("fs1", {}).get(client_a_name, {}) + + # fail active mds of fs_a + fs_a_mds = fs_a.get_active_names()[0] + self.mds_cluster.mds_fail(fs_a_mds) + fs_a.wait_for_state('up:active', rank=0, timeout=30) + + # spread directory per rank + self._spread_directory_on_all_ranks(fs_a.id) + + # spread some I/O + self._do_spread_io_all_clients(fs_a.id) + + # wait a bit for mgr to get updated metrics + time.sleep(5) + + # validate + try: + valid, metrics_new = self._get_metrics( + self.verify_mds_metrics(client_count=1, mul_fs=[fs_a.id, fs_b.id]), 30) + log.debug(f'metrics={metrics_new}') + self.assertTrue(valid) + + client_metadata = metrics_new['client_metadata'] + client_a_metadata = client_metadata.get("fs1", {}).get(client_a_name, {}) + + global_metrics = metrics_new['global_metrics'] + client_a_metrics_new = global_metrics.get("fs1", {}).get(client_a_name, {}) + + # the metrics should be different for the test to succeed. 
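The staleness check here (and in test_perf_stats_stale_metrics above) boils down to comparing one client's counters before and after an MDS failover. A small sketch of that comparison, assuming both arguments are parsed outputs of `ceph fs perf stats`:

    def client_metrics_changed(before, after, fs_name, client_name):
        # 'global_metrics' is keyed by filesystem name and then by client name;
        # the check passes only if the client is still reported after the
        # failover and at least one of its counters has moved
        old = before.get("global_metrics", {}).get(fs_name, {}).get(client_name, {})
        new = after.get("global_metrics", {}).get(fs_name, {}).get(client_name, {})
        return bool(new) and new != old

With a helper like that, the assertion below amounts to client_metrics_changed(metrics, metrics_new, "fs1", client_a_name) plus the non-empty client_metadata check.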
+ self.assertTrue(client_a_metadata and client_a_metrics_new + and (client_a_metrics_new != client_a_metrics), + "Invalid 'ceph fs perf stats' metrics after" + f" rank0 mds of {fs_a.name} failover") + except MaxWhileTries: + raise RuntimeError("Failed to fetch `ceph fs perf stats` metrics") + finally: + # cleanup test directories + self._cleanup_test_dirs() + diff --git a/qa/tasks/cephfs/test_meta_injection.py b/qa/tasks/cephfs/test_meta_injection.py new file mode 100644 index 000000000..916b30a25 --- /dev/null +++ b/qa/tasks/cephfs/test_meta_injection.py @@ -0,0 +1,38 @@ +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +class TestMetaInjection(CephFSTestCase): + def test_meta_injection(self): + conf_ori = self.fs.mds_asok(['config', 'show']) + self.fs.mds_asok(['config', 'set', 'mds_log_max_segments', '1']) + self.mount_a.run_shell(["mkdir", "metadir"]) + self.mount_a.run_shell(["touch", "metadir/metafile1"]) + self.mount_a.run_shell(["touch", "metadir/metafile2"]) + self.fs.mds_asok(['flush', 'journal']) + dirino = self.mount_a.path_to_ino("metadir") + ino = self.mount_a.path_to_ino("metadir/metafile1") + + # export meta of ino + self.fs.meta_tool(['showm', '-i', str(ino), '-o', '/tmp/meta_out'], 0, True) + out = self.mount_a.run_shell(['grep', str(ino),'/tmp/meta_out']).stdout.getvalue().strip() + + # check the metadata of ino + self.assertNotEqual(out.find(u'"ino":'+ str(ino)), -1) + + # amend info of ino + self.fs.get_meta_of_fs_file(dirino, "metafile1", "/tmp/meta_obj") + self.fs.meta_tool(['amend', '-i', str(ino), '--in', '/tmp/meta_out', '--yes-i-really-really-mean-it'], 0, True) + self.fs.get_meta_of_fs_file(dirino, "metafile1", "/tmp/meta_obj_chg") + + # checkout meta_out after import it + ori_mds5 = self.mount_a.run_shell(["md5sum", "/tmp/meta_obj"]).stdout.getvalue().strip().split() + chg_mds5 = self.mount_a.run_shell(["md5sum", "/tmp/meta_obj_chg"]).stdout.getvalue().strip().split() + print(ori_mds5," ==> ", chg_mds5) + self.assertEqual(len(ori_mds5), 2) + self.assertEqual(len(chg_mds5), 2) + self.assertEqual(ori_mds5[0], chg_mds5[0]) + + self.mount_a.run_shell(["rm", "metadir", "-rf"]) + self.mount_a.run_shell(["rm", "/tmp/meta_obj"]) + self.mount_a.run_shell(["rm", "/tmp/meta_obj_chg"]) + # restore config of mds_log_max_segments + self.fs.mds_asok(['config', 'set', 'mds_log_max_segments', conf_ori["mds_log_max_segments"]]) diff --git a/qa/tasks/cephfs/test_mirroring.py b/qa/tasks/cephfs/test_mirroring.py new file mode 100644 index 000000000..c1a940e3f --- /dev/null +++ b/qa/tasks/cephfs/test_mirroring.py @@ -0,0 +1,1298 @@ +import os +import json +import errno +import logging +import random +import time + +from io import StringIO +from collections import deque + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError +from teuthology.contextutil import safe_while + +log = logging.getLogger(__name__) + +class TestMirroring(CephFSTestCase): + MDSS_REQUIRED = 5 + CLIENTS_REQUIRED = 2 + REQUIRE_BACKUP_FILESYSTEM = True + + MODULE_NAME = "mirroring" + + def setUp(self): + super(TestMirroring, self).setUp() + self.primary_fs_name = self.fs.name + self.primary_fs_id = self.fs.id + self.secondary_fs_name = self.backup_fs.name + self.secondary_fs_id = self.backup_fs.id + self.enable_mirroring_module() + + def tearDown(self): + self.disable_mirroring_module() + super(TestMirroring, self).tearDown() + + def enable_mirroring_module(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", 
TestMirroring.MODULE_NAME) + + def disable_mirroring_module(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", TestMirroring.MODULE_NAME) + + def enable_mirroring(self, fs_name, fs_id): + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "enable", fs_name) + time.sleep(10) + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + self.assertTrue(res['peers'] == {}) + self.assertTrue(res['snap_dirs']['dir_count'] == 0) + + def disable_mirroring(self, fs_name, fs_id): + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "disable", fs_name) + time.sleep(10) + # verify via asok + try: + self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + except CommandFailedError: + pass + else: + raise RuntimeError('expected admin socket to be unavailable') + + def verify_peer_added(self, fs_name, fs_id, peer_spec, remote_fs_name=None): + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + peer_uuid = self.get_peer_uuid(peer_spec) + self.assertTrue(peer_uuid in res['peers']) + client_name = res['peers'][peer_uuid]['remote']['client_name'] + cluster_name = res['peers'][peer_uuid]['remote']['cluster_name'] + self.assertTrue(peer_spec == f'{client_name}@{cluster_name}') + if remote_fs_name: + self.assertTrue(self.secondary_fs_name == res['peers'][peer_uuid]['remote']['fs_name']) + else: + self.assertTrue(self.fs_name == res['peers'][peer_uuid]['remote']['fs_name']) + + def peer_add(self, fs_name, fs_id, peer_spec, remote_fs_name=None): + if remote_fs_name: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_add", fs_name, peer_spec, remote_fs_name) + else: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_add", fs_name, peer_spec) + time.sleep(10) + self.verify_peer_added(fs_name, fs_id, peer_spec, remote_fs_name) + + def peer_remove(self, fs_name, fs_id, peer_spec): + peer_uuid = self.get_peer_uuid(peer_spec) + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_remove", fs_name, peer_uuid) + time.sleep(10) + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + self.assertTrue(res['peers'] == {} and res['snap_dirs']['dir_count'] == 0) + + def bootstrap_peer(self, fs_name, client_name, site_name): + outj = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd( + "fs", "snapshot", "mirror", "peer_bootstrap", "create", fs_name, client_name, site_name)) + return outj['token'] + + def import_peer(self, fs_name, token): + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_bootstrap", "import", + fs_name, token) + + def add_directory(self, fs_name, fs_id, dir_name): + # get initial dir count + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + dir_count = res['snap_dirs']['dir_count'] + log.debug(f'initial dir_count={dir_count}') + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", fs_name, dir_name) + + time.sleep(10) + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + new_dir_count = res['snap_dirs']['dir_count'] + 
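These helpers (enable/disable mirroring, peer_add/peer_remove, add_directory and the remove_directory that follows) all validate themselves against the cephfs-mirror admin socket. The same query can be issued by hand; a sketch assuming the default socket path used later in this file, /var/run/ceph/cephfs-mirror.asok (teuthology runs override it via fs/mirror/clients/mirror.yaml), and a placeholder filesystem name and id:

    import json
    import subprocess

    ASOK = "/var/run/ceph/cephfs-mirror.asok"   # assumed default; overridden in test runs

    def mirror_status(fs_name, fs_id):
        # ask the cephfs-mirror daemon for its view of one filesystem
        out = subprocess.check_output(
            ["ceph", "--admin-daemon", ASOK,
             "fs", "mirror", "status", f"{fs_name}@{fs_id}"])
        return json.loads(out)

    # 'snap_dirs'/'dir_count' grows after 'ceph fs snapshot mirror add <fs> <path>'
    # and shrinks again after 'ceph fs snapshot mirror remove <fs> <path>'
    status = mirror_status("cephfs", 1)
    print(status["peers"], status["snap_dirs"]["dir_count"])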
log.debug(f'new dir_count={new_dir_count}') + self.assertTrue(new_dir_count > dir_count) + + def remove_directory(self, fs_name, fs_id, dir_name): + # get initial dir count + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + dir_count = res['snap_dirs']['dir_count'] + log.debug(f'initial dir_count={dir_count}') + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "remove", fs_name, dir_name) + + time.sleep(10) + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + new_dir_count = res['snap_dirs']['dir_count'] + log.debug(f'new dir_count={new_dir_count}') + self.assertTrue(new_dir_count < dir_count) + + def check_peer_status(self, fs_name, fs_id, peer_spec, dir_name, expected_snap_name, + expected_snap_count): + peer_uuid = self.get_peer_uuid(peer_spec) + res = self.mirror_daemon_command(f'peer status for fs: {fs_name}', + 'fs', 'mirror', 'peer', 'status', + f'{fs_name}@{fs_id}', peer_uuid) + self.assertTrue(dir_name in res) + self.assertTrue(res[dir_name]['last_synced_snap']['name'] == expected_snap_name) + self.assertTrue(res[dir_name]['snaps_synced'] == expected_snap_count) + + def check_peer_status_deleted_snap(self, fs_name, fs_id, peer_spec, dir_name, + expected_delete_count): + peer_uuid = self.get_peer_uuid(peer_spec) + res = self.mirror_daemon_command(f'peer status for fs: {fs_name}', + 'fs', 'mirror', 'peer', 'status', + f'{fs_name}@{fs_id}', peer_uuid) + self.assertTrue(dir_name in res) + self.assertTrue(res[dir_name]['snaps_deleted'] == expected_delete_count) + + def check_peer_status_renamed_snap(self, fs_name, fs_id, peer_spec, dir_name, + expected_rename_count): + peer_uuid = self.get_peer_uuid(peer_spec) + res = self.mirror_daemon_command(f'peer status for fs: {fs_name}', + 'fs', 'mirror', 'peer', 'status', + f'{fs_name}@{fs_id}', peer_uuid) + self.assertTrue(dir_name in res) + self.assertTrue(res[dir_name]['snaps_renamed'] == expected_rename_count) + + def check_peer_snap_in_progress(self, fs_name, fs_id, + peer_spec, dir_name, snap_name): + peer_uuid = self.get_peer_uuid(peer_spec) + res = self.mirror_daemon_command(f'peer status for fs: {fs_name}', + 'fs', 'mirror', 'peer', 'status', + f'{fs_name}@{fs_id}', peer_uuid) + self.assertTrue('syncing' == res[dir_name]['state']) + self.assertTrue(res[dir_name]['current_sycning_snap']['name'] == snap_name) + + def verify_snapshot(self, dir_name, snap_name): + snap_list = self.mount_b.ls(path=f'{dir_name}/.snap') + self.assertTrue(snap_name in snap_list) + + source_res = self.mount_a.dir_checksum(path=f'{dir_name}/.snap/{snap_name}', + follow_symlinks=True) + log.debug(f'source snapshot checksum {snap_name} {source_res}') + + dest_res = self.mount_b.dir_checksum(path=f'{dir_name}/.snap/{snap_name}', + follow_symlinks=True) + log.debug(f'destination snapshot checksum {snap_name} {dest_res}') + self.assertTrue(source_res == dest_res) + + def verify_failed_directory(self, fs_name, fs_id, peer_spec, dir_name): + peer_uuid = self.get_peer_uuid(peer_spec) + res = self.mirror_daemon_command(f'peer status for fs: {fs_name}', + 'fs', 'mirror', 'peer', 'status', + f'{fs_name}@{fs_id}', peer_uuid) + self.assertTrue('failed' == res[dir_name]['state']) + + def get_peer_uuid(self, peer_spec): + status = self.fs.status() + fs_map = status.get_fsmap_byname(self.primary_fs_name) + peers = fs_map['mirror_info']['peers'] + for peer_uuid, mirror_info in peers.items(): + 
client_name = mirror_info['remote']['client_name'] + cluster_name = mirror_info['remote']['cluster_name'] + remote_peer_spec = f'{client_name}@{cluster_name}' + if peer_spec == remote_peer_spec: + return peer_uuid + return None + + def get_daemon_admin_socket(self): + """overloaded by teuthology override (fs/mirror/clients/mirror.yaml)""" + return "/var/run/ceph/cephfs-mirror.asok" + + def get_mirror_daemon_pid(self): + """pid file overloaded in fs/mirror/clients/mirror.yaml""" + return self.mount_a.run_shell(['cat', '/var/run/ceph/cephfs-mirror.pid']).stdout.getvalue().strip() + + def get_mirror_rados_addr(self, fs_name, fs_id): + """return the rados addr used by cephfs-mirror instance""" + res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', + 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') + return res['rados_inst'] + + def mirror_daemon_command(self, cmd_label, *args): + asok_path = self.get_daemon_admin_socket() + try: + # use mount_a's remote to execute command + p = self.mount_a.client_remote.run(args= + ['ceph', '--admin-daemon', asok_path] + list(args), + stdout=StringIO(), stderr=StringIO(), timeout=30, + check_status=True, label=cmd_label) + p.wait() + except CommandFailedError as ce: + log.warn(f'mirror daemon command with label "{cmd_label}" failed: {ce}') + raise + res = p.stdout.getvalue().strip() + log.debug(f'command returned={res}') + return json.loads(res) + + def get_mirror_daemon_status(self): + daemon_status = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "daemon", "status")) + log.debug(f'daemon_status: {daemon_status}') + # running a single mirror daemon is supported + status = daemon_status[0] + log.debug(f'status: {status}') + return status + + def test_basic_mirror_commands(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_mirror_peer_commands(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # add peer + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # remove peer + self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph") + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_mirror_disable_with_peer(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # add peer + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_matching_peer(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + try: + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph") + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError('invalid errno when adding a matching remote peer') + else: + raise RuntimeError('adding a peer matching local spec should fail') + + # verify via asok -- nothing should get added + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + self.assertTrue(res['peers'] == {}) + + # and explicitly specifying the spec (via filesystem name) should fail too + try: + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.primary_fs_name) + except CommandFailedError as ce: + if ce.exitstatus 
!= errno.EINVAL: + raise RuntimeError('invalid errno when adding a matching remote peer') + else: + raise RuntimeError('adding a peer matching local spec should fail') + + # verify via asok -- nothing should get added + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + self.assertTrue(res['peers'] == {}) + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_mirror_peer_add_existing(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # add peer + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # adding the same peer should be idempotent + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # remove peer + self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph") + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_peer_commands_with_mirroring_disabled(self): + # try adding peer when mirroring is not enabled + try: + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a peer') + else: + raise RuntimeError(-errno.EINVAL, 'expected peer_add to fail') + + # try removing peer + try: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_remove", self.primary_fs_name, 'dummy-uuid') + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when removing a peer') + else: + raise RuntimeError(-errno.EINVAL, 'expected peer_remove to fail') + + def test_add_directory_with_mirroring_disabled(self): + # try adding a directory when mirroring is not enabled + try: + self.add_directory(self.primary_fs_name, self.primary_fs_id, "/d1") + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a directory') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory add to fail') + + def test_directory_commands(self): + self.mount_a.run_shell(["mkdir", "d1"]) + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + try: + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + except CommandFailedError as ce: + if ce.exitstatus != errno.EEXIST: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-adding a directory') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory add to fail') + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + try: + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + except CommandFailedError as ce: + if ce.exitstatus not in (errno.ENOENT, errno.EINVAL): + raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-deleting a directory') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory removal to fail') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.mount_a.run_shell(["rmdir", "d1"]) + + def test_add_relative_directory_path(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + try: + self.add_directory(self.primary_fs_name, self.primary_fs_id, 
'./d1') + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a relative path dir') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory add to fail') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_add_directory_path_normalization(self): + self.mount_a.run_shell(["mkdir", "-p", "d1/d2/d3"]) + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d2/d3') + def check_add_command_failure(dir_path): + try: + self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path) + except CommandFailedError as ce: + if ce.exitstatus != errno.EEXIST: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-adding a directory') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory add to fail') + + # everything points for /d1/d2/d3 + check_add_command_failure('/d1/d2/././././././d3') + check_add_command_failure('/d1/d2/././././././d3//////') + check_add_command_failure('/d1/d2/../d2/././././d3') + check_add_command_failure('/././././d1/./././d2/./././d3//////') + check_add_command_failure('/./d1/./d2/./d3/../../../d1/d2/d3') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.mount_a.run_shell(["rm", "-rf", "d1"]) + + def test_add_ancestor_and_child_directory(self): + self.mount_a.run_shell(["mkdir", "-p", "d1/d2/d3"]) + self.mount_a.run_shell(["mkdir", "-p", "d1/d4"]) + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d2/') + def check_add_command_failure(dir_path): + try: + self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a directory') + else: + raise RuntimeError(-errno.EINVAL, 'expected directory add to fail') + + # cannot add ancestors or a subtree for an existing directory + check_add_command_failure('/') + check_add_command_failure('/d1') + check_add_command_failure('/d1/d2/d3') + + # obviously, one can add a non-ancestor or non-subtree + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d4/') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.mount_a.run_shell(["rm", "-rf", "d1"]) + + def test_cephfs_mirror_blocklist(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # add peer + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + peers_1 = set(res['peers']) + + # fetch rados address for blacklist check + rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + + # simulate non-responding mirror daemon by sending SIGSTOP + pid = self.get_mirror_daemon_pid() + log.debug(f'SIGSTOP to cephfs-mirror pid {pid}') + self.mount_a.run_shell(['kill', '-SIGSTOP', pid]) + + # wait for blocklist timeout -- the manager module would blocklist + # the mirror daemon + time.sleep(40) + + # wake up the mirror daemon -- at this point, the daemon should know + # that it has been blocklisted + log.debug('SIGCONT to cephfs-mirror') + self.mount_a.run_shell(['kill', '-SIGCONT', pid]) + + # check if the rados addr is 
blocklisted + self.assertTrue(self.mds_cluster.is_addr_blocklisted(rados_inst)) + + # wait enough so that the mirror daemon restarts blocklisted instances + time.sleep(40) + rados_inst_new = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + + # and we should get a new rados instance + self.assertTrue(rados_inst != rados_inst_new) + + # along with peers that were added + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + peers_2 = set(res['peers']) + self.assertTrue(peers_1, peers_2) + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_stats(self): + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + # create a bunch of files in a directory to snap + self.mount_a.run_shell(["mkdir", "d0"]) + self.mount_a.create_n_files('d0/file', 50, sync=True) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # take a snapshot + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + + time.sleep(30) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0', 1) + self.verify_snapshot('d0', 'snap0') + + # some more IO + self.mount_a.run_shell(["mkdir", "d0/d00"]) + self.mount_a.run_shell(["mkdir", "d0/d01"]) + + self.mount_a.create_n_files('d0/d00/more_file', 20, sync=True) + self.mount_a.create_n_files('d0/d01/some_more_file', 75, sync=True) + + # take another snapshot + self.mount_a.run_shell(["mkdir", "d0/.snap/snap1"]) + + time.sleep(60) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap1', 2) + self.verify_snapshot('d0', 'snap1') + + # delete a snapshot + self.mount_a.run_shell(["rmdir", "d0/.snap/snap0"]) + + time.sleep(10) + snap_list = self.mount_b.ls(path='d0/.snap') + self.assertTrue('snap0' not in snap_list) + self.check_peer_status_deleted_snap(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 1) + + # rename a snapshot + self.mount_a.run_shell(["mv", "d0/.snap/snap1", "d0/.snap/snap2"]) + + time.sleep(10) + snap_list = self.mount_b.ls(path='d0/.snap') + self.assertTrue('snap1' not in snap_list) + self.assertTrue('snap2' in snap_list) + self.check_peer_status_renamed_snap(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 1) + + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_cancel_sync(self): + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), 
self.backup_fs.get_data_pool_name())) + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + # create a bunch of files in a directory to snap + self.mount_a.run_shell(["mkdir", "d0"]) + for i in range(8): + filename = f'file.{i}' + self.mount_a.write_n_mb(os.path.join('d0', filename), 1024) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # take a snapshot + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + + time.sleep(10) + self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0') + + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + + snap_list = self.mount_b.ls(path='d0/.snap') + self.assertTrue('snap0' not in snap_list) + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_restart_sync_on_blocklist(self): + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + # create a bunch of files in a directory to snap + self.mount_a.run_shell(["mkdir", "d0"]) + for i in range(8): + filename = f'file.{i}' + self.mount_a.write_n_mb(os.path.join('d0', filename), 1024) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # fetch rados address for blacklist check + rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + + # take a snapshot + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + + time.sleep(10) + self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0') + + # simulate non-responding mirror daemon by sending SIGSTOP + pid = self.get_mirror_daemon_pid() + log.debug(f'SIGSTOP to cephfs-mirror pid {pid}') + self.mount_a.run_shell(['kill', '-SIGSTOP', pid]) + + # wait for blocklist timeout -- the manager module would blocklist + # the mirror daemon + time.sleep(40) + + # wake up the mirror daemon -- at this point, the daemon should know + # that it has been blocklisted + log.debug('SIGCONT to cephfs-mirror') + self.mount_a.run_shell(['kill', '-SIGCONT', pid]) + + # check if the rados addr is blocklisted + self.assertTrue(self.mds_cluster.is_addr_blocklisted(rados_inst)) + + time.sleep(500) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0', expected_snap_count=1) + self.verify_snapshot('d0', 'snap0') + + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_failed_sync_with_correction(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + 
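The blocklist test above and the failure-correction test being set up here both key off the per-directory state exposed by 'fs mirror peer status'. A standalone sketch of that query, with the same assumed socket path as earlier and a placeholder peer UUID:

    import json
    import subprocess

    ASOK = "/var/run/ceph/cephfs-mirror.asok"   # assumed default socket path

    def peer_status(fs_name, fs_id, peer_uuid):
        # per-directory sync state for one peer, as reported by the mirror daemon
        out = subprocess.check_output(
            ["ceph", "--admin-daemon", ASOK,
             "fs", "mirror", "peer", "status", f"{fs_name}@{fs_id}", peer_uuid])
        return json.loads(out)

    def dir_sync_summary(fs_name, fs_id, peer_uuid, dir_name):
        # returns (state, last synced snapshot name, number of snapshots synced);
        # 'state' is 'syncing' while a transfer is running and 'failed' when the
        # directory could not be synchronized
        entry = peer_status(fs_name, fs_id, peer_uuid).get(dir_name, {})
        return (entry.get("state"),
                entry.get("last_synced_snap", {}).get("name"),
                entry.get("snaps_synced"))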
self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # add a non-existent directory for synchronization + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + + # wait for mirror daemon to mark it the directory as failed + time.sleep(120) + self.verify_failed_directory(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0') + + # create the directory + self.mount_a.run_shell(["mkdir", "d0"]) + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + + # wait for correction + time.sleep(120) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0', 1) + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_service_daemon_status(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + time.sleep(30) + status = self.get_mirror_daemon_status() + + # assumption for this test: mirroring enabled for a single filesystem w/ single + # peer + + # we have not added any directories + peer = status['filesystems'][0]['peers'][0] + self.assertEquals(status['filesystems'][0]['directory_count'], 0) + self.assertEquals(peer['stats']['failure_count'], 0) + self.assertEquals(peer['stats']['recovery_count'], 0) + + # add a non-existent directory for synchronization -- check if its reported + # in daemon stats + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + + time.sleep(120) + status = self.get_mirror_daemon_status() + # we added one + peer = status['filesystems'][0]['peers'][0] + self.assertEquals(status['filesystems'][0]['directory_count'], 1) + # failure count should be reflected + self.assertEquals(peer['stats']['failure_count'], 1) + self.assertEquals(peer['stats']['recovery_count'], 0) + + # create the directory, mirror daemon would recover + self.mount_a.run_shell(["mkdir", "d0"]) + + time.sleep(120) + status = self.get_mirror_daemon_status() + peer = status['filesystems'][0]['peers'][0] + self.assertEquals(status['filesystems'][0]['directory_count'], 1) + # failure and recovery count should be reflected + self.assertEquals(peer['stats']['failure_count'], 1) + self.assertEquals(peer['stats']['recovery_count'], 1) + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_mirroring_init_failure(self): + """Test mirror daemon init failure""" + + # disable mgr mirroring plugin as it would try to load dir map on + # on mirroring enabled for a filesystem (an throw up erorrs in + # the logs) + self.disable_mirroring_module() + + # enable mirroring through mon interface -- this should result in the mirror daemon + # failing to enable mirroring due to absence of `cephfs_mirorr` index object. 
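The recovery variant of this test (test_mirroring_init_failure_with_recovery, below) clears the failure by creating the missing index object by hand. A condensed sketch of that recovery step, assuming the metadata pool name is known and both the rados and ceph CLIs are available; the 'rados ... create cephfs_mirror' call is the same one used verbatim further down:

    import json
    import subprocess
    import time

    def recover_mirror_daemon(fs_name, fs_id, metadata_pool,
                              asok="/var/run/ceph/cephfs-mirror.asok"):
        # create the index object the daemon failed to find, then wait for it to
        # retry initialization and leave the 'failed' state on its own
        subprocess.check_call(["rados", "-p", metadata_pool, "create", "cephfs_mirror"])
        for _ in range(12):                      # roughly a minute of polling
            time.sleep(5)
            out = subprocess.check_output(
                ["ceph", "--admin-daemon", asok,
                 "fs", "mirror", "status", f"{fs_name}@{fs_id}"])
            if json.loads(out).get("state") != "failed":
                return True
        return False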
+ self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", self.primary_fs_name) + + with safe_while(sleep=5, tries=10, action='wait for failed state') as proceed: + while proceed(): + try: + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + if not 'state' in res: + return + self.assertTrue(res['state'] == "failed") + return True + except: + pass + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", self.primary_fs_name) + time.sleep(10) + # verify via asok + try: + self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + except CommandFailedError: + pass + else: + raise RuntimeError('expected admin socket to be unavailable') + + def test_mirroring_init_failure_with_recovery(self): + """Test if the mirror daemon can recover from a init failure""" + + # disable mgr mirroring plugin as it would try to load dir map on + # on mirroring enabled for a filesystem (an throw up erorrs in + # the logs) + self.disable_mirroring_module() + + # enable mirroring through mon interface -- this should result in the mirror daemon + # failing to enable mirroring due to absence of `cephfs_mirror` index object. + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", self.primary_fs_name) + # need safe_while since non-failed status pops up as mirroring is restarted + # internally in mirror daemon. + with safe_while(sleep=5, tries=20, action='wait for failed state') as proceed: + while proceed(): + try: + # verify via asok + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + if not 'state' in res: + return + self.assertTrue(res['state'] == "failed") + return True + except: + pass + + # create the index object and check daemon recovery + try: + p = self.mount_a.client_remote.run(args=['rados', '-p', self.fs.metadata_pool_name, 'create', 'cephfs_mirror'], + stdout=StringIO(), stderr=StringIO(), timeout=30, + check_status=True, label="create index object") + p.wait() + except CommandFailedError as ce: + log.warn(f'mirror daemon command to create mirror index object failed: {ce}') + raise + time.sleep(30) + res = self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + self.assertTrue(res['peers'] == {}) + self.assertTrue(res['snap_dirs']['dir_count'] == 0) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", self.primary_fs_name) + time.sleep(10) + # verify via asok + try: + self.mirror_daemon_command(f'mirror status for fs: {self.primary_fs_name}', + 'fs', 'mirror', 'status', f'{self.primary_fs_name}@{self.primary_fs_id}') + except CommandFailedError: + pass + else: + raise RuntimeError('expected admin socket to be unavailable') + + def test_cephfs_mirror_peer_bootstrap(self): + """Test importing peer bootstrap token""" + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # create a bootstrap token for the peer + bootstrap_token = self.bootstrap_peer(self.secondary_fs_name, "client.mirror_peer_bootstrap", "site-remote") + + # import the peer via bootstrap token + self.import_peer(self.primary_fs_name, bootstrap_token) + time.sleep(10) + self.verify_peer_added(self.primary_fs_name, 
self.primary_fs_id, "client.mirror_peer_bootstrap@site-remote", + self.secondary_fs_name) + + # verify via peer_list interface + peer_uuid = self.get_peer_uuid("client.mirror_peer_bootstrap@site-remote") + res = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_list", self.primary_fs_name)) + self.assertTrue(peer_uuid in res) + self.assertTrue('mon_host' in res[peer_uuid] and res[peer_uuid]['mon_host'] != '') + + # remove peer + self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_peer_bootstrap@site-remote") + # disable mirroring + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_symlink_sync(self): + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + # create a bunch of files w/ symbolic links in a directory to snap + self.mount_a.run_shell(["mkdir", "d0"]) + self.mount_a.create_n_files('d0/file', 10, sync=True) + self.mount_a.run_shell(["ln", "-s", "./file_0", "d0/sym_0"]) + self.mount_a.run_shell(["ln", "-s", "./file_1", "d0/sym_1"]) + self.mount_a.run_shell(["ln", "-s", "./file_2", "d0/sym_2"]) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # take a snapshot + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + + time.sleep(30) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0', 1) + self.verify_snapshot('d0', 'snap0') + + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_with_parent_snapshot(self): + """Test snapshot synchronization with parent directory snapshots""" + self.mount_a.run_shell(["mkdir", "-p", "d0/d1/d2/d3"]) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # take a snapshot + self.mount_a.run_shell(["mkdir", "d0/d1/d2/d3/.snap/snap0"]) + + time.sleep(30) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap0', 1) + + # create snapshots in parent directories + self.mount_a.run_shell(["mkdir", "d0/.snap/snap_d0"]) + self.mount_a.run_shell(["mkdir", "d0/d1/.snap/snap_d1"]) + self.mount_a.run_shell(["mkdir", "d0/d1/d2/.snap/snap_d2"]) + + # try syncing more snapshots + self.mount_a.run_shell(["mkdir", "d0/d1/d2/d3/.snap/snap1"]) + time.sleep(30) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap1', 2) + + self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap0"]) + self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap1"]) + time.sleep(15) + self.check_peer_status_deleted_snap(self.primary_fs_name, 
self.primary_fs_id, + "client.mirror_remote@ceph", '/d0/d1/d2/d3', 2) + + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3') + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_remove_on_stall(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + + # fetch rados address for blacklist check + rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + + # simulate non-responding mirror daemon by sending SIGSTOP + pid = self.get_mirror_daemon_pid() + log.debug(f'SIGSTOP to cephfs-mirror pid {pid}') + self.mount_a.run_shell(['kill', '-SIGSTOP', pid]) + + # wait for blocklist timeout -- the manager module would blocklist + # the mirror daemon + time.sleep(40) + + # make sure the rados addr is blocklisted + self.assertTrue(self.mds_cluster.is_addr_blocklisted(rados_inst)) + + # now we are sure that there are no "active" mirror daemons -- add a directory path. + dir_path_p = "/d0/d1" + dir_path = "/d0/d1/d2" + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path) + + time.sleep(10) + # this uses an undocumented interface to get dirpath map state + res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path) + res = json.loads(res_json) + # there are no mirror daemons + self.assertTrue(res['state'], 'stalled') + + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "remove", self.primary_fs_name, dir_path) + + time.sleep(10) + try: + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise RuntimeError('invalid errno when checking dirmap status for non-existent directory') + else: + raise RuntimeError('incorrect errno when checking dirmap state for non-existent directory') + + # adding a parent directory should be allowed + self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path_p) + + time.sleep(10) + # however, this directory path should get stalled too + res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p) + res = json.loads(res_json) + # there are no mirror daemons + self.assertTrue(res['state'], 'stalled') + + # wake up the mirror daemon -- at this point, the daemon should know + # that it has been blocklisted + log.debug('SIGCONT to cephfs-mirror') + self.mount_a.run_shell(['kill', '-SIGCONT', pid]) + + # wait for restart mirror on blocklist + time.sleep(60) + res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p) + res = json.loads(res_json) + # there are no mirror daemons + self.assertTrue(res['state'], 'mapped') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_incremental_sync(self): + """ Test incremental snapshot synchronization (based on mtime differences).""" + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + log.debug(f'mounting filesystem 
{self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + repo = 'ceph-qa-suite' + repo_dir = 'ceph_repo' + repo_path = f'{repo_dir}/{repo}' + + def clone_repo(): + self.mount_a.run_shell([ + 'git', 'clone', '--branch', 'giant', + f'http://github.com/ceph/{repo}', repo_path]) + + def exec_git_cmd(cmd_list): + self.mount_a.run_shell(['git', '--git-dir', f'{self.mount_a.mountpoint}/{repo_path}/.git', *cmd_list]) + + self.mount_a.run_shell(["mkdir", repo_dir]) + clone_repo() + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{repo_path}') + self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_a']) + + # full copy, takes time + time.sleep(500) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", f'/{repo_path}', 'snap_a', 1) + self.verify_snapshot(repo_path, 'snap_a') + + # create some diff + num = random.randint(5, 20) + log.debug(f'resetting to HEAD~{num}') + exec_git_cmd(["reset", "--hard", f'HEAD~{num}']) + + self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_b']) + # incremental copy, should be fast + time.sleep(180) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", f'/{repo_path}', 'snap_b', 2) + self.verify_snapshot(repo_path, 'snap_b') + + # diff again, this time back to HEAD + log.debug('resetting to HEAD') + exec_git_cmd(["pull"]) + + self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_c']) + # incremental copy, should be fast + time.sleep(180) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", f'/{repo_path}', 'snap_c', 3) + self.verify_snapshot(repo_path, 'snap_c') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_incremental_sync_with_type_mixup(self): + """ Test incremental snapshot synchronization with file type changes. + + The same filename exist as a different type in subsequent snapshot. + This verifies if the mirror daemon can identify file type mismatch and + sync snapshots. 
+ + \ snap_0 snap_1 snap_2 snap_3 + \----------------------------------------------- + file_x | reg sym dir reg + | + file_y | dir reg sym dir + | + file_z | sym dir reg sym + """ + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + typs = deque(['reg', 'dir', 'sym']) + def cleanup_and_create_with_type(dirname, fnames): + self.mount_a.run_shell_payload(f"rm -rf {dirname}/*") + fidx = 0 + for t in typs: + fname = f'{dirname}/{fnames[fidx]}' + log.debug(f'file: {fname} type: {t}') + if t == 'reg': + self.mount_a.run_shell(["touch", fname]) + self.mount_a.write_file(fname, data=fname) + elif t == 'dir': + self.mount_a.run_shell(["mkdir", fname]) + elif t == 'sym': + # verify ELOOP in mirror daemon + self.mount_a.run_shell(["ln", "-s", "..", fname]) + fidx += 1 + + def verify_types(dirname, fnames, snap_name): + tidx = 0 + for fname in fnames: + t = self.mount_b.run_shell_payload(f"stat -c %F {dirname}/.snap/{snap_name}/{fname}").stdout.getvalue().strip() + if typs[tidx] == 'reg': + self.assertEquals('regular file', t) + elif typs[tidx] == 'dir': + self.assertEquals('directory', t) + elif typs[tidx] == 'sym': + self.assertEquals('symbolic link', t) + tidx += 1 + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + self.mount_a.run_shell(["mkdir", "d0"]) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + + fnames = ['file_x', 'file_y', 'file_z'] + turns = 0 + while turns != len(typs): + snapname = f'snap_{turns}' + cleanup_and_create_with_type('d0', fnames) + self.mount_a.run_shell(['mkdir', f'd0/.snap/{snapname}']) + time.sleep(30) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', snapname, turns+1) + verify_types('d0', fnames, snapname) + # next type + typs.rotate(1) + turns += 1 + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_sync_with_purged_snapshot(self): + """Test snapshot synchronization in midst of snapshot deletes. + + Deleted the previous snapshot when the mirror daemon is figuring out + incremental differences between current and previous snaphot. The + mirror daemon should identify the purge and switch to using remote + comparison to sync the snapshot (in the next iteration of course). 
+ """ + + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + repo = 'ceph-qa-suite' + repo_dir = 'ceph_repo' + repo_path = f'{repo_dir}/{repo}' + + def clone_repo(): + self.mount_a.run_shell([ + 'git', 'clone', '--branch', 'giant', + f'http://github.com/ceph/{repo}', repo_path]) + + def exec_git_cmd(cmd_list): + self.mount_a.run_shell(['git', '--git-dir', f'{self.mount_a.mountpoint}/{repo_path}/.git', *cmd_list]) + + self.mount_a.run_shell(["mkdir", repo_dir]) + clone_repo() + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{repo_path}') + self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_a']) + + # full copy, takes time + time.sleep(500) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", f'/{repo_path}', 'snap_a', 1) + self.verify_snapshot(repo_path, 'snap_a') + + # create some diff + num = random.randint(60, 100) + log.debug(f'resetting to HEAD~{num}') + exec_git_cmd(["reset", "--hard", f'HEAD~{num}']) + + self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_b']) + + time.sleep(15) + self.mount_a.run_shell(['rmdir', f'{repo_path}/.snap/snap_a']) + + # incremental copy but based on remote dir_root + time.sleep(300) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", f'/{repo_path}', 'snap_b', 2) + self.verify_snapshot(repo_path, 'snap_b') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_peer_add_primary(self): + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # try adding the primary file system as a peer to secondary file + # system + try: + self.peer_add(self.secondary_fs_name, self.secondary_fs_id, "client.mirror_remote@ceph", self.primary_fs_name) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError('invalid errno when adding a primary file system') + else: + raise RuntimeError('adding peer should fail') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_cephfs_mirror_cancel_mirroring_and_readd(self): + """ + Test adding a directory path for synchronization post removal of already added directory paths + + ... to ensure that synchronization of the newly added directory path functions + as expected. Note that we schedule three (3) directories for mirroring to ensure + that all replayer threads (3 by default) in the mirror daemon are busy. 
+ """ + log.debug('reconfigure client auth caps') + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( + self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + # create a bunch of files in a directory to snap + self.mount_a.run_shell(["mkdir", "d0"]) + self.mount_a.run_shell(["mkdir", "d1"]) + self.mount_a.run_shell(["mkdir", "d2"]) + for i in range(4): + filename = f'file.{i}' + self.mount_a.write_n_mb(os.path.join('d0', filename), 1024) + self.mount_a.write_n_mb(os.path.join('d1', filename), 1024) + self.mount_a.write_n_mb(os.path.join('d2', filename), 1024) + + log.debug('enabling mirroring') + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + log.debug('adding directory paths') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d2') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + # take snapshots + log.debug('taking snapshots') + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + self.mount_a.run_shell(["mkdir", "d1/.snap/snap0"]) + self.mount_a.run_shell(["mkdir", "d2/.snap/snap0"]) + + time.sleep(10) + log.debug('checking snap in progress') + self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0') + self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d1', 'snap0') + self.check_peer_snap_in_progress(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d2', 'snap0') + + log.debug('removing directories 1') + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + log.debug('removing directories 2') + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + log.debug('removing directories 3') + self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d2') + + log.debug('removing snapshots') + self.mount_a.run_shell(["rmdir", "d0/.snap/snap0"]) + self.mount_a.run_shell(["rmdir", "d1/.snap/snap0"]) + self.mount_a.run_shell(["rmdir", "d2/.snap/snap0"]) + + for i in range(4): + filename = f'file.{i}' + log.debug(f'deleting {filename}') + self.mount_a.run_shell(["rm", "-f", os.path.join('d0', filename)]) + self.mount_a.run_shell(["rm", "-f", os.path.join('d1', filename)]) + self.mount_a.run_shell(["rm", "-f", os.path.join('d2', filename)]) + + log.debug('creating new files...') + self.mount_a.create_n_files('d0/file', 50, sync=True) + self.mount_a.create_n_files('d1/file', 50, sync=True) + self.mount_a.create_n_files('d2/file', 50, sync=True) + + log.debug('adding directory paths') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d2') + + log.debug('creating new snapshots...') + self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) + self.mount_a.run_shell(["mkdir", "d1/.snap/snap0"]) + self.mount_a.run_shell(["mkdir", "d2/.snap/snap0"]) + + time.sleep(60) + 
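The fixed time.sleep(60) above, like most of the sleeps in this file, could also be expressed as a poll. A sketch of such a helper built on teuthology's safe_while, assuming the mirror_daemon_command and get_peer_uuid helpers defined earlier in this class (wait_for_synced_snap itself is illustrative, not an existing method):

    from teuthology.contextutil import safe_while

    def wait_for_synced_snap(self, fs_name, fs_id, peer_spec, dir_name,
                             snap_name, timeout=120):
        # poll the peer status until dir_name reports snap_name as its last
        # synced snapshot; safe_while raises MaxWhileTries if that never happens
        peer_uuid = self.get_peer_uuid(peer_spec)
        with safe_while(sleep=5, tries=timeout // 5,
                        action=f'wait for {dir_name}@{snap_name} sync') as proceed:
            while proceed():
                res = self.mirror_daemon_command(
                    f'peer status for fs: {fs_name}',
                    'fs', 'mirror', 'peer', 'status',
                    f'{fs_name}@{fs_id}', peer_uuid)
                if res.get(dir_name, {}).get('last_synced_snap', {}).get('name') == snap_name:
                    return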
self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d0', 'snap0', 1) + self.verify_snapshot('d0', 'snap0') + + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d1', 'snap0', 1) + self.verify_snapshot('d1', 'snap0') + + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/d2', 'snap0', 1) + self.verify_snapshot('d2', 'snap0') + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + + def test_local_and_remote_dir_root_mode(self): + log.debug('reconfigure client auth caps') + cid = self.mount_b.client_id + data_pool = self.backup_fs.get_data_pool_name() + self.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', f"client.{cid}", + 'mds', 'allow rw', + 'mon', 'allow r', + 'osd', f"allow rw pool={data_pool}, allow rw pool={data_pool}") + + log.debug(f'mounting filesystem {self.secondary_fs_name}') + self.mount_b.umount_wait() + self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) + + self.mount_a.run_shell(["mkdir", "l1"]) + self.mount_a.run_shell(["mkdir", "l1/.snap/snap0"]) + self.mount_a.run_shell(["chmod", "go-rwx", "l1"]) + + self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/l1') + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + + time.sleep(60) + self.check_peer_status(self.primary_fs_name, self.primary_fs_id, + "client.mirror_remote@ceph", '/l1', 'snap0', 1) + + mode_local = self.mount_a.run_shell(["stat", "--format=%A", "l1"]).stdout.getvalue().strip() + mode_remote = self.mount_b.run_shell(["stat", "--format=%A", "l1"]).stdout.getvalue().strip() + + self.assertTrue(mode_local == mode_remote, f"mode mismatch, local mode: {mode_local}, remote mode: {mode_remote}") + + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) + self.mount_a.run_shell(["rmdir", "l1/.snap/snap0"]) + self.mount_a.run_shell(["rmdir", "l1"]) diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py new file mode 100644 index 000000000..8b48dee69 --- /dev/null +++ b/qa/tasks/cephfs/test_misc.py @@ -0,0 +1,640 @@ +from io import StringIO + +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError +from textwrap import dedent +from threading import Thread +import errno +import platform +import time +import json +import logging +import os +import re + +log = logging.getLogger(__name__) + +class TestMisc(CephFSTestCase): + CLIENTS_REQUIRED = 2 + + def test_statfs_on_deleted_fs(self): + """ + That statfs does not cause monitors to SIGSEGV after fs deletion. + """ + + self.mount_b.umount_wait() + self.mount_a.run_shell_payload("stat -f .") + self.fs.delete_all_filesystems() + # This will hang either way, run in background. 
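+ # check_status=False prevents the eventual failure from raising once
+ # the mount is forcibly torn down below.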
+ p = self.mount_a.run_shell_payload("stat -f .", wait=False, timeout=60, check_status=False) + time.sleep(30) + self.assertFalse(p.finished) + # the process is stuck in uninterruptible sleep, just kill the mount + self.mount_a.umount_wait(force=True) + p.wait() + + def test_fuse_mount_on_already_mounted_path(self): + if platform.system() != "Linux": + self.skipTest("Require Linux platform") + + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client") + + # Try to mount already mounted path + # expecting EBUSY error + try: + mount_cmd = ['sudo'] + self.mount_a._mount_bin + [self.mount_a.hostfs_mntpt] + self.mount_a.client_remote.run(args=mount_cmd, stderr=StringIO(), + stdout=StringIO(), timeout=60, omit_sudo=False) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EBUSY) + else: + self.fail("Expected EBUSY") + + def test_getattr_caps(self): + """ + Check if MDS recognizes the 'mask' parameter of open request. + The parameter allows client to request caps when opening file + """ + + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client") + + # Enable debug. Client will requests CEPH_CAP_XATTR_SHARED + # on lookup/open + self.mount_b.umount_wait() + self.set_conf('client', 'client debug getattr caps', 'true') + self.mount_b.mount_wait() + + # create a file and hold it open. MDS will issue CEPH_CAP_EXCL_* + # to mount_a + p = self.mount_a.open_background("testfile") + self.mount_b.wait_for_visible("testfile") + + # this triggers a lookup request and an open request. The debug + # code will check if lookup/open reply contains xattrs + self.mount_b.run_shell(["cat", "testfile"]) + + self.mount_a.kill_background(p) + + def test_root_rctime(self): + """ + Check that the root inode has a non-default rctime on startup. 
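+ The reported rctime should be close to the current time rather than
+ a zero/epoch default.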
+ """ + + t = time.time() + rctime = self.mount_a.getfattr(".", "ceph.dir.rctime") + log.info("rctime = {}".format(rctime)) + self.assertGreaterEqual(float(rctime), t - 10) + + def test_fs_new(self): + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + data_pool_name = self.fs.get_data_pool_name() + + self.fs.fail() + + self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name, + '--yes-i-really-mean-it') + + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete', + self.fs.metadata_pool_name, + self.fs.metadata_pool_name, + '--yes-i-really-really-mean-it') + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + self.fs.metadata_pool_name, + '--pg_num_min', str(self.fs.pg_num_min)) + + # insert a garbage object + self.fs.radosm(["put", "foo", "-"], stdin=StringIO("bar")) + + def get_pool_df(fs, name): + try: + return fs.get_pool_df(name)['objects'] > 0 + except RuntimeError: + return False + + self.wait_until_true(lambda: get_pool_df(self.fs, self.fs.metadata_pool_name), timeout=30) + + try: + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, + self.fs.metadata_pool_name, + data_pool_name) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.EINVAL) + else: + raise AssertionError("Expected EINVAL") + + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, + self.fs.metadata_pool_name, + data_pool_name, "--force") + + self.fs.mon_manager.raw_cluster_cmd('fs', 'fail', self.fs.name) + + self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name, + '--yes-i-really-mean-it') + + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete', + self.fs.metadata_pool_name, + self.fs.metadata_pool_name, + '--yes-i-really-really-mean-it') + self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', + self.fs.metadata_pool_name, + '--pg_num_min', str(self.fs.pg_num_min)) + self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, + self.fs.metadata_pool_name, + data_pool_name, + '--allow_dangerous_metadata_overlay') + + def test_cap_revoke_nonresponder(self): + """ + Check that a client is evicted if it has not responded to cap revoke + request for configured number of seconds. + """ + session_timeout = self.fs.get_var("session_timeout") + eviction_timeout = session_timeout / 2.0 + + self.fs.mds_asok(['config', 'set', 'mds_cap_revoke_eviction_timeout', + str(eviction_timeout)]) + + cap_holder = self.mount_a.open_background() + + # Wait for the file to be visible from another client, indicating + # that mount_a has completed its network ops + self.mount_b.wait_for_visible() + + # Simulate client death + self.mount_a.suspend_netns() + + try: + # The waiter should get stuck waiting for the capability + # held on the MDS by the now-dead client A + cap_waiter = self.mount_b.write_background() + + a = time.time() + time.sleep(eviction_timeout) + cap_waiter.wait() + b = time.time() + cap_waited = b - a + log.info("cap_waiter waited {0}s".format(cap_waited)) + + # check if the cap is transferred before session timeout kicked in. + # this is a good enough check to ensure that the client got evicted + # by the cap auto evicter rather than transitioning to stale state + # and then getting evicted. 
+ self.assertLess(cap_waited, session_timeout, + "Capability handover took {0}, expected less than {1}".format( + cap_waited, session_timeout + )) + + self.assertTrue(self.mds_cluster.is_addr_blocklisted( + self.mount_a.get_global_addr())) + self.mount_a._kill_background(cap_holder) + finally: + self.mount_a.resume_netns() + + def test_filtered_df(self): + pool_name = self.fs.get_data_pool_name() + raw_df = self.fs.get_pool_df(pool_name) + raw_avail = float(raw_df["max_avail"]) + out = self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'get', + pool_name, 'size', + '-f', 'json-pretty') + _ = json.loads(out) + + proc = self.mount_a.run_shell(['df', '.']) + output = proc.stdout.getvalue() + fs_avail = output.split('\n')[1].split()[3] + fs_avail = float(fs_avail) * 1024 + + ratio = raw_avail / fs_avail + assert 0.9 < ratio < 1.1 + + def test_dump_inode(self): + info = self.fs.mds_asok(['dump', 'inode', '1']) + assert(info['path'] == "/") + + def test_dump_inode_hexademical(self): + self.mount_a.run_shell(["mkdir", "-p", "foo"]) + ino = self.mount_a.path_to_ino("foo") + assert type(ino) is int + info = self.fs.mds_asok(['dump', 'inode', hex(ino)]) + assert info['path'] == "/foo" + + def test_fs_lsflags(self): + """ + Check that the lsflags displays the default state and the new state of flags + """ + # Set some flags + self.fs.set_joinable(False) + self.fs.set_allow_new_snaps(False) + self.fs.set_allow_standby_replay(True) + + lsflags = json.loads(self.fs.mon_manager.raw_cluster_cmd('fs', 'lsflags', + self.fs.name, + "--format=json-pretty")) + self.assertEqual(lsflags["joinable"], False) + self.assertEqual(lsflags["allow_snaps"], False) + self.assertEqual(lsflags["allow_multimds_snaps"], True) + self.assertEqual(lsflags["allow_standby_replay"], True) + + def _test_sync_stuck_for_around_5s(self, dir_path, file_sync=False): + self.mount_a.run_shell(["mkdir", dir_path]) + + sync_dir_pyscript = dedent(""" + import os + + path = "{path}" + dfd = os.open(path, os.O_DIRECTORY) + os.fsync(dfd) + os.close(dfd) + """.format(path=dir_path)) + + # run create/delete directories and test the sync time duration + for i in range(300): + for j in range(5): + self.mount_a.run_shell(["mkdir", os.path.join(dir_path, f"{i}_{j}")]) + start = time.time() + if file_sync: + self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript]) + else: + self.mount_a.run_shell(["sync"]) + duration = time.time() - start + log.info(f"sync mkdir i = {i}, duration = {duration}") + self.assertLess(duration, 4) + + for j in range(5): + self.mount_a.run_shell(["rm", "-rf", os.path.join(dir_path, f"{i}_{j}")]) + start = time.time() + if file_sync: + self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript]) + else: + self.mount_a.run_shell(["sync"]) + duration = time.time() - start + log.info(f"sync rmdir i = {i}, duration = {duration}") + self.assertLess(duration, 4) + + self.mount_a.run_shell(["rm", "-rf", dir_path]) + + def test_filesystem_sync_stuck_for_around_5s(self): + """ + To check whether the fsync will be stuck to wait for the mdlog to be + flushed for at most 5 seconds. + """ + + dir_path = "filesystem_sync_do_not_wait_mdlog_testdir" + self._test_sync_stuck_for_around_5s(dir_path) + + def test_file_sync_stuck_for_around_5s(self): + """ + To check whether the filesystem sync will be stuck to wait for the + mdlog to be flushed for at most 5 seconds. 
+ """ + + dir_path = "file_sync_do_not_wait_mdlog_testdir" + self._test_sync_stuck_for_around_5s(dir_path, True) + + def test_file_filesystem_sync_crash(self): + """ + To check whether the kernel crashes when doing the file/filesystem sync. + """ + + stop_thread = False + dir_path = "file_filesystem_sync_crash_testdir" + self.mount_a.run_shell(["mkdir", dir_path]) + + def mkdir_rmdir_thread(mount, path): + #global stop_thread + + log.info(" mkdir_rmdir_thread starting...") + num = 0 + while not stop_thread: + n = num + m = num + for __ in range(10): + mount.run_shell(["mkdir", os.path.join(path, f"{n}")]) + n += 1 + for __ in range(10): + mount.run_shell(["rm", "-rf", os.path.join(path, f"{m}")]) + m += 1 + num += 10 + log.info(" mkdir_rmdir_thread stopped") + + def filesystem_sync_thread(mount, path): + #global stop_thread + + log.info(" filesystem_sync_thread starting...") + while not stop_thread: + mount.run_shell(["sync"]) + log.info(" filesystem_sync_thread stopped") + + def file_sync_thread(mount, path): + #global stop_thread + + log.info(" file_sync_thread starting...") + pyscript = dedent(""" + import os + + path = "{path}" + dfd = os.open(path, os.O_DIRECTORY) + os.fsync(dfd) + os.close(dfd) + """.format(path=path)) + + while not stop_thread: + mount.run_shell(['python3', '-c', pyscript]) + log.info(" file_sync_thread stopped") + + td1 = Thread(target=mkdir_rmdir_thread, args=(self.mount_a, dir_path,)) + td2 = Thread(target=filesystem_sync_thread, args=(self.mount_a, dir_path,)) + td3 = Thread(target=file_sync_thread, args=(self.mount_a, dir_path,)) + + td1.start() + td2.start() + td3.start() + time.sleep(1200) # run 20 minutes + stop_thread = True + td1.join() + td2.join() + td3.join() + self.mount_a.run_shell(["rm", "-rf", dir_path]) + + def test_dump_inmemory_log_on_client_eviction(self): + """ + That the in-memory logs are dumped during a client eviction event. + """ + self.fs.mds_asok(['config', 'set', 'debug_mds', '1/10']) + self.fs.mds_asok(['config', 'set', 'mds_extraordinary_events_dump_interval', '1']) + mount_a_client_id = self.mount_a.get_global_id() + infos = self.fs.status().get_ranks(self.fs.id) + + #evict the client + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id]) + time.sleep(10) #wait for 10 seconds for the logs dumping to complete. + + #The client is evicted, so unmount it. + try: + self.mount_a.umount_wait(require_clean=True, timeout=30) + except: + pass #continue with grepping the log + + eviction_log = f"Evicting (\(and blocklisting\) )?client session {mount_a_client_id} \(.+:.+/.+\)" + search_range = "/^--- begin dump of recent events ---$/,/^--- end dump of recent events ---$/p" + for info in infos: + mds_id = info['name'] + try: + remote = self.fs.mon_manager.find_remote('mds', mds_id) + out = remote.run(args=["sed", + "-n", + "{0}".format(search_range), + f"/var/log/ceph/{self.mount_a.cluster_name}-mds.{mds_id}.log"], + stdout=StringIO(), timeout=30) + except: + continue #continue with the next info + if out.stdout and re.search(eviction_log, out.stdout.getvalue().strip()): + return + self.assertTrue(False, "Failed to dump in-memory logs during client eviction") + + def test_dump_inmemory_log_on_missed_beacon_ack_from_monitors(self): + """ + That the in-memory logs are dumped when the mds misses beacon ACKs from monitors. 
+ """ + self.fs.mds_asok(['config', 'set', 'debug_mds', '1/10']) + self.fs.mds_asok(['config', 'set', 'mds_extraordinary_events_dump_interval', '1']) + try: + mons = json.loads(self.fs.mon_manager.raw_cluster_cmd('mon', 'dump', '-f', 'json'))['mons'] + except: + self.assertTrue(False, "Error fetching monitors") + + #Freeze all monitors + for mon in mons: + mon_name = mon['name'] + log.info(f'Sending STOP to mon {mon_name}') + self.fs.mon_manager.signal_mon(mon_name, 19) + + time.sleep(10) #wait for 10 seconds to get the in-memory logs dumped + + #Unfreeze all monitors + for mon in mons: + mon_name = mon['name'] + log.info(f'Sending CONT to mon {mon_name}') + self.fs.mon_manager.signal_mon(mon_name, 18) + + missed_beacon_ack_log = "missed beacon ack from the monitors" + search_range = "/^--- begin dump of recent events ---$/,/^--- end dump of recent events ---$/p" + for info in self.fs.status().get_ranks(self.fs.id): + mds_id = info['name'] + try: + remote = self.fs.mon_manager.find_remote('mds', mds_id) + out = remote.run(args=["sed", + "-n", + "{0}".format(search_range), + f"/var/log/ceph/{self.mount_a.cluster_name}-mds.{mds_id}.log"], + stdout=StringIO(), timeout=30) + except: + continue #continue with the next info + if out.stdout and (missed_beacon_ack_log in out.stdout.getvalue().strip()): + return + self.assertTrue(False, "Failed to dump in-memory logs during missed beacon ack") + + def test_dump_inmemory_log_on_missed_internal_heartbeats(self): + """ + That the in-memory logs are dumped when the mds misses internal heartbeats. + """ + self.fs.mds_asok(['config', 'set', 'debug_mds', '1/10']) + self.fs.mds_asok(['config', 'set', 'mds_heartbeat_grace', '1']) + self.fs.mds_asok(['config', 'set', 'mds_extraordinary_events_dump_interval', '1']) + try: + mons = json.loads(self.fs.mon_manager.raw_cluster_cmd('mon', 'dump', '-f', 'json'))['mons'] + except: + self.assertTrue(False, "Error fetching monitors") + + #Freeze all monitors + for mon in mons: + mon_name = mon['name'] + log.info(f'Sending STOP to mon {mon_name}') + self.fs.mon_manager.signal_mon(mon_name, 19) + + time.sleep(10) #wait for 10 seconds to get the in-memory logs dumped + + #Unfreeze all monitors + for mon in mons: + mon_name = mon['name'] + log.info(f'Sending CONT to mon {mon_name}') + self.fs.mon_manager.signal_mon(mon_name, 18) + + missed_internal_heartbeat_log = \ + "Skipping beacon heartbeat to monitors \(last acked .+s ago\); MDS internal heartbeat is not healthy!" 
+ search_range = "/^--- begin dump of recent events ---$/,/^--- end dump of recent events ---$/p" + for info in self.fs.status().get_ranks(self.fs.id): + mds_id = info['name'] + try: + remote = self.fs.mon_manager.find_remote('mds', mds_id) + out = remote.run(args=["sed", + "-n", + "{0}".format(search_range), + f"/var/log/ceph/{self.mount_a.cluster_name}-mds.{mds_id}.log"], + stdout=StringIO(), timeout=30) + except: + continue #continue with the next info + if out.stdout and re.search(missed_internal_heartbeat_log, out.stdout.getvalue().strip()): + return + self.assertTrue(False, "Failed to dump in-memory logs during missed internal heartbeat") + + def _session_client_ls(self, cmd): + mount_a_client_id = self.mount_a.get_global_id() + info = self.fs.rank_asok(cmd) + mount_a_mountpoint = self.mount_a.mountpoint + mount_b_mountpoint = self.mount_b.mountpoint + self.assertIsNotNone(info) + for i in range(0, len(info)): + self.assertIn(info[i]["client_metadata"]["mount_point"], + [mount_a_mountpoint, mount_b_mountpoint]) + info = self.fs.rank_asok(cmd + [f"id={mount_a_client_id}"]) + self.assertEqual(len(info), 1) + self.assertEqual(info[0]["id"], mount_a_client_id) + self.assertEqual(info[0]["client_metadata"]["mount_point"], mount_a_mountpoint) + info = self.fs.rank_asok(cmd + ['--cap_dump']) + for i in range(0, len(info)): + self.assertIn("caps", info[i]) + + def test_session_ls(self): + self._session_client_ls(['session', 'ls']) + + def test_client_ls(self): + self._session_client_ls(['client', 'ls']) + +class TestCacheDrop(CephFSTestCase): + CLIENTS_REQUIRED = 1 + + def _run_drop_cache_cmd(self, timeout=None): + result = None + args = ["cache", "drop"] + if timeout is not None: + args.append(str(timeout)) + result = self.fs.rank_tell(args) + return result + + def _setup(self, max_caps=20, threshold=400): + # create some files + self.mount_a.create_n_files("dc-dir/dc-file", 1000, sync=True) + + # Reduce this so the MDS doesn't rkcall the maximum for simple tests + self.fs.rank_asok(['config', 'set', 'mds_recall_max_caps', str(max_caps)]) + self.fs.rank_asok(['config', 'set', 'mds_recall_max_decay_threshold', str(threshold)]) + + def test_drop_cache_command(self): + """ + Basic test for checking drop cache command. + Confirm it halts without a timeout. + Note that the cache size post trimming is not checked here. + """ + mds_min_caps_per_client = int(self.fs.get_config("mds_min_caps_per_client")) + self._setup() + result = self._run_drop_cache_cmd() + self.assertEqual(result['client_recall']['return_code'], 0) + self.assertEqual(result['flush_journal']['return_code'], 0) + # It should take at least 1 second + self.assertGreater(result['duration'], 1) + self.assertGreaterEqual(result['trim_cache']['trimmed'], 1000-2*mds_min_caps_per_client) + + def test_drop_cache_command_timeout(self): + """ + Basic test for checking drop cache command. + Confirm recall halts early via a timeout. + Note that the cache size post trimming is not checked here. + """ + self._setup() + result = self._run_drop_cache_cmd(timeout=10) + self.assertEqual(result['client_recall']['return_code'], -errno.ETIMEDOUT) + self.assertEqual(result['flush_journal']['return_code'], 0) + self.assertGreater(result['duration'], 10) + self.assertGreaterEqual(result['trim_cache']['trimmed'], 100) # we did something, right? + + def test_drop_cache_command_dead_timeout(self): + """ + Check drop cache command with non-responding client using tell + interface. Note that the cache size post trimming is not checked + here. 
+ """ + self._setup() + self.mount_a.suspend_netns() + # Note: recall is subject to the timeout. The journal flush will + # be delayed due to the client being dead. + result = self._run_drop_cache_cmd(timeout=5) + self.assertEqual(result['client_recall']['return_code'], -errno.ETIMEDOUT) + self.assertEqual(result['flush_journal']['return_code'], 0) + self.assertGreater(result['duration'], 5) + self.assertLess(result['duration'], 120) + # Note: result['trim_cache']['trimmed'] may be >0 because dropping the + # cache now causes the Locker to drive eviction of stale clients (a + # stale session will be autoclosed at mdsmap['session_timeout']). The + # particular operation causing this is journal flush which causes the + # MDS to wait wait for cap revoke. + #self.assertEqual(0, result['trim_cache']['trimmed']) + self.mount_a.resume_netns() + + def test_drop_cache_command_dead(self): + """ + Check drop cache command with non-responding client using tell + interface. Note that the cache size post trimming is not checked + here. + """ + self._setup() + self.mount_a.suspend_netns() + result = self._run_drop_cache_cmd() + self.assertEqual(result['client_recall']['return_code'], 0) + self.assertEqual(result['flush_journal']['return_code'], 0) + self.assertGreater(result['duration'], 5) + self.assertLess(result['duration'], 120) + # Note: result['trim_cache']['trimmed'] may be >0 because dropping the + # cache now causes the Locker to drive eviction of stale clients (a + # stale session will be autoclosed at mdsmap['session_timeout']). The + # particular operation causing this is journal flush which causes the + # MDS to wait wait for cap revoke. + self.mount_a.resume_netns() + +class TestSkipReplayInoTable(CephFSTestCase): + MDSS_REQUIRED = 1 + CLIENTS_REQUIRED = 1 + + def test_alloc_cinode_assert(self): + """ + Test alloc CInode assert. + + See: https://tracker.ceph.com/issues/52280 + """ + + # Create a directory and the mds will journal this and then crash + self.mount_a.run_shell(["rm", "-rf", "test_alloc_ino"]) + self.mount_a.run_shell(["mkdir", "test_alloc_ino"]) + + status = self.fs.status() + rank0 = self.fs.get_rank(rank=0, status=status) + + self.fs.mds_asok(['config', 'set', 'mds_kill_skip_replaying_inotable', "true"]) + # This will make the MDS crash, since we only have one MDS in the + # cluster and without the "wait=False" it will stuck here forever. + self.mount_a.run_shell(["mkdir", "test_alloc_ino/dir1"], wait=False) + + # sleep 10 seconds to make sure the journal logs are flushed and + # the mds crashes + time.sleep(10) + + # Now set the mds config to skip replaying the inotable + self.fs.set_ceph_conf('mds', 'mds_inject_skip_replaying_inotable', True) + self.fs.set_ceph_conf('mds', 'mds_wipe_sessions', True) + + self.fs.mds_restart() + # sleep 5 seconds to make sure the mds tell command won't stuck + time.sleep(5) + self.fs.wait_for_daemons() + + self.delete_mds_coredump(rank0['name']); + + self.mount_a.run_shell(["mkdir", "test_alloc_ino/dir2"]) + + ls_out = set(self.mount_a.ls("test_alloc_ino/")) + self.assertEqual(ls_out, set({"dir1", "dir2"})) diff --git a/qa/tasks/cephfs/test_multifs_auth.py b/qa/tasks/cephfs/test_multifs_auth.py new file mode 100644 index 000000000..c9ea5f528 --- /dev/null +++ b/qa/tasks/cephfs/test_multifs_auth.py @@ -0,0 +1,297 @@ +""" +Test for Ceph clusters with multiple FSs. 
+""" +import logging + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from tasks.cephfs.caps_helper import CapTester + +from teuthology.exceptions import CommandFailedError + + +log = logging.getLogger(__name__) + + +class TestMultiFS(CephFSTestCase): + client_id = 'testuser' + client_name = 'client.' + client_id + # one dedicated for each FS + MDSS_REQUIRED = 2 + CLIENTS_REQUIRED = 2 + + def setUp(self): + super(TestMultiFS, self).setUp() + + self.captester = CapTester() + + # we might have it - the client - if the same cluster was used for a + # different vstart_runner.py run. + self.run_cluster_cmd(f'auth rm {self.client_name}') + + self.fs1 = self.fs + self.fs2 = self.mds_cluster.newfs(name='cephfs2', create=True) + + # we'll reassign caps to client.1 so that it can operate with cephfs2 + self.run_cluster_cmd(f'auth caps client.{self.mount_b.client_id} mon ' + f'"allow r" osd "allow rw ' + f'pool={self.fs2.get_data_pool_name()}" mds allow') + self.mount_b.remount(cephfs_name=self.fs2.name) + + +class TestMONCaps(TestMultiFS): + + def test_moncap_with_one_fs_names(self): + moncap = f'allow r fsname={self.fs1.name}' + self.create_client(self.client_id, moncap) + + self.captester.run_mon_cap_tests(self.fs1, self.client_id) + + def test_moncap_with_multiple_fs_names(self): + moncap = (f'allow r fsname={self.fs1.name}, ' + f'allow r fsname={self.fs2.name}') + self.create_client(self.client_id, moncap) + + self.captester.run_mon_cap_tests(self.fs1, self.client_id) + + def test_moncap_with_blanket_allow(self): + moncap = 'allow r' + self.create_client(self.client_id, moncap) + + self.captester.run_mon_cap_tests(self.fs1, self.client_id) + + +#TODO: add tests for capsecs 'p' and 's'. +class TestMDSCaps(TestMultiFS): + """ + 0. Have 2 FSs on Ceph cluster. + 1. Create new files on both FSs. + 2. Create a new client that has authorization for both FSs. + 3. Remount the current mounts with this new client. + 4. Test read and write on both FSs. 
+ """ + def setUp(self): + super(self.__class__, self).setUp() + self.mounts = (self.mount_a, self.mount_b) + + def test_rw_with_fsname_and_no_path_in_cap(self): + PERM = 'rw' + self.captester.write_test_files(self.mounts) + keyring_paths = self._create_client(PERM, fsname=True) + self.remount_with_new_client(keyring_paths) + + self.captester.run_mds_cap_tests(PERM) + + def test_r_with_fsname_and_no_path_in_cap(self): + PERM = 'r' + self.captester.write_test_files(self.mounts) + keyring_paths = self._create_client(PERM, fsname=True) + self.remount_with_new_client(keyring_paths) + + self.captester.run_mds_cap_tests(PERM) + + def test_rw_with_fsname_and_path_in_cap(self): + PERM, CEPHFS_MNTPT = 'rw', 'dir1' + self.mount_a.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.mount_b.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.captester.write_test_files(self.mounts, CEPHFS_MNTPT) + keyring_paths = self._create_client(PERM, fsname=True) + self.remount_with_new_client(keyring_paths, CEPHFS_MNTPT) + + self.captester.run_mds_cap_tests(PERM, CEPHFS_MNTPT) + + def test_r_with_fsname_and_path_in_cap(self): + PERM, CEPHFS_MNTPT = 'r', 'dir1' + self.mount_a.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.mount_b.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.captester.write_test_files(self.mounts, CEPHFS_MNTPT) + keyring_paths = self._create_client(PERM, fsname=True) + self.remount_with_new_client(keyring_paths, CEPHFS_MNTPT) + + self.captester.run_mds_cap_tests(PERM, CEPHFS_MNTPT) + + # XXX: this tests the backward compatibility; "allow rw path=<dir1>" is + # treated as "allow rw fsname=* path=<dir1>" + def test_rw_with_no_fsname_and_path_in_cap(self): + PERM, CEPHFS_MNTPT = 'rw', 'dir1' + self.mount_a.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.mount_b.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.captester.write_test_files(self.mounts, CEPHFS_MNTPT) + keyring_paths = self._create_client(PERM) + self.remount_with_new_client(keyring_paths, CEPHFS_MNTPT) + + self.captester.run_mds_cap_tests(PERM, CEPHFS_MNTPT) + + # XXX: this tests the backward compatibility; "allow r path=<dir1>" is + # treated as "allow r fsname=* path=<dir1>" + def test_r_with_no_fsname_and_path_in_cap(self): + PERM, CEPHFS_MNTPT = 'r', 'dir1' + self.mount_a.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.mount_b.run_shell(f'mkdir {CEPHFS_MNTPT}') + self.captester.write_test_files(self.mounts, CEPHFS_MNTPT) + keyring_paths = self._create_client(PERM) + self.remount_with_new_client(keyring_paths, CEPHFS_MNTPT) + + self.captester.run_mds_cap_tests(PERM, CEPHFS_MNTPT) + + def test_rw_with_no_fsname_and_no_path(self): + PERM = 'rw' + self.captester.write_test_files(self.mounts) + keyring_paths = self._create_client(PERM) + self.remount_with_new_client(keyring_paths) + + self.captester.run_mds_cap_tests(PERM) + + def test_r_with_no_fsname_and_no_path(self): + PERM = 'r' + self.captester.write_test_files(self.mounts) + keyring_paths = self._create_client(PERM) + self.remount_with_new_client(keyring_paths) + + self.captester.run_mds_cap_tests(PERM) + + def tearDown(self): + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + super(type(self), self).tearDown() + + def generate_caps(self, perm, fsname, cephfs_mntpt): + moncap = 'allow r' + osdcap = (f'allow {perm} tag cephfs data={self.fs1.name}, ' + f'allow {perm} tag cephfs data={self.fs2.name}') + + if fsname: + if cephfs_mntpt == '/': + mdscap = (f'allow {perm} fsname={self.fs1.name}, ' + f'allow {perm} fsname={self.fs2.name}') + else: + mdscap = (f'allow {perm} fsname={self.fs1.name} ' + f'path=/{cephfs_mntpt}, ' 
+ f'allow {perm} fsname={self.fs2.name} ' + f'path=/{cephfs_mntpt}') + else: + if cephfs_mntpt == '/': + mdscap = f'allow {perm}' + else: + mdscap = f'allow {perm} path=/{cephfs_mntpt}' + + return moncap, osdcap, mdscap + + def _create_client(self, perm, fsname=False, cephfs_mntpt='/'): + moncap, osdcap, mdscap = self.generate_caps(perm, fsname, + cephfs_mntpt) + + keyring = self.create_client(self.client_id, moncap, osdcap, mdscap) + keyring_paths = [] + for mount_x in self.mounts: + keyring_paths.append(mount_x.client_remote.mktemp(data=keyring)) + + return keyring_paths + + def remount_with_new_client(self, keyring_paths, cephfs_mntpt='/'): + if isinstance(cephfs_mntpt, str) and cephfs_mntpt != '/' : + cephfs_mntpt = '/' + cephfs_mntpt + + self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_paths[0], + client_remote=self.mount_a.client_remote, + cephfs_name=self.fs1.name, + cephfs_mntpt=cephfs_mntpt, + hostfs_mntpt=self.mount_a.hostfs_mntpt, + wait=True) + self.mount_b.remount(client_id=self.client_id, + client_keyring_path=keyring_paths[1], + client_remote=self.mount_b.client_remote, + cephfs_name=self.fs2.name, + cephfs_mntpt=cephfs_mntpt, + hostfs_mntpt=self.mount_b.hostfs_mntpt, + wait=True) + + +class TestClientsWithoutAuth(TestMultiFS): + + def setUp(self): + super(TestClientsWithoutAuth, self).setUp() + + # TODO: When MON and OSD caps for a Ceph FS are assigned to a + # client but MDS caps are not, mount.ceph prints "permission + # denied". But when MON caps are not assigned and MDS and OSD + # caps are, mount.ceph prints "no mds server or cluster laggy" + # instead of "permission denied". + # + # Before uncommenting the following line a fix would be required + # for latter case to change "no mds server is up or the cluster is + # laggy" to "permission denied". + self.kernel_errmsgs = ('permission denied', 'no mds server is up or ' + 'the cluster is laggy', 'no such file or ' + 'directory', + 'input/output error') + + # TODO: When MON and OSD caps are assigned for a Ceph FS to a + # client but MDS caps are not, ceph-fuse prints "operation not + # permitted". But when MON caps are not assigned and MDS and OSD + # caps are, ceph-fuse prints "no such file or directory" instead + # of "operation not permitted". + # + # Before uncommenting the following line a fix would be required + # for the latter case to change "no such file or directory" to + # "operation not permitted". + #self.assertIn('operation not permitted', retval[2].lower()) + self.fuse_errmsgs = ('operation not permitted', 'no such file or ' + 'directory') + + if 'kernel' in str(type(self.mount_a)).lower(): + self.errmsgs = self.kernel_errmsgs + elif 'fuse' in str(type(self.mount_a)).lower(): + self.errmsgs = self.fuse_errmsgs + else: + raise RuntimeError('strange, the client was neither based on ' + 'kernel nor FUSE.') + + def check_that_mount_failed_for_right_reason(self, stderr): + stderr = stderr.lower() + for errmsg in self.errmsgs: + if errmsg in stderr: + break + else: + raise AssertionError('can\'t find expected set of words in the ' + f'stderr\nself.errmsgs - {self.errmsgs}\n' + f'stderr - {stderr}') + + def test_mount_all_caps_absent(self): + # setup part... + keyring = self.fs1.authorize(self.client_id, ('/', 'rw')) + keyring_path = self.mount_a.client_remote.mktemp(data=keyring) + + # mount the FS for which client has no auth... 
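+ # with check_status=False the failed remount returns a 3-tuple instead
+ # of raising; retval[2] carries the mount command's stderr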
+ retval = self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_name=self.fs2.name, + check_status=False) + + # tests... + self.assertIsInstance(retval, tuple) + self.assertEqual(len(retval), 3) + self.assertIsInstance(retval[0], CommandFailedError) + self.check_that_mount_failed_for_right_reason(retval[2]) + + def test_mount_mon_and_osd_caps_present_mds_caps_absent(self): + # setup part... + moncap = f'allow rw fsname={self.fs1.name}, allow rw fsname={self.fs2.name}' + mdscap = f'allow rw fsname={self.fs1.name}' + osdcap = (f'allow rw tag cephfs data={self.fs1.name}, allow rw tag ' + f'cephfs data={self.fs2.name}') + keyring = self.create_client(self.client_id, moncap, osdcap, mdscap) + keyring_path = self.mount_a.client_remote.mktemp(data=keyring) + + # mount the FS for which client has no auth... + retval = self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_name=self.fs2.name, + check_status=False) + + # tests... + self.assertIsInstance(retval, tuple) + self.assertEqual(len(retval), 3) + self.assertIsInstance(retval[0], CommandFailedError) + self.check_that_mount_failed_for_right_reason(retval[2]) diff --git a/qa/tasks/cephfs/test_multimds_misc.py b/qa/tasks/cephfs/test_multimds_misc.py new file mode 100644 index 000000000..2bb6257c7 --- /dev/null +++ b/qa/tasks/cephfs/test_multimds_misc.py @@ -0,0 +1,223 @@ +import logging +import errno +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.contextutil import safe_while +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + +class TestScrub2(CephFSTestCase): + MDSS_REQUIRED = 3 + CLIENTS_REQUIRED = 1 + + def _check_scrub_status(self, result=None, reverse=False): + self.assertEqual(self.fs.wait_until_scrub_complete(result=result, rank=1, + sleep=5, timeout=30, + reverse=reverse), True) + self.assertEqual(self.fs.wait_until_scrub_complete(result=result, rank=2, + sleep=5, timeout=30, + reverse=reverse), True) + self.assertEqual(self.fs.wait_until_scrub_complete(result=result, rank=0, + sleep=5, timeout=30, + reverse=reverse), True) + + def _check_task_status_na(self, timo=120): + """ check absence of scrub status in ceph status """ + with safe_while(sleep=1, tries=120, action='wait for task status') as proceed: + while proceed(): + active = self.fs.get_active_names() + log.debug("current active={0}".format(active)) + task_status = self.fs.get_task_status("scrub status") + if not active[0] in task_status: + return True + + def _check_task_status(self, expected_status, timo=120): + """ check scrub status for current active mds in ceph status """ + with safe_while(sleep=1, tries=120, action='wait for task status') as proceed: + while proceed(): + active = self.fs.get_active_names() + log.debug("current active={0}".format(active)) + task_status = self.fs.get_task_status("scrub status") + try: + if task_status[active[0]].startswith(expected_status): + return True + except KeyError: + pass + + def _find_path_inos(self, root_path): + inos = [] + p = self.mount_a.run_shell(["find", root_path]) + paths = p.stdout.getvalue().strip().split() + for path in paths: + inos.append(self.mount_a.path_to_ino(path)) + return inos + + def _setup_subtrees(self): + self.fs.set_max_mds(3) + self.fs.wait_for_daemons() + status = self.fs.status() + + path = 'd1/d2/d3/d4/d5/d6/d7/d8' + self.mount_a.run_shell(['mkdir', '-p', path]) + self.mount_a.run_shell(['sync', path]) + + self.mount_a.setfattr("d1/d2", "ceph.dir.pin", "0") + 
self.mount_a.setfattr("d1/d2/d3/d4", "ceph.dir.pin", "1") + self.mount_a.setfattr("d1/d2/d3/d4/d5/d6", "ceph.dir.pin", "2") + + self._wait_subtrees([('/d1/d2', 0), ('/d1/d2/d3/d4', 1)], status, 0) + self._wait_subtrees([('/d1/d2/d3/d4', 1), ('/d1/d2/d3/d4/d5/d6', 2)], status, 1) + self._wait_subtrees([('/d1/d2/d3/d4', 1), ('/d1/d2/d3/d4/d5/d6', 2)], status, 2) + + for rank in range(3): + self.fs.rank_tell(["flush", "journal"], rank) + + def test_apply_tag(self): + self._setup_subtrees() + inos = self._find_path_inos('d1/d2/d3/') + + tag = "tag123" + out_json = self.fs.rank_tell(["tag", "path", "/d1/d2/d3", tag], 0) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + def assertTagged(ino): + file_obj_name = "{0:x}.00000000".format(ino) + self.fs.radosm(["getxattr", file_obj_name, "scrub_tag"]) + + for ino in inos: + assertTagged(ino) + + def test_scrub_backtrace(self): + self._setup_subtrees() + inos = self._find_path_inos('d1/d2/d3/') + + for ino in inos: + file_obj_name = "{0:x}.00000000".format(ino) + self.fs.radosm(["rmxattr", file_obj_name, "parent"]) + + out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + def _check_damage(mds_rank, inos): + all_damage = self.fs.rank_tell(["damage", "ls"], mds_rank) + damage = [d for d in all_damage if d['ino'] in inos and d['damage_type'] == "backtrace"] + return len(damage) >= len(inos) + + self.assertTrue(_check_damage(0, inos[0:2])) + self.assertTrue(_check_damage(1, inos[2:4])) + self.assertTrue(_check_damage(2, inos[4:6])) + + def test_scrub_non_mds0(self): + self._setup_subtrees() + + def expect_exdev(cmd, mds): + try: + self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(mds), *cmd) + except CommandFailedError as e: + if e.exitstatus == errno.EXDEV: + pass + else: + raise + else: + raise RuntimeError("expected failure") + + rank1 = self.fs.get_rank(rank=1) + expect_exdev(["scrub", "start", "/d1/d2/d3"], rank1["name"]) + expect_exdev(["scrub", "abort"], rank1["name"]) + expect_exdev(["scrub", "pause"], rank1["name"]) + expect_exdev(["scrub", "resume"], rank1["name"]) + + def test_scrub_abort_mds0(self): + self._setup_subtrees() + + inos = self._find_path_inos('d1/d2/d3/') + + for ino in inos: + file_obj_name = "{0:x}.00000000".format(ino) + self.fs.radosm(["rmxattr", file_obj_name, "parent"]) + + out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0) + self.assertNotEqual(out_json, None) + + res = self.fs.run_scrub(["abort"]) + self.assertEqual(res['return_code'], 0) + + # Abort and verify in both mdss. We also check the status in rank 0 mds because + # it is supposed to gather the scrub status from other mdss. 
+ self._check_scrub_status() + + # sleep enough to fetch updated task status + checked = self._check_task_status_na() + self.assertTrue(checked) + + def test_scrub_pause_and_resume_mds0(self): + self._setup_subtrees() + + inos = self._find_path_inos('d1/d2/d3/') + + for ino in inos: + file_obj_name = "{0:x}.00000000".format(ino) + self.fs.radosm(["rmxattr", file_obj_name, "parent"]) + + out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0) + self.assertNotEqual(out_json, None) + + res = self.fs.run_scrub(["pause"]) + self.assertEqual(res['return_code'], 0) + + self._check_scrub_status(result="PAUSED") + + checked = self._check_task_status("paused") + self.assertTrue(checked) + + # resume and verify + res = self.fs.run_scrub(["resume"]) + self.assertEqual(res['return_code'], 0) + + self._check_scrub_status(result="PAUSED", reverse=True) + + checked = self._check_task_status_na() + self.assertTrue(checked) + + def test_scrub_pause_and_resume_with_abort_mds0(self): + self._setup_subtrees() + + inos = self._find_path_inos('d1/d2/d3/') + + for ino in inos: + file_obj_name = "{0:x}.00000000".format(ino) + self.fs.radosm(["rmxattr", file_obj_name, "parent"]) + + out_json = self.fs.run_scrub(["start", "/d1/d2/d3", "recursive,force"], 0) + self.assertNotEqual(out_json, None) + + res = self.fs.run_scrub(["pause"]) + self.assertEqual(res['return_code'], 0) + + self._check_scrub_status(result="PAUSED") + + checked = self._check_task_status("paused") + self.assertTrue(checked) + + res = self.fs.run_scrub(["abort"]) + self.assertEqual(res['return_code'], 0) + + self._check_scrub_status(result="PAUSED") + self._check_scrub_status(result="0 inodes") + + # scrub status should still be paused... + checked = self._check_task_status("paused") + self.assertTrue(checked) + + # resume and verify + res = self.fs.run_scrub(["resume"]) + self.assertEqual(res['return_code'], 0) + + self._check_scrub_status(result="PAUSED", reverse=True) + + checked = self._check_task_status_na() + self.assertTrue(checked) diff --git a/qa/tasks/cephfs/test_newops.py b/qa/tasks/cephfs/test_newops.py new file mode 100644 index 000000000..0071cb5d3 --- /dev/null +++ b/qa/tasks/cephfs/test_newops.py @@ -0,0 +1,18 @@ +import logging +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +class TestNewOps(CephFSTestCase): + def test_newops_getvxattr(self): + """ + For nautilus it will crash the MDSs when receive unknown OPs, as a workaround + the clients should avoid sending them to nautilus + """ + + log.info("Test for new getvxattr op...") + self.mount_a.run_shell(["mkdir", "newop_getvxattr_dir"]) + + # to test whether will nautilus crash the MDSs + self.mount_a.getfattr("./newop_getvxattr_dir", "ceph.dir.pin.random") + log.info("Test for new getvxattr op succeeds") diff --git a/qa/tasks/cephfs/test_nfs.py b/qa/tasks/cephfs/test_nfs.py new file mode 100644 index 000000000..0a10709e6 --- /dev/null +++ b/qa/tasks/cephfs/test_nfs.py @@ -0,0 +1,880 @@ +# NOTE: these tests are not yet compatible with vstart_runner.py. +import errno +import json +import time +import logging +from io import BytesIO, StringIO + +from tasks.mgr.mgr_test_case import MgrTestCase +from teuthology import contextutil +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + +NFS_POOL_NAME = '.nfs' # should match mgr_module.py + +# TODO Add test for cluster update when ganesha can be deployed on multiple ports. 
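+# These tests drive the nfs mgr module end to end: deploy a ganesha cluster
+# through the orchestrator, create cephfs exports, mount them over NFS, and
+# exercise export listing/info, user config set/reset and cleanup paths.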
+class TestNFS(MgrTestCase): + def _cmd(self, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + + def _nfs_cmd(self, *args): + return self._cmd("nfs", *args) + + def _nfs_complete_cmd(self, cmd): + return self.mgr_cluster.mon_manager.run_cluster_cmd(args=f"nfs {cmd}", + stdout=StringIO(), + stderr=StringIO(), + check_status=False) + + def _orch_cmd(self, *args): + return self._cmd("orch", *args) + + def _sys_cmd(self, cmd): + ret = self.ctx.cluster.run(args=cmd, check_status=False, stdout=BytesIO(), stderr=BytesIO()) + stdout = ret[0].stdout + if stdout: + return stdout.getvalue() + + def setUp(self): + super(TestNFS, self).setUp() + self._load_module('nfs') + self.cluster_id = "test" + self.export_type = "cephfs" + self.pseudo_path = "/cephfs" + self.path = "/" + self.fs_name = "nfs-cephfs" + self.expected_name = "nfs.test" + self.sample_export = { + "export_id": 1, + "path": self.path, + "cluster_id": self.cluster_id, + "pseudo": self.pseudo_path, + "access_type": "RW", + "squash": "none", + "security_label": True, + "protocols": [ + 4 + ], + "transports": [ + "TCP" + ], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.1", + "fs_name": self.fs_name, + }, + "clients": [] + } + + def _check_nfs_server_status(self): + res = self._sys_cmd(['sudo', 'systemctl', 'status', 'nfs-server']) + if isinstance(res, bytes) and b'Active: active' in res: + self._disable_nfs() + + def _disable_nfs(self): + log.info("Disabling NFS") + self._sys_cmd(['sudo', 'systemctl', 'disable', 'nfs-server', '--now']) + + def _fetch_nfs_daemons_details(self, enable_json=False): + args = ('ps', f'--service_name={self.expected_name}') + if enable_json: + args = (*args, '--format=json') + return self._orch_cmd(*args) + + def _check_nfs_cluster_event(self, expected_event): + ''' + Check whether an event occured during the lifetime of the NFS service + :param expected_event: event that was expected to occur + ''' + event_occurred = False + # Wait few seconds for NFS daemons' status to be updated + with contextutil.safe_while(sleep=10, tries=18, _raise=False) as proceed: + while not event_occurred and proceed(): + daemons_details = json.loads( + self._fetch_nfs_daemons_details(enable_json=True)) + log.info('daemons details %s', daemons_details) + # 'events' key may not exist in the daemon description + # after a mgr fail over and could take some time to appear + # (it's populated on first daemon event) + if 'events' not in daemons_details[0]: + continue + for event in daemons_details[0]['events']: + log.info('daemon event %s', event) + if expected_event in event: + event_occurred = True + break + return event_occurred + + def _check_nfs_cluster_status(self, expected_status, fail_msg): + ''' + Check the current status of the NFS service + :param expected_status: Status to be verified + :param fail_msg: Message to be printed if test failed + ''' + # Wait for a minute as ganesha daemon takes some time to be + # deleted/created + with contextutil.safe_while(sleep=6, tries=10, _raise=False) as proceed: + while proceed(): + if expected_status in self._fetch_nfs_daemons_details(): + return + self.fail(fail_msg) + + def _check_auth_ls(self, export_id=1, check_in=False): + ''' + Tests export user id creation or deletion. 
+ :param export_id: Denotes export number + :param check_in: Check specified export id + ''' + output = self._cmd('auth', 'ls') + client_id = f'client.nfs.{self.cluster_id}' + if check_in: + self.assertIn(f'{client_id}.{export_id}', output) + else: + self.assertNotIn(f'{client_id}.{export_id}', output) + + def _test_idempotency(self, cmd_func, cmd_args): + ''' + Test idempotency of commands. It first runs the TestNFS test method + for a command and then checks the result of command run again. TestNFS + test method has required checks to verify that command works. + :param cmd_func: TestNFS method + :param cmd_args: nfs command arguments to be run + ''' + cmd_func() + ret = self.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd_args) + if ret != 0: + self.fail("Idempotency test failed") + + def _test_create_cluster(self): + ''' + Test single nfs cluster deployment. + ''' + with contextutil.safe_while(sleep=4, tries=10) as proceed: + while proceed(): + try: + # Disable any running nfs ganesha daemon + self._check_nfs_server_status() + cluster_create = self._nfs_complete_cmd( + f'cluster create {self.cluster_id}') + if cluster_create.stderr and 'cluster already exists' \ + in cluster_create.stderr.getvalue(): + self._test_delete_cluster() + continue + # Check for expected status and daemon name + # (nfs.<cluster_id>) + self._check_nfs_cluster_status( + 'running', 'NFS Ganesha cluster deployment failed') + break + except (AssertionError, CommandFailedError) as e: + log.warning(f'{e}, retrying') + + def _test_delete_cluster(self): + ''' + Test deletion of a single nfs cluster. + ''' + self._nfs_cmd('cluster', 'rm', self.cluster_id) + self._check_nfs_cluster_status('No daemons reported', + 'NFS Ganesha cluster could not be deleted') + + def _test_list_cluster(self, empty=False): + ''' + Test listing of deployed nfs clusters. If nfs cluster is deployed then + it checks for expected cluster id. Otherwise checks nothing is listed. + :param empty: If true it denotes no cluster is deployed. + ''' + nfs_output = self._nfs_cmd('cluster', 'ls') + jdata = json.loads(nfs_output) + if empty: + self.assertEqual(len(jdata), 0) + else: + cluster_id = self.cluster_id + self.assertEqual([cluster_id], jdata) + + def _create_export(self, export_id, create_fs=False, extra_cmd=None): + ''' + Test creation of a single export. + :param export_id: Denotes export number + :param create_fs: If false filesytem exists. Otherwise create it. + :param extra_cmd: List of extra arguments for creating export. 
+ ''' + if create_fs: + self._cmd('fs', 'volume', 'create', self.fs_name) + with contextutil.safe_while(sleep=5, tries=30) as proceed: + while proceed(): + output = self._cmd( + 'orch', 'ls', '-f', 'json', + '--service-name', f'mds.{self.fs_name}' + ) + j = json.loads(output) + if j[0]['status']['running']: + break + export_cmd = ['nfs', 'export', 'create', 'cephfs', + '--fsname', self.fs_name, '--cluster-id', self.cluster_id] + if isinstance(extra_cmd, list): + export_cmd.extend(extra_cmd) + else: + export_cmd.extend(['--pseudo-path', self.pseudo_path]) + # Runs the nfs export create command + self._cmd(*export_cmd) + # Check if user id for export is created + self._check_auth_ls(export_id, check_in=True) + res = self._sys_cmd(['rados', '-p', NFS_POOL_NAME, '-N', self.cluster_id, 'get', + f'export-{export_id}', '-']) + # Check if export object is created + if res == b'': + self.fail("Export cannot be created") + + def _create_default_export(self): + ''' + Deploy a single nfs cluster and create export with default options. + ''' + self._test_create_cluster() + self._create_export(export_id='1', create_fs=True) + + def _delete_export(self): + ''' + Delete an export. + ''' + self._nfs_cmd('export', 'rm', self.cluster_id, self.pseudo_path) + self._check_auth_ls() + + def _test_list_export(self): + ''' + Test listing of created exports. + ''' + nfs_output = json.loads(self._nfs_cmd('export', 'ls', self.cluster_id)) + self.assertIn(self.pseudo_path, nfs_output) + + def _test_list_detailed(self, sub_vol_path): + ''' + Test listing of created exports with detailed option. + :param sub_vol_path: Denotes path of subvolume + ''' + nfs_output = json.loads(self._nfs_cmd('export', 'ls', self.cluster_id, '--detailed')) + # Export-1 with default values (access type = rw and path = '\') + self.assertDictEqual(self.sample_export, nfs_output[0]) + # Export-2 with r only + self.sample_export['export_id'] = 2 + self.sample_export['pseudo'] = self.pseudo_path + '1' + self.sample_export['access_type'] = 'RO' + self.sample_export['fsal']['user_id'] = f'{self.expected_name}.2' + self.assertDictEqual(self.sample_export, nfs_output[1]) + # Export-3 for subvolume with r only + self.sample_export['export_id'] = 3 + self.sample_export['path'] = sub_vol_path + self.sample_export['pseudo'] = self.pseudo_path + '2' + self.sample_export['fsal']['user_id'] = f'{self.expected_name}.3' + self.assertDictEqual(self.sample_export, nfs_output[2]) + # Export-4 for subvolume + self.sample_export['export_id'] = 4 + self.sample_export['pseudo'] = self.pseudo_path + '3' + self.sample_export['access_type'] = 'RW' + self.sample_export['fsal']['user_id'] = f'{self.expected_name}.4' + self.assertDictEqual(self.sample_export, nfs_output[3]) + + def _get_export(self): + ''' + Returns export block in json format + ''' + return json.loads(self._nfs_cmd('export', 'info', self.cluster_id, self.pseudo_path)) + + def _test_get_export(self): + ''' + Test fetching of created export. + ''' + nfs_output = self._get_export() + self.assertDictEqual(self.sample_export, nfs_output) + + def _check_export_obj_deleted(self, conf_obj=False): + ''' + Test if export or config object are deleted successfully. 
+ :param conf_obj: It denotes config object needs to be checked + ''' + rados_obj_ls = self._sys_cmd(['rados', '-p', NFS_POOL_NAME, '-N', self.cluster_id, 'ls']) + + if b'export-' in rados_obj_ls or (conf_obj and b'conf-nfs' in rados_obj_ls): + self.fail("Delete export failed") + + def _get_port_ip_info(self): + ''' + Return port and ip for a cluster + ''' + #{'test': {'backend': [{'hostname': 'smithi068', 'ip': '172.21.15.68', + #'port': 2049}]}} + with contextutil.safe_while(sleep=5, tries=6) as proceed: + while proceed(): + try: + info_output = json.loads( + self._nfs_cmd('cluster', 'info', + self.cluster_id))['test']['backend'][0] + return info_output["port"], info_output["ip"] + except (IndexError, CommandFailedError) as e: + if 'list index out of range' in str(e): + log.warning('no port and/or ip found, retrying') + else: + log.warning(f'{e}, retrying') + + def _test_mnt(self, pseudo_path, port, ip, check=True): + ''' + Test mounting of created exports + :param pseudo_path: It is the pseudo root name + :param port: Port of deployed nfs cluster + :param ip: IP of deployed nfs cluster + :param check: It denotes if i/o testing needs to be done + ''' + tries = 3 + while True: + try: + self.ctx.cluster.run( + args=['sudo', 'mount', '-t', 'nfs', '-o', f'port={port}', + f'{ip}:{pseudo_path}', '/mnt']) + break + except CommandFailedError as e: + if tries: + tries -= 1 + time.sleep(2) + continue + # Check if mount failed only when non existing pseudo path is passed + if not check and e.exitstatus == 32: + return + raise + + self.ctx.cluster.run(args=['sudo', 'chmod', '1777', '/mnt']) + + try: + self.ctx.cluster.run(args=['touch', '/mnt/test']) + out_mnt = self._sys_cmd(['ls', '/mnt']) + self.assertEqual(out_mnt, b'test\n') + finally: + self.ctx.cluster.run(args=['sudo', 'umount', '/mnt']) + + def _write_to_read_only_export(self, pseudo_path, port, ip): + ''' + Check if write to read only export fails + ''' + try: + self._test_mnt(pseudo_path, port, ip) + except CommandFailedError as e: + # Write to cephfs export should fail for test to pass + self.assertEqual( + e.exitstatus, errno.EPERM, + 'invalid error code on trying to write to read-only export') + else: + self.fail('expected write to a read-only export to fail') + + def _create_cluster_with_fs(self, fs_name, mnt_pt=None): + """ + create a cluster along with fs and mount it to the path supplied + :param fs_name: name of CephFS volume to be created + :param mnt_pt: mount fs to the path + """ + self._test_create_cluster() + self._cmd('fs', 'volume', 'create', fs_name) + with contextutil.safe_while(sleep=5, tries=30) as proceed: + while proceed(): + output = self._cmd( + 'orch', 'ls', '-f', 'json', + '--service-name', f'mds.{fs_name}' + ) + j = json.loads(output) + if j[0]['status']['running']: + break + if mnt_pt: + with contextutil.safe_while(sleep=3, tries=3) as proceed: + while proceed(): + try: + self.ctx.cluster.run(args=['sudo', 'ceph-fuse', mnt_pt]) + break + except CommandFailedError as e: + log.warning(f'{e}, retrying') + self.ctx.cluster.run(args=['sudo', 'chmod', '1777', mnt_pt]) + + def _delete_cluster_with_fs(self, fs_name, mnt_pt=None, mode=None): + """ + delete cluster along with fs and unmount it from the path supplied + :param fs_name: name of CephFS volume to be deleted + :param mnt_pt: unmount fs from the path + :param mode: revert to this mode + """ + if mnt_pt: + self.ctx.cluster.run(args=['sudo', 'umount', mnt_pt]) + if mode: + if isinstance(mode, bytes): + mode = mode.decode().strip() + 
self.ctx.cluster.run(args=['sudo', 'chmod', mode, mnt_pt]) + self._cmd('fs', 'volume', 'rm', fs_name, '--yes-i-really-mean-it') + self._test_delete_cluster() + + def test_create_and_delete_cluster(self): + ''' + Test successful creation and deletion of the nfs cluster. + ''' + self._test_create_cluster() + self._test_list_cluster() + self._test_delete_cluster() + # List clusters again to ensure no cluster is shown + self._test_list_cluster(empty=True) + + def test_create_delete_cluster_idempotency(self): + ''' + Test idempotency of cluster create and delete commands. + ''' + self._test_idempotency(self._test_create_cluster, ['nfs', 'cluster', 'create', self.cluster_id]) + self._test_idempotency(self._test_delete_cluster, ['nfs', 'cluster', 'rm', self.cluster_id]) + + def test_create_cluster_with_invalid_cluster_id(self): + ''' + Test nfs cluster deployment failure with invalid cluster id. + ''' + try: + invalid_cluster_id = '/cluster_test' # Only [A-Za-z0-9-_.] chars are valid + self._nfs_cmd('cluster', 'create', invalid_cluster_id) + self.fail(f"Cluster successfully created with invalid cluster id {invalid_cluster_id}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.EINVAL: + raise + + def test_create_and_delete_export(self): + ''' + Test successful creation and deletion of the cephfs export. + ''' + self._create_default_export() + self._test_get_export() + port, ip = self._get_port_ip_info() + self._test_mnt(self.pseudo_path, port, ip) + self._delete_export() + # Check if rados export object is deleted + self._check_export_obj_deleted() + self._test_mnt(self.pseudo_path, port, ip, False) + self._test_delete_cluster() + + def test_create_delete_export_idempotency(self): + ''' + Test idempotency of export create and delete commands. + ''' + self._test_idempotency(self._create_default_export, [ + 'nfs', 'export', 'create', 'cephfs', + '--fsname', self.fs_name, '--cluster-id', self.cluster_id, + '--pseudo-path', self.pseudo_path]) + self._test_idempotency(self._delete_export, ['nfs', 'export', 'rm', self.cluster_id, + self.pseudo_path]) + self._test_delete_cluster() + + def test_create_multiple_exports(self): + ''' + Test creating multiple exports with different access type and path. + ''' + # Export-1 with default values (access type = rw and path = '\') + self._create_default_export() + # Export-2 with r only + self._create_export(export_id='2', + extra_cmd=['--pseudo-path', self.pseudo_path+'1', '--readonly']) + # Export-3 for subvolume with r only + self._cmd('fs', 'subvolume', 'create', self.fs_name, 'sub_vol') + fs_path = self._cmd('fs', 'subvolume', 'getpath', self.fs_name, 'sub_vol').strip() + self._create_export(export_id='3', + extra_cmd=['--pseudo-path', self.pseudo_path+'2', '--readonly', + '--path', fs_path]) + # Export-4 for subvolume + self._create_export(export_id='4', + extra_cmd=['--pseudo-path', self.pseudo_path+'3', + '--path', fs_path]) + # Check if exports gets listed + self._test_list_detailed(fs_path) + self._test_delete_cluster() + # Check if rados ganesha conf object is deleted + self._check_export_obj_deleted(conf_obj=True) + self._check_auth_ls() + + def test_exports_on_mgr_restart(self): + ''' + Test export availability on restarting mgr. 
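+ The cephadm module is unloaded and reloaded to restart the mgr, after
+ which the export should still be listed and mountable.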
+ ''' + self._create_default_export() + # unload and load module will restart the mgr + self._unload_module("cephadm") + self._load_module("cephadm") + self._orch_cmd("set", "backend", "cephadm") + # Check if ganesha daemon is running + self._check_nfs_cluster_status('running', 'Failed to redeploy NFS Ganesha cluster') + # Checks if created export is listed + self._test_list_export() + port, ip = self._get_port_ip_info() + self._test_mnt(self.pseudo_path, port, ip) + self._delete_export() + self._test_delete_cluster() + + def test_export_create_with_non_existing_fsname(self): + ''' + Test creating export with non-existing filesystem. + ''' + try: + fs_name = 'nfs-test' + self._test_create_cluster() + self._nfs_cmd('export', 'create', 'cephfs', + '--fsname', fs_name, '--cluster-id', self.cluster_id, + '--pseudo-path', self.pseudo_path) + self.fail(f"Export created with non-existing filesystem {fs_name}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.ENOENT: + raise + finally: + self._test_delete_cluster() + + def test_export_create_with_non_existing_clusterid(self): + ''' + Test creating cephfs export with non-existing nfs cluster. + ''' + try: + cluster_id = 'invalidtest' + self._nfs_cmd('export', 'create', 'cephfs', '--fsname', self.fs_name, + '--cluster-id', cluster_id, '--pseudo-path', self.pseudo_path) + self.fail(f"Export created with non-existing cluster id {cluster_id}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.ENOENT: + raise + + def test_export_create_with_relative_pseudo_path_and_root_directory(self): + ''' + Test creating cephfs export with relative or '/' pseudo path. + ''' + def check_pseudo_path(pseudo_path): + try: + self._nfs_cmd('export', 'create', 'cephfs', '--fsname', self.fs_name, + '--cluster-id', self.cluster_id, + '--pseudo-path', pseudo_path) + self.fail(f"Export created for {pseudo_path}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.EINVAL: + raise + + self._test_create_cluster() + self._cmd('fs', 'volume', 'create', self.fs_name) + check_pseudo_path('invalidpath') + check_pseudo_path('/') + check_pseudo_path('//') + self._cmd('fs', 'volume', 'rm', self.fs_name, '--yes-i-really-mean-it') + self._test_delete_cluster() + + def test_write_to_read_only_export(self): + ''' + Test write to readonly export. 
+ ''' + self._test_create_cluster() + self._create_export(export_id='1', create_fs=True, + extra_cmd=['--pseudo-path', self.pseudo_path, '--readonly']) + port, ip = self._get_port_ip_info() + self._check_nfs_cluster_status('running', 'NFS Ganesha cluster restart failed') + self._write_to_read_only_export(self.pseudo_path, port, ip) + self._test_delete_cluster() + + def test_cluster_info(self): + ''' + Test cluster info outputs correct ip and hostname + ''' + self._test_create_cluster() + info_output = json.loads(self._nfs_cmd('cluster', 'info', self.cluster_id)) + print(f'info {info_output}') + info_ip = info_output[self.cluster_id].get('backend', [])[0].pop("ip") + host_details = { + self.cluster_id: { + 'backend': [ + { + "hostname": self._sys_cmd(['hostname']).decode("utf-8").strip(), + "port": 2049 + } + ], + "virtual_ip": None, + } + } + host_ip = self._sys_cmd(['hostname', '-I']).decode("utf-8").split() + print(f'host_ip is {host_ip}, info_ip is {info_ip}') + self.assertDictEqual(info_output, host_details) + self.assertTrue(info_ip in host_ip) + self._test_delete_cluster() + + def test_cluster_set_reset_user_config(self): + ''' + Test cluster is created using user config and reverts back to default + config on reset. + ''' + self._test_create_cluster() + + pool = NFS_POOL_NAME + user_id = 'test' + fs_name = 'user_test_fs' + pseudo_path = '/ceph' + self._cmd('fs', 'volume', 'create', fs_name) + time.sleep(20) + key = self._cmd('auth', 'get-or-create-key', f'client.{user_id}', 'mon', + 'allow r', 'osd', + f'allow rw pool={pool} namespace={self.cluster_id}, allow rw tag cephfs data={fs_name}', + 'mds', f'allow rw path={self.path}').strip() + config = f""" LOG {{ + Default_log_level = FULL_DEBUG; + }} + + EXPORT {{ + Export_Id = 100; + Transports = TCP; + Path = /; + Pseudo = {pseudo_path}; + Protocols = 4; + Access_Type = RW; + Attr_Expiration_Time = 0; + Squash = None; + FSAL {{ + Name = CEPH; + Filesystem = {fs_name}; + User_Id = {user_id}; + Secret_Access_Key = '{key}'; + }} + }}""" + port, ip = self._get_port_ip_info() + self.ctx.cluster.run(args=['ceph', 'nfs', 'cluster', 'config', + 'set', self.cluster_id, '-i', '-'], stdin=config) + time.sleep(30) + res = self._sys_cmd(['rados', '-p', pool, '-N', self.cluster_id, 'get', + f'userconf-nfs.{user_id}', '-']) + self.assertEqual(config, res.decode('utf-8')) + self._test_mnt(pseudo_path, port, ip) + self._nfs_cmd('cluster', 'config', 'reset', self.cluster_id) + rados_obj_ls = self._sys_cmd(['rados', '-p', NFS_POOL_NAME, '-N', self.cluster_id, 'ls']) + if b'conf-nfs' not in rados_obj_ls and b'userconf-nfs' in rados_obj_ls: + self.fail("User config not deleted") + time.sleep(30) + self._test_mnt(pseudo_path, port, ip, False) + self._cmd('fs', 'volume', 'rm', fs_name, '--yes-i-really-mean-it') + self._test_delete_cluster() + + def test_cluster_set_user_config_with_non_existing_clusterid(self): + ''' + Test setting user config for non-existing nfs cluster. 
+ ''' + cluster_id = 'invalidtest' + with contextutil.safe_while(sleep=3, tries=3) as proceed: + while proceed(): + try: + self.ctx.cluster.run(args=['ceph', 'nfs', 'cluster', + 'config', 'set', cluster_id, + '-i', '-'], stdin='testing') + self.fail(f"User config set for non-existing cluster" + f"{cluster_id}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus == errno.ENOENT: + break + log.warning('exitstatus != ENOENT, retrying') + + def test_cluster_reset_user_config_with_non_existing_clusterid(self): + ''' + Test resetting user config for non-existing nfs cluster. + ''' + try: + cluster_id = 'invalidtest' + self._nfs_cmd('cluster', 'config', 'reset', cluster_id) + self.fail(f"User config reset for non-existing cluster {cluster_id}") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.ENOENT: + raise + + def test_create_export_via_apply(self): + ''' + Test creation of export via apply + ''' + self._test_create_cluster() + self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply', + self.cluster_id, '-i', '-'], + stdin=json.dumps({ + "path": "/", + "pseudo": "/cephfs", + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "fs_name": self.fs_name + } + })) + port, ip = self._get_port_ip_info() + self._test_mnt(self.pseudo_path, port, ip) + self._check_nfs_cluster_status( + 'running', 'NFS Ganesha cluster not running after new export was applied') + self._test_delete_cluster() + + def test_update_export(self): + ''' + Test update of export's pseudo path and access type from rw to ro + ''' + self._create_default_export() + port, ip = self._get_port_ip_info() + self._test_mnt(self.pseudo_path, port, ip) + export_block = self._get_export() + new_pseudo_path = '/testing' + export_block['pseudo'] = new_pseudo_path + export_block['access_type'] = 'RO' + self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply', + self.cluster_id, '-i', '-'], + stdin=json.dumps(export_block)) + if not self._check_nfs_cluster_event('restart'): + self.fail("updating export's pseudo path should trigger restart of NFS service") + self._check_nfs_cluster_status('running', 'NFS Ganesha cluster not running after restart') + self._write_to_read_only_export(new_pseudo_path, port, ip) + self._test_delete_cluster() + + def test_update_export_ro_to_rw(self): + ''' + Test update of export's access level from ro to rw + ''' + self._test_create_cluster() + self._create_export( + export_id='1', create_fs=True, + extra_cmd=['--pseudo-path', self.pseudo_path, '--readonly']) + port, ip = self._get_port_ip_info() + self._write_to_read_only_export(self.pseudo_path, port, ip) + export_block = self._get_export() + export_block['access_type'] = 'RW' + self.ctx.cluster.run( + args=['ceph', 'nfs', 'export', 'apply', self.cluster_id, '-i', '-'], + stdin=json.dumps(export_block)) + if self._check_nfs_cluster_event('restart'): + self.fail("update of export's access type should not trigger NFS service restart") + self._test_mnt(self.pseudo_path, port, ip) + self._test_delete_cluster() + + def test_update_export_with_invalid_values(self): + ''' + Test update of export with invalid values + ''' + self._create_default_export() + export_block = self._get_export() + + def update_with_invalid_values(key, value, fsal=False): + export_block_new = dict(export_block) + if fsal: + export_block_new['fsal'] = dict(export_block['fsal']) + export_block_new['fsal'][key] = value + else: + export_block_new[key] = value + 
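            # Annotation (not part of the original patch): dict() only makes a shallow
            # copy, so the nested 'fsal' dict is copied separately above before being
            # mutated; otherwise the invalid value would leak back into export_block,
            # which is reused by the later update_with_invalid_values() calls.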
try: + self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply', + self.cluster_id, '-i', '-'], + stdin=json.dumps(export_block_new)) + except CommandFailedError: + pass + + update_with_invalid_values('export_id', 9) + update_with_invalid_values('cluster_id', 'testing_new') + update_with_invalid_values('pseudo', 'test_relpath') + update_with_invalid_values('access_type', 'W') + update_with_invalid_values('squash', 'no_squash') + update_with_invalid_values('security_label', 'invalid') + update_with_invalid_values('protocols', [2]) + update_with_invalid_values('transports', ['UD']) + update_with_invalid_values('name', 'RGW', True) + update_with_invalid_values('user_id', 'testing_export', True) + update_with_invalid_values('fs_name', 'b', True) + self._test_delete_cluster() + + def test_cmds_without_reqd_args(self): + ''' + Test that cmd fails on not passing required arguments + ''' + def exec_cmd_invalid(*cmd): + try: + self._nfs_cmd(*cmd) + self.fail(f"nfs {cmd} command executed successfully without required arguments") + except CommandFailedError as e: + # Command should fail for test to pass + if e.exitstatus != errno.EINVAL: + raise + + exec_cmd_invalid('cluster', 'create') + exec_cmd_invalid('cluster', 'delete') + exec_cmd_invalid('cluster', 'config', 'set') + exec_cmd_invalid('cluster', 'config', 'reset') + exec_cmd_invalid('export', 'create', 'cephfs') + exec_cmd_invalid('export', 'create', 'cephfs', 'clusterid') + exec_cmd_invalid('export', 'create', 'cephfs', 'clusterid', 'a_fs') + exec_cmd_invalid('export', 'ls') + exec_cmd_invalid('export', 'delete') + exec_cmd_invalid('export', 'delete', 'clusterid') + exec_cmd_invalid('export', 'info') + exec_cmd_invalid('export', 'info', 'clusterid') + exec_cmd_invalid('export', 'apply') + + def test_non_existent_cluster(self): + """ + Test that cluster info doesn't throw junk data for non-existent cluster + """ + cluster_ls = self._nfs_cmd('cluster', 'ls') + self.assertNotIn('foo', cluster_ls, 'cluster foo exists') + try: + self._nfs_cmd('cluster', 'info', 'foo') + self.fail("nfs cluster info foo returned successfully for non-existent cluster") + except CommandFailedError as e: + if e.exitstatus != errno.ENOENT: + raise + + def test_nfs_export_with_invalid_path(self): + """ + Test that nfs exports can't be created with invalid path + """ + mnt_pt = '/mnt' + preserve_mode = self._sys_cmd(['stat', '-c', '%a', mnt_pt]) + self._create_cluster_with_fs(self.fs_name, mnt_pt) + try: + self._create_export(export_id='123', + extra_cmd=['--pseudo-path', self.pseudo_path, + '--path', '/non_existent_dir']) + except CommandFailedError as e: + if e.exitstatus != errno.ENOENT: + raise + self._delete_cluster_with_fs(self.fs_name, mnt_pt, preserve_mode) + + def test_nfs_export_creation_at_filepath(self): + """ + Test that nfs exports can't be created at a filepath + """ + mnt_pt = '/mnt' + preserve_mode = self._sys_cmd(['stat', '-c', '%a', mnt_pt]) + self._create_cluster_with_fs(self.fs_name, mnt_pt) + self.ctx.cluster.run(args=['touch', f'{mnt_pt}/testfile']) + try: + self._create_export(export_id='123', extra_cmd=['--pseudo-path', + self.pseudo_path, + '--path', + '/testfile']) + except CommandFailedError as e: + if e.exitstatus != errno.ENOTDIR: + raise + self.ctx.cluster.run(args=['rm', '-rf', '/mnt/testfile']) + self._delete_cluster_with_fs(self.fs_name, mnt_pt, preserve_mode) + + def test_nfs_export_creation_at_symlink(self): + """ + Test that nfs exports can't be created at a symlink path + """ + mnt_pt = '/mnt' + preserve_mode = 
self._sys_cmd(['stat', '-c', '%a', mnt_pt]) + self._create_cluster_with_fs(self.fs_name, mnt_pt) + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir']) + self.ctx.cluster.run(args=['ln', '-s', f'{mnt_pt}/testdir', + f'{mnt_pt}/testdir_symlink']) + try: + self._create_export(export_id='123', + extra_cmd=['--pseudo-path', + self.pseudo_path, + '--path', + '/testdir_symlink']) + except CommandFailedError as e: + if e.exitstatus != errno.ENOTDIR: + raise + self.ctx.cluster.run(args=['rm', '-rf', f'{mnt_pt}/*']) + self._delete_cluster_with_fs(self.fs_name, mnt_pt, preserve_mode) diff --git a/qa/tasks/cephfs/test_openfiletable.py b/qa/tasks/cephfs/test_openfiletable.py new file mode 100644 index 000000000..eff6b5093 --- /dev/null +++ b/qa/tasks/cephfs/test_openfiletable.py @@ -0,0 +1,85 @@ +import time +import logging +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +class OpenFileTable(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def _check_oft_counter(self, name, count): + perf_dump = self.fs.mds_asok(['perf', 'dump']) + if perf_dump['oft'][name] == count: + return True + return False + + def test_max_items_per_obj(self): + """ + The maximum number of openfiles omap objects keys are now equal to + osd_deep_scrub_large_omap_object_key_threshold option. + """ + self.set_conf("mds", "osd_deep_scrub_large_omap_object_key_threshold", "5") + + self.fs.mds_restart() + self.fs.wait_for_daemons() + + # Write some bytes to a file + size_mb = 1 + + # Hold the file open + file_count = 8 + for i in range(0, file_count): + filename = "open_file{}".format(i) + p = self.mount_a.open_background(filename) + self.mount_a.write_n_mb(filename, size_mb) + + time.sleep(10) + + """ + With osd_deep_scrub_large_omap_object_key_threshold value as 5 and + opening 8 files we should have a new rados object with name + mds0_openfiles.1 to hold the extra keys. + """ + + self.fs.radosm(["stat", "mds0_openfiles.1"]) + + # Now close the file + self.mount_a.kill_background(p) + + def test_perf_counters(self): + """ + Opening a file should increment omap_total_updates by 1. 
+ """ + + self.set_conf("mds", "osd_deep_scrub_large_omap_object_key_threshold", "1") + self.fs.mds_restart() + self.fs.wait_for_daemons() + + perf_dump = self.fs.mds_asok(['perf', 'dump']) + omap_total_updates_0 = perf_dump['oft']['omap_total_updates'] + log.info("omap_total_updates_0:{}".format(omap_total_updates_0)) + + # Open the file + p = self.mount_a.open_background("omap_counter_test_file") + self.wait_until_true(lambda: self._check_oft_counter('omap_total_updates', 2), timeout=120) + + perf_dump = self.fs.mds_asok(['perf', 'dump']) + omap_total_updates_1 = perf_dump['oft']['omap_total_updates'] + log.info("omap_total_updates_1:{}".format(omap_total_updates_1)) + + self.assertTrue((omap_total_updates_1 - omap_total_updates_0) == 2) + + # Now close the file + self.mount_a.kill_background(p) + # Ensure that the file does not exist any more + self.wait_until_true(lambda: self._check_oft_counter('omap_total_removes', 1), timeout=120) + self.wait_until_true(lambda: self._check_oft_counter('omap_total_kv_pairs', 1), timeout=120) + + perf_dump = self.fs.mds_asok(['perf', 'dump']) + omap_total_removes = perf_dump['oft']['omap_total_removes'] + omap_total_kv_pairs = perf_dump['oft']['omap_total_kv_pairs'] + log.info("omap_total_removes:{}".format(omap_total_removes)) + log.info("omap_total_kv_pairs:{}".format(omap_total_kv_pairs)) + self.assertTrue(omap_total_removes == 1) + self.assertTrue(omap_total_kv_pairs == 1) diff --git a/qa/tasks/cephfs/test_pool_perm.py b/qa/tasks/cephfs/test_pool_perm.py new file mode 100644 index 000000000..9912debed --- /dev/null +++ b/qa/tasks/cephfs/test_pool_perm.py @@ -0,0 +1,109 @@ +from textwrap import dedent +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase +import os + + +class TestPoolPerm(CephFSTestCase): + def test_pool_perm(self): + self.mount_a.run_shell(["touch", "test_file"]) + + file_path = os.path.join(self.mount_a.mountpoint, "test_file") + + remote_script = dedent(""" + import os + import errno + + fd = os.open("{path}", os.O_RDWR) + try: + if {check_read}: + ret = os.read(fd, 1024) + else: + os.write(fd, b'content') + except OSError as e: + if e.errno != errno.EPERM: + raise + else: + raise RuntimeError("client does not check permission of data pool") + """) + + client_name = "client.{0}".format(self.mount_a.client_id) + + # set data pool read only + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd', + 'allow r pool={0}'.format(self.fs.get_data_pool_name())) + + self.mount_a.umount_wait() + self.mount_a.mount_wait() + + # write should fail + self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(False))) + + # set data pool write only + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd', + 'allow w pool={0}'.format(self.fs.get_data_pool_name())) + + self.mount_a.umount_wait() + self.mount_a.mount_wait() + + # read should fail + self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(True))) + + def test_forbidden_modification(self): + """ + That a client who does not have the capability for setting + layout pools is prevented from doing so. 
+ """ + + # Set up + client_name = "client.{0}".format(self.mount_a.client_id) + new_pool_name = "data_new" + self.fs.add_data_pool(new_pool_name) + + self.mount_a.run_shell(["touch", "layoutfile"]) + self.mount_a.run_shell(["mkdir", "layoutdir"]) + + # Set MDS 'rw' perms: missing 'p' means no setting pool layouts + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow rw', 'mon', 'allow r', + 'osd', + 'allow rw pool={0},allow rw pool={1}'.format( + self.fs.get_data_pool_names()[0], + self.fs.get_data_pool_names()[1], + )) + + self.mount_a.umount_wait() + self.mount_a.mount_wait() + + with self.assertRaises(CommandFailedError): + self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool", + new_pool_name) + with self.assertRaises(CommandFailedError): + self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool", + new_pool_name) + self.mount_a.umount_wait() + + # Set MDS 'rwp' perms: should now be able to set layouts + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow rwp', 'mon', 'allow r', + 'osd', + 'allow rw pool={0},allow rw pool={1}'.format( + self.fs.get_data_pool_names()[0], + self.fs.get_data_pool_names()[1], + )) + self.mount_a.mount_wait() + self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool", + new_pool_name) + self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool", + new_pool_name) + self.mount_a.umount_wait() + + def tearDown(self): + self.fs.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(self.mount_a.client_id), + 'mds', 'allow', 'mon', 'allow r', 'osd', + 'allow rw pool={0}'.format(self.fs.get_data_pool_names()[0])) + super(TestPoolPerm, self).tearDown() + diff --git a/qa/tasks/cephfs/test_quota.py b/qa/tasks/cephfs/test_quota.py new file mode 100644 index 000000000..0386672bd --- /dev/null +++ b/qa/tasks/cephfs/test_quota.py @@ -0,0 +1,106 @@ + +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +from teuthology.exceptions import CommandFailedError + +class TestQuota(CephFSTestCase): + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 1 + + def test_remote_update_getfattr(self): + """ + That quota changes made from one client are visible to another + client looking at ceph.quota xattrs + """ + self.mount_a.run_shell(["mkdir", "subdir"]) + + self.assertEqual( + self.mount_a.getfattr("./subdir", "ceph.quota.max_files"), + None) + self.assertEqual( + self.mount_b.getfattr("./subdir", "ceph.quota.max_files"), + None) + + self.mount_a.setfattr("./subdir", "ceph.quota.max_files", "10") + self.assertEqual( + self.mount_a.getfattr("./subdir", "ceph.quota.max_files"), + "10") + + # Should be visible as soon as setxattr operation completes on + # mds (we get here sooner because setfattr gets an early reply) + self.wait_until_equal( + lambda: self.mount_b.getfattr("./subdir", "ceph.quota.max_files"), + "10", timeout=10) + + def test_remote_update_df(self): + """ + That when a client modifies the quota on a directory used + as another client's root, the other client sees the change + reflected in their statfs output. 
+ """ + + self.mount_b.umount_wait() + + self.mount_a.run_shell(["mkdir", "subdir"]) + + size_before = 1024 * 1024 * 128 + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", + "%s" % size_before) + + self.mount_b.mount_wait(cephfs_mntpt="/subdir") + + self.assertDictEqual( + self.mount_b.df(), + { + "total": size_before, + "used": 0, + "available": size_before + }) + + size_after = 1024 * 1024 * 256 + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", + "%s" % size_after) + + # Should be visible as soon as setxattr operation completes on + # mds (we get here sooner because setfattr gets an early reply) + self.wait_until_equal( + lambda: self.mount_b.df(), + { + "total": size_after, + "used": 0, + "available": size_after + }, + timeout=10 + ) + + def test_remote_update_write(self): + """ + That when a client modifies the quota on a directory used + as another client's root, the other client sees the effect + of the change when writing data. + """ + + self.mount_a.run_shell(["mkdir", "subdir_files"]) + self.mount_a.run_shell(["mkdir", "subdir_data"]) + + # Set some nice high quotas that mount_b's initial operations + # will be well within + self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "100") + self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "104857600") + + # Do some writes within my quota + self.mount_b.create_n_files("subdir_files/file", 20) + self.mount_b.write_n_mb("subdir_data/file", 20) + + # Set quotas lower than what mount_b already wrote, it should + # refuse to write more once it's seen them + self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "10") + self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "1048576") + + # Do some writes that would have been okay within the old quota, + # but are forbidden under the new quota + with self.assertRaises(CommandFailedError): + self.mount_b.create_n_files("subdir_files/file", 40) + with self.assertRaises(CommandFailedError): + self.mount_b.write_n_mb("subdir_data/file", 40) + diff --git a/qa/tasks/cephfs/test_readahead.py b/qa/tasks/cephfs/test_readahead.py new file mode 100644 index 000000000..7e6270f03 --- /dev/null +++ b/qa/tasks/cephfs/test_readahead.py @@ -0,0 +1,26 @@ +import logging +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + + +class TestReadahead(CephFSTestCase): + def test_flush(self): + # Create 32MB file + self.mount_a.run_shell(["dd", "if=/dev/urandom", "of=foo", "bs=1M", "count=32"]) + + # Unmount and remount the client to flush cache + self.mount_a.umount_wait() + self.mount_a.mount_wait() + + initial_op_read = self.mount_a.get_op_read_count() + self.mount_a.run_shell(["dd", "if=foo", "of=/dev/null", "bs=128k", "count=32"]) + op_read = self.mount_a.get_op_read_count() + self.assertGreaterEqual(op_read, initial_op_read) + op_read -= initial_op_read + log.info("read operations: {0}".format(op_read)) + + # with exponentially increasing readahead, we should see fewer than 10 operations + # but this test simply checks if the client is doing a remote read for each local read + if op_read >= 32: + raise RuntimeError("readahead not working") diff --git a/qa/tasks/cephfs/test_recovery_fs.py b/qa/tasks/cephfs/test_recovery_fs.py new file mode 100644 index 000000000..bbcdf9769 --- /dev/null +++ b/qa/tasks/cephfs/test_recovery_fs.py @@ -0,0 +1,38 @@ +import logging +from os.path import join as os_path_join + +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +class 
TestFSRecovery(CephFSTestCase): + """ + Tests for recovering FS after loss of FSMap + """ + + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 3 + + def test_recover_fs_after_fsmap_removal(self): + data_pool = self.fs.get_data_pool_name() + metadata_pool = self.fs.get_metadata_pool_name() + # write data in mount, and fsync + self.mount_a.create_n_files('file_on_fs', 1, sync=True) + # faild MDSs to allow removing the file system in the next step + self.fs.fail() + # Remove file system to lose FSMap and keep the pools intact. + # This mimics the scenario where the monitor store is rebuilt + # using OSDs to recover a cluster with corrupt monitor store. + # The FSMap is permanently lost, but the FS pools are + # recovered/intact + self.fs.rm() + # Recreate file system with pool and previous fscid + self.fs.mon_manager.raw_cluster_cmd( + 'fs', 'new', self.fs.name, metadata_pool, data_pool, + '--recover', '--force', '--fscid', f'{self.fs.id}') + self.fs.set_joinable() + # Check status of file system + self.fs.wait_for_daemons() + # check data in file sytem is intact + filepath = os_path_join(self.mount_a.hostfs_mntpt, 'file_on_fs_0') + self.assertEqual(self.mount_a.read_file(filepath), "0") diff --git a/qa/tasks/cephfs/test_recovery_pool.py b/qa/tasks/cephfs/test_recovery_pool.py new file mode 100644 index 000000000..8c4e1967d --- /dev/null +++ b/qa/tasks/cephfs/test_recovery_pool.py @@ -0,0 +1,179 @@ +""" +Test our tools for recovering metadata from the data pool into an alternate pool +""" + +import logging +import traceback +from collections import namedtuple + +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + + +ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) + + +class OverlayWorkload(object): + def __init__(self): + self._initial_state = None + + # Accumulate backtraces for every failed validation, and return them. Backtraces + # are rather verbose, but we only see them when something breaks, and they + # let us see which check failed without having to decorate each check with + # a string + self._errors = [] + + def assert_equal(self, a, b): + try: + if a != b: + raise AssertionError("{0} != {1}".format(a, b)) + except AssertionError as e: + self._errors.append( + ValidationError(e, traceback.format_exc(3)) + ) + + def write(self): + """ + Write the workload files to the mount + """ + raise NotImplementedError() + + def validate(self): + """ + Read from the mount and validate that the workload files are present (i.e. have + survived or been reconstructed from the test scenario) + """ + raise NotImplementedError() + + def damage(self, fs): + """ + Damage the filesystem pools in ways that will be interesting to recover from. 
By + default just wipe everything in the metadata pool + """ + + pool = fs.get_metadata_pool_name() + fs.rados(["purge", pool, '--yes-i-really-really-mean-it']) + + def flush(self, fs): + """ + Called after client unmount, after write: flush whatever you want + """ + fs.rank_asok(["flush", "journal"]) + + +class SimpleOverlayWorkload(OverlayWorkload): + """ + Single file, single directory, check that it gets recovered and so does its size + """ + def write(self, mount): + mount.run_shell(["mkdir", "subdir"]) + mount.write_n_mb("subdir/sixmegs", 6) + self._initial_state = mount.stat("subdir/sixmegs") + + def validate(self, recovery_mount): + recovery_mount.run_shell(["ls", "subdir"]) + st = recovery_mount.stat("subdir/sixmegs") + self.assert_equal(st['st_size'], self._initial_state['st_size']) + return self._errors + +class TestRecoveryPool(CephFSTestCase): + MDSS_REQUIRED = 2 + CLIENTS_REQUIRED = 1 + REQUIRE_RECOVERY_FILESYSTEM = True + + def is_marked_damaged(self, rank): + mds_map = self.fs.get_mds_map() + return rank in mds_map['damaged'] + + def _rebuild_metadata(self, workload, other_pool=None, workers=1): + """ + That when all objects in metadata pool are removed, we can rebuild a metadata pool + based on the contents of a data pool, and a client can see and read our files. + """ + + # First, inject some files + + workload.write(self.mount_a) + + # Unmount the client and flush the journal: the tool should also cope with + # situations where there is dirty metadata, but we'll test that separately + self.mount_a.umount_wait() + workload.flush(self.fs) + self.fs.fail() + + # After recovery, we need the MDS to not be strict about stats (in production these options + # are off by default, but in QA we need to explicitly disable them) + # Note: these have to be written to ceph.conf to override existing ceph.conf values. 
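        # Illustrative sketch, not part of the original patch: after the two
        # set_ceph_conf() calls below, the teuthology-managed ceph.conf should end up
        # with roughly the following [mds] entries, which the restarted MDS then reads
        # at startup:
        #
        #     [mds]
        #     mds verify scatter = false
        #     mds debug scatterstat = false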
+ self.fs.set_ceph_conf('mds', 'mds verify scatter', False) + self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False) + self.fs.mds_restart() + + # Apply any data damage the workload wants + workload.damage(self.fs) + + # Create the alternate pool if requested + recovery_fs = self.mds_cluster.newfs(name="recovery_fs", create=False) + recovery_fs.set_data_pool_name(self.fs.get_data_pool_name()) + recovery_fs.create(recover=True, metadata_overlay=True) + + recovery_pool = recovery_fs.get_metadata_pool_name() + recovery_fs.mon_manager.raw_cluster_cmd('-s') + + # Reset the MDS map in case multiple ranks were in play: recovery procedure + # only understands how to rebuild metadata under rank 0 + #self.fs.reset() + #self.fs.table_tool([self.fs.name + ":0", "reset", "session"]) + #self.fs.table_tool([self.fs.name + ":0", "reset", "snap"]) + #self.fs.table_tool([self.fs.name + ":0", "reset", "inode"]) + + # Run the recovery procedure + recovery_fs.data_scan(['init', '--force-init', + '--filesystem', recovery_fs.name, + '--alternate-pool', recovery_pool]) + recovery_fs.table_tool([recovery_fs.name + ":0", "reset", "session"]) + recovery_fs.table_tool([recovery_fs.name + ":0", "reset", "snap"]) + recovery_fs.table_tool([recovery_fs.name + ":0", "reset", "inode"]) + if False: + with self.assertRaises(CommandFailedError): + # Normal reset should fail when no objects are present, we'll use --force instead + self.fs.journal_tool(["journal", "reset"], 0) + + recovery_fs.data_scan(['scan_extents', '--alternate-pool', + recovery_pool, '--filesystem', self.fs.name, + self.fs.get_data_pool_name()]) + recovery_fs.data_scan(['scan_inodes', '--alternate-pool', + recovery_pool, '--filesystem', self.fs.name, + '--force-corrupt', '--force-init', + self.fs.get_data_pool_name()]) + recovery_fs.data_scan(['scan_links', '--filesystem', recovery_fs.name]) + recovery_fs.journal_tool(['event', 'recover_dentries', 'list', + '--alternate-pool', recovery_pool], 0) + recovery_fs.journal_tool(["journal", "reset", "--force"], 0) + + # Start the MDS + recovery_fs.set_joinable() + status = recovery_fs.wait_for_daemons() + + self.config_set('mds', 'debug_mds', '20') + for rank in recovery_fs.get_ranks(status=status): + recovery_fs.rank_tell(['scrub', 'start', '/', 'force,recursive,repair'], rank=rank['rank'], status=status) + log.info(str(recovery_fs.status())) + + # Mount a client + self.mount_a.mount_wait(cephfs_name=recovery_fs.name) + + # See that the files are present and correct + errors = workload.validate(self.mount_a) + if errors: + log.error("Validation errors found: {0}".format(len(errors))) + for e in errors: + log.error(e.exception) + log.error(e.backtrace) + raise AssertionError("Validation failed, first error: {0}\n{1}".format( + errors[0].exception, errors[0].backtrace + )) + + def test_rebuild_simple(self): + self._rebuild_metadata(SimpleOverlayWorkload()) diff --git a/qa/tasks/cephfs/test_scrub.py b/qa/tasks/cephfs/test_scrub.py new file mode 100644 index 000000000..647860129 --- /dev/null +++ b/qa/tasks/cephfs/test_scrub.py @@ -0,0 +1,187 @@ +""" +Test CephFS scrub (distinct from OSD scrub) functionality +""" + +from io import BytesIO +import logging +from collections import namedtuple + +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +ValidationError = namedtuple("ValidationError", ["exception", "backtrace"]) + + +class Workload(CephFSTestCase): + def __init__(self, test, filesystem, mount): + super().__init__() + self._test = test + self._mount = mount + 
self._filesystem = filesystem + self._initial_state = None + + # Accumulate backtraces for every failed validation, and return them. Backtraces + # are rather verbose, but we only see them when something breaks, and they + # let us see which check failed without having to decorate each check with + # a string + self._errors = [] + + def write(self): + """ + Write the workload files to the mount + """ + raise NotImplementedError() + + def validate(self): + """ + Read from the mount and validate that the workload files are present (i.e. have + survived or been reconstructed from the test scenario) + """ + raise NotImplementedError() + + def damage(self): + """ + Damage the filesystem pools in ways that will be interesting to recover from. By + default just wipe everything in the metadata pool + """ + # Delete every object in the metadata pool + pool = self._filesystem.get_metadata_pool_name() + self._filesystem.rados(["purge", pool, '--yes-i-really-really-mean-it']) + + def flush(self): + """ + Called after client unmount, after write: flush whatever you want + """ + self._filesystem.mds_asok(["flush", "journal"]) + + +class BacktraceWorkload(Workload): + """ + Single file, single directory, wipe the backtrace and check it. + """ + def write(self): + self._mount.run_shell(["mkdir", "subdir"]) + self._mount.write_n_mb("subdir/sixmegs", 6) + + def validate(self): + st = self._mount.stat("subdir/sixmegs") + self._filesystem.mds_asok(["flush", "journal"]) + bt = self._filesystem.read_backtrace(st['st_ino']) + parent = bt['ancestors'][0]['dname'] + self.assertEqual(parent, 'sixmegs') + return self._errors + + def damage(self): + st = self._mount.stat("subdir/sixmegs") + self._filesystem.mds_asok(["flush", "journal"]) + self._filesystem._write_data_xattr(st['st_ino'], "parent", "") + + def create_files(self, nfiles=1000): + self._mount.create_n_files("scrub-new-files/file", nfiles) + + +class DupInodeWorkload(Workload): + """ + Duplicate an inode and try scrubbing it twice." + """ + + def write(self): + self._mount.run_shell(["mkdir", "parent"]) + self._mount.run_shell(["mkdir", "parent/child"]) + self._mount.write_n_mb("parent/parentfile", 6) + self._mount.write_n_mb("parent/child/childfile", 6) + + def damage(self): + self._mount.umount_wait() + self._filesystem.mds_asok(["flush", "journal"]) + self._filesystem.fail() + d = self._filesystem.radosmo(["getomapval", "10000000000.00000000", "parentfile_head", "-"]) + self._filesystem.radosm(["setomapval", "10000000000.00000000", "shadow_head"], stdin=BytesIO(d)) + self._test.config_set('mds', 'mds_hack_allow_loading_invalid_metadata', True) + self._filesystem.set_joinable() + self._filesystem.wait_for_daemons() + + def validate(self): + out_json = self._filesystem.run_scrub(["start", "/", "recursive,repair"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self._filesystem.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + self.assertTrue(self._filesystem.are_daemons_healthy()) + return self._errors + + +class TestScrub(CephFSTestCase): + MDSS_REQUIRED = 1 + + def setUp(self): + super().setUp() + + def _scrub(self, workload, workers=1): + """ + That when all objects in metadata pool are removed, we can rebuild a metadata pool + based on the contents of a data pool, and a client can see and read our files. 
+ """ + + # First, inject some files + + workload.write() + + # are off by default, but in QA we need to explicitly disable them) + self.fs.set_ceph_conf('mds', 'mds verify scatter', False) + self.fs.set_ceph_conf('mds', 'mds debug scatterstat', False) + + # Apply any data damage the workload wants + workload.damage() + + out_json = self.fs.run_scrub(["start", "/", "recursive,repair"]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # See that the files are present and correct + errors = workload.validate() + if errors: + log.error("Validation errors found: {0}".format(len(errors))) + for e in errors: + log.error(e.exception) + log.error(e.backtrace) + raise AssertionError("Validation failed, first error: {0}\n{1}".format( + errors[0].exception, errors[0].backtrace + )) + + def _get_damage_count(self, damage_type='backtrace'): + out_json = self.fs.rank_tell(["damage", "ls"]) + self.assertNotEqual(out_json, None) + + damage_count = 0 + for it in out_json: + if it['damage_type'] == damage_type: + damage_count += 1 + return damage_count + + def _scrub_new_files(self, workload): + """ + That scrubbing new files does not lead to errors + """ + workload.create_files(1000) + self.fs.wait_until_scrub_complete() + self.assertEqual(self._get_damage_count(), 0) + + def test_scrub_backtrace_for_new_files(self): + self._scrub_new_files(BacktraceWorkload(self, self.fs, self.mount_a)) + + def test_scrub_backtrace(self): + self._scrub(BacktraceWorkload(self, self.fs, self.mount_a)) + + def test_scrub_dup_inode(self): + self._scrub(DupInodeWorkload(self, self.fs, self.mount_a)) + + def test_mdsdir_scrub_backtrace(self): + damage_count = self._get_damage_count() + self.assertNotIn("MDS_DAMAGE", self.mds_cluster.mon_manager.get_mon_health()['checks']) + + out_json = self.fs.run_scrub(["start", "~mdsdir", "recursive"]) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + self.assertEqual(self._get_damage_count(), damage_count) + self.assertNotIn("MDS_DAMAGE", self.mds_cluster.mon_manager.get_mon_health()['checks']) diff --git a/qa/tasks/cephfs/test_scrub_checks.py b/qa/tasks/cephfs/test_scrub_checks.py new file mode 100644 index 000000000..e41b997a6 --- /dev/null +++ b/qa/tasks/cephfs/test_scrub_checks.py @@ -0,0 +1,462 @@ +""" +MDS admin socket scrubbing-related tests. +""" +import json +import logging +import errno +import time +from teuthology.exceptions import CommandFailedError +from teuthology.contextutil import safe_while +import os +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + +class TestScrubControls(CephFSTestCase): + """ + Test basic scrub control operations such as abort, pause and resume. 
+ """ + + MDSS_REQUIRED = 2 + CLIENTS_REQUIRED = 1 + + def _abort_scrub(self, expected): + res = self.fs.run_scrub(["abort"]) + self.assertEqual(res['return_code'], expected) + def _pause_scrub(self, expected): + res = self.fs.run_scrub(["pause"]) + self.assertEqual(res['return_code'], expected) + def _resume_scrub(self, expected): + res = self.fs.run_scrub(["resume"]) + self.assertEqual(res['return_code'], expected) + def _check_task_status(self, expected_status, timo=120): + """ check scrub status for current active mds in ceph status """ + with safe_while(sleep=1, tries=120, action='wait for task status') as proceed: + while proceed(): + active = self.fs.get_active_names() + log.debug("current active={0}".format(active)) + task_status = self.fs.get_task_status("scrub status") + try: + if task_status[active[0]].startswith(expected_status): + return True + except KeyError: + pass + + def _check_task_status_na(self, timo=120): + """ check absence of scrub status in ceph status """ + with safe_while(sleep=1, tries=120, action='wait for task status') as proceed: + while proceed(): + active = self.fs.get_active_names() + log.debug("current active={0}".format(active)) + task_status = self.fs.get_task_status("scrub status") + if not active[0] in task_status: + return True + + def create_scrub_data(self, test_dir): + for i in range(32): + dirname = "dir.{0}".format(i) + dirpath = os.path.join(test_dir, dirname) + self.mount_a.run_shell_payload(f""" +set -e +mkdir -p {dirpath} +for ((i = 0; i < 32; i++)); do + dd if=/dev/urandom of={dirpath}/filename.$i bs=1M conv=fdatasync count=1 +done +""") + + def test_scrub_abort(self): + test_dir = "scrub_control_test_path" + abs_test_path = "/{0}".format(test_dir) + + self.create_scrub_data(test_dir) + + out_json = self.fs.run_scrub(["start", abs_test_path, "recursive"]) + self.assertNotEqual(out_json, None) + + # abort and verify + self._abort_scrub(0) + self.fs.wait_until_scrub_complete(sleep=5, timeout=30) + + # sleep enough to fetch updated task status + checked = self._check_task_status_na() + self.assertTrue(checked) + + def test_scrub_pause_and_resume(self): + test_dir = "scrub_control_test_path" + abs_test_path = "/{0}".format(test_dir) + + log.info("mountpoint: {0}".format(self.mount_a.mountpoint)) + client_path = os.path.join(self.mount_a.mountpoint, test_dir) + log.info("client_path: {0}".format(client_path)) + + self.create_scrub_data(test_dir) + + out_json = self.fs.run_scrub(["start", abs_test_path, "recursive"]) + self.assertNotEqual(out_json, None) + + # pause and verify + self._pause_scrub(0) + out_json = self.fs.get_scrub_status() + self.assertTrue("PAUSED" in out_json['status']) + + checked = self._check_task_status("paused") + self.assertTrue(checked) + + # resume and verify + self._resume_scrub(0) + out_json = self.fs.get_scrub_status() + self.assertFalse("PAUSED" in out_json['status']) + + checked = self._check_task_status_na() + self.assertTrue(checked) + + def test_scrub_pause_and_resume_with_abort(self): + test_dir = "scrub_control_test_path" + abs_test_path = "/{0}".format(test_dir) + + self.create_scrub_data(test_dir) + + out_json = self.fs.run_scrub(["start", abs_test_path, "recursive"]) + self.assertNotEqual(out_json, None) + + # pause and verify + self._pause_scrub(0) + out_json = self.fs.get_scrub_status() + self.assertTrue("PAUSED" in out_json['status']) + + checked = self._check_task_status("paused") + self.assertTrue(checked) + + # abort and verify + self._abort_scrub(0) + out_json = self.fs.get_scrub_status() + 
self.assertTrue("PAUSED" in out_json['status']) + self.assertTrue("0 inodes" in out_json['status']) + + # scrub status should still be paused... + checked = self._check_task_status("paused") + self.assertTrue(checked) + + # resume and verify + self._resume_scrub(0) + self.assertTrue(self.fs.wait_until_scrub_complete(sleep=5, timeout=30)) + + checked = self._check_task_status_na() + self.assertTrue(checked) + + def test_scrub_task_status_on_mds_failover(self): + (original_active, ) = self.fs.get_active_names() + original_standbys = self.mds_cluster.get_standby_daemons() + + test_dir = "scrub_control_test_path" + abs_test_path = "/{0}".format(test_dir) + + self.create_scrub_data(test_dir) + + out_json = self.fs.run_scrub(["start", abs_test_path, "recursive"]) + self.assertNotEqual(out_json, None) + + # pause and verify + self._pause_scrub(0) + out_json = self.fs.get_scrub_status() + self.assertTrue("PAUSED" in out_json['status']) + + checked = self._check_task_status("paused") + self.assertTrue(checked) + + # Kill the rank 0 + self.fs.mds_stop(original_active) + + def promoted(): + active = self.fs.get_active_names() + return active and active[0] in original_standbys + + log.info("Waiting for promotion of one of the original standbys {0}".format( + original_standbys)) + self.wait_until_true(promoted, timeout=self.fs.beacon_timeout) + + self._check_task_status_na() + +class TestScrubChecks(CephFSTestCase): + """ + Run flush and scrub commands on the specified files in the filesystem. This + task will run through a sequence of operations, but it is not comprehensive + on its own -- it doesn't manipulate the mds cache state to test on both + in- and out-of-memory parts of the hierarchy. So it's designed to be run + multiple times within a single test run, so that the test can manipulate + memory state. + + Usage: + mds_scrub_checks: + mds_rank: 0 + path: path/to/test/dir + client: 0 + run_seq: [0-9]+ + + Increment the run_seq on subsequent invocations within a single test run; + it uses that value to generate unique folder and file names. 
+ """ + + MDSS_REQUIRED = 1 + CLIENTS_REQUIRED = 1 + + def test_scrub_checks(self): + self._checks(0) + self._checks(1) + + def _checks(self, run_seq): + mds_rank = 0 + test_dir = "scrub_test_path" + + abs_test_path = "/{0}".format(test_dir) + + log.info("mountpoint: {0}".format(self.mount_a.mountpoint)) + client_path = os.path.join(self.mount_a.mountpoint, test_dir) + log.info("client_path: {0}".format(client_path)) + + log.info("Cloning repo into place") + repo_path = TestScrubChecks.clone_repo(self.mount_a, client_path) + + log.info("Initiating mds_scrub_checks on mds.{id_} test_path {path}, run_seq {seq}".format( + id_=mds_rank, path=abs_test_path, seq=run_seq) + ) + + + success_validator = lambda j, r: self.json_validator(j, r, "return_code", 0) + + nep = "{test_path}/i/dont/exist".format(test_path=abs_test_path) + self.asok_command(mds_rank, "flush_path {nep}".format(nep=nep), + lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT)) + self.tell_command(mds_rank, "scrub start {nep}".format(nep=nep), + lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT)) + + test_repo_path = "{test_path}/ceph-qa-suite".format(test_path=abs_test_path) + dirpath = "{repo_path}/suites".format(repo_path=test_repo_path) + + if run_seq == 0: + log.info("First run: flushing {dirpath}".format(dirpath=dirpath)) + command = "flush_path {dirpath}".format(dirpath=dirpath) + self.asok_command(mds_rank, command, success_validator) + command = "scrub start {dirpath}".format(dirpath=dirpath) + self.tell_command(mds_rank, command, success_validator) + + filepath = "{repo_path}/suites/fs/verify/validater/valgrind.yaml".format( + repo_path=test_repo_path) + if run_seq == 0: + log.info("First run: flushing {filepath}".format(filepath=filepath)) + command = "flush_path {filepath}".format(filepath=filepath) + self.asok_command(mds_rank, command, success_validator) + command = "scrub start {filepath}".format(filepath=filepath) + self.tell_command(mds_rank, command, success_validator) + + if run_seq == 0: + log.info("First run: flushing base dir /") + command = "flush_path /" + self.asok_command(mds_rank, command, success_validator) + command = "scrub start /" + self.tell_command(mds_rank, command, success_validator) + + new_dir = "{repo_path}/new_dir_{i}".format(repo_path=repo_path, i=run_seq) + test_new_dir = "{repo_path}/new_dir_{i}".format(repo_path=test_repo_path, + i=run_seq) + self.mount_a.run_shell(["mkdir", new_dir]) + command = "flush_path {dir}".format(dir=test_new_dir) + self.asok_command(mds_rank, command, success_validator) + + new_file = "{repo_path}/new_file_{i}".format(repo_path=repo_path, + i=run_seq) + test_new_file = "{repo_path}/new_file_{i}".format(repo_path=test_repo_path, + i=run_seq) + self.mount_a.write_n_mb(new_file, 1) + + command = "flush_path {file}".format(file=test_new_file) + self.asok_command(mds_rank, command, success_validator) + + # check that scrub fails on errors + ino = self.mount_a.path_to_ino(new_file) + rados_obj_name = "{ino:x}.00000000".format(ino=ino) + command = "scrub start {file}".format(file=test_new_file) + + def _check_and_clear_damage(ino, dtype): + all_damage = self.fs.rank_tell(["damage", "ls"], mds_rank) + damage = [d for d in all_damage if d['ino'] == ino and d['damage_type'] == dtype] + for d in damage: + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[mds_rank]), + "damage", "rm", str(d['id'])) + return len(damage) > 0 + + # Missing parent xattr + self.assertFalse(_check_and_clear_damage(ino, 
"backtrace")); + self.fs.rados(["rmxattr", rados_obj_name, "parent"], pool=self.fs.get_data_pool_name()) + self.tell_command(mds_rank, command, success_validator) + self.fs.wait_until_scrub_complete(sleep=5, timeout=30) + self.assertTrue(_check_and_clear_damage(ino, "backtrace")); + + command = "flush_path /" + self.asok_command(mds_rank, command, success_validator) + + def scrub_with_stray_evaluation(self, fs, mnt, path, flag, files=2000, + _hard_links=3): + fs.set_allow_new_snaps(True) + + test_dir = "stray_eval_dir" + mnt.run_shell(["mkdir", test_dir]) + client_path = os.path.join(mnt.mountpoint, test_dir) + mnt.create_n_files(fs_path=f"{test_dir}/file", count=files, + hard_links=_hard_links) + mnt.run_shell(["mkdir", f"{client_path}/.snap/snap1-{test_dir}"]) + mnt.run_shell(f"find {client_path}/ -type f -delete") + mnt.run_shell(["rmdir", f"{client_path}/.snap/snap1-{test_dir}"]) + perf_dump = fs.rank_tell(["perf", "dump"], 0) + self.assertNotEqual(perf_dump.get('mds_cache').get('num_strays'), + 0, "mdcache.num_strays is zero") + + log.info( + f"num of strays: {perf_dump.get('mds_cache').get('num_strays')}") + + out_json = fs.run_scrub(["start", path, flag]) + self.assertNotEqual(out_json, None) + self.assertEqual(out_json["return_code"], 0) + + self.assertEqual( + fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + perf_dump = fs.rank_tell(["perf", "dump"], 0) + self.assertEqual(int(perf_dump.get('mds_cache').get('num_strays')), + 0, "mdcache.num_strays is non-zero") + + def test_scrub_repair(self): + mds_rank = 0 + test_dir = "scrub_repair_path" + + self.mount_a.run_shell(["mkdir", test_dir]) + self.mount_a.run_shell(["touch", "{0}/file".format(test_dir)]) + dir_objname = "{:x}.00000000".format(self.mount_a.path_to_ino(test_dir)) + + self.mount_a.umount_wait() + + # flush journal entries to dirfrag objects, and expire journal + self.fs.mds_asok(['flush', 'journal']) + self.fs.mds_stop() + + # remove the dentry from dirfrag, cause incorrect fragstat/rstat + self.fs.radosm(["rmomapkey", dir_objname, "file_head"]) + + self.fs.mds_fail_restart() + self.fs.wait_for_daemons() + + self.mount_a.mount_wait() + + # fragstat indicates the directory is not empty, rmdir should fail + with self.assertRaises(CommandFailedError) as ar: + self.mount_a.run_shell(["rmdir", test_dir]) + self.assertEqual(ar.exception.exitstatus, 1) + + self.tell_command(mds_rank, "scrub start /{0} repair".format(test_dir), + lambda j, r: self.json_validator(j, r, "return_code", 0)) + + # wait a few second for background repair + time.sleep(10) + + # fragstat should be fixed + self.mount_a.run_shell(["rmdir", test_dir]) + + def test_stray_evaluation_with_scrub(self): + """ + test that scrub can iterate over ~mdsdir and evaluate strays + """ + self.scrub_with_stray_evaluation(self.fs, self.mount_a, "~mdsdir", + "recursive") + + def test_flag_scrub_mdsdir(self): + """ + test flag scrub_mdsdir + """ + self.scrub_with_stray_evaluation(self.fs, self.mount_a, "/", + "recursive,scrub_mdsdir") + + @staticmethod + def json_validator(json_out, rc, element, expected_value): + if rc != 0: + return False, "asok command returned error {rc}".format(rc=rc) + element_value = json_out.get(element) + if element_value != expected_value: + return False, "unexpectedly got {jv} instead of {ev}!".format( + jv=element_value, ev=expected_value) + return True, "Succeeded" + + def tell_command(self, mds_rank, command, validator): + log.info("Running command '{command}'".format(command=command)) + + command_list = command.split() + 
jout = self.fs.rank_tell(command_list, mds_rank) + + log.info("command '{command}' returned '{jout}'".format( + command=command, jout=jout)) + + success, errstring = validator(jout, 0) + if not success: + raise AsokCommandFailedError(command, 0, jout, errstring) + return jout + + def asok_command(self, mds_rank, command, validator): + log.info("Running command '{command}'".format(command=command)) + + command_list = command.split() + + # we just assume there's an active mds for every rank + mds_id = self.fs.get_active_names()[mds_rank] + proc = self.fs.mon_manager.admin_socket('mds', mds_id, + command_list, check_status=False) + rout = proc.exitstatus + sout = proc.stdout.getvalue() + + if sout.strip(): + jout = json.loads(sout) + else: + jout = None + + log.info("command '{command}' got response code '{rout}' and stdout '{sout}'".format( + command=command, rout=rout, sout=sout)) + + success, errstring = validator(jout, rout) + + if not success: + raise AsokCommandFailedError(command, rout, jout, errstring) + + return jout + + @staticmethod + def clone_repo(client_mount, path): + repo = "ceph-qa-suite" + repo_path = os.path.join(path, repo) + client_mount.run_shell(["mkdir", "-p", path]) + + try: + client_mount.stat(repo_path) + except CommandFailedError: + client_mount.run_shell([ + "git", "clone", '--branch', 'giant', + "http://github.com/ceph/{repo}".format(repo=repo), + "{path}/{repo}".format(path=path, repo=repo) + ]) + + return repo_path + + +class AsokCommandFailedError(Exception): + """ + Exception thrown when we get an unexpected response + on an admin socket command + """ + + def __init__(self, command, rc, json_out, errstring): + self.command = command + self.rc = rc + self.json = json_out + self.errstring = errstring + + def __str__(self): + return "Admin socket: {command} failed with rc={rc} json output={json}, because '{es}'".format( + command=self.command, rc=self.rc, json=self.json, es=self.errstring) diff --git a/qa/tasks/cephfs/test_sessionmap.py b/qa/tasks/cephfs/test_sessionmap.py new file mode 100644 index 000000000..ad6fd1d60 --- /dev/null +++ b/qa/tasks/cephfs/test_sessionmap.py @@ -0,0 +1,232 @@ +import time +import json +import logging + +from tasks.cephfs.fuse_mount import FuseMount +from teuthology.exceptions import CommandFailedError +from tasks.cephfs.cephfs_test_case import CephFSTestCase + +log = logging.getLogger(__name__) + + +class TestSessionMap(CephFSTestCase): + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 2 + + def test_tell_session_drop(self): + """ + That when a `tell` command is sent using the python CLI, + its MDS session is gone after it terminates + """ + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + status = self.fs.status() + self.fs.rank_tell(["session", "ls"], status=status) + + ls_data = self.fs.rank_asok(['session', 'ls'], status=status) + self.assertEqual(len(ls_data), 0) + + def _get_connection_count(self, status=None): + perf = self.fs.rank_asok(["perf", "dump"], status=status) + conn = 0 + for module, dump in perf.items(): + if "AsyncMessenger::Worker" in module: + conn += dump['msgr_active_connections'] + return conn + + def test_tell_conn_close(self): + """ + That when a `tell` command is sent using the python CLI, + the conn count goes back to where it started (i.e. 
we aren't + leaving connections open) + """ + self.config_set('mds', 'ms_async_reap_threshold', '1') + + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + status = self.fs.status() + s = self._get_connection_count(status=status) + self.fs.rank_tell(["session", "ls"], status=status) + self.wait_until_true( + lambda: self._get_connection_count(status=status) == s, + timeout=30 + ) + + def test_mount_conn_close(self): + """ + That when a client unmounts, the thread count on the MDS goes back + to what it was before the client mounted + """ + self.config_set('mds', 'ms_async_reap_threshold', '1') + + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + status = self.fs.status() + s = self._get_connection_count(status=status) + self.mount_a.mount_wait() + self.assertGreater(self._get_connection_count(status=status), s) + self.mount_a.umount_wait() + self.wait_until_true( + lambda: self._get_connection_count(status=status) == s, + timeout=30 + ) + + def test_version_splitting(self): + """ + That when many sessions are updated, they are correctly + split into multiple versions to obey mds_sessionmap_keys_per_op + """ + + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + # Configure MDS to write one OMAP key at once + self.set_conf('mds', 'mds_sessionmap_keys_per_op', 1) + self.fs.mds_fail_restart() + status = self.fs.wait_for_daemons() + + # Bring the clients back + self.mount_a.mount_wait() + self.mount_b.mount_wait() + + # See that they've got sessions + self.assert_session_count(2, mds_id=self.fs.get_rank(status=status)['name']) + + # See that we persist their sessions + self.fs.rank_asok(["flush", "journal"], rank=0, status=status) + table_json = json.loads(self.fs.table_tool(["0", "show", "session"])) + log.info("SessionMap: {0}".format(json.dumps(table_json, indent=2))) + self.assertEqual(table_json['0']['result'], 0) + self.assertEqual(len(table_json['0']['data']['sessions']), 2) + + # Now, induce a "force_open_sessions" event by exporting a dir + self.mount_a.run_shell(["mkdir", "bravo"]) + self.mount_a.run_shell(["touch", "bravo/file_a"]) + self.mount_b.run_shell(["touch", "bravo/file_b"]) + + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + def get_omap_wrs(): + return self.fs.rank_asok(['perf', 'dump', 'objecter'], rank=1, status=status)['objecter']['omap_wr'] + + # Flush so that there are no dirty sessions on rank 1 + self.fs.rank_asok(["flush", "journal"], rank=1, status=status) + + # Export so that we get a force_open to rank 1 for the two sessions from rank 0 + initial_omap_wrs = get_omap_wrs() + self.fs.rank_asok(['export', 'dir', '/bravo', '1'], rank=0, status=status) + + # This is the critical (if rather subtle) check: that in the process of doing an export dir, + # we hit force_open_sessions, and as a result we end up writing out the sessionmap. There + # will be two sessions dirtied here, and because we have set keys_per_op to 1, we should see + # a single session get written out (the first of the two, triggered by the second getting marked + # dirty) + # The number of writes is two per session, because the header (sessionmap version) update and + # KV write both count. Also, multiply by 2 for each openfile table update. 
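        # Worked arithmetic (annotation, not in the original patch) behind the delta
        # asserted below: 1 session written out x 2 omap writes (sessionmap header +
        # KV), doubled again for the matching openfile table update, i.e. 2*2 = 4.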
+ self.wait_until_true( + lambda: get_omap_wrs() - initial_omap_wrs == 2*2, + timeout=30 # Long enough for an export to get acked + ) + + # Now end our sessions and check the backing sessionmap is updated correctly + self.mount_a.umount_wait() + self.mount_b.umount_wait() + + # In-memory sessionmap check + self.assert_session_count(0, mds_id=self.fs.get_rank(status=status)['name']) + + # On-disk sessionmap check + self.fs.rank_asok(["flush", "journal"], rank=0, status=status) + table_json = json.loads(self.fs.table_tool(["0", "show", "session"])) + log.info("SessionMap: {0}".format(json.dumps(table_json, indent=2))) + self.assertEqual(table_json['0']['result'], 0) + self.assertEqual(len(table_json['0']['data']['sessions']), 0) + + def _configure_auth(self, mount, id_name, mds_caps, osd_caps=None, mon_caps=None): + """ + Set up auth credentials for a client mount, and write out the keyring + for the client to use. + """ + + if osd_caps is None: + osd_caps = "allow rw" + + if mon_caps is None: + mon_caps = "allow r" + + out = self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", "client.{name}".format(name=id_name), + "mds", mds_caps, + "osd", osd_caps, + "mon", mon_caps + ) + mount.client_id = id_name + mount.client_remote.write_file(mount.get_keyring_path(), out, sudo=True) + self.set_conf("client.{name}".format(name=id_name), "keyring", mount.get_keyring_path()) + + def test_session_reject(self): + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Requires FUSE client to inject client metadata") + + self.mount_a.run_shell(["mkdir", "foo"]) + self.mount_a.run_shell(["mkdir", "foo/bar"]) + self.mount_a.umount_wait() + + # Mount B will be my rejected client + self.mount_b.umount_wait() + + # Configure a client that is limited to /foo/bar + self._configure_auth(self.mount_b, "badguy", "allow rw path=/foo/bar") + # Check he can mount that dir and do IO + self.mount_b.mount_wait(cephfs_mntpt="/foo/bar") + self.mount_b.create_destroy() + self.mount_b.umount_wait() + + # Configure the client to claim that its mount point metadata is /baz + self.set_conf("client.badguy", "client_metadata", "root=/baz") + # Try to mount the client, see that it fails + with self.assert_cluster_log("client session with non-allowable root '/baz' denied"): + with self.assertRaises(CommandFailedError): + self.mount_b.mount_wait(cephfs_mntpt="/foo/bar") + + def test_session_evict_blocklisted(self): + """ + Check that mds evicts blocklisted client + """ + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Requires FUSE client to use " + "mds_cluster.is_addr_blocklisted()") + + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + self.mount_a.run_shell_payload("mkdir {d0,d1} && touch {d0,d1}/file") + self.mount_a.setfattr("d0", "ceph.dir.pin", "0") + self.mount_a.setfattr("d1", "ceph.dir.pin", "1") + self._wait_subtrees([('/d0', 0), ('/d1', 1)], status=status) + + self.mount_a.run_shell(["touch", "d0/f0"]) + self.mount_a.run_shell(["touch", "d1/f0"]) + self.mount_b.run_shell(["touch", "d0/f1"]) + self.mount_b.run_shell(["touch", "d1/f1"]) + + self.assert_session_count(2, mds_id=self.fs.get_rank(rank=0, status=status)['name']) + self.assert_session_count(2, mds_id=self.fs.get_rank(rank=1, status=status)['name']) + + mount_a_client_id = self.mount_a.get_global_id() + self.fs.mds_asok(['session', 'evict', "%s" % mount_a_client_id], + mds_id=self.fs.get_rank(rank=0, status=status)['name']) + self.wait_until_true(lambda: self.mds_cluster.is_addr_blocklisted( + 
self.mount_a.get_global_addr()), timeout=30) + + # 10 seconds should be enough for evicting client + time.sleep(10) + self.assert_session_count(1, mds_id=self.fs.get_rank(rank=0, status=status)['name']) + self.assert_session_count(1, mds_id=self.fs.get_rank(rank=1, status=status)['name']) + + self.mount_a.kill_cleanup() + self.mount_a.mount_wait() diff --git a/qa/tasks/cephfs/test_snap_schedules.py b/qa/tasks/cephfs/test_snap_schedules.py new file mode 100644 index 000000000..0264cac32 --- /dev/null +++ b/qa/tasks/cephfs/test_snap_schedules.py @@ -0,0 +1,607 @@ +import os +import json +import time +import errno +import logging + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError +from datetime import datetime, timedelta + +log = logging.getLogger(__name__) + +def extract_schedule_and_retention_spec(spec=[]): + schedule = set([s[0] for s in spec]) + retention = set([s[1] for s in spec]) + return (schedule, retention) + +def seconds_upto_next_schedule(time_from, timo): + ts = int(time_from) + return ((int(ts / 60) * 60) + timo) - ts + +class TestSnapSchedulesHelper(CephFSTestCase): + CLIENTS_REQUIRED = 1 + + TEST_VOLUME_NAME = 'snap_vol' + TEST_DIRECTORY = 'snap_test_dir1' + + # this should be in sync with snap_schedule format + SNAPSHOT_TS_FORMAT = '%Y-%m-%d-%H_%M_%S' + + def check_scheduled_snapshot(self, exec_time, timo): + now = time.time() + delta = now - exec_time + log.debug(f'exec={exec_time}, now = {now}, timo = {timo}') + # tolerate snapshot existance in the range [-5,+5] + self.assertTrue((delta <= timo + 5) and (delta >= timo - 5)) + + def _fs_cmd(self, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args) + + def fs_snap_schedule_cmd(self, *args, **kwargs): + if 'fs' in kwargs: + fs = kwargs.pop('fs') + args += ('--fs', fs) + if 'format' in kwargs: + fmt = kwargs.pop('format') + args += ('--format', fmt) + for name, val in kwargs.items(): + args += (str(val),) + res = self._fs_cmd('snap-schedule', *args) + log.debug(f'res={res}') + return res + + def _create_or_reuse_test_volume(self): + result = json.loads(self._fs_cmd("volume", "ls")) + if len(result) == 0: + self.vol_created = True + self.volname = TestSnapSchedulesHelper.TEST_VOLUME_NAME + self._fs_cmd("volume", "create", self.volname) + else: + self.volname = result[0]['name'] + + def _enable_snap_schedule(self): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "snap_schedule") + + def _disable_snap_schedule(self): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "snap_schedule") + + def _allow_minute_granularity_snapshots(self): + self.config_set('mgr', 'mgr/snap_schedule/allow_m_granularity', True) + + def _dump_on_update(self): + self.config_set('mgr', 'mgr/snap_schedule/dump_on_update', True) + + def setUp(self): + super(TestSnapSchedulesHelper, self).setUp() + self.volname = None + self.vol_created = False + self._create_or_reuse_test_volume() + self.create_cbks = [] + self.remove_cbks = [] + # used to figure out which snapshots are created/deleted + self.snapshots = set() + self._enable_snap_schedule() + self._allow_minute_granularity_snapshots() + self._dump_on_update() + + def tearDown(self): + if self.vol_created: + self._delete_test_volume() + self._disable_snap_schedule() + super(TestSnapSchedulesHelper, self).tearDown() + + def _schedule_to_timeout(self, schedule): + mult = schedule[-1] + period = int(schedule[0:-1]) + if mult == 'M': + return period * 60 + elif mult 
== 'h': + return period * 60 * 60 + elif mult == 'd': + return period * 60 * 60 * 24 + elif mult == 'w': + return period * 60 * 60 * 24 * 7 + else: + raise RuntimeError('schedule multiplier not recognized') + + def add_snap_create_cbk(self, cbk): + self.create_cbks.append(cbk) + def remove_snap_create_cbk(self, cbk): + self.create_cbks.remove(cbk) + + def add_snap_remove_cbk(self, cbk): + self.remove_cbks.append(cbk) + def remove_snap_remove_cbk(self, cbk): + self.remove_cbks.remove(cbk) + + def assert_if_not_verified(self): + self.assertListEqual(self.create_cbks, []) + self.assertListEqual(self.remove_cbks, []) + + def verify(self, dir_path, max_trials): + trials = 0 + snap_path = f'{dir_path}/.snap' + while (len(self.create_cbks) or len(self.remove_cbks)) and trials < max_trials: + snapshots = set(self.mount_a.ls(path=snap_path)) + log.info(f'snapshots: {snapshots}') + added = snapshots - self.snapshots + log.info(f'added: {added}') + removed = self.snapshots - snapshots + log.info(f'removed: {removed}') + if added: + for cbk in list(self.create_cbks): + res = cbk(list(added)) + if res: + self.remove_snap_create_cbk(cbk) + break + if removed: + for cbk in list(self.remove_cbks): + res = cbk(list(removed)) + if res: + self.remove_snap_remove_cbk(cbk) + break + self.snapshots = snapshots + trials += 1 + time.sleep(1) + + def calc_wait_time_and_snap_name(self, snap_sched_exec_epoch, schedule): + timo = self._schedule_to_timeout(schedule) + # calculate wait time upto the next minute + wait_timo = seconds_upto_next_schedule(snap_sched_exec_epoch, timo) + + # expected "scheduled" snapshot name + ts_name = (datetime.utcfromtimestamp(snap_sched_exec_epoch) + + timedelta(seconds=wait_timo)).strftime(TestSnapSchedulesHelper.SNAPSHOT_TS_FORMAT) + return (wait_timo, ts_name) + + def verify_schedule(self, dir_path, schedules, retentions=[]): + log.debug(f'expected_schedule: {schedules}, expected_retention: {retentions}') + + result = self.fs_snap_schedule_cmd('list', path=dir_path, format='json') + json_res = json.loads(result) + log.debug(f'json_res: {json_res}') + + for schedule in schedules: + self.assertTrue(schedule in json_res['schedule']) + for retention in retentions: + self.assertTrue(retention in json_res['retention']) + +class TestSnapSchedules(TestSnapSchedulesHelper): + def remove_snapshots(self, dir_path): + snap_path = f'{dir_path}/.snap' + + snapshots = self.mount_a.ls(path=snap_path) + for snapshot in snapshots: + snapshot_path = os.path.join(snap_path, snapshot) + log.debug(f'removing snapshot: {snapshot_path}') + self.mount_a.run_shell(['rmdir', snapshot_path]) + + def test_non_existent_snap_schedule_list(self): + """Test listing snap schedules on a non-existing filesystem path failure""" + try: + self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise RuntimeError('incorrect errno when listing a non-existing snap schedule') + else: + raise RuntimeError('expected "fs snap-schedule list" to fail') + + def test_non_existent_schedule(self): + """Test listing non-existing snap schedules failure""" + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) + + try: + self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise RuntimeError('incorrect errno when listing a non-existing snap schedule') + else: + raise RuntimeError('expected "fs snap-schedule list" returned fail') + + 
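Several of the tests that follow lean on the minute-boundary arithmetic of seconds_upto_next_schedule, defined near the top of this file. A short worked example may help when reading the wait-time calculations; the function is copied verbatim and the timestamp is arbitrary:

    def seconds_upto_next_schedule(time_from, timo):
        ts = int(time_from)
        return ((int(ts / 60) * 60) + timo) - ts

    # A '1M' schedule (timo == 60) armed 40 seconds into a minute fires about 20 seconds later.
    assert seconds_upto_next_schedule(1_000_000_000, 60) == 20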
self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY]) + + def test_snap_schedule_list_post_schedule_remove(self): + """Test listing snap schedules post removal of a schedule""" + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) + + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1h') + + self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY) + + try: + self.fs_snap_schedule_cmd('list', path=TestSnapSchedules.TEST_DIRECTORY) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise RuntimeError('incorrect errno when listing a non-existing snap schedule') + else: + raise RuntimeError('"fs snap-schedule list" returned error') + + self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY]) + + def test_snap_schedule(self): + """Test existence of a scheduled snapshot""" + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) + + # set a schedule on the dir + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M') + exec_time = time.time() + + timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo}s...') + to_wait = timo + 2 # some leeway to avoid false failures... + + # verify snapshot schedule + self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M']) + + def verify_added(snaps_added): + log.debug(f'snapshots added={snaps_added}') + self.assertEqual(len(snaps_added), 1) + snapname = snaps_added[0] + if snapname.startswith('scheduled-'): + if snapname[10:26] == snap_sfx[:16]: + self.check_scheduled_snapshot(exec_time, timo) + return True + return False + self.add_snap_create_cbk(verify_added) + self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait) + self.assert_if_not_verified() + + # remove snapshot schedule + self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY) + + # remove all scheduled snapshots + self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY) + + self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY]) + + def test_multi_snap_schedule(self): + """Test exisitence of multiple scheduled snapshots""" + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) + + # set schedules on the dir + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M') + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='2M') + exec_time = time.time() + + timo_1, snap_sfx_1 = self.calc_wait_time_and_snap_name(exec_time, '1M') + log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_1} in ~{timo_1}s...') + timo_2, snap_sfx_2 = self.calc_wait_time_and_snap_name(exec_time, '2M') + log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_2} in ~{timo_2}s...') + to_wait = timo_2 + 2 # use max timeout + + # verify snapshot schedule + self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M', '2M']) + + def verify_added_1(snaps_added): + log.debug(f'snapshots added={snaps_added}') + self.assertEqual(len(snaps_added), 1) + snapname = snaps_added[0] + if snapname.startswith('scheduled-'): + if snapname[10:26] == snap_sfx_1[:16]: + self.check_scheduled_snapshot(exec_time, timo_1) + return True + return False + def verify_added_2(snaps_added): + log.debug(f'snapshots added={snaps_added}') + self.assertEqual(len(snaps_added), 1) + 
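The snapname[10:26] comparison used in these verify_added callbacks checks only the YYYY-mm-dd-HH_MM portion of the snapshot timestamp: 'scheduled-' is 10 characters long, and the first 16 characters of the %Y-%m-%d-%H_%M_%S format stop just short of the seconds. A self-contained illustration with a made-up time:

    from datetime import datetime

    SNAPSHOT_TS_FORMAT = '%Y-%m-%d-%H_%M_%S'
    name = 'scheduled-' + datetime(2024, 1, 2, 3, 4, 5).strftime(SNAPSHOT_TS_FORMAT)

    assert name == 'scheduled-2024-01-02-03_04_05'
    assert name[10:26] == '2024-01-02-03_04'  # seconds deliberately excluded from the match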
snapname = snaps_added[0] + if snapname.startswith('scheduled-'): + if snapname[10:26] == snap_sfx_2[:16]: + self.check_scheduled_snapshot(exec_time, timo_2) + return True + return False + self.add_snap_create_cbk(verify_added_1) + self.add_snap_create_cbk(verify_added_2) + self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait) + self.assert_if_not_verified() + + # remove snapshot schedule + self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY) + + # remove all scheduled snapshots + self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY) + + self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY]) + + def test_snap_schedule_with_retention(self): + """Test scheduled snapshots along with rentention policy""" + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) + + # set a schedule on the dir + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M') + self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedules.TEST_DIRECTORY, retention_spec_or_period='1M') + exec_time = time.time() + + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo_1}s...') + to_wait = timo_1 + 2 # some leeway to avoid false failures... + + # verify snapshot schedule + self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M'], retentions=[{'M':1}]) + + def verify_added(snaps_added): + log.debug(f'snapshots added={snaps_added}') + self.assertEqual(len(snaps_added), 1) + snapname = snaps_added[0] + if snapname.startswith('scheduled-'): + if snapname[10:26] == snap_sfx[:16]: + self.check_scheduled_snapshot(exec_time, timo_1) + return True + return False + self.add_snap_create_cbk(verify_added) + self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait) + self.assert_if_not_verified() + + timo_2 = timo_1 + 60 # expected snapshot removal timeout + def verify_removed(snaps_removed): + log.debug(f'snapshots removed={snaps_removed}') + self.assertEqual(len(snaps_removed), 1) + snapname = snaps_removed[0] + if snapname.startswith('scheduled-'): + if snapname[10:26] == snap_sfx[:16]: + self.check_scheduled_snapshot(exec_time, timo_2) + return True + return False + log.debug(f'expecting removal of snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo_2}s...') + to_wait = timo_2 + self.add_snap_remove_cbk(verify_removed) + self.verify(TestSnapSchedules.TEST_DIRECTORY, to_wait+2) + self.assert_if_not_verified() + + # remove snapshot schedule + self.fs_snap_schedule_cmd('remove', path=TestSnapSchedules.TEST_DIRECTORY) + + # remove all scheduled snapshots + self.remove_snapshots(TestSnapSchedules.TEST_DIRECTORY) + + self.mount_a.run_shell(['rmdir', TestSnapSchedules.TEST_DIRECTORY]) + + def get_snap_stats(self, dir_path): + snap_path = f"{dir_path}/.snap"[1:] + snapshots = self.mount_a.ls(path=snap_path) + fs_count = len(snapshots) + log.debug(f'snapshots: {snapshots}') + + result = self.fs_snap_schedule_cmd('status', path=dir_path, + format='json') + json_res = json.loads(result)[0] + db_count = int(json_res['created_count']) + log.debug(f'json_res: {json_res}') + + snap_stats = dict() + snap_stats['fs_count'] = fs_count + snap_stats['db_count'] = db_count + + log.debug(f'fs_count: {fs_count}') + log.debug(f'db_count: {db_count}') + + return snap_stats + + def verify_snap_stats(self, dir_path): + snap_stats = self.get_snap_stats(dir_path) + self.assertTrue(snap_stats['fs_count'] == 
snap_stats['db_count']) + + def test_concurrent_snap_creates(self): + """Test concurrent snap creates in same file-system without db issues""" + """ + Test snap creates at same cadence on same fs to verify correct stats. + A single SQLite DB Connection handle cannot be used to run concurrent + transactions and results transaction aborts. This test makes sure that + proper care has been taken in the code to avoid such situation by + verifying number of dirs created on the file system with the + created_count in the schedule_meta table for the specific path. + """ + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) + + testdirs = [] + for d in range(10): + testdirs.append(os.path.join("/", TestSnapSchedules.TEST_DIRECTORY, "dir" + str(d))) + + for d in testdirs: + self.mount_a.run_shell(['mkdir', '-p', d[1:]]) + self.fs_snap_schedule_cmd('add', path=d, snap_schedule='1M') + + exec_time = time.time() + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + + for d in testdirs: + self.fs_snap_schedule_cmd('activate', path=d, snap_schedule='1M') + + # we wait for 10 snaps to be taken + wait_time = timo_1 + 10 * 60 + 15 + time.sleep(wait_time) + + for d in testdirs: + self.fs_snap_schedule_cmd('deactivate', path=d, snap_schedule='1M') + + for d in testdirs: + self.verify_snap_stats(d) + + for d in testdirs: + self.fs_snap_schedule_cmd('remove', path=d, snap_schedule='1M') + self.remove_snapshots(d[1:]) + self.mount_a.run_shell(['rmdir', d[1:]]) + + def test_snap_schedule_with_mgr_restart(self): + """Test that snap schedule is resumed after mgr restart""" + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) + testdir = os.path.join("/", TestSnapSchedules.TEST_DIRECTORY, "test_restart") + self.mount_a.run_shell(['mkdir', '-p', testdir[1:]]) + self.fs_snap_schedule_cmd('add', path=testdir, snap_schedule='1M') + + exec_time = time.time() + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + + self.fs_snap_schedule_cmd('activate', path=testdir, snap_schedule='1M') + + # we wait for 10 snaps to be taken + wait_time = timo_1 + 10 * 60 + 15 + time.sleep(wait_time) + + old_stats = self.get_snap_stats(testdir) + self.assertTrue(old_stats['fs_count'] == old_stats['db_count']) + self.assertTrue(old_stats['fs_count'] > 9) + + # restart mgr + active_mgr = self.mgr_cluster.mon_manager.get_mgr_dump()['active_name'] + log.debug(f'restarting active mgr: {active_mgr}') + self.mgr_cluster.mon_manager.revive_mgr(active_mgr) + time.sleep(300) # sleep for 5 minutes + self.fs_snap_schedule_cmd('deactivate', path=testdir, snap_schedule='1M') + + new_stats = self.get_snap_stats(testdir) + self.assertTrue(new_stats['fs_count'] == new_stats['db_count']) + self.assertTrue(new_stats['fs_count'] > old_stats['fs_count']) + self.assertTrue(new_stats['db_count'] > old_stats['db_count']) + + # cleanup + self.fs_snap_schedule_cmd('remove', path=testdir, snap_schedule='1M') + self.remove_snapshots(testdir[1:]) + self.mount_a.run_shell(['rmdir', testdir[1:]]) + + def test_schedule_auto_deactivation_for_non_existent_path(self): + """ + Test that a non-existent path leads to schedule deactivation after a few retries. 
+ """ + self.fs_snap_schedule_cmd('add', path="/bad-path", snap_schedule='1M') + start_time = time.time() + + while time.time() - start_time < 60.0: + s = self.fs_snap_schedule_cmd('status', path="/bad-path", format='json') + json_status = json.loads(s)[0] + + self.assertTrue(int(json_status['active']) == 1) + time.sleep(60) + + s = self.fs_snap_schedule_cmd('status', path="/bad-path", format='json') + json_status = json.loads(s)[0] + self.assertTrue(int(json_status['active']) == 0) + + # remove snapshot schedule + self.fs_snap_schedule_cmd('remove', path="/bad-path") + + def test_snap_schedule_for_number_of_snaps_retention(self): + """ + Test that number of snaps retained are as per user spec. + """ + total_snaps = 55 + test_dir = '/' + TestSnapSchedules.TEST_DIRECTORY + + self.mount_a.run_shell(['mkdir', '-p', test_dir[1:]]) + + # set a schedule on the dir + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1M') + self.fs_snap_schedule_cmd('retention', 'add', path=test_dir, + retention_spec_or_period=f'{total_snaps}n') + exec_time = time.time() + + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + + # verify snapshot schedule + self.verify_schedule(test_dir, ['1M']) + + # we wait for total_snaps snaps to be taken + wait_time = timo_1 + total_snaps * 60 + 15 + time.sleep(wait_time) + + snap_stats = self.get_snap_stats(test_dir) + self.assertTrue(snap_stats['fs_count'] == total_snaps) + self.assertTrue(snap_stats['db_count'] >= total_snaps) + + # remove snapshot schedule + self.fs_snap_schedule_cmd('remove', path=test_dir) + + # remove all scheduled snapshots + self.remove_snapshots(test_dir[1:]) + + self.mount_a.run_shell(['rmdir', test_dir[1:]]) + + +class TestSnapSchedulesSnapdir(TestSnapSchedulesHelper): + def remove_snapshots(self, dir_path, sdn): + snap_path = f'{dir_path}/{sdn}' + + snapshots = self.mount_a.ls(path=snap_path) + for snapshot in snapshots: + snapshot_path = os.path.join(snap_path, snapshot) + log.debug(f'removing snapshot: {snapshot_path}') + self.mount_a.run_shell(['rmdir', snapshot_path]) + + def get_snap_dir_name(self): + from tasks.cephfs.fuse_mount import FuseMount + from tasks.cephfs.kernel_mount import KernelMount + + if isinstance(self.mount_a, KernelMount): + sdn = self.mount_a.client_config.get('snapdirname', '.snap') + elif isinstance(self.mount_a, FuseMount): + sdn = self.mount_a.client_config.get('client_snapdir', '.snap') + self.fs.set_ceph_conf('client', 'client snapdir', sdn) + self.mount_a.remount() + return sdn + + def test_snap_dir_name(self): + """Test the correctness of snap directory name""" + self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedulesSnapdir.TEST_DIRECTORY]) + + # set a schedule on the dir + self.fs_snap_schedule_cmd('add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, snap_schedule='1M') + self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, retention_spec_or_period='1M') + exec_time = time.time() + + timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + sdn = self.get_snap_dir_name() + log.info(f'expecting snap {TestSnapSchedulesSnapdir.TEST_DIRECTORY}/{sdn}/scheduled-{snap_sfx} in ~{timo}s...') + + # verify snapshot schedule + self.verify_schedule(TestSnapSchedulesSnapdir.TEST_DIRECTORY, ['1M'], retentions=[{'M':1}]) + + # remove snapshot schedule + self.fs_snap_schedule_cmd('remove', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY) + + # remove all scheduled snapshots + self.remove_snapshots(TestSnapSchedulesSnapdir.TEST_DIRECTORY, sdn) + + 
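The snapdir handling above only changes which directory name the scheduled snapshots appear under; the scheduled-<timestamp> naming itself is unchanged. A small sketch of how the expected path could be assembled for a non-default snapdir (the function name and the values are hypothetical):

    import os

    def scheduled_snap_path(dir_path, ts_name, snap_dir_name='.snap'):
        # snap_dir_name comes from snapdirname (kernel) or client_snapdir (fuse)
        return os.path.join(dir_path, snap_dir_name, 'scheduled-' + ts_name)

    print(scheduled_snap_path('snap_test_dir1', '2024-01-02-03_04_05', '.snapshot'))
    # snap_test_dir1/.snapshot/scheduled-2024-01-02-03_04_05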
self.mount_a.run_shell(['rmdir', TestSnapSchedulesSnapdir.TEST_DIRECTORY]) + + +""" +Note that the class TestSnapSchedulesMandatoryFSArgument tests snap-schedule +commands only for multi-fs scenario. Commands for a single default fs should +pass for tests defined above or elsewhere. +""" + + +class TestSnapSchedulesMandatoryFSArgument(TestSnapSchedulesHelper): + REQUIRE_BACKUP_FILESYSTEM = True + TEST_DIRECTORY = 'mandatory_fs_argument_test_dir' + + def test_snap_schedule_without_fs_argument(self): + """Test command fails without --fs argument in presence of multiple fs""" + test_path = TestSnapSchedulesMandatoryFSArgument.TEST_DIRECTORY + self.mount_a.run_shell(['mkdir', '-p', test_path]) + + # try setting a schedule on the dir; this should fail now that we are + # working with mutliple fs; we need the --fs argument if there are more + # than one fs hosted by the same cluster + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('add', test_path, snap_schedule='1M') + + self.mount_a.run_shell(['rmdir', test_path]) + + def test_snap_schedule_for_non_default_fs(self): + """Test command succes with --fs argument for non-default fs""" + test_path = TestSnapSchedulesMandatoryFSArgument.TEST_DIRECTORY + self.mount_a.run_shell(['mkdir', '-p', test_path]) + + # use the backup fs as the second fs; all these commands must pass + self.fs_snap_schedule_cmd('add', test_path, snap_schedule='1M', fs='backup_fs') + self.fs_snap_schedule_cmd('activate', test_path, snap_schedule='1M', fs='backup_fs') + self.fs_snap_schedule_cmd('retention', 'add', test_path, retention_spec_or_period='1M', fs='backup_fs') + self.fs_snap_schedule_cmd('list', test_path, fs='backup_fs', format='json') + self.fs_snap_schedule_cmd('status', test_path, fs='backup_fs', format='json') + self.fs_snap_schedule_cmd('retention', 'remove', test_path, retention_spec_or_period='1M', fs='backup_fs') + self.fs_snap_schedule_cmd('deactivate', test_path, snap_schedule='1M', fs='backup_fs') + self.fs_snap_schedule_cmd('remove', test_path, snap_schedule='1M', fs='backup_fs') + + self.mount_a.run_shell(['rmdir', test_path]) diff --git a/qa/tasks/cephfs/test_snapshots.py b/qa/tasks/cephfs/test_snapshots.py new file mode 100644 index 000000000..608dcc81f --- /dev/null +++ b/qa/tasks/cephfs/test_snapshots.py @@ -0,0 +1,605 @@ +import errno +import logging +import signal +from textwrap import dedent +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.orchestra.run import Raw +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + +MDS_RESTART_GRACE = 60 + +class TestSnapshots(CephFSTestCase): + MDSS_REQUIRED = 3 + LOAD_SETTINGS = ["mds_max_snaps_per_dir"] + + def _check_subtree(self, rank, path, status=None): + got_subtrees = self.fs.rank_asok(["get", "subtrees"], rank=rank, status=status) + for s in got_subtrees: + if s['dir']['path'] == path and s['auth_first'] == rank: + return True + return False + + def _get_snapclient_dump(self, rank=0, status=None): + return self.fs.rank_asok(["dump", "snaps"], rank=rank, status=status) + + def _get_snapserver_dump(self, rank=0, status=None): + return self.fs.rank_asok(["dump", "snaps", "--server"], rank=rank, status=status) + + def _get_last_created_snap(self, rank=0, status=None): + return int(self._get_snapserver_dump(rank,status=status)["last_created"]) + + def _get_last_destroyed_snap(self, rank=0, status=None): + return 
int(self._get_snapserver_dump(rank,status=status)["last_destroyed"]) + + def _get_pending_snap_update(self, rank=0, status=None): + return self._get_snapserver_dump(rank,status=status)["pending_update"] + + def _get_pending_snap_destroy(self, rank=0, status=None): + return self._get_snapserver_dump(rank,status=status)["pending_destroy"] + + def test_allow_new_snaps_config(self): + """ + Check whether 'allow_new_snaps' setting works + """ + self.mount_a.run_shell(["mkdir", "test-allow-snaps"]) + + self.fs.set_allow_new_snaps(False); + try: + self.mount_a.run_shell(["mkdir", "test-allow-snaps/.snap/snap00"]) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, "expected EPERM") + else: + self.fail("expected snap creatiion to fail") + + self.fs.set_allow_new_snaps(True); + self.mount_a.run_shell(["mkdir", "test-allow-snaps/.snap/snap00"]) + self.mount_a.run_shell(["rmdir", "test-allow-snaps/.snap/snap00"]) + self.mount_a.run_shell(["rmdir", "test-allow-snaps"]) + + def test_kill_mdstable(self): + """ + check snaptable transcation + """ + if not isinstance(self.mount_a, FuseMount): + self.skipTest("Require FUSE client to forcibly kill mount") + + self.fs.set_allow_new_snaps(True); + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + # setup subtrees + self.mount_a.run_shell(["mkdir", "-p", "d1/dir"]) + self.mount_a.setfattr("d1", "ceph.dir.pin", "1") + self._wait_subtrees([("/d1", 1)], rank=1, path="/d1") + + last_created = self._get_last_created_snap(rank=0,status=status) + + # mds_kill_mdstable_at: + # 1: MDSTableServer::handle_prepare + # 2: MDSTableServer::_prepare_logged + # 5: MDSTableServer::handle_commit + # 6: MDSTableServer::_commit_logged + for i in [1,2,5,6]: + log.info("testing snapserver mds_kill_mdstable_at={0}".format(i)) + + status = self.fs.status() + rank0 = self.fs.get_rank(rank=0, status=status) + self.fs.rank_freeze(True, rank=0) + self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)], rank=0, status=status) + proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s1{0}".format(i)], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0), timeout=self.fs.beacon_timeout); + self.delete_mds_coredump(rank0['name']); + + self.fs.rank_fail(rank=0) + self.fs.mds_restart(rank0['name']) + self.wait_for_daemon_start([rank0['name']]) + status = self.fs.wait_for_daemons() + + proc.wait() + last_created += 1 + self.wait_until_true(lambda: self._get_last_created_snap(rank=0) == last_created, timeout=30) + + self.set_conf("mds", "mds_reconnect_timeout", "5") + + self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")]) + + # set mds_kill_mdstable_at, also kill snapclient + for i in [2,5,6]: + log.info("testing snapserver mds_kill_mdstable_at={0}, also kill snapclient".format(i)) + status = self.fs.status() + last_created = self._get_last_created_snap(rank=0, status=status) + + rank0 = self.fs.get_rank(rank=0, status=status) + rank1 = self.fs.get_rank(rank=1, status=status) + self.fs.rank_freeze(True, rank=0) # prevent failover... + self.fs.rank_freeze(True, rank=1) # prevent failover... 
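For reference while reading the kill loops in this test, the mds_kill_mdstable_at values map to the code points called out in the inline comments here and further below. Collecting them in one place, purely as a reading aid mirroring those comments (not an authoritative list of the failpoint implementation):

    MDS_KILL_MDSTABLE_AT = {
        1: 'MDSTableServer::handle_prepare',
        2: 'MDSTableServer::_prepare_logged',
        3: 'MDSTableClient::handle_request (got agree)',
        4: 'MDSTableClient::commit',
        5: 'MDSTableServer::handle_commit',
        6: 'MDSTableServer::_commit_logged',
        7: 'MDSTableClient::handle_request (got ack)',
        8: 'MDSTableServer::handle_rollback',
        9: 'MDSTableClient::handle_notify_prep',  # used later in test_snapclient_cache
    }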
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)], rank=0, status=status) + proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s2{0}".format(i)], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0), timeout=self.fs.beacon_timeout); + self.delete_mds_coredump(rank0['name']); + + self.fs.rank_signal(signal.SIGKILL, rank=1) + + self.mount_a.kill() + self.mount_a.kill_cleanup() + + self.fs.rank_fail(rank=0) + self.fs.mds_restart(rank0['name']) + self.wait_for_daemon_start([rank0['name']]) + + self.fs.wait_for_state('up:resolve', rank=0, timeout=MDS_RESTART_GRACE) + if i in [2,5]: + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1) + elif i == 6: + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 0) + self.assertGreater(self._get_last_created_snap(rank=0), last_created) + + self.fs.rank_fail(rank=1) + self.fs.mds_restart(rank1['name']) + self.wait_for_daemon_start([rank1['name']]) + self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE) + + if i in [2,5]: + self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30) + if i == 2: + self.assertEqual(self._get_last_created_snap(rank=0), last_created) + else: + self.assertGreater(self._get_last_created_snap(rank=0), last_created) + + self.mount_a.mount_wait() + + self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")]) + + # mds_kill_mdstable_at: + # 3: MDSTableClient::handle_request (got agree) + # 4: MDSTableClient::commit + # 7: MDSTableClient::handle_request (got ack) + for i in [3,4,7]: + log.info("testing snapclient mds_kill_mdstable_at={0}".format(i)) + last_created = self._get_last_created_snap(rank=0) + + status = self.fs.status() + rank1 = self.fs.get_rank(rank=1, status=status) + self.fs.rank_freeze(True, rank=1) # prevent failover... 
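The expected end state after the failed rank is restarted differs per snapclient failpoint; a compact summary of what the assertions below check (derived from those assertions, shown only as a reading aid):

    # Whether a new snapshot ultimately exists once recovery completes.
    snapshot_created_after_recovery = {
        3: False,  # killed on 'agree': the prepared update is rolled back
        4: True,   # killed in commit: the pending update is replayed to completion
        7: True,   # killed on 'ack': the creation was already durable
    }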
+ self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "{0}".format(i)], rank=1, status=status) + proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s3{0}".format(i)], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=1), timeout=self.fs.beacon_timeout); + self.delete_mds_coredump(rank1['name']); + + self.mount_a.kill() + self.mount_a.kill_cleanup() + + if i in [3,4]: + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1) + elif i == 7: + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 0) + self.assertGreater(self._get_last_created_snap(rank=0), last_created) + + self.fs.rank_fail(rank=1) + self.fs.mds_restart(rank1['name']) + self.wait_for_daemon_start([rank1['name']]) + status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE) + + if i in [3,4]: + self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30) + if i == 3: + self.assertEqual(self._get_last_created_snap(rank=0), last_created) + else: + self.assertGreater(self._get_last_created_snap(rank=0), last_created) + + self.mount_a.mount_wait() + + self.mount_a.run_shell(["rmdir", Raw("d1/dir/.snap/*")]) + + # mds_kill_mdstable_at: + # 3: MDSTableClient::handle_request (got agree) + # 8: MDSTableServer::handle_rollback + log.info("testing snapclient mds_kill_mdstable_at=3, snapserver mds_kill_mdstable_at=8") + last_created = self._get_last_created_snap(rank=0) + + status = self.fs.status() + rank0 = self.fs.get_rank(rank=0, status=status) + rank1 = self.fs.get_rank(rank=1, status=status) + self.fs.rank_freeze(True, rank=0) + self.fs.rank_freeze(True, rank=1) + self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "8"], rank=0, status=status) + self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "3"], rank=1, status=status) + proc = self.mount_a.run_shell(["mkdir", "d1/dir/.snap/s4"], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=1), timeout=self.fs.beacon_timeout); + self.delete_mds_coredump(rank1['name']); + + self.mount_a.kill() + self.mount_a.kill_cleanup() + + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1) + + self.fs.rank_fail(rank=1) + self.fs.mds_restart(rank1['name']) + self.wait_for_daemon_start([rank1['name']]) + + # rollback triggers assertion + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=0), timeout=self.fs.beacon_timeout); + self.delete_mds_coredump(rank0['name']); + self.fs.rank_fail(rank=0) + self.fs.mds_restart(rank0['name']) + self.wait_for_daemon_start([rank0['name']]) + self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE) + + # mds.1 should re-send rollback message + self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30) + self.assertEqual(self._get_last_created_snap(rank=0), last_created) + + self.mount_a.mount_wait() + + def test_snapclient_cache(self): + """ + check if snapclient cache gets synced properly + """ + self.fs.set_allow_new_snaps(True); + self.fs.set_max_mds(3) + status = self.fs.wait_for_daemons() + + self.mount_a.run_shell(["mkdir", "-p", "d0/d1/dir"]) + self.mount_a.run_shell(["mkdir", "-p", "d0/d2/dir"]) + self.mount_a.setfattr("d0", "ceph.dir.pin", "0") + self.mount_a.setfattr("d0/d1", "ceph.dir.pin", "1") + self.mount_a.setfattr("d0/d2", "ceph.dir.pin", "2") + self._wait_subtrees([("/d0", 0), ("/d0/d1", 1), ("/d0/d2", 2)], rank="all", status=status, path="/d0") + + def _check_snapclient_cache(snaps_dump, cache_dump=None, rank=0): + if cache_dump is None: + 
cache_dump = self._get_snapclient_dump(rank=rank) + for key, value in cache_dump.items(): + if value != snaps_dump[key]: + return False + return True; + + # sync after mksnap + last_created = self._get_last_created_snap(rank=0) + self.mount_a.run_shell(["mkdir", "d0/d1/dir/.snap/s1", "d0/d1/dir/.snap/s2"]) + self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30) + self.assertGreater(self._get_last_created_snap(rank=0), last_created) + + snaps_dump = self._get_snapserver_dump(rank=0) + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2)); + + # sync after rmsnap + last_destroyed = self._get_last_destroyed_snap(rank=0) + self.mount_a.run_shell(["rmdir", "d0/d1/dir/.snap/s1"]) + self.wait_until_true(lambda: len(self._get_pending_snap_destroy(rank=0)) == 0, timeout=30) + self.assertGreater(self._get_last_destroyed_snap(rank=0), last_destroyed) + + snaps_dump = self._get_snapserver_dump(rank=0) + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2)); + + # sync during mds recovers + self.fs.rank_fail(rank=2) + status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE) + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2)); + + self.fs.rank_fail(rank=0) + self.fs.rank_fail(rank=1) + status = self.fs.wait_for_daemons() + self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE) + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=0)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=1)); + self.assertTrue(_check_snapclient_cache(snaps_dump, rank=2)); + + # kill at MDSTableClient::handle_notify_prep + status = self.fs.status() + rank2 = self.fs.get_rank(rank=2, status=status) + self.fs.rank_freeze(True, rank=2) + self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "9"], rank=2, status=status) + proc = self.mount_a.run_shell(["mkdir", "d0/d1/dir/.snap/s3"], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=2), timeout=self.fs.beacon_timeout); + self.delete_mds_coredump(rank2['name']); + + # mksnap should wait for notify ack from mds.2 + self.assertFalse(proc.finished); + + # mksnap should proceed after mds.2 fails + self.fs.rank_fail(rank=2) + self.wait_until_true(lambda: proc.finished, timeout=30); + + self.fs.mds_restart(rank2['name']) + self.wait_for_daemon_start([rank2['name']]) + status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE) + + self.mount_a.run_shell(["rmdir", Raw("d0/d1/dir/.snap/*")]) + + # kill at MDSTableClient::commit + # the recovering mds should sync all mds' cache when it enters resolve stage + self.set_conf("mds", "mds_reconnect_timeout", "5") + for i in range(1, 4): + status = self.fs.status() + rank2 = self.fs.get_rank(rank=2, status=status) + self.fs.rank_freeze(True, rank=2) + self.fs.rank_asok(['config', 'set', "mds_kill_mdstable_at", "4"], rank=2, status=status) + last_created = self._get_last_created_snap(rank=0) + proc = self.mount_a.run_shell(["mkdir", "d0/d2/dir/.snap/s{0}".format(i)], wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(rank=2), timeout=self.fs.beacon_timeout); + self.delete_mds_coredump(rank2['name']); + + self.mount_a.kill() + self.mount_a.kill_cleanup() + + self.assertEqual(len(self._get_pending_snap_update(rank=0)), 1) + + if i in 
[2,4]: + self.fs.rank_fail(rank=0) + if i in [3,4]: + self.fs.rank_fail(rank=1) + + self.fs.rank_fail(rank=2) + self.fs.mds_restart(rank2['name']) + self.wait_for_daemon_start([rank2['name']]) + status = self.fs.wait_for_daemons(timeout=MDS_RESTART_GRACE) + + rank0_cache = self._get_snapclient_dump(rank=0) + rank1_cache = self._get_snapclient_dump(rank=1) + rank2_cache = self._get_snapclient_dump(rank=2) + + self.assertGreater(int(rank0_cache["last_created"]), last_created) + self.assertEqual(rank0_cache, rank1_cache); + self.assertEqual(rank0_cache, rank2_cache); + + self.wait_until_true(lambda: len(self._get_pending_snap_update(rank=0)) == 0, timeout=30) + + snaps_dump = self._get_snapserver_dump(rank=0) + self.assertEqual(snaps_dump["last_created"], rank0_cache["last_created"]) + self.assertTrue(_check_snapclient_cache(snaps_dump, cache_dump=rank0_cache)); + + self.mount_a.mount_wait() + + self.mount_a.run_shell(["rmdir", Raw("d0/d2/dir/.snap/*")]) + + def test_multimds_mksnap(self): + """ + check if snapshot takes effect across authority subtrees + """ + self.fs.set_allow_new_snaps(True); + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + self.mount_a.run_shell(["mkdir", "-p", "d0/d1/empty"]) + self.mount_a.setfattr("d0", "ceph.dir.pin", "0") + self.mount_a.setfattr("d0/d1", "ceph.dir.pin", "1") + self._wait_subtrees([("/d0", 0), ("/d0/d1", 1)], rank="all", status=status, path="/d0") + + self.mount_a.write_test_pattern("d0/d1/file_a", 8 * 1024 * 1024) + self.mount_a.run_shell(["mkdir", "d0/.snap/s1"]) + self.mount_a.run_shell(["rm", "-f", "d0/d1/file_a"]) + self.mount_a.validate_test_pattern("d0/.snap/s1/d1/file_a", 8 * 1024 * 1024) + + self.mount_a.run_shell(["rmdir", "d0/.snap/s1"]) + self.mount_a.run_shell(["rm", "-rf", "d0"]) + + def test_multimds_past_parents(self): + """ + check if past parents are properly recorded during across authority rename + """ + self.fs.set_allow_new_snaps(True); + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + self.mount_a.run_shell_payload("mkdir -p {d0,d1}/empty") + self.mount_a.setfattr("d0", "ceph.dir.pin", "0") + self.mount_a.setfattr("d1", "ceph.dir.pin", "1") + self._wait_subtrees([("/d0", 0), ("/d1", 1)], rank=0, status=status) + + self.mount_a.run_shell(["mkdir", "d0/d3"]) + self.mount_a.run_shell(["mkdir", "d0/.snap/s1"]) + snap_name = self.mount_a.run_shell(["ls", "d0/d3/.snap"]).stdout.getvalue() + + self.mount_a.run_shell(["mv", "d0/d3", "d1/d3"]) + snap_name1 = self.mount_a.run_shell(["ls", "d1/d3/.snap"]).stdout.getvalue() + self.assertEqual(snap_name1, snap_name); + + self.mount_a.run_shell(["rmdir", "d0/.snap/s1"]) + snap_name1 = self.mount_a.run_shell(["ls", "d1/d3/.snap"]).stdout.getvalue() + self.assertEqual(snap_name1, ""); + + self.mount_a.run_shell(["rm", "-rf", "d0", "d1"]) + + def test_multimds_hardlink(self): + """ + check if hardlink snapshot works in multimds setup + """ + self.fs.set_allow_new_snaps(True); + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + self.mount_a.run_shell_payload("mkdir -p {d0,d1}/empty") + + self.mount_a.setfattr("d0", "ceph.dir.pin", "0") + self.mount_a.setfattr("d1", "ceph.dir.pin", "1") + self._wait_subtrees([("/d0", 0), ("/d1", 1)], rank=0, status=status) + + self.mount_a.run_python(dedent(""" + import os + open(os.path.join("{path}", "d0/file1"), 'w').write("asdf") + open(os.path.join("{path}", "d0/file2"), 'w').write("asdf") + """.format(path=self.mount_a.mountpoint) + )) + + self.mount_a.run_shell(["ln", "d0/file1", "d1/file1"]) + 
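The hard-link assertions in these multi-MDS tests rest on the invariant that both paths resolve to the same inode, which is what the path_to_ino equality checks elsewhere in this file express. The same invariant can be seen locally with plain os.link, independent of CephFS:

    import os
    import tempfile

    with tempfile.TemporaryDirectory() as d:
        a = os.path.join(d, 'file1')
        b = os.path.join(d, 'link1')
        with open(a, 'w') as f:
            f.write('asdf')
        os.link(a, b)  # hard link, analogous to `ln d0/file1 d1/file1`
        assert os.stat(a).st_ino == os.stat(b).st_ino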
self.mount_a.run_shell(["ln", "d0/file2", "d1/file2"]) + + self.mount_a.run_shell(["mkdir", "d1/.snap/s1"]) + + self.mount_a.run_python(dedent(""" + import os + open(os.path.join("{path}", "d0/file1"), 'w').write("qwer") + """.format(path=self.mount_a.mountpoint) + )) + + self.mount_a.run_shell(["grep", "asdf", "d1/.snap/s1/file1"]) + + self.mount_a.run_shell(["rm", "-f", "d0/file2"]) + self.mount_a.run_shell(["grep", "asdf", "d1/.snap/s1/file2"]) + + self.mount_a.run_shell(["rm", "-f", "d1/file2"]) + self.mount_a.run_shell(["grep", "asdf", "d1/.snap/s1/file2"]) + + self.mount_a.run_shell(["rmdir", "d1/.snap/s1"]) + self.mount_a.run_shell(["rm", "-rf", "d0", "d1"]) + + class SnapLimitViolationException(Exception): + failed_snapshot_number = -1 + + def __init__(self, num): + self.failed_snapshot_number = num + + def get_snap_name(self, dir_name, sno): + sname = "{dir_name}/.snap/s_{sno}".format(dir_name=dir_name, sno=sno) + return sname + + def create_snap_dir(self, sname): + self.mount_a.run_shell(["mkdir", sname]) + + def delete_dir_and_snaps(self, dir_name, snaps): + for sno in range(1, snaps+1, 1): + sname = self.get_snap_name(dir_name, sno) + self.mount_a.run_shell(["rmdir", sname]) + self.mount_a.run_shell(["rmdir", dir_name]) + + def create_dir_and_snaps(self, dir_name, snaps): + self.mount_a.run_shell(["mkdir", dir_name]) + + for sno in range(1, snaps+1, 1): + sname = self.get_snap_name(dir_name, sno) + try: + self.create_snap_dir(sname) + except CommandFailedError as e: + # failing at the last mkdir beyond the limit is expected + if sno == snaps: + log.info("failed while creating snap #{}: {}".format(sno, repr(e))) + raise TestSnapshots.SnapLimitViolationException(sno) + + def test_mds_max_snaps_per_dir_default_limit(self): + """ + Test the newly introudced option named mds_max_snaps_per_dir + Default snaps limit is 100 + Test if the default number of snapshot directories can be created + """ + self.create_dir_and_snaps("accounts", int(self.mds_max_snaps_per_dir)) + self.delete_dir_and_snaps("accounts", int(self.mds_max_snaps_per_dir)) + + def test_mds_max_snaps_per_dir_with_increased_limit(self): + """ + Test the newly introudced option named mds_max_snaps_per_dir + First create 101 directories and ensure that the 101st directory + creation fails. Then increase the default by one and see if the + additional directory creation succeeds + """ + # first test the default limit + new_limit = int(self.mds_max_snaps_per_dir) + self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)]) + try: + self.create_dir_and_snaps("accounts", new_limit + 1) + except TestSnapshots.SnapLimitViolationException as e: + if e.failed_snapshot_number == (new_limit + 1): + pass + # then increase the limit by one and test + new_limit = new_limit + 1 + self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)]) + sname = self.get_snap_name("accounts", new_limit) + self.create_snap_dir(sname) + self.delete_dir_and_snaps("accounts", new_limit) + + def test_mds_max_snaps_per_dir_with_reduced_limit(self): + """ + Test the newly introudced option named mds_max_snaps_per_dir + First create 99 directories. Then reduce the limit to 98. Then try + creating another directory and ensure that additional directory + creation fails. 
+ """ + # first test the new limit + new_limit = int(self.mds_max_snaps_per_dir) - 1 + self.create_dir_and_snaps("accounts", new_limit) + sname = self.get_snap_name("accounts", new_limit + 1) + # then reduce the limit by one and test + new_limit = new_limit - 1 + self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)]) + try: + self.create_snap_dir(sname) + except CommandFailedError: + # after reducing limit we expect the new snapshot creation to fail + pass + self.delete_dir_and_snaps("accounts", new_limit + 1) + + +class TestMonSnapsAndFsPools(CephFSTestCase): + MDSS_REQUIRED = 3 + + def test_disallow_monitor_managed_snaps_for_fs_pools(self): + """ + Test that creation of monitor managed snaps fails for pools attached + to any file-system + """ + with self.assertRaises(CommandFailedError): + self.fs.rados(["mksnap", "snap1"], pool=self.fs.get_data_pool_name()) + + with self.assertRaises(CommandFailedError): + self.fs.rados(["mksnap", "snap2"], pool=self.fs.get_metadata_pool_name()) + + with self.assertRaises(CommandFailedError): + test_pool_name = self.fs.get_data_pool_name() + base_cmd = f'osd pool mksnap {test_pool_name} snap3' + self.run_cluster_cmd(base_cmd) + + with self.assertRaises(CommandFailedError): + test_pool_name = self.fs.get_metadata_pool_name() + base_cmd = f'osd pool mksnap {test_pool_name} snap4' + self.run_cluster_cmd(base_cmd) + + def test_attaching_pools_with_snaps_to_fs_fails(self): + """ + Test that attempt to attach pool with snapshots to an fs fails + """ + test_pool_name = 'snap-test-pool' + base_cmd = f'osd pool create {test_pool_name}' + ret = self.run_cluster_cmd_result(base_cmd) + self.assertEqual(ret, 0) + + self.fs.rados(["mksnap", "snap3"], pool=test_pool_name) + + base_cmd = f'fs add_data_pool {self.fs.name} {test_pool_name}' + ret = self.run_cluster_cmd_result(base_cmd) + self.assertEqual(ret, errno.EOPNOTSUPP) + + # cleanup + self.fs.rados(["rmsnap", "snap3"], pool=test_pool_name) + base_cmd = f'osd pool delete {test_pool_name}' + ret = self.run_cluster_cmd_result(base_cmd) + + def test_using_pool_with_snap_fails_fs_creation(self): + """ + Test that using a pool with snaps for fs creation fails + """ + base_cmd = 'osd pool create test_data_pool' + ret = self.run_cluster_cmd_result(base_cmd) + self.assertEqual(ret, 0) + base_cmd = 'osd pool create test_metadata_pool' + ret = self.run_cluster_cmd_result(base_cmd) + self.assertEqual(ret, 0) + + self.fs.rados(["mksnap", "snap4"], pool='test_data_pool') + + base_cmd = 'fs new testfs test_metadata_pool test_data_pool' + ret = self.run_cluster_cmd_result(base_cmd) + self.assertEqual(ret, errno.EOPNOTSUPP) + + # cleanup + self.fs.rados(["rmsnap", "snap4"], pool='test_data_pool') + base_cmd = 'osd pool delete test_data_pool' + ret = self.run_cluster_cmd_result(base_cmd) + base_cmd = 'osd pool delete test_metadata_pool' + ret = self.run_cluster_cmd_result(base_cmd) diff --git a/qa/tasks/cephfs/test_strays.py b/qa/tasks/cephfs/test_strays.py new file mode 100644 index 000000000..8bdc126e2 --- /dev/null +++ b/qa/tasks/cephfs/test_strays.py @@ -0,0 +1,1027 @@ +import json +import time +import logging +from textwrap import dedent +import datetime +import gevent + +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra.run import Raw +from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology + +log = logging.getLogger(__name__) + + +class TestStrays(CephFSTestCase): + MDSS_REQUIRED = 2 + + OPS_THROTTLE = 1 + FILES_THROTTLE = 2 + + # Range of different file 
sizes used in throttle test's workload + throttle_workload_size_range = 16 + + @for_teuthology + def test_ops_throttle(self): + self._test_throttling(self.OPS_THROTTLE) + + @for_teuthology + def test_files_throttle(self): + self._test_throttling(self.FILES_THROTTLE) + + def test_dir_deletion(self): + """ + That when deleting a bunch of dentries and the containing + directory, everything gets purged. + Catches cases where the client might e.g. fail to trim + the unlinked dir from its cache. + """ + file_count = 1000 + create_script = dedent(""" + import os + + mountpoint = "{mountpoint}" + subdir = "delete_me" + size = {size} + file_count = {file_count} + os.mkdir(os.path.join(mountpoint, subdir)) + for i in range(0, file_count): + filename = "{{0}}_{{1}}.bin".format(i, size) + with open(os.path.join(mountpoint, subdir, filename), 'w') as f: + f.write(size * 'x') + """.format( + mountpoint=self.mount_a.mountpoint, + size=1024, + file_count=file_count + )) + + self.mount_a.run_python(create_script) + + # That the dirfrag object is created + self.fs.mds_asok(["flush", "journal"]) + dir_ino = self.mount_a.path_to_ino("delete_me") + self.assertTrue(self.fs.dirfrag_exists(dir_ino, 0)) + + # Remove everything + self.mount_a.run_shell(["rm", "-rf", "delete_me"]) + self.fs.mds_asok(["flush", "journal"]) + + # That all the removed files get created as strays + strays = self.get_mdc_stat("strays_created") + self.assertEqual(strays, file_count + 1) + + # That the strays all get enqueued for purge + self.wait_until_equal( + lambda: self.get_mdc_stat("strays_enqueued"), + strays, + timeout=600 + + ) + + # That all the purge operations execute + self.wait_until_equal( + lambda: self.get_stat("purge_queue", "pq_executed"), + strays, + timeout=600 + ) + + # That finally, the directory metadata object is gone + self.assertFalse(self.fs.dirfrag_exists(dir_ino, 0)) + + # That finally, the data objects are all gone + self.await_data_pool_empty() + + def _test_throttling(self, throttle_type): + self.data_log = [] + try: + return self._do_test_throttling(throttle_type) + except: + for l in self.data_log: + log.info(",".join([l_.__str__() for l_ in l])) + raise + + def _do_test_throttling(self, throttle_type): + """ + That the mds_max_purge_ops setting is respected + """ + + def set_throttles(files, ops): + """ + Helper for updating ops/files limits, and calculating effective + ops_per_pg setting to give the same ops limit. + """ + self.set_conf('mds', 'mds_max_purge_files', "%d" % files) + self.set_conf('mds', 'mds_max_purge_ops', "%d" % ops) + + pgs = self.fs.mon_manager.get_pool_int_property( + self.fs.get_data_pool_name(), + "pg_num" + ) + ops_per_pg = float(ops) / pgs + self.set_conf('mds', 'mds_max_purge_ops_per_pg', "%s" % ops_per_pg) + + # Test conditions depend on what we're going to be exercising. + # * Lift the threshold on whatever throttle we are *not* testing, so + # that the throttle of interest is the one that will be the bottleneck + # * Create either many small files (test file count throttling) or fewer + # large files (test op throttling) + if throttle_type == self.OPS_THROTTLE: + set_throttles(files=100000000, ops=16) + size_unit = 1024 * 1024 # big files, generate lots of ops + file_multiplier = 100 + elif throttle_type == self.FILES_THROTTLE: + # The default value of file limit is pretty permissive, so to avoid + # the test running too fast, create lots of files and set the limit + # pretty low. 
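The per-PG limit computed by set_throttles above simply spreads the flat ops ceiling across the data pool's placement groups, so that the effective limit under test stays the same. A worked example with hypothetical numbers:

    def mds_max_purge_ops_per_pg(mds_max_purge_ops, pg_num):
        # chosen so that pg_num * ops_per_pg reproduces the intended ops ceiling
        return float(mds_max_purge_ops) / pg_num

    assert mds_max_purge_ops_per_pg(16, 8) == 2.0  # 16 purge ops spread over 8 PGs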
+ set_throttles(ops=100000000, files=6) + size_unit = 1024 # small, numerous files + file_multiplier = 200 + else: + raise NotImplementedError(throttle_type) + + # Pick up config changes + self.fs.mds_fail_restart() + self.fs.wait_for_daemons() + + create_script = dedent(""" + import os + + mountpoint = "{mountpoint}" + subdir = "delete_me" + size_unit = {size_unit} + file_multiplier = {file_multiplier} + os.mkdir(os.path.join(mountpoint, subdir)) + for i in range(0, file_multiplier): + for size in range(0, {size_range}*size_unit, size_unit): + filename = "{{0}}_{{1}}.bin".format(i, size // size_unit) + with open(os.path.join(mountpoint, subdir, filename), 'w') as f: + f.write(size * 'x') + """.format( + mountpoint=self.mount_a.mountpoint, + size_unit=size_unit, + file_multiplier=file_multiplier, + size_range=self.throttle_workload_size_range + )) + + self.mount_a.run_python(create_script) + + # We will run the deletion in the background, to reduce the risk of it completing before + # we have started monitoring the stray statistics. + def background(): + self.mount_a.run_shell(["rm", "-rf", "delete_me"]) + self.fs.mds_asok(["flush", "journal"]) + + background_thread = gevent.spawn(background) + + total_inodes = file_multiplier * self.throttle_workload_size_range + 1 + mds_max_purge_ops = int(self.fs.get_config("mds_max_purge_ops", 'mds')) + mds_max_purge_files = int(self.fs.get_config("mds_max_purge_files", 'mds')) + + # During this phase we look for the concurrent ops to exceed half + # the limit (a heuristic) and not exceed the limit (a correctness + # condition). + purge_timeout = 600 + elapsed = 0 + files_high_water = 0 + ops_high_water = 0 + + while True: + stats = self.fs.mds_asok(['perf', 'dump']) + mdc_stats = stats['mds_cache'] + pq_stats = stats['purge_queue'] + if elapsed >= purge_timeout: + raise RuntimeError("Timeout waiting for {0} inodes to purge, stats:{1}".format(total_inodes, mdc_stats)) + + num_strays = mdc_stats['num_strays'] + num_strays_purging = pq_stats['pq_executing'] + num_purge_ops = pq_stats['pq_executing_ops'] + files_high_water = pq_stats['pq_executing_high_water'] + ops_high_water = pq_stats['pq_executing_ops_high_water'] + + self.data_log.append([datetime.datetime.now(), num_strays, num_strays_purging, num_purge_ops, files_high_water, ops_high_water]) + + total_strays_created = mdc_stats['strays_created'] + total_strays_purged = pq_stats['pq_executed'] + + if total_strays_purged == total_inodes: + log.info("Complete purge in {0} seconds".format(elapsed)) + break + elif total_strays_purged > total_inodes: + raise RuntimeError("Saw more strays than expected, mdc stats: {0}".format(mdc_stats)) + else: + if throttle_type == self.OPS_THROTTLE: + # 11 is filer_max_purge_ops plus one for the backtrace: + # limit is allowed to be overshot by this much. + if num_purge_ops > mds_max_purge_ops + 11: + raise RuntimeError("num_purge_ops violates threshold {0}/{1}".format( + num_purge_ops, mds_max_purge_ops + )) + elif throttle_type == self.FILES_THROTTLE: + if num_strays_purging > mds_max_purge_files: + raise RuntimeError("num_strays_purging violates threshold {0}/{1}".format( + num_strays_purging, mds_max_purge_files + )) + else: + raise NotImplementedError(throttle_type) + + log.info("Waiting for purge to complete {0}/{1}, {2}/{3}".format( + num_strays_purging, num_strays, + total_strays_purged, total_strays_created + )) + time.sleep(1) + elapsed += 1 + + background_thread.join() + + # Check that we got up to a respectable rate during the purge. 
This is totally + # racy, but should be safeish unless the cluster is pathologically slow, or + # insanely fast such that the deletions all pass before we have polled the + # statistics. + if throttle_type == self.OPS_THROTTLE: + if ops_high_water < mds_max_purge_ops // 2: + raise RuntimeError("Ops in flight high water is unexpectedly low ({0} / {1})".format( + ops_high_water, mds_max_purge_ops + )) + # The MDS may go over mds_max_purge_ops for some items, like a + # heavily fragmented directory. The throttle does not kick in + # until *after* we reach or exceed the limit. This is expected + # because we don't want to starve the PQ or never purge a + # particularly large file/directory. + self.assertLessEqual(ops_high_water, mds_max_purge_ops+64) + elif throttle_type == self.FILES_THROTTLE: + if files_high_water < mds_max_purge_files // 2: + raise RuntimeError("Files in flight high water is unexpectedly low ({0} / {1})".format( + files_high_water, mds_max_purge_files + )) + self.assertLessEqual(files_high_water, mds_max_purge_files) + + # Sanity check all MDC stray stats + stats = self.fs.mds_asok(['perf', 'dump']) + mdc_stats = stats['mds_cache'] + pq_stats = stats['purge_queue'] + self.assertEqual(mdc_stats['num_strays'], 0) + self.assertEqual(mdc_stats['num_strays_delayed'], 0) + self.assertEqual(pq_stats['pq_executing'], 0) + self.assertEqual(pq_stats['pq_executing_ops'], 0) + self.assertEqual(mdc_stats['strays_created'], total_inodes) + self.assertEqual(mdc_stats['strays_enqueued'], total_inodes) + self.assertEqual(pq_stats['pq_executed'], total_inodes) + + def get_mdc_stat(self, name, mds_id=None): + return self.get_stat("mds_cache", name, mds_id) + + def get_stat(self, subsys, name, mds_id=None): + return self.fs.mds_asok(['perf', 'dump', subsys, name], + mds_id=mds_id)[subsys][name] + + def _wait_for_counter(self, subsys, counter, expect_val, timeout=60, + mds_id=None): + self.wait_until_equal( + lambda: self.get_stat(subsys, counter, mds_id), + expect_val=expect_val, timeout=timeout, + reject_fn=lambda x: x > expect_val + ) + + def test_open_inode(self): + """ + That the case of a dentry unlinked while a client holds an + inode open is handled correctly. + + The inode should be moved into a stray dentry, while the original + dentry and directory should be purged. + + The inode's data should be purged when the client eventually closes + it. 
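+
+ Progress is verified through the mds_cache and purge_queue perf counters
+ and through the client's cap count.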
+ """ + mount_a_client_id = self.mount_a.get_global_id() + + # Write some bytes to a file + size_mb = 8 + + # Hold the file open + p = self.mount_a.open_background("open_file") + self.mount_a.write_n_mb("open_file", size_mb) + open_file_ino = self.mount_a.path_to_ino("open_file") + + self.assertEqual(self.get_session(mount_a_client_id)['num_caps'], 2) + + # Unlink the dentry + self.mount_a.run_shell(["rm", "-f", "open_file"]) + + # Wait to see the stray count increment + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=1, timeout=60, reject_fn=lambda x: x > 1) + + # See that while the stray count has incremented, none have passed + # on to the purge queue + self.assertEqual(self.get_mdc_stat("strays_created"), 1) + self.assertEqual(self.get_mdc_stat("strays_enqueued"), 0) + + # See that the client still holds 2 caps + self.assertEqual(self.get_session(mount_a_client_id)['num_caps'], 2) + + # See that the data objects remain in the data pool + self.assertTrue(self.fs.data_objects_present(open_file_ino, size_mb * 1024 * 1024)) + + # Now close the file + self.mount_a.kill_background(p) + + # Wait to see the client cap count decrement + self.wait_until_equal( + lambda: self.get_session(mount_a_client_id)['num_caps'], + expect_val=1, timeout=60, reject_fn=lambda x: x > 2 or x < 1 + ) + # Wait to see the purge counter increment, stray count go to zero + self._wait_for_counter("mds_cache", "strays_enqueued", 1) + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, timeout=6, reject_fn=lambda x: x > 1 + ) + self._wait_for_counter("purge_queue", "pq_executed", 1) + + # See that the data objects no longer exist + self.assertTrue(self.fs.data_objects_absent(open_file_ino, size_mb * 1024 * 1024)) + + self.await_data_pool_empty() + + def test_reintegration_limit(self): + """ + That the reintegration is not blocked by full directories. + """ + + LOW_LIMIT = 50 + self.config_set('mds', 'mds_bal_fragment_size_max', str(LOW_LIMIT)) + time.sleep(10) # for config to reach MDS; async create is fast!! + + last_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.mount_a.run_shell_payload(""" + mkdir a b + for i in `seq 1 50`; do + touch a/"$i" + ln a/"$i" b/"$i" + done + sync -f a b + rm a/* + """) + + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, + timeout=60 + ) + curr_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.assertGreater(curr_reintegrated, last_reintegrated) + + + def test_hardlink_reintegration(self): + """ + That removal of primary dentry of hardlinked inode results + in reintegration of inode into the previously-remote dentry, + rather than lingering as a stray indefinitely. + """ + # Write some bytes to file_a + size_mb = 8 + self.mount_a.run_shell(["mkdir", "dir_1"]) + self.mount_a.write_n_mb("dir_1/file_a", size_mb) + ino = self.mount_a.path_to_ino("dir_1/file_a") + + # Create a hardlink named file_b + self.mount_a.run_shell(["mkdir", "dir_2"]) + self.mount_a.run_shell(["ln", "dir_1/file_a", "dir_2/file_b"]) + self.assertEqual(self.mount_a.path_to_ino("dir_2/file_b"), ino) + + # Flush journal + self.fs.mds_asok(['flush', 'journal']) + + # See that backtrace for the file points to the file_a path + pre_unlink_bt = self.fs.read_backtrace(ino) + self.assertEqual(pre_unlink_bt['ancestors'][0]['dname'], "file_a") + + # empty mds cache. 
otherwise mds reintegrates stray when unlink finishes + self.mount_a.umount_wait() + self.fs.mds_asok(['flush', 'journal']) + self.fs.mds_fail_restart() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + + # Unlink file_a + self.mount_a.run_shell(["rm", "-f", "dir_1/file_a"]) + + # See that a stray was created + self.assertEqual(self.get_mdc_stat("num_strays"), 1) + self.assertEqual(self.get_mdc_stat("strays_created"), 1) + + # Wait, see that data objects are still present (i.e. that the + # stray did not advance to purging given time) + time.sleep(30) + self.assertTrue(self.fs.data_objects_present(ino, size_mb * 1024 * 1024)) + self.assertEqual(self.get_mdc_stat("strays_enqueued"), 0) + + # See that before reintegration, the inode's backtrace points to a stray dir + self.fs.mds_asok(['flush', 'journal']) + self.assertTrue(self.get_backtrace_path(ino).startswith("stray")) + + last_reintegrated = self.get_mdc_stat("strays_reintegrated") + + # Do a metadata operation on the remaining link (mv is heavy handed, but + # others like touch may be satisfied from caps without poking MDS) + self.mount_a.run_shell(["mv", "dir_2/file_b", "dir_2/file_c"]) + + # Stray reintegration should happen as a result of the eval_remote call + # on responding to a client request. + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, + timeout=60 + ) + + # See the reintegration counter increment + curr_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.assertGreater(curr_reintegrated, last_reintegrated) + last_reintegrated = curr_reintegrated + + # Flush the journal + self.fs.mds_asok(['flush', 'journal']) + + # See that the backtrace for the file points to the remaining link's path + post_reint_bt = self.fs.read_backtrace(ino) + self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "file_c") + + # mds should reintegrates stray when unlink finishes + self.mount_a.run_shell(["ln", "dir_2/file_c", "dir_2/file_d"]) + self.mount_a.run_shell(["rm", "-f", "dir_2/file_c"]) + + # Stray reintegration should happen as a result of the notify_stray call + # on completion of unlink + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, + timeout=60 + ) + + # See the reintegration counter increment + curr_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.assertGreater(curr_reintegrated, last_reintegrated) + last_reintegrated = curr_reintegrated + + # Flush the journal + self.fs.mds_asok(['flush', 'journal']) + + # See that the backtrace for the file points to the newest link's path + post_reint_bt = self.fs.read_backtrace(ino) + self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "file_d") + + # Now really delete it + self.mount_a.run_shell(["rm", "-f", "dir_2/file_d"]) + self._wait_for_counter("mds_cache", "strays_enqueued", 1) + self._wait_for_counter("purge_queue", "pq_executed", 1) + + self.assert_purge_idle() + self.assertTrue(self.fs.data_objects_absent(ino, size_mb * 1024 * 1024)) + + # We caused the inode to go stray 3 times + self.assertEqual(self.get_mdc_stat("strays_created"), 3) + # We purged it at the last + self.assertEqual(self.get_mdc_stat("strays_enqueued"), 1) + + def test_reintegration_via_scrub(self): + """ + That reintegration is triggered via recursive scrub. + """ + + self.mount_a.run_shell_payload(""" + mkdir -p a b + for i in `seq 1 50`; do + touch a/"$i" + ln a/"$i" b/"$i" + done + sync -f . 
+ """) + + self.mount_a.remount() # drop caps/cache + self.fs.rank_tell(["flush", "journal"]) + self.fs.rank_fail() + self.fs.wait_for_daemons() + + # only / in cache, reintegration cannot happen + self.wait_until_equal( + lambda: len(self.fs.rank_tell(["dump", "tree", "/"])), + expect_val=3, + timeout=60 + ) + + last_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.mount_a.run_shell_payload(""" + rm a/* + sync -f . + """) + self.wait_until_equal( + lambda: len(self.fs.rank_tell(["dump", "tree", "/"])), + expect_val=3, + timeout=60 + ) + self.assertEqual(self.get_mdc_stat("num_strays"), 50) + curr_reintegrated = self.get_mdc_stat("strays_reintegrated") + self.assertEqual(last_reintegrated, curr_reintegrated) + + self.fs.rank_tell(["scrub", "start", "/", "recursive,force"]) + + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, + timeout=60 + ) + curr_reintegrated = self.get_mdc_stat("strays_reintegrated") + # N.B.: reintegrate (rename RPC) may be tried multiple times from different code paths + self.assertGreaterEqual(curr_reintegrated, last_reintegrated+50) + + def test_mv_hardlink_cleanup(self): + """ + That when doing a rename from A to B, and B has hardlinks, + then we make a stray for B which is then reintegrated + into one of his hardlinks. + """ + # Create file_a, file_b, and a hardlink to file_b + size_mb = 8 + self.mount_a.write_n_mb("file_a", size_mb) + file_a_ino = self.mount_a.path_to_ino("file_a") + + self.mount_a.write_n_mb("file_b", size_mb) + file_b_ino = self.mount_a.path_to_ino("file_b") + + self.mount_a.run_shell(["ln", "file_b", "linkto_b"]) + self.assertEqual(self.mount_a.path_to_ino("linkto_b"), file_b_ino) + + # mv file_a file_b + self.mount_a.run_shell(["mv", "file_a", "file_b"]) + + # Stray reintegration should happen as a result of the notify_stray call on + # completion of rename + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays"), + expect_val=0, + timeout=60 + ) + + self.assertEqual(self.get_mdc_stat("strays_created"), 1) + self.assertGreaterEqual(self.get_mdc_stat("strays_reintegrated"), 1) + + # No data objects should have been deleted, as both files still have linkage. 
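+ # file_a's inode now lives at the path "file_b", and file_b's inode survives
+ # through its remaining hard link "linkto_b".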
+ self.assertTrue(self.fs.data_objects_present(file_a_ino, size_mb * 1024 * 1024)) + self.assertTrue(self.fs.data_objects_present(file_b_ino, size_mb * 1024 * 1024)) + + self.fs.mds_asok(['flush', 'journal']) + + post_reint_bt = self.fs.read_backtrace(file_b_ino) + self.assertEqual(post_reint_bt['ancestors'][0]['dname'], "linkto_b") + + def _setup_two_ranks(self): + # Set up two MDSs + self.fs.set_max_mds(2) + + # See that we have two active MDSs + self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30, + reject_fn=lambda v: v > 2 or v < 1) + + active_mds_names = self.fs.get_active_names() + rank_0_id = active_mds_names[0] + rank_1_id = active_mds_names[1] + log.info("Ranks 0 and 1 are {0} and {1}".format( + rank_0_id, rank_1_id)) + + # Get rid of other MDS daemons so that it's easier to know which + # daemons to expect in which ranks after restarts + for unneeded_mds in set(self.mds_cluster.mds_ids) - {rank_0_id, rank_1_id}: + self.mds_cluster.mds_stop(unneeded_mds) + self.mds_cluster.mds_fail(unneeded_mds) + + return rank_0_id, rank_1_id + + def _force_migrate(self, path, rank=1): + """ + :param to_id: MDS id to move it to + :param path: Filesystem path (string) to move + :return: None + """ + self.mount_a.run_shell(["setfattr", "-n", "ceph.dir.pin", "-v", str(rank), path]) + rpath = "/"+path + self._wait_subtrees([(rpath, rank)], rank=rank, path=rpath) + + def _is_stopped(self, rank): + mds_map = self.fs.get_mds_map() + return rank not in [i['rank'] for i in mds_map['info'].values()] + + def test_purge_on_shutdown(self): + """ + That when an MDS rank is shut down, its purge queue is + drained in the process. + """ + rank_0_id, rank_1_id = self._setup_two_ranks() + + self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0") + self.mds_cluster.mds_fail_restart(rank_1_id) + self.fs.wait_for_daemons() + + file_count = 5 + + self.mount_a.create_n_files("delete_me/file", file_count) + + self._force_migrate("delete_me") + + self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")]) + self.mount_a.umount_wait() + + # See all the strays go into purge queue + self._wait_for_counter("mds_cache", "strays_created", file_count, mds_id=rank_1_id) + self._wait_for_counter("mds_cache", "strays_enqueued", file_count, mds_id=rank_1_id) + self.assertEqual(self.get_stat("mds_cache", "num_strays", mds_id=rank_1_id), 0) + + # See nothing get purged from the purge queue (yet) + time.sleep(10) + self.assertEqual(self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0) + + # Shut down rank 1 + self.fs.set_max_mds(1) + + # It shouldn't proceed past stopping because its still not allowed + # to purge + time.sleep(10) + self.assertEqual(self.get_stat("purge_queue", "pq_executed", mds_id=rank_1_id), 0) + self.assertFalse(self._is_stopped(1)) + + # Permit the daemon to start purging again + self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(rank_1_id), + 'injectargs', + "--mds_max_purge_files 100") + + # It should now proceed through shutdown + self.fs.wait_for_daemons(timeout=120) + + # ...and in the process purge all that data + self.await_data_pool_empty() + + def test_migration_on_shutdown(self): + """ + That when an MDS rank is shut down, any non-purgeable strays + get migrated to another rank. 
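+ The stray is made non-purgeable by keeping a hard link to its inode in a
+ directory pinned to the other rank, so it is migrated rather than purged.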
+ """ + + rank_0_id, rank_1_id = self._setup_two_ranks() + + # Create a non-purgeable stray in a ~mds1 stray directory + # by doing a hard link and deleting the original file + self.mount_a.run_shell_payload(""" +mkdir dir_1 dir_2 +touch dir_1/original +ln dir_1/original dir_2/linkto +""") + + self._force_migrate("dir_1") + self._force_migrate("dir_2", rank=0) + + # empty mds cache. otherwise mds reintegrates stray when unlink finishes + self.mount_a.umount_wait() + self.fs.mds_asok(['flush', 'journal'], rank_1_id) + self.fs.mds_asok(['cache', 'drop'], rank_1_id) + + self.mount_a.mount_wait() + self.mount_a.run_shell(["rm", "-f", "dir_1/original"]) + self.mount_a.umount_wait() + + self._wait_for_counter("mds_cache", "strays_created", 1, + mds_id=rank_1_id) + + # Shut down rank 1 + self.fs.set_max_mds(1) + self.fs.wait_for_daemons(timeout=120) + + # See that the stray counter on rank 0 has incremented + self.assertEqual(self.get_mdc_stat("strays_created", rank_0_id), 1) + + def test_migrate_unlinked_dir(self): + """ + Reproduce https://tracker.ceph.com/issues/53597 + """ + rank_0_id, rank_1_id = self._setup_two_ranks() + + self.mount_a.run_shell_payload(""" +mkdir pin +touch pin/placeholder +""") + + self._force_migrate("pin") + + # Hold the dir open so it cannot be purged + p = self.mount_a.open_dir_background("pin/to-be-unlinked") + + # Unlink the dentry + self.mount_a.run_shell(["rmdir", "pin/to-be-unlinked"]) + + # Wait to see the stray count increment + self.wait_until_equal( + lambda: self.get_mdc_stat("num_strays", mds_id=rank_1_id), + expect_val=1, timeout=60, reject_fn=lambda x: x > 1) + # but not purged + self.assertEqual(self.get_mdc_stat("strays_created", mds_id=rank_1_id), 1) + self.assertEqual(self.get_mdc_stat("strays_enqueued", mds_id=rank_1_id), 0) + + # Test loading unlinked dir into cache + self.fs.mds_asok(['flush', 'journal'], rank_1_id) + self.fs.mds_asok(['cache', 'drop'], rank_1_id) + + # Shut down rank 1 + self.fs.set_max_mds(1) + self.fs.wait_for_daemons(timeout=120) + # Now the stray should be migrated to rank 0 + # self.assertEqual(self.get_mdc_stat("strays_created", mds_id=rank_0_id), 1) + # https://github.com/ceph/ceph/pull/44335#issuecomment-1125940158 + + self.mount_a.kill_background(p) + + def assert_backtrace(self, ino, expected_path): + """ + Assert that the backtrace in the data pool for an inode matches + an expected /foo/bar path. + """ + expected_elements = expected_path.strip("/").split("/") + bt = self.fs.read_backtrace(ino) + actual_elements = list(reversed([dn['dname'] for dn in bt['ancestors']])) + self.assertListEqual(expected_elements, actual_elements) + + def get_backtrace_path(self, ino): + bt = self.fs.read_backtrace(ino) + elements = reversed([dn['dname'] for dn in bt['ancestors']]) + return "/".join(elements) + + def assert_purge_idle(self): + """ + Assert that the MDS perf counters indicate no strays exist and + no ongoing purge activity. Sanity check for when PurgeQueue should + be idle. + """ + mdc_stats = self.fs.mds_asok(['perf', 'dump', "mds_cache"])['mds_cache'] + pq_stats = self.fs.mds_asok(['perf', 'dump', "purge_queue"])['purge_queue'] + self.assertEqual(mdc_stats["num_strays"], 0) + self.assertEqual(mdc_stats["num_strays_delayed"], 0) + self.assertEqual(pq_stats["pq_executing"], 0) + self.assertEqual(pq_stats["pq_executing_ops"], 0) + + def test_mv_cleanup(self): + """ + That when doing a rename from A to B, and B has no hardlinks, + then we make a stray for B and purge him. 
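+ B's data objects should disappear, A's data objects should remain, and
+ A's backtrace should be rewritten to point at the path "file_b".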
+ """ + # Create file_a and file_b, write some to both + size_mb = 8 + self.mount_a.write_n_mb("file_a", size_mb) + file_a_ino = self.mount_a.path_to_ino("file_a") + self.mount_a.write_n_mb("file_b", size_mb) + file_b_ino = self.mount_a.path_to_ino("file_b") + + self.fs.mds_asok(['flush', 'journal']) + self.assert_backtrace(file_a_ino, "file_a") + self.assert_backtrace(file_b_ino, "file_b") + + # mv file_a file_b + self.mount_a.run_shell(['mv', 'file_a', 'file_b']) + + # See that stray counter increments + self.assertEqual(self.get_mdc_stat("strays_created"), 1) + # Wait for purge counter to increment + self._wait_for_counter("mds_cache", "strays_enqueued", 1) + self._wait_for_counter("purge_queue", "pq_executed", 1) + + self.assert_purge_idle() + + # file_b should have been purged + self.assertTrue(self.fs.data_objects_absent(file_b_ino, size_mb * 1024 * 1024)) + + # Backtrace should have updated from file_a to file_b + self.fs.mds_asok(['flush', 'journal']) + self.assert_backtrace(file_a_ino, "file_b") + + # file_a's data should still exist + self.assertTrue(self.fs.data_objects_present(file_a_ino, size_mb * 1024 * 1024)) + + def _pool_df(self, pool_name): + """ + Return a dict like + { + "kb_used": 0, + "bytes_used": 0, + "max_avail": 19630292406, + "objects": 0 + } + + :param pool_name: Which pool (must exist) + """ + out = self.fs.mon_manager.raw_cluster_cmd("df", "--format=json-pretty") + for p in json.loads(out)['pools']: + if p['name'] == pool_name: + return p['stats'] + + raise RuntimeError("Pool '{0}' not found".format(pool_name)) + + def await_data_pool_empty(self): + self.wait_until_true( + lambda: self._pool_df( + self.fs.get_data_pool_name() + )['objects'] == 0, + timeout=60) + + def test_snapshot_remove(self): + """ + That removal of a snapshot that references a now-unlinked file results + in purging on the stray for the file. + """ + # Enable snapshots + self.fs.set_allow_new_snaps(True) + + # Create a dir with a file in it + size_mb = 8 + self.mount_a.run_shell(["mkdir", "snapdir"]) + self.mount_a.run_shell(["mkdir", "snapdir/subdir"]) + self.mount_a.write_test_pattern("snapdir/subdir/file_a", size_mb * 1024 * 1024) + file_a_ino = self.mount_a.path_to_ino("snapdir/subdir/file_a") + + # Snapshot the dir + self.mount_a.run_shell(["mkdir", "snapdir/.snap/snap1"]) + + # Cause the head revision to deviate from the snapshot + self.mount_a.write_n_mb("snapdir/subdir/file_a", size_mb) + + # Flush the journal so that backtraces, dirfrag objects will actually be written + self.fs.mds_asok(["flush", "journal"]) + + # Unlink the file + self.mount_a.run_shell(["rm", "-f", "snapdir/subdir/file_a"]) + self.mount_a.run_shell(["rmdir", "snapdir/subdir"]) + + # Unmount the client because when I come back to check the data is still + # in the file I don't want to just see what's in the page cache. 
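+ # (The unlinked file is still referenced by the snapshot "snap1", so its data
+ # must survive until the snapshot itself is removed later in the test.)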
+ self.mount_a.umount_wait() + + self.assertEqual(self.get_mdc_stat("strays_created"), 2) + + # FIXME: at this stage we see a purge and the stray count drops to + # zero, but there's actually still a stray, so at the very + # least the StrayManager stats code is slightly off + + self.mount_a.mount_wait() + + # See that the data from the snapshotted revision of the file is still present + # and correct + self.mount_a.validate_test_pattern("snapdir/.snap/snap1/subdir/file_a", size_mb * 1024 * 1024) + + # Remove the snapshot + self.mount_a.run_shell(["rmdir", "snapdir/.snap/snap1"]) + + # Purging file_a doesn't happen until after we've flushed the journal, because + # it is referenced by the snapshotted subdir, and the snapshot isn't really + # gone until the journal references to it are gone + self.fs.mds_asok(["flush", "journal"]) + + # Wait for purging to complete, which requires the OSDMap to propagate to the OSDs. + # See also: http://tracker.ceph.com/issues/20072 + self.wait_until_true( + lambda: self.fs.data_objects_absent(file_a_ino, size_mb * 1024 * 1024), + timeout=60 + ) + + # See that a purge happens now + self._wait_for_counter("mds_cache", "strays_enqueued", 2) + self._wait_for_counter("purge_queue", "pq_executed", 2) + + self.await_data_pool_empty() + + def test_fancy_layout(self): + """ + purge stray file with fancy layout + """ + + file_name = "fancy_layout_file" + self.mount_a.run_shell(["touch", file_name]) + + file_layout = "stripe_unit=1048576 stripe_count=4 object_size=8388608" + self.mount_a.setfattr(file_name, "ceph.file.layout", file_layout) + + # 35MB requires 7 objects + size_mb = 35 + self.mount_a.write_n_mb(file_name, size_mb) + + self.mount_a.run_shell(["rm", "-f", file_name]) + self.fs.mds_asok(["flush", "journal"]) + + # can't use self.fs.data_objects_absent here, it does not support fancy layout + self.await_data_pool_empty() + + def test_dirfrag_limit(self): + """ + That the directory fragment size cannot exceed mds_bal_fragment_size_max (using a limit of 50 in all configurations). + """ + + LOW_LIMIT = 50 + self.config_set('mds', 'mds_bal_fragment_size_max', str(LOW_LIMIT)) + time.sleep(10) # for config to reach MDS; async create is fast!! + + try: + self.mount_a.create_n_files("subdir/file", LOW_LIMIT+1, finaldirsync=True) + except CommandFailedError: + pass # ENOSPC + else: + self.fail("fragment size exceeded") + + + def test_dirfrag_limit_fragmented(self): + """ + That fragmentation (forced) will allow more entries to be created. + """ + + LOW_LIMIT = 50 + self.config_set('mds', 'mds_bal_fragment_size_max', str(LOW_LIMIT)) + self.config_set('mds', 'mds_bal_merge_size', 1) # disable merging + time.sleep(10) # for config to reach MDS; async create is fast!! + + # Test that we can go beyond the limit if we fragment the directory + self.mount_a.create_n_files("subdir/file", LOW_LIMIT, finaldirsync=True) + self.mount_a.umount_wait() # release client caps + + # Ensure that subdir is fragmented + self.fs.rank_asok(["dirfrag", "split", "/subdir", "0/0", "1"]) + self.fs.rank_asok(["flush", "journal"]) + + # Create 50% more files than the current fragment limit + self.mount_a.mount_wait() + self.mount_a.create_n_files("subdir/file", (LOW_LIMIT*3)//2, finaldirsync=True) + + def test_dirfrag_limit_strays(self): + """ + That unlinking fails when the stray directory fragment becomes too + large and that unlinking may continue once those strays are purged. + """ + + LOW_LIMIT = 10 + # N.B. 
this test is inherently racy because stray removal may be faster + # than slow(er) file creation. + self.config_set('mds', 'mds_bal_fragment_size_max', LOW_LIMIT) + time.sleep(10) # for config to reach MDS; async create is fast!! + + # Now test the stray directory size is limited and recovers + strays_before = self.get_mdc_stat("strays_created") + try: + # 10 stray directories: expect collisions + self.mount_a.create_n_files("subdir/file", LOW_LIMIT*10, finaldirsync=True, unlink=True) + except CommandFailedError: + pass # ENOSPC + else: + self.fail("fragment size exceeded") + strays_after = self.get_mdc_stat("strays_created") + self.assertGreaterEqual(strays_after-strays_before, LOW_LIMIT) + + self._wait_for_counter("mds_cache", "strays_enqueued", strays_after) + self._wait_for_counter("purge_queue", "pq_executed", strays_after) + + # verify new files can be created and unlinked + self.mount_a.create_n_files("subdir/file", LOW_LIMIT, dirsync=True, unlink=True) + + def test_purge_queue_upgrade(self): + """ + That when starting on a system with no purge queue in the metadata + pool, we silently create one. + :return: + """ + + self.mds_cluster.mds_stop() + self.mds_cluster.mds_fail() + self.fs.radosm(["rm", "500.00000000"]) + self.mds_cluster.mds_restart() + self.fs.wait_for_daemons() + + def test_replicated_delete_speed(self): + """ + That deletions of replicated metadata are not pathologically slow + """ + rank_0_id, rank_1_id = self._setup_two_ranks() + + self.set_conf("mds.{0}".format(rank_1_id), 'mds_max_purge_files', "0") + self.mds_cluster.mds_fail_restart(rank_1_id) + self.fs.wait_for_daemons() + + file_count = 10 + + self.mount_a.create_n_files("delete_me/file", file_count) + + self._force_migrate("delete_me") + + begin = datetime.datetime.now() + self.mount_a.run_shell(["rm", "-rf", Raw("delete_me/*")]) + end = datetime.datetime.now() + + # What we're really checking here is that we are completing client + # operations immediately rather than delaying until the next tick. 
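+ # With file_count = 10 the allowed budget is 10 * mds_tick_interval * 0.25;
+ # for example, with a 5 second tick that is 12.5 seconds.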
+ tick_period = float(self.fs.get_config("mds_tick_interval", + service_type="mds")) + + duration = (end - begin).total_seconds() + self.assertLess(duration, (file_count * tick_period) * 0.25) diff --git a/qa/tasks/cephfs/test_subvolume.py b/qa/tasks/cephfs/test_subvolume.py new file mode 100644 index 000000000..1ebb137dd --- /dev/null +++ b/qa/tasks/cephfs/test_subvolume.py @@ -0,0 +1,170 @@ +import logging + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + + +class TestSubvolume(CephFSTestCase): + CLIENTS_REQUIRED = 1 + MDSS_REQUIRED = 1 + + def setUp(self): + super().setUp() + self.setup_test() + + def tearDown(self): + # clean up + self.cleanup_test() + super().tearDown() + + def setup_test(self): + self.mount_a.run_shell(['mkdir', 'group']) + self.mount_a.run_shell(['mkdir', 'group/subvol1']) + self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume', + '-v', '1', 'group/subvol1']) + self.mount_a.run_shell(['mv', 'group/subvol1', 'group/subvol2']) + + def cleanup_test(self): + self.mount_a.run_shell(['rm', '-rf', 'group']) + + def test_subvolume_move_out_file(self): + """ + To verify that file can't be moved out of subvolume + """ + self.mount_a.run_shell(['touch', 'group/subvol2/file1']) + + # file can't be moved out of a subvolume + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['rename', 'group/subvol2/file1', + 'group/file1', 'group/subvol2/file1']) + + + def test_subvolume_move_in_file(self): + """ + To verify that file can't be moved into subvolume + """ + # file can't be moved into a subvolume + self.mount_a.run_shell(['touch', 'group/file2']) + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['rename', 'group/file2', + 'group/subvol2/file2', 'group/file2']) + + def test_subvolume_hardlink_to_outside(self): + """ + To verify that file can't be hardlinked to outside subvolume + """ + self.mount_a.run_shell(['touch', 'group/subvol2/file1']) + + # create hard link within subvolume + self.mount_a.run_shell(['ln', + 'group/subvol2/file1', 'group/subvol2/file1_']) + + # hard link can't be created out of subvolume + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['ln', + 'group/subvol2/file1', 'group/file1_']) + + def test_subvolume_hardlink_to_inside(self): + """ + To verify that file can't be hardlinked to inside subvolume + """ + self.mount_a.run_shell(['touch', 'group/subvol2/file1']) + + # create hard link within subvolume + self.mount_a.run_shell(['ln', + 'group/subvol2/file1', 'group/subvol2/file1_']) + + # hard link can't be created inside subvolume + self.mount_a.run_shell(['touch', 'group/file2']) + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['ln', + 'group/file2', 'group/subvol2/file2_']) + + def test_subvolume_snapshot_inside_subvolume_subdir(self): + """ + To verify that snapshot can't be taken for a subvolume subdir + """ + self.mount_a.run_shell(['touch', 'group/subvol2/file1']) + + # create snapshot at subvolume root + self.mount_a.run_shell(['mkdir', 'group/subvol2/.snap/s1']) + + # can't create snapshot in a descendent dir of subvolume + self.mount_a.run_shell(['mkdir', 'group/subvol2/dir']) + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['mkdir', 'group/subvol2/dir/.snap/s2']) + + # clean up + self.mount_a.run_shell(['rmdir', 'group/subvol2/.snap/s1']) + + def test_subvolume_file_move_across_subvolumes(self): + """ + To verify that file can't be 
moved across subvolumes + """ + self.mount_a.run_shell(['touch', 'group/subvol2/file1']) + + # create another subvol + self.mount_a.run_shell(['mkdir', 'group/subvol3']) + self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume', + '-v', '1', 'group/subvol3']) + + # can't move file across subvolumes + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['rename', 'group/subvol2/file1', + 'group/subvol3/file1', + 'group/subvol2/file1']) + + def test_subvolume_hardlink_across_subvolumes(self): + """ + To verify that hardlink can't be created across subvolumes + """ + self.mount_a.run_shell(['touch', 'group/subvol2/file1']) + + # create another subvol + self.mount_a.run_shell(['mkdir', 'group/subvol3']) + self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume', + '-v', '1', 'group/subvol3']) + + # can't create hard link across subvolumes + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['ln', 'group/subvol2/file1', + 'group/subvol3/file1']) + + def test_subvolume_create_subvolume_inside_subvolume(self): + """ + To verify that subvolume can't be created inside a subvolume + """ + # can't create subvolume inside a subvolume + self.mount_a.run_shell(['mkdir', 'group/subvol2/dir']) + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume', + '-v', '1', 'group/subvol2/dir']) + + def test_subvolume_create_snapshot_inside_new_subvolume_parent(self): + """ + To verify that subvolume can't be created inside a new subvolume parent + """ + self.mount_a.run_shell(['touch', 'group/subvol2/file1']) + + # clear subvolume flag + self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume', + '-v', '0', 'group/subvol2']) + + # create a snap + self.mount_a.run_shell(['mkdir', 'group/subvol2/dir']) + self.mount_a.run_shell(['mkdir', 'group/subvol2/dir/.snap/s2']) + + # override subdir subvolume with parent subvolume + self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume', + '-v', '1', 'group/subvol2/dir']) + self.mount_a.run_shell(['setfattr', '-n', 'ceph.dir.subvolume', + '-v', '1', 'group/subvol2']) + + # can't create a snap in a subdir of a subvol parent + with self.assertRaises(CommandFailedError): + self.mount_a.run_shell(['mkdir', 'group/subvol2/dir/.snap/s3']) + + # clean up + self.mount_a.run_shell(['rmdir', 'group/subvol2/dir/.snap/s2']) diff --git a/qa/tasks/cephfs/test_volumes.py b/qa/tasks/cephfs/test_volumes.py new file mode 100644 index 000000000..2ecfeb327 --- /dev/null +++ b/qa/tasks/cephfs/test_volumes.py @@ -0,0 +1,7946 @@ +import os +import json +import time +import errno +import random +import logging +import collections +import uuid +import unittest +from hashlib import md5 +from textwrap import dedent +from io import StringIO + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from tasks.cephfs.fuse_mount import FuseMount +from teuthology.exceptions import CommandFailedError + +log = logging.getLogger(__name__) + +class TestVolumesHelper(CephFSTestCase): + """Helper class for testing FS volume, subvolume group and subvolume operations.""" + TEST_VOLUME_PREFIX = "volume" + TEST_SUBVOLUME_PREFIX="subvolume" + TEST_GROUP_PREFIX="group" + TEST_SNAPSHOT_PREFIX="snapshot" + TEST_CLONE_PREFIX="clone" + TEST_FILE_NAME_PREFIX="subvolume_file" + + # for filling subvolume with data + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 2 + + # io defaults + DEFAULT_FILE_SIZE = 1 # MB + DEFAULT_NUMBER_OF_FILES = 1024 + + def _fs_cmd(self, *args): + return 
self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args) + + def _raw_cmd(self, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + + def __check_clone_state(self, state, clone, clone_group=None, timo=120): + check = 0 + args = ["clone", "status", self.volname, clone] + if clone_group: + args.append(clone_group) + args = tuple(args) + while check < timo: + result = json.loads(self._fs_cmd(*args)) + if result["status"]["state"] == state: + break + check += 1 + time.sleep(1) + self.assertTrue(check < timo) + + def _get_clone_status(self, clone, clone_group=None): + args = ["clone", "status", self.volname, clone] + if clone_group: + args.append(clone_group) + args = tuple(args) + result = json.loads(self._fs_cmd(*args)) + return result + + def _wait_for_clone_to_complete(self, clone, clone_group=None, timo=120): + self.__check_clone_state("complete", clone, clone_group, timo) + + def _wait_for_clone_to_fail(self, clone, clone_group=None, timo=120): + self.__check_clone_state("failed", clone, clone_group, timo) + + def _wait_for_clone_to_be_in_progress(self, clone, clone_group=None, timo=120): + self.__check_clone_state("in-progress", clone, clone_group, timo) + + def _check_clone_canceled(self, clone, clone_group=None): + self.__check_clone_state("canceled", clone, clone_group, timo=1) + + def _get_subvolume_snapshot_path(self, subvolume, snapshot, source_group, subvol_path, source_version): + if source_version == 2: + # v2 + if subvol_path is not None: + (base_path, uuid_str) = os.path.split(subvol_path) + else: + (base_path, uuid_str) = os.path.split(self._get_subvolume_path(self.volname, subvolume, group_name=source_group)) + return os.path.join(base_path, ".snap", snapshot, uuid_str) + + # v1 + base_path = self._get_subvolume_path(self.volname, subvolume, group_name=source_group) + return os.path.join(base_path, ".snap", snapshot) + + def _verify_clone_attrs(self, source_path, clone_path): + path1 = source_path + path2 = clone_path + + p = self.mount_a.run_shell(["find", path1]) + paths = p.stdout.getvalue().strip().split() + + # for each entry in source and clone (sink) verify certain inode attributes: + # inode type, mode, ownership, [am]time. + for source_path in paths: + sink_entry = source_path[len(path1)+1:] + sink_path = os.path.join(path2, sink_entry) + + # mode+type + sval = int(self.mount_a.run_shell(['stat', '-c' '%f', source_path]).stdout.getvalue().strip(), 16) + cval = int(self.mount_a.run_shell(['stat', '-c' '%f', sink_path]).stdout.getvalue().strip(), 16) + self.assertEqual(sval, cval) + + # ownership + sval = int(self.mount_a.run_shell(['stat', '-c' '%u', source_path]).stdout.getvalue().strip()) + cval = int(self.mount_a.run_shell(['stat', '-c' '%u', sink_path]).stdout.getvalue().strip()) + self.assertEqual(sval, cval) + + sval = int(self.mount_a.run_shell(['stat', '-c' '%g', source_path]).stdout.getvalue().strip()) + cval = int(self.mount_a.run_shell(['stat', '-c' '%g', sink_path]).stdout.getvalue().strip()) + self.assertEqual(sval, cval) + + # inode timestamps + # do not check access as kclient will generally not update this like ceph-fuse will. 
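+ # '%Y' below is the inode modification time (mtime); atime is deliberately
+ # not compared.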
+ sval = int(self.mount_a.run_shell(['stat', '-c' '%Y', source_path]).stdout.getvalue().strip()) + cval = int(self.mount_a.run_shell(['stat', '-c' '%Y', sink_path]).stdout.getvalue().strip()) + self.assertEqual(sval, cval) + + def _verify_clone_root(self, source_path, clone_path, clone, clone_group, clone_pool): + # verifies following clone root attrs quota, data_pool and pool_namespace + # remaining attributes of clone root are validated in _verify_clone_attrs + + clone_info = json.loads(self._get_subvolume_info(self.volname, clone, clone_group)) + + # verify quota is inherited from source snapshot + src_quota = self.mount_a.getfattr(source_path, "ceph.quota.max_bytes") + # FIXME: kclient fails to get this quota value: https://tracker.ceph.com/issues/48075 + if isinstance(self.mount_a, FuseMount): + self.assertEqual(clone_info["bytes_quota"], "infinite" if src_quota is None else int(src_quota)) + + if clone_pool: + # verify pool is set as per request + self.assertEqual(clone_info["data_pool"], clone_pool) + else: + # verify pool and pool namespace are inherited from snapshot + self.assertEqual(clone_info["data_pool"], + self.mount_a.getfattr(source_path, "ceph.dir.layout.pool")) + self.assertEqual(clone_info["pool_namespace"], + self.mount_a.getfattr(source_path, "ceph.dir.layout.pool_namespace")) + + def _verify_clone(self, subvolume, snapshot, clone, + source_group=None, clone_group=None, clone_pool=None, + subvol_path=None, source_version=2, timo=120): + # pass in subvol_path (subvolume path when snapshot was taken) when subvolume is removed + # but snapshots are retained for clone verification + path1 = self._get_subvolume_snapshot_path(subvolume, snapshot, source_group, subvol_path, source_version) + path2 = self._get_subvolume_path(self.volname, clone, group_name=clone_group) + + check = 0 + # TODO: currently snapshot rentries are not stable if snapshot source entries + # are removed, https://tracker.ceph.com/issues/46747 + while check < timo and subvol_path is None: + val1 = int(self.mount_a.getfattr(path1, "ceph.dir.rentries")) + val2 = int(self.mount_a.getfattr(path2, "ceph.dir.rentries")) + if val1 == val2: + break + check += 1 + time.sleep(1) + self.assertTrue(check < timo) + + self._verify_clone_root(path1, path2, clone, clone_group, clone_pool) + self._verify_clone_attrs(path1, path2) + + def _generate_random_volume_name(self, count=1): + n = self.volume_start + volumes = [f"{TestVolumes.TEST_VOLUME_PREFIX}_{i:016}" for i in range(n, n+count)] + self.volume_start += count + return volumes[0] if count == 1 else volumes + + def _generate_random_subvolume_name(self, count=1): + n = self.subvolume_start + subvolumes = [f"{TestVolumes.TEST_SUBVOLUME_PREFIX}_{i:016}" for i in range(n, n+count)] + self.subvolume_start += count + return subvolumes[0] if count == 1 else subvolumes + + def _generate_random_group_name(self, count=1): + n = self.group_start + groups = [f"{TestVolumes.TEST_GROUP_PREFIX}_{i:016}" for i in range(n, n+count)] + self.group_start += count + return groups[0] if count == 1 else groups + + def _generate_random_snapshot_name(self, count=1): + n = self.snapshot_start + snaps = [f"{TestVolumes.TEST_SNAPSHOT_PREFIX}_{i:016}" for i in range(n, n+count)] + self.snapshot_start += count + return snaps[0] if count == 1 else snaps + + def _generate_random_clone_name(self, count=1): + n = self.clone_start + clones = [f"{TestVolumes.TEST_CLONE_PREFIX}_{i:016}" for i in range(n, n+count)] + self.clone_start += count + return clones[0] if count == 1 else clones + + def 
_enable_multi_fs(self): + self._fs_cmd("flag", "set", "enable_multiple", "true", "--yes-i-really-mean-it") + + def _create_or_reuse_test_volume(self): + result = json.loads(self._fs_cmd("volume", "ls")) + if len(result) == 0: + self.vol_created = True + self.volname = self._generate_random_volume_name() + self._fs_cmd("volume", "create", self.volname) + else: + self.volname = result[0]['name'] + + def _get_volume_info(self, vol_name, human_readable=False): + if human_readable: + args = ["volume", "info", vol_name, human_readable] + else: + args = ["volume", "info", vol_name] + args = tuple(args) + vol_md = self._fs_cmd(*args) + return vol_md + + def _get_subvolume_group_path(self, vol_name, group_name): + args = ("subvolumegroup", "getpath", vol_name, group_name) + path = self._fs_cmd(*args) + # remove the leading '/', and trailing whitespaces + return path[1:].rstrip() + + def _get_subvolume_group_info(self, vol_name, group_name): + args = ["subvolumegroup", "info", vol_name, group_name] + args = tuple(args) + group_md = self._fs_cmd(*args) + return group_md + + def _get_subvolume_path(self, vol_name, subvol_name, group_name=None): + args = ["subvolume", "getpath", vol_name, subvol_name] + if group_name: + args.append(group_name) + args = tuple(args) + path = self._fs_cmd(*args) + # remove the leading '/', and trailing whitespaces + return path[1:].rstrip() + + def _get_subvolume_info(self, vol_name, subvol_name, group_name=None): + args = ["subvolume", "info", vol_name, subvol_name] + if group_name: + args.append(group_name) + args = tuple(args) + subvol_md = self._fs_cmd(*args) + return subvol_md + + def _get_subvolume_snapshot_info(self, vol_name, subvol_name, snapname, group_name=None): + args = ["subvolume", "snapshot", "info", vol_name, subvol_name, snapname] + if group_name: + args.append(group_name) + args = tuple(args) + snap_md = self._fs_cmd(*args) + return snap_md + + def _delete_test_volume(self): + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + + def _do_subvolume_pool_and_namespace_update(self, subvolume, pool=None, pool_namespace=None, subvolume_group=None): + subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group) + + if pool is not None: + self.mount_a.setfattr(subvolpath, 'ceph.dir.layout.pool', pool, sudo=True) + + if pool_namespace is not None: + self.mount_a.setfattr(subvolpath, 'ceph.dir.layout.pool_namespace', pool_namespace, sudo=True) + + def _do_subvolume_attr_update(self, subvolume, uid, gid, mode, subvolume_group=None): + subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group) + + # mode + self.mount_a.run_shell(['sudo', 'chmod', mode, subvolpath], omit_sudo=False) + + # ownership + self.mount_a.run_shell(['sudo', 'chown', uid, subvolpath], omit_sudo=False) + self.mount_a.run_shell(['sudo', 'chgrp', gid, subvolpath], omit_sudo=False) + + def _do_subvolume_io(self, subvolume, subvolume_group=None, create_dir=None, + number_of_files=DEFAULT_NUMBER_OF_FILES, file_size=DEFAULT_FILE_SIZE): + # get subvolume path for IO + args = ["subvolume", "getpath", self.volname, subvolume] + if subvolume_group: + args.append(subvolume_group) + args = tuple(args) + subvolpath = self._fs_cmd(*args) + self.assertNotEqual(subvolpath, None) + subvolpath = subvolpath[1:].rstrip() # remove "/" prefix and any trailing newline + + io_path = subvolpath + if create_dir: + io_path = os.path.join(subvolpath, create_dir) + self.mount_a.run_shell_payload(f"mkdir -p {io_path}") + + log.debug("filling 
subvolume {0} with {1} files each {2}MB size under directory {3}".format(subvolume, number_of_files, file_size, io_path)) + for i in range(number_of_files): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + self.mount_a.write_n_mb(os.path.join(io_path, filename), file_size) + + def _do_subvolume_io_mixed(self, subvolume, subvolume_group=None): + subvolpath = self._get_subvolume_path(self.volname, subvolume, group_name=subvolume_group) + + reg_file = "regfile.0" + dir_path = os.path.join(subvolpath, "dir.0") + sym_path1 = os.path.join(subvolpath, "sym.0") + # this symlink's ownership would be changed + sym_path2 = os.path.join(dir_path, "sym.0") + + self.mount_a.run_shell(["mkdir", dir_path]) + self.mount_a.run_shell(["ln", "-s", "./{}".format(reg_file), sym_path1]) + self.mount_a.run_shell(["ln", "-s", "./{}".format(reg_file), sym_path2]) + # flip ownership to nobody. assumption: nobody's id is 65534 + self.mount_a.run_shell(["sudo", "chown", "-h", "65534:65534", sym_path2], omit_sudo=False) + + def _wait_for_trash_empty(self, timeout=60): + # XXX: construct the trash dir path (note that there is no mgr + # [sub]volume interface for this). + trashdir = os.path.join("./", "volumes", "_deleting") + self.mount_a.wait_for_dir_empty(trashdir, timeout=timeout) + + def _wait_for_subvol_trash_empty(self, subvol, group="_nogroup", timeout=30): + trashdir = os.path.join("./", "volumes", group, subvol, ".trash") + try: + self.mount_a.wait_for_dir_empty(trashdir, timeout=timeout) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + pass + else: + raise + + def _assert_meta_location_and_version(self, vol_name, subvol_name, subvol_group=None, version=2, legacy=False): + if legacy: + subvol_path = self._get_subvolume_path(vol_name, subvol_name, group_name=subvol_group) + m = md5() + m.update(("/"+subvol_path).encode('utf-8')) + meta_filename = "{0}.meta".format(m.digest().hex()) + metapath = os.path.join(".", "volumes", "_legacy", meta_filename) + else: + group = subvol_group if subvol_group is not None else '_nogroup' + metapath = os.path.join(".", "volumes", group, subvol_name, ".meta") + + out = self.mount_a.run_shell(['sudo', 'cat', metapath], omit_sudo=False) + lines = out.stdout.getvalue().strip().split('\n') + sv_version = -1 + for line in lines: + if line == "version = " + str(version): + sv_version = version + break + self.assertEqual(sv_version, version, "version expected was '{0}' but got '{1}' from meta file at '{2}'".format( + version, sv_version, metapath)) + + def _create_v1_subvolume(self, subvol_name, subvol_group=None, has_snapshot=True, subvol_type='subvolume', state='complete'): + group = subvol_group if subvol_group is not None else '_nogroup' + basepath = os.path.join("volumes", group, subvol_name) + uuid_str = str(uuid.uuid4()) + createpath = os.path.join(basepath, uuid_str) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False) + + # create a v1 snapshot, to prevent auto upgrades + if has_snapshot: + snappath = os.path.join(createpath, ".snap", "fake") + self.mount_a.run_shell(['sudo', 'mkdir', '-p', snappath], omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # create a v1 .meta file + meta_contents = "[GLOBAL]\nversion = 1\ntype = {0}\npath = {1}\nstate = {2}\n".format(subvol_type, "/" + createpath, state) + if state == 'pending': + # add a fake clone 
source + meta_contents = meta_contents + '[source]\nvolume = fake\nsubvolume = fake\nsnapshot = fake\n' + meta_filepath1 = os.path.join(self.mount_a.mountpoint, basepath, ".meta") + self.mount_a.client_remote.write_file(meta_filepath1, meta_contents, sudo=True) + return createpath + + def _update_fake_trash(self, subvol_name, subvol_group=None, trash_name='fake', create=True): + group = subvol_group if subvol_group is not None else '_nogroup' + trashpath = os.path.join("volumes", group, subvol_name, '.trash', trash_name) + if create: + self.mount_a.run_shell(['sudo', 'mkdir', '-p', trashpath], omit_sudo=False) + else: + self.mount_a.run_shell(['sudo', 'rmdir', trashpath], omit_sudo=False) + + def _configure_guest_auth(self, guest_mount, authid, key): + """ + Set up auth credentials for a guest client. + """ + # Create keyring file for the guest client. + keyring_txt = dedent(""" + [client.{authid}] + key = {key} + + """.format(authid=authid,key=key)) + + guest_mount.client_id = authid + guest_mount.client_remote.write_file(guest_mount.get_keyring_path(), + keyring_txt, sudo=True) + # Add a guest client section to the ceph config file. + self.config_set("client.{0}".format(authid), "debug client", 20) + self.config_set("client.{0}".format(authid), "debug objecter", 20) + self.set_conf("client.{0}".format(authid), + "keyring", guest_mount.get_keyring_path()) + + def _auth_metadata_get(self, filedata): + """ + Return a deserialized JSON object, or None + """ + try: + data = json.loads(filedata) + except json.decoder.JSONDecodeError: + data = None + return data + + def setUp(self): + super(TestVolumesHelper, self).setUp() + self.volname = None + self.vol_created = False + self._enable_multi_fs() + self._create_or_reuse_test_volume() + self.config_set('mon', 'mon_allow_pool_delete', True) + self.volume_start = random.randint(1, (1<<20)) + self.subvolume_start = random.randint(1, (1<<20)) + self.group_start = random.randint(1, (1<<20)) + self.snapshot_start = random.randint(1, (1<<20)) + self.clone_start = random.randint(1, (1<<20)) + + def tearDown(self): + if self.vol_created: + self._delete_test_volume() + super(TestVolumesHelper, self).tearDown() + + +class TestVolumes(TestVolumesHelper): + """Tests for FS volume operations.""" + def test_volume_create(self): + """ + That the volume can be created and then cleans up + """ + volname = self._generate_random_volume_name() + self._fs_cmd("volume", "create", volname) + volumels = json.loads(self._fs_cmd("volume", "ls")) + + if not (volname in ([volume['name'] for volume in volumels])): + raise RuntimeError("Error creating volume '{0}'".format(volname)) + + # check that the pools were created with the correct config + pool_details = json.loads(self._raw_cmd("osd", "pool", "ls", "detail", "--format=json")) + pool_flags = {} + for pool in pool_details: + pool_flags[pool["pool_id"]] = pool["flags_names"].split(",") + + volume_details = json.loads(self._fs_cmd("get", volname, "--format=json")) + for data_pool_id in volume_details['mdsmap']['data_pools']: + self.assertIn("bulk", pool_flags[data_pool_id]) + meta_pool_id = volume_details['mdsmap']['metadata_pool'] + self.assertNotIn("bulk", pool_flags[meta_pool_id]) + + # clean up + self._fs_cmd("volume", "rm", volname, "--yes-i-really-mean-it") + + def test_volume_ls(self): + """ + That the existing and the newly created volumes can be listed and + finally cleans up. 
+ """ + vls = json.loads(self._fs_cmd("volume", "ls")) + volumes = [volume['name'] for volume in vls] + + #create new volumes and add it to the existing list of volumes + volumenames = self._generate_random_volume_name(2) + for volumename in volumenames: + self._fs_cmd("volume", "create", volumename) + volumes.extend(volumenames) + + # list volumes + try: + volumels = json.loads(self._fs_cmd('volume', 'ls')) + if len(volumels) == 0: + raise RuntimeError("Expected the 'fs volume ls' command to list the created volumes.") + else: + volnames = [volume['name'] for volume in volumels] + if collections.Counter(volnames) != collections.Counter(volumes): + raise RuntimeError("Error creating or listing volumes") + finally: + # clean up + for volume in volumenames: + self._fs_cmd("volume", "rm", volume, "--yes-i-really-mean-it") + + def test_volume_rm(self): + """ + That the volume can only be removed when --yes-i-really-mean-it is used + and verify that the deleted volume is not listed anymore. + """ + for m in self.mounts: + m.umount_wait() + try: + self._fs_cmd("volume", "rm", self.volname) + except CommandFailedError as ce: + if ce.exitstatus != errno.EPERM: + raise RuntimeError("expected the 'fs volume rm' command to fail with EPERM, " + "but it failed with {0}".format(ce.exitstatus)) + else: + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + + #check if it's gone + volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty")) + if (self.volname in [volume['name'] for volume in volumes]): + raise RuntimeError("Expected the 'fs volume rm' command to succeed. " + "The volume {0} not removed.".format(self.volname)) + else: + raise RuntimeError("expected the 'fs volume rm' command to fail.") + + def test_volume_rm_arbitrary_pool_removal(self): + """ + That the arbitrary pool added to the volume out of band is removed + successfully on volume removal. + """ + for m in self.mounts: + m.umount_wait() + new_pool = "new_pool" + # add arbitrary data pool + self.fs.add_data_pool(new_pool) + vol_status = json.loads(self._fs_cmd("status", self.volname, "--format=json-pretty")) + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + + #check if fs is gone + volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty")) + volnames = [volume['name'] for volume in volumes] + self.assertNotIn(self.volname, volnames) + + #check if osd pools are gone + pools = json.loads(self._raw_cmd("osd", "pool", "ls", "--format=json-pretty")) + for pool in vol_status["pools"]: + self.assertNotIn(pool["name"], pools) + + def test_volume_rm_when_mon_delete_pool_false(self): + """ + That the volume can only be removed when mon_allowd_pool_delete is set + to true and verify that the pools are removed after volume deletion. 
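+ The test flips mon_allow_pool_delete back to true partway through so that
+ the final 'fs volume rm' can succeed.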
+ """ + for m in self.mounts: + m.umount_wait() + self.config_set('mon', 'mon_allow_pool_delete', False) + try: + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, + "expected the 'fs volume rm' command to fail with EPERM, " + "but it failed with {0}".format(ce.exitstatus)) + vol_status = json.loads(self._fs_cmd("status", self.volname, "--format=json-pretty")) + self.config_set('mon', 'mon_allow_pool_delete', True) + self._fs_cmd("volume", "rm", self.volname, "--yes-i-really-mean-it") + + #check if fs is gone + volumes = json.loads(self._fs_cmd("volume", "ls", "--format=json-pretty")) + volnames = [volume['name'] for volume in volumes] + self.assertNotIn(self.volname, volnames, + "volume {0} exists after removal".format(self.volname)) + #check if pools are gone + pools = json.loads(self._raw_cmd("osd", "pool", "ls", "--format=json-pretty")) + for pool in vol_status["pools"]: + self.assertNotIn(pool["name"], pools, + "pool {0} exists after volume removal".format(pool["name"])) + + def test_volume_rename(self): + """ + That volume, its file system and pools, can be renamed. + """ + for m in self.mounts: + m.umount_wait() + oldvolname = self.volname + newvolname = self._generate_random_volume_name() + new_data_pool, new_metadata_pool = f"cephfs.{newvolname}.data", f"cephfs.{newvolname}.meta" + self._fs_cmd("volume", "rename", oldvolname, newvolname, + "--yes-i-really-mean-it") + volumels = json.loads(self._fs_cmd('volume', 'ls')) + volnames = [volume['name'] for volume in volumels] + # volume name changed + self.assertIn(newvolname, volnames) + self.assertNotIn(oldvolname, volnames) + # pool names changed + self.fs.get_pool_names(refresh=True) + self.assertEqual(new_metadata_pool, self.fs.get_metadata_pool_name()) + self.assertEqual(new_data_pool, self.fs.get_data_pool_name()) + + def test_volume_rename_idempotency(self): + """ + That volume rename is idempotent. + """ + for m in self.mounts: + m.umount_wait() + oldvolname = self.volname + newvolname = self._generate_random_volume_name() + new_data_pool, new_metadata_pool = f"cephfs.{newvolname}.data", f"cephfs.{newvolname}.meta" + self._fs_cmd("volume", "rename", oldvolname, newvolname, + "--yes-i-really-mean-it") + self._fs_cmd("volume", "rename", oldvolname, newvolname, + "--yes-i-really-mean-it") + volumels = json.loads(self._fs_cmd('volume', 'ls')) + volnames = [volume['name'] for volume in volumels] + self.assertIn(newvolname, volnames) + self.assertNotIn(oldvolname, volnames) + self.fs.get_pool_names(refresh=True) + self.assertEqual(new_metadata_pool, self.fs.get_metadata_pool_name()) + self.assertEqual(new_data_pool, self.fs.get_data_pool_name()) + + def test_volume_rename_fails_without_confirmation_flag(self): + """ + That renaming volume fails without --yes-i-really-mean-it flag. + """ + newvolname = self._generate_random_volume_name() + try: + self._fs_cmd("volume", "rename", self.volname, newvolname) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, + "invalid error code on renaming a FS volume without the " + "'--yes-i-really-mean-it' flag") + else: + self.fail("expected renaming of FS volume to fail without the " + "'--yes-i-really-mean-it' flag") + + def test_volume_rename_for_more_than_one_data_pool(self): + """ + That renaming a volume with more than one data pool does not change + the name of the data pools. 
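+ Only the metadata pool is expected to be renamed (to the
+ cephfs.<newvolname>.meta convention); the data pool names are left unchanged.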
+ """ + for m in self.mounts: + m.umount_wait() + self.fs.add_data_pool('another-data-pool') + oldvolname = self.volname + newvolname = self._generate_random_volume_name() + self.fs.get_pool_names(refresh=True) + orig_data_pool_names = list(self.fs.data_pools.values()) + new_metadata_pool = f"cephfs.{newvolname}.meta" + self._fs_cmd("volume", "rename", self.volname, newvolname, + "--yes-i-really-mean-it") + volumels = json.loads(self._fs_cmd('volume', 'ls')) + volnames = [volume['name'] for volume in volumels] + # volume name changed + self.assertIn(newvolname, volnames) + self.assertNotIn(oldvolname, volnames) + self.fs.get_pool_names(refresh=True) + # metadata pool name changed + self.assertEqual(new_metadata_pool, self.fs.get_metadata_pool_name()) + # data pool names unchanged + self.assertCountEqual(orig_data_pool_names, list(self.fs.data_pools.values())) + + def test_volume_info(self): + """ + Tests the 'fs volume info' command + """ + vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"] + group = self._generate_random_group_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname)) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + self.assertEqual(vol_info["used_size"], 0, + "Size should be zero when volumes directory is empty") + + def test_volume_info_pending_subvol_deletions(self): + """ + Tests the pending_subvolume_deletions in 'fs volume info' command + """ + subvolname = self._generate_random_subvolume_name() + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--mode=777") + # create 3K zero byte files + self._do_subvolume_io(subvolname, number_of_files=3000, file_size=0) + # Delete the subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname)) + self.assertNotEqual(vol_info['pending_subvolume_deletions'], 0, + "pending_subvolume_deletions should be 1") + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_volume_info_without_subvolumegroup(self): + """ + Tests the 'fs volume info' command without subvolume group + """ + vol_fields = ["pools", "mon_addrs"] + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname)) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + self.assertNotIn("used_size", vol_info, + "'used_size' should not be present in absence of subvolumegroup") + self.assertNotIn("pending_subvolume_deletions", vol_info, + "'pending_subvolume_deletions' should not be present in absence" + " of subvolumegroup") + + def test_volume_info_with_human_readable_flag(self): + """ + Tests the 'fs volume info --human_readable' command + """ + vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"] + group = self._generate_random_group_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname, "--human_readable")) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + units = [' ', 'k', 'M', 'G', 'T', 'P', 'E'] + assert vol_info["used_size"][-1] in units, "unit suffix in used_size is absent" + assert vol_info["pools"]["data"][0]["avail"][-1] in units, 
"unit suffix in avail data is absent" + assert vol_info["pools"]["data"][0]["used"][-1] in units, "unit suffix in used data is absent" + assert vol_info["pools"]["metadata"][0]["avail"][-1] in units, "unit suffix in avail metadata is absent" + assert vol_info["pools"]["metadata"][0]["used"][-1] in units, "unit suffix in used metadata is absent" + self.assertEqual(int(vol_info["used_size"]), 0, + "Size should be zero when volumes directory is empty") + + def test_volume_info_with_human_readable_flag_without_subvolumegroup(self): + """ + Tests the 'fs volume info --human_readable' command without subvolume group + """ + vol_fields = ["pools", "mon_addrs"] + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname, "--human_readable")) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + units = [' ', 'k', 'M', 'G', 'T', 'P', 'E'] + assert vol_info["pools"]["data"][0]["avail"][-1] in units, "unit suffix in avail data is absent" + assert vol_info["pools"]["data"][0]["used"][-1] in units, "unit suffix in used data is absent" + assert vol_info["pools"]["metadata"][0]["avail"][-1] in units, "unit suffix in avail metadata is absent" + assert vol_info["pools"]["metadata"][0]["used"][-1] in units, "unit suffix in used metadata is absent" + self.assertNotIn("used_size", vol_info, + "'used_size' should not be present in absence of subvolumegroup") + self.assertNotIn("pending_subvolume_deletions", vol_info, + "'pending_subvolume_deletions' should not be present in absence" + " of subvolumegroup") + + +class TestSubvolumeGroups(TestVolumesHelper): + """Tests for FS subvolume group operations.""" + def test_default_uid_gid_subvolume_group(self): + group = self._generate_random_group_name() + expected_uid = 0 + expected_gid = 0 + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + group_path = self._get_subvolume_group_path(self.volname, group) + + # check group's uid and gid + stat = self.mount_a.stat(group_path) + self.assertEqual(stat['st_uid'], expected_uid) + self.assertEqual(stat['st_gid'], expected_gid) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_nonexistent_subvolume_group_create(self): + subvolume = self._generate_random_subvolume_name() + group = "non_existent_group" + + # try, creating subvolume in a nonexistent group + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolume create' command to fail") + + def test_nonexistent_subvolume_group_rm(self): + group = "non_existent_group" + + # try, remove subvolume group + try: + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolumegroup rm' command to fail") + + def test_subvolume_group_create_with_auto_cleanup_on_fail(self): + group = self._generate_random_group_name() + data_pool = "invalid_pool" + # create group with invalid data pool layout + with self.assertRaises(CommandFailedError): + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", data_pool) + + # check whether group path is cleaned up + try: + self._fs_cmd("subvolumegroup", "getpath", self.volname, group) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise 
RuntimeError("expected the 'fs subvolumegroup getpath' command to fail") + + def test_subvolume_group_create_with_desired_data_pool_layout(self): + group1, group2 = self._generate_random_group_name(2) + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group1) + group1_path = self._get_subvolume_group_path(self.volname, group1) + + default_pool = self.mount_a.getfattr(group1_path, "ceph.dir.layout.pool") + new_pool = "new_pool" + self.assertNotEqual(default_pool, new_pool) + + # add data pool + newid = self.fs.add_data_pool(new_pool) + + # create group specifying the new data pool as its pool layout + self._fs_cmd("subvolumegroup", "create", self.volname, group2, + "--pool_layout", new_pool) + group2_path = self._get_subvolume_group_path(self.volname, group2) + + desired_pool = self.mount_a.getfattr(group2_path, "ceph.dir.layout.pool") + try: + self.assertEqual(desired_pool, new_pool) + except AssertionError: + self.assertEqual(int(desired_pool), newid) # old kernel returns id + + self._fs_cmd("subvolumegroup", "rm", self.volname, group1) + self._fs_cmd("subvolumegroup", "rm", self.volname, group2) + + def test_subvolume_group_create_with_desired_mode(self): + group1, group2 = self._generate_random_group_name(2) + # default mode + expected_mode1 = "755" + # desired mode + expected_mode2 = "777" + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group2, f"--mode={expected_mode2}") + self._fs_cmd("subvolumegroup", "create", self.volname, group1) + + group1_path = self._get_subvolume_group_path(self.volname, group1) + group2_path = self._get_subvolume_group_path(self.volname, group2) + volumes_path = os.path.dirname(group1_path) + + # check group's mode + actual_mode1 = self.mount_a.run_shell(['stat', '-c' '%a', group1_path]).stdout.getvalue().strip() + actual_mode2 = self.mount_a.run_shell(['stat', '-c' '%a', group2_path]).stdout.getvalue().strip() + actual_mode3 = self.mount_a.run_shell(['stat', '-c' '%a', volumes_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode1, expected_mode1) + self.assertEqual(actual_mode2, expected_mode2) + self.assertEqual(actual_mode3, expected_mode1) + + self._fs_cmd("subvolumegroup", "rm", self.volname, group1) + self._fs_cmd("subvolumegroup", "rm", self.volname, group2) + + def test_subvolume_group_create_with_desired_uid_gid(self): + """ + That the subvolume group can be created with the desired uid and gid and its uid and gid matches the + expected values. 
+ """ + uid = 1000 + gid = 1000 + + # create subvolume group + subvolgroupname = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, subvolgroupname, "--uid", str(uid), "--gid", str(gid)) + + # make sure it exists + subvolgrouppath = self._get_subvolume_group_path(self.volname, subvolgroupname) + self.assertNotEqual(subvolgrouppath, None) + + # verify the uid and gid + suid = int(self.mount_a.run_shell(['stat', '-c' '%u', subvolgrouppath]).stdout.getvalue().strip()) + sgid = int(self.mount_a.run_shell(['stat', '-c' '%g', subvolgrouppath]).stdout.getvalue().strip()) + self.assertEqual(uid, suid) + self.assertEqual(gid, sgid) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, subvolgroupname) + + def test_subvolume_group_create_with_invalid_data_pool_layout(self): + group = self._generate_random_group_name() + data_pool = "invalid_pool" + # create group with invalid data pool layout + try: + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", data_pool) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise + else: + raise RuntimeError("expected the 'fs subvolumegroup create' command to fail") + + def test_subvolume_group_create_with_size(self): + # create group with size -- should set quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") + + # get group metadata + group_info = json.loads(self._get_subvolume_group_info(self.volname, group)) + self.assertEqual(group_info["bytes_quota"], 1000000000) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_info(self): + # tests the 'fs subvolumegroup info' command + + group_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime", + "data_pool", "gid", "mode", "mon_addrs", "mtime", "uid"] + + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # get group metadata + group_info = json.loads(self._get_subvolume_group_info(self.volname, group)) + for md in group_md: + self.assertIn(md, group_info, "'{0}' key not present in metadata of group".format(md)) + + self.assertEqual(group_info["bytes_pcent"], "undefined", "bytes_pcent should be set to undefined if quota is not set") + self.assertEqual(group_info["bytes_quota"], "infinite", "bytes_quota should be set to infinite if quota is not set") + self.assertEqual(group_info["uid"], 0) + self.assertEqual(group_info["gid"], 0) + + nsize = self.DEFAULT_FILE_SIZE*1024*1024 + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize)) + + # get group metadata after quota set + group_info = json.loads(self._get_subvolume_group_info(self.volname, group)) + for md in group_md: + self.assertIn(md, group_info, "'{0}' key not present in metadata of subvolume".format(md)) + + self.assertNotEqual(group_info["bytes_pcent"], "undefined", "bytes_pcent should not be set to undefined if quota is set") + self.assertEqual(group_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize)) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_create_idempotence(self): + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # try creating w/ same subvolume group name -- should be idempotent + self._fs_cmd("subvolumegroup", "create", 
self.volname, group) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_create_idempotence_mode(self): + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # try creating w/ same subvolume group name with mode -- should set mode + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--mode=766") + + group_path = self._get_subvolume_group_path(self.volname, group) + + # check subvolumegroup's mode + mode = self.mount_a.run_shell(['stat', '-c' '%a', group_path]).stdout.getvalue().strip() + self.assertEqual(mode, "766") + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_create_idempotence_uid_gid(self): + desired_uid = 1000 + desired_gid = 1000 + + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # try creating w/ same subvolume group name with uid/gid -- should set uid/gid + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--uid", str(desired_uid), "--gid", str(desired_gid)) + + group_path = self._get_subvolume_group_path(self.volname, group) + + # verify the uid and gid + actual_uid = int(self.mount_a.run_shell(['stat', '-c' '%u', group_path]).stdout.getvalue().strip()) + actual_gid = int(self.mount_a.run_shell(['stat', '-c' '%g', group_path]).stdout.getvalue().strip()) + self.assertEqual(desired_uid, actual_uid) + self.assertEqual(desired_gid, actual_gid) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_create_idempotence_data_pool(self): + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + group_path = self._get_subvolume_group_path(self.volname, group) + + default_pool = self.mount_a.getfattr(group_path, "ceph.dir.layout.pool") + new_pool = "new_pool" + self.assertNotEqual(default_pool, new_pool) + + # add data pool + newid = self.fs.add_data_pool(new_pool) + + # try creating w/ same subvolume group name with new data pool -- should set pool + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--pool_layout", new_pool) + desired_pool = self.mount_a.getfattr(group_path, "ceph.dir.layout.pool") + try: + self.assertEqual(desired_pool, new_pool) + except AssertionError: + self.assertEqual(int(desired_pool), newid) # old kernel returns id + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_create_idempotence_resize(self): + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # try creating w/ same subvolume name with size -- should set quota + self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") + + # get group metadata + group_info = json.loads(self._get_subvolume_group_info(self.volname, group)) + self.assertEqual(group_info["bytes_quota"], 1000000000) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_quota_mds_path_restriction_to_group_path(self): + """ + Tests subvolumegroup quota enforcement with mds path restriction set to group. + For quota to be enforced, read permission needs to be provided to the parent + of the directory on which quota is set. 
Please see the tracker comment [1] + [1] https://tracker.ceph.com/issues/55090#note-8 + """ + osize = self.DEFAULT_FILE_SIZE*1024*1024*100 + # create group with 100MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # Create auth_id + authid = "client.guest1" + user = json.loads(self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", authid, + "mds", "allow rw path=/volumes", + "mgr", "allow rw", + "osd", "allow rw tag cephfs *=*", + "mon", "allow r", + "--format=json-pretty" + )) + + # Prepare guest_mount with new authid + guest_mount = self.mount_b + guest_mount.umount_wait() + + # configure credentials for guest client + self._configure_guest_auth(guest_mount, "guest1", user[0]["key"]) + + # mount the subvolume + mount_path = os.path.join("/", subvolpath) + guest_mount.mount_wait(cephfs_mntpt=mount_path) + + # create 99 files of 1MB + guest_mount.run_shell_payload("mkdir -p dir1") + for i in range(99): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir1", filename), self.DEFAULT_FILE_SIZE) + try: + # write two files of 1MB file to exceed the quota + guest_mount.run_shell_payload("mkdir -p dir2") + for i in range(2): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE) + # For quota to be enforced + time.sleep(60) + # create 400 files of 1MB to exceed quota + for i in range(400): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE) + # Sometimes quota enforcement takes time. 
+ if i == 200: + time.sleep(60) + except CommandFailedError: + pass + else: + self.fail(f"expected filling subvolume {subvolname} with 400 files of size 1MB to fail") + + # clean up + guest_mount.umount_wait() + + # Delete the subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_mds_path_restriction_to_subvolume_path(self): + """ + Tests subvolumegroup quota enforcement with mds path restriction set to subvolume path + The quota should not be enforced because of the fourth limitation mentioned at + https://docs.ceph.com/en/latest/cephfs/quota/#limitations + """ + osize = self.DEFAULT_FILE_SIZE*1024*1024*100 + # create group with 100MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + mount_path = os.path.join("/", subvolpath) + + # Create auth_id + authid = "client.guest1" + user = json.loads(self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", authid, + "mds", f"allow rw path={mount_path}", + "mgr", "allow rw", + "osd", "allow rw tag cephfs *=*", + "mon", "allow r", + "--format=json-pretty" + )) + + # Prepare guest_mount with new authid + guest_mount = self.mount_b + guest_mount.umount_wait() + + # configure credentials for guest client + self._configure_guest_auth(guest_mount, "guest1", user[0]["key"]) + + # mount the subvolume + guest_mount.mount_wait(cephfs_mntpt=mount_path) + + # create 99 files of 1MB to exceed quota + guest_mount.run_shell_payload("mkdir -p dir1") + for i in range(99): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir1", filename), self.DEFAULT_FILE_SIZE) + try: + # write two files of 1MB file to exceed the quota + guest_mount.run_shell_payload("mkdir -p dir2") + for i in range(2): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE) + # For quota to be enforced + time.sleep(60) + # create 400 files of 1MB to exceed quota + for i in range(400): + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, i) + guest_mount.write_n_mb(os.path.join("dir2", filename), self.DEFAULT_FILE_SIZE) + # Sometimes quota enforcement takes time. 
+ if i == 200: + time.sleep(60) + except CommandFailedError: + self.fail(f"Quota should not be enforced, expected filling subvolume {subvolname} with 400 files of size 1MB to succeed") + + # clean up + guest_mount.umount_wait() + + # Delete the subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_exceeded_subvolume_removal(self): + """ + Tests subvolume removal if its group quota is exceeded + """ + osize = self.DEFAULT_FILE_SIZE*1024*1024*100 + # create group with 100MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create 99 files of 1MB to exceed quota + self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=99) + + try: + # write two files of 1MB file to exceed the quota + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2) + # For quota to be enforced + time.sleep(20) + # create 400 files of 1MB to exceed quota + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=400) + except CommandFailedError: + # Delete subvolume when group quota is exceeded + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + else: + self.fail(f"expected filling subvolume {subvolname} with 400 files of size 1MB to fail") + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_exceeded_subvolume_removal_retained_snaps(self): + """ + Tests retained snapshot subvolume removal if its group quota is exceeded + """ + group = self._generate_random_group_name() + subvolname = self._generate_random_subvolume_name() + snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + + osize = self.DEFAULT_FILE_SIZE*1024*1024*100 + # create group with 100MB quota + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create 99 files of 1MB to exceed quota + self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=99) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot1, "--group_name", group) + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot2, "--group_name", group) + + try: + # write two files of 1MB file to 
exceed the quota + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2) + # For quota to be enforced + time.sleep(20) + # create 400 files of 1MB to exceed quota + self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=400) + except CommandFailedError: + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group, "--retain-snapshots") + # remove snapshot1 + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot1, "--group_name", group) + # remove snapshot2 (should remove volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot2, "--group_name", group) + # verify subvolume trash is clean + self._wait_for_subvol_trash_empty(subvolname, group=group) + else: + self.fail(f"expected filling subvolume {subvolname} with 400 files of size 1MB to fail") + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_subvolume_removal(self): + """ + Tests subvolume removal if its group quota is set. + """ + # create group with size -- should set quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # remove subvolume + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume rm' command to succeed if group quota is set") + + # remove subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_legacy_subvolume_removal(self): + """ + Tests legacy subvolume removal if its group quota is set. + """ + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate an old-fashioned subvolume -- in a custom group + createpath1 = os.path.join(".", "volumes", group, subvolume) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath1], omit_sudo=False) + + # this would auto-upgrade on access without anyone noticing + subvolpath1 = self._fs_cmd("subvolume", "getpath", self.volname, subvolume, "--group-name", group) + self.assertNotEqual(subvolpath1, None) + subvolpath1 = subvolpath1.rstrip() # remove "/" prefix and any trailing newline + + # and... the subvolume path returned should be what we created behind the scenes + self.assertEqual(createpath1[1:], subvolpath1) + + # Set subvolumegroup quota on idempotent subvolumegroup creation + self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") + + # remove subvolume + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume rm' command to succeed if group quota is set") + + # remove subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_quota_v1_subvolume_removal(self): + """ + Tests v1 subvolume removal if its group quota is set. 
+ """ + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate a v1 subvolume -- in a custom group + self._create_v1_subvolume(subvolume, subvol_group=group, has_snapshot=False) + + # Set subvolumegroup quota on idempotent subvolumegroup creation + self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000") + + # remove subvolume + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume rm' command to succeed if group quota is set") + + # remove subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_resize_fail_invalid_size(self): + """ + That a subvolume group cannot be resized to an invalid size and the quota did not change + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024 + # create group with 1MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize)) + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # try to resize the subvolume with an invalid size -10 + nsize = -10 + try: + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on resize of subvolume group with invalid size") + else: + self.fail("expected the 'fs subvolumegroup resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_resize_fail_zero_size(self): + """ + That a subvolume group cannot be resized to a zero size and the quota did not change + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024 + # create group with 1MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize)) + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # try to resize the subvolume group with size 0 + nsize = 0 + try: + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, + "invalid error code on resize of subvolume group with invalid size") + else: + self.fail("expected the 'fs subvolumegroup resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_resize_quota_lt_used_size(self): + """ + That a subvolume group can be resized to a size smaller than the current used size + and the resulting quota matches the expected size. 
+ """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*20 + # create group with 20MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create one file of 10MB + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+1) + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + + usedsize = int(self.mount_a.getfattr(subvolpath, "ceph.dir.rbytes")) + + # shrink the subvolume group + nsize = usedsize // 2 + try: + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize)) + except CommandFailedError: + self.fail("expected the 'fs subvolumegroup resize' command to succeed") + + # verify the quota + size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes")) + self.assertEqual(size, nsize) + + # remove subvolume and group + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_resize_fail_quota_lt_used_size_no_shrink(self): + """ + That a subvolume group cannot be resized to a size smaller than the current used size + when --no_shrink is given and the quota did not change. 
+ """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*20 + # create group with 20MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create one file of 10MB + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+2) + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + + usedsize = int(self.mount_a.getfattr(grouppath, "ceph.dir.rbytes")) + + # shrink the subvolume group + nsize = usedsize // 2 + try: + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize), "--no_shrink") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolumegroup with quota less than used") + else: + self.fail("expected the 'fs subvolumegroup resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove subvolume and group + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_resize_expand_on_full_subvolume(self): + """ + That the subvolume group can be expanded after it is full and future write succeed + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*100 + # create group with 100MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create 99 files of 1MB + self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=99) + + try: + # write two files of 1MB file to exceed the quota + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2) + # For quota to be enforced + time.sleep(20) + # create 500 files of 1MB + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500) + except CommandFailedError: + # Not able to write. 
So expand the subvolumegroup more and try writing the files again + nsize = osize*7 + self._fs_cmd("subvolumegroup", "resize", self.volname, group, str(nsize)) + try: + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500) + except CommandFailedError: + self.fail("expected filling subvolume {0} with 500 files of size 1MB " + "to succeed".format(subvolname)) + else: + self.fail("expected filling subvolume {0} with 500 files of size 1MB " + "to fail".format(subvolname)) + + # remove subvolume and group + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_resize_infinite_size(self): + """ + That a subvolume group can be resized to an infinite size by unsetting its quota. + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024 + # create group + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize)) + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # resize inf + self._fs_cmd("subvolumegroup", "resize", self.volname, group, "inf") + + # verify that the quota is None + size = self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes") + self.assertEqual(size, None) + + # remove subvolume group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_group_resize_infinite_size_future_writes(self): + """ + That a subvolume group can be resized to an infinite size and the future writes succeed. + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*5 + # create group with 5MB quota + group = self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group, + "--size", str(osize), "--mode=777") + + # make sure it exists + grouppath = self._get_subvolume_group_path(self.volname, group) + self.assertNotEqual(grouppath, None) + + # create subvolume under the group + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, + "--group_name", group, "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname, group_name=group) + self.assertNotEqual(subvolpath, None) + + # create 4 files of 1MB + self._do_subvolume_io(subvolname, subvolume_group=group, number_of_files=4) + + try: + # write two files of 1MB file to exceed the quota + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=2) + # For quota to be enforced + time.sleep(20) + # create 500 files of 1MB + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500) + except CommandFailedError: + # Not able to write. 
So resize subvolumegroup to 'inf' and try writing the files again + # resize inf + self._fs_cmd("subvolumegroup", "resize", self.volname, group, "inf") + try: + self._do_subvolume_io(subvolname, subvolume_group=group, create_dir='dir1', number_of_files=500) + except CommandFailedError: + self.fail("expected filling subvolume {0} with 500 files of size 1MB " + "to succeed".format(subvolname)) + else: + self.fail("expected filling subvolume {0} with 500 files of size 1MB " + "to fail".format(subvolname)) + + + # verify that the quota is None + size = self.mount_a.getfattr(grouppath, "ceph.quota.max_bytes") + self.assertEqual(size, None) + + # remove subvolume and group + self._fs_cmd("subvolume", "rm", self.volname, subvolname, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_ls(self): + # tests the 'fs subvolumegroup ls' command + + subvolumegroups = [] + + #create subvolumegroups + subvolumegroups = self._generate_random_group_name(3) + for groupname in subvolumegroups: + self._fs_cmd("subvolumegroup", "create", self.volname, groupname) + + subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname)) + if len(subvolumegroupls) == 0: + raise RuntimeError("Expected the 'fs subvolumegroup ls' command to list the created subvolume groups") + else: + subvolgroupnames = [subvolumegroup['name'] for subvolumegroup in subvolumegroupls] + if collections.Counter(subvolgroupnames) != collections.Counter(subvolumegroups): + raise RuntimeError("Error creating or listing subvolume groups") + + def test_subvolume_group_ls_filter(self): + # tests the 'fs subvolumegroup ls' command filters '_deleting' directory + + subvolumegroups = [] + + #create subvolumegroup + subvolumegroups = self._generate_random_group_name(3) + for groupname in subvolumegroups: + self._fs_cmd("subvolumegroup", "create", self.volname, groupname) + + # create subvolume and remove. This creates '_deleting' directory. 
+ subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname)) + subvolgroupnames = [subvolumegroup['name'] for subvolumegroup in subvolumegroupls] + if "_deleting" in subvolgroupnames: + self.fail("Listing subvolume groups listed '_deleting' directory") + + def test_subvolume_group_ls_filter_internal_directories(self): + # tests the 'fs subvolumegroup ls' command filters internal directories + # eg: '_deleting', '_nogroup', '_index', "_legacy" + + subvolumegroups = self._generate_random_group_name(3) + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + #create subvolumegroups + for groupname in subvolumegroups: + self._fs_cmd("subvolumegroup", "create", self.volname, groupname) + + # create subvolume which will create '_nogroup' directory + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # create snapshot + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # clone snapshot which will create '_index' directory + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # wait for clone to complete + self._wait_for_clone_to_complete(clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume which will create '_deleting' directory + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # list subvolumegroups + ret = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname)) + self.assertEqual(len(ret), len(subvolumegroups)) + + ret_list = [subvolumegroup['name'] for subvolumegroup in ret] + self.assertEqual(len(ret_list), len(subvolumegroups)) + + self.assertEqual(all(elem in subvolumegroups for elem in ret_list), True) + + # cleanup + self._fs_cmd("subvolume", "rm", self.volname, clone) + for groupname in subvolumegroups: + self._fs_cmd("subvolumegroup", "rm", self.volname, groupname) + + def test_subvolume_group_ls_for_nonexistent_volume(self): + # tests the 'fs subvolumegroup ls' command when /volume doesn't exist + # prerequisite: we expect that the test volume is created and a subvolumegroup is NOT created + + # list subvolume groups + subvolumegroupls = json.loads(self._fs_cmd('subvolumegroup', 'ls', self.volname)) + if len(subvolumegroupls) > 0: + raise RuntimeError("Expected the 'fs subvolumegroup ls' command to output an empty list") + + def test_subvolumegroup_pin_distributed(self): + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + self.config_set('mds', 'mds_export_ephemeral_distributed', True) + + group = "pinme" + self._fs_cmd("subvolumegroup", "create", self.volname, group) + self._fs_cmd("subvolumegroup", "pin", self.volname, group, "distributed", "True") + subvolumes = self._generate_random_subvolume_name(50) + for subvolume in subvolumes: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + self._wait_distributed_subtrees(2 * 2, status=status, rank="all") + + # remove subvolumes + for subvolume in subvolumes: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_group_rm_force(self): + # test removing non-existing subvolume group with --force + group = 
self._generate_random_group_name() + try: + self._fs_cmd("subvolumegroup", "rm", self.volname, group, "--force") + except CommandFailedError: + raise RuntimeError("expected the 'fs subvolumegroup rm --force' command to succeed") + + def test_subvolume_group_exists_with_subvolumegroup_and_no_subvolume(self): + """Test the presence of any subvolumegroup when only subvolumegroup is present""" + + group = self._generate_random_group_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "subvolumegroup exists") + # delete subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + + def test_subvolume_group_exists_with_no_subvolumegroup_and_subvolume(self): + """Test the presence of any subvolumegroup when no subvolumegroup is present""" + + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + + def test_subvolume_group_exists_with_subvolumegroup_and_subvolume(self): + """Test the presence of any subvolume when subvolumegroup + and subvolume both are present""" + + group = self._generate_random_group_name() + subvolume = self._generate_random_subvolume_name(2) + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume[0], "--group_name", group) + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume[1]) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "subvolumegroup exists") + # delete subvolume in group + self._fs_cmd("subvolume", "rm", self.volname, subvolume[0], "--group_name", group) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "subvolumegroup exists") + # delete subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume[1]) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "subvolumegroup exists") + # delete subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + + def test_subvolume_group_exists_without_subvolumegroup_and_with_subvolume(self): + """Test the presence of any subvolume when subvolume is present + but no subvolumegroup is present""" + + subvolume = self._generate_random_subvolume_name() + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + # delete subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + ret = self._fs_cmd("subvolumegroup", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + + +class TestSubvolumes(TestVolumesHelper): + """Tests for FS subvolume operations, except snapshot and snapshot clone.""" + def test_async_subvolume_rm(self): + subvolumes = self._generate_random_subvolume_name(100) + + # create subvolumes + for subvolume in subvolumes: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + self._do_subvolume_io(subvolume, 
number_of_files=10) + + self.mount_a.umount_wait() + + # remove subvolumes + for subvolume in subvolumes: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + self.mount_a.mount_wait() + + # verify trash dir is clean + self._wait_for_trash_empty(timeout=300) + + def test_default_uid_gid_subvolume(self): + subvolume = self._generate_random_subvolume_name() + expected_uid = 0 + expected_gid = 0 + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + subvol_path = self._get_subvolume_path(self.volname, subvolume) + + # check subvolume's uid and gid + stat = self.mount_a.stat(subvol_path) + self.assertEqual(stat['st_uid'], expected_uid) + self.assertEqual(stat['st_gid'], expected_gid) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_nonexistent_subvolume_rm(self): + # remove non-existing subvolume + subvolume = "non_existent_subvolume" + + # try, remove subvolume + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolume rm' command to fail") + + def test_subvolume_create_and_rm(self): + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # make sure it exists + subvolpath = self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + self.assertNotEqual(subvolpath, None) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + # make sure its gone + try: + self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolume getpath' command to fail. 
Subvolume not removed.") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_and_rm_in_group(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_create_idempotence(self): + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # try creating w/ same subvolume name -- should be idempotent + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_idempotence_resize(self): + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # try creating w/ same subvolume name with size -- should set quota + self._fs_cmd("subvolume", "create", self.volname, subvolume, "1000000000") + + # get subvolume metadata + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + self.assertEqual(subvol_info["bytes_quota"], 1000000000) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_idempotence_mode(self): + # default mode + default_mode = "755" + + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + subvol_path = self._get_subvolume_path(self.volname, subvolume) + + actual_mode_1 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode_1, default_mode) + + # try creating w/ same subvolume name with --mode 777 + new_mode = "777" + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", new_mode) + + actual_mode_2 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode_2, new_mode) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_idempotence_without_passing_mode(self): + # create subvolume + desired_mode = "777" + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", desired_mode) + + subvol_path = self._get_subvolume_path(self.volname, subvolume) + + actual_mode_1 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode_1, desired_mode) + + # default mode + default_mode = "755" + + # try creating w/ same subvolume name without passing --mode argument + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + actual_mode_2 = self.mount_a.run_shell(['stat', '-c' '%a', subvol_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode_2, default_mode) + + # remove subvolume + self._fs_cmd("subvolume", "rm", 
self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_isolated_namespace(self): + """ + Create subvolume in separate rados namespace + """ + + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--namespace-isolated") + + # get subvolume metadata + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + self.assertNotEqual(len(subvol_info), 0) + self.assertEqual(subvol_info["pool_namespace"], "fsvolumens_" + subvolume) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_auto_cleanup_on_fail(self): + subvolume = self._generate_random_subvolume_name() + data_pool = "invalid_pool" + # create subvolume with invalid data pool layout fails + with self.assertRaises(CommandFailedError): + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool) + + # check whether subvol path is cleaned up + try: + self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of non-existent subvolume") + else: + self.fail("expected the 'fs subvolume getpath' command to fail") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_desired_data_pool_layout_in_group(self): + subvol1, subvol2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + # create group. this also helps set default pool layout for subvolumes + # created within the group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. 
+ self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group) + subvol1_path = self._get_subvolume_path(self.volname, subvol1, group_name=group) + + default_pool = self.mount_a.getfattr(subvol1_path, "ceph.dir.layout.pool") + new_pool = "new_pool" + self.assertNotEqual(default_pool, new_pool) + + # add data pool + newid = self.fs.add_data_pool(new_pool) + + # create subvolume specifying the new data pool as its pool layout + self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group, + "--pool_layout", new_pool) + subvol2_path = self._get_subvolume_path(self.volname, subvol2, group_name=group) + + desired_pool = self.mount_a.getfattr(subvol2_path, "ceph.dir.layout.pool") + try: + self.assertEqual(desired_pool, new_pool) + except AssertionError: + self.assertEqual(int(desired_pool), newid) # old kernel returns id + + self._fs_cmd("subvolume", "rm", self.volname, subvol2, group) + self._fs_cmd("subvolume", "rm", self.volname, subvol1, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_desired_mode(self): + subvol1 = self._generate_random_subvolume_name() + + # default mode + default_mode = "755" + # desired mode + desired_mode = "777" + + self._fs_cmd("subvolume", "create", self.volname, subvol1, "--mode", "777") + + subvol1_path = self._get_subvolume_path(self.volname, subvol1) + + # check subvolumegroup's mode + subvol_par_path = os.path.dirname(subvol1_path) + group_path = os.path.dirname(subvol_par_path) + actual_mode1 = self.mount_a.run_shell(['stat', '-c' '%a', group_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode1, default_mode) + # check /volumes mode + volumes_path = os.path.dirname(group_path) + actual_mode2 = self.mount_a.run_shell(['stat', '-c' '%a', volumes_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode2, default_mode) + # check subvolume's mode + actual_mode3 = self.mount_a.run_shell(['stat', '-c' '%a', subvol1_path]).stdout.getvalue().strip() + self.assertEqual(actual_mode3, desired_mode) + + self._fs_cmd("subvolume", "rm", self.volname, subvol1) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_desired_mode_in_group(self): + subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3) + + group = self._generate_random_group_name() + # default mode + expected_mode1 = "755" + # desired mode + expected_mode2 = "777" + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group, "--mode", "777") + # check whether mode 0777 also works + self._fs_cmd("subvolume", "create", self.volname, subvol3, "--group_name", group, "--mode", "0777") + + subvol1_path = self._get_subvolume_path(self.volname, subvol1, group_name=group) + subvol2_path = self._get_subvolume_path(self.volname, subvol2, group_name=group) + subvol3_path = self._get_subvolume_path(self.volname, subvol3, group_name=group) + + # check subvolume's mode + actual_mode1 = self.mount_a.run_shell(['stat', '-c' '%a', subvol1_path]).stdout.getvalue().strip() + actual_mode2 = self.mount_a.run_shell(['stat', '-c' '%a', subvol2_path]).stdout.getvalue().strip() + actual_mode3 = self.mount_a.run_shell(['stat', '-c' '%a', subvol3_path]).stdout.getvalue().strip() + 
self.assertEqual(actual_mode1, expected_mode1) + self.assertEqual(actual_mode2, expected_mode2) + self.assertEqual(actual_mode3, expected_mode2) + + self._fs_cmd("subvolume", "rm", self.volname, subvol1, group) + self._fs_cmd("subvolume", "rm", self.volname, subvol2, group) + self._fs_cmd("subvolume", "rm", self.volname, subvol3, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_desired_uid_gid(self): + """ + That the subvolume can be created with the desired uid and gid and its uid and gid match the + expected values. + """ + uid = 1000 + gid = 1000 + + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--uid", str(uid), "--gid", str(gid)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # verify the uid and gid + suid = int(self.mount_a.run_shell(['stat', '-c' '%u', subvolpath]).stdout.getvalue().strip()) + sgid = int(self.mount_a.run_shell(['stat', '-c' '%g', subvolpath]).stdout.getvalue().strip()) + self.assertEqual(uid, suid) + self.assertEqual(gid, sgid) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_invalid_data_pool_layout(self): + subvolume = self._generate_random_subvolume_name() + data_pool = "invalid_pool" + # create subvolume with invalid data pool layout + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on create of subvolume with invalid pool layout") + else: + self.fail("expected the 'fs subvolume create' command to fail") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_with_invalid_size(self): + # create subvolume with an invalid size -1 + subvolume = self._generate_random_subvolume_name() + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--size", "-1") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on create of subvolume with invalid size") + else: + self.fail("expected the 'fs subvolume create' command to fail") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_create_and_ls_providing_group_as_nogroup(self): + """ + That 'subvolume create' and 'subvolume ls' should throw a + permission denied error if the option --group_name=_nogroup is provided. 
+ """ + + subvolname = self._generate_random_subvolume_name() + + # try to create subvolume providing --group_name=_nogroup option + try: + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", "_nogroup") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM) + else: + self.fail("expected the 'fs subvolume create' command to fail") + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolname) + + # try to list subvolumes providing --group_name=_nogroup option + try: + self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_nogroup") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM) + else: + self.fail("expected the 'fs subvolume ls' command to fail") + + # list subvolumes + self._fs_cmd("subvolume", "ls", self.volname) + + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_expand(self): + """ + That a subvolume can be expanded in size and its quota matches the expected size. + """ + + # create subvolume + subvolname = self._generate_random_subvolume_name() + osize = self.DEFAULT_FILE_SIZE*1024*1024 + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # expand the subvolume + nsize = osize*2 + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + + # verify the quota + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, nsize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_info(self): + # tests the 'fs subvolume info' command + + subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime", + "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace", + "type", "uid", "features", "state"] + + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # get subvolume metadata + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + for md in subvol_md: + self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) + + self.assertEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should be set to undefined if quota is not set") + self.assertEqual(subvol_info["bytes_quota"], "infinite", "bytes_quota should be set to infinite if quota is not set") + self.assertEqual(subvol_info["pool_namespace"], "", "expected pool namespace to be empty") + self.assertEqual(subvol_info["state"], "complete", "expected state to be complete") + + self.assertEqual(len(subvol_info["features"]), 3, + msg="expected 3 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) + for feature in ['snapshot-clone', 'snapshot-autoprotect', 'snapshot-retention']: + self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature)) + + nsize = self.DEFAULT_FILE_SIZE*1024*1024 + self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize)) + + # get subvolume metadata after quota set + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + for md in subvol_md: + self.assertIn(md, 
subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) + + self.assertNotEqual(subvol_info["bytes_pcent"], "undefined", "bytes_pcent should not be set to undefined if quota is not set") + self.assertEqual(subvol_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize)) + self.assertEqual(subvol_info["type"], "subvolume", "type should be set to subvolume") + self.assertEqual(subvol_info["state"], "complete", "expected state to be complete") + + self.assertEqual(len(subvol_info["features"]), 3, + msg="expected 3 features, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) + for feature in ['snapshot-clone', 'snapshot-autoprotect', 'snapshot-retention']: + self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature)) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_ls(self): + # tests the 'fs subvolume ls' command + + subvolumes = [] + + # create subvolumes + subvolumes = self._generate_random_subvolume_name(3) + for subvolume in subvolumes: + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # list subvolumes + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + if len(subvolumels) == 0: + self.fail("Expected the 'fs subvolume ls' command to list the created subvolumes.") + else: + subvolnames = [subvolume['name'] for subvolume in subvolumels] + if collections.Counter(subvolnames) != collections.Counter(subvolumes): + self.fail("Error creating or listing subvolumes") + + # remove subvolume + for subvolume in subvolumes: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_ls_with_groupname_as_internal_directory(self): + # tests the 'fs subvolume ls' command when the default groupname as internal directories + # Eg: '_nogroup', '_legacy', '_deleting', '_index'. 
+ # Expecting 'fs subvolume ls' will be fail with errno EINVAL for '_legacy', '_deleting', '_index' + # Expecting 'fs subvolume ls' will be fail with errno EPERM for '_nogroup' + + # try to list subvolumes providing --group_name=_nogroup option + try: + self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_nogroup") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM) + else: + self.fail("expected the 'fs subvolume ls' command to fail with error 'EPERM' for _nogroup") + + # try to list subvolumes providing --group_name=_legacy option + try: + self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_legacy") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL) + else: + self.fail("expected the 'fs subvolume ls' command to fail with error 'EINVAL' for _legacy") + + # try to list subvolumes providing --group_name=_deleting option + try: + self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_deleting") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL) + else: + self.fail("expected the 'fs subvolume ls' command to fail with error 'EINVAL' for _deleting") + + # try to list subvolumes providing --group_name=_index option + try: + self._fs_cmd("subvolume", "ls", self.volname, "--group_name", "_index") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL) + else: + self.fail("expected the 'fs subvolume ls' command to fail with error 'EINVAL' for _index") + + def test_subvolume_ls_for_notexistent_default_group(self): + # tests the 'fs subvolume ls' command when the default group '_nogroup' doesn't exist + # prerequisite: we expect that the volume is created and the default group _nogroup is + # NOT created (i.e. 
a subvolume without group is not created) + + # list subvolumes + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + if len(subvolumels) > 0: + raise RuntimeError("Expected the 'fs subvolume ls' command to output an empty list.") + + def test_subvolume_marked(self): + """ + ensure a subvolume is marked with the ceph.dir.subvolume xattr + """ + subvolume = self._generate_random_subvolume_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # getpath + subvolpath = self._get_subvolume_path(self.volname, subvolume) + + # subdirectory of a subvolume cannot be moved outside the subvolume once marked with + # the xattr ceph.dir.subvolume, hence test by attempting to rename subvol path (incarnation) + # outside the subvolume + dstpath = os.path.join(self.mount_a.mountpoint, 'volumes', '_nogroup', 'new_subvol_location') + srcpath = os.path.join(self.mount_a.mountpoint, subvolpath) + rename_script = dedent(""" + import os + import errno + try: + os.rename("{src}", "{dst}") + except OSError as e: + if e.errno != errno.EXDEV: + raise RuntimeError("invalid error code on renaming subvolume incarnation out of subvolume directory") + else: + raise RuntimeError("expected renaming subvolume incarnation out of subvolume directory to fail") + """) + self.mount_a.run_python(rename_script.format(src=srcpath, dst=dstpath), sudo=True) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_pin_export(self): + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + self._fs_cmd("subvolume", "pin", self.volname, subvolume, "export", "1") + path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + path = os.path.dirname(path) # get subvolume path + + self._get_subtrees(status=status, rank=1) + self._wait_subtrees([(path, 1)], status=status) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + ### authorize operations + + def test_authorize_deauthorize_legacy_subvolume(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + authid = "alice" + + guest_mount = self.mount_b + guest_mount.umount_wait() + + # emulate a old-fashioned subvolume in a custom group + createpath = os.path.join(".", "volumes", group, subvolume) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + mount_path = os.path.join("/", "volumes", group, subvolume) + + # authorize guest authID read-write access to subvolume + key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid, + "--group_name", group, "--tenant_id", "tenant_id") + + # guest authID should exist + existing_ids = [a['entity'] for a in self.auth_list()] + self.assertIn("client.{0}".format(authid), existing_ids) + + # configure credentials for guest client + self._configure_guest_auth(guest_mount, authid, key) + + # mount the subvolume, and write to it + guest_mount.mount_wait(cephfs_mntpt=mount_path) + guest_mount.write_n_mb("data.bin", 1) + + # authorize guest authID read access to subvolume 
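+ # Note: re-running 'subvolume authorize' for the same auth ID with '--access_level', 'r'
+ # is expected to replace the earlier read-write grant with a read-only one; as the checks
+ # below show, the guest only observes the downgrade after remounting the subvolume.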
+ key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid, + "--group_name", group, "--tenant_id", "tenant_id", "--access_level", "r") + + # guest client sees the change in access level to read only after a + # remount of the subvolume. + guest_mount.umount_wait() + guest_mount.mount_wait(cephfs_mntpt=mount_path) + + # read existing content of the subvolume + self.assertListEqual(guest_mount.ls(guest_mount.mountpoint), ["data.bin"]) + # cannot write into read-only subvolume + with self.assertRaises(CommandFailedError): + guest_mount.write_n_mb("rogue.bin", 1) + + # cleanup + guest_mount.umount_wait() + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid, + "--group_name", group) + # guest authID should no longer exist + existing_ids = [a['entity'] for a in self.auth_list()] + self.assertNotIn("client.{0}".format(authid), existing_ids) + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_authorize_deauthorize_subvolume(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + authid = "alice" + + guest_mount = self.mount_b + guest_mount.umount_wait() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group, "--mode=777") + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + mount_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume, + "--group_name", group).rstrip() + + # authorize guest authID read-write access to subvolume + key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid, + "--group_name", group, "--tenant_id", "tenant_id") + + # guest authID should exist + existing_ids = [a['entity'] for a in self.auth_list()] + self.assertIn("client.{0}".format(authid), existing_ids) + + # configure credentials for guest client + self._configure_guest_auth(guest_mount, authid, key) + + # mount the subvolume, and write to it + guest_mount.mount_wait(cephfs_mntpt=mount_path) + guest_mount.write_n_mb("data.bin", 1) + + # authorize guest authID read access to subvolume + key = self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid, + "--group_name", group, "--tenant_id", "tenant_id", "--access_level", "r") + + # guest client sees the change in access level to read only after a + # remount of the subvolume. + guest_mount.umount_wait() + guest_mount.mount_wait(cephfs_mntpt=mount_path) + + # read existing content of the subvolume + self.assertListEqual(guest_mount.ls(guest_mount.mountpoint), ["data.bin"]) + # cannot write into read-only subvolume + with self.assertRaises(CommandFailedError): + guest_mount.write_n_mb("rogue.bin", 1) + + # cleanup + guest_mount.umount_wait() + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid, + "--group_name", group) + # guest authID should no longer exist + existing_ids = [a['entity'] for a in self.auth_list()] + self.assertNotIn("client.{0}".format(authid), existing_ids) + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_multitenant_subvolumes(self): + """ + That subvolume access can be restricted to a tenant. + + That metadata used to enforce tenant isolation of + subvolumes is stored as a two-way mapping between auth + IDs and subvolumes that they're authorized to access. 
+ """ + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + guest_mount = self.mount_b + + # Guest clients belonging to different tenants, but using the same + # auth ID. + auth_id = "alice" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + guestclient_2 = { + "auth_id": auth_id, + "tenant_id": "tenant2", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Check that subvolume metadata file is created on subvolume creation. + subvol_metadata_filename = "_{0}:{1}.meta".format(group, subvolume) + self.assertIn(subvol_metadata_filename, guest_mount.ls("volumes")) + + # Authorize 'guestclient_1', using auth ID 'alice' and belonging to + # 'tenant1', with 'rw' access to the volume. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Check that auth metadata file for auth ID 'alice', is + # created on authorizing 'alice' access to the subvolume. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, guest_mount.ls("volumes")) + + # Verify that the auth metadata file stores the tenant ID that the + # auth ID belongs to, the auth ID's authorized access levels + # for different subvolumes, versioning details, etc. + expected_auth_metadata = { + "version": 5, + "compat_version": 6, + "dirty": False, + "tenant_id": "tenant1", + "subvolumes": { + "{0}/{1}".format(group,subvolume): { + "dirty": False, + "access_level": "rw" + } + } + } + + auth_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(auth_metadata_filename))) + self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"]) + del expected_auth_metadata["version"] + del auth_metadata["version"] + self.assertEqual(expected_auth_metadata, auth_metadata) + + # Verify that the subvolume metadata file stores info about auth IDs + # and their access levels to the subvolume, versioning details, etc. + expected_subvol_metadata = { + "version": 1, + "compat_version": 1, + "auths": { + "alice": { + "dirty": False, + "access_level": "rw" + } + } + } + subvol_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(subvol_metadata_filename))) + + self.assertGreaterEqual(subvol_metadata["version"], expected_subvol_metadata["version"]) + del expected_subvol_metadata["version"] + del subvol_metadata["version"] + self.assertEqual(expected_subvol_metadata, subvol_metadata) + + # Cannot authorize 'guestclient_2' to access the volume. + # It uses auth ID 'alice', which has already been used by a + # 'guestclient_1' belonging to an another tenant for accessing + # the volume. + + try: + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_2["auth_id"], + "--group_name", group, "--tenant_id", guestclient_2["tenant_id"]) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, + "Invalid error code returned on authorize of subvolume with same auth_id but different tenant_id") + else: + self.fail("expected the 'fs subvolume authorize' command to fail") + + # Check that auth metadata file is cleaned up on removing + # auth ID's only access to a volume. 
+ + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, + "--group_name", group) + self.assertNotIn(auth_metadata_filename, guest_mount.ls("volumes")) + + # Check that subvolume metadata file is cleaned up on subvolume deletion. + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self.assertNotIn(subvol_metadata_filename, guest_mount.ls("volumes")) + + # clean up + guest_mount.umount_wait() + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_authorized_list(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + authid1 = "alice" + authid2 = "guest1" + authid3 = "guest2" + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # authorize alice authID read-write access to subvolume + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid1, + "--group_name", group) + # authorize guest1 authID read-write access to subvolume + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid2, + "--group_name", group) + # authorize guest2 authID read access to subvolume + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, authid3, + "--group_name", group, "--access_level", "r") + + # list authorized-ids of the subvolume + expected_auth_list = [{'alice': 'rw'}, {'guest1': 'rw'}, {'guest2': 'r'}] + auth_list = json.loads(self._fs_cmd('subvolume', 'authorized_list', self.volname, subvolume, "--group_name", group)) + self.assertCountEqual(expected_auth_list, auth_list) + + # cleanup + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid1, + "--group_name", group) + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid2, + "--group_name", group) + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, authid3, + "--group_name", group) + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_authorize_auth_id_not_created_by_mgr_volumes(self): + """ + If the auth_id already exists and is not created by mgr plugin, + it's not allowed to authorize the auth-id by default. 
+ """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # Create auth_id + self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", "client.guest1", + "mds", "allow *", + "osd", "allow rw", + "mon", "allow *" + ) + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + try: + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EPERM, + "Invalid error code returned on authorize of subvolume for auth_id created out of band") + else: + self.fail("expected the 'fs subvolume authorize' command to fail") + + # clean up + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_authorize_allow_existing_id_option(self): + """ + If the auth_id already exists and is not created by mgr volumes, + it's not allowed to authorize the auth-id by default but is + allowed with option allow_existing_id. + """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # Create auth_id + self.fs.mon_manager.raw_cluster_cmd( + "auth", "get-or-create", "client.guest1", + "mds", "allow *", + "osd", "allow rw", + "mon", "allow *" + ) + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Cannot authorize 'guestclient_1' to access the volume by default, + # which already exists and not created by mgr volumes but is allowed + # with option 'allow_existing_id'. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"], "--allow-existing-id") + + # clean up + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, + "--group_name", group) + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_deauthorize_auth_id_after_out_of_band_update(self): + """ + If the auth_id authorized by mgr/volumes plugin is updated + out of band, the auth_id should not be deleted after a + deauthorize. It should only remove caps associated with it. + """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Authorize 'guestclient_1' to access the subvolume. 
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + subvol_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume, + "--group_name", group).rstrip() + + # Update caps for guestclient_1 out of band + out = self.fs.mon_manager.raw_cluster_cmd( + "auth", "caps", "client.guest1", + "mds", "allow rw path=/volumes/{0}, allow rw path={1}".format(group, subvol_path), + "osd", "allow rw pool=cephfs_data", + "mon", "allow r", + "mgr", "allow *" + ) + + # Deauthorize guestclient_1 + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, "--group_name", group) + + # Validate the caps of guestclient_1 after deauthorize. It should not have deleted + # guestclient_1. The mgr and mds caps should be present which was updated out of band. + out = json.loads(self.fs.mon_manager.raw_cluster_cmd("auth", "get", "client.guest1", "--format=json-pretty")) + + self.assertEqual("client.guest1", out[0]["entity"]) + self.assertEqual("allow rw path=/volumes/{0}".format(group), out[0]["caps"]["mds"]) + self.assertEqual("allow *", out[0]["caps"]["mgr"]) + self.assertNotIn("osd", out[0]["caps"]) + + # clean up + out = self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_recover_auth_metadata_during_authorize(self): + """ + That auth metadata manager can recover from partial auth updates using + metadata files, which store auth info and its update status info. This + test validates the recovery during authorize. + """ + + guest_mount = self.mount_b + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Authorize 'guestclient_1' to access the subvolume. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Check that auth metadata file for auth ID 'guest1', is + # created on authorizing 'guest1' access to the subvolume. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, guest_mount.ls("volumes")) + expected_auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename))) + + # Induce partial auth update state by modifying the auth metadata file, + # and then run authorize again. + guest_mount.run_shell(['sudo', 'sed', '-i', 's/false/true/g', 'volumes/{0}'.format(auth_metadata_filename)], omit_sudo=False) + + # Authorize 'guestclient_1' to access the subvolume. 
+ self._fs_cmd("subvolume", "authorize", self.volname, subvolume, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename))) + self.assertEqual(auth_metadata_content, expected_auth_metadata_content) + + # clean up + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, "--group_name", group) + guest_mount.umount_wait() + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_recover_auth_metadata_during_deauthorize(self): + """ + That auth metadata manager can recover from partial auth updates using + metadata files, which store auth info and its update status info. This + test validates the recovery during deauthorize. + """ + + guest_mount = self.mount_b + + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + guestclient_1 = { + "auth_id": "guest1", + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolumes in group + self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--group_name", group) + + # Authorize 'guestclient_1' to access the subvolume1. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume1, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Check that auth metadata file for auth ID 'guest1', is + # created on authorizing 'guest1' access to the subvolume1. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, guest_mount.ls("volumes")) + expected_auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename))) + + # Authorize 'guestclient_1' to access the subvolume2. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume2, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Induce partial auth update state by modifying the auth metadata file, + # and then run de-authorize. + guest_mount.run_shell(['sudo', 'sed', '-i', 's/false/true/g', 'volumes/{0}'.format(auth_metadata_filename)], omit_sudo=False) + + # Deauthorize 'guestclient_1' to access the subvolume2. 
+ self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, guestclient_1["auth_id"], + "--group_name", group) + + auth_metadata_content = self._auth_metadata_get(self.mount_a.read_file("volumes/{0}".format(auth_metadata_filename))) + self.assertEqual(auth_metadata_content, expected_auth_metadata_content) + + # clean up + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, "guest1", "--group_name", group) + guest_mount.umount_wait() + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_update_old_style_auth_metadata_to_new_during_authorize(self): + """ + CephVolumeClient stores the subvolume data in auth metadata file with + 'volumes' key as there was no subvolume namespace. It doesn't makes sense + with mgr/volumes. This test validates the transparent update of 'volumes' + key to 'subvolumes' key in auth metadata file during authorize. + """ + + guest_mount = self.mount_b + + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolumes in group + self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--group_name", group) + + # Authorize 'guestclient_1' to access the subvolume1. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume1, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Check that auth metadata file for auth ID 'guest1', is + # created on authorizing 'guest1' access to the subvolume1. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, guest_mount.ls("volumes")) + + # Replace 'subvolumes' to 'volumes', old style auth-metadata file + guest_mount.run_shell(['sudo', 'sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)], omit_sudo=False) + + # Authorize 'guestclient_1' to access the subvolume2. 
This should transparently update 'volumes' to 'subvolumes' + self._fs_cmd("subvolume", "authorize", self.volname, subvolume2, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + expected_auth_metadata = { + "version": 5, + "compat_version": 6, + "dirty": False, + "tenant_id": "tenant1", + "subvolumes": { + "{0}/{1}".format(group,subvolume1): { + "dirty": False, + "access_level": "rw" + }, + "{0}/{1}".format(group,subvolume2): { + "dirty": False, + "access_level": "rw" + } + } + } + + auth_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(auth_metadata_filename))) + + self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"]) + del expected_auth_metadata["version"] + del auth_metadata["version"] + self.assertEqual(expected_auth_metadata, auth_metadata) + + # clean up + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, auth_id, "--group_name", group) + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, auth_id, "--group_name", group) + guest_mount.umount_wait() + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_update_old_style_auth_metadata_to_new_during_deauthorize(self): + """ + CephVolumeClient stores the subvolume data in auth metadata file with + 'volumes' key as there was no subvolume namespace. It doesn't makes sense + with mgr/volumes. This test validates the transparent update of 'volumes' + key to 'subvolumes' key in auth metadata file during deauthorize. + """ + + guest_mount = self.mount_b + + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + auth_id = "guest1" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolumes in group + self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--group_name", group) + + # Authorize 'guestclient_1' to access the subvolume1. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume1, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Authorize 'guestclient_1' to access the subvolume2. + self._fs_cmd("subvolume", "authorize", self.volname, subvolume2, guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + # Check that auth metadata file for auth ID 'guest1', is created. + auth_metadata_filename = "${0}.meta".format(guestclient_1["auth_id"]) + self.assertIn(auth_metadata_filename, guest_mount.ls("volumes")) + + # Replace 'subvolumes' to 'volumes', old style auth-metadata file + guest_mount.run_shell(['sudo', 'sed', '-i', 's/subvolumes/volumes/g', 'volumes/{0}'.format(auth_metadata_filename)], omit_sudo=False) + + # Deauthorize 'guestclient_1' to access the subvolume2. 
This should update 'volumes' to subvolumes' + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, auth_id, "--group_name", group) + + expected_auth_metadata = { + "version": 5, + "compat_version": 6, + "dirty": False, + "tenant_id": "tenant1", + "subvolumes": { + "{0}/{1}".format(group,subvolume1): { + "dirty": False, + "access_level": "rw" + } + } + } + + auth_metadata = self._auth_metadata_get(guest_mount.read_file("volumes/{0}".format(auth_metadata_filename))) + + self.assertGreaterEqual(auth_metadata["version"], expected_auth_metadata["version"]) + del expected_auth_metadata["version"] + del auth_metadata["version"] + self.assertEqual(expected_auth_metadata, auth_metadata) + + # clean up + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, auth_id, "--group_name", group) + guest_mount.umount_wait() + self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1") + self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_evict_client(self): + """ + That a subvolume client can be evicted based on the auth ID + """ + + subvolumes = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # mounts[0] and mounts[1] would be used as guests to mount the volumes/shares. + for i in range(0, 2): + self.mounts[i].umount_wait() + guest_mounts = (self.mounts[0], self.mounts[1]) + auth_id = "guest" + guestclient_1 = { + "auth_id": auth_id, + "tenant_id": "tenant1", + } + + # Create two subvolumes. Authorize 'guest' auth ID to mount the two + # subvolumes. Mount the two subvolumes. Write data to the volumes. + for i in range(2): + # Create subvolume. + self._fs_cmd("subvolume", "create", self.volname, subvolumes[i], "--group_name", group, "--mode=777") + + # authorize guest authID read-write access to subvolume + key = self._fs_cmd("subvolume", "authorize", self.volname, subvolumes[i], guestclient_1["auth_id"], + "--group_name", group, "--tenant_id", guestclient_1["tenant_id"]) + + mount_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolumes[i], + "--group_name", group).rstrip() + # configure credentials for guest client + self._configure_guest_auth(guest_mounts[i], auth_id, key) + + # mount the subvolume, and write to it + guest_mounts[i].mount_wait(cephfs_mntpt=mount_path) + guest_mounts[i].write_n_mb("data.bin", 1) + + # Evict client, guest_mounts[0], using auth ID 'guest' and has mounted + # one volume. + self._fs_cmd("subvolume", "evict", self.volname, subvolumes[0], auth_id, "--group_name", group) + + # Evicted guest client, guest_mounts[0], should not be able to do + # anymore metadata ops. It should start failing all operations + # when it sees that its own address is in the blocklist. + try: + guest_mounts[0].write_n_mb("rogue.bin", 1) + except CommandFailedError: + pass + else: + raise RuntimeError("post-eviction write should have failed!") + + # The blocklisted guest client should now be unmountable + guest_mounts[0].umount_wait() + + # Guest client, guest_mounts[1], using the same auth ID 'guest', but + # has mounted the other volume, should be able to use its volume + # unaffected. + guest_mounts[1].write_n_mb("data.bin.1", 1) + + # Cleanup. 
+ guest_mounts[1].umount_wait() + for i in range(2): + self._fs_cmd("subvolume", "deauthorize", self.volname, subvolumes[i], auth_id, "--group_name", group) + self._fs_cmd("subvolume", "rm", self.volname, subvolumes[i], "--group_name", group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_pin_random(self): + self.fs.set_max_mds(2) + self.fs.wait_for_daemons() + self.config_set('mds', 'mds_export_ephemeral_random', True) + + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + self._fs_cmd("subvolume", "pin", self.volname, subvolume, "random", ".01") + # no verification + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_fail_invalid_size(self): + """ + That a subvolume cannot be resized to an invalid size and the quota did not change + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024 + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # try to resize the subvolume with an invalid size -10 + nsize = -10 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size") + else: + self.fail("expected the 'fs subvolume resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_fail_zero_size(self): + """ + That a subvolume cannot be resized to a zero size and the quota did not change + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024 + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # try to resize the subvolume with size 0 + nsize = 0 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size") + else: + self.fail("expected the 'fs subvolume resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_quota_lt_used_size(self): + """ + That a subvolume can be resized to a size smaller than the current used size + and the resulting quota matches the expected size. 
+ """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*20 + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # create one file of 10MB + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+1) + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + + usedsize = int(self.mount_a.getfattr(subvolpath, "ceph.dir.rbytes")) + susedsize = int(self.mount_a.run_shell(['stat', '-c' '%s', subvolpath]).stdout.getvalue().strip()) + if isinstance(self.mount_a, FuseMount): + # kclient dir does not have size==rbytes + self.assertEqual(usedsize, susedsize) + + # shrink the subvolume + nsize = usedsize // 2 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + except CommandFailedError: + self.fail("expected the 'fs subvolume resize' command to succeed") + + # verify the quota + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, nsize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_fail_quota_lt_used_size_no_shrink(self): + """ + That a subvolume cannot be resized to a size smaller than the current used size + when --no_shrink is given and the quota did not change. + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*20 + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # create one file of 10MB + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+2) + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + + usedsize = int(self.mount_a.getfattr(subvolpath, "ceph.dir.rbytes")) + susedsize = int(self.mount_a.run_shell(['stat', '-c' '%s', subvolpath]).stdout.getvalue().strip()) + if isinstance(self.mount_a, FuseMount): + # kclient dir does not have size==rbytes + self.assertEqual(usedsize, susedsize) + + # shrink the subvolume + nsize = usedsize // 2 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize), "--no_shrink") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on resize of subvolume with invalid size") + else: + self.fail("expected the 'fs subvolume resize' command to fail") + + # verify the quota did not change + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, osize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_expand_on_full_subvolume(self): + """ + That the subvolume can be expanded 
from a full subvolume and future writes succeed. + """ + + osize = self.DEFAULT_FILE_SIZE*1024*1024*10 + # create subvolume of quota 10MB and make sure it exists + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777") + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # create one file of size 10MB and write + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+3) + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + + # create a file of size 5MB and try write more + file_size=file_size // 2 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+4) + try: + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + except CommandFailedError: + # Not able to write. So expand the subvolume more and try writing the 5MB file again + nsize = osize*2 + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + try: + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + except CommandFailedError: + self.fail("expected filling subvolume {0} with {1} file of size {2}MB" + "to succeed".format(subvolname, number_of_files, file_size)) + else: + self.fail("expected filling subvolume {0} with {1} file of size {2}MB" + "to fail".format(subvolname, number_of_files, file_size)) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_infinite_size(self): + """ + That a subvolume can be resized to an infinite size by unsetting its quota. + """ + + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", + str(self.DEFAULT_FILE_SIZE*1024*1024)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # resize inf + self._fs_cmd("subvolume", "resize", self.volname, subvolname, "inf") + + # verify that the quota is None + size = self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes") + self.assertEqual(size, None) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_resize_infinite_size_future_writes(self): + """ + That a subvolume can be resized to an infinite size and the future writes succeed. 
+ """ + + # create subvolume + subvolname = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", + str(self.DEFAULT_FILE_SIZE*1024*1024*5), "--mode=777") + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # resize inf + self._fs_cmd("subvolume", "resize", self.volname, subvolname, "inf") + + # verify that the quota is None + size = self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes") + self.assertEqual(size, None) + + # create one file of 10MB and try to write + file_size=self.DEFAULT_FILE_SIZE*10 + number_of_files=1 + log.debug("filling subvolume {0} with {1} file of size {2}MB".format(subvolname, + number_of_files, + file_size)) + filename = "{0}.{1}".format(TestVolumes.TEST_FILE_NAME_PREFIX, self.DEFAULT_NUMBER_OF_FILES+5) + + try: + self.mount_a.write_n_mb(os.path.join(subvolpath, filename), file_size) + except CommandFailedError: + self.fail("expected filling subvolume {0} with {1} file of size {2}MB " + "to succeed".format(subvolname, number_of_files, file_size)) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_rm_force(self): + # test removing non-existing subvolume with --force + subvolume = self._generate_random_subvolume_name() + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--force") + except CommandFailedError: + self.fail("expected the 'fs subvolume rm --force' command to succeed") + + def test_subvolume_exists_with_subvolumegroup_and_subvolume(self): + """Test the presence of any subvolume by specifying the name of subvolumegroup""" + + group = self._generate_random_group_name() + subvolume1 = self._generate_random_subvolume_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--group_name", group) + ret = self._fs_cmd("subvolume", "exist", self.volname, "--group_name", group) + self.assertEqual(ret.strip('\n'), "subvolume exists") + # delete subvolume in group + self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group) + ret = self._fs_cmd("subvolume", "exist", self.volname, "--group_name", group) + self.assertEqual(ret.strip('\n'), "no subvolume exists") + # delete subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_exists_with_subvolumegroup_and_no_subvolume(self): + """Test the presence of any subvolume specifying the name + of subvolumegroup and no subvolumes""" + + group = self._generate_random_group_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + ret = self._fs_cmd("subvolume", "exist", self.volname, "--group_name", group) + self.assertEqual(ret.strip('\n'), "no subvolume exists") + # delete subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_exists_without_subvolumegroup_and_with_subvolume(self): + """Test the presence of any subvolume without specifying the name + of subvolumegroup""" + + subvolume1 = self._generate_random_subvolume_name() + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume1) + ret = self._fs_cmd("subvolume", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "subvolume exists") + # delete subvolume + self._fs_cmd("subvolume", 
"rm", self.volname, subvolume1) + ret = self._fs_cmd("subvolume", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolume exists") + + def test_subvolume_exists_without_subvolumegroup_and_without_subvolume(self): + """Test the presence of any subvolume without any subvolumegroup + and without any subvolume""" + + ret = self._fs_cmd("subvolume", "exist", self.volname) + self.assertEqual(ret.strip('\n'), "no subvolume exists") + + def test_subvolume_shrink(self): + """ + That a subvolume can be shrinked in size and its quota matches the expected size. + """ + + # create subvolume + subvolname = self._generate_random_subvolume_name() + osize = self.DEFAULT_FILE_SIZE*1024*1024 + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) + + # make sure it exists + subvolpath = self._get_subvolume_path(self.volname, subvolname) + self.assertNotEqual(subvolpath, None) + + # shrink the subvolume + nsize = osize // 2 + self._fs_cmd("subvolume", "resize", self.volname, subvolname, str(nsize)) + + # verify the quota + size = int(self.mount_a.getfattr(subvolpath, "ceph.quota.max_bytes")) + self.assertEqual(size, nsize) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_rm_idempotency(self): + """ + ensure subvolume deletion of a subvolume which is already deleted with retain snapshots option passes. + After subvolume deletion with retain snapshots, the subvolume exists until the trash directory (resides inside subvolume) + is cleaned up. The subvolume deletion issued while the trash directory is not empty, should pass and should + not error out with EAGAIN. + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=256) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # remove snapshots (removes retained volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume (check idempotency) + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + self.fail(f"expected subvolume rm to pass with error: {os.strerror(ce.exitstatus)}") + + # verify trash dir is clean + self._wait_for_trash_empty() + + + def test_subvolume_user_metadata_set(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata set' command to succeed") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. 
+ self._wait_for_trash_empty() + + def test_subvolume_user_metadata_set_idempotence(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata set' command to succeed") + + # set same metadata again for subvolume. + try: + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata set' command to succeed because it is idempotent operation") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_get(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # get value for specified key. + try: + ret = self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(value, ret) + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_get_for_nonexisting_key(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # try to get value for nonexisting key + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, "key_nonexist", "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because 'key_nonexist' does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. 
+ self._wait_for_trash_empty() + + def test_subvolume_user_metadata_get_for_nonexisting_section(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # try to get value for nonexisting key (as section does not exist) + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, "key", "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because section does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_update(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # update metadata against key. + new_value = "new_value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, new_value, "--group_name", group) + + # get metadata for specified key of subvolume. + try: + ret = self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(new_value, ret) + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_list(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + input_metadata_dict = {f'key_{i}' : f'value_{i}' for i in range(3)} + + for k, v in input_metadata_dict.items(): + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, k, v, "--group_name", group) + + # list metadata + try: + ret = self._fs_cmd("subvolume", "metadata", "ls", self.volname, subvolname, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata ls' command to succeed") + + ret_dict = json.loads(ret) + + # compare output with expected output + self.assertDictEqual(input_metadata_dict, ret_dict) + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. 
+ self._wait_for_trash_empty() + + def test_subvolume_user_metadata_list_if_no_metadata_set(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # list metadata + try: + ret = self._fs_cmd("subvolume", "metadata", "ls", self.volname, subvolname, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata ls' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # compare output with expected output + # expecting empty json/dictionary + self.assertEqual(ret, "{}") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_remove(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # remove metadata against specified key. + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_remove_for_nonexisting_key(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # try to remove value for nonexisting key + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, "key_nonexist", "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because 'key_nonexist' does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. 
+ self._wait_for_trash_empty() + + def test_subvolume_user_metadata_remove_for_nonexisting_section(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # try to remove value for nonexisting key (as section does not exist) + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, "key", "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because section does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_remove_force(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # remove metadata against specified key with --force option. + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group, "--force") + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_remove_force_for_nonexisting_key(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group) + + # set metadata for subvolume. + key = "key" + value = "value" + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + + # remove metadata against specified key. + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + # again remove metadata against already removed key with --force option. 
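+ # (added note: with --force, removing a key that no longer exists is expected to exit cleanly rather than fail with ENOENT, which is what the try/except below verifies)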
+ try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, key, "--group_name", group, "--force") + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata rm' (with --force) command to succeed") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_set_and_get_for_legacy_subvolume(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate an old-fashioned subvolume in a custom group + createpath = os.path.join(".", "volumes", group, subvolname) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False) + + # set metadata for subvolume. + key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, key, value, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata set' command to succeed") + + # get value for specified key. + try: + ret = self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, key, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(value, ret) + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_user_metadata_list_and_remove_for_legacy_subvolume(self): + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate an old-fashioned subvolume in a custom group + createpath = os.path.join(".", "volumes", group, subvolname) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False) + + # set metadata for subvolume. + input_metadata_dict = {f'key_{i}' : f'value_{i}' for i in range(3)} + + for k, v in input_metadata_dict.items(): + self._fs_cmd("subvolume", "metadata", "set", self.volname, subvolname, k, v, "--group_name", group) + + # list metadata + try: + ret = self._fs_cmd("subvolume", "metadata", "ls", self.volname, subvolname, "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata ls' command to succeed") + + ret_dict = json.loads(ret) + + # compare output with expected output + self.assertDictEqual(input_metadata_dict, ret_dict) + + # remove metadata against specified key. + try: + self._fs_cmd("subvolume", "metadata", "rm", self.volname, subvolname, "key_1", "--group_name", group) + except CommandFailedError: + self.fail("expected the 'fs subvolume metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "metadata", "get", self.volname, subvolname, "key_1", "--group_name", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key_1 does not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. 
+ self._wait_for_trash_empty() + +class TestSubvolumeGroupSnapshots(TestVolumesHelper): + """Tests for FS subvolume group snapshot operations.""" + @unittest.skip("skipping subvolumegroup snapshot tests") + def test_nonexistent_subvolume_group_snapshot_rm(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # snapshot group + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + + # remove snapshot + self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot) + + # remove snapshot + try: + self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolumegroup snapshot rm' command to fail") + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + @unittest.skip("skipping subvolumegroup snapshot tests") + def test_subvolume_group_snapshot_create_and_rm(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # snapshot group + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + + # remove snapshot + self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + @unittest.skip("skipping subvolumegroup snapshot tests") + def test_subvolume_group_snapshot_idempotence(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # snapshot group + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + + # try creating snapshot w/ same snapshot name -- should be idempotent + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + + # remove snapshot + self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + @unittest.skip("skipping subvolumegroup snapshot tests") + def test_subvolume_group_snapshot_ls(self): + # tests the 'fs subvolumegroup snapshot ls' command + + snapshots = [] + + # create group + group = 
self._generate_random_group_name() + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolumegroup snapshots + snapshots = self._generate_random_snapshot_name(3) + for snapshot in snapshots: + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + + subvolgrpsnapshotls = json.loads(self._fs_cmd('subvolumegroup', 'snapshot', 'ls', self.volname, group)) + if len(subvolgrpsnapshotls) == 0: + raise RuntimeError("Expected the 'fs subvolumegroup snapshot ls' command to list the created subvolume group snapshots") + else: + snapshotnames = [snapshot['name'] for snapshot in subvolgrpsnapshotls] + if collections.Counter(snapshotnames) != collections.Counter(snapshots): + raise RuntimeError("Error creating or listing subvolume group snapshots") + + @unittest.skip("skipping subvolumegroup snapshot tests") + def test_subvolume_group_snapshot_rm_force(self): + # test removing non-existing subvolume group snapshot with --force + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + # remove snapshot + try: + self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot, "--force") + except CommandFailedError: + raise RuntimeError("expected the 'fs subvolumegroup snapshot rm --force' command to succeed") + + def test_subvolume_group_snapshot_unsupported_status(self): + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # snapshot group + try: + self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOSYS, "invalid error code on subvolumegroup snapshot create") + else: + self.fail("expected subvolumegroup snapshot create command to fail") + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + +class TestSubvolumeSnapshots(TestVolumesHelper): + """Tests for FS subvolume snapshot operations.""" + def test_nonexistent_subvolume_snapshot_rm(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove snapshot again + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOENT: + raise + else: + raise RuntimeError("expected the 'fs subvolume snapshot rm' command to fail") + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_create_and_rm(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + 
self._wait_for_trash_empty() + + def test_subvolume_snapshot_create_idempotence(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # try creating w/ same subvolume snapshot name -- should be idempotent + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info(self): + + """ + tests the 'fs subvolume snapshot info' command + """ + + snap_md = ["created_at", "data_pool", "has_pending_clones"] + + subvolume = self._generate_random_subvolume_name() + snapshot, snap_missing = self._generate_random_snapshot_name(2) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=1) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot)) + for md in snap_md: + self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md)) + self.assertEqual(snap_info["has_pending_clones"], "no") + + # snapshot info for non-existent snapshot + try: + self._get_subvolume_snapshot_info(self.volname, subvolume, snap_missing) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on snapshot info of non-existent snapshot") + else: + self.fail("expected snapshot info of non-existent snapshot to fail") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_in_group(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # snapshot subvolume in group + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_snapshot_ls(self): + # tests the 'fs subvolume snapshot ls' command + + snapshots = [] + + # create subvolume + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # create subvolume snapshots + snapshots = self._generate_random_snapshot_name(3) + for snapshot in snapshots: + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + 
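+ # Added illustrative note (not asserted by the test): the 'snapshot ls' output parsed below is a JSON list of objects, each carrying at least a 'name' key, e.g. [{"name": "snap_0"}, {"name": "snap_1"}]; only the names are compared against the created set.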
subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume)) + if len(subvolsnapshotls) == 0: + self.fail("Expected the 'fs subvolume snapshot ls' command to list the created subvolume snapshots") + else: + snapshotnames = [snapshot['name'] for snapshot in subvolsnapshotls] + if collections.Counter(snapshotnames) != collections.Counter(snapshots): + self.fail("Error creating or listing subvolume snapshots") + + # remove snapshot + for snapshot in snapshots: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_inherited_snapshot_ls(self): + # tests the scenario where 'fs subvolume snapshot ls' command + # should not list inherited snapshots created as part of snapshot + # at ancestral level + + snapshots = [] + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snap_count = 3 + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # create subvolume snapshots + snapshots = self._generate_random_snapshot_name(snap_count) + for snapshot in snapshots: + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group) + + # Create snapshot at ancestral level + ancestral_snappath1 = os.path.join(".", "volumes", group, ".snap", "ancestral_snap_1") + ancestral_snappath2 = os.path.join(".", "volumes", group, ".snap", "ancestral_snap_2") + self.mount_a.run_shell(['sudo', 'mkdir', '-p', ancestral_snappath1, ancestral_snappath2], omit_sudo=False) + + subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume, group)) + self.assertEqual(len(subvolsnapshotls), snap_count) + + # remove ancestral snapshots + self.mount_a.run_shell(['sudo', 'rmdir', ancestral_snappath1, ancestral_snappath2], omit_sudo=False) + + # remove snapshot + for snapshot in snapshots: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_inherited_snapshot_info(self): + """ + tests the scenario where 'fs subvolume snapshot info' command + should fail for inherited snapshots created as part of snapshot + at ancestral level + """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Create snapshot at ancestral level + ancestral_snap_name = "ancestral_snap_1" + ancestral_snappath1 = os.path.join(".", "volumes", group, ".snap", ancestral_snap_name) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', ancestral_snappath1], omit_sudo=False) + + # Validate existence of inherited snapshot + group_path = os.path.join(".", "volumes", group) + inode_number_group_dir = int(self.mount_a.run_shell(['stat', '-c' '%i', group_path]).stdout.getvalue().strip()) + inherited_snap = "_{0}_{1}".format(ancestral_snap_name, 
inode_number_group_dir) + inherited_snappath = os.path.join(".", "volumes", group, subvolume,".snap", inherited_snap) + self.mount_a.run_shell(['ls', inherited_snappath]) + + # snapshot info on inherited snapshot + try: + self._get_subvolume_snapshot_info(self.volname, subvolume, inherited_snap, group) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on snapshot info of inherited snapshot") + else: + self.fail("expected snapshot info of inherited snapshot to fail") + + # remove ancestral snapshots + self.mount_a.run_shell(['sudo', 'rmdir', ancestral_snappath1], omit_sudo=False) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_inherited_snapshot_rm(self): + """ + tests the scenario where 'fs subvolume snapshot rm' command + should fail for inherited snapshots created as part of snapshot + at ancestral level + """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Create snapshot at ancestral level + ancestral_snap_name = "ancestral_snap_1" + ancestral_snappath1 = os.path.join(".", "volumes", group, ".snap", ancestral_snap_name) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', ancestral_snappath1], omit_sudo=False) + + # Validate existence of inherited snap + group_path = os.path.join(".", "volumes", group) + inode_number_group_dir = int(self.mount_a.run_shell(['stat', '-c' '%i', group_path]).stdout.getvalue().strip()) + inherited_snap = "_{0}_{1}".format(ancestral_snap_name, inode_number_group_dir) + inherited_snappath = os.path.join(".", "volumes", group, subvolume,".snap", inherited_snap) + self.mount_a.run_shell(['ls', inherited_snappath]) + + # inherited snapshot should not be deletable + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, inherited_snap, "--group_name", group) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, msg="invalid error code when removing inherited snapshot") + else: + self.fail("expected removing inherited snapshot to fail") + + # remove ancestral snapshots + self.mount_a.run_shell(['sudo', 'rmdir', ancestral_snappath1], omit_sudo=False) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_subvolumegroup_snapshot_name_conflict(self): + """ + tests the scenario where a subvolume snapshot is created + with the same name as its subvolumegroup snapshot. This should + fail. 
+ """ + + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + group_snapshot = self._generate_random_snapshot_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + + # Create subvolumegroup snapshot + group_snapshot_path = os.path.join(".", "volumes", group, ".snap", group_snapshot) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', group_snapshot_path], omit_sudo=False) + + # Validate existence of subvolumegroup snapshot + self.mount_a.run_shell(['ls', group_snapshot_path]) + + # Creation of subvolume snapshot with it's subvolumegroup snapshot name should fail + try: + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, group_snapshot, "--group_name", group) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, msg="invalid error code when creating subvolume snapshot with same name as subvolume group snapshot") + else: + self.fail("expected subvolume snapshot creation with same name as subvolumegroup snapshot to fail") + + # remove subvolumegroup snapshot + self.mount_a.run_shell(['sudo', 'rmdir', group_snapshot_path], omit_sudo=False) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_retain_snapshot_invalid_recreate(self): + """ + ensure retained subvolume recreate does not leave any incarnations in the subvolume and trash + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # recreate subvolume with an invalid pool + data_pool = "invalid_pool" + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--pool_layout", data_pool) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on recreate of subvolume with invalid poolname") + else: + self.fail("expected recreate of subvolume with invalid poolname to fail") + + # fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "snapshot-retained", + msg="expected state to be 'snapshot-retained', found '{0}".format(subvol_info["state"])) + + # getpath + try: + self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of subvolume with retained snapshots") + else: + self.fail("expected getpath of subvolume with retained snapshots to fail") + + # remove snapshot (should remove volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_recreate_subvolume(self): + """ + ensure a retained subvolume can be recreated and further snapshotted + """ + snap_md = ["created_at", "data_pool", "has_pending_clones"] + + subvolume = 
self._generate_random_subvolume_name() + snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "snapshot-retained", + msg="expected state to be 'snapshot-retained', found '{0}'".format(subvol_info["state"])) + + # recreate retained subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "complete", + msg="expected state to be 'complete', found '{0}'".format(subvol_info["state"])) + + # snapshot info (older snapshot) + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot1)) + for md in snap_md: + self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md)) + self.assertEqual(snap_info["has_pending_clones"], "no") + + # snap-create (new snapshot) + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot2) + + # remove with retain snapshots + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # list snapshots + subvolsnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume)) + self.assertEqual(len(subvolsnapshotls), 2, "Expected the 'fs subvolume snapshot ls' command to list the" + " created subvolume snapshots") + snapshotnames = [snapshot['name'] for snapshot in subvolsnapshotls] + for snap in [snapshot1, snapshot2]: + self.assertIn(snap, snapshotnames, "Missing snapshot '{0}' in snapshot list".format(snap)) + + # remove snapshots (should remove volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot2) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_with_snapshots(self): + """ + ensure retain snapshots based delete of a subvolume with snapshots retains the subvolume + also test allowed and disallowed operations on a retained subvolume + """ + snap_md = ["created_at", "data_pool", "has_pending_clones"] + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove subvolume -- should fail with ENOTEMPTY since it has snapshots + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of retained subvolume with snapshots") + else: + self.fail("expected rm of subvolume with retained snapshots to fail") + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + 
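+ # Added sketch, illustrative only: assuming _fs_cmd forwards to the 'ceph fs' CLI, the retained-snapshot flow exercised by this test corresponds roughly to:
+ #   ceph fs subvolume snapshot create <vol> <subvol> <snap>
+ #   ceph fs subvolume rm <vol> <subvol> --retain-snapshots
+ #   ceph fs subvolume info <vol> <subvol>               # reports "state": "snapshot-retained"
+ #   ceph fs subvolume snapshot rm <vol> <subvol> <snap> # removing the last snapshot purges the retained subvolume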
# fetch info + subvol_info = json.loads(self._fs_cmd("subvolume", "info", self.volname, subvolume)) + self.assertEqual(subvol_info["state"], "snapshot-retained", + msg="expected state to be 'snapshot-retained', found '{0}'".format(subvol_info["state"])) + + ## test allowed ops in retained state + # ls + subvolumes = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumes), 1, "subvolume ls count mismatch, expected '1', found {0}".format(len(subvolumes))) + self.assertEqual(subvolumes[0]['name'], subvolume, + "subvolume name mismatch in ls output, expected '{0}', found '{1}'".format(subvolume, subvolumes[0]['name'])) + + # snapshot info + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot)) + for md in snap_md: + self.assertIn(md, snap_info, "'{0}' key not present in metadata of snapshot".format(md)) + self.assertEqual(snap_info["has_pending_clones"], "no") + + # rm --force (allowed but should fail) + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--force") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of subvolume with retained snapshots") + else: + self.fail("expected rm of subvolume with retained snapshots to fail") + + # rm (allowed but should fail) + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on rm of subvolume with retained snapshots") + else: + self.fail("expected rm of subvolume with retained snapshots to fail") + + ## test disallowed ops + # getpath + try: + self._fs_cmd("subvolume", "getpath", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on getpath of subvolume with retained snapshots") + else: + self.fail("expected getpath of subvolume with retained snapshots to fail") + + # resize + nsize = self.DEFAULT_FILE_SIZE*1024*1024 + try: + self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize)) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on resize of subvolume with retained snapshots") + else: + self.fail("expected resize of subvolume with retained snapshots to fail") + + # snap-create + try: + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, "fail") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on snapshot create of subvolume with retained snapshots") + else: + self.fail("expected snapshot create of subvolume with retained snapshots to fail") + + # remove snapshot (should remove volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_without_snapshots(self): + """ + ensure retain snapshots based delete of a subvolume with no snapshots deletes the subvolume + """ + subvolume = self._generate_random_subvolume_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # remove with snapshot retention (should remove volume, no snapshots to retain) + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # verify 
list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_trash_busy_recreate(self): + """ + ensure retained subvolume recreate fails if its trash is not yet purged + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # fake a trash entry + self._update_fake_trash(subvolume) + + # recreate subvolume + try: + self._fs_cmd("subvolume", "create", self.volname, subvolume) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on recreate of subvolume with purge pending") + else: + self.fail("expected recreate of subvolume with purge pending to fail") + + # clear fake trash entry + self._update_fake_trash(subvolume, create=False) + + # recreate subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_rm_with_snapshots(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove subvolume -- should fail with ENOTEMPTY since it has snapshots + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOTEMPTY: + raise RuntimeError("invalid error code returned when deleting subvolume with snapshots") + else: + raise RuntimeError("expected subvolume deletion to fail") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_protect_unprotect_sanity(self): + """ + Snapshot protect/unprotect commands are deprecated. This test exists to ensure that + invoking the command does not cause errors, till they are removed from a subsequent release. 
+ """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # now, protect snapshot + self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # now, unprotect snapshot + self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_rm_force(self): + # test removing non existing subvolume snapshot with --force + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # remove snapshot + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, "--force") + except CommandFailedError: + raise RuntimeError("expected the 'fs subvolume snapshot rm --force' command to succeed") + + def test_subvolume_snapshot_metadata_set(self): + """ + Set custom metadata for subvolume snapshot. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_set_idempotence(self): + """ + Set custom metadata for subvolume snapshot (Idempotency). + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. 
+ key = "key" + value = "value" + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed") + + # set same metadata again for subvolume. + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed because it is idempotent operation") + + # get value for specified key. + try: + ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(value, ret) + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_get(self): + """ + Get custom metadata for a specified key in subvolume snapshot metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # get value for specified key. + try: + ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(value, ret) + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_get_for_nonexisting_key(self): + """ + Get custom metadata for subvolume snapshot if specified key not exist in metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. 
+ key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # try to get value for nonexisting key + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, "key_nonexist", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because 'key_nonexist' does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_get_for_nonexisting_section(self): + """ + Get custom metadata for subvolume snapshot if metadata is not added for subvolume snapshot. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # try to get value for nonexisting key (as section does not exist) + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, "key", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because section does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_update(self): + """ + Update custom metadata for a specified key in subvolume snapshot metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # update metadata against key. + new_value = "new_value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, new_value, group) + + # get metadata for specified key of snapshot. + try: + ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata get' command to succeed") + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. 
+ self.assertEqual(new_value, ret) + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_list(self): + """ + List custom metadata for subvolume snapshot. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for subvolume. + input_metadata_dict = {f'key_{i}' : f'value_{i}' for i in range(3)} + + for k, v in input_metadata_dict.items(): + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, k, v, group) + + # list metadata + try: + ret_dict = json.loads(self._fs_cmd("subvolume", "snapshot", "metadata", "ls", self.volname, subvolname, snapshot, group)) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata ls' command to succeed") + + # compare output with expected output + self.assertDictEqual(input_metadata_dict, ret_dict) + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_list_if_no_metadata_set(self): + """ + List custom metadata for subvolume snapshot if metadata is not added for subvolume snapshot. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # list metadata + try: + ret_dict = json.loads(self._fs_cmd("subvolume", "snapshot", "metadata", "ls", self.volname, subvolname, snapshot, group)) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata ls' command to succeed") + + # compare output with expected output + empty_dict = {} + self.assertDictEqual(ret_dict, empty_dict) + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_remove(self): + """ + Remove custom metadata for a specified key in subvolume snapshot metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # remove metadata against specified key. + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, key, snapshot, group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_remove_for_nonexisting_key(self): + """ + Remove custom metadata for subvolume snapshot if specified key not exist in metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # try to remove value for nonexisting key + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, "key_nonexist", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because 'key_nonexist' does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_remove_for_nonexisting_section(self): + """ + Remove custom metadata for subvolume snapshot if metadata is not added for subvolume snapshot. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # try to remove value for nonexisting key (as section does not exist) + # Expecting ENOENT exit status because key does not exist + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, "key", group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because section does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_remove_force(self): + """ + Forcefully remove custom metadata for a specified key in subvolume snapshot metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # remove metadata against specified key with --force option. + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group, "--force") + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_remove_force_for_nonexisting_key(self): + """ + Forcefully remove custom metadata for subvolume snapshot if specified key not exist in metadata. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # remove metadata against specified key. 
+ try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group) + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata rm' command to succeed") + + # confirm key is removed by again fetching metadata + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + self.fail("Expected ENOENT because key does not exist") + + # again remove metadata against already removed key with --force option. + try: + self._fs_cmd("subvolume", "snapshot", "metadata", "rm", self.volname, subvolname, snapshot, key, group, "--force") + except CommandFailedError: + self.fail("expected the 'fs subvolume snapshot metadata rm' (with --force) command to succeed") + + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. + self._wait_for_trash_empty() + + def test_subvolume_snapshot_metadata_after_snapshot_remove(self): + """ + Verify metadata removal of subvolume snapshot after snapshot removal. + """ + subvolname = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + + # create group. + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume in group. + self._fs_cmd("subvolume", "create", self.volname, subvolname, group) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group) + + # set metadata for snapshot. + key = "key" + value = "value" + self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group) + + # get value for specified key. + ret = self._fs_cmd("subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group) + + # remove '\n' from returned value. + ret = ret.strip('\n') + + # match received value with expected value. + self.assertEqual(value, ret) + + # remove subvolume snapshot. + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group) + + # try to get metadata after removing snapshot. + # Expecting error ENOENT with error message of snapshot does not exist + cmd_ret = self.mgr_cluster.mon_manager.run_cluster_cmd( + args=["fs", "subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group], + check_status=False, stdout=StringIO(), stderr=StringIO()) + self.assertEqual(cmd_ret.returncode, errno.ENOENT, "Expecting ENOENT error") + self.assertIn(f"snapshot '{snapshot}' does not exist", cmd_ret.stderr.getvalue(), + f"Expecting message: snapshot '{snapshot}' does not exist ") + + # confirm metadata is removed by searching section name in .meta file + meta_path = os.path.join(".", "volumes", group, subvolname, ".meta") + section_name = "SNAP_METADATA_" + snapshot + + try: + self.mount_a.run_shell(f"sudo grep {section_name} {meta_path}", omit_sudo=False) + except CommandFailedError as e: + self.assertNotEqual(e.exitstatus, 0) + else: + self.fail("Expected non-zero exist status because section should not exist") + + self._fs_cmd("subvolume", "rm", self.volname, subvolname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean. 
+        self._wait_for_trash_empty()
+
+    def test_clean_stale_subvolume_snapshot_metadata(self):
+        """
+        Validate cleaning of stale subvolume snapshot metadata.
+        """
+        subvolname = self._generate_random_subvolume_name()
+        group = self._generate_random_group_name()
+        snapshot = self._generate_random_snapshot_name()
+
+        # create group.
+        self._fs_cmd("subvolumegroup", "create", self.volname, group)
+
+        # create subvolume in group.
+        self._fs_cmd("subvolume", "create", self.volname, subvolname, group)
+
+        # snapshot subvolume
+        self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolname, snapshot, group)
+
+        # set metadata for snapshot.
+        key = "key"
+        value = "value"
+        try:
+            self._fs_cmd("subvolume", "snapshot", "metadata", "set", self.volname, subvolname, snapshot, key, value, group)
+        except CommandFailedError:
+            self.fail("expected the 'fs subvolume snapshot metadata set' command to succeed")
+
+        # save the subvolume config file.
+        meta_path = os.path.join(".", "volumes", group, subvolname, ".meta")
+        tmp_meta_path = os.path.join(".", "volumes", group, subvolname, ".meta.stale_snap_section")
+        self.mount_a.run_shell(['sudo', 'cp', '-p', meta_path, tmp_meta_path], omit_sudo=False)
+
+        # Delete snapshot, this would remove user snap metadata
+        self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolname, snapshot, group)
+
+        # Copy back saved subvolume config file. This would have stale snapshot metadata
+        self.mount_a.run_shell(['sudo', 'cp', '-p', tmp_meta_path, meta_path], omit_sudo=False)
+
+        # Verify that it has stale snapshot metadata
+        section_name = "SNAP_METADATA_" + snapshot
+        try:
+            self.mount_a.run_shell(f"sudo grep {section_name} {meta_path}", omit_sudo=False)
+        except CommandFailedError:
+            self.fail("Expected grep cmd to succeed because stale snapshot metadata exists")
+
+        # Do any subvolume operation to clean the stale snapshot metadata
+        _ = json.loads(self._get_subvolume_info(self.volname, subvolname, group))
+
+        # Verify that the stale snapshot metadata is cleaned
+        try:
+            self.mount_a.run_shell(f"sudo grep {section_name} {meta_path}", omit_sudo=False)
+        except CommandFailedError as e:
+            self.assertNotEqual(e.exitstatus, 0)
+        else:
+            self.fail("Expected non-zero exit status because stale snapshot metadata should not exist")
+
+        self._fs_cmd("subvolume", "rm", self.volname, subvolname, group)
+        self._fs_cmd("subvolumegroup", "rm", self.volname, group)
+
+        # verify trash dir is clean.
+ self._wait_for_trash_empty() + # Clean tmp config file + self.mount_a.run_shell(['sudo', 'rm', '-f', tmp_meta_path], omit_sudo=False) + + +class TestSubvolumeSnapshotClones(TestVolumesHelper): + """ Tests for FS subvolume snapshot clone operations.""" + def test_clone_subvolume_info(self): + # tests the 'fs subvolume info' command for a clone + subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime", + "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace", + "type", "uid"] + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=1) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + subvol_info = json.loads(self._get_subvolume_info(self.volname, clone)) + if len(subvol_info) == 0: + raise RuntimeError("Expected the 'fs subvolume info' command to list metadata of subvolume") + for md in subvol_md: + if md not in subvol_info.keys(): + raise RuntimeError("%s not present in the metadata of subvolume" % md) + if subvol_info["type"] != "clone": + raise RuntimeError("type should be set to clone") + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info_without_snapshot_clone(self): + """ + Verify subvolume snapshot info output without cloning snapshot. + If no clone is performed then path /volumes/_index/clone/{track_id} + will not exist. + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume. + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # list snapshot info + result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot)) + + # verify snapshot info + self.assertEqual(result['has_pending_clones'], "no") + self.assertFalse('orphan_clones_count' in result) + self.assertFalse('pending_clones' in result) + + # remove snapshot, subvolume, clone + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info_if_no_clone_pending(self): + """ + Verify subvolume snapshot info output if no clone is in pending state. + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone_list = [f'clone_{i}' for i in range(3)] + + # create subvolume. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clones + for clone in clone_list: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clones status + for clone in clone_list: + self._wait_for_clone_to_complete(clone) + + # list snapshot info + result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot)) + + # verify snapshot info + self.assertEqual(result['has_pending_clones'], "no") + self.assertFalse('orphan_clones_count' in result) + self.assertFalse('pending_clones' in result) + + # remove snapshot, subvolume, clone + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + for clone in clone_list: + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info_if_clone_pending_for_no_group(self): + """ + Verify subvolume snapshot info output if clones are in pending state. + Clones are not specified for particular target_group. Hence target_group + should not be in the output as we don't show _nogroup (default group) + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone_list = [f'clone_{i}' for i in range(3)] + + # create subvolume. + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clones + for clone in clone_list: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # list snapshot info + result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot)) + + # verify snapshot info + expected_clone_list = [] + for clone in clone_list: + expected_clone_list.append({"name": clone}) + self.assertEqual(result['has_pending_clones'], "yes") + self.assertFalse('orphan_clones_count' in result) + self.assertListEqual(result['pending_clones'], expected_clone_list) + self.assertEqual(len(result['pending_clones']), 3) + + # check clones status + for clone in clone_list: + self._wait_for_clone_to_complete(clone) + + # remove snapshot, subvolume, clone + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + for clone in clone_list: + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info_if_clone_pending_for_target_group(self): + """ + Verify subvolume snapshot info output if clones are in pending state. + Clones are not specified for target_group. 
+ """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + group = self._generate_random_group_name() + target_group = self._generate_random_group_name() + + # create groups + self._fs_cmd("subvolumegroup", "create", self.volname, group) + self._fs_cmd("subvolumegroup", "create", self.volname, target_group) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, group, "--mode=777") + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group) + + # insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, + "--group_name", group, "--target_group_name", target_group) + + # list snapshot info + result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot, "--group_name", group)) + + # verify snapshot info + expected_clone_list = [{"name": clone, "target_group": target_group}] + self.assertEqual(result['has_pending_clones'], "yes") + self.assertFalse('orphan_clones_count' in result) + self.assertListEqual(result['pending_clones'], expected_clone_list) + self.assertEqual(len(result['pending_clones']), 1) + + # check clone status + self._wait_for_clone_to_complete(clone, clone_group=target_group) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + self._fs_cmd("subvolume", "rm", self.volname, clone, target_group) + + # remove groups + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, target_group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_info_if_orphan_clone(self): + """ + Verify subvolume snapshot info output if orphan clones exists. + Orphan clones should not list under pending clones. + orphan_clones_count should display correct count of orphan clones' + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone_list = [f'clone_{i}' for i in range(3)] + + # create subvolume. 
+ self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 15) + + # schedule a clones + for clone in clone_list: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # remove track file for third clone to make it orphan + meta_path = os.path.join(".", "volumes", "_nogroup", subvolume, ".meta") + pending_clones_result = self.mount_a.run_shell(['sudo', 'grep', 'clone snaps', '-A3', meta_path], omit_sudo=False, stdout=StringIO(), stderr=StringIO()) + third_clone_track_id = pending_clones_result.stdout.getvalue().splitlines()[3].split(" = ")[0] + third_clone_track_path = os.path.join(".", "volumes", "_index", "clone", third_clone_track_id) + self.mount_a.run_shell(f"sudo rm -f {third_clone_track_path}", omit_sudo=False) + + # list snapshot info + result = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot)) + + # verify snapshot info + expected_clone_list = [] + for i in range(len(clone_list)-1): + expected_clone_list.append({"name": clone_list[i]}) + self.assertEqual(result['has_pending_clones'], "yes") + self.assertEqual(result['orphan_clones_count'], 1) + self.assertListEqual(result['pending_clones'], expected_clone_list) + self.assertEqual(len(result['pending_clones']), 2) + + # check clones status + for i in range(len(clone_list)-1): + self._wait_for_clone_to_complete(clone_list[i]) + + # list snapshot info after cloning completion + res = json.loads(self._fs_cmd("subvolume", "snapshot", "info", self.volname, subvolume, snapshot)) + + # verify snapshot info (has_pending_clones should be no) + self.assertEqual(res['has_pending_clones'], "no") + + def test_non_clone_status(self): + subvolume = self._generate_random_subvolume_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + try: + self._fs_cmd("clone", "status", self.volname, subvolume) + except CommandFailedError as ce: + if ce.exitstatus != errno.ENOTSUP: + raise RuntimeError("invalid error code when fetching status of a non cloned subvolume") + else: + raise RuntimeError("expected fetching of clone status of a subvolume to fail") + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_inherit_snapshot_namespace_and_size(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + osize = self.DEFAULT_FILE_SIZE*1024*1024*12 + + # create subvolume, in an isolated namespace with a specified size + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--namespace-isolated", "--size", str(osize), "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=8) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # create a pool different from current subvolume pool + subvol_path = self._get_subvolume_path(self.volname, subvolume) + default_pool = self.mount_a.getfattr(subvol_path, "ceph.dir.layout.pool") + new_pool = "new_pool" + self.assertNotEqual(default_pool, new_pool) + self.fs.add_data_pool(new_pool) + + # update source subvolume pool + 
self._do_subvolume_pool_and_namespace_update(subvolume, pool=new_pool, pool_namespace="") + + # schedule a clone, with NO --pool specification + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_inherit_quota_attrs(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + osize = self.DEFAULT_FILE_SIZE*1024*1024*12 + + # create subvolume with a specified size + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777", "--size", str(osize)) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=8) + + # get subvolume path + subvolpath = self._get_subvolume_path(self.volname, subvolume) + + # set quota on number of files + self.mount_a.setfattr(subvolpath, 'ceph.quota.max_files', "20", sudo=True) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # get subvolume path + clonepath = self._get_subvolume_path(self.volname, clone) + + # verify quota max_files is inherited from source snapshot + subvol_quota = self.mount_a.getfattr(subvolpath, "ceph.quota.max_files") + clone_quota = self.mount_a.getfattr(clonepath, "ceph.quota.max_files") + self.assertEqual(subvol_quota, clone_quota) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_in_progress_getpath(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # clone should not be accessible right now + try: + self._get_subvolume_path(self.volname, clone) + except CommandFailedError as ce: + if ce.exitstatus != errno.EAGAIN: + raise RuntimeError("invalid error code when fetching path of an pending clone") + else: + raise RuntimeError("expected fetching path of an pending clone to fail") + + # check clone status + self._wait_for_clone_to_complete(clone) + + # clone should be accessible now + subvolpath = 
self._get_subvolume_path(self.volname, clone) + self.assertNotEqual(subvolpath, None) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_in_progress_snapshot_rm(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # snapshot should not be deletable now + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, msg="invalid error code when removing source snapshot of a clone") + else: + self.fail("expected removing source snapshot of a clone to fail") + + # check clone status + self._wait_for_clone_to_complete(clone) + + # clone should be accessible now + subvolpath = self._get_subvolume_path(self.volname, clone) + self.assertNotEqual(subvolpath, None) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_in_progress_source(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # verify clone source + result = json.loads(self._fs_cmd("clone", "status", self.volname, clone)) + source = result['status']['source'] + self.assertEqual(source['volume'], self.volname) + self.assertEqual(source['subvolume'], subvolume) + self.assertEqual(source.get('group', None), None) + self.assertEqual(source['snapshot'], snapshot) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # clone should be accessible now + subvolpath = self._get_subvolume_path(self.volname, clone) + self.assertNotEqual(subvolpath, None) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + 
+ # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_clone_retain_snapshot_with_snapshots(self): + """ + retain snapshots of a cloned subvolume and check disallowed operations + """ + subvolume = self._generate_random_subvolume_name() + snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # store path for clone verification + subvol1_path = self._get_subvolume_path(self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # clone retained subvolume snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot1, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot1, clone, subvol_path=subvol1_path) + + # create a snapshot on the clone + self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone, snapshot2) + + # retain a clone + self._fs_cmd("subvolume", "rm", self.volname, clone, "--retain-snapshots") + + # list snapshots + clonesnapshotls = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, clone)) + self.assertEqual(len(clonesnapshotls), 1, "Expected the 'fs subvolume snapshot ls' command to list the" + " created subvolume snapshots") + snapshotnames = [snapshot['name'] for snapshot in clonesnapshotls] + for snap in [snapshot2]: + self.assertIn(snap, snapshotnames, "Missing snapshot '{0}' in snapshot list".format(snap)) + + ## check disallowed operations on retained clone + # clone-status + try: + self._fs_cmd("clone", "status", self.volname, clone) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on clone status of clone with retained snapshots") + else: + self.fail("expected clone status of clone with retained snapshots to fail") + + # clone-cancel + try: + self._fs_cmd("clone", "cancel", self.volname, clone) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on clone cancel of clone with retained snapshots") + else: + self.fail("expected clone cancel of clone with retained snapshots to fail") + + # remove snapshots (removes subvolumes as all are in retained state) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone, snapshot2) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_clone(self): + """ + clone a snapshot from a snapshot retained subvolume + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # store path for clone verification + subvol_path 
= self._get_subvolume_path(self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # clone retained subvolume snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, subvol_path=subvol_path) + + # remove snapshots (removes retained volume) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_clone_from_newer_snapshot(self): + """ + clone a subvolume from recreated subvolume's latest snapshot + """ + subvolume = self._generate_random_subvolume_name() + snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + clone = self._generate_random_clone_name(1) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot1) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # recreate subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # get and store path for clone verification + subvol2_path = self._get_subvolume_path(self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot newer subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot2) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # clone retained subvolume's newer snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot2, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot2, clone, subvol_path=subvol2_path) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot1) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot2) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_recreate(self): + """ + recreate a subvolume from one of its retained snapshots + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # store path for clone verification + subvol_path = self._get_subvolume_path(self.volname, subvolume) + + # do 
some IO + self._do_subvolume_io(subvolume, number_of_files=16) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # remove with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--retain-snapshots") + + # recreate retained subvolume using its own snapshot to clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, subvolume) + + # check clone status + self._wait_for_clone_to_complete(subvolume) + + # verify clone + self._verify_clone(subvolume, snapshot, subvolume, subvol_path=subvol_path) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_retain_snapshot_trash_busy_recreate_clone(self): + """ + ensure retained clone recreate fails if its trash is not yet purged + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # clone subvolume snapshot + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # snapshot clone + self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone, snapshot) + + # remove clone with snapshot retention + self._fs_cmd("subvolume", "rm", self.volname, clone, "--retain-snapshots") + + # fake a trash entry + self._update_fake_trash(clone) + + # clone subvolume snapshot (recreate) + try: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on recreate of clone with purge pending") + else: + self.fail("expected recreate of clone with purge pending to fail") + + # clear fake trash entry + self._update_fake_trash(clone, create=False) + + # recreate subvolume + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone, snapshot) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_attr_clone(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io_mixed(subvolume) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, 
subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_clone_failure_status_pending_in_progress_complete(self): + """ + ensure failure status is not shown when clone is not in failed/cancelled state + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1 = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=200) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # pending clone shouldn't show failure status + clone1_result = self._get_clone_status(clone1) + try: + clone1_result["status"]["failure"]["errno"] + except KeyError as e: + self.assertEqual(str(e), "'failure'") + else: + self.fail("clone status shouldn't show failure for pending clone") + + # check clone1 to be in-progress + self._wait_for_clone_to_be_in_progress(clone1) + + # in-progress clone1 shouldn't show failure status + clone1_result = self._get_clone_status(clone1) + try: + clone1_result["status"]["failure"]["errno"] + except KeyError as e: + self.assertEqual(str(e), "'failure'") + else: + self.fail("clone status shouldn't show failure for in-progress clone") + + # wait for clone1 to complete + self._wait_for_clone_to_complete(clone1) + + # complete clone1 shouldn't show failure status + clone1_result = self._get_clone_status(clone1) + try: + clone1_result["status"]["failure"]["errno"] + except KeyError as e: + self.assertEqual(str(e), "'failure'") + else: + self.fail("clone status shouldn't show failure for complete clone") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_clone_failure_status_failed(self): + """ + ensure failure status is shown when clone is in failed state and validate the reason + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1 = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=200) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # remove snapshot from backend to force the clone failure. 
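+        # (deleting the .snap directory directly on the backend bypasses the volumes
+        # plugin, so the cloner thread later fails with ENOENT when it tries to open
+        # the source snapshot; the failure status checked below reflects that.)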
+ snappath = os.path.join(".", "volumes", "_nogroup", subvolume, ".snap", snapshot) + self.mount_a.run_shell(['sudo', 'rmdir', snappath], omit_sudo=False) + + # wait for clone1 to fail. + self._wait_for_clone_to_fail(clone1) + + # check clone1 status + clone1_result = self._get_clone_status(clone1) + self.assertEqual(clone1_result["status"]["state"], "failed") + self.assertEqual(clone1_result["status"]["failure"]["errno"], "2") + self.assertEqual(clone1_result["status"]["failure"]["error_msg"], "snapshot '{0}' does not exist".format(snapshot)) + + # clone removal should succeed after failure, remove clone1 + self._fs_cmd("subvolume", "rm", self.volname, clone1, "--force") + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_clone_failure_status_pending_cancelled(self): + """ + ensure failure status is shown when clone is cancelled during pending state and validate the reason + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1 = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=200) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # cancel pending clone1 + self._fs_cmd("clone", "cancel", self.volname, clone1) + + # check clone1 status + clone1_result = self._get_clone_status(clone1) + self.assertEqual(clone1_result["status"]["state"], "canceled") + self.assertEqual(clone1_result["status"]["failure"]["errno"], "4") + self.assertEqual(clone1_result["status"]["failure"]["error_msg"], "user interrupted clone operation") + + # clone removal should succeed with force after cancelled, remove clone1 + self._fs_cmd("subvolume", "rm", self.volname, clone1, "--force") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_clone_failure_status_in_progress_cancelled(self): + """ + ensure failure status is shown when clone is cancelled during in-progress state and validate the reason + """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1 = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=200) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # wait for clone1 to be in-progress + self._wait_for_clone_to_be_in_progress(clone1) + + # cancel in-progess clone1 + self._fs_cmd("clone", "cancel", self.volname, clone1) + + # check clone1 status + clone1_result = 
self._get_clone_status(clone1) + self.assertEqual(clone1_result["status"]["state"], "canceled") + self.assertEqual(clone1_result["status"]["failure"]["errno"], "4") + self.assertEqual(clone1_result["status"]["failure"]["error_msg"], "user interrupted clone operation") + + # clone removal should succeed with force after cancelled, remove clone1 + self._fs_cmd("subvolume", "rm", self.volname, clone1, "--force") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_quota_exceeded(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume with 20MB quota + osize = self.DEFAULT_FILE_SIZE*1024*1024*20 + self._fs_cmd("subvolume", "create", self.volname, subvolume,"--mode=777", "--size", str(osize)) + + # do IO, write 50 files of 1MB each to exceed quota. This mostly succeeds as quota enforcement takes time. + try: + self._do_subvolume_io(subvolume, number_of_files=50) + except CommandFailedError: + # ignore quota enforcement error. + pass + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_in_complete_clone_rm(self): + """ + Validates the removal of clone when it is not in 'complete|cancelled|failed' state. + The forceful removl of subvolume clone succeeds only if it's in any of the + 'complete|cancelled|failed' states. It fails with EAGAIN in any other states. 
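+        An in-progress clone must therefore be cancelled (or allowed to complete
+        or fail) before its removal can succeed.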
+ """ + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # Use --force since clone is not complete. Returns EAGAIN as clone is not either complete or cancelled. + try: + self._fs_cmd("subvolume", "rm", self.volname, clone, "--force") + except CommandFailedError as ce: + if ce.exitstatus != errno.EAGAIN: + raise RuntimeError("invalid error code when trying to remove failed clone") + else: + raise RuntimeError("expected error when removing a failed clone") + + # cancel on-going clone + self._fs_cmd("clone", "cancel", self.volname, clone) + + # verify canceled state + self._check_clone_canceled(clone) + + # clone removal should succeed after cancel + self._fs_cmd("subvolume", "rm", self.volname, clone, "--force") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_retain_suid_guid(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # Create a file with suid, guid bits set along with executable bit. 
args = ["subvolume", "getpath", self.volname, subvolume] + args = tuple(args) + subvolpath = self._fs_cmd(*args) + self.assertNotEqual(subvolpath, None) + subvolpath = subvolpath[1:].rstrip() # remove "/" prefix and any trailing newline + + file_path = subvolpath + file_path = os.path.join(subvolpath, "test_suid_file") + self.mount_a.run_shell(["touch", file_path]) + self.mount_a.run_shell(["chmod", "u+sx,g+sx", file_path]) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_and_reclone(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1, clone2 = self._generate_random_clone_name(2) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # check clone status + self._wait_for_clone_to_complete(clone1) + + # verify clone + self._verify_clone(subvolume, snapshot, clone1) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # now the clone is just like a normal subvolume -- snapshot the clone and fork + # another clone. before that, do some IO so it can be differentiated.
+ self._do_subvolume_io(clone1, create_dir="data", number_of_files=32) + + # snapshot clone -- use same snap name + self._fs_cmd("subvolume", "snapshot", "create", self.volname, clone1, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, clone1, snapshot, clone2) + + # check clone status + self._wait_for_clone_to_complete(clone2) + + # verify clone + self._verify_clone(clone1, snapshot, clone2) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, clone1, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + self._fs_cmd("subvolume", "rm", self.volname, clone2) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_cancel_in_progress(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=128) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # cancel on-going clone + self._fs_cmd("clone", "cancel", self.volname, clone) + + # verify canceled state + self._check_clone_canceled(clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone, "--force") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_cancel_pending(self): + """ + this test is a bit more involved compared to canceling an in-progress clone. + we'd need to ensure that a to-be canceled clone has still not been picked up + by cloner threads. exploit the fact that clones are picked up in an FCFS + fashion and there are four (4) cloner threads by default. When the number of + cloner threads increase, this test _may_ start tripping -- so, the number of + clone operations would need to be jacked up. 
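The two mgr/volumes knobs this docstring leans on -- the cloner thread count and the delay before a clone starts -- are ordinary config options that the tests set through self.config_set(). A minimal standalone sketch of doing the same outside teuthology, assuming a working `ceph` CLI on the PATH (values are illustrative only):

    import subprocess

    def tune_clone_knobs(max_clones: int = 2, start_delay_secs: int = 2) -> None:
        """Lower cloner concurrency and delay clone start, mirroring config_set() in the tests."""
        for key, val in (("mgr/volumes/max_concurrent_clones", max_clones),
                         ("mgr/volumes/snapshot_clone_delay", start_delay_secs)):
            subprocess.run(["ceph", "config", "set", "mgr", key, str(val)], check=True)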
+ """ + # default number of clone threads + NR_THREADS = 4 + # good enough for 4 threads + NR_CLONES = 5 + # yeh, 1gig -- we need the clone to run for sometime + FILE_SIZE_MB = 1024 + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clones = self._generate_random_clone_name(NR_CLONES) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=4, file_size=FILE_SIZE_MB) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule clones + for clone in clones: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + to_wait = clones[0:NR_THREADS] + to_cancel = clones[NR_THREADS:] + + # cancel pending clones and verify + for clone in to_cancel: + status = json.loads(self._fs_cmd("clone", "status", self.volname, clone)) + self.assertEqual(status["status"]["state"], "pending") + self._fs_cmd("clone", "cancel", self.volname, clone) + self._check_clone_canceled(clone) + + # let's cancel on-going clones. handle the case where some of the clones + # _just_ complete + for clone in list(to_wait): + try: + self._fs_cmd("clone", "cancel", self.volname, clone) + to_cancel.append(clone) + to_wait.remove(clone) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError("invalid error code when cancelling on-going clone") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + for clone in to_wait: + self._fs_cmd("subvolume", "rm", self.volname, clone) + for clone in to_cancel: + self._fs_cmd("subvolume", "rm", self.volname, clone, "--force") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_different_groups(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + s_group, c_group = self._generate_random_group_name(2) + + # create groups + self._fs_cmd("subvolumegroup", "create", self.volname, s_group) + self._fs_cmd("subvolumegroup", "create", self.volname, c_group) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, s_group, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, subvolume_group=s_group, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, s_group) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, + '--group_name', s_group, '--target_group_name', c_group) + + # check clone status + self._wait_for_clone_to_complete(clone, clone_group=c_group) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, source_group=s_group, clone_group=c_group) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, s_group) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume, s_group) + self._fs_cmd("subvolume", "rm", self.volname, clone, c_group) + + # remove groups + self._fs_cmd("subvolumegroup", "rm", self.volname, s_group) + self._fs_cmd("subvolumegroup", "rm", self.volname, c_group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def 
test_subvolume_snapshot_clone_fail_with_remove(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1, clone2 = self._generate_random_clone_name(2) + + pool_capacity = 32 * 1024 * 1024 + # number of files required to fill up 99% of the pool + nr_files = int((pool_capacity * 0.99) / (TestVolumes.DEFAULT_FILE_SIZE * 1024 * 1024)) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=nr_files) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # add data pool + new_pool = "new_pool" + self.fs.add_data_pool(new_pool) + + self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", new_pool, + "max_bytes", "{0}".format(pool_capacity // 4)) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1, "--pool_layout", new_pool) + + # check clone status -- this should dramatically overshoot the pool quota + self._wait_for_clone_to_complete(clone1) + + # verify clone + self._verify_clone(subvolume, snapshot, clone1, clone_pool=new_pool) + + # wait a bit so that subsequent I/O will give pool full error + time.sleep(120) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone2, "--pool_layout", new_pool) + + # check clone status + self._wait_for_clone_to_fail(clone2) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + try: + self._fs_cmd("subvolume", "rm", self.volname, clone2) + except CommandFailedError as ce: + if ce.exitstatus != errno.EAGAIN: + raise RuntimeError("invalid error code when trying to remove failed clone") + else: + raise RuntimeError("expected error when removing a failed clone") + + # ... 
and with force, failed clone can be removed + self._fs_cmd("subvolume", "rm", self.volname, clone2, "--force") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_on_existing_subvolumes(self): + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolumes + self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--mode=777") + self._fs_cmd("subvolume", "create", self.volname, subvolume2, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume1, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume1, snapshot) + + # schedule a clone with target as subvolume2 + try: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, subvolume2) + except CommandFailedError as ce: + if ce.exitstatus != errno.EEXIST: + raise RuntimeError("invalid error code when cloning to existing subvolume") + else: + raise RuntimeError("expected cloning to fail if the target is an existing subvolume") + + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, clone) + + # schedule a clone with target as clone + try: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume1, snapshot, clone) + except CommandFailedError as ce: + if ce.exitstatus != errno.EEXIST: + raise RuntimeError("invalid error code when cloning to existing clone") + else: + raise RuntimeError("expected cloning to fail if the target is an existing clone") + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume1, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume1, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_pool_layout(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # add data pool + new_pool = "new_pool" + newid = self.fs.add_data_pool(new_pool) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, "--pool_layout", new_pool) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, clone_pool=new_pool) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + subvol_path = self._get_subvolume_path(self.volname, clone) + desired_pool = self.mount_a.getfattr(subvol_path, "ceph.dir.layout.pool") + try: + self.assertEqual(desired_pool, new_pool) + except AssertionError: + self.assertEqual(int(desired_pool), newid) # old kernel returns id + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify 
trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_under_group(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + group = self._generate_random_group_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, '--target_group_name', group) + + # check clone status + self._wait_for_clone_to_complete(clone, clone_group=group) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, clone_group=group) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone, group) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_with_attrs(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + mode = "777" + uid = "1000" + gid = "1000" + new_uid = "1001" + new_gid = "1001" + new_mode = "700" + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", mode, "--uid", uid, "--gid", gid) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # change subvolume attrs (to ensure clone picks up snapshot attrs) + self._do_subvolume_attr_update(subvolume, new_uid, new_gid, new_mode) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_with_upgrade(self): + """ + yet another poor man's upgrade test -- rather than going through a full + upgrade cycle, emulate old types subvolumes by going through the wormhole + and verify clone operation. + further ensure that a legacy volume is not updated to v2, but clone is. 
+ """ + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # emulate a old-fashioned subvolume + createpath = os.path.join(".", "volumes", "_nogroup", subvolume) + self.mount_a.run_shell_payload(f"sudo mkdir -p -m 777 {createpath}", omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=64) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # ensure metadata file is in legacy location, with required version v1 + self._assert_meta_location_and_version(self.volname, subvolume, version=1, legacy=True) + + # Insert delay at the beginning of snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # snapshot should not be deletable now + try: + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, msg="invalid error code when removing source snapshot of a clone") + else: + self.fail("expected removing source snapshot of a clone to fail") + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, source_version=1) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # ensure metadata file is in v2 location, with required version v2 + self._assert_meta_location_and_version(self.volname, clone) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_reconf_max_concurrent_clones(self): + """ + Validate 'max_concurrent_clones' config option + """ + + # get the default number of cloner threads + default_max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(default_max_concurrent_clones, 4) + + # Increase number of cloner threads + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 6) + max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(max_concurrent_clones, 6) + + # Decrease number of cloner threads + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 2) + max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(max_concurrent_clones, 2) + + def test_subvolume_snapshot_config_snapshot_clone_delay(self): + """ + Validate 'snapshot_clone_delay' config option + """ + + # get the default delay before starting the clone + default_timeout = int(self.config_get('mgr', 'mgr/volumes/snapshot_clone_delay')) + self.assertEqual(default_timeout, 0) + + # Insert delay of 2 seconds at the beginning of the snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 2) + default_timeout = int(self.config_get('mgr', 'mgr/volumes/snapshot_clone_delay')) + self.assertEqual(default_timeout, 2) + + # Decrease number of cloner threads + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 2) + 
max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(max_concurrent_clones, 2) + + def test_subvolume_under_group_snapshot_clone(self): + subvolume = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, group, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, subvolume_group=group, number_of_files=32) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone, '--group_name', group) + + # check clone status + self._wait_for_clone_to_complete(clone) + + # verify clone + self._verify_clone(subvolume, snapshot, clone, source_group=group) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot, group) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume, group) + self._fs_cmd("subvolume", "rm", self.volname, clone) + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + +class TestMisc(TestVolumesHelper): + """Miscellaneous tests related to FS volume, subvolume group, and subvolume operations.""" + def test_connection_expiration(self): + # unmount any cephfs mounts + for i in range(0, self.CLIENTS_REQUIRED): + self.mounts[i].umount_wait() + sessions = self._session_list() + self.assertLessEqual(len(sessions), 1) # maybe mgr is already mounted + + # Get the mgr to definitely mount cephfs + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + sessions = self._session_list() + self.assertEqual(len(sessions), 1) + + # Now wait for the mgr to expire the connection: + self.wait_until_evicted(sessions[0]['id'], timeout=90) + + def test_mgr_eviction(self): + # unmount any cephfs mounts + for i in range(0, self.CLIENTS_REQUIRED): + self.mounts[i].umount_wait() + sessions = self._session_list() + self.assertLessEqual(len(sessions), 1) # maybe mgr is already mounted + + # Get the mgr to definitely mount cephfs + subvolume = self._generate_random_subvolume_name() + self._fs_cmd("subvolume", "create", self.volname, subvolume) + sessions = self._session_list() + self.assertEqual(len(sessions), 1) + + # Now fail the mgr, check the session was evicted + mgr = self.mgr_cluster.get_active_id() + self.mgr_cluster.mgr_fail(mgr) + self.wait_until_evicted(sessions[0]['id']) + + def test_names_can_only_be_goodchars(self): + """ + Test the creating vols, subvols subvolgroups fails when their names uses + characters beyond [a-zA-Z0-9 -_.]. 
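As a quick self-contained illustration of the naming rule stated above (the real validation lives in the mgr/volumes plugin and may differ in detail), a name passes only if it sticks to the listed characters:

    import re

    # character class quoted from the docstring; '-' moved to the end so it
    # is a literal rather than a range
    GOODCHARS = re.compile(r'^[a-zA-Z0-9 ._-]+$')

    assert GOODCHARS.match('testvol')       # accepted by the test below
    assert not GOODCHARS.match('abcd@#')    # rejected below with CommandFailedError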
+ """ + volname, badname = 'testvol', 'abcd@#' + + with self.assertRaises(CommandFailedError): + self._fs_cmd('volume', 'create', badname) + self._fs_cmd('volume', 'create', volname) + + with self.assertRaises(CommandFailedError): + self._fs_cmd('subvolumegroup', 'create', volname, badname) + + with self.assertRaises(CommandFailedError): + self._fs_cmd('subvolume', 'create', volname, badname) + self._fs_cmd('volume', 'rm', volname, '--yes-i-really-mean-it') + + def test_subvolume_ops_on_nonexistent_vol(self): + # tests the fs subvolume operations on non existing volume + + volname = "non_existent_subvolume" + + # try subvolume operations + for op in ("create", "rm", "getpath", "info", "resize", "pin", "ls"): + try: + if op == "resize": + self._fs_cmd("subvolume", "resize", volname, "subvolname_1", "inf") + elif op == "pin": + self._fs_cmd("subvolume", "pin", volname, "subvolname_1", "export", "1") + elif op == "ls": + self._fs_cmd("subvolume", "ls", volname) + else: + self._fs_cmd("subvolume", op, volname, "subvolume_1") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT) + else: + self.fail("expected the 'fs subvolume {0}' command to fail".format(op)) + + # try subvolume snapshot operations and clone create + for op in ("create", "rm", "info", "protect", "unprotect", "ls", "clone"): + try: + if op == "ls": + self._fs_cmd("subvolume", "snapshot", op, volname, "subvolume_1") + elif op == "clone": + self._fs_cmd("subvolume", "snapshot", op, volname, "subvolume_1", "snapshot_1", "clone_1") + else: + self._fs_cmd("subvolume", "snapshot", op, volname, "subvolume_1", "snapshot_1") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT) + else: + self.fail("expected the 'fs subvolume snapshot {0}' command to fail".format(op)) + + # try, clone status + try: + self._fs_cmd("clone", "status", volname, "clone_1") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT) + else: + self.fail("expected the 'fs clone status' command to fail") + + # try subvolumegroup operations + for op in ("create", "rm", "getpath", "pin", "ls"): + try: + if op == "pin": + self._fs_cmd("subvolumegroup", "pin", volname, "group_1", "export", "0") + elif op == "ls": + self._fs_cmd("subvolumegroup", op, volname) + else: + self._fs_cmd("subvolumegroup", op, volname, "group_1") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT) + else: + self.fail("expected the 'fs subvolumegroup {0}' command to fail".format(op)) + + # try subvolumegroup snapshot operations + for op in ("create", "rm", "ls"): + try: + if op == "ls": + self._fs_cmd("subvolumegroup", "snapshot", op, volname, "group_1") + else: + self._fs_cmd("subvolumegroup", "snapshot", op, volname, "group_1", "snapshot_1") + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOENT) + else: + self.fail("expected the 'fs subvolumegroup snapshot {0}' command to fail".format(op)) + + def test_subvolume_upgrade_legacy_to_v1(self): + """ + poor man's upgrade test -- rather than going through a full upgrade cycle, + emulate subvolumes by going through the wormhole and verify if they are + accessible. + further ensure that a legacy volume is not updated to v2. 
+ """ + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + # emulate a old-fashioned subvolume -- one in the default group and + # the other in a custom group + createpath1 = os.path.join(".", "volumes", "_nogroup", subvolume1) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath1], omit_sudo=False) + + # create group + createpath2 = os.path.join(".", "volumes", group, subvolume2) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath2], omit_sudo=False) + + # this would auto-upgrade on access without anyone noticing + subvolpath1 = self._fs_cmd("subvolume", "getpath", self.volname, subvolume1) + self.assertNotEqual(subvolpath1, None) + subvolpath1 = subvolpath1.rstrip() # remove "/" prefix and any trailing newline + + subvolpath2 = self._fs_cmd("subvolume", "getpath", self.volname, subvolume2, group) + self.assertNotEqual(subvolpath2, None) + subvolpath2 = subvolpath2.rstrip() # remove "/" prefix and any trailing newline + + # and... the subvolume path returned should be what we created behind the scene + self.assertEqual(createpath1[1:], subvolpath1) + self.assertEqual(createpath2[1:], subvolpath2) + + # ensure metadata file is in legacy location, with required version v1 + self._assert_meta_location_and_version(self.volname, subvolume1, version=1, legacy=True) + self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=1, legacy=True) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_subvolume_no_upgrade_v1_sanity(self): + """ + poor man's upgrade test -- theme continues... + + This test is to ensure v1 subvolumes are retained as is, due to a snapshot being present, and runs through + a series of operations on the v1 subvolume to ensure they work as expected. 
+ """ + subvol_md = ["atime", "bytes_pcent", "bytes_quota", "bytes_used", "created_at", "ctime", + "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace", + "type", "uid", "features", "state"] + snap_md = ["created_at", "data_pool", "has_pending_clones"] + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone1, clone2 = self._generate_random_clone_name(2) + mode = "777" + uid = "1000" + gid = "1000" + + # emulate a v1 subvolume -- in the default group + subvolume_path = self._create_v1_subvolume(subvolume) + + # getpath + subvolpath = self._get_subvolume_path(self.volname, subvolume) + self.assertEqual(subvolpath, subvolume_path) + + # ls + subvolumes = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumes), 1, "subvolume ls count mismatch, expected '1', found {0}".format(len(subvolumes))) + self.assertEqual(subvolumes[0]['name'], subvolume, + "subvolume name mismatch in ls output, expected '{0}', found '{1}'".format(subvolume, subvolumes[0]['name'])) + + # info + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + for md in subvol_md: + self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) + + self.assertEqual(subvol_info["state"], "complete", + msg="expected state to be 'complete', found '{0}".format(subvol_info["state"])) + self.assertEqual(len(subvol_info["features"]), 2, + msg="expected 1 feature, found '{0}' ({1})".format(len(subvol_info["features"]), subvol_info["features"])) + for feature in ['snapshot-clone', 'snapshot-autoprotect']: + self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature)) + + # resize + nsize = self.DEFAULT_FILE_SIZE*1024*1024*10 + self._fs_cmd("subvolume", "resize", self.volname, subvolume, str(nsize)) + subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) + for md in subvol_md: + self.assertIn(md, subvol_info, "'{0}' key not present in metadata of subvolume".format(md)) + self.assertEqual(subvol_info["bytes_quota"], nsize, "bytes_quota should be set to '{0}'".format(nsize)) + + # create (idempotent) (change some attrs, to ensure attrs are preserved from the snapshot on clone) + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", mode, "--uid", uid, "--gid", gid) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=8) + + # snap-create + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # check clone status + self._wait_for_clone_to_complete(clone1) + + # ensure clone is v2 + self._assert_meta_location_and_version(self.volname, clone1, version=2) + + # verify clone + self._verify_clone(subvolume, snapshot, clone1, source_version=1) + + # clone (older snapshot) + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, 'fake', clone2) + + # check clone status + self._wait_for_clone_to_complete(clone2) + + # ensure clone is v2 + self._assert_meta_location_and_version(self.volname, clone2, version=2) + + # verify clone + # TODO: rentries will mismatch till this is fixed https://tracker.ceph.com/issues/46747 + #self._verify_clone(subvolume, 'fake', clone2, source_version=1) + + # snap-info + snap_info = json.loads(self._get_subvolume_snapshot_info(self.volname, subvolume, snapshot)) + for md in snap_md: + self.assertIn(md, 
snap_info, "'{0}' key not present in metadata of snapshot".format(md)) + self.assertEqual(snap_info["has_pending_clones"], "no") + + # snap-ls + subvol_snapshots = json.loads(self._fs_cmd('subvolume', 'snapshot', 'ls', self.volname, subvolume)) + self.assertEqual(len(subvol_snapshots), 2, "subvolume ls count mismatch, expected 2', found {0}".format(len(subvol_snapshots))) + snapshotnames = [snapshot['name'] for snapshot in subvol_snapshots] + for name in [snapshot, 'fake']: + self.assertIn(name, snapshotnames, msg="expected snapshot '{0}' in subvolume snapshot ls".format(name)) + + # snap-rm + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, "fake") + + # ensure volume is still at version 1 + self._assert_meta_location_and_version(self.volname, subvolume, version=1) + + # rm + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + self._fs_cmd("subvolume", "rm", self.volname, clone2) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_no_upgrade_v1_to_v2(self): + """ + poor man's upgrade test -- theme continues... + ensure v1 to v2 upgrades are not done automatically due to various states of v1 + """ + subvolume1, subvolume2, subvolume3 = self._generate_random_subvolume_name(3) + group = self._generate_random_group_name() + + # emulate a v1 subvolume -- in the default group + subvol1_path = self._create_v1_subvolume(subvolume1) + + # emulate a v1 subvolume -- in a custom group + subvol2_path = self._create_v1_subvolume(subvolume2, subvol_group=group) + + # emulate a v1 subvolume -- in a clone pending state + self._create_v1_subvolume(subvolume3, subvol_type='clone', has_snapshot=False, state='pending') + + # this would attempt auto-upgrade on access, but fail to do so as snapshots exist + subvolpath1 = self._get_subvolume_path(self.volname, subvolume1) + self.assertEqual(subvolpath1, subvol1_path) + + subvolpath2 = self._get_subvolume_path(self.volname, subvolume2, group_name=group) + self.assertEqual(subvolpath2, subvol2_path) + + # this would attempt auto-upgrade on access, but fail to do so as volume is not complete + # use clone status, as only certain operations are allowed in pending state + status = json.loads(self._fs_cmd("clone", "status", self.volname, subvolume3)) + self.assertEqual(status["status"]["state"], "pending") + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume1, "fake") + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume2, "fake", group) + + # ensure metadata file is in v1 location, with version retained as v1 + self._assert_meta_location_and_version(self.volname, subvolume1, version=1) + self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=1) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group) + try: + self._fs_cmd("subvolume", "rm", self.volname, subvolume3) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.EAGAIN, "invalid error code on rm of subvolume undergoing clone") + else: + self.fail("expected rm of subvolume undergoing clone to fail") + + # ensure metadata file is in v1 location, with version retained as v1 + self._assert_meta_location_and_version(self.volname, subvolume3, version=1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume3, 
"--force") + + # verify list subvolumes returns an empty list + subvolumels = json.loads(self._fs_cmd('subvolume', 'ls', self.volname)) + self.assertEqual(len(subvolumels), 0) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_upgrade_v1_to_v2(self): + """ + poor man's upgrade test -- theme continues... + ensure v1 to v2 upgrades work + """ + subvolume1, subvolume2 = self._generate_random_subvolume_name(2) + group = self._generate_random_group_name() + + # emulate a v1 subvolume -- in the default group + subvol1_path = self._create_v1_subvolume(subvolume1, has_snapshot=False) + + # emulate a v1 subvolume -- in a custom group + subvol2_path = self._create_v1_subvolume(subvolume2, subvol_group=group, has_snapshot=False) + + # this would attempt auto-upgrade on access + subvolpath1 = self._get_subvolume_path(self.volname, subvolume1) + self.assertEqual(subvolpath1, subvol1_path) + + subvolpath2 = self._get_subvolume_path(self.volname, subvolume2, group_name=group) + self.assertEqual(subvolpath2, subvol2_path) + + # ensure metadata file is in v2 location, with version retained as v2 + self._assert_meta_location_and_version(self.volname, subvolume1, version=2) + self._assert_meta_location_and_version(self.volname, subvolume2, subvol_group=group, version=2) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume1) + self._fs_cmd("subvolume", "rm", self.volname, subvolume2, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_malicious_metafile_on_legacy_to_v1_upgrade(self): + """ + Validate handcrafted .meta file on legacy subvol root doesn't break the system + on legacy subvol upgrade to v1 + poor man's upgrade test -- theme continues... + """ + subvol1, subvol2 = self._generate_random_subvolume_name(2) + + # emulate a old-fashioned subvolume in the default group + createpath1 = os.path.join(".", "volumes", "_nogroup", subvol1) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath1], omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath1, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # create v2 subvolume + self._fs_cmd("subvolume", "create", self.volname, subvol2) + + # Create malicious .meta file in legacy subvolume root. Copy v2 subvolume + # .meta into legacy subvol1's root + subvol2_metapath = os.path.join(".", "volumes", "_nogroup", subvol2, ".meta") + self.mount_a.run_shell(['sudo', 'cp', subvol2_metapath, createpath1], omit_sudo=False) + + # Upgrade legacy subvol1 to v1 + subvolpath1 = self._fs_cmd("subvolume", "getpath", self.volname, subvol1) + self.assertNotEqual(subvolpath1, None) + subvolpath1 = subvolpath1.rstrip() + + # the subvolume path returned should not be of subvol2 from handcrafted + # .meta file + self.assertEqual(createpath1[1:], subvolpath1) + + # ensure metadata file is in legacy location, with required version v1 + self._assert_meta_location_and_version(self.volname, subvol1, version=1, legacy=True) + + # Authorize alice authID read-write access to subvol1. 
Verify it authorizes subvol1 path and not subvol2 + # path whose '.meta' file is copied to subvol1 root + authid1 = "alice" + self._fs_cmd("subvolume", "authorize", self.volname, subvol1, authid1) + + # Validate that the mds path added is of subvol1 and not of subvol2 + out = json.loads(self.fs.mon_manager.raw_cluster_cmd("auth", "get", "client.alice", "--format=json-pretty")) + self.assertEqual("client.alice", out[0]["entity"]) + self.assertEqual("allow rw path={0}".format(createpath1[1:]), out[0]["caps"]["mds"]) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvol1) + self._fs_cmd("subvolume", "rm", self.volname, subvol2) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_binary_metafile_on_legacy_to_v1_upgrade(self): + """ + Validate binary .meta file on legacy subvol root doesn't break the system + on legacy subvol upgrade to v1 + poor man's upgrade test -- theme continues... + """ + subvol = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate a old-fashioned subvolume -- in a custom group + createpath = os.path.join(".", "volumes", group, subvol) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # Create unparseable binary .meta file on legacy subvol's root + meta_contents = os.urandom(4096) + meta_filepath = os.path.join(self.mount_a.mountpoint, createpath, ".meta") + self.mount_a.client_remote.write_file(meta_filepath, meta_contents, sudo=True) + + # Upgrade legacy subvol to v1 + subvolpath = self._fs_cmd("subvolume", "getpath", self.volname, subvol, group) + self.assertNotEqual(subvolpath, None) + subvolpath = subvolpath.rstrip() + + # The legacy subvolume path should be returned for subvol. + # Should ignore unparseable binary .meta file in subvol's root + self.assertEqual(createpath[1:], subvolpath) + + # ensure metadata file is in legacy location, with required version v1 + self._assert_meta_location_and_version(self.volname, subvol, subvol_group=group, version=1, legacy=True) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvol, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + def test_unparseable_metafile_on_legacy_to_v1_upgrade(self): + """ + Validate unparseable text .meta file on legacy subvol root doesn't break the system + on legacy subvol upgrade to v1 + poor man's upgrade test -- theme continues... 
+ """ + subvol = self._generate_random_subvolume_name() + group = self._generate_random_group_name() + + # emulate an old-fashioned subvolume -- in a custom group + createpath = os.path.join(".", "volumes", group, subvol) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # Create unparseable text .meta file on legacy subvol's root + meta_contents = "unparseable config\nfile ...\nunparseable config\nfile ...\n" + meta_filepath = os.path.join(self.mount_a.mountpoint, createpath, ".meta") + self.mount_a.client_remote.write_file(meta_filepath, meta_contents, sudo=True) + + # Upgrade legacy subvol to v1 + subvolpath = self._fs_cmd("subvolume", "getpath", self.volname, subvol, group) + self.assertNotEqual(subvolpath, None) + subvolpath = subvolpath.rstrip() + + # The legacy subvolume path should be returned for subvol. + # Should ignore the unparseable text .meta file in subvol's root + self.assertEqual(createpath[1:], subvolpath) + + # ensure metadata file is in legacy location, with required version v1 + self._assert_meta_location_and_version(self.volname, subvol, subvol_group=group, version=1, legacy=True) + + # remove subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvol, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + + # remove group + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + +class TestPerModuleFinsherThread(TestVolumesHelper): + """ + Per module finisher thread tests related to mgr/volume cmds. + This is used in conjunction with check_counter with min val being 4 + as four subvolume cmds are run + """ + def test_volumes_module_finisher_thread(self): + subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3) + group = self._generate_random_group_name() + + # create group + self._fs_cmd("subvolumegroup", "create", self.volname, group) + + # create subvolumes in group + self._fs_cmd("subvolume", "create", self.volname, subvol1, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvol2, "--group_name", group) + self._fs_cmd("subvolume", "create", self.volname, subvol3, "--group_name", group) + + self._fs_cmd("subvolume", "rm", self.volname, subvol1, group) + self._fs_cmd("subvolume", "rm", self.volname, subvol2, group) + self._fs_cmd("subvolume", "rm", self.volname, subvol3, group) + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() diff --git a/qa/tasks/cephfs/xfstests_dev.py b/qa/tasks/cephfs/xfstests_dev.py new file mode 100644 index 000000000..cbb344305 --- /dev/null +++ b/qa/tasks/cephfs/xfstests_dev.py @@ -0,0 +1,303 @@ +from io import StringIO +from logging import getLogger +from os import getcwd as os_getcwd +from os.path import join +from textwrap import dedent + + +from tasks.cephfs.cephfs_test_case import CephFSTestCase +from tasks.cephfs.fuse_mount import FuseMount +from tasks.cephfs.kernel_mount import KernelMount + + +log = getLogger(__name__) + + +# TODO: add code to run non-ACL tests too. +# TODO: make xfstests-dev tests run without running `make install`.
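XFSTestsDev below only prepares the environment (clones xfstests-dev, installs dependencies, writes local.config and ceph.exclude); concrete tests come from subclasses. A hypothetical sketch of such a subclass -- not part of this file, and the real drivers under qa/tasks/cephfs may invoke the check script differently:

    from tasks.cephfs.xfstests_dev import XFSTestsDev

    class TestXFSTestsExample(XFSTestsDev):
        def test_generic_001(self):
            # run a single xfstests-dev case from the root of the cloned repo
            self.mount_a.client_remote.run(args=['sudo', './check', 'generic/001'],
                                           cwd=self.xfstests_repo_path,
                                           omit_sudo=False)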
+class XFSTestsDev(CephFSTestCase): + + RESULTS_DIR = "results" + + def setUp(self): + super(XFSTestsDev, self).setUp() + self.setup_xfsprogs_devs() + self.prepare_xfstests_devs() + + def setup_xfsprogs_devs(self): + self.install_xfsprogs = False + + def prepare_xfstests_devs(self): + # NOTE: To run a quick test with vstart_runner.py, enable the next line + # and disable calls to get_repo(), install_deps(), and + # build_and_install() and also disable lines in tearDown() for repo + # deletion. + #self.xfstests_repo_path = '/path/to/xfstests-dev' + + self.get_repos() + self.get_test_and_scratch_dirs_ready() + self.install_deps() + self.create_reqd_users() + self.write_local_config() + self.write_ceph_exclude() + self.build_and_install() + + def tearDown(self): + self.del_users_and_groups() + self.del_repos() + super(XFSTestsDev, self).tearDown() + + def del_users_and_groups(self): + self.mount_a.client_remote.run(args=['sudo', 'userdel', '--force', + '--remove', 'fsgqa'], + omit_sudo=False, check_status=False) + self.mount_a.client_remote.run(args=['sudo', 'userdel', '--force', + '--remove', '123456-fsgqa'], + omit_sudo=False, check_status=False) + self.mount_a.client_remote.run(args=['sudo', 'groupdel', 'fsgqa'], + omit_sudo=False, check_status=False) + + def del_repos(self): + self.save_results_dir() + self.mount_a.client_remote.run(args=f'sudo rm -rf {self.xfstests_repo_path}', + omit_sudo=False, check_status=False) + + if self.install_xfsprogs: + self.mount_a.client_remote.run(args=f'sudo rm -rf {self.xfsprogs_repo_path}', + omit_sudo=False, check_status=False) + + def save_results_dir(self): + """ + When tests in xfstests-dev repo are executed, logs are created and + saved under a directory named "results" that lies at the repo root. + In case a test from xfstests-dev repo fails, these logs will help find + the cause of the failure. + + Since there's no option in teuthology to copy a directory lying at a + custom location in order to save it from teuthology test runner's tear + down, let's copy this directory to a standard location that teuthology + copies away before erasing all data on the test machine. The standard + location chosen here is the Ceph log directory. + + In case of vstart_runner.py, this method does nothing. + """ + # No need to save results dir in case of vstart_runner.py. + for x in ('LocalFuseMount', 'LocalKernelMount'): + if x in self.mount_a.__class__.__name__: + return + + src = join(self.xfstests_repo_path, self.RESULTS_DIR) + + if self.mount_a.run_shell(f'sudo stat {src}', + check_status=False, omit_sudo=False).returncode != 0: + log.info(f'xfstests-dev repo contains no directory named ' + f'"{self.RESULTS_DIR}". repo location: {self.xfstests_repo_path}') + return + + std_loc = '/var/log/ceph' # standard location + dst = join(std_loc, 'xfstests-dev-results') + self.mount_a.run_shell(f'sudo mkdir -p {dst}', omit_sudo=False) + self.mount_a.run_shell(f'sudo cp -r {src} {dst}', omit_sudo=False) + log.info(f'results dir from xfstests-dev has been saved; it was ' + f'copied from {self.xfstests_repo_path} to {std_loc}.') + + def build_and_install(self): + # NOTE: On teuthology machines it's necessary to run "make" as + # superuser since the repo is cloned somewhere in /tmp.
+ self.mount_a.client_remote.run(args=['sudo', 'make'], + cwd=self.xfstests_repo_path, stdout=StringIO(), + stderr=StringIO()) + self.mount_a.client_remote.run(args=['sudo', 'make', 'install'], + cwd=self.xfstests_repo_path, omit_sudo=False, + stdout=StringIO(), stderr=StringIO()) + + if self.install_xfsprogs: + self.mount_a.client_remote.run(args=['sudo', 'make'], + cwd=self.xfsprogs_repo_path, + stdout=StringIO(), stderr=StringIO()) + self.mount_a.client_remote.run(args=['sudo', 'make', 'install'], + cwd=self.xfsprogs_repo_path, omit_sudo=False, + stdout=StringIO(), stderr=StringIO()) + + def get_repos(self): + """ + Clone xfstests_dev and xfsprogs-dev repositories. If already present, + update them. The xfsprogs-dev will be used to test the encrypt. + """ + # TODO: make sure that repo is not cloned for every test. it should + # happen only once. + remoteurl = 'https://git.ceph.com/xfstests-dev.git' + self.xfstests_repo_path = self.mount_a.client_remote.mkdtemp(suffix= + 'xfstests-dev') + self.mount_a.run_shell(['git', 'clone', remoteurl, '--depth', '1', + self.xfstests_repo_path]) + + if self.install_xfsprogs: + remoteurl = 'https://git.ceph.com/xfsprogs-dev.git' + self.xfsprogs_repo_path = self.mount_a.client_remote.mkdtemp(suffix= + 'xfsprogs-dev') + self.mount_a.run_shell(['git', 'clone', remoteurl, '--depth', '1', + self.xfsprogs_repo_path]) + + def get_admin_key(self): + import configparser + + cp = configparser.ConfigParser() + cp.read_string(self.fs.mon_manager.raw_cluster_cmd( + 'auth', 'get-or-create', 'client.admin')) + + return cp['client.admin']['key'] + + def get_test_and_scratch_dirs_ready(self): + """ "test" and "scratch" directories are directories inside Ceph FS. + And, test and scratch mounts are path on the local FS where "test" + and "scratch" directories would be mounted. Look at xfstests-dev + local.config's template inside this file to get some context. + """ + self.test_dirname = 'test' + self.mount_a.run_shell(['mkdir', self.test_dirname]) + # read var name as "test dir's mount path" + self.test_dirs_mount_path = self.mount_a.client_remote.mkdtemp( + suffix=self.test_dirname) + + self.scratch_dirname = 'scratch' + self.mount_a.run_shell(['mkdir', self.scratch_dirname]) + # read var name as "scratch dir's mount path" + self.scratch_dirs_mount_path = self.mount_a.client_remote.mkdtemp( + suffix=self.scratch_dirname) + + def install_deps(self): + from teuthology.misc import get_system_type + + distro, version = get_system_type(self.mount_a.client_remote, + distro=True, version=True) + distro = distro.lower() + major_ver_num = int(version.split('.')[0]) # only keep major release + # number + log.info(f'distro and version detected is "{distro}" and "{version}".') + + # we keep fedora here so that right deps are installed when this test + # is run locally by a dev. 
+ if distro in ('redhatenterpriseserver', 'redhatenterprise', 'fedora', + 'centos', 'centosstream', 'rhel'): + deps = """acl attr automake bc dbench dump e2fsprogs fio \ + gawk gcc indent libtool lvm2 make psmisc quota sed \ + xfsdump xfsprogs \ + libacl-devel libattr-devel libaio-devel libuuid-devel \ + xfsprogs-devel btrfs-progs-devel python3 sqlite""".split() + + if self.install_xfsprogs: + if distro == 'centosstream' and major_ver_num == 8: + deps += ['--enablerepo=powertools'] + deps += ['inih-devel', 'userspace-rcu-devel', 'libblkid-devel', + 'gettext', 'libedit-devel', 'libattr-devel', + 'device-mapper-devel', 'libicu-devel'] + + deps_old_distros = ['xfsprogs-qa-devel'] + + if distro != 'fedora' and major_ver_num > 7: + deps.remove('btrfs-progs-devel') + + args = ['sudo', 'yum', 'install', '-y'] + deps + deps_old_distros + elif distro == 'ubuntu': + deps = """xfslibs-dev uuid-dev libtool-bin \ + e2fsprogs automake gcc libuuid1 quota attr libattr1-dev make \ + libacl1-dev libaio-dev xfsprogs libgdbm-dev gawk fio dbench \ + uuid-runtime python sqlite3""".split() + + if self.install_xfsprogs: + deps += ['libinih-dev', 'liburcu-dev', 'libblkid-dev', + 'gettext', 'libedit-dev', 'libattr1-dev', + 'libdevmapper-dev', 'libicu-dev', 'pkg-config'] + + if major_ver_num >= 19: + deps[deps.index('python')] ='python2' + args = ['sudo', 'apt-get', 'install', '-y'] + deps + else: + raise RuntimeError('expected a yum based or a apt based system') + + self.mount_a.client_remote.run(args=args, omit_sudo=False) + + def create_reqd_users(self): + self.mount_a.client_remote.run(args=['sudo', 'useradd', '-m', 'fsgqa'], + omit_sudo=False, check_status=False) + self.mount_a.client_remote.run(args=['sudo', 'groupadd', 'fsgqa'], + omit_sudo=False, check_status=False) + self.mount_a.client_remote.run(args=['sudo', 'useradd', 'fsgqa2'], + omit_sudo=False, check_status=False) + self.mount_a.client_remote.run(args=['sudo', 'useradd', + '123456-fsgqa'], omit_sudo=False, + check_status=False) + + def write_local_config(self, options=None): + if isinstance(self.mount_a, KernelMount): + conf_contents = self._gen_conf_for_kernel_mnt(options) + elif isinstance(self.mount_a, FuseMount): + conf_contents = self._gen_conf_for_fuse_mnt(options) + + self.mount_a.client_remote.write_file(join(self.xfstests_repo_path, + 'local.config'), + conf_contents, sudo=True) + log.info(f'local.config\'s contents -\n{conf_contents}') + + def _gen_conf_for_kernel_mnt(self, options=None): + """ + Generate local.config for CephFS kernel client. + """ + _options = '' if not options else ',' + options + mon_sock = self.fs.mon_manager.get_msgrv1_mon_socks()[0] + test_dev = mon_sock + ':/' + self.test_dirname + scratch_dev = mon_sock + ':/' + self.scratch_dirname + + return dedent(f'''\ + export FSTYP=ceph + export TEST_DEV={test_dev} + export TEST_DIR={self.test_dirs_mount_path} + export SCRATCH_DEV={scratch_dev} + export SCRATCH_MNT={self.scratch_dirs_mount_path} + export CEPHFS_MOUNT_OPTIONS="-o name=admin,secret={self.get_admin_key()}{_options}" + ''') + + def _gen_conf_for_fuse_mnt(self, options=None): + """ + Generate local.config for CephFS FUSE client. + """ + mon_sock = self.fs.mon_manager.get_msgrv1_mon_socks()[0] + test_dev = 'ceph-fuse' + scratch_dev = '' + # XXX: Please note that ceph_fuse_bin_path is not ideally required + # because ceph-fuse binary ought to be present in one of the standard + # locations during teuthology tests. 
But then testing with + # vstart_runner.py will not work since ceph-fuse binary won't be + # present in a standard locations during these sessions. Thus, this + # workaround. + ceph_fuse_bin_path = 'ceph-fuse' # bin expected to be in env + if 'LocalFuseMount' in str(type(self.mount_a)): # for vstart_runner.py runs + ceph_fuse_bin_path = join(os_getcwd(), 'bin', 'ceph-fuse') + + keyring_path = self.mount_a.client_remote.mktemp( + data=self.fs.mon_manager.get_keyring('client.admin')+'\n') + + lastline = (f'export CEPHFS_MOUNT_OPTIONS="-m {mon_sock} -k ' + f'{keyring_path} --client_mountpoint /{self.test_dirname}') + lastline += f'-o {options}"' if options else '"' + + return dedent(f'''\ + export FSTYP=ceph-fuse + export CEPH_FUSE_BIN_PATH={ceph_fuse_bin_path} + export TEST_DEV={test_dev} # without this tests won't get started + export TEST_DIR={self.test_dirs_mount_path} + export SCRATCH_DEV={scratch_dev} + export SCRATCH_MNT={self.scratch_dirs_mount_path} + {lastline} + ''') + + def write_ceph_exclude(self): + # These tests will fail or take too much time and will + # make the test timedout, just skip them for now. + xfstests_exclude_contents = dedent('''\ + {c}/001 {g}/003 {g}/020 {g}/075 {g}/317 {g}/538 {g}/531 + ''').format(g="generic", c="ceph") + + self.mount_a.client_remote.write_file(join(self.xfstests_repo_path, 'ceph.exclude'), + xfstests_exclude_contents, sudo=True) diff --git a/qa/tasks/cephfs_mirror.py b/qa/tasks/cephfs_mirror.py new file mode 100644 index 000000000..9602a5a7f --- /dev/null +++ b/qa/tasks/cephfs_mirror.py @@ -0,0 +1,73 @@ +""" +Task for running cephfs mirror daemons +""" + +import logging + +from teuthology.orchestra import run +from teuthology import misc +from teuthology.exceptions import ConfigError +from teuthology.task import Task +from tasks.ceph_manager import get_valgrind_args +from tasks.util import get_remote_for_role + +log = logging.getLogger(__name__) + +class CephFSMirror(Task): + def __init__(self, ctx, config): + super(CephFSMirror, self).__init__(ctx, config) + self.log = log + + def setup(self): + super(CephFSMirror, self).setup() + try: + self.client = self.config['client'] + except KeyError: + raise ConfigError('cephfs-mirror requires a client to connect') + + self.cluster_name, type_, self.client_id = misc.split_role(self.client) + if not type_ == 'client': + raise ConfigError(f'client role {self.client} must be a client') + self.remote = get_remote_for_role(self.ctx, self.client) + + def begin(self): + super(CephFSMirror, self).begin() + testdir = misc.get_testdir(self.ctx) + + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'daemon-helper', + 'term', + ] + + if 'valgrind' in self.config: + args = get_valgrind_args( + testdir, 'cephfs-mirror-{id}'.format(id=self.client), + args, self.config.get('valgrind')) + + args.extend([ + 'cephfs-mirror', + '--cluster', + self.cluster_name, + '--id', + self.client_id, + ]) + if 'run_in_foreground' in self.config: + args.extend(['--foreground']) + + self.ctx.daemons.add_daemon( + self.remote, 'cephfs-mirror', self.client, + args=args, + logger=self.log.getChild(self.client), + stdin=run.PIPE, + wait=False, + ) + + def end(self): + mirror_daemon = self.ctx.daemons.get_daemon('cephfs-mirror', self.client) + mirror_daemon.stop() + super(CephFSMirror, self).end() + +task = CephFSMirror diff --git a/qa/tasks/cephfs_mirror_thrash.py b/qa/tasks/cephfs_mirror_thrash.py new file mode 100644 index 000000000..91f60ac50 --- /dev/null +++ 
b/qa/tasks/cephfs_mirror_thrash.py @@ -0,0 +1,219 @@ +""" +Task for thrashing cephfs-mirror daemons +""" + +import contextlib +import logging +import random +import signal +import socket +import time + +from gevent import sleep +from gevent.greenlet import Greenlet +from gevent.event import Event + +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra import run +from tasks.thrasher import Thrasher + +log = logging.getLogger(__name__) + + +class CephFSMirrorThrasher(Thrasher, Greenlet): + """ + CephFSMirrorThrasher:: + + The CephFSMirrorThrasher thrashes cephfs-mirror daemons during execution of other + tasks (workunits, etc). + + The config is optional. Many of the config parameters are a maximum value + to use when selecting a random value from a range. The config is a dict + containing some or all of: + + cluster: [default: ceph] cluster to thrash + + max_thrash: [default: 1] the maximum number of active cephfs-mirror daemons per + cluster will be thrashed at any given time. + + min_thrash_delay: [default: 60] minimum number of seconds to delay before + thrashing again. + + max_thrash_delay: [default: 120] maximum number of seconds to delay before + thrashing again. + + max_revive_delay: [default: 10] maximum number of seconds to delay before + bringing back a thrashed cephfs-mirror daemon. + + randomize: [default: true] enables randomization and use the max/min values + + seed: [no default] seed the random number generator + + Examples:: + + The following example disables randomization, and uses the max delay + values: + + tasks: + - ceph: + - cephfs_mirror_thrash: + randomize: False + max_thrash_delay: 10 + """ + + def __init__(self, ctx, config, cluster, daemons): + super(CephFSMirrorThrasher, self).__init__() + + self.ctx = ctx + self.config = config + self.cluster = cluster + self.daemons = daemons + + self.logger = log + self.name = 'thrasher.cephfs_mirror.[{cluster}]'.format(cluster = cluster) + self.stopping = Event() + + self.randomize = bool(self.config.get('randomize', True)) + self.max_thrash = int(self.config.get('max_thrash', 1)) + self.min_thrash_delay = float(self.config.get('min_thrash_delay', 5.0)) + self.max_thrash_delay = float(self.config.get('max_thrash_delay', 10)) + self.max_revive_delay = float(self.config.get('max_revive_delay', 15.0)) + + def _run(self): + try: + self.do_thrash() + except Exception as e: + # See _run exception comment for MDSThrasher + self.set_thrasher_exception(e) + self.logger.exception("exception:") + # Allow successful completion so gevent doesn't see an exception. + # The DaemonWatchdog will observe the error and tear down the test. 
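+ # (set_thrasher_exception() records the error on the thrasher; the
+ # task() context manager below checks thrasher.exception on exit and
+ # raises, so a thrashing failure still fails the overall run.)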
+ + def log(self, x): + """Write data to logger assigned to this CephFSMirrorThrasher""" + self.logger.info(x) + + def stop(self): + self.stopping.set() + + def do_thrash(self): + """ + Perform the random thrashing action + """ + + self.log('starting thrash for cluster {cluster}'.format(cluster=self.cluster)) + stats = { + "kill": 0, + } + + while not self.stopping.is_set(): + delay = self.max_thrash_delay + if self.randomize: + delay = random.randrange(self.min_thrash_delay, self.max_thrash_delay) + + if delay > 0.0: + self.log('waiting for {delay} secs before thrashing'.format(delay=delay)) + self.stopping.wait(delay) + if self.stopping.is_set(): + continue + + killed_daemons = [] + + weight = 1.0 / len(self.daemons) + count = 0 + for daemon in self.daemons: + skip = random.uniform(0.0, 1.0) + if weight <= skip: + self.log('skipping daemon {label} with skip ({skip}) > weight ({weight})'.format( + label=daemon.id_, skip=skip, weight=weight)) + continue + + self.log('kill {label}'.format(label=daemon.id_)) + try: + daemon.signal(signal.SIGTERM) + except Exception as e: + self.log(f'exception when stopping mirror daemon: {e}') + else: + killed_daemons.append(daemon) + stats['kill'] += 1 + + # if we've reached max_thrash, we're done + count += 1 + if count >= self.max_thrash: + break + + if killed_daemons: + # wait for a while before restarting + delay = self.max_revive_delay + if self.randomize: + delay = random.randrange(0.0, self.max_revive_delay) + + self.log('waiting for {delay} secs before reviving daemons'.format(delay=delay)) + sleep(delay) + + for daemon in killed_daemons: + self.log('waiting for {label}'.format(label=daemon.id_)) + try: + run.wait([daemon.proc], timeout=600) + except CommandFailedError: + pass + except: + self.log('Failed to stop {label}'.format(label=daemon.id_)) + + try: + # try to capture a core dump + daemon.signal(signal.SIGABRT) + except socket.error: + pass + raise + finally: + daemon.reset() + + for daemon in killed_daemons: + self.log('reviving {label}'.format(label=daemon.id_)) + daemon.start() + + for stat in stats: + self.log("stat['{key}'] = {value}".format(key = stat, value = stats[stat])) + +@contextlib.contextmanager +def task(ctx, config): + """ + Stress test the cephfs-mirror by thrashing while another task/workunit + is running. + + Please refer to CephFSMirrorThrasher class for further information on the + available options. 
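+ The thrasher greenlet is appended to ctx.ceph[<cluster>].thrashers so the
+ DaemonWatchdog can observe it. On exit the task stops the thrasher, fails
+ the run if the thrasher recorded an exception, and then joins it.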
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'cephfs_mirror_thrash task only accepts a dict for configuration' + + cluster = config.get('cluster', 'ceph') + daemons = list(ctx.daemons.iter_daemons_of_role('cephfs-mirror', cluster)) + assert len(daemons) > 0, \ + 'cephfs_mirror_thrash task requires at least 1 cephfs-mirror daemon' + + # choose random seed + if 'seed' in config: + seed = int(config['seed']) + else: + seed = int(time.time()) + log.info('cephfs_mirror_thrash using random seed: {seed}'.format(seed=seed)) + random.seed(seed) + + thrasher = CephFSMirrorThrasher(ctx, config, cluster, daemons) + thrasher.start() + ctx.ceph[cluster].thrashers.append(thrasher) + + try: + log.debug('Yielding') + yield + finally: + log.info('joining cephfs_mirror_thrash') + thrasher.stop() + if thrasher.exception is not None: + raise RuntimeError('error during thrashing') + thrasher.join() + log.info('done joining') diff --git a/qa/tasks/cephfs_test_runner.py b/qa/tasks/cephfs_test_runner.py new file mode 100644 index 000000000..8a4919b93 --- /dev/null +++ b/qa/tasks/cephfs_test_runner.py @@ -0,0 +1,213 @@ +import contextlib +import logging +import os +import unittest +from unittest import suite, loader, case +from teuthology.task import interactive +from teuthology import misc +from tasks.cephfs.filesystem import Filesystem, MDSCluster, CephCluster +from tasks.mgr.mgr_test_case import MgrCluster + +log = logging.getLogger(__name__) + + +class DecoratingLoader(loader.TestLoader): + """ + A specialization of TestLoader that tags some extra attributes + onto test classes as they are loaded. + """ + def __init__(self, params): + self._params = params + super(DecoratingLoader, self).__init__() + + def _apply_params(self, obj): + for k, v in self._params.items(): + if obj.__class__ is type: + cls = obj + else: + cls = obj.__class__ + setattr(cls, k, v) + + def loadTestsFromTestCase(self, testCaseClass): + self._apply_params(testCaseClass) + return super(DecoratingLoader, self).loadTestsFromTestCase(testCaseClass) + + def loadTestsFromName(self, name, module=None): + result = super(DecoratingLoader, self).loadTestsFromName(name, module) + + # Special case for when we were called with the name of a method, we get + # a suite with one TestCase + tests_in_result = list(result) + if len(tests_in_result) == 1 and isinstance(tests_in_result[0], case.TestCase): + self._apply_params(tests_in_result[0]) + + return result + + +class LogStream(object): + def __init__(self): + self.buffer = "" + + def write(self, data): + self.buffer += data + if "\n" in self.buffer: + lines = self.buffer.split("\n") + for line in lines[:-1]: + log.info(line) + self.buffer = lines[-1] + + def flush(self): + pass + + +class InteractiveFailureResult(unittest.TextTestResult): + """ + Specialization that implements interactive-on-error style + behavior. + """ + ctx = None + + def addFailure(self, test, err): + log.error(self._exc_info_to_string(err, test)) + log.error("Failure in test '{0}', going interactive".format( + self.getDescription(test) + )) + interactive.task(ctx=self.ctx, config=None) + + def addError(self, test, err): + log.error(self._exc_info_to_string(err, test)) + log.error("Error in test '{0}', going interactive".format( + self.getDescription(test) + )) + interactive.task(ctx=self.ctx, config=None) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run the CephFS test cases. 
+ + Run everything in tasks/cephfs/test_*.py: + + :: + + tasks: + - install: + - ceph: + - ceph-fuse: + - cephfs_test_runner: + + `modules` argument allows running only some specific modules: + + :: + + tasks: + ... + - cephfs_test_runner: + modules: + - tasks.cephfs.test_sessionmap + - tasks.cephfs.test_auto_repair + + By default, any cases that can't be run on the current cluster configuration + will generate a failure. When the optional `fail_on_skip` argument is set + to false, any tests that can't be run on the current configuration will + simply be skipped: + + :: + tasks: + ... + - cephfs_test_runner: + fail_on_skip: false + + """ + + ceph_cluster = CephCluster(ctx) + + if len(list(misc.all_roles_of_type(ctx.cluster, 'mds'))): + mds_cluster = MDSCluster(ctx) + fs = Filesystem(ctx) + else: + mds_cluster = None + fs = None + + if len(list(misc.all_roles_of_type(ctx.cluster, 'mgr'))): + mgr_cluster = MgrCluster(ctx) + else: + mgr_cluster = None + + # Mount objects, sorted by ID + if hasattr(ctx, 'mounts'): + mounts = [v for k, v in sorted(ctx.mounts.items(), key=lambda mount: mount[0])] + else: + # The test configuration has a filesystem but no fuse/kclient mounts + mounts = [] + + decorating_loader = DecoratingLoader({ + "ctx": ctx, + "mounts": mounts, + "fs": fs, + "ceph_cluster": ceph_cluster, + "mds_cluster": mds_cluster, + "mgr_cluster": mgr_cluster, + }) + + fail_on_skip = config.get('fail_on_skip', True) + + # Put useful things onto ctx for interactive debugging + ctx.fs = fs + ctx.mds_cluster = mds_cluster + ctx.mgr_cluster = mgr_cluster + + # Depending on config, either load specific modules, or scan for moduless + if config and 'modules' in config and config['modules']: + module_suites = [] + for mod_name in config['modules']: + # Test names like cephfs.test_auto_repair + module_suites.append(decorating_loader.loadTestsFromName(mod_name)) + overall_suite = suite.TestSuite(module_suites) + else: + # Default, run all tests + overall_suite = decorating_loader.discover( + os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "cephfs/" + ) + ) + + if ctx.config.get("interactive-on-error", False): + InteractiveFailureResult.ctx = ctx + result_class = InteractiveFailureResult + else: + result_class = unittest.TextTestResult + + class LoggingResult(result_class): + def startTest(self, test): + log.info("Starting test: {0}".format(self.getDescription(test))) + return super(LoggingResult, self).startTest(test) + + def addSkip(self, test, reason): + if fail_on_skip: + # Don't just call addFailure because that requires a traceback + self.failures.append((test, reason)) + else: + super(LoggingResult, self).addSkip(test, reason) + + # Execute! + result = unittest.TextTestRunner( + stream=LogStream(), + resultclass=LoggingResult, + verbosity=2, + failfast=True).run(overall_suite) + + if not result.wasSuccessful(): + result.printErrors() # duplicate output at end for convenience + + bad_tests = [] + for test, error in result.errors: + bad_tests.append(str(test)) + for test, failure in result.failures: + bad_tests.append(str(test)) + + raise RuntimeError("Test failure: {0}".format(", ".join(bad_tests))) + + yield diff --git a/qa/tasks/cephfs_upgrade_snap.py b/qa/tasks/cephfs_upgrade_snap.py new file mode 100644 index 000000000..1b0a737a7 --- /dev/null +++ b/qa/tasks/cephfs_upgrade_snap.py @@ -0,0 +1,47 @@ +""" +Upgrade cluster snap format. 
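+
+The task (see below) requires max_mds == 1, runs a recursive repair scrub
+on / and ~mdsdir, then polls the MDS map until the CEPH_MDSMAP_ALLOW_SNAPS
+and CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS flags are both set.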
+""" + +import logging +import time + +from tasks.cephfs.filesystem import Filesystem + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Upgrade CephFS file system snap format. + """ + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'snap-upgrade task only accepts a dict for configuration' + + fs = Filesystem(ctx) + + mds_map = fs.get_mds_map() + assert(mds_map['max_mds'] == 1) + + json = fs.run_scrub(["start", "/", "force", "recursive", "repair"]) + if not json or json['return_code'] == 0: + assert(fs.wait_until_scrub_complete(tag=json["scrub_tag"]) == True) + log.info("scrub / completed") + else: + log.info("scrub / failed: {}".format(json)) + + json = fs.run_scrub(["start", "~mdsdir", "force", "recursive", "repair"]) + if not json or json['return_code'] == 0: + assert(fs.wait_until_scrub_complete(tag=json["scrub_tag"]) == True) + log.info("scrub ~mdsdir completed") + else: + log.info("scrub / failed: {}".format(json)) + + for i in range(0, 10): + mds_map = fs.get_mds_map() + if (mds_map['flags'] & (1<<1)) != 0 and (mds_map['flags'] & (1<<4)) != 0: + break + time.sleep(10) + assert((mds_map['flags'] & (1<<1)) != 0) # Test CEPH_MDSMAP_ALLOW_SNAPS + assert((mds_map['flags'] & (1<<4)) != 0) # Test CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS diff --git a/qa/tasks/check_counter.py b/qa/tasks/check_counter.py new file mode 100644 index 000000000..40818f3f4 --- /dev/null +++ b/qa/tasks/check_counter.py @@ -0,0 +1,130 @@ + +import logging +import json + +from teuthology.task import Task +from teuthology import misc + +from tasks import ceph_manager + +log = logging.getLogger(__name__) + + +class CheckCounter(Task): + """ + Use this task to validate that some daemon perf counters were + incremented by the nested tasks. + + Config: + 'cluster_name': optional, specify which cluster + 'target': dictionary of daemon type to list of performance counters. + 'dry_run': just log the value of the counters, don't fail if they + aren't nonzero. + + Success condition is that for all of the named counters, at least + one of the daemons of that type has the counter nonzero. + + Example to check cephfs dirfrag splits are happening: + - install: + - ceph: + - ceph-fuse: + - check-counter: + counters: + mds: + - "mds.dir_split" + - + name: "mds.dir_update" + min: 3 + - workunit: ... + """ + @property + def admin_remote(self): + first_mon = misc.get_first_mon(self.ctx, None) + (result,) = self.ctx.cluster.only(first_mon).remotes.keys() + return result + + def start(self): + log.info("START") + + def end(self): + overrides = self.ctx.config.get('overrides', {}) + misc.deep_merge(self.config, overrides.get('check-counter', {})) + + cluster_name = self.config.get('cluster_name', None) + dry_run = self.config.get('dry_run', False) + targets = self.config.get('counters', {}) + + if cluster_name is None: + cluster_name = next(iter(self.ctx.managers.keys())) + + + mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=self.ctx, logger=log.getChild('ceph_manager')) + active_mgr = json.loads(mon_manager.raw_cluster_cmd("mgr", "dump", "--format=json-pretty"))["active_name"] + + for daemon_type, counters in targets.items(): + # List of 'a', 'b', 'c'... 
+ daemon_ids = list(misc.all_roles_of_type(self.ctx.cluster, daemon_type)) + daemons = dict([(daemon_id, + self.ctx.daemons.get_daemon(daemon_type, daemon_id)) + for daemon_id in daemon_ids]) + + expected = set() + seen = set() + + for daemon_id, daemon in daemons.items(): + if not daemon.running(): + log.info("Ignoring daemon {0}, it isn't running".format(daemon_id)) + continue + elif daemon_type == 'mgr' and daemon_id != active_mgr: + continue + else: + log.debug("Getting stats from {0}".format(daemon_id)) + + manager = self.ctx.managers[cluster_name] + proc = manager.admin_socket(daemon_type, daemon_id, ["perf", "dump"]) + response_data = proc.stdout.getvalue().strip() + if response_data: + perf_dump = json.loads(response_data) + else: + log.warning("No admin socket response from {0}, skipping".format(daemon_id)) + continue + + minval = '' + expected_val = '' + for counter in counters: + if isinstance(counter, dict): + name = counter['name'] + if 'min' in counter: + minval = counter['min'] + if 'expected_val' in counter: + expected_val = counter['expected_val'] + else: + name = counter + minval = 1 + expected.add(name) + + val = perf_dump + for key in name.split('.'): + if key not in val: + log.warning(f"Counter '{name}' not found on daemon {daemon_type}.{daemon_id}") + val = None + break + + val = val[key] + + if val is not None: + log.info(f"Daemon {daemon_type}.{daemon_id} {name}={val}") + if isinstance(minval, int) and val >= minval: + seen.add(name) + elif isinstance(expected_val, int) and val == expected_val: + seen.add(name) + + if not dry_run: + unseen = set(expected) - set(seen) + if unseen: + raise RuntimeError("The following counters failed to be set " + "on {0} daemons: {1}".format( + daemon_type, unseen + )) + +task = CheckCounter diff --git a/qa/tasks/cifs_mount.py b/qa/tasks/cifs_mount.py new file mode 100644 index 000000000..b282b0b7d --- /dev/null +++ b/qa/tasks/cifs_mount.py @@ -0,0 +1,137 @@ +""" +Mount cifs clients. Unmount when finished. +""" +import contextlib +import logging +import os + +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Mount/unmount a cifs client. + + The config is optional and defaults to mounting on all clients. If + a config is given, it is expected to be a list of clients to do + this operation on. 
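+ A dict form is also accepted, mapping each client to per-client options;
+ currently the only option is 'share', the CIFS share name to mount (see
+ the last example below).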
+ + Example that starts smbd and mounts cifs on all nodes:: + + tasks: + - ceph: + - samba: + - cifs-mount: + - interactive: + + Example that splits smbd and cifs: + + tasks: + - ceph: + - samba: [samba.0] + - cifs-mount: [client.0] + - ceph-fuse: [client.1] + - interactive: + + Example that specifies the share name: + + tasks: + - ceph: + - ceph-fuse: + - samba: + samba.0: + cephfuse: "{testdir}/mnt.0" + - cifs-mount: + client.0: + share: cephfuse + + :param ctx: Context + :param config: Configuration + """ + log.info('Mounting cifs clients...') + + if config is None: + config = dict(('client.{id}'.format(id=id_), None) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')) + elif isinstance(config, list): + config = dict((name, None) for name in config) + + clients = list(teuthology.get_clients(ctx=ctx, roles=config.keys())) + + from .samba import get_sambas + samba_roles = ['samba.{id_}'.format(id_=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba')] + sambas = list(get_sambas(ctx=ctx, roles=samba_roles)) + (ip, _) = sambas[0][1].ssh.get_transport().getpeername() + log.info('samba ip: {ip}'.format(ip=ip)) + + for id_, remote in clients: + mnt = os.path.join(teuthology.get_testdir(ctx), 'mnt.{id}'.format(id=id_)) + log.info('Mounting cifs client.{id} at {remote} {mnt}...'.format( + id=id_, remote=remote,mnt=mnt)) + + remote.run( + args=[ + 'mkdir', + '--', + mnt, + ], + ) + + rolestr = 'client.{id_}'.format(id_=id_) + unc = "ceph" + log.info("config: {c}".format(c=config)) + if config[rolestr] is not None and 'share' in config[rolestr]: + unc = config[rolestr]['share'] + + remote.run( + args=[ + 'sudo', + 'mount', + '-t', + 'cifs', + '//{sambaip}/{unc}'.format(sambaip=ip, unc=unc), + '-o', + 'username=ubuntu,password=ubuntu', + mnt, + ], + ) + + remote.run( + args=[ + 'sudo', + 'chown', + 'ubuntu:ubuntu', + '{m}/'.format(m=mnt), + ], + ) + + try: + yield + finally: + log.info('Unmounting cifs clients...') + for id_, remote in clients: + remote.run( + args=[ + 'sudo', + 'umount', + mnt, + ], + ) + for id_, remote in clients: + while True: + try: + remote.run( + args=[ + 'rmdir', '--', mnt, + run.Raw('2>&1'), + run.Raw('|'), + 'grep', 'Device or resource busy', + ], + ) + import time + time.sleep(1) + except Exception: + break diff --git a/qa/tasks/cram.py b/qa/tasks/cram.py new file mode 100644 index 000000000..a445a146f --- /dev/null +++ b/qa/tasks/cram.py @@ -0,0 +1,160 @@ +""" +Cram tests +""" +import logging +import os + +from tasks.util.workunit import get_refspec_after_overrides + +from teuthology import misc as teuthology +from teuthology.parallel import parallel +from teuthology.orchestra import run +from teuthology.config import config as teuth_config + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Run all cram tests from the specified paths on the specified + clients. Each client runs tests in parallel as default, and + you can also disable it by adding "parallel: False" option. + + Limitations: + Tests must have a .t suffix. Tests with duplicate names will + overwrite each other, so only the last one will run. 
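+ The tests are run with cram 0.6 installed into a virtualenv under the
+ test directory; failing tests leave their .t and .err files under
+ {testdir}/archive/cram.<client> for inspection.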
+ + For example:: + + tasks: + - ceph: + - cram: + clients: + client.0: + - qa/test.t + - qa/test2.t] + client.1: [qa/test.t] + branch: foo + parallel: False + + You can also run a list of cram tests on all clients:: + + tasks: + - ceph: + - cram: + clients: + all: [qa/test.t] + + :param ctx: Context + :param config: Configuration + """ + assert isinstance(config, dict) + assert 'clients' in config and isinstance(config['clients'], dict), \ + 'configuration must contain a dictionary of clients' + + clients = teuthology.replace_all_with_clients(ctx.cluster, + config['clients']) + testdir = teuthology.get_testdir(ctx) + + overrides = ctx.config.get('overrides', {}) + refspec = get_refspec_after_overrides(config, overrides) + + _parallel = config.get('parallel', True) + + git_url = teuth_config.get_ceph_qa_suite_git_url() + log.info('Pulling tests from %s ref %s', git_url, refspec) + + try: + for client, tests in clients.items(): + (remote,) = (ctx.cluster.only(client).remotes.keys()) + client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client) + remote.run( + args=[ + 'mkdir', '--', client_dir, + run.Raw('&&'), + 'python3', '-m', 'venv', '{tdir}/virtualenv'.format(tdir=testdir), + run.Raw('&&'), + '{tdir}/virtualenv/bin/pip'.format(tdir=testdir), + 'install', 'cram==0.6', + ], + ) + clone_dir = '{tdir}/clone.{role}'.format(tdir=testdir, role=client) + remote.run(args=refspec.clone(git_url, clone_dir)) + + for test in tests: + assert test.endswith('.t'), 'tests must end in .t' + remote.run( + args=[ + 'cp', '--', os.path.join(clone_dir, test), client_dir, + ], + ) + + if _parallel: + with parallel() as p: + for role in clients.keys(): + p.spawn(_run_tests, ctx, role) + else: + for role in clients.keys(): + _run_tests(ctx, role) + finally: + for client, tests in clients.items(): + (remote,) = (ctx.cluster.only(client).remotes.keys()) + client_dir = '{tdir}/archive/cram.{role}'.format(tdir=testdir, role=client) + test_files = set([test.rsplit('/', 1)[1] for test in tests]) + + # remove test files unless they failed + for test_file in test_files: + abs_file = os.path.join(client_dir, test_file) + remote.run( + args=[ + 'test', '-f', abs_file + '.err', + run.Raw('||'), + 'rm', '-f', '--', abs_file, + ], + ) + + # ignore failure since more than one client may + # be run on a host, and the client dir should be + # non-empty if the test failed + clone_dir = '{tdir}/clone.{role}'.format(tdir=testdir, role=client) + remote.run( + args=[ + 'rm', '-rf', '--', + '{tdir}/virtualenv'.format(tdir=testdir), + clone_dir, + run.Raw(';'), + 'rmdir', '--ignore-fail-on-non-empty', client_dir, + ], + ) + +def _run_tests(ctx, role): + """ + For each role, check to make sure it's a client, then run the cram on that client + + :param ctx: Context + :param role: Roles + """ + assert isinstance(role, str) + PREFIX = 'client.' 
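+ # roles are normally of the form 'client.<id>'; strip the prefix so
+ # that, e.g., 'client.0' exports CEPH_ID="0" to the cram tests below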
+ if role.startswith(PREFIX): + id_ = role[len(PREFIX):] + else: + id_ = role + (remote,) = (ctx.cluster.only(role).remotes.keys()) + ceph_ref = ctx.summary.get('ceph-sha1', 'master') + + testdir = teuthology.get_testdir(ctx) + log.info('Running tests for %s...', role) + remote.run( + args=[ + run.Raw('CEPH_REF={ref}'.format(ref=ceph_ref)), + run.Raw('CEPH_ID="{id}"'.format(id=id_)), + run.Raw('PATH=$PATH:/usr/sbin'), + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + '{tdir}/virtualenv/bin/cram'.format(tdir=testdir), + '-v', '--', + run.Raw('{tdir}/archive/cram.{role}/*.t'.format(tdir=testdir, role=role)), + ], + logger=log.getChild(role), + ) diff --git a/qa/tasks/create_verify_lfn_objects.py b/qa/tasks/create_verify_lfn_objects.py new file mode 100644 index 000000000..532541581 --- /dev/null +++ b/qa/tasks/create_verify_lfn_objects.py @@ -0,0 +1,83 @@ +""" +Rados modle-based integration tests +""" +import contextlib +import logging + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + For each combination of namespace and name_length, create + <num_objects> objects with name length <name_length> + on entry. On exit, verify that the objects still exist, can + be deleted, and then don't exist. + + Usage:: + + create_verify_lfn_objects.py: + pool: <pool_name> default: 'data' + prefix: <prefix> default: '' + namespace: [<namespace>] default: [''] + num_objects: [<num_objects>] default: 10 + name_length: [<name_length>] default: [400] + """ + pool = config.get('pool', 'data') + num_objects = config.get('num_objects', 10) + name_length = config.get('name_length', [400]) + namespace = config.get('namespace', [None]) + prefix = config.get('prefix', None) + manager = ctx.managers['ceph'] + + objects = [] + for l in name_length: + for ns in namespace: + def object_name(i): + nslength = 0 + if namespace != '': + nslength = len(namespace) + numstr = str(i) + fillerlen = l - nslength - len(prefix) - len(numstr) + assert fillerlen >= 0 + return prefix + ('a'*fillerlen) + numstr + objects += [(ns, object_name(i)) for i in range(num_objects)] + + for ns, name in objects: + err = manager.do_put( + pool, + name, + '/etc/resolv.conf', + namespace=ns) + log.info("err is " + str(err)) + assert err == 0 + + try: + yield + finally: + log.info('ceph_verify_lfn_objects verifying...') + for ns, name in objects: + err = manager.do_get( + pool, + name, + namespace=ns) + log.info("err is " + str(err)) + assert err == 0 + + log.info('ceph_verify_lfn_objects deleting...') + for ns, name in objects: + err = manager.do_rm( + pool, + name, + namespace=ns) + log.info("err is " + str(err)) + assert err == 0 + + log.info('ceph_verify_lfn_objects verifying absent...') + for ns, name in objects: + err = manager.do_get( + pool, + name, + namespace=ns) + log.info("err is " + str(err)) + assert err != 0 diff --git a/qa/tasks/daemonwatchdog.py b/qa/tasks/daemonwatchdog.py new file mode 100644 index 000000000..c8fa9f3c2 --- /dev/null +++ b/qa/tasks/daemonwatchdog.py @@ -0,0 +1,128 @@ +import logging +import signal +import time + +from gevent import sleep +from gevent.greenlet import Greenlet +from gevent.event import Event + +log = logging.getLogger(__name__) + +class DaemonWatchdog(Greenlet): + """ + DaemonWatchdog:: + + Watch Ceph daemons for failures. If an extended failure is detected (i.e. + not intentional), then the watchdog will unmount file systems and send + SIGTERM to all daemons. 
The duration of an extended failure is configurable + with watchdog_daemon_timeout. + + ceph: + watchdog: + daemon_restart [default: no]: restart daemon if "normal" exit (status==0). + + daemon_timeout [default: 300]: number of seconds a daemon + is allowed to be failed before the + watchdog will bark. + """ + + def __init__(self, ctx, config, thrashers): + super(DaemonWatchdog, self).__init__() + self.config = ctx.config.get('watchdog', {}) + self.ctx = ctx + self.e = None + self.logger = log.getChild('daemon_watchdog') + self.cluster = config.get('cluster', 'ceph') + self.name = 'watchdog' + self.stopping = Event() + self.thrashers = thrashers + + def _run(self): + try: + self.watch() + except Exception as e: + # See _run exception comment for MDSThrasher + self.e = e + self.logger.exception("exception:") + # allow successful completion so gevent doesn't see an exception... + + def log(self, x): + """Write data to logger""" + self.logger.info(x) + + def stop(self): + self.stopping.set() + + def bark(self): + self.log("BARK! unmounting mounts and killing all daemons") + if hasattr(self.ctx, 'mounts'): + for mount in self.ctx.mounts.values(): + try: + mount.umount_wait(force=True) + except: + self.logger.exception("ignoring exception:") + daemons = [] + daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('osd', cluster=self.cluster))) + daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('mds', cluster=self.cluster))) + daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('mon', cluster=self.cluster))) + daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('rgw', cluster=self.cluster))) + daemons.extend(filter(lambda daemon: daemon.running() and not daemon.proc.finished, self.ctx.daemons.iter_daemons_of_role('mgr', cluster=self.cluster))) + + for daemon in daemons: + try: + daemon.signal(signal.SIGTERM) + except: + self.logger.exception("ignoring exception:") + + def watch(self): + self.log("watchdog starting") + daemon_timeout = int(self.config.get('daemon_timeout', 300)) + daemon_restart = self.config.get('daemon_restart', False) + daemon_failure_time = {} + while not self.stopping.is_set(): + bark = False + now = time.time() + + osds = self.ctx.daemons.iter_daemons_of_role('osd', cluster=self.cluster) + mons = self.ctx.daemons.iter_daemons_of_role('mon', cluster=self.cluster) + mdss = self.ctx.daemons.iter_daemons_of_role('mds', cluster=self.cluster) + rgws = self.ctx.daemons.iter_daemons_of_role('rgw', cluster=self.cluster) + mgrs = self.ctx.daemons.iter_daemons_of_role('mgr', cluster=self.cluster) + + daemon_failures = [] + daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, osds)) + daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, mons)) + daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, mdss)) + daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, rgws)) + daemon_failures.extend(filter(lambda daemon: daemon.running() and daemon.proc.finished, mgrs)) + + for daemon in daemon_failures: + name = daemon.role + '.' 
+ daemon.id_ + dt = daemon_failure_time.setdefault(name, (daemon, now)) + assert dt[0] is daemon + delta = now-dt[1] + self.log("daemon {name} is failed for ~{t:.0f}s".format(name=name, t=delta)) + if delta > daemon_timeout: + bark = True + if daemon_restart == 'normal' and daemon.proc.exitstatus == 0: + self.log(f"attempting to restart daemon {name}") + daemon.restart() + + # If a daemon is no longer failed, remove it from tracking: + for name in list(daemon_failure_time.keys()): + if name not in [d.role + '.' + d.id_ for d in daemon_failures]: + self.log("daemon {name} has been restored".format(name=name)) + del daemon_failure_time[name] + + for thrasher in self.thrashers: + if thrasher.exception is not None: + self.log("{name} failed".format(name=thrasher.name)) + bark = True + + if bark: + self.bark() + return + + sleep(5) + + self.log("watchdog finished") diff --git a/qa/tasks/deduplication.py b/qa/tasks/deduplication.py new file mode 100644 index 000000000..d4cdfbf57 --- /dev/null +++ b/qa/tasks/deduplication.py @@ -0,0 +1,220 @@ +""" +Run ceph-dedup-tool +""" +import contextlib +import logging +import gevent +from teuthology import misc as teuthology +import json +import time +from io import StringIO + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run ceph-dedup-tool. + The config should be as follows:: + ceph-dedup-tool: + clients: [client list] + op: <operation name> + pool: <pool name> + chunk_pool: <chunk pool name> + chunk_size: <chunk size> + chunk_algorithm: <chunk algorithm, fixed|fastcdc> + fingerprint_algorithm: <fingerprint algorithm, sha1|sha256|sha512> + chunk_dedup_threashold: <the number of duplicate chunks to trigger chunk dedup> + max_thread: <the number of threads> + wakeup_period: <duration> + For example:: + tasks: + - exec: + client.0: + - sudo ceph osd pool create low_tier 4 + - deduplication: + clients: [client.0] + op: 'sample-dedup' + pool: 'default.rgw.buckets.data' + chunk_pool: 'low_tier' + chunk_size: 131072 + chunk_algorithm: 'fastcdc' + fingerprint_algorithm: 'sha1' + chunk_dedup_threshold: 5 + max_thread: 2 + wakeup_period: 20 + sampling_ratio: 100 + """ + log.info('Beginning deduplication...') + assert isinstance(config, dict), \ + "please list clients to run on" + + args = [ + 'ceph-dedup-tool'] + if config.get('op', None): + args.extend(['--op', config.get('op', None)]) + if config.get('chunk_pool', None): + args.extend(['--chunk-pool', config.get('chunk_pool', None)]) + if config.get('chunk_size', False): + args.extend(['--chunk-size', str(config.get('chunk_size', 131072))]) + if config.get('chunk_algorithm', False): + args.extend(['--chunk-algorithm', config.get('chunk_algorithm', None)] ) + if config.get('fingerprint_algorithm', False): + args.extend(['--fingerprint-algorithm', config.get('fingerprint_algorithm', None)] ) + if config.get('chunk_dedup_threshold', False): + args.extend(['--chunk-dedup-threshold', str(config.get('chunk_dedup_threshold', 1))]) + if config.get('max_thread', False): + args.extend(['--max-thread', str(config.get('max_thread', 2))]) + if config.get('sampling_ratio', False): + args.extend(['--sampling-ratio', str(config.get('sampling_ratio', 100))]) + if config.get('wakeup_period', False): + args.extend(['--wakeup-period', str(config.get('wakeup_period', 20))]) + if config.get('pool', False): + args.extend(['--pool', config.get('pool', None)]) + + args.extend([ + '--debug', + '--daemon', + '--loop']) + + def thread(): + run_remote(args, False, 0) + + def run_remote(args, 
need_wait, client_num): + clients = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + log.info('clients are %s' % clients) + role = 'client.{id}'.format(id=client_num) + if role not in clients: + raise Exception('wrong client {c}'.format(c=role)) + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + testdir = teuthology.get_testdir(ctx) + cmd_args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir)] + cmd_args.extend(args) + log.info("cmd: %s", cmd_args) + tries = 0 + while True: + (remote,) = ctx.cluster.only(role).remotes.keys() + proc = remote.run( + args=cmd_args, + wait=need_wait, check_status=False, + stdout=StringIO(), + ) + log.info('exitstatus {r}'.format(r=proc.exitstatus)) + if proc.exitstatus == 0 or need_wait == False: + log.info('proc stdout ', proc.stdout.getvalue()) + return proc.stdout.getvalue().strip() + tries += 1 + if tries > 30: + raise Exception('timed out getting correct exitstatus') + time.sleep(30) + + def get_chunk_objs(chunk_pool): + chunk_obj_list = run_remote(('rados ls -p ' + chunk_pool).split(), True, 1).split() + if chunk_obj_list == False: + return None + else: + return chunk_obj_list + + def get_ref_list(chunk_pool, chunk_obj): + # get reference list of chunk object + dump_str = run_remote( + ('ceph-dedup-tool --op dump-chunk-refs --chunk-pool ' + + chunk_pool + ' --object ' + chunk_obj).split(), + True, 1 + ) + # fail in case that reference object is not written + assert len(dump_str) > 0 + log.info('{0} obj has {1} refs' + .format(chunk_obj, json.loads(dump_str)['count'])) + + # check if chunk object's reference object exists in base-tier + ref_list = json.loads(dump_str)['refs'] + return ref_list + + # To validate whether the sample-dedup operation works well, this function checks if + # 1. sample-dedup has been started and + # 2. reference of chunk objects' exists in correct base pool + def validate(): + log.info('start validating sample-dedup') + base_pool = config.get('pool', None) + chunk_pool = config.get('chunk_pool', None) + max_validation_cnt = 15 + retry_cnt = 0 + # chunk objs for re-validation after chunk-repair + retry_chunk_objs = list() + + # check whether sample-dedup has been started + chunk_obj_list = get_chunk_objs(chunk_pool) + while (chunk_obj_list == None or len(chunk_obj_list) == 0) and retry_cnt < max_validation_cnt: + # retry getting # chunk objs after 30 secs of sleep + time.sleep(30) + chunk_obj_list = get_chunk_objs(chunk_pool) + retry_cnt += 1 + log.info('chunk pool empty. 
retry ', retry_cnt) + assert retry_cnt < max_validation_cnt + + log.info('sample-dedup started successfully') + + retry_cnt = 0 + max_validation_cnt = 5 + # validate chunk pool for max_validation_cnt times + while retry_cnt < max_validation_cnt: + for chunk_obj in chunk_obj_list: + ref_list = get_ref_list(chunk_pool, chunk_obj) + for ref in ref_list: + ret = run_remote( + ('rados -p ' + base_pool + ' stat ' + ref['oid']) + .split(), True, 1 + ) + # check if ref exists in base pool + if ret == False or len(ret) == 0: + # if ref not exists in base pool, try repair in order to avoid + # false-positive inconsistent reference + ret = run_remote(('ceph osd pool stats ' + base_pool).split(), True, 1) + assert len(ret) > 0 + base_pool_id = ret.split()[3] + ret = run_remote( + ('ceph-dedup-tool --op chunk-repair --chunk-pool ' + + chunk_pool + ' --object ' + chunk_obj + ' --target-ref ' + + ref['oid'] + ' --target-ref-pool-id ' + base_pool_id) + .split(), True, 1 + ) + retry_chunk_objs.append(chunk_obj) + log.info('{0} obj exists in {1}'.format(ref['oid'], base_pool)) + + # retry validation for repaired objects + for chunk_obj in retry_chunk_objs: + ref_list = get_ref_list(chunk_pool, chunk_obj) + for ref in ref_list: + ret = run_remote( + ('rados -p ' + base_pool + ' stat ' + ref['oid']) + .split(), True, 1 + ) + assert len(ret) > 0 + log.info( + '{0} obj exists in {1} after repair'.format(ref['oid'], + base_pool) + ) + retry_chunk_objs = list() + + # get chunk objects for the next loop + chunk_obj_list = get_chunk_objs(chunk_pool) + retry_cnt += 1 + time.sleep(30) + return True + + + running = gevent.spawn(thread) + checker = gevent.spawn(validate) + + try: + yield + finally: + log.info('joining ceph-dedup-tool') + running.get() + checker.get() diff --git a/qa/tasks/devstack.py b/qa/tasks/devstack.py new file mode 100644 index 000000000..2499e9e53 --- /dev/null +++ b/qa/tasks/devstack.py @@ -0,0 +1,371 @@ +#!/usr/bin/env python +import contextlib +import logging +import textwrap +import time +from configparser import ConfigParser +from io import BytesIO, StringIO + +from teuthology.orchestra import run +from teuthology import misc +from teuthology.contextutil import nested + +log = logging.getLogger(__name__) + +DEVSTACK_GIT_REPO = 'https://github.com/openstack-dev/devstack.git' +DS_STABLE_BRANCHES = ("havana", "grizzly") + +is_devstack_node = lambda role: role.startswith('devstack') +is_osd_node = lambda role: role.startswith('osd') + + +@contextlib.contextmanager +def task(ctx, config): + if config is None: + config = {} + if not isinstance(config, dict): + raise TypeError("config must be a dict") + with nested(lambda: install(ctx=ctx, config=config), + lambda: smoke(ctx=ctx, config=config), + ): + yield + + +@contextlib.contextmanager +def install(ctx, config): + """ + Install OpenStack DevStack and configure it to use a Ceph cluster for + Glance and Cinder. + + Requires one node with a role 'devstack' + + Since devstack runs rampant on the system it's used on, typically you will + want to reprovision that machine after using devstack on it. + + Also, the default 2GB of RAM that is given to vps nodes is insufficient. I + recommend 4GB. 
Downburst can be instructed to give 4GB to a vps node by + adding this to the yaml: + + downburst: + ram: 4G + + This was created using documentation found here: + https://github.com/openstack-dev/devstack/blob/master/README.md + http://docs.ceph.com/en/latest/rbd/rbd-openstack/ + """ + if config is None: + config = {} + if not isinstance(config, dict): + raise TypeError("config must be a dict") + + devstack_node = next(iter(ctx.cluster.only(is_devstack_node).remotes.keys())) + an_osd_node = next(iter(ctx.cluster.only(is_osd_node).remotes.keys())) + + devstack_branch = config.get("branch", "master") + install_devstack(devstack_node, devstack_branch) + try: + configure_devstack_and_ceph(ctx, config, devstack_node, an_osd_node) + yield + finally: + pass + + +def install_devstack(devstack_node, branch="master"): + log.info("Cloning DevStack repo...") + + args = ['git', 'clone', DEVSTACK_GIT_REPO] + devstack_node.run(args=args) + + if branch != "master": + if branch in DS_STABLE_BRANCHES and not branch.startswith("stable"): + branch = "stable/" + branch + log.info("Checking out {branch} branch...".format(branch=branch)) + cmd = "cd devstack && git checkout " + branch + devstack_node.run(args=cmd) + + log.info("Installing DevStack...") + args = ['cd', 'devstack', run.Raw('&&'), './stack.sh'] + devstack_node.run(args=args) + + +def configure_devstack_and_ceph(ctx, config, devstack_node, ceph_node): + pool_size = config.get('pool_size', '128') + create_pools(ceph_node, pool_size) + distribute_ceph_conf(devstack_node, ceph_node) + # This is where we would install python-ceph and ceph-common but it appears + # the ceph task does that for us. + generate_ceph_keys(ceph_node) + distribute_ceph_keys(devstack_node, ceph_node) + secret_uuid = set_libvirt_secret(devstack_node, ceph_node) + update_devstack_config_files(devstack_node, secret_uuid) + set_apache_servername(devstack_node) + # Rebooting is the most-often-used method of restarting devstack services + misc.reboot(devstack_node) + start_devstack(devstack_node) + restart_apache(devstack_node) + + +def create_pools(ceph_node, pool_size): + log.info("Creating pools on Ceph cluster...") + + for pool_name in ['volumes', 'images', 'backups']: + args = ['sudo', 'ceph', 'osd', 'pool', 'create', pool_name, pool_size] + ceph_node.run(args=args) + + +def distribute_ceph_conf(devstack_node, ceph_node): + log.info("Copying ceph.conf to DevStack node...") + + ceph_conf_path = '/etc/ceph/ceph.conf' + ceph_conf = ceph_node.read_file(ceph_conf_path, sudo=True) + devstack_node.write_file(ceph_conf_path, ceph_conf, sudo=True) + + +def generate_ceph_keys(ceph_node): + log.info("Generating Ceph keys...") + + ceph_auth_cmds = [ + ['sudo', 'ceph', 'auth', 'get-or-create', 'client.cinder', 'mon', + 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=volumes, allow rx pool=images'], # noqa + ['sudo', 'ceph', 'auth', 'get-or-create', 'client.glance', 'mon', + 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=images'], # noqa + ['sudo', 'ceph', 'auth', 'get-or-create', 'client.cinder-backup', 'mon', + 'allow r', 'osd', 'allow class-read object_prefix rbd_children, allow rwx pool=backups'], # noqa + ] + for cmd in ceph_auth_cmds: + ceph_node.run(args=cmd) + + +def distribute_ceph_keys(devstack_node, ceph_node): + log.info("Copying Ceph keys to DevStack node...") + + def copy_key(from_remote, key_name, to_remote, dest_path, owner): + key_stringio = BytesIO() + from_remote.run( + args=['sudo', 'ceph', 'auth', 
'get-or-create', key_name], + stdout=key_stringio) + key_stringio.seek(0) + to_remote.write_file(dest_path, key_stringio, owner=owner, sudo=True) + keys = [ + dict(name='client.glance', + path='/etc/ceph/ceph.client.glance.keyring', + # devstack appears to just want root:root + #owner='glance:glance', + ), + dict(name='client.cinder', + path='/etc/ceph/ceph.client.cinder.keyring', + # devstack appears to just want root:root + #owner='cinder:cinder', + ), + dict(name='client.cinder-backup', + path='/etc/ceph/ceph.client.cinder-backup.keyring', + # devstack appears to just want root:root + #owner='cinder:cinder', + ), + ] + for key_dict in keys: + copy_key(ceph_node, key_dict['name'], devstack_node, + key_dict['path'], key_dict.get('owner')) + + +def set_libvirt_secret(devstack_node, ceph_node): + log.info("Setting libvirt secret...") + + cinder_key = ceph_node.sh('sudo ceph auth get-key client.cinder').strip() + uuid = devstack_node.sh('uuidgen').strip() + + secret_path = '/tmp/secret.xml' + secret_template = textwrap.dedent(""" + <secret ephemeral='no' private='no'> + <uuid>{uuid}</uuid> + <usage type='ceph'> + <name>client.cinder secret</name> + </usage> + </secret>""") + secret_data = secret_template.format(uuid=uuid) + devstack_node.write_file(secret_path, secret_data) + devstack_node.run(args=['sudo', 'virsh', 'secret-define', '--file', + secret_path]) + devstack_node.run(args=['sudo', 'virsh', 'secret-set-value', '--secret', + uuid, '--base64', cinder_key]) + return uuid + + +def update_devstack_config_files(devstack_node, secret_uuid): + log.info("Updating DevStack config files to use Ceph...") + + def backup_config(node, file_name, backup_ext='.orig.teuth'): + node.run(args=['cp', '-f', file_name, file_name + backup_ext]) + + def update_config(config_name, config_stream, update_dict, + section='DEFAULT'): + parser = ConfigParser() + parser.read_file(config_stream) + for (key, value) in update_dict.items(): + parser.set(section, key, value) + out_stream = StringIO() + parser.write(out_stream) + out_stream.seek(0) + return out_stream + + updates = [ + dict(name='/etc/glance/glance-api.conf', options=dict( + default_store='rbd', + rbd_store_user='glance', + rbd_store_pool='images', + show_image_direct_url='True',)), + dict(name='/etc/cinder/cinder.conf', options=dict( + volume_driver='cinder.volume.drivers.rbd.RBDDriver', + rbd_pool='volumes', + rbd_ceph_conf='/etc/ceph/ceph.conf', + rbd_flatten_volume_from_snapshot='false', + rbd_max_clone_depth='5', + glance_api_version='2', + rbd_user='cinder', + rbd_secret_uuid=secret_uuid, + backup_driver='cinder.backup.drivers.ceph', + backup_ceph_conf='/etc/ceph/ceph.conf', + backup_ceph_user='cinder-backup', + backup_ceph_chunk_size='134217728', + backup_ceph_pool='backups', + backup_ceph_stripe_unit='0', + backup_ceph_stripe_count='0', + restore_discard_excess_bytes='true', + )), + dict(name='/etc/nova/nova.conf', options=dict( + libvirt_images_type='rbd', + libvirt_images_rbd_pool='volumes', + libvirt_images_rbd_ceph_conf='/etc/ceph/ceph.conf', + rbd_user='cinder', + rbd_secret_uuid=secret_uuid, + libvirt_inject_password='false', + libvirt_inject_key='false', + libvirt_inject_partition='-2', + )), + ] + + for update in updates: + file_name = update['name'] + options = update['options'] + config_data = devstack_node.read_file(file_name, sudo=True) + config_stream = StringIO(config_data) + backup_config(devstack_node, file_name) + new_config_stream = update_config(file_name, config_stream, options) + devstack_node.write_file(file_name, 
new_config_stream, sudo=True) + + +def set_apache_servername(node): + # Apache complains: "Could not reliably determine the server's fully + # qualified domain name, using 127.0.0.1 for ServerName" + # So, let's make sure it knows its name. + log.info("Setting Apache ServerName...") + + hostname = node.hostname + config_file = '/etc/apache2/conf.d/servername' + config_data = "ServerName {name}".format(name=hostname) + node.write_file(config_file, config_data, sudo=True) + + +def start_devstack(devstack_node): + log.info("Patching devstack start script...") + # This causes screen to start headless - otherwise rejoin-stack.sh fails + # because there is no terminal attached. + cmd = "cd devstack && sed -ie 's/screen -c/screen -dm -c/' rejoin-stack.sh" + devstack_node.run(args=cmd) + + log.info("Starting devstack...") + cmd = "cd devstack && ./rejoin-stack.sh" + devstack_node.run(args=cmd) + + # This was added because I was getting timeouts on Cinder requests - which + # were trying to access Keystone on port 5000. A more robust way to handle + # this would be to introduce a wait-loop on devstack_node that checks to + # see if a service is listening on port 5000. + log.info("Waiting 30s for devstack to start...") + time.sleep(30) + + +def restart_apache(node): + node.run(args=['sudo', '/etc/init.d/apache2', 'restart'], wait=True) + + +@contextlib.contextmanager +def exercise(ctx, config): + log.info("Running devstack exercises...") + + if config is None: + config = {} + if not isinstance(config, dict): + raise TypeError("config must be a dict") + + devstack_node = next(iter(ctx.cluster.only(is_devstack_node).remotes.keys())) + + # TODO: save the log *and* preserve failures + #devstack_archive_dir = create_devstack_archive(ctx, devstack_node) + + try: + #cmd = "cd devstack && ./exercise.sh 2>&1 | tee {dir}/exercise.log".format( # noqa + # dir=devstack_archive_dir) + cmd = "cd devstack && ./exercise.sh" + devstack_node.run(args=cmd, wait=True) + yield + finally: + pass + + +def create_devstack_archive(ctx, devstack_node): + test_dir = misc.get_testdir(ctx) + devstack_archive_dir = "{test_dir}/archive/devstack".format( + test_dir=test_dir) + devstack_node.run(args="mkdir -p " + devstack_archive_dir) + return devstack_archive_dir + + +@contextlib.contextmanager +def smoke(ctx, config): + log.info("Running a basic smoketest...") + + devstack_node = next(iter(ctx.cluster.only(is_devstack_node).remotes.keys())) + an_osd_node = next(iter(ctx.cluster.only(is_osd_node).remotes.keys())) + + try: + create_volume(devstack_node, an_osd_node, 'smoke0', 1) + yield + finally: + pass + + +def create_volume(devstack_node, ceph_node, vol_name, size): + """ + :param size: The size of the volume, in GB + """ + size = str(size) + log.info("Creating a {size}GB volume named {name}...".format( + name=vol_name, + size=size)) + args = ['source', 'devstack/openrc', run.Raw('&&'), 'cinder', 'create', + '--display-name', vol_name, size] + cinder_create = devstack_node.sh(args, wait=True) + vol_info = parse_os_table(cinder_create) + log.debug("Volume info: %s", str(vol_info)) + + try: + rbd_output = ceph_node.sh("rbd --id cinder ls -l volumes", wait=True) + except run.CommandFailedError: + log.debug("Original rbd call failed; retrying without '--id cinder'") + rbd_output = ceph_node.sh("rbd ls -l volumes", wait=True) + + assert vol_info['id'] in rbd_output, \ + "Volume not found on Ceph cluster" + assert vol_info['size'] == size, \ + "Volume size on Ceph cluster is different than specified" + return vol_info['id'] + + +def 
parse_os_table(table_str): + out_dict = dict() + for line in table_str.split('\n'): + if line.startswith('|'): + items = line.split() + out_dict[items[1]] = items[3] + return out_dict diff --git a/qa/tasks/die_on_err.py b/qa/tasks/die_on_err.py new file mode 100644 index 000000000..a6aa4c632 --- /dev/null +++ b/qa/tasks/die_on_err.py @@ -0,0 +1,70 @@ +""" +Raise exceptions on osd coredumps or test err directories +""" +import contextlib +import logging +import time +from teuthology.orchestra import run + +from tasks import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Die if {testdir}/err exists or if an OSD dumps core + """ + if config is None: + config = {} + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < num_osds: + time.sleep(10) + + testdir = teuthology.get_testdir(ctx) + + while True: + for i in range(num_osds): + (osd_remote,) = ctx.cluster.only('osd.%d' % i).remotes.keys() + p = osd_remote.run( + args = [ 'test', '-e', '{tdir}/err'.format(tdir=testdir) ], + wait=True, + check_status=False, + ) + exit_status = p.exitstatus + + if exit_status == 0: + log.info("osd %d has an error" % i) + raise Exception("osd %d error" % i) + + log_path = '/var/log/ceph/osd.%d.log' % (i) + + p = osd_remote.run( + args = [ + 'tail', '-1', log_path, + run.Raw('|'), + 'grep', '-q', 'end dump' + ], + wait=True, + check_status=False, + ) + exit_status = p.exitstatus + + if exit_status == 0: + log.info("osd %d dumped core" % i) + raise Exception("osd %d dumped core" % i) + + time.sleep(5) diff --git a/qa/tasks/divergent_priors.py b/qa/tasks/divergent_priors.py new file mode 100644 index 000000000..e000bb2bb --- /dev/null +++ b/qa/tasks/divergent_priors.py @@ -0,0 +1,160 @@ +""" +Special case divergence test +""" +import logging +import time + +from teuthology import misc as teuthology +from tasks.util.rados import rados + + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Test handling of divergent entries with prior_version + prior to log_tail + + overrides: + ceph: + conf: + osd: + debug osd: 5 + + Requires 3 osds on a single test node. 
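+
+ In outline (see the code below): write 100 objects to a one-PG pool,
+ blackhole the two non-primary osds, perform a few divergent writes and
+ removes through the primary, kill all osds and bring back only the
+ non-divergent ones, write a newer object so the primary's old log
+ entries become divergent, then revive the primary and verify that all
+ the divergent objects are readable once recovery is allowed.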
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'divergent_priors task only accepts a dict for configuration' + + manager = ctx.managers['ceph'] + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'noin') + manager.raw_cluster_cmd('osd', 'set', 'nodown') + manager.wait_for_clean() + + # something that is always there + dummyfile = '/etc/fstab' + dummyfile2 = '/etc/resolv.conf' + + # create 1 pg pool + log.info('creating foo') + manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') + + osds = [0, 1, 2] + for i in osds: + manager.set_config(i, osd_min_pg_log_entries=10) + manager.set_config(i, osd_max_pg_log_entries=10) + manager.set_config(i, osd_pg_log_trim_min=5) + + # determine primary + divergent = manager.get_pg_primary('foo', 0) + log.info("primary and soon to be divergent is %d", divergent) + non_divergent = list(osds) + non_divergent.remove(divergent) + + log.info('writing initial objects') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + # write 100 objects + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) + + manager.wait_for_clean() + + # blackhole non_divergent + log.info("blackholing osds %s", str(non_divergent)) + for i in non_divergent: + manager.set_config(i, objectstore_blackhole=1) + + DIVERGENT_WRITE = 5 + DIVERGENT_REMOVE = 5 + # Write some soon to be divergent + log.info('writing divergent objects') + for i in range(DIVERGENT_WRITE): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, + dummyfile2], wait=False) + # Remove some soon to be divergent + log.info('remove divergent objects') + for i in range(DIVERGENT_REMOVE): + rados(ctx, mon, ['-p', 'foo', 'rm', + 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) + time.sleep(10) + mon.run( + args=['killall', '-9', 'rados'], + wait=True, + check_status=False) + + # kill all the osds but leave divergent in + log.info('killing all the osds') + for i in osds: + manager.kill_osd(i) + for i in osds: + manager.mark_down_osd(i) + for i in non_divergent: + manager.mark_out_osd(i) + + # bring up non-divergent + log.info("bringing up non_divergent %s", str(non_divergent)) + for i in non_divergent: + manager.revive_osd(i) + for i in non_divergent: + manager.mark_in_osd(i) + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) + log.info('writing non-divergent object ' + objname) + rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) + + manager.wait_for_recovery() + + # ensure no recovery of up osds first + log.info('delay recovery') + for i in non_divergent: + manager.wait_run_admin_socket( + 'osd', i, ['set_recovery_delay', '100000']) + + # bring in our divergent friend + log.info("revive divergent %d", divergent) + manager.raw_cluster_cmd('osd', 'set', 'noup') + manager.revive_osd(divergent) + + log.info('delay recovery divergent') + manager.wait_run_admin_socket( + 'osd', divergent, ['set_recovery_delay', '100000']) + + manager.raw_cluster_cmd('osd', 'unset', 'noup') + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + # At this point the divergent_priors should have been detected + + log.info("killing divergent %d", divergent) + 
manager.kill_osd(divergent) + log.info("reviving divergent %d", divergent) + manager.revive_osd(divergent) + + time.sleep(20) + + log.info('allowing recovery') + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in osds: + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', + 'kick_recovery_wq', ' 0') + + log.info('reading divergent objects') + for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): + exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, + '/tmp/existing']) + assert exit_status == 0 + + log.info("success") diff --git a/qa/tasks/divergent_priors2.py b/qa/tasks/divergent_priors2.py new file mode 100644 index 000000000..4d4b07fc4 --- /dev/null +++ b/qa/tasks/divergent_priors2.py @@ -0,0 +1,192 @@ +""" +Special case divergence test with ceph-objectstore-tool export/remove/import +""" +import logging +import time + +from teuthology.exceptions import CommandFailedError +from teuthology import misc as teuthology +from tasks.util.rados import rados +import os + + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Test handling of divergent entries with prior_version + prior to log_tail and a ceph-objectstore-tool export/import + + overrides: + ceph: + conf: + osd: + debug osd: 5 + + Requires 3 osds on a single test node. + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'divergent_priors task only accepts a dict for configuration' + + manager = ctx.managers['ceph'] + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'noin') + manager.raw_cluster_cmd('osd', 'set', 'nodown') + manager.wait_for_clean() + + # something that is always there + dummyfile = '/etc/fstab' + dummyfile2 = '/etc/resolv.conf' + testdir = teuthology.get_testdir(ctx) + + # create 1 pg pool + log.info('creating foo') + manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') + + osds = [0, 1, 2] + for i in osds: + manager.set_config(i, osd_min_pg_log_entries=10) + manager.set_config(i, osd_max_pg_log_entries=10) + manager.set_config(i, osd_pg_log_trim_min=5) + + # determine primary + divergent = manager.get_pg_primary('foo', 0) + log.info("primary and soon to be divergent is %d", divergent) + non_divergent = list(osds) + non_divergent.remove(divergent) + + log.info('writing initial objects') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + # write 100 objects + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) + + manager.wait_for_clean() + + # blackhole non_divergent + log.info("blackholing osds %s", str(non_divergent)) + for i in non_divergent: + manager.set_config(i, objectstore_blackhole=1) + + DIVERGENT_WRITE = 5 + DIVERGENT_REMOVE = 5 + # Write some soon to be divergent + log.info('writing divergent objects') + for i in range(DIVERGENT_WRITE): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, + dummyfile2], wait=False) + # Remove some soon to be divergent + log.info('remove divergent objects') + for i in range(DIVERGENT_REMOVE): + rados(ctx, mon, ['-p', 'foo', 'rm', + 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) + time.sleep(10) + mon.run( + args=['killall', '-9', 'rados'], + wait=True, + check_status=False) + + # kill all the osds but leave divergent in + log.info('killing all the osds') + for i in osds: + manager.kill_osd(i) + for i in osds: + manager.mark_down_osd(i) + 
for i in non_divergent: + manager.mark_out_osd(i) + + # bring up non-divergent + log.info("bringing up non_divergent %s", str(non_divergent)) + for i in non_divergent: + manager.revive_osd(i) + for i in non_divergent: + manager.mark_in_osd(i) + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) + log.info('writing non-divergent object ' + objname) + rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) + + manager.wait_for_recovery() + + # ensure no recovery of up osds first + log.info('delay recovery') + for i in non_divergent: + manager.wait_run_admin_socket( + 'osd', i, ['set_recovery_delay', '100000']) + + # bring in our divergent friend + log.info("revive divergent %d", divergent) + manager.raw_cluster_cmd('osd', 'set', 'noup') + manager.revive_osd(divergent) + + log.info('delay recovery divergent') + manager.wait_run_admin_socket( + 'osd', divergent, ['set_recovery_delay', '100000']) + + manager.raw_cluster_cmd('osd', 'unset', 'noup') + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + # At this point the divergent_priors should have been detected + + log.info("killing divergent %d", divergent) + manager.kill_osd(divergent) + + # Export a pg + (exp_remote,) = ctx.\ + cluster.only('osd.{o}'.format(o=divergent)).remotes.keys() + FSPATH = manager.get_filepath() + JPATH = os.path.join(FSPATH, "journal") + prefix = ("sudo adjust-ulimits ceph-objectstore-tool " + "--data-path {fpath} --journal-path {jpath} " + "--log-file=" + "/var/log/ceph/objectstore_tool.$$.log ". + format(fpath=FSPATH, jpath=JPATH)) + pid = os.getpid() + expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid)) + cmd = ((prefix + "--op export-remove --pgid 2.0 --file {file}"). + format(id=divergent, file=expfile)) + try: + exp_remote.sh(cmd, wait=True) + except CommandFailedError as e: + assert e.exitstatus == 0 + + cmd = ((prefix + "--op import --file {file}"). + format(id=divergent, file=expfile)) + try: + exp_remote.sh(cmd, wait=True) + except CommandFailedError as e: + assert e.exitstatus == 0 + + log.info("reviving divergent %d", divergent) + manager.revive_osd(divergent) + manager.wait_run_admin_socket('osd', divergent, ['dump_ops_in_flight']) + time.sleep(20); + + log.info('allowing recovery') + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in osds: + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', + 'kick_recovery_wq', ' 0') + + log.info('reading divergent objects') + for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): + exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, + '/tmp/existing']) + assert exit_status == 0 + + cmd = 'rm {file}'.format(file=expfile) + exp_remote.run(args=cmd, wait=True) + log.info("success") diff --git a/qa/tasks/dnsmasq.py b/qa/tasks/dnsmasq.py new file mode 100644 index 000000000..df8ccecb1 --- /dev/null +++ b/qa/tasks/dnsmasq.py @@ -0,0 +1,170 @@ +""" +Task for dnsmasq configuration +""" +import contextlib +import logging + +from teuthology import misc +from teuthology.exceptions import ConfigError +from teuthology import contextutil +from teuthology import packaging +from tasks.util import get_remote_for_role + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def install_dnsmasq(remote): + """ + If dnsmasq is not installed, install it for the duration of the task. 
+ """ + try: + existing = packaging.get_package_version(remote, 'dnsmasq') + except: + existing = None + + if existing is None: + packaging.install_package('dnsmasq', remote) + try: + yield + finally: + if existing is None: + packaging.remove_package('dnsmasq', remote) + +@contextlib.contextmanager +def backup_resolv(remote, path): + """ + Store a backup of resolv.conf in the testdir and restore it after the task. + """ + remote.run(args=['cp', '/etc/resolv.conf', path]) + try: + yield + finally: + # restore with 'cp' to avoid overwriting its security context + remote.run(args=['sudo', 'cp', path, '/etc/resolv.conf']) + remote.run(args=['rm', path]) + +@contextlib.contextmanager +def replace_resolv(remote, path): + """ + Update resolv.conf to point the nameserver at localhost. + """ + remote.write_file(path, "nameserver 127.0.0.1\n") + try: + # install it + if remote.os.package_type == "rpm": + # for centos ovh resolv.conf has immutable attribute set + remote.run(args=['sudo', 'chattr', '-i', '/etc/resolv.conf'], check_status=False) + remote.run(args=['sudo', 'cp', path, '/etc/resolv.conf']) + yield + finally: + remote.run(args=['rm', path]) + +@contextlib.contextmanager +def setup_dnsmasq(remote, testdir, cnames): + """ configure dnsmasq on the given remote, adding each cname given """ + log.info('Configuring dnsmasq on remote %s..', remote.name) + + # add address entries for each cname + dnsmasq = "server=8.8.8.8\nserver=8.8.4.4\n" + address_template = "address=/{cname}/{ip_address}\n" + for cname, ip_address in cnames.items(): + dnsmasq += address_template.format(cname=cname, ip_address=ip_address) + + # write to temporary dnsmasq file + dnsmasq_tmp = '/'.join((testdir, 'ceph.tmp')) + remote.write_file(dnsmasq_tmp, dnsmasq) + + # move into /etc/dnsmasq.d/ + dnsmasq_path = '/etc/dnsmasq.d/ceph' + remote.run(args=['sudo', 'mv', dnsmasq_tmp, dnsmasq_path]) + # restore selinux context if necessary + remote.run(args=['sudo', 'restorecon', dnsmasq_path], check_status=False) + + # restart dnsmasq + remote.run(args=['sudo', 'systemctl', 'restart', 'dnsmasq']) + # verify dns name is set + remote.run(args=['ping', '-c', '4', next(iter(cnames.keys()))]) + + try: + yield + finally: + log.info('Removing dnsmasq configuration from remote %s..', remote.name) + # remove /etc/dnsmasq.d/ceph + remote.run(args=['sudo', 'rm', dnsmasq_path]) + # restart dnsmasq + remote.run(args=['sudo', 'systemctl', 'restart', 'dnsmasq']) + +@contextlib.contextmanager +def task(ctx, config): + """ + Configures dnsmasq to add cnames for teuthology remotes. The task expects a + dictionary, where each key is a role. If all cnames for that role use the + same address as that role, the cnames can be given as a list. For example, + this entry configures dnsmasq on the remote associated with client.0, adding + two cnames for the ip address associated with client.0: + + - dnsmasq: + client.0: + - client0.example.com + - c0.example.com + + If the addresses do not all match the given role, a dictionary can be given + to specify the ip address by its target role. For example: + + - dnsmasq: + client.0: + client.0.example.com: client.0 + client.1.example.com: client.1 + + Cnames that end with a . are treated as prefix for the existing hostname. + For example, if the remote for client.0 has a hostname of 'example.com', + this task will add cnames for dev.example.com and test.example.com: + + - dnsmasq: + client.0: [dev., test.] 
+ """ + # apply overrides + overrides = config.get('overrides', {}) + misc.deep_merge(config, overrides.get('dnsmasq', {})) + + # multiple roles may map to the same remote, so collect names by remote + remote_names = {} + for role, cnames in config.items(): + remote = get_remote_for_role(ctx, role) + if remote is None: + raise ConfigError('no remote for role %s' % role) + + names = remote_names.get(remote, {}) + + if isinstance(cnames, list): + # when given a list of cnames, point to local ip + for cname in cnames: + if cname.endswith('.'): + cname += remote.hostname + names[cname] = remote.ip_address + elif isinstance(cnames, dict): + # when given a dict, look up the remote ip for each + for cname, client in cnames.items(): + r = get_remote_for_role(ctx, client) + if r is None: + raise ConfigError('no remote for role %s' % client) + if cname.endswith('.'): + cname += r.hostname + names[cname] = r.ip_address + + remote_names[remote] = names + + testdir = misc.get_testdir(ctx) + resolv_bak = '/'.join((testdir, 'resolv.bak')) + resolv_tmp = '/'.join((testdir, 'resolv.tmp')) + + # run subtasks for each unique remote + subtasks = [] + for remote, cnames in remote_names.items(): + subtasks.extend([ lambda r=remote: install_dnsmasq(r) ]) + subtasks.extend([ lambda r=remote: backup_resolv(r, resolv_bak) ]) + subtasks.extend([ lambda r=remote: replace_resolv(r, resolv_tmp) ]) + subtasks.extend([ lambda r=remote, cn=cnames: setup_dnsmasq(r, testdir, cn) ]) + + with contextutil.nested(*subtasks): + yield diff --git a/qa/tasks/dump_stuck.py b/qa/tasks/dump_stuck.py new file mode 100644 index 000000000..4971f1916 --- /dev/null +++ b/qa/tasks/dump_stuck.py @@ -0,0 +1,161 @@ +""" +Dump_stuck command +""" +import logging +import time + +from tasks import ceph_manager +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + +def check_stuck(manager, num_inactive, num_unclean, num_stale, timeout=10): + """ + Do checks. Make sure get_stuck_pgs return the right amount of information, then + extract health information from the raw_cluster_cmd and compare the results with + values passed in. This passes if all asserts pass. + + :param num_manager: Ceph manager + :param num_inactive: number of inaactive pages that are stuck + :param num_unclean: number of unclean pages that are stuck + :param num_stale: number of stale pages that are stuck + :param timeout: timeout value for get_stuck_pgs calls + """ + inactive = manager.get_stuck_pgs('inactive', timeout) + unclean = manager.get_stuck_pgs('unclean', timeout) + stale = manager.get_stuck_pgs('stale', timeout) + log.info('inactive %s / %d, unclean %s / %d, stale %s / %d', + len(inactive), num_inactive, + len(unclean), num_unclean, + len(stale), num_stale) + assert len(inactive) == num_inactive + assert len(unclean) == num_unclean + assert len(stale) == num_stale + +def task(ctx, config): + """ + Test the dump_stuck command. 
+ + :param ctx: Context + :param config: Configuration + """ + assert config is None, \ + 'dump_stuck requires no configuration' + assert teuthology.num_instances_of_type(ctx.cluster, 'osd') == 2, \ + 'dump_stuck requires exactly 2 osds' + + timeout = 60 + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_clean(timeout) + + manager.raw_cluster_cmd('tell', 'mon.a', 'injectargs', '--', +# '--mon-osd-report-timeout 90', + '--mon-pg-stuck-threshold 10') + + # all active+clean + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=0, + ) + num_pgs = manager.get_num_pgs() + + manager.mark_out_osd(0) + time.sleep(timeout) + manager.flush_pg_stats([1]) + manager.wait_for_recovery(timeout) + + # all active+clean+remapped + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=0, + ) + + manager.mark_in_osd(0) + manager.flush_pg_stats([0, 1]) + manager.wait_for_clean(timeout) + + # all active+clean + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=0, + ) + + log.info('stopping first osd') + manager.kill_osd(0) + manager.mark_down_osd(0) + manager.wait_for_active(timeout) + + log.info('waiting for all to be unclean') + starttime = time.time() + done = False + while not done: + try: + check_stuck( + manager, + num_inactive=0, + num_unclean=num_pgs, + num_stale=0, + ) + done = True + except AssertionError: + # wait up to 15 minutes to become stale + if time.time() - starttime > 900: + raise + + + log.info('stopping second osd') + manager.kill_osd(1) + manager.mark_down_osd(1) + + log.info('waiting for all to be stale') + starttime = time.time() + done = False + while not done: + try: + check_stuck( + manager, + num_inactive=0, + num_unclean=num_pgs, + num_stale=num_pgs, + ) + done = True + except AssertionError: + # wait up to 15 minutes to become stale + if time.time() - starttime > 900: + raise + + log.info('reviving') + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'osd'): + manager.revive_osd(id_) + manager.mark_in_osd(id_) + while True: + try: + manager.flush_pg_stats([0, 1]) + break + except Exception: + log.exception('osds must not be started yet, waiting...') + time.sleep(1) + manager.wait_for_clean(timeout) + + check_stuck( + manager, + num_inactive=0, + num_unclean=0, + num_stale=0, + ) diff --git a/qa/tasks/ec_inconsistent_hinfo.py b/qa/tasks/ec_inconsistent_hinfo.py new file mode 100644 index 000000000..fa10f2c45 --- /dev/null +++ b/qa/tasks/ec_inconsistent_hinfo.py @@ -0,0 +1,225 @@ +""" +Inconsistent_hinfo +""" +import logging +import time +from dateutil.parser import parse +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def wait_for_deep_scrub_complete(manager, pgid, check_time_now, inconsistent): + log.debug("waiting for pg %s deep-scrub complete (check_time_now=%s)" % + (pgid, check_time_now)) + for i in range(300): + time.sleep(5) + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg); + assert pg + + last_deep_scrub_time = parse(pg['last_deep_scrub_stamp']).strftime('%s') + if last_deep_scrub_time < check_time_now: + log.debug('not scrubbed') + continue + + status = pg['state'].split('+') + if inconsistent: + 
assert 'inconsistent' in status + else: + assert 'inconsistent' not in status + return + + assert False, 'not scrubbed' + + +def wait_for_backfilling_complete(manager, pgid, from_osd, to_osd): + log.debug("waiting for pg %s backfill from osd.%s to osd.%s complete" % + (pgid, from_osd, to_osd)) + for i in range(300): + time.sleep(5) + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.info('pg=%s' % pg); + assert pg + status = pg['state'].split('+') + if 'active' not in status: + log.debug('not active') + continue + if 'backfilling' in status: + assert from_osd in pg['acting'] and to_osd in pg['up'] + log.debug('backfilling') + continue + if to_osd not in pg['up']: + log.debug('backfill not started yet') + continue + log.debug('backfilled!') + break + +def task(ctx, config): + """ + Test handling of objects with inconsistent hash info during backfill and deep-scrub. + + A pretty rigid cluster is brought up and tested by this task + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'ec_inconsistent_hinfo task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + profile = config.get('erasure_code_profile', { + 'k': '2', + 'm': '1', + 'crush-failure-domain': 'osd' + }) + profile_name = profile.get('name', 'backfill_unfound') + manager.create_erasure_code_profile(profile_name, profile) + pool = manager.create_pool_with_unique_name( + pg_num=1, + erasure_code_profile_name=profile_name, + min_size=2) + manager.raw_cluster_cmd('osd', 'pool', 'set', pool, + 'pg_autoscale_mode', 'off') + + manager.flush_pg_stats([0, 1, 2, 3]) + manager.wait_for_clean() + + pool_id = manager.get_pool_num(pool) + pgid = '%d.0' % pool_id + pgs = manager.get_pg_stats() + acting = next((pg['acting'] for pg in pgs if pg['pgid'] == pgid), None) + log.info("acting=%s" % acting) + assert acting + primary = acting[0] + + # something that is always there, readable and never empty + dummyfile = '/etc/group' + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile]) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_recovery() + + log.debug("create test object") + obj = 'test' + rados(ctx, mon, ['-p', pool, 'put', obj, dummyfile]) + + victim = acting[1] + + log.info("remove test object hash info from osd.%s shard and test deep-scrub and repair" + % victim) + + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=victim) + check_time_now = time.strftime('%s') + manager.raw_cluster_cmd('pg', 'deep-scrub', pgid) + wait_for_deep_scrub_complete(manager, pgid, check_time_now, True) + + check_time_now = time.strftime('%s') + manager.raw_cluster_cmd('pg', 'repair', pgid) + wait_for_deep_scrub_complete(manager, pgid, check_time_now, False) + + log.info("remove test object hash info from primary osd.%s shard and test backfill" + % primary) + + log.debug("write some data") + rados(ctx, mon, ['-p', pool, 'bench', '30', 'write', '-b', '4096', + '--no-cleanup']) + + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=primary) + + # mark the osd out to trigger a rebalance/backfill + source = acting[1] + target = [x for x in [0, 1, 2, 3] if x not in acting][0] + manager.mark_out_osd(source) + + # wait for everything to 
peer, backfill and recover + wait_for_backfilling_complete(manager, pgid, source, target) + manager.wait_for_clean() + + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + assert 'clean' in pg['state'].split('+') + assert 'inconsistent' not in pg['state'].split('+') + unfound = manager.get_num_unfound_objects() + log.debug("there are %d unfound objects" % unfound) + assert unfound == 0 + + source, target = target, source + log.info("remove test object hash info from non-primary osd.%s shard and test backfill" + % source) + + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=source) + + # mark the osd in to trigger a rebalance/backfill + manager.mark_in_osd(target) + + # wait for everything to peer, backfill and recover + wait_for_backfilling_complete(manager, pgid, source, target) + manager.wait_for_clean() + + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + assert 'clean' in pg['state'].split('+') + assert 'inconsistent' not in pg['state'].split('+') + unfound = manager.get_num_unfound_objects() + log.debug("there are %d unfound objects" % unfound) + assert unfound == 0 + + log.info("remove hash info from two shards and test backfill") + + source = acting[2] + target = [x for x in [0, 1, 2, 3] if x not in acting][0] + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=primary) + manager.objectstore_tool(pool, options='', args='rm-attr hinfo_key', + object_name=obj, osd=source) + + # mark the osd out to trigger a rebalance/backfill + manager.mark_out_osd(source) + + # wait for everything to peer, backfill and detect unfound object + wait_for_backfilling_complete(manager, pgid, source, target) + + # verify that there is unfound object + manager.flush_pg_stats([0, 1, 2, 3]) + pgs = manager.get_pg_stats() + pg = next((pg for pg in pgs if pg['pgid'] == pgid), None) + log.debug('pg=%s' % pg) + assert pg + assert 'backfill_unfound' in pg['state'].split('+') + unfound = manager.get_num_unfound_objects() + log.debug("there are %d unfound objects" % unfound) + assert unfound == 1 + m = manager.list_pg_unfound(pgid) + log.debug('list_pg_unfound=%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + + # mark stuff lost + pgs = manager.get_pg_stats() + manager.raw_cluster_cmd('pg', pgid, 'mark_unfound_lost', 'delete') + + # wait for everything to peer and be happy... + manager.flush_pg_stats([0, 1, 2, 3]) + manager.wait_for_recovery() diff --git a/qa/tasks/ec_lost_unfound.py b/qa/tasks/ec_lost_unfound.py new file mode 100644 index 000000000..57a9364ec --- /dev/null +++ b/qa/tasks/ec_lost_unfound.py @@ -0,0 +1,159 @@ +""" +Lost_unfound +""" +import logging +import time +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling of lost objects on an ec pool. 
+ + A pretty rigid cluster is brought up and tested by this task + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'lost_unfound task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + manager.wait_for_clean() + + profile = config.get('erasure_code_profile', { + 'k': '2', + 'm': '2', + 'crush-failure-domain': 'osd' + }) + profile_name = profile.get('name', 'lost_unfound') + manager.create_erasure_code_profile(profile_name, profile) + pool = manager.create_pool_with_unique_name( + erasure_code_profile_name=profile_name, + min_size=2) + + # something that is always there, readable and never empty + dummyfile = '/etc/group' + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', pool, 'put', 'dummy', dummyfile]) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_recovery() + + # create old objects + for f in range(1, 10): + rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'rm', 'existed_%d' % f]) + + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.1', + 'injectargs', + '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' + ) + + manager.kill_osd(0) + manager.mark_down_osd(0) + manager.kill_osd(3) + manager.mark_down_osd(3) + + for f in range(1, 10): + rados(ctx, mon, ['-p', pool, 'put', 'new_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', pool, 'put', 'existing_%d' % f, dummyfile]) + + # take out osd.1 and a necessary shard of those objects. 
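For orientation before the next step: with the k=2/m=2 profile used here, an object needs at least k=2 surviving shards to be readable. The objects written while osd.0 and osd.3 were down only have up-to-date shards on the two remaining OSDs, so losing osd.1 as well leaves a single current shard and those objects become unfound. A toy check of that arithmetic (purely illustrative, not part of the task):

    # Illustrative only: an EC object is recoverable iff at least k shards survive.
    def ec_recoverable(k, shards_written, shards_lost):
        return (shards_written - shards_lost) >= k

    assert ec_recoverable(k=2, shards_written=4, shards_lost=2)      # old objects
    assert not ec_recoverable(k=2, shards_written=2, shards_lost=1)  # newly written objects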
+ manager.kill_osd(1) + manager.mark_down_osd(1) + manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') + manager.revive_osd(0) + manager.wait_till_osd_is_up(0) + manager.revive_osd(3) + manager.wait_till_osd_is_up(3) + + manager.flush_pg_stats([0, 2, 3]) + manager.wait_till_active() + manager.flush_pg_stats([0, 2, 3]) + + # verify that there are unfound objects + unfound = manager.get_num_unfound_objects() + log.info("there are %d unfound objects" % unfound) + assert unfound + + testdir = teuthology.get_testdir(ctx) + procs = [] + if config.get('parallel_bench', True): + procs.append(mon.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', 'client.admin', + '-b', str(4<<10), + '-p' , pool, + '-t', '20', + 'bench', '240', 'write', + ]).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id='client.admin')), + stdin=run.PIPE, + wait=False + )) + time.sleep(10) + + # mark stuff lost + pgs = manager.get_pg_stats() + for pg in pgs: + if pg['stat_sum']['num_objects_unfound'] > 0: + # verify that i can list them direct from the osd + log.info('listing missing/lost in %s state %s', pg['pgid'], + pg['state']); + m = manager.list_pg_unfound(pg['pgid']) + log.info('%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + + log.info("reverting unfound in %s", pg['pgid']) + manager.raw_cluster_cmd('pg', pg['pgid'], + 'mark_unfound_lost', 'delete') + else: + log.info("no unfound in %s", pg['pgid']) + + manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.3', 'debug', 'kick_recovery_wq', '5') + manager.flush_pg_stats([0, 2, 3]) + manager.wait_for_recovery() + + if not config.get('parallel_bench', True): + time.sleep(20) + + # verify result + for f in range(1, 10): + err = rados(ctx, mon, ['-p', pool, 'get', 'new_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', pool, 'get', 'existed_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', pool, 'get', 'existing_%d' % f, '-']) + assert err + + # see if osd.1 can cope + manager.revive_osd(1) + manager.wait_till_osd_is_up(1) + manager.wait_for_clean() + run.wait(procs) + manager.wait_for_clean() diff --git a/qa/tasks/exec_on_cleanup.py b/qa/tasks/exec_on_cleanup.py new file mode 100644 index 000000000..5a630781a --- /dev/null +++ b/qa/tasks/exec_on_cleanup.py @@ -0,0 +1,61 @@ +""" +Exececute custom commands during unwind/cleanup +""" +import logging +import contextlib + +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Execute commands on a given role + + tasks: + - ceph: + - kclient: [client.a] + - exec: + client.a: + - "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control" + - "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control" + - interactive: + + It stops and fails with the first command that does not return on success. It means + that if the first command fails, the second won't run at all. + + To avoid confusion it is recommended to explicitly enclose the commands in + double quotes. For instance if the command is false (without double quotes) it will + be interpreted as a boolean by the YAML parser. 
+ + :param ctx: Context + :param config: Configuration + """ + try: + yield + finally: + log.info('Executing custom commands...') + assert isinstance(config, dict), "task exec got invalid config" + + testdir = teuthology.get_testdir(ctx) + + if 'all' in config and len(config) == 1: + a = config['all'] + roles = teuthology.all_roles(ctx.cluster) + config = dict((id_, a) for id_ in roles) + + for role, ls in config.items(): + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Running commands on role %s host %s', role, remote.name) + for c in ls: + c.replace('$TESTDIR', testdir) + remote.run( + args=[ + 'sudo', + 'TESTDIR={tdir}'.format(tdir=testdir), + 'bash', + '-c', + c], + ) + diff --git a/qa/tasks/fs.py b/qa/tasks/fs.py new file mode 100644 index 000000000..7e62c8031 --- /dev/null +++ b/qa/tasks/fs.py @@ -0,0 +1,167 @@ +""" +CephFS sub-tasks. +""" + +import logging +import re + +from tasks.cephfs.filesystem import Filesystem, MDSCluster + +log = logging.getLogger(__name__) + +# Everything up to CEPH_MDSMAP_ALLOW_STANDBY_REPLAY +CEPH_MDSMAP_ALLOW_STANDBY_REPLAY = (1<<5) +CEPH_MDSMAP_NOT_JOINABLE = (1 << 0) +CEPH_MDSMAP_LAST = CEPH_MDSMAP_ALLOW_STANDBY_REPLAY +UPGRADE_FLAGS_MASK = ((CEPH_MDSMAP_LAST<<1) - 1) +def pre_upgrade_save(ctx, config): + """ + That the upgrade procedure doesn't clobber state: save state. + """ + + mdsc = MDSCluster(ctx) + status = mdsc.status() + + state = {} + ctx['mds-upgrade-state'] = state + + for fs in list(status.get_filesystems()): + fscid = fs['id'] + mdsmap = fs['mdsmap'] + fs_state = {} + fs_state['epoch'] = mdsmap['epoch'] + fs_state['max_mds'] = mdsmap['max_mds'] + fs_state['flags'] = mdsmap['flags'] & UPGRADE_FLAGS_MASK + state[fscid] = fs_state + log.debug(f"fs fscid={fscid},name={mdsmap['fs_name']} state = {fs_state}") + + +def post_upgrade_checks(ctx, config): + """ + That the upgrade procedure doesn't clobber state. 
+ """ + + state = ctx['mds-upgrade-state'] + + mdsc = MDSCluster(ctx) + status = mdsc.status() + + for fs in list(status.get_filesystems()): + fscid = fs['id'] + mdsmap = fs['mdsmap'] + fs_state = state[fscid] + log.debug(f"checking fs fscid={fscid},name={mdsmap['fs_name']} state = {fs_state}") + + # check state was restored to previous values + assert fs_state['max_mds'] == mdsmap['max_mds'] + assert fs_state['flags'] == (mdsmap['flags'] & UPGRADE_FLAGS_MASK) + + # now confirm that the upgrade procedure was followed + epoch = mdsmap['epoch'] + pre_upgrade_epoch = fs_state['epoch'] + assert pre_upgrade_epoch < epoch + multiple_max_mds = fs_state['max_mds'] > 1 + did_decrease_max_mds = False + should_disable_allow_standby_replay = fs_state['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY + did_disable_allow_standby_replay = False + did_fail_fs = False + for i in range(pre_upgrade_epoch+1, mdsmap['epoch']): + old_status = mdsc.status(epoch=i) + old_fs = old_status.get_fsmap(fscid) + old_mdsmap = old_fs['mdsmap'] + if not multiple_max_mds \ + and (old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE): + raise RuntimeError('mgr is failing fs when there is only one ' + f'rank in epoch {i}.') + if multiple_max_mds \ + and (old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE) \ + and old_mdsmap['max_mds'] == 1: + raise RuntimeError('mgr is failing fs as well the max_mds ' + f'is reduced in epoch {i}') + if old_mdsmap['flags'] & CEPH_MDSMAP_NOT_JOINABLE: + log.debug(f"max_mds not reduced in epoch {i} as fs was failed " + "for carrying out rapid multi-rank mds upgrade") + did_fail_fs = True + if multiple_max_mds and old_mdsmap['max_mds'] == 1: + log.debug(f"max_mds reduced in epoch {i}") + did_decrease_max_mds = True + if should_disable_allow_standby_replay and not (old_mdsmap['flags'] & CEPH_MDSMAP_ALLOW_STANDBY_REPLAY): + log.debug(f"allow_standby_replay disabled in epoch {i}") + did_disable_allow_standby_replay = True + assert not multiple_max_mds or did_fail_fs or did_decrease_max_mds + assert not should_disable_allow_standby_replay or did_disable_allow_standby_replay + + +def ready(ctx, config): + """ + That the file system is ready for clients. + """ + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + + timeout = config.get('timeout', 300) + + mdsc = MDSCluster(ctx) + status = mdsc.status() + + for filesystem in status.get_filesystems(): + fs = Filesystem(ctx, fscid=filesystem['id']) + fs.wait_for_daemons(timeout=timeout, status=status) + +def clients_evicted(ctx, config): + """ + Check clients are evicted, unmount (cleanup) if so. 
+ """ + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + + clients = config.get('clients') + + if clients is None: + clients = {("client."+client_id): True for client_id in ctx.mounts} + + log.info("clients is {}".format(str(clients))) + + fs = Filesystem(ctx) + status = fs.status() + + has_session = set() + mounts = {} + for client in clients: + client_id = re.match("^client.([0-9]+)$", client).groups(1)[0] + mounts[client] = ctx.mounts.get(client_id) + + for rank in fs.get_ranks(status=status): + ls = fs.rank_asok(['session', 'ls'], rank=rank['rank'], status=status) + for session in ls: + for client, evicted in clients.items(): + mount = mounts.get(client) + if mount is not None: + global_id = mount.get_global_id() + if session['id'] == global_id: + if evicted: + raise RuntimeError("client still has session: {}".format(str(session))) + else: + log.info("client {} has a session with MDS {}.{}".format(client, fs.id, rank['rank'])) + has_session.add(client) + + no_session = set(clients) - has_session + should_assert = False + for client, evicted in clients.items(): + mount = mounts.get(client) + if mount is not None: + if evicted: + log.info("confirming client {} is blocklisted".format(client)) + assert fs.is_addr_blocklisted(mount.get_global_addr()) + elif client in no_session: + log.info("client {} should not be evicted but has no session with an MDS".format(client)) + fs.is_addr_blocklisted(mount.get_global_addr()) # for debugging + should_assert = True + if should_assert: + raise RuntimeError("some clients which should not be evicted have no session with an MDS?") diff --git a/qa/tasks/fwd_scrub.py b/qa/tasks/fwd_scrub.py new file mode 100644 index 000000000..c1e0059cd --- /dev/null +++ b/qa/tasks/fwd_scrub.py @@ -0,0 +1,165 @@ +""" +Thrash mds by simulating failures +""" +import logging +import contextlib + +from gevent import sleep, GreenletExit +from gevent.greenlet import Greenlet +from gevent.event import Event +from teuthology import misc as teuthology + +from tasks import ceph_manager +from tasks.cephfs.filesystem import MDSCluster, Filesystem +from tasks.thrasher import Thrasher + +log = logging.getLogger(__name__) + +class ForwardScrubber(Thrasher, Greenlet): + """ + ForwardScrubber:: + + The ForwardScrubber does forward scrubbing of file-systems during execution + of other tasks (workunits, etc). + """ + + def __init__(self, fs, scrub_timeout=300, sleep_between_iterations=1): + super(ForwardScrubber, self).__init__() + + self.logger = log.getChild('fs.[{f}]'.format(f=fs.name)) + self.fs = fs + self.name = 'thrasher.fs.[{f}]'.format(f=fs.name) + self.stopping = Event() + self.scrub_timeout = scrub_timeout + self.sleep_between_iterations = sleep_between_iterations + + def _run(self): + try: + self.do_scrub() + except Exception as e: + self.set_thrasher_exception(e) + self.logger.exception("exception:") + # allow successful completion so gevent doesn't see an exception... 
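Because ForwardScrubber is a gevent Greenlet wrapped in the Thrasher interface, the task below drives it with a plain start/stop/join lifecycle. A minimal sketch of that lifecycle, assuming fs is an existing tasks.cephfs.filesystem.Filesystem instance (the surrounding workload is elided):

    # Sketch of how the scrubber is driven; mirrors task() further down.
    scrubber = ForwardScrubber(fs, scrub_timeout=300, sleep_between_iterations=1)
    scrubber.start()        # spawns the greenlet, which loops in do_scrub()
    # ... run the workload being scrubbed underneath ...
    scrubber.stop()         # sets the Event that do_scrub() polls
    scrubber.join()
    if scrubber.exception is not None:
        raise RuntimeError(f"error during scrub thrashing: {scrubber.exception}")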
+ + def stop(self): + self.stopping.set() + + def do_scrub(self): + """ + Perform the file-system scrubbing + """ + self.logger.info(f'start scrubbing fs: {self.fs.name}') + + try: + while not self.stopping.is_set(): + self._scrub() + sleep(self.sleep_between_iterations) + except GreenletExit: + pass + + self.logger.info(f'end scrubbing fs: {self.fs.name}') + + def _scrub(self, path="/", recursive=True): + self.logger.info(f"scrubbing fs: {self.fs.name}") + scrubopts = ["force"] + if recursive: + scrubopts.append("recursive") + out_json = self.fs.run_scrub(["start", path, ",".join(scrubopts)]) + assert out_json is not None + + tag = out_json['scrub_tag'] + + assert tag is not None + assert out_json['return_code'] == 0 + assert out_json['mode'] == 'asynchronous' + + done = self.fs.wait_until_scrub_complete(tag=tag, sleep=30, timeout=self.scrub_timeout) + if not done: + raise RuntimeError('scrub timeout') + self._check_damage() + + def _check_damage(self): + rdmg = self.fs.get_damage() + types = set() + for rank, dmg in rdmg.items(): + if dmg: + for d in dmg: + types.add(d['damage_type']) + log.error(f"rank {rank} damaged:\n{dmg}") + if types: + raise RuntimeError(f"rank damage found: {types}") + +def stop_all_fwd_scrubbers(thrashers): + for thrasher in thrashers: + if not isinstance(thrasher, ForwardScrubber): + continue + thrasher.stop() + thrasher.join() + if thrasher.exception is not None: + raise RuntimeError(f"error during scrub thrashing: {thrasher.exception}") + + +@contextlib.contextmanager +def task(ctx, config): + """ + Stress test the mds by running scrub iterations while another task/workunit + is running. + Example config: + + - fwd_scrub: + scrub_timeout: 300 + sleep_between_iterations: 1 + """ + + mds_cluster = MDSCluster(ctx) + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'fwd_scrub task only accepts a dict for configuration' + mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds')) + assert len(mdslist) > 0, \ + 'fwd_scrub task requires at least 1 metadata server' + + (first,) = ctx.cluster.only(f'mds.{mdslist[0]}').remotes.keys() + manager = ceph_manager.CephManager( + first, ctx=ctx, logger=log.getChild('ceph_manager'), + ) + + # make sure everyone is in active, standby, or standby-replay + log.info('Wait for all MDSs to reach steady state...') + status = mds_cluster.status() + while True: + steady = True + for info in status.get_all(): + state = info['state'] + if state not in ('up:active', 'up:standby', 'up:standby-replay'): + steady = False + break + if steady: + break + sleep(2) + status = mds_cluster.status() + + log.info('Ready to start scrub thrashing') + + manager.wait_for_clean() + assert manager.is_clean() + + if 'cluster' not in config: + config['cluster'] = 'ceph' + + for fs in status.get_filesystems(): + fwd_scrubber = ForwardScrubber(Filesystem(ctx, fscid=fs['id']), + config['scrub_timeout'], + config['sleep_between_iterations']) + fwd_scrubber.start() + ctx.ceph[config['cluster']].thrashers.append(fwd_scrubber) + + try: + log.debug('Yielding') + yield + finally: + log.info('joining ForwardScrubbers') + stop_all_fwd_scrubbers(ctx.ceph[config['cluster']].thrashers) + log.info('done joining') diff --git a/qa/tasks/immutable_object_cache.py b/qa/tasks/immutable_object_cache.py new file mode 100644 index 000000000..b8034de47 --- /dev/null +++ b/qa/tasks/immutable_object_cache.py @@ -0,0 +1,72 @@ +""" +immutable object cache task +""" +import contextlib +import logging + +from teuthology import misc as teuthology +from 
teuthology import contextutil +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def immutable_object_cache(ctx, config): + """ + setup and cleanup immutable object cache + """ + log.info("start immutable object cache daemon") + for client, client_config in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + # make sure that there is one immutable object cache daemon on the same node. + remote.run( + args=[ + 'sudo', 'killall', '-s', '9', 'ceph-immutable-object-cache', run.Raw('||'), 'true', + ] + ) + remote.run( + args=[ + 'ceph-immutable-object-cache', '-b', + ] + ) + try: + yield + finally: + log.info("check and cleanup immutable object cache") + for client, client_config in config.items(): + client_config = client_config if client_config is not None else dict() + (remote,) = ctx.cluster.only(client).remotes.keys() + cache_path = client_config.get('immutable object cache path', '/tmp/ceph-immutable-object-cache') + ls_command = '"$(ls {} )"'.format(cache_path) + remote.run( + args=[ + 'test', '-n', run.Raw(ls_command), + ] + ) + remote.run( + args=[ + 'sudo', 'killall', '-s', '9', 'ceph-immutable-object-cache', run.Raw('||'), 'true', + ] + ) + remote.run( + args=[ + 'sudo', 'rm', '-rf', cache_path, run.Raw('||'), 'true', + ] + ) + +@contextlib.contextmanager +def task(ctx, config): + """ + This is task for start immutable_object_cache. + """ + assert isinstance(config, dict), \ + "task immutable_object_cache only supports a dictionary for configuration" + + managers = [] + config = teuthology.replace_all_with_clients(ctx.cluster, config) + managers.append( + lambda: immutable_object_cache(ctx=ctx, config=config) + ) + + with contextutil.nested(*managers): + yield diff --git a/qa/tasks/immutable_object_cache_thrash.py b/qa/tasks/immutable_object_cache_thrash.py new file mode 100644 index 000000000..0bf3ad3a0 --- /dev/null +++ b/qa/tasks/immutable_object_cache_thrash.py @@ -0,0 +1,79 @@ +""" +immutable object cache thrash task +""" +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + +DEFAULT_KILL_DAEMON_TIME = 2 +DEFAULT_DEAD_TIME = 30 +DEFAULT_LIVE_TIME = 120 + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def thrashes_immutable_object_cache_daemon(ctx, config): + """ + thrashes immutable object cache daemon. + It can test reconnection feature of RO cache when RO daemon crash + TODO : replace sleep with better method. + """ + log.info("thrashes immutable object cache daemon") + + # just thrash one rbd client. 
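Both cache tasks lean on the same teuthology idiom for their kill/cleanup steps: shell operators are passed through run.Raw so they are not quoted away, and '|| true' keeps killall from failing the run when no daemon happens to be up. A minimal, self-contained sketch of that idiom (remote is assumed to be a teuthology orchestra remote):

    from teuthology.orchestra import run

    # run.Raw('||') keeps the operator as shell syntax instead of a quoted
    # argument; 'true' swallows the error when the daemon is not running.
    remote.run(
        args=['sudo', 'killall', '-s', '9', 'ceph-immutable-object-cache',
              run.Raw('||'), 'true'],
    )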
+ client, client_config = list(config.items())[0] + (remote,) = ctx.cluster.only(client).remotes.keys() + client_config = client_config if client_config is not None else dict() + kill_daemon_time = client_config.get('kill_daemon_time', DEFAULT_KILL_DAEMON_TIME) + dead_time = client_config.get('dead_time', DEFAULT_DEAD_TIME) + live_time = client_config.get('live_time', DEFAULT_LIVE_TIME) + + for i in range(kill_daemon_time): + log.info("ceph-immutable-object-cache crash....") + remote.run( + args=[ + 'sudo', 'killall', '-s', '9', 'ceph-immutable-object-cache', run.Raw('||'), 'true', + ] + ) + # librbd shoud normally run when ceph-immutable-object-cache + remote.run( + args=[ + 'sleep', '{dead_time}'.format(dead_time=dead_time), + ] + ) + # librbd should reconnect daemon + log.info("startup ceph-immutable-object-cache") + remote.run( + args=[ + 'ceph-immutable-object-cache', '-b', + ] + ) + remote.run( + args=[ + 'sleep', '{live_time}'.format(live_time=live_time), + ] + ) + try: + yield + finally: + log.info("cleanup") + +@contextlib.contextmanager +def task(ctx, config): + """ + This is task for testing immutable_object_cache thrash. + """ + assert isinstance(config, dict), \ + "task immutable_object_cache_thrash only supports a dictionary for configuration" + + managers = [] + config = teuthology.replace_all_with_clients(ctx.cluster, config) + managers.append( + lambda: thrashes_immutable_object_cache_daemon(ctx=ctx, config=config) + ) + + with contextutil.nested(*managers): + yield diff --git a/qa/tasks/kafka.py b/qa/tasks/kafka.py new file mode 100644 index 000000000..48bf3611f --- /dev/null +++ b/qa/tasks/kafka.py @@ -0,0 +1,204 @@ +""" +Deploy and configure Kafka for Teuthology +""" +import contextlib +import logging +import time + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def get_kafka_version(config): + for client, client_config in config.items(): + if 'kafka_version' in client_config: + kafka_version = client_config.get('kafka_version') + return kafka_version + +def get_kafka_dir(ctx, config): + kafka_version = get_kafka_version(config) + current_version = 'kafka-' + kafka_version + '-src' + return '{tdir}/{ver}'.format(tdir=teuthology.get_testdir(ctx),ver=current_version) + + +@contextlib.contextmanager +def install_kafka(ctx, config): + """ + Downloading the kafka tar file. 
+ """ + assert isinstance(config, dict) + log.info('Installing Kafka...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + test_dir=teuthology.get_testdir(ctx) + current_version = get_kafka_version(config) + + link1 = 'https://archive.apache.org/dist/kafka/' + current_version + '/kafka-' + current_version + '-src.tgz' + ctx.cluster.only(client).run( + args=['cd', '{tdir}'.format(tdir=test_dir), run.Raw('&&'), 'wget', link1], + ) + + file1 = 'kafka-' + current_version + '-src.tgz' + ctx.cluster.only(client).run( + args=['cd', '{tdir}'.format(tdir=test_dir), run.Raw('&&'), 'tar', '-xvzf', file1], + ) + + try: + yield + finally: + log.info('Removing packaged dependencies of Kafka...') + test_dir=get_kafka_dir(ctx, config) + current_version = get_kafka_version(config) + for (client,_) in config.items(): + ctx.cluster.only(client).run( + args=['rm', '-rf', '{tdir}/logs'.format(tdir=test_dir)], + ) + + ctx.cluster.only(client).run( + args=['rm', '-rf', test_dir], + ) + + rmfile1 = 'kafka-' + current_version + '-src.tgz' + ctx.cluster.only(client).run( + args=['rm', '-rf', '{tdir}/{doc}'.format(tdir=teuthology.get_testdir(ctx),doc=rmfile1)], + ) + + +@contextlib.contextmanager +def run_kafka(ctx,config): + """ + This includes two parts: + 1. Starting Zookeeper service + 2. Starting Kafka service + """ + assert isinstance(config, dict) + log.info('Bringing up Zookeeper and Kafka services...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run( + args=['cd', '{tdir}'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './gradlew', 'jar', + '-PscalaVersion=2.13.2' + ], + ) + + ctx.cluster.only(client).run( + args=['cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './zookeeper-server-start.sh', + '{tir}/config/zookeeper.properties'.format(tir=get_kafka_dir(ctx, config)), + run.Raw('&'), 'exit' + ], + ) + + ctx.cluster.only(client).run( + args=['cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './kafka-server-start.sh', + '{tir}/config/server.properties'.format(tir=get_kafka_dir(ctx, config)), + run.Raw('&'), 'exit' + ], + ) + + try: + yield + finally: + log.info('Stopping Zookeeper and Kafka Services...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run( + args=['cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './kafka-server-stop.sh', + '{tir}/config/kafka.properties'.format(tir=get_kafka_dir(ctx, config)), + ], + ) + + time.sleep(5) + + ctx.cluster.only(client).run( + args=['cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './zookeeper-server-stop.sh', + '{tir}/config/zookeeper.properties'.format(tir=get_kafka_dir(ctx, config)), + ], + ) + + time.sleep(5) + + ctx.cluster.only(client).run(args=['killall', '-9', 'java']) + + +@contextlib.contextmanager +def run_admin_cmds(ctx,config): + """ + Running Kafka Admin commands in order to check the working of producer anf consumer and creation of topic. 
+ """ + assert isinstance(config, dict) + log.info('Checking kafka server through producer/consumer commands...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './kafka-topics.sh', '--create', '--topic', 'quickstart-events', + '--bootstrap-server', 'localhost:9092' + ], + ) + + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + 'echo', "First", run.Raw('|'), + './kafka-console-producer.sh', '--topic', 'quickstart-events', + '--bootstrap-server', 'localhost:9092' + ], + ) + + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/bin'.format(tdir=get_kafka_dir(ctx, config)), run.Raw('&&'), + './kafka-console-consumer.sh', '--topic', 'quickstart-events', + '--from-beginning', + '--bootstrap-server', 'localhost:9092', + run.Raw('&'), 'exit' + ], + ) + + try: + yield + finally: + pass + + +@contextlib.contextmanager +def task(ctx,config): + """ + Following is the way how to run kafka:: + tasks: + - kafka: + client.0: + kafka_version: 2.6.0 + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task kafka only supports a list or dictionary for configuration" + + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + log.debug('Kafka config is %s', config) + + with contextutil.nested( + lambda: install_kafka(ctx=ctx, config=config), + lambda: run_kafka(ctx=ctx, config=config), + lambda: run_admin_cmds(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/kclient.py b/qa/tasks/kclient.py new file mode 100644 index 000000000..ca202df71 --- /dev/null +++ b/qa/tasks/kclient.py @@ -0,0 +1,144 @@ +""" +Mount/unmount a ``kernel`` client. +""" +import contextlib +import logging + +from teuthology.misc import deep_merge +from teuthology.exceptions import CommandFailedError +from teuthology import misc +from teuthology.contextutil import MaxWhileTries +from tasks.cephfs.kernel_mount import KernelMount + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Mount/unmount a ``kernel`` client. + + The config is optional and defaults to mounting on all clients. If + a config is given, it is expected to be a list of clients to do + this operation on. This lets you e.g. set up one client with + ``ceph-fuse`` and another with ``kclient``. 
+ + ``brxnet`` should be a Private IPv4 Address range, default range is + [192.168.0.0/16] + + Example that mounts all clients:: + + tasks: + - ceph: + - kclient: + - interactive: + - brxnet: [192.168.0.0/16] + + Example that uses both ``kclient` and ``ceph-fuse``:: + + tasks: + - ceph: + - ceph-fuse: [client.0] + - kclient: [client.1] + - interactive: + + + Pass a dictionary instead of lists to specify per-client config: + + tasks: + -kclient: + client.0: + debug: true + mntopts: ["nowsync"] + + :param ctx: Context + :param config: Configuration + """ + log.info('Mounting kernel clients...') + + if config is None: + ids = misc.all_roles_of_type(ctx.cluster, 'client') + client_roles = [f'client.{id_}' for id_ in ids] + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, list): + client_roles = config + config = dict([r, dict()] for r in client_roles) + elif isinstance(config, dict): + client_roles = filter(lambda x: 'client.' in x, config.keys()) + else: + raise ValueError(f"Invalid config object: {config} ({config.__class__})") + log.info(f"config is {config}") + + clients = list(misc.get_clients(ctx=ctx, roles=client_roles)) + + test_dir = misc.get_testdir(ctx) + + for id_, remote in clients: + KernelMount.cleanup_stale_netnses_and_bridge(remote) + + mounts = {} + overrides = ctx.config.get('overrides', {}).get('kclient', {}) + top_overrides = dict(filter(lambda x: 'client.' not in x[0], overrides.items())) + for id_, remote in clients: + entity = f"client.{id_}" + client_config = config.get(entity) + if client_config is None: + client_config = {} + # top level overrides + deep_merge(client_config, top_overrides) + # mount specific overrides + client_config_overrides = overrides.get(entity) + deep_merge(client_config, client_config_overrides) + log.info(f"{entity} config is {client_config}") + + cephfs_name = client_config.get("cephfs_name") + if config.get("disabled", False) or not client_config.get('mounted', True): + continue + + kernel_mount = KernelMount( + ctx=ctx, + test_dir=test_dir, + client_id=id_, + client_remote=remote, + brxnet=ctx.teuthology_config.get('brxnet', None), + client_config=client_config, + cephfs_name=cephfs_name) + + mounts[id_] = kernel_mount + + if client_config.get('debug', False): + remote.run(args=["sudo", "bash", "-c", "echo 'module ceph +p' > /sys/kernel/debug/dynamic_debug/control"]) + remote.run(args=["sudo", "bash", "-c", "echo 'module libceph +p' > /sys/kernel/debug/dynamic_debug/control"]) + + kernel_mount.mount(mntopts=client_config.get('mntopts', [])) + + def umount_all(): + log.info('Unmounting kernel clients...') + + forced = False + for mount in mounts.values(): + if mount.is_mounted(): + try: + mount.umount() + except (CommandFailedError, MaxWhileTries): + log.warning("Ordinary umount failed, forcing...") + forced = True + mount.umount_wait(force=True) + + for id_, remote in clients: + KernelMount.cleanup_stale_netnses_and_bridge(remote) + + return forced + + ctx.mounts = mounts + try: + yield mounts + except: + umount_all() # ignore forced retval, we are already in error handling + finally: + + forced = umount_all() + if forced: + # The context managers within the kclient manager worked (i.e. + # the test workload passed) but for some reason we couldn't + # umount, so turn this into a test failure. 
+ raise RuntimeError("Kernel mounts did not umount cleanly") diff --git a/qa/tasks/keycloak.py b/qa/tasks/keycloak.py new file mode 100644 index 000000000..1d89a27a5 --- /dev/null +++ b/qa/tasks/keycloak.py @@ -0,0 +1,468 @@ +""" +Deploy and configure Keycloak for Teuthology +""" +import contextlib +import logging +import os + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.exceptions import ConfigError + +log = logging.getLogger(__name__) + +def get_keycloak_version(config): + for client, client_config in config.items(): + if 'keycloak_version' in client_config: + keycloak_version = client_config.get('keycloak_version') + return keycloak_version + +def get_keycloak_dir(ctx, config): + keycloak_version = get_keycloak_version(config) + current_version = 'keycloak-'+keycloak_version + return '{tdir}/{ver}'.format(tdir=teuthology.get_testdir(ctx),ver=current_version) + +def run_in_keycloak_dir(ctx, client, config, args, **kwargs): + return ctx.cluster.only(client).run( + args=[ 'cd', get_keycloak_dir(ctx,config), run.Raw('&&'), ] + args, + **kwargs + ) + +def get_toxvenv_dir(ctx): + return ctx.tox.venv_path + +def toxvenv_sh(ctx, remote, args, **kwargs): + activate = get_toxvenv_dir(ctx) + '/bin/activate' + return remote.sh(['source', activate, run.Raw('&&')] + args, **kwargs) + +@contextlib.contextmanager +def install_packages(ctx, config): + """ + Downloading the two required tar files + 1. Keycloak + 2. Wildfly (Application Server) + """ + assert isinstance(config, dict) + log.info('Installing packages for Keycloak...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + test_dir=teuthology.get_testdir(ctx) + current_version = get_keycloak_version(config) + link1 = 'https://downloads.jboss.org/keycloak/'+current_version+'/keycloak-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['wget', link1]) + + file1 = 'keycloak-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['tar', '-C', test_dir, '-xvzf', file1]) + + link2 ='https://downloads.jboss.org/keycloak/'+current_version+'/adapters/keycloak-oidc/keycloak-wildfly-adapter-dist-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['cd', '{tdir}'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), 'wget', link2]) + + file2 = 'keycloak-wildfly-adapter-dist-'+current_version+'.tar.gz' + toxvenv_sh(ctx, remote, ['tar', '-C', '{tdir}'.format(tdir=get_keycloak_dir(ctx,config)), '-xvzf', '{tdr}/{file}'.format(tdr=get_keycloak_dir(ctx,config),file=file2)]) + + try: + yield + finally: + log.info('Removing packaged dependencies of Keycloak...') + for client in config: + current_version = get_keycloak_version(config) + ctx.cluster.only(client).run( + args=['cd', '{tdir}'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), 'rm', '-rf', 'keycloak-wildfly-adapter-dist-' + current_version + '.tar.gz'], + ) + + ctx.cluster.only(client).run( + args=['rm', '-rf', '{tdir}'.format(tdir=get_keycloak_dir(ctx,config))], + ) + +@contextlib.contextmanager +def download_conf(ctx, config): + """ + Downloads confi.py used in run_admin_cmds + """ + assert isinstance(config, dict) + log.info('Downloading conf...') + testdir = teuthology.get_testdir(ctx) + conf_branch = 'main' + conf_repo = 'https://github.com/TRYTOBE8TME/scripts.git' + for (client, _) in config.items(): + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', conf_branch, + conf_repo, + '{tdir}/scripts'.format(tdir=testdir), + ], + ) + try: + 
yield + finally: + log.info('Removing conf...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/scripts'.format(tdir=testdir), + ], + ) + +@contextlib.contextmanager +def build(ctx,config): + """ + Build process which needs to be done before starting a server. + """ + assert isinstance(config, dict) + log.info('Building Keycloak...') + for (client,_) in config.items(): + run_in_keycloak_dir(ctx, client, config,['cd', 'bin', run.Raw('&&'), './jboss-cli.sh', '--file=adapter-elytron-install-offline.cli']) + try: + yield + finally: + pass + +@contextlib.contextmanager +def run_keycloak(ctx,config): + """ + This includes two parts: + 1. Adding a user to keycloak which is actually used to log in when we start the server and check in browser. + 2. Starting the server. + """ + assert isinstance(config, dict) + log.info('Bringing up Keycloak...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run( + args=[ + '{tdir}/bin/add-user-keycloak.sh'.format(tdir=get_keycloak_dir(ctx,config)), + '-r', 'master', + '-u', 'admin', + '-p', 'admin', + ], + ) + + toxvenv_sh(ctx, remote, ['cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), './standalone.sh', run.Raw('&'), 'exit']) + try: + yield + finally: + log.info('Stopping Keycloak Server...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + toxvenv_sh(ctx, remote, ['cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), './jboss-cli.sh', '--connect', 'command=:shutdown']) + +@contextlib.contextmanager +def run_admin_cmds(ctx,config): + """ + Running Keycloak Admin commands(kcadm commands) in order to get the token, aud value, thumbprint and realm name. 
+ """ + assert isinstance(config, dict) + log.info('Running admin commands...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'config', 'credentials', + '--server', 'http://localhost:8080/auth', + '--realm', 'master', + '--user', 'admin', + '--password', 'admin', + '--client', 'admin-cli', + ], + ) + + realm_name='demorealm' + realm='realm={}'.format(realm_name) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'create', 'realms', + '-s', realm, + '-s', 'enabled=true', + '-s', 'accessTokenLifespan=1800', + '-o', + ], + ) + + client_name='my_client' + client='clientId={}'.format(client_name) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'create', 'clients', + '-r', realm_name, + '-s', client, + '-s', 'directAccessGrantsEnabled=true', + '-s', 'redirectUris=["http://localhost:8080/myapp/*"]', + ], + ) + + ans1= toxvenv_sh(ctx, remote, + [ + 'cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), + './kcadm.sh', 'get', 'clients', + '-r', realm_name, + '-F', 'id,clientId', run.Raw('|'), + 'jq', '-r', '.[] | select (.clientId == "my_client") | .id' + ]) + + pre0=ans1.rstrip() + pre1="clients/{}".format(pre0) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'update', pre1, + '-r', realm_name, + '-s', 'enabled=true', + '-s', 'serviceAccountsEnabled=true', + '-s', 'redirectUris=["http://localhost:8080/myapp/*"]', + ], + ) + + ans2= pre1+'/client-secret' + + out2= toxvenv_sh(ctx, remote, + [ + 'cd', '{tdir}/bin'.format(tdir=get_keycloak_dir(ctx,config)), run.Raw('&&'), + './kcadm.sh', 'get', ans2, + '-r', realm_name, + '-F', 'value' + ]) + + ans0= '{client}:{secret}'.format(client=client_name,secret=out2[15:51]) + ans3= 'client_secret={}'.format(out2[15:51]) + clientid='client_id={}'.format(client_name) + + proto_map = pre1+"/protocol-mappers/models" + uname = "username=testuser" + upass = "password=testuser" + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'create', 'users', + '-s', uname, + '-s', 'enabled=true', + '-s', 'attributes.\"https://aws.amazon.com/tags\"=\"{"principal_tags":{"Department":["Engineering", "Marketing"]}}\"', + '-r', realm_name, + ], + ) + + sample = 'testuser' + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'set-password', + '-r', realm_name, + '--username', sample, + '--new-password', sample, + ], + ) + + file_path = '{tdir}/scripts/confi.py'.format(tdir=teuthology.get_testdir(ctx)) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'create', proto_map, + '-r', realm_name, + '-f', file_path, + ], + ) + + remote.run( + args=[ + '{tdir}/bin/kcadm.sh'.format(tdir=get_keycloak_dir(ctx,config)), + 'config', 'credentials', + '--server', 'http://localhost:8080/auth', + '--realm', realm_name, + '--user', sample, + '--password', sample, + '--client', 'admin-cli', + ], + ) + + out9= toxvenv_sh(ctx, remote, + [ + 'curl', '-k', '-v', + '-X', 'POST', + '-H', 'Content-Type:application/x-www-form-urlencoded', + '-d', 'scope=openid', + '-d', 'grant_type=password', + '-d', clientid, + '-d', ans3, + '-d', uname, + '-d', upass, + 'http://localhost:8080/auth/realms/'+realm_name+'/protocol/openid-connect/token', run.Raw('|'), + 'jq', '-r', '.access_token' + ]) + + 
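The curl pipelines immediately above and below are plain OIDC token requests against Keycloak (password grant for the user token, client_credentials for the service token), with jq extracting .access_token. An equivalent sketch in Python for readers less comfortable with the shell form (the requests library is an assumption here; the task itself only uses curl through toxvenv_sh):

    import requests

    # Hypothetical equivalent of the token requests above; realm, client id
    # and secret mirror the values configured earlier in run_admin_cmds().
    def fetch_token(realm, client_id, client_secret, username=None, password=None):
        url = ('http://localhost:8080/auth/realms/%s'
               '/protocol/openid-connect/token' % realm)
        data = {'scope': 'openid',
                'client_id': client_id,
                'client_secret': client_secret}
        if username is None:
            data['grant_type'] = 'client_credentials'
        else:
            data.update(grant_type='password',
                        username=username, password=password)
        resp = requests.post(url, data=data)
        resp.raise_for_status()
        return resp.json()['access_token']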
user_token_pre = out9.rstrip() + user_token = '{}'.format(user_token_pre) + + out3= toxvenv_sh(ctx, remote, + [ + 'curl', '-k', '-v', + '-X', 'POST', + '-H', 'Content-Type:application/x-www-form-urlencoded', + '-d', 'scope=openid', + '-d', 'grant_type=client_credentials', + '-d', clientid, + '-d', ans3, + 'http://localhost:8080/auth/realms/'+realm_name+'/protocol/openid-connect/token', run.Raw('|'), + 'jq', '-r', '.access_token' + ]) + + pre2=out3.rstrip() + acc_token= 'token={}'.format(pre2) + ans4= '{}'.format(pre2) + + out4= toxvenv_sh(ctx, remote, + [ + 'curl', '-k', '-v', + '-X', 'GET', + '-H', 'Content-Type:application/x-www-form-urlencoded', + 'http://localhost:8080/auth/realms/'+realm_name+'/protocol/openid-connect/certs', run.Raw('|'), + 'jq', '-r', '.keys[].x5c[]' + ]) + + pre3=out4.rstrip() + cert_value='{}'.format(pre3) + start_value= "-----BEGIN CERTIFICATE-----\n" + end_value= "\n-----END CERTIFICATE-----" + user_data="" + user_data+=start_value + user_data+=cert_value + user_data+=end_value + + remote.write_file( + path='{tdir}/bin/certificate.crt'.format(tdir=get_keycloak_dir(ctx,config)), + data=user_data + ) + + out5= toxvenv_sh(ctx, remote, + [ + 'openssl', 'x509', + '-in', '{tdir}/bin/certificate.crt'.format(tdir=get_keycloak_dir(ctx,config)), + '--fingerprint', '--noout', '-sha1' + ]) + + pre_ans= '{}'.format(out5[17:76]) + ans5="" + + for character in pre_ans: + if(character!=':'): + ans5+=character + + str1 = 'curl' + str2 = '-k' + str3 = '-v' + str4 = '-X' + str5 = 'POST' + str6 = '-u' + str7 = '-d' + str8 = 'http://localhost:8080/auth/realms/'+realm_name+'/protocol/openid-connect/token/introspect' + + out6= toxvenv_sh(ctx, remote, + [ + str1, str2, str3, str4, str5, str6, ans0, str7, acc_token, str8, run.Raw('|'), 'jq', '-r', '.aud' + ]) + + out7= toxvenv_sh(ctx, remote, + [ + str1, str2, str3, str4, str5, str6, ans0, str7, acc_token, str8, run.Raw('|'), 'jq', '-r', '.sub' + ]) + + out8= toxvenv_sh(ctx, remote, + [ + str1, str2, str3, str4, str5, str6, ans0, str7, acc_token, str8, run.Raw('|'), 'jq', '-r', '.azp' + ]) + + ans6=out6.rstrip() + ans7=out7.rstrip() + ans8=out8.rstrip() + + os.environ['TOKEN']=ans4 + os.environ['THUMBPRINT']=ans5 + os.environ['AUD']=ans6 + os.environ['SUB']=ans7 + os.environ['AZP']=ans8 + os.environ['USER_TOKEN']=user_token + os.environ['KC_REALM']=realm_name + + try: + yield + finally: + log.info('Removing certificate.crt file...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=['rm', '-f', + '{tdir}/bin/certificate.crt'.format(tdir=get_keycloak_dir(ctx,config)), + ], + ) + + remote.run( + args=['rm', '-f', + '{tdir}/confi.py'.format(tdir=teuthology.get_testdir(ctx)), + ], + ) + +@contextlib.contextmanager +def task(ctx,config): + """ + To run keycloak the prerequisite is to run the tox task. Following is the way how to run + tox and then keycloak:: + + tasks: + - tox: [ client.0 ] + - keycloak: + client.0: + keycloak_version: 11.0.0 + + To pass extra arguments to nose (e.g. 
to run a certain test):: + + tasks: + - tox: [ client.0 ] + - keycloak: + client.0: + keycloak_version: 11.0.0 + - s3tests: + client.0: + extra_attrs: ['webidentity_test'] + + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task keycloak only supports a list or dictionary for configuration" + + if not hasattr(ctx, 'tox'): + raise ConfigError('keycloak must run after the tox task') + + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + log.debug('Keycloak config is %s', config) + + with contextutil.nested( + lambda: install_packages(ctx=ctx, config=config), + lambda: build(ctx=ctx, config=config), + lambda: run_keycloak(ctx=ctx, config=config), + lambda: download_conf(ctx=ctx, config=config), + lambda: run_admin_cmds(ctx=ctx, config=config), + ): + yield + diff --git a/qa/tasks/keystone.py b/qa/tasks/keystone.py new file mode 100644 index 000000000..7aa785055 --- /dev/null +++ b/qa/tasks/keystone.py @@ -0,0 +1,481 @@ +""" +Deploy and configure Keystone for Teuthology +""" +import argparse +import contextlib +import logging + +# still need this for python3.6 +from collections import OrderedDict +from itertools import chain + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.packaging import install_package +from teuthology.packaging import remove_package +from teuthology.exceptions import ConfigError + +log = logging.getLogger(__name__) + + +def get_keystone_dir(ctx): + return '{tdir}/keystone'.format(tdir=teuthology.get_testdir(ctx)) + +def run_in_keystone_dir(ctx, client, args, **kwargs): + return ctx.cluster.only(client).run( + args=[ 'cd', get_keystone_dir(ctx), run.Raw('&&'), ] + args, + **kwargs + ) + +def get_toxvenv_dir(ctx): + return ctx.tox.venv_path + +def toxvenv_sh(ctx, remote, args, **kwargs): + activate = get_toxvenv_dir(ctx) + '/bin/activate' + return remote.sh(['source', activate, run.Raw('&&')] + args, **kwargs) + +def run_in_keystone_venv(ctx, client, args): + run_in_keystone_dir(ctx, client, + [ 'source', + '.tox/venv/bin/activate', + run.Raw('&&') + ] + args) + +def get_keystone_venved_cmd(ctx, cmd, args, env=[]): + kbindir = get_keystone_dir(ctx) + '/.tox/venv/bin/' + return env + [ kbindir + 'python', kbindir + cmd ] + args + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the Keystone from github. + Remove downloaded file upon exit. + + The context passed in should be identical to the context + passed in to the main task. 
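+
+    Each client's config may provide 'force-branch' (defaults to master) and
+    an optional 'sha1' to pin the checkout; both are read below.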
+ """ + assert isinstance(config, dict) + log.info('Downloading keystone...') + keystonedir = get_keystone_dir(ctx) + + for (client, cconf) in config.items(): + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', cconf.get('force-branch', 'master'), + 'https://github.com/openstack/keystone.git', + keystonedir, + ], + ) + + sha1 = cconf.get('sha1') + if sha1 is not None: + run_in_keystone_dir(ctx, client, [ + 'git', 'reset', '--hard', sha1, + ], + ) + + # hax for http://tracker.ceph.com/issues/23659 + run_in_keystone_dir(ctx, client, [ + 'sed', '-i', + 's/pysaml2<4.0.3,>=2.4.0/pysaml2>=4.5.0/', + 'requirements.txt' + ], + ) + try: + yield + finally: + log.info('Removing keystone...') + for client in config: + ctx.cluster.only(client).run( + args=[ 'rm', '-rf', keystonedir ], + ) + +patch_bindep_template = """\ +import fileinput +import sys +import os +fixed=False +os.chdir("{keystone_dir}") +for line in fileinput.input("bindep.txt", inplace=True): + if line == "python34-devel [platform:centos]\\n": + line="python34-devel [platform:centos-7]\\npython36-devel [platform:centos-8]\\n" + fixed=True + print(line,end="") + +print("Fixed line" if fixed else "No fix necessary", file=sys.stderr) +exit(0) +""" + +@contextlib.contextmanager +def install_packages(ctx, config): + """ + Download the packaged dependencies of Keystone. + Remove install packages upon exit. + + The context passed in should be identical to the context + passed in to the main task. + """ + assert isinstance(config, dict) + log.info('Installing packages for Keystone...') + + patch_bindep = patch_bindep_template \ + .replace("{keystone_dir}", get_keystone_dir(ctx)) + packages = {} + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + toxvenv_sh(ctx, remote, ['python'], stdin=patch_bindep) + # use bindep to read which dependencies we need from keystone/bindep.txt + toxvenv_sh(ctx, remote, ['pip', 'install', 'bindep']) + packages[client] = toxvenv_sh(ctx, remote, + ['bindep', '--brief', '--file', '{}/bindep.txt'.format(get_keystone_dir(ctx))], + check_status=False).splitlines() # returns 1 on success? + for dep in packages[client]: + install_package(dep, remote) + try: + yield + finally: + log.info('Removing packaged dependencies of Keystone...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + for dep in packages[client]: + remove_package(dep, remote) + +def run_mysql_query(ctx, remote, query): + query_arg = '--execute="{}"'.format(query) + args = ['sudo', 'mysql', run.Raw(query_arg)] + remote.run(args=args) + +@contextlib.contextmanager +def setup_database(ctx, config): + """ + Setup database for Keystone. + """ + assert isinstance(config, dict) + log.info('Setting up database for keystone...') + + for (client, cconf) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + # MariaDB on RHEL/CentOS needs service started after package install + # while Ubuntu starts service by default. 
+ if remote.os.name == 'rhel' or remote.os.name == 'centos': + remote.run(args=['sudo', 'systemctl', 'restart', 'mariadb']) + + run_mysql_query(ctx, remote, "CREATE USER 'keystone'@'localhost' IDENTIFIED BY 'SECRET';") + run_mysql_query(ctx, remote, "CREATE DATABASE keystone;") + run_mysql_query(ctx, remote, "GRANT ALL PRIVILEGES ON keystone.* TO 'keystone'@'localhost';") + run_mysql_query(ctx, remote, "FLUSH PRIVILEGES;") + + try: + yield + finally: + pass + +@contextlib.contextmanager +def setup_venv(ctx, config): + """ + Setup the virtualenv for Keystone using tox. + """ + assert isinstance(config, dict) + log.info('Setting up virtualenv for keystone...') + for (client, _) in config.items(): + run_in_keystone_dir(ctx, client, + ['sed', '-i', 's/usedevelop.*/usedevelop=false/g', 'tox.ini']) + + run_in_keystone_dir(ctx, client, + [ 'source', + '{tvdir}/bin/activate'.format(tvdir=get_toxvenv_dir(ctx)), + run.Raw('&&'), + 'tox', '-e', 'venv', '--notest' + ]) + + run_in_keystone_venv(ctx, client, + [ 'pip', 'install', + 'python-openstackclient==5.2.1', + 'osc-lib==2.0.0' + ]) + try: + yield + finally: + pass + +@contextlib.contextmanager +def configure_instance(ctx, config): + assert isinstance(config, dict) + log.info('Configuring keystone...') + + kdir = get_keystone_dir(ctx) + keyrepo_dir = '{kdir}/etc/fernet-keys'.format(kdir=kdir) + for (client, _) in config.items(): + # prepare the config file + run_in_keystone_dir(ctx, client, + [ + 'source', + f'{get_toxvenv_dir(ctx)}/bin/activate', + run.Raw('&&'), + 'tox', '-e', 'genconfig' + ]) + run_in_keystone_dir(ctx, client, + [ + 'cp', '-f', + 'etc/keystone.conf.sample', + 'etc/keystone.conf' + ]) + run_in_keystone_dir(ctx, client, + [ + 'sed', + '-e', 's^#key_repository =.*^key_repository = {kr}^'.format(kr = keyrepo_dir), + '-i', 'etc/keystone.conf' + ]) + run_in_keystone_dir(ctx, client, + [ + 'sed', + '-e', 's^#connection =.*^connection = mysql+pymysql://keystone:SECRET@localhost/keystone^', + '-i', 'etc/keystone.conf' + ]) + # log to a file that gets archived + log_file = '{p}/archive/keystone.{c}.log'.format(p=teuthology.get_testdir(ctx), c=client) + run_in_keystone_dir(ctx, client, + [ + 'sed', + '-e', 's^#log_file =.*^log_file = {}^'.format(log_file), + '-i', 'etc/keystone.conf' + ]) + # copy the config to archive + run_in_keystone_dir(ctx, client, [ + 'cp', 'etc/keystone.conf', + '{}/archive/keystone.{}.conf'.format(teuthology.get_testdir(ctx), client) + ]) + + conf_file = '{kdir}/etc/keystone.conf'.format(kdir=get_keystone_dir(ctx)) + + # prepare key repository for Fetnet token authenticator + run_in_keystone_dir(ctx, client, [ 'mkdir', '-p', keyrepo_dir ]) + run_in_keystone_venv(ctx, client, [ 'keystone-manage', '--config-file', conf_file, 'fernet_setup' ]) + + # sync database + run_in_keystone_venv(ctx, client, [ 'keystone-manage', '--config-file', conf_file, 'db_sync' ]) + yield + +@contextlib.contextmanager +def run_keystone(ctx, config): + assert isinstance(config, dict) + log.info('Configuring keystone...') + + conf_file = '{kdir}/etc/keystone.conf'.format(kdir=get_keystone_dir(ctx)) + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + cluster_name, _, client_id = teuthology.split_role(client) + + # start the public endpoint + client_public_with_id = 'keystone.public' + '.' 
+ client_id + + public_host, public_port = ctx.keystone.public_endpoints[client] + run_cmd = get_keystone_venved_cmd(ctx, 'keystone-wsgi-public', + [ '--host', public_host, '--port', str(public_port), + # Let's put the Keystone in background, wait for EOF + # and after receiving it, send SIGTERM to the daemon. + # This crazy hack is because Keystone, in contrast to + # our other daemons, doesn't quit on stdin.close(). + # Teuthology relies on this behaviour. + run.Raw('& { read; kill %1; }') + ], + [ + run.Raw('OS_KEYSTONE_CONFIG_FILES={}'.format(conf_file)), + ], + ) + ctx.daemons.add_daemon( + remote, 'keystone', client_public_with_id, + cluster=cluster_name, + args=run_cmd, + logger=log.getChild(client), + stdin=run.PIPE, + wait=False, + check_status=False, + ) + + # sleep driven synchronization + run_in_keystone_venv(ctx, client, [ 'sleep', '15' ]) + try: + yield + finally: + log.info('Stopping Keystone public instance') + ctx.daemons.get_daemon('keystone', client_public_with_id, + cluster_name).stop() + + +def dict_to_args(specials, items): + """ + Transform + [(key1, val1), (special, val_special), (key3, val3) ] + into: + [ '--key1', 'val1', '--key3', 'val3', 'val_special' ] + """ + args = [] + special_vals = OrderedDict((k, '') for k in specials.split(',')) + for (k, v) in items: + if k in special_vals: + special_vals[k] = v + else: + args.append('--{k}'.format(k=k)) + args.append(v) + args.extend(arg for arg in special_vals.values() if arg) + return args + +def run_section_cmds(ctx, cclient, section_cmd, specials, + section_config_list): + public_host, public_port = ctx.keystone.public_endpoints[cclient] + + auth_section = [ + ( 'os-username', 'admin' ), + ( 'os-password', 'ADMIN' ), + ( 'os-user-domain-id', 'default' ), + ( 'os-project-name', 'admin' ), + ( 'os-project-domain-id', 'default' ), + ( 'os-identity-api-version', '3' ), + ( 'os-auth-url', 'http://{host}:{port}/v3'.format(host=public_host, + port=public_port) ), + ] + + for section_item in section_config_list: + run_in_keystone_venv(ctx, cclient, + [ 'openstack' ] + section_cmd.split() + + dict_to_args(specials, auth_section + list(section_item.items())) + + [ '--debug' ]) + +def create_endpoint(ctx, cclient, service, url, adminurl=None): + endpoint_sections = [ + {'service': service, 'interface': 'public', 'url': url}, + ] + if adminurl: + endpoint_sections.append( + {'service': service, 'interface': 'admin', 'url': adminurl} + ) + run_section_cmds(ctx, cclient, 'endpoint create', + 'service,interface,url', + endpoint_sections) + +@contextlib.contextmanager +def fill_keystone(ctx, config): + assert isinstance(config, dict) + + for (cclient, cconfig) in config.items(): + public_host, public_port = ctx.keystone.public_endpoints[cclient] + url = 'http://{host}:{port}/v3'.format(host=public_host, + port=public_port) + opts = {'password': 'ADMIN', + 'region-id': 'RegionOne', + 'internal-url': url, + 'admin-url': url, + 'public-url': url} + bootstrap_args = chain.from_iterable(('--bootstrap-{}'.format(k), v) + for k, v in opts.items()) + conf_file = '{kdir}/etc/keystone.conf'.format(kdir=get_keystone_dir(ctx)) + run_in_keystone_venv(ctx, cclient, + ['keystone-manage', '--config-file', conf_file, 'bootstrap'] + + list(bootstrap_args)) + + # configure tenants/projects + run_section_cmds(ctx, cclient, 'domain create --or-show', 'name', + cconfig.get('domains', [])) + run_section_cmds(ctx, cclient, 'project create --or-show', 'name', + cconfig.get('projects', [])) + run_section_cmds(ctx, cclient, 'user create --or-show', 'name', 
+ cconfig.get('users', [])) + run_section_cmds(ctx, cclient, 'role create --or-show', 'name', + cconfig.get('roles', [])) + run_section_cmds(ctx, cclient, 'role add', 'name', + cconfig.get('role-mappings', [])) + run_section_cmds(ctx, cclient, 'service create', 'type', + cconfig.get('services', [])) + + # for the deferred endpoint creation; currently it's used in rgw.py + ctx.keystone.create_endpoint = create_endpoint + + # sleep driven synchronization -- just in case + run_in_keystone_venv(ctx, cclient, [ 'sleep', '3' ]) + try: + yield + finally: + pass + +def assign_ports(ctx, config, initial_port): + """ + Assign port numbers starting from @initial_port + """ + port = initial_port + role_endpoints = {} + for remote, roles_for_host in ctx.cluster.remotes.items(): + for role in roles_for_host: + if role in config: + role_endpoints[role] = (remote.name.split('@')[1], port) + port += 1 + + return role_endpoints + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy and configure Keystone + + Example of configuration: + + - install: + - ceph: + - tox: [ client.0 ] + - keystone: + client.0: + force-branch: master + domains: + - name: custom + description: Custom domain + projects: + - name: custom + description: Custom project + users: + - name: custom + password: SECRET + project: custom + roles: [ name: custom ] + role-mappings: + - name: custom + user: custom + project: custom + services: + - name: swift + type: object-store + description: Swift Service + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task keystone only supports a list or dictionary for configuration" + + if not hasattr(ctx, 'tox'): + raise ConfigError('keystone must run after the tox task') + + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + log.debug('Keystone config is %s', config) + + ctx.keystone = argparse.Namespace() + ctx.keystone.public_endpoints = assign_ports(ctx, config, 5000) + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: install_packages(ctx=ctx, config=config), + lambda: setup_database(ctx=ctx, config=config), + lambda: setup_venv(ctx=ctx, config=config), + lambda: configure_instance(ctx=ctx, config=config), + lambda: run_keystone(ctx=ctx, config=config), + lambda: fill_keystone(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/kubeadm.py b/qa/tasks/kubeadm.py new file mode 100644 index 000000000..00417fc86 --- /dev/null +++ b/qa/tasks/kubeadm.py @@ -0,0 +1,563 @@ +""" +Kubernetes cluster task, deployed via kubeadm +""" +import argparse +import contextlib +import ipaddress +import json +import logging +import random +import yaml +from io import BytesIO + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.config import config as teuth_config +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +def _kubectl(ctx, config, args, **kwargs): + cluster_name = config['cluster'] + ctx.kubeadm[cluster_name].bootstrap_remote.run( + args=['kubectl'] + args, + **kwargs, + ) + + +def kubectl(ctx, config): + if isinstance(config, str): + config = [config] + assert isinstance(config, list) + for c in config: + if isinstance(c, str): + _kubectl(ctx, config, c.split(' ')) + else: + _kubectl(ctx, config, c) + + +@contextlib.contextmanager +def preflight(ctx, config): + run.wait( + 
ctx.cluster.run( + args=[ + 'sudo', 'modprobe', 'br_netfilter', + run.Raw('&&'), + 'sudo', 'sysctl', 'net.bridge.bridge-nf-call-ip6tables=1', + run.Raw('&&'), + 'sudo', 'sysctl', 'net.bridge.bridge-nf-call-iptables=1', + run.Raw('&&'), + 'sudo', 'sysctl', 'net.ipv4.ip_forward=1', + run.Raw('&&'), + 'sudo', 'swapoff', '-a', + ], + wait=False, + ) + ) + + # set docker cgroup driver = systemd + # see https://kubernetes.io/docs/setup/production-environment/container-runtimes/#docker + # see https://github.com/kubernetes/kubeadm/issues/2066 + for remote in ctx.cluster.remotes.keys(): + try: + orig = remote.read_file('/etc/docker/daemon.json', sudo=True) + j = json.loads(orig) + except Exception as e: + log.info(f'Failed to pull old daemon.json: {e}') + j = {} + j["exec-opts"] = ["native.cgroupdriver=systemd"] + j["log-driver"] = "json-file" + j["log-opts"] = {"max-size": "100m"} + j["storage-driver"] = "overlay2" + remote.write_file('/etc/docker/daemon.json', json.dumps(j), sudo=True) + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'systemctl', 'restart', 'docker', + run.Raw('||'), + 'true', + ], + wait=False, + ) + ) + yield + + +@contextlib.contextmanager +def kubeadm_install(ctx, config): + version = config.get('version', '1.21') + + os_type = teuthology.get_distro(ctx) + os_version = teuthology.get_distro_version(ctx) + + try: + if os_type in ['centos', 'rhel']: + os = f"CentOS_{os_version.split('.')[0]}" + log.info('Installing cri-o') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'curl', '-L', '-o', + '/etc/yum.repos.d/devel:kubic:libcontainers:stable.repo', + f'https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/{os}/devel:kubic:libcontainers:stable.repo', + run.Raw('&&'), + 'sudo', + 'curl', '-L', '-o', + f'/etc/yum.repos.d/devel:kubic:libcontainers:stable:cri-o:{version}.repo', + f'https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/{version}/{os}/devel:kubic:libcontainers:stable:cri-o:{version}.repo', + run.Raw('&&'), + 'sudo', 'dnf', 'install', '-y', 'cri-o', + ], + wait=False, + ) + ) + + log.info('Installing kube{adm,ctl,let}') + repo = """[kubernetes] +name=Kubernetes +baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-$basearch +enabled=1 +gpgcheck=1 +repo_gpgcheck=1 +gpgkey=https://packages.cloud.google.com/yum/doc/yum-key.gpg https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg +""" + for remote in ctx.cluster.remotes.keys(): + remote.write_file( + '/etc/yum.repos.d/kubernetes.repo', + repo, + sudo=True, + ) + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'dnf', 'install', '-y', + 'kubelet', 'kubeadm', 'kubectl', + 'iproute-tc', 'bridge-utils', + ], + wait=False, + ) + ) + + # fix cni config + for remote in ctx.cluster.remotes.keys(): + conf = """# from https://github.com/cri-o/cri-o/blob/master/tutorials/kubernetes.md#flannel-network +{ + "name": "crio", + "type": "flannel" +} +""" + remote.write_file('/etc/cni/net.d/10-crio-flannel.conf', conf, sudo=True) + remote.run(args=[ + 'sudo', 'rm', '-f', + '/etc/cni/net.d/87-podman-bridge.conflist', + '/etc/cni/net.d/100-crio-bridge.conf', + ]) + + # start crio + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'systemctl', 'daemon-reload', + run.Raw('&&'), + 'sudo', 'systemctl', 'enable', 'crio', '--now', + ], + wait=False, + ) + ) + + elif os_type == 'ubuntu': + os = f"xUbuntu_{os_version}" + log.info('Installing kube{adm,ctl,let}') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'apt', 'update', + run.Raw('&&'), + 'sudo', 'apt', 
'install', '-y', + 'apt-transport-https', 'ca-certificates', 'curl', + run.Raw('&&'), + 'sudo', 'curl', '-fsSLo', + '/usr/share/keyrings/kubernetes-archive-keyring.gpg', + 'https://packages.cloud.google.com/apt/doc/apt-key.gpg', + run.Raw('&&'), + 'echo', 'deb [signed-by=/usr/share/keyrings/kubernetes-archive-keyring.gpg] https://apt.kubernetes.io/ kubernetes-xenial main', + run.Raw('|'), + 'sudo', 'tee', '/etc/apt/sources.list.d/kubernetes.list', + run.Raw('&&'), + 'sudo', 'apt', 'update', + run.Raw('&&'), + 'sudo', 'apt', 'install', '-y', + 'kubelet', 'kubeadm', 'kubectl', + 'bridge-utils', + ], + wait=False, + ) + ) + + else: + raise RuntimeError(f'unsupported distro {os_type} for cri-o') + + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'systemctl', 'enable', '--now', 'kubelet', + run.Raw('&&'), + 'sudo', 'kubeadm', 'config', 'images', 'pull', + ], + wait=False, + ) + ) + + yield + + finally: + if config.get('uninstall', True): + log.info('Uninstalling kube{adm,let,ctl}') + if os_type in ['centos', 'rhel']: + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'rm', '-f', + '/etc/yum.repos.d/kubernetes.repo', + run.Raw('&&'), + 'sudo', 'dnf', 'remove', '-y', + 'kubeadm', 'kubelet', 'kubectl', 'cri-o', + ], + wait=False + ) + ) + elif os_type == 'ubuntu' and False: + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'rm', '-f', + '/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list', + f'/etc/apt/sources.list.d/devel:kubic:libcontainers:stable:cri-o:{version}.list', + '/etc/apt/trusted.gpg.d/libcontainers-cri-o.gpg', + run.Raw('&&'), + 'sudo', 'apt', 'remove', '-y', + 'kkubeadm', 'kubelet', 'kubectl', 'cri-o', 'cri-o-runc', + ], + wait=False, + ) + ) + + +@contextlib.contextmanager +def kubeadm_init_join(ctx, config): + cluster_name = config['cluster'] + + bootstrap_remote = None + remotes = {} # remote -> ip + for remote, roles in ctx.cluster.remotes.items(): + for role in roles: + if role.startswith('host.'): + if not bootstrap_remote: + bootstrap_remote = remote + if remote not in remotes: + remotes[remote] = remote.ssh.get_transport().getpeername()[0] + if not bootstrap_remote: + raise RuntimeError('must define at least one host.something role') + ctx.kubeadm[cluster_name].bootstrap_remote = bootstrap_remote + ctx.kubeadm[cluster_name].remotes = remotes + ctx.kubeadm[cluster_name].token = 'abcdef.' 
+ ''.join([ + random.choice('0123456789abcdefghijklmnopqrstuvwxyz') for _ in range(16) + ]) + log.info(f'Token: {ctx.kubeadm[cluster_name].token}') + log.info(f'Remotes: {ctx.kubeadm[cluster_name].remotes}') + + try: + # init + cmd = [ + 'sudo', 'kubeadm', 'init', + '--node-name', ctx.kubeadm[cluster_name].bootstrap_remote.shortname, + '--token', ctx.kubeadm[cluster_name].token, + '--pod-network-cidr', str(ctx.kubeadm[cluster_name].pod_subnet), + ] + bootstrap_remote.run(args=cmd) + + # join additional nodes + joins = [] + for remote, ip in ctx.kubeadm[cluster_name].remotes.items(): + if remote == bootstrap_remote: + continue + cmd = [ + 'sudo', 'kubeadm', 'join', + ctx.kubeadm[cluster_name].remotes[ctx.kubeadm[cluster_name].bootstrap_remote] + ':6443', + '--node-name', remote.shortname, + '--token', ctx.kubeadm[cluster_name].token, + '--discovery-token-unsafe-skip-ca-verification', + ] + joins.append(remote.run(args=cmd, wait=False)) + run.wait(joins) + yield + + except Exception as e: + log.exception(e) + raise + + finally: + log.info('Cleaning up node') + run.wait( + ctx.cluster.run( + args=['sudo', 'kubeadm', 'reset', 'cleanup-node', '-f'], + wait=False, + ) + ) + + +@contextlib.contextmanager +def kubectl_config(ctx, config): + cluster_name = config['cluster'] + bootstrap_remote = ctx.kubeadm[cluster_name].bootstrap_remote + + ctx.kubeadm[cluster_name].admin_conf = \ + bootstrap_remote.read_file('/etc/kubernetes/admin.conf', sudo=True) + + log.info('Setting up kubectl') + try: + ctx.cluster.run(args=[ + 'mkdir', '-p', '.kube', + run.Raw('&&'), + 'sudo', 'mkdir', '-p', '/root/.kube', + ]) + for remote in ctx.kubeadm[cluster_name].remotes.keys(): + remote.write_file('.kube/config', ctx.kubeadm[cluster_name].admin_conf) + remote.sudo_write_file('/root/.kube/config', + ctx.kubeadm[cluster_name].admin_conf) + yield + + except Exception as e: + log.exception(e) + raise + + finally: + log.info('Deconfiguring kubectl') + ctx.cluster.run(args=[ + 'rm', '-rf', '.kube', + run.Raw('&&'), + 'sudo', 'rm', '-rf', '/root/.kube', + ]) + + +def map_vnet(mip): + for mapping in teuth_config.get('vnet', []): + mnet = ipaddress.ip_network(mapping['machine_subnet']) + vnet = ipaddress.ip_network(mapping['virtual_subnet']) + if vnet.prefixlen >= mnet.prefixlen: + log.error(f"virtual_subnet {vnet} prefix >= machine_subnet {mnet} prefix") + return None + if mip in mnet: + pos = list(mnet.hosts()).index(mip) + log.info(f"{mip} is in {mnet} at pos {pos}") + sub = list(vnet.subnets(32 - mnet.prefixlen))[pos] + return sub + return None + + +@contextlib.contextmanager +def allocate_pod_subnet(ctx, config): + """ + Allocate a private subnet that will not collide with other test machines/clusters + """ + cluster_name = config['cluster'] + assert cluster_name == 'kubeadm', 'multiple subnets not yet implemented' + + log.info('Identifying pod subnet') + remote = list(ctx.cluster.remotes.keys())[0] + ip = remote.ssh.get_transport().getpeername()[0] + mip = ipaddress.ip_address(ip) + vnet = map_vnet(mip) + assert vnet + log.info(f'Pod subnet: {vnet}') + ctx.kubeadm[cluster_name].pod_subnet = vnet + yield + + +@contextlib.contextmanager +def pod_network(ctx, config): + cluster_name = config['cluster'] + pnet = config.get('pod_network', 'calico') + if pnet == 'flannel': + r = ctx.kubeadm[cluster_name].bootstrap_remote.run( + args=[ + 'curl', + 'https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml', + ], + stdout=BytesIO(), + ) + assert r.exitstatus == 0 + flannel = 
list(yaml.load_all(r.stdout.getvalue(), Loader=yaml.FullLoader)) + for o in flannel: + if o.get('data', {}).get('net-conf.json'): + log.info(f'Updating {o}') + o['data']['net-conf.json'] = o['data']['net-conf.json'].replace( + '10.244.0.0/16', + str(ctx.kubeadm[cluster_name].pod_subnet) + ) + log.info(f'Now {o}') + flannel_yaml = yaml.dump_all(flannel) + log.debug(f'Flannel:\n{flannel_yaml}') + _kubectl(ctx, config, ['apply', '-f', '-'], stdin=flannel_yaml) + + elif pnet == 'calico': + _kubectl(ctx, config, [ + 'create', '-f', + 'https://docs.projectcalico.org/manifests/tigera-operator.yaml' + ]) + cr = { + 'apiVersion': 'operator.tigera.io/v1', + 'kind': 'Installation', + 'metadata': {'name': 'default'}, + 'spec': { + 'calicoNetwork': { + 'ipPools': [ + { + 'blockSize': 26, + 'cidr': str(ctx.kubeadm[cluster_name].pod_subnet), + 'encapsulation': 'IPIPCrossSubnet', + 'natOutgoing': 'Enabled', + 'nodeSelector': 'all()', + } + ] + } + } + } + _kubectl(ctx, config, ['create', '-f', '-'], stdin=yaml.dump(cr)) + + else: + raise RuntimeError(f'unrecognized pod_network {pnet}') + + try: + yield + + finally: + if pnet == 'flannel': + _kubectl(ctx, config, [ + 'delete', '-f', + 'https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml', + ]) + + elif pnet == 'calico': + _kubectl(ctx, config, ['delete', 'installation', 'default']) + _kubectl(ctx, config, [ + 'delete', '-f', + 'https://docs.projectcalico.org/manifests/tigera-operator.yaml' + ]) + + +@contextlib.contextmanager +def setup_pvs(ctx, config): + """ + Create PVs for all scratch LVs and set up a trivial provisioner + """ + log.info('Scanning for scratch devices') + crs = [] + for remote in ctx.cluster.remotes.keys(): + ls = remote.read_file('/scratch_devs').decode('utf-8').strip().splitlines() + log.info(f'Scratch devices on {remote.shortname}: {ls}') + for dev in ls: + devname = dev.split('/')[-1].replace("_", "-") + crs.append({ + 'apiVersion': 'v1', + 'kind': 'PersistentVolume', + 'metadata': {'name': f'{remote.shortname}-{devname}'}, + 'spec': { + 'volumeMode': 'Block', + 'accessModes': ['ReadWriteOnce'], + 'capacity': {'storage': '100Gi'}, # doesn't matter? 
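+                    # (the capacity above is only nominal; each PV is backed by
+                    #  a whole scratch block device listed in /scratch_devs)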
+ 'persistentVolumeReclaimPolicy': 'Retain', + 'storageClassName': 'scratch', + 'local': {'path': dev}, + 'nodeAffinity': { + 'required': { + 'nodeSelectorTerms': [ + { + 'matchExpressions': [ + { + 'key': 'kubernetes.io/hostname', + 'operator': 'In', + 'values': [remote.shortname] + } + ] + } + ] + } + } + } + }) + # overwriting first few MB is enough to make k8s happy + remote.run(args=[ + 'sudo', 'dd', 'if=/dev/zero', f'of={dev}', 'bs=1M', 'count=10' + ]) + crs.append({ + 'kind': 'StorageClass', + 'apiVersion': 'storage.k8s.io/v1', + 'metadata': {'name': 'scratch'}, + 'provisioner': 'kubernetes.io/no-provisioner', + 'volumeBindingMode': 'WaitForFirstConsumer', + }) + y = yaml.dump_all(crs) + log.info('Creating PVs + StorageClass') + log.debug(y) + _kubectl(ctx, config, ['create', '-f', '-'], stdin=y) + + yield + + +@contextlib.contextmanager +def final(ctx, config): + cluster_name = config['cluster'] + + # remove master node taint + _kubectl(ctx, config, [ + 'taint', 'node', + ctx.kubeadm[cluster_name].bootstrap_remote.shortname, + 'node-role.kubernetes.io/master-', + run.Raw('||'), + 'true', + ]) + + yield + + +@contextlib.contextmanager +def task(ctx, config): + if not config: + config = {} + assert isinstance(config, dict), \ + "task only supports a dictionary for configuration" + + log.info('Kubeadm start') + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('kubeadm', {})) + log.info('Config: ' + str(config)) + + # set up cluster context + if not hasattr(ctx, 'kubeadm'): + ctx.kubeadm = {} + if 'cluster' not in config: + config['cluster'] = 'kubeadm' + cluster_name = config['cluster'] + if cluster_name not in ctx.kubeadm: + ctx.kubeadm[cluster_name] = argparse.Namespace() + + with contextutil.nested( + lambda: preflight(ctx, config), + lambda: allocate_pod_subnet(ctx, config), + lambda: kubeadm_install(ctx, config), + lambda: kubeadm_init_join(ctx, config), + lambda: kubectl_config(ctx, config), + lambda: pod_network(ctx, config), + lambda: setup_pvs(ctx, config), + lambda: final(ctx, config), + ): + try: + log.info('Kubeadm complete, yielding') + yield + + finally: + log.info('Tearing down kubeadm') diff --git a/qa/tasks/locktest.py b/qa/tasks/locktest.py new file mode 100755 index 000000000..9de5ba40c --- /dev/null +++ b/qa/tasks/locktest.py @@ -0,0 +1,134 @@ +""" +locktests +""" +import logging + +from teuthology.orchestra import run +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Run locktests, from the xfstests suite, on the given + clients. Whether the clients are ceph-fuse or kernel does not + matter, and the two clients can refer to the same mount. + + The config is a list of two clients to run the locktest on. The + first client will be the host. + + For example: + tasks: + - ceph: + - ceph-fuse: [client.0, client.1] + - locktest: + [client.0, client.1] + + This task does not yield; there would be little point. 
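+
+    The host side listens on port 6788 and the client side connects to the
+    host's address; both run the same locktest binary built below.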
+ + :param ctx: Context + :param config: Configuration + """ + + assert isinstance(config, list) + log.info('fetching and building locktests...') + (host,) = ctx.cluster.only(config[0]).remotes + (client,) = ctx.cluster.only(config[1]).remotes + ( _, _, host_id) = config[0].partition('.') + ( _, _, client_id) = config[1].partition('.') + testdir = teuthology.get_testdir(ctx) + hostmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=host_id) + clientmnt = '{tdir}/mnt.{id}'.format(tdir=testdir, id=client_id) + + try: + for client_name in config: + log.info('building on {client_}'.format(client_=client_name)) + ctx.cluster.only(client_name).run( + args=[ + # explicitly does not support multiple autotest tasks + # in a single run; the result archival would conflict + 'mkdir', '{tdir}/archive/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'mkdir', '{tdir}/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'wget', + '-nv', + 'https://raw.github.com/gregsfortytwo/xfstests-ceph/master/src/locktest.c', + '-O', '{tdir}/locktest/locktest.c'.format(tdir=testdir), + run.Raw('&&'), + 'g++', '{tdir}/locktest/locktest.c'.format(tdir=testdir), + '-o', '{tdir}/locktest/locktest'.format(tdir=testdir) + ], + logger=log.getChild('locktest_client.{id}'.format(id=client_name)), + ) + + log.info('built locktest on each client') + + host.run(args=['sudo', 'touch', + '{mnt}/locktestfile'.format(mnt=hostmnt), + run.Raw('&&'), + 'sudo', 'chown', 'ubuntu.ubuntu', + '{mnt}/locktestfile'.format(mnt=hostmnt) + ] + ) + + log.info('starting on host') + hostproc = host.run( + args=[ + '{tdir}/locktest/locktest'.format(tdir=testdir), + '-p', '6788', + '-d', + '{mnt}/locktestfile'.format(mnt=hostmnt), + ], + wait=False, + logger=log.getChild('locktest.host'), + ) + log.info('starting on client') + (_,_,hostaddr) = host.name.partition('@') + clientproc = client.run( + args=[ + '{tdir}/locktest/locktest'.format(tdir=testdir), + '-p', '6788', + '-d', + '-h', hostaddr, + '{mnt}/locktestfile'.format(mnt=clientmnt), + ], + logger=log.getChild('locktest.client'), + wait=False + ) + + hostresult = hostproc.wait() + clientresult = clientproc.wait() + if (hostresult != 0) or (clientresult != 0): + raise Exception("Did not pass locking test!") + log.info('finished locktest executable with results {r} and {s}'. 
\ + format(r=hostresult, s=clientresult)) + + finally: + log.info('cleaning up host dir') + host.run( + args=[ + 'mkdir', '-p', '{tdir}/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'rm', '-f', '{tdir}/locktest/locktest.c'.format(tdir=testdir), + run.Raw('&&'), + 'rm', '-f', '{tdir}/locktest/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'rmdir', '{tdir}/locktest' + ], + logger=log.getChild('.{id}'.format(id=config[0])), + ) + log.info('cleaning up client dir') + client.run( + args=[ + 'mkdir', '-p', '{tdir}/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'rm', '-f', '{tdir}/locktest/locktest.c'.format(tdir=testdir), + run.Raw('&&'), + 'rm', '-f', '{tdir}/locktest/locktest'.format(tdir=testdir), + run.Raw('&&'), + 'rmdir', '{tdir}/locktest'.format(tdir=testdir) + ], + logger=log.getChild('.{id}'.format(\ + id=config[1])), + ) diff --git a/qa/tasks/logrotate.conf b/qa/tasks/logrotate.conf new file mode 100644 index 000000000..b0cb8012f --- /dev/null +++ b/qa/tasks/logrotate.conf @@ -0,0 +1,13 @@ +/var/log/ceph/*{daemon_type}*.log {{ + rotate 100 + size {max_size} + compress + sharedscripts + postrotate + killall {daemon_type} -1 || true + endscript + missingok + notifempty + su root root +}} + diff --git a/qa/tasks/lost_unfound.py b/qa/tasks/lost_unfound.py new file mode 100644 index 000000000..5a9142a70 --- /dev/null +++ b/qa/tasks/lost_unfound.py @@ -0,0 +1,180 @@ +""" +Lost_unfound +""" +import logging +import time +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling of lost objects. + + A pretty rigid cluster is brought up and tested by this task + """ + POOL = 'unfound_pool' + if config is None: + config = {} + assert isinstance(config, dict), \ + 'lost_unfound task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + manager.wait_for_clean() + + manager.create_pool(POOL) + + # something that is always there + dummyfile = '/etc/fstab' + + # take an osd out until the very end + manager.kill_osd(2) + manager.mark_down_osd(2) + manager.mark_out_osd(2) + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile]) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_recovery() + + # create old objects + for f in range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f]) + + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.1', + 'injectargs', + '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' + ) + + manager.kill_osd(0) + manager.mark_down_osd(0) + + for f in range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + + # bring osd.0 back up, let it peer, but don't replicate the new + # objects... 
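+    # (done by appending --osd-recovery-delay-start to osd.0's command line
+    #  before reviving it, so the osd peers but does not recover the new data)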
+ log.info('osd.0 command_args is %s' % 'foo') + log.info(ctx.daemons.get_daemon('osd', 0).command_args) + ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([ + '--osd-recovery-delay-start', '1000' + ]) + manager.revive_osd(0) + manager.mark_in_osd(0) + manager.wait_till_osd_is_up(0) + + manager.flush_pg_stats([1, 0]) + manager.wait_till_active() + + # take out osd.1 and the only copy of those objects. + manager.kill_osd(1) + manager.mark_down_osd(1) + manager.mark_out_osd(1) + manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') + + # bring up osd.2 so that things would otherwise, in theory, recovery fully + manager.revive_osd(2) + manager.mark_in_osd(2) + manager.wait_till_osd_is_up(2) + + manager.flush_pg_stats([0, 2]) + manager.wait_till_active() + manager.flush_pg_stats([0, 2]) + + # verify that there are unfound objects + unfound = manager.get_num_unfound_objects() + log.info("there are %d unfound objects" % unfound) + assert unfound + + testdir = teuthology.get_testdir(ctx) + procs = [] + if config.get('parallel_bench', True): + procs.append(mon.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', 'client.admin', + '-b', str(4<<10), + '-p' , POOL, + '-t', '20', + 'bench', '240', 'write', + ]).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id='client.admin')), + stdin=run.PIPE, + wait=False + )) + time.sleep(10) + + # mark stuff lost + pgs = manager.get_pg_stats() + for pg in pgs: + if pg['stat_sum']['num_objects_unfound'] > 0: + primary = 'osd.%d' % pg['acting'][0] + + # verify that i can list them direct from the osd + log.info('listing missing/lost in %s state %s', pg['pgid'], + pg['state']); + m = manager.list_pg_unfound(pg['pgid']) + #log.info('%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + assert m['available_might_have_unfound'] == True + assert m['might_have_unfound'][0]['osd'] == "1" + assert m['might_have_unfound'][0]['status'] == "osd is down" + num_unfound=0 + for o in m['objects']: + if len(o['locations']) == 0: + num_unfound += 1 + assert m['num_unfound'] == num_unfound + + log.info("reverting unfound in %s on %s", pg['pgid'], primary) + manager.raw_cluster_cmd('pg', pg['pgid'], + 'mark_unfound_lost', 'revert') + else: + log.info("no unfound in %s", pg['pgid']) + + manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') + manager.flush_pg_stats([0, 2]) + manager.wait_for_recovery() + + # verify result + for f in range(1, 10): + err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-']) + assert not err + + # see if osd.1 can cope + manager.mark_in_osd(1) + manager.revive_osd(1) + manager.wait_till_osd_is_up(1) + manager.wait_for_clean() + run.wait(procs) + manager.wait_for_clean() diff --git a/qa/tasks/manypools.py b/qa/tasks/manypools.py new file mode 100644 index 000000000..7fe7e43e1 --- /dev/null +++ b/qa/tasks/manypools.py @@ -0,0 +1,73 @@ +""" +Force pg creation on all osds +""" +from teuthology import misc as teuthology +from teuthology.orchestra import run +import logging + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Create the specified number of pools and write 16 objects to them (thereby 
forcing + the PG creation on each OSD). This task creates pools from all the clients, + in parallel. It is easy to add other daemon types which have the appropriate + permissions, but I don't think anything else does. + The config is just the number of pools to create. I recommend setting + "mon create pg interval" to a very low value in your ceph config to speed + this up. + + You probably want to do this to look at memory consumption, and + maybe to test how performance changes with the number of PGs. For example: + + tasks: + - ceph: + config: + mon: + mon create pg interval: 1 + - manypools: 3000 + - radosbench: + clients: [client.0] + time: 360 + """ + + log.info('creating {n} pools'.format(n=config)) + + poolnum = int(config) + creator_remotes = [] + client_roles = teuthology.all_roles_of_type(ctx.cluster, 'client') + log.info('got client_roles={client_roles_}'.format(client_roles_=client_roles)) + for role in client_roles: + log.info('role={role_}'.format(role_=role)) + (creator_remote, ) = ctx.cluster.only('client.{id}'.format(id=role)).remotes.keys() + creator_remotes.append((creator_remote, 'client.{id}'.format(id=role))) + + remaining_pools = poolnum + poolprocs=dict() + while (remaining_pools > 0): + log.info('{n} pools remaining to create'.format(n=remaining_pools)) + for remote, role_ in creator_remotes: + poolnum = remaining_pools + remaining_pools -= 1 + if remaining_pools < 0: + continue + log.info('creating pool{num} on {role}'.format(num=poolnum, role=role_)) + proc = remote.run( + args=[ + 'ceph', + '--name', role_, + 'osd', 'pool', 'create', 'pool{num}'.format(num=poolnum), '8', + run.Raw('&&'), + 'rados', + '--name', role_, + '--pool', 'pool{num}'.format(num=poolnum), + 'bench', '0', 'write', '-t', '16', '--block-size', '1' + ], + wait = False + ) + log.info('waiting for pool and object creates') + poolprocs[remote] = proc + + run.wait(poolprocs.values()) + + log.info('created all {n} pools and wrote 16 objects to each'.format(n=poolnum)) diff --git a/qa/tasks/mds_creation_failure.py b/qa/tasks/mds_creation_failure.py new file mode 100644 index 000000000..2ab8f70dd --- /dev/null +++ b/qa/tasks/mds_creation_failure.py @@ -0,0 +1,70 @@ +# FIXME: this file has many undefined vars which are accessed! +# flake8: noqa +import logging +import contextlib +import time +from tasks import ceph_manager +from teuthology import misc +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra.run import Raw + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Go through filesystem creation with a synthetic failure in an MDS + in its 'up:creating' state, to exercise the retry behaviour. 
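+
+    Exactly one MDS role is expected; the code path with a standby available
+    is different and is not exercised here.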
+ """ + # Grab handles to the teuthology objects of interest + mdslist = list(misc.all_roles_of_type(ctx.cluster, 'mds')) + if len(mdslist) != 1: + # Require exactly one MDS, the code path for creation failure when + # a standby is available is different + raise RuntimeError("This task requires exactly one MDS") + + mds_id = mdslist[0] + (mds_remote,) = ctx.cluster.only('mds.{_id}'.format(_id=mds_id)).remotes.keys() + manager = ceph_manager.CephManager( + mds_remote, ctx=ctx, logger=log.getChild('ceph_manager'), + ) + + # Stop MDS + self.fs.set_max_mds(0) + self.fs.mds_stop(mds_id) + self.fs.mds_fail(mds_id) + + # Reset the filesystem so that next start will go into CREATING + manager.raw_cluster_cmd('fs', 'rm', "default", "--yes-i-really-mean-it") + manager.raw_cluster_cmd('fs', 'new', "default", "metadata", "data") + + # Start the MDS with mds_kill_create_at set, it will crash during creation + mds.restart_with_args(["--mds_kill_create_at=1"]) + try: + mds.wait_for_exit() + except CommandFailedError as e: + if e.exitstatus == 1: + log.info("MDS creation killed as expected") + else: + log.error("Unexpected status code %s" % e.exitstatus) + raise + + # Since I have intentionally caused a crash, I will clean up the resulting core + # file to avoid task.internal.coredump seeing it as a failure. + log.info("Removing core file from synthetic MDS failure") + mds_remote.run(args=['rm', '-f', Raw("{archive}/coredump/*.core".format(archive=misc.get_archive_dir(ctx)))]) + + # It should have left the MDS map state still in CREATING + status = self.fs.status().get_mds(mds_id) + assert status['state'] == 'up:creating' + + # Start the MDS again without the kill flag set, it should proceed with creation successfully + mds.restart() + + # Wait for state ACTIVE + self.fs.wait_for_state("up:active", timeout=120, mds_id=mds_id) + + # The system should be back up in a happy healthy state, go ahead and run any further tasks + # inside this context. + yield diff --git a/qa/tasks/mds_pre_upgrade.py b/qa/tasks/mds_pre_upgrade.py new file mode 100644 index 000000000..812d402ed --- /dev/null +++ b/qa/tasks/mds_pre_upgrade.py @@ -0,0 +1,27 @@ +""" +Prepare MDS cluster for upgrade. +""" + +import logging + +from tasks.cephfs.filesystem import Filesystem + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Prepare MDS cluster for upgrade. + + This task reduces ranks to 1 and stops all standbys. + """ + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'snap-upgrade task only accepts a dict for configuration' + + fs = Filesystem(ctx) + fs.getinfo() # load name + fs.set_allow_standby_replay(False) + fs.set_max_mds(1) + fs.reach_max_mds() diff --git a/qa/tasks/mds_thrash.py b/qa/tasks/mds_thrash.py new file mode 100644 index 000000000..7b7b420f9 --- /dev/null +++ b/qa/tasks/mds_thrash.py @@ -0,0 +1,434 @@ +""" +Thrash mds by simulating failures +""" +import logging +import contextlib +import itertools +import random +import time + +from gevent import sleep +from gevent.greenlet import Greenlet +from gevent.event import Event +from teuthology import misc as teuthology + +from tasks import ceph_manager +from tasks.cephfs.filesystem import MDSCluster, Filesystem, FSMissing +from tasks.thrasher import Thrasher + +log = logging.getLogger(__name__) + +class MDSThrasher(Thrasher, Greenlet): + """ + MDSThrasher:: + + The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc). + + The config is optional. 
Many of the config parameters are a a maximum value + to use when selecting a random value from a range. To always use the maximum + value, set no_random to true. The config is a dict containing some or all of: + + max_thrash: [default: 1] the maximum number of active MDSs per FS that will be thrashed at + any given time. + + max_thrash_delay: [default: 30] maximum number of seconds to delay before + thrashing again. + + max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in + the replay state before thrashing. + + max_revive_delay: [default: 10] maximum number of seconds to delay before + bringing back a thrashed MDS. + + randomize: [default: true] enables randomization and use the max/min values + + seed: [no default] seed the random number generator + + thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed + during replay. Value should be between 0.0 and 1.0. + + thrash_max_mds: [default: 0.05] likelihood that the max_mds of the mds + cluster will be modified to a value [1, current) or (current, starting + max_mds]. Value should be between 0.0 and 1.0. + + thrash_while_stopping: [default: false] thrash an MDS while there + are MDS in up:stopping (because max_mds was changed and some + MDS were deactivated). + + thrash_weights: allows specific MDSs to be thrashed more/less frequently. + This option overrides anything specified by max_thrash. This option is a + dict containing mds.x: weight pairs. For example, [mds.a: 0.7, mds.b: + 0.3, mds.c: 0.0]. Each weight is a value from 0.0 to 1.0. Any MDSs not + specified will be automatically given a weight of 0.0 (not thrashed). + For a given MDS, by default the trasher delays for up to + max_thrash_delay, trashes, waits for the MDS to recover, and iterates. + If a non-zero weight is specified for an MDS, for each iteration the + thrasher chooses whether to thrash during that iteration based on a + random value [0-1] not exceeding the weight of that MDS. + + Examples:: + + + The following example sets the likelihood that mds.a will be thrashed + to 80%, mds.b to 20%, and other MDSs will not be thrashed. It also sets the + likelihood that an MDS will be thrashed in replay to 40%. + Thrash weights do not have to sum to 1. 
+ + tasks: + - ceph: + - mds_thrash: + thrash_weights: + - mds.a: 0.8 + - mds.b: 0.2 + thrash_in_replay: 0.4 + - ceph-fuse: + - workunit: + clients: + all: [suites/fsx.sh] + + The following example disables randomization, and uses the max delay values: + + tasks: + - ceph: + - mds_thrash: + max_thrash_delay: 10 + max_revive_delay: 1 + max_replay_thrash_delay: 4 + + """ + + def __init__(self, ctx, manager, config, fs, max_mds): + super(MDSThrasher, self).__init__() + + self.config = config + self.ctx = ctx + self.logger = log.getChild('fs.[{f}]'.format(f = fs.name)) + self.fs = fs + self.manager = manager + self.max_mds = max_mds + self.name = 'thrasher.fs.[{f}]'.format(f = fs.name) + self.stopping = Event() + + self.randomize = bool(self.config.get('randomize', True)) + self.thrash_max_mds = float(self.config.get('thrash_max_mds', 0.05)) + self.max_thrash = int(self.config.get('max_thrash', 1)) + self.max_thrash_delay = float(self.config.get('thrash_delay', 120.0)) + self.thrash_in_replay = float(self.config.get('thrash_in_replay', False)) + assert self.thrash_in_replay >= 0.0 and self.thrash_in_replay <= 1.0, 'thrash_in_replay ({v}) must be between [0.0, 1.0]'.format( + v=self.thrash_in_replay) + self.max_replay_thrash_delay = float(self.config.get('max_replay_thrash_delay', 4.0)) + self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0)) + + def _run(self): + try: + self.do_thrash() + except FSMissing: + pass + except Exception as e: + # Log exceptions here so we get the full backtrace (gevent loses them). + # Also allow successful completion as gevent exception handling is a broken mess: + # + # 2017-02-03T14:34:01.259 CRITICAL:root: File "gevent.libev.corecext.pyx", line 367, in gevent.libev.corecext.loop.handle_error (src/gevent/libev/gevent.corecext.c:5051) + # File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 558, in handle_error + # self.print_exception(context, type, value, tb) + # File "/home/teuthworker/src/git.ceph.com_git_teuthology_master/virtualenv/local/lib/python2.7/site-packages/gevent/hub.py", line 605, in print_exception + # traceback.print_exception(type, value, tb, file=errstream) + # File "/usr/lib/python2.7/traceback.py", line 124, in print_exception + # _print(file, 'Traceback (most recent call last):') + # File "/usr/lib/python2.7/traceback.py", line 13, in _print + # file.write(str+terminator) + # 2017-02-03T14:34:01.261 CRITICAL:root:IOError + self.set_thrasher_exception(e) + self.logger.exception("exception:") + # allow successful completion so gevent doesn't see an exception... + + def log(self, x): + """Write data to the logger assigned to MDSThrasher""" + self.logger.info(x) + + def stop(self): + self.stopping.set() + + def kill_mds(self, mds): + if self.config.get('powercycle'): + (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)). + remotes.keys()) + self.log('kill_mds on mds.{m} doing powercycle of {s}'. + format(m=mds, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_off() + else: + self.ctx.daemons.get_daemon('mds', mds).stop() + + @staticmethod + def _assert_ipmi(remote): + assert remote.console.has_ipmi_credentials, ( + "powercycling requested but RemoteConsole is not " + "initialized. Check ipmi config.") + + def revive_mds(self, mds): + """ + Revive mds -- do an ipmpi powercycle (if indicated by the config) + and then restart. + """ + if self.config.get('powercycle'): + (remote,) = (self.ctx.cluster.only('mds.{m}'.format(m=mds)). 
+ remotes.keys()) + self.log('revive_mds on mds.{m} doing powercycle of {s}'. + format(m=mds, s=remote.name)) + self._assert_ipmi(remote) + remote.console.power_on() + self.manager.make_admin_daemon_dir(self.ctx, remote) + args = [] + self.ctx.daemons.get_daemon('mds', mds).restart(*args) + + def wait_for_stable(self, rank = None, gid = None): + self.log('waiting for mds cluster to stabilize...') + for itercount in itertools.count(): + status = self.fs.status() + max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds'] + ranks = list(status.get_ranks(self.fs.id)) + stopping = sum(1 for _ in ranks if "up:stopping" == _['state']) + actives = sum(1 for _ in ranks + if "up:active" == _['state'] and "laggy_since" not in _) + + if not bool(self.config.get('thrash_while_stopping', False)) and stopping > 0: + if itercount % 5 == 0: + self.log('cluster is considered unstable while MDS are in up:stopping (!thrash_while_stopping)') + else: + if rank is not None: + try: + info = status.get_rank(self.fs.id, rank) + if info['gid'] != gid and "up:active" == info['state']: + self.log('mds.{name} has gained rank={rank}, replacing gid={gid}'.format(name = info['name'], rank = rank, gid = gid)) + return status + except: + pass # no rank present + if actives >= max_mds: + # no replacement can occur! + self.log("cluster has {actives} actives (max_mds is {max_mds}), no MDS can replace rank {rank}".format( + actives=actives, max_mds=max_mds, rank=rank)) + return status + else: + if actives == max_mds: + self.log('mds cluster has {count} alive and active, now stable!'.format(count = actives)) + return status, None + if itercount > 300/2: # 5 minutes + raise RuntimeError('timeout waiting for cluster to stabilize') + elif itercount % 5 == 0: + self.log('mds map: {status}'.format(status=status)) + else: + self.log('no change') + sleep(2) + + def do_thrash(self): + """ + Perform the random thrashing action + """ + + self.log('starting mds_do_thrash for fs {fs}'.format(fs = self.fs.name)) + stats = { + "max_mds": 0, + "deactivate": 0, + "kill": 0, + } + + while not self.stopping.is_set(): + delay = self.max_thrash_delay + if self.randomize: + delay = random.randrange(0.0, self.max_thrash_delay) + + if delay > 0.0: + self.log('waiting for {delay} secs before thrashing'.format(delay=delay)) + self.stopping.wait(delay) + if self.stopping.is_set(): + continue + + status = self.fs.status() + + if random.random() <= self.thrash_max_mds: + max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds'] + options = [i for i in range(1, self.max_mds + 1) if i != max_mds] + if len(options) > 0: + new_max_mds = random.choice(options) + self.log('thrashing max_mds: %d -> %d' % (max_mds, new_max_mds)) + self.fs.set_max_mds(new_max_mds) + stats['max_mds'] += 1 + self.wait_for_stable() + + count = 0 + for info in status.get_ranks(self.fs.id): + name = info['name'] + label = 'mds.' 
+ name + rank = info['rank'] + gid = info['gid'] + + # if thrash_weights isn't specified and we've reached max_thrash, + # we're done + count = count + 1 + if 'thrash_weights' not in self.config and count > self.max_thrash: + break + + weight = 1.0 + if 'thrash_weights' in self.config: + weight = self.config['thrash_weights'].get(label, '0.0') + skip = random.random() + if weight <= skip: + self.log('skipping thrash iteration with skip ({skip}) > weight ({weight})'.format(skip=skip, weight=weight)) + continue + + self.log('kill {label} (rank={rank})'.format(label=label, rank=rank)) + self.kill_mds(name) + stats['kill'] += 1 + + # wait for mon to report killed mds as crashed + last_laggy_since = None + itercount = 0 + while True: + status = self.fs.status() + info = status.get_mds(name) + if not info: + break + if 'laggy_since' in info: + last_laggy_since = info['laggy_since'] + break + if any([(f == name) for f in status.get_fsmap(self.fs.id)['mdsmap']['failed']]): + break + self.log( + 'waiting till mds map indicates {label} is laggy/crashed, in failed state, or {label} is removed from mdsmap'.format( + label=label)) + itercount = itercount + 1 + if itercount > 10: + self.log('mds map: {status}'.format(status=status)) + sleep(2) + + if last_laggy_since: + self.log( + '{label} reported laggy/crashed since: {since}'.format(label=label, since=last_laggy_since)) + else: + self.log('{label} down, removed from mdsmap'.format(label=label)) + + # wait for a standby mds to takeover and become active + status = self.wait_for_stable(rank, gid) + + # wait for a while before restarting old active to become new + # standby + delay = self.max_revive_delay + if self.randomize: + delay = random.randrange(0.0, self.max_revive_delay) + + self.log('waiting for {delay} secs before reviving {label}'.format( + delay=delay, label=label)) + sleep(delay) + + self.log('reviving {label}'.format(label=label)) + self.revive_mds(name) + + for itercount in itertools.count(): + if itercount > 300/2: # 5 minutes + raise RuntimeError('timeout waiting for MDS to revive') + status = self.fs.status() + info = status.get_mds(name) + if info and info['state'] in ('up:standby', 'up:standby-replay', 'up:active'): + self.log('{label} reported in {state} state'.format(label=label, state=info['state'])) + break + self.log( + 'waiting till mds map indicates {label} is in active, standby or standby-replay'.format(label=label)) + sleep(2) + + for stat in stats: + self.log("stat['{key}'] = {value}".format(key = stat, value = stats[stat])) + + # don't do replay thrashing right now +# for info in status.get_replays(self.fs.id): +# # this might race with replay -> active transition... +# if status['state'] == 'up:replay' and random.randrange(0.0, 1.0) < self.thrash_in_replay: +# delay = self.max_replay_thrash_delay +# if self.randomize: +# delay = random.randrange(0.0, self.max_replay_thrash_delay) +# sleep(delay) +# self.log('kill replaying mds.{id}'.format(id=self.to_kill)) +# self.kill_mds(self.to_kill) +# +# delay = self.max_revive_delay +# if self.randomize: +# delay = random.randrange(0.0, self.max_revive_delay) +# +# self.log('waiting for {delay} secs before reviving mds.{id}'.format( +# delay=delay, id=self.to_kill)) +# sleep(delay) +# +# self.log('revive mds.{id}'.format(id=self.to_kill)) +# self.revive_mds(self.to_kill) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Stress test the mds by thrashing while another task/workunit + is running. 
+ + Please refer to MDSThrasher class for further information on the + available options. + """ + + mds_cluster = MDSCluster(ctx) + + if config is None: + config = {} + assert isinstance(config, dict), \ + 'mds_thrash task only accepts a dict for configuration' + mdslist = list(teuthology.all_roles_of_type(ctx.cluster, 'mds')) + assert len(mdslist) > 1, \ + 'mds_thrash task requires at least 2 metadata servers' + + # choose random seed + if 'seed' in config: + seed = int(config['seed']) + else: + seed = int(time.time()) + log.info('mds thrasher using random seed: {seed}'.format(seed=seed)) + random.seed(seed) + + (first,) = ctx.cluster.only('mds.{_id}'.format(_id=mdslist[0])).remotes.keys() + manager = ceph_manager.CephManager( + first, ctx=ctx, logger=log.getChild('ceph_manager'), + ) + + # make sure everyone is in active, standby, or standby-replay + log.info('Wait for all MDSs to reach steady state...') + status = mds_cluster.status() + while True: + steady = True + for info in status.get_all(): + state = info['state'] + if state not in ('up:active', 'up:standby', 'up:standby-replay'): + steady = False + break + if steady: + break + sleep(2) + status = mds_cluster.status() + log.info('Ready to start thrashing') + + manager.wait_for_clean() + assert manager.is_clean() + + if 'cluster' not in config: + config['cluster'] = 'ceph' + + for fs in status.get_filesystems(): + thrasher = MDSThrasher(ctx, manager, config, Filesystem(ctx, fscid=fs['id']), fs['mdsmap']['max_mds']) + thrasher.start() + ctx.ceph[config['cluster']].thrashers.append(thrasher) + + try: + log.debug('Yielding') + yield + finally: + log.info('joining mds_thrasher') + thrasher.stop() + if thrasher.exception is not None: + raise RuntimeError('error during thrashing') + thrasher.join() + log.info('done joining') diff --git a/qa/tasks/metadata.yaml b/qa/tasks/metadata.yaml new file mode 100644 index 000000000..ccdc3b077 --- /dev/null +++ b/qa/tasks/metadata.yaml @@ -0,0 +1,2 @@ +instance-id: test +local-hostname: test diff --git a/qa/tasks/mgr/__init__.py b/qa/tasks/mgr/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/tasks/mgr/__init__.py diff --git a/qa/tasks/mgr/dashboard/__init__.py b/qa/tasks/mgr/dashboard/__init__.py new file mode 100644 index 000000000..2b022e024 --- /dev/null +++ b/qa/tasks/mgr/dashboard/__init__.py @@ -0,0 +1 @@ +DEFAULT_API_VERSION = '1.0' diff --git a/qa/tasks/mgr/dashboard/helper.py b/qa/tasks/mgr/dashboard/helper.py new file mode 100644 index 000000000..d80e238a2 --- /dev/null +++ b/qa/tasks/mgr/dashboard/helper.py @@ -0,0 +1,724 @@ +# -*- coding: utf-8 -*- +# pylint: disable=W0212,too-many-return-statements,too-many-public-methods +from __future__ import absolute_import + +import json +import logging +import random +import re +import string +import time +from collections import namedtuple +from typing import List + +import requests +from tasks.mgr.mgr_test_case import MgrTestCase +from teuthology.exceptions import \ + CommandFailedError # pylint: disable=import-error + +from . 
import DEFAULT_API_VERSION + +log = logging.getLogger(__name__) + + +class DashboardTestCase(MgrTestCase): + # Display full error diffs + maxDiff = None + + # Increased x3 (20 -> 60) + TIMEOUT_HEALTH_CLEAR = 60 + + MGRS_REQUIRED = 2 + MDSS_REQUIRED = 1 + REQUIRE_FILESYSTEM = True + CLIENTS_REQUIRED = 1 + CEPHFS = False + ORCHESTRATOR = False + ORCHESTRATOR_TEST_DATA = { + 'inventory': [ + { + 'name': 'test-host0', + 'addr': '1.2.3.4', + 'devices': [ + { + 'path': '/dev/sda', + } + ] + }, + { + 'name': 'test-host1', + 'addr': '1.2.3.5', + 'devices': [ + { + 'path': '/dev/sdb', + } + ] + } + ], + 'daemons': [ + { + 'nodename': 'test-host0', + 'daemon_type': 'mon', + 'daemon_id': 'a' + }, + { + 'nodename': 'test-host0', + 'daemon_type': 'mgr', + 'daemon_id': 'x' + }, + { + 'nodename': 'test-host0', + 'daemon_type': 'osd', + 'daemon_id': '0' + }, + { + 'nodename': 'test-host1', + 'daemon_type': 'osd', + 'daemon_id': '1' + } + ] + } + + _session = None # type: requests.sessions.Session + _token = None + _resp = None # type: requests.models.Response + _loggedin = False + _base_uri = None + + AUTO_AUTHENTICATE = True + + AUTH_ROLES = ['administrator'] + + @classmethod + def create_user(cls, username, password, roles=None, + force_password=True, cmd_args=None): + # pylint: disable=too-many-arguments + """ + :param username: The name of the user. + :type username: str + :param password: The password. + :type password: str + :param roles: A list of roles. + :type roles: list + :param force_password: Force the use of the specified password. This + will bypass the password complexity check. Defaults to 'True'. + :type force_password: bool + :param cmd_args: Additional command line arguments for the + 'ac-user-create' command. + :type cmd_args: None | list[str] + """ + try: + cls._ceph_cmd(['dashboard', 'ac-user-show', username]) + cls._ceph_cmd(['dashboard', 'ac-user-delete', username]) + except CommandFailedError as ex: + if ex.exitstatus != 2: + raise ex + + user_create_args = [ + 'dashboard', 'ac-user-create', username + ] + if force_password: + user_create_args.append('--force-password') + if cmd_args: + user_create_args.extend(cmd_args) + cls._ceph_cmd_with_secret(user_create_args, password) + if roles: + set_roles_args = ['dashboard', 'ac-user-set-roles', username] + for idx, role in enumerate(roles): + if isinstance(role, str): + set_roles_args.append(role) + else: + assert isinstance(role, dict) + rolename = 'test_role_{}'.format(idx) + try: + cls._ceph_cmd(['dashboard', 'ac-role-show', rolename]) + cls._ceph_cmd(['dashboard', 'ac-role-delete', rolename]) + except CommandFailedError as ex: + if ex.exitstatus != 2: + raise ex + cls._ceph_cmd(['dashboard', 'ac-role-create', rolename]) + for mod, perms in role.items(): + args = ['dashboard', 'ac-role-add-scope-perms', rolename, mod] + args.extend(perms) + cls._ceph_cmd(args) + set_roles_args.append(rolename) + cls._ceph_cmd(set_roles_args) + + @classmethod + def create_pool(cls, name, pg_num, pool_type, application='rbd'): + data = { + 'pool': name, + 'pg_num': pg_num, + 'pool_type': pool_type, + 'application_metadata': [application] + } + if pool_type == 'erasure': + data['flags'] = ['ec_overwrites'] + cls._task_post("/api/pool", data) + + @classmethod + def login(cls, username, password, set_cookies=False): + if cls._loggedin: + cls.logout() + cls._post('/api/auth', {'username': username, + 'password': password}, set_cookies=set_cookies) + cls._assertEq(cls._resp.status_code, 201) + cls._token = cls.jsonBody()['token'] + cls._loggedin = True + 
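+ # Illustrative usage sketch (assumes the 'admin'/'admin' user created in
+ # setUpClass below): once login() has stored cls._token, _request() attaches
+ # it to every call, either as an "Authorization: Bearer <token>" header by
+ # default or as a "token" cookie when set_cookies=True, so a test can simply
+ # do:
+ #
+ #     cls.login('admin', 'admin')
+ #     cls._get('/api/summary')   # sent with the JWT attached
+ #     cls.logout()
+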
+ @classmethod + def logout(cls, set_cookies=False): + if cls._loggedin: + cls._post('/api/auth/logout', set_cookies=set_cookies) + cls._assertEq(cls._resp.status_code, 200) + cls._token = None + cls._loggedin = False + + @classmethod + def delete_user(cls, username, roles=None): + if roles is None: + roles = [] + cls._ceph_cmd(['dashboard', 'ac-user-delete', username]) + for idx, role in enumerate(roles): + if isinstance(role, dict): + cls._ceph_cmd(['dashboard', 'ac-role-delete', 'test_role_{}'.format(idx)]) + + @classmethod + def RunAs(cls, username, password, roles=None, force_password=True, + cmd_args=None, login=True): + # pylint: disable=too-many-arguments + def wrapper(func): + def execute(self, *args, **kwargs): + self.create_user(username, password, roles, + force_password, cmd_args) + if login: + self.login(username, password) + res = func(self, *args, **kwargs) + if login: + self.logout() + self.delete_user(username, roles) + return res + + return execute + + return wrapper + + @classmethod + def set_jwt_token(cls, token): + cls._token = token + + @classmethod + def setUpClass(cls): + super(DashboardTestCase, cls).setUpClass() + cls._assign_ports("dashboard", "ssl_server_port") + cls._load_module("dashboard") + cls.update_base_uri() + + if cls.CEPHFS: + cls.mds_cluster.clear_firewall() + + # To avoid any issues with e.g. unlink bugs, we destroy and recreate + # the filesystem rather than just doing a rm -rf of files + cls.mds_cluster.mds_stop() + cls.mds_cluster.mds_fail() + cls.mds_cluster.delete_all_filesystems() + cls.fs = None # is now invalid! + + cls.fs = cls.mds_cluster.newfs(create=True) + cls.fs.mds_restart() + + # In case some test messed with auth caps, reset them + # pylint: disable=not-an-iterable + client_mount_ids = [m.client_id for m in cls.mounts] + for client_id in client_mount_ids: + cls.mds_cluster.mon_manager.raw_cluster_cmd_result( + 'auth', 'caps', "client.{0}".format(client_id), + 'mds', 'allow', + 'mon', 'allow r', + 'osd', 'allow rw pool={0}'.format(cls.fs.get_data_pool_name())) + + # wait for mds restart to complete... 
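+ # (behaviour assumed from the helper's name: wait_for_daemons() should block
+ # until the recreated filesystem's MDS daemons report healthy again, so the
+ # tests below do not race against the restart)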
+ cls.fs.wait_for_daemons() + + if cls.ORCHESTRATOR: + cls._load_module("test_orchestrator") + + cmd = ['orch', 'set', 'backend', 'test_orchestrator'] + cls.mgr_cluster.mon_manager.raw_cluster_cmd(*cmd) + + cmd = ['test_orchestrator', 'load_data', '-i', '-'] + cls.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd, stdin=json.dumps( + cls.ORCHESTRATOR_TEST_DATA + )) + + cls._token = None + cls._session = requests.Session() + cls._resp = None + + cls.create_user('admin', 'admin', cls.AUTH_ROLES) + if cls.AUTO_AUTHENTICATE: + cls.login('admin', 'admin') + + @classmethod + def update_base_uri(cls): + if cls._base_uri is None: + cls._base_uri = cls._get_uri("dashboard").rstrip('/') + + def setUp(self): + super(DashboardTestCase, self).setUp() + if not self._loggedin and self.AUTO_AUTHENTICATE: + self.login('admin', 'admin') + self.wait_for_health_clear(self.TIMEOUT_HEALTH_CLEAR) + + @classmethod + def tearDownClass(cls): + super(DashboardTestCase, cls).tearDownClass() + + # pylint: disable=inconsistent-return-statements, too-many-arguments, too-many-branches + @classmethod + def _request(cls, url, method, data=None, params=None, version=DEFAULT_API_VERSION, + set_cookies=False, headers=None): + url = "{}{}".format(cls._base_uri, url) + log.debug("Request %s to %s", method, url) + if headers is None: + headers = {} + cookies = {} + if cls._token: + if set_cookies: + cookies['token'] = cls._token + else: + headers['Authorization'] = "Bearer {}".format(cls._token) + if version is None: + headers['Accept'] = 'application/json' + else: + headers['Accept'] = 'application/vnd.ceph.api.v{}+json'.format(version) + + if set_cookies: + if method == 'GET': + cls._resp = cls._session.get(url, params=params, verify=False, + headers=headers, cookies=cookies) + elif method == 'POST': + cls._resp = cls._session.post(url, json=data, params=params, + verify=False, headers=headers, cookies=cookies) + elif method == 'DELETE': + cls._resp = cls._session.delete(url, json=data, params=params, + verify=False, headers=headers, cookies=cookies) + elif method == 'PUT': + cls._resp = cls._session.put(url, json=data, params=params, + verify=False, headers=headers, cookies=cookies) + else: + assert False + else: + if method == 'GET': + cls._resp = cls._session.get(url, params=params, verify=False, + headers=headers) + elif method == 'POST': + cls._resp = cls._session.post(url, json=data, params=params, + verify=False, headers=headers) + elif method == 'DELETE': + cls._resp = cls._session.delete(url, json=data, params=params, + verify=False, headers=headers) + elif method == 'PUT': + cls._resp = cls._session.put(url, json=data, params=params, + verify=False, headers=headers) + else: + assert False + try: + if not cls._resp.ok: + # Output response for easier debugging. 
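+ # Even for a non-OK response the body is still decoded and returned below,
+ # so tests can assert on error payloads (see assertError / assertJsonBody).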
+ log.error("Request response: %s", cls._resp.text) + content_type = cls._resp.headers['content-type'] + if re.match(r'^application/.*json', + content_type) and cls._resp.text and cls._resp.text != "": + return cls._resp.json() + return cls._resp.text + except ValueError as ex: + log.exception("Failed to decode response: %s", cls._resp.text) + raise ex + + @classmethod + def _get(cls, url, params=None, version=DEFAULT_API_VERSION, set_cookies=False, headers=None): + return cls._request(url, 'GET', params=params, version=version, + set_cookies=set_cookies, headers=headers) + + @classmethod + def _view_cache_get(cls, url, retries=5): + retry = True + while retry and retries > 0: + retry = False + res = cls._get(url, version=DEFAULT_API_VERSION) + if isinstance(res, dict): + res = [res] + for view in res: + assert 'value' in view + if not view['value']: + retry = True + retries -= 1 + if retries == 0: + raise Exception("{} view cache exceeded number of retries={}" + .format(url, retries)) + return res + + @classmethod + def _post(cls, url, data=None, params=None, version=DEFAULT_API_VERSION, set_cookies=False): + cls._request(url, 'POST', data, params, version=version, set_cookies=set_cookies) + + @classmethod + def _delete(cls, url, data=None, params=None, version=DEFAULT_API_VERSION, set_cookies=False): + cls._request(url, 'DELETE', data, params, version=version, set_cookies=set_cookies) + + @classmethod + def _put(cls, url, data=None, params=None, version=DEFAULT_API_VERSION, set_cookies=False): + cls._request(url, 'PUT', data, params, version=version, set_cookies=set_cookies) + + @classmethod + def _assertEq(cls, v1, v2): + if not v1 == v2: + raise Exception("assertion failed: {} != {}".format(v1, v2)) + + @classmethod + def _assertIn(cls, v1, v2): + if v1 not in v2: + raise Exception("assertion failed: {} not in {}".format(v1, v2)) + + @classmethod + def _assertIsInst(cls, v1, v2): + if not isinstance(v1, v2): + raise Exception("assertion failed: {} not instance of {}".format(v1, v2)) + + # pylint: disable=too-many-arguments + @classmethod + def _task_request(cls, method, url, data, timeout, version=DEFAULT_API_VERSION, + set_cookies=False): + res = cls._request(url, method, data, version=version, set_cookies=set_cookies) + cls._assertIn(cls._resp.status_code, [200, 201, 202, 204, 400, 403, 404]) + + if cls._resp.status_code == 403: + return None + + if cls._resp.status_code != 202: + log.debug("task finished immediately") + return res + + cls._assertIn('name', res) + cls._assertIn('metadata', res) + task_name = res['name'] + task_metadata = res['metadata'] + + retries = int(timeout) + res_task = None + while retries > 0 and not res_task: + retries -= 1 + log.debug("task (%s, %s) is still executing", task_name, task_metadata) + time.sleep(1) + _res = cls._get('/api/task?name={}'.format(task_name), version=version) + cls._assertEq(cls._resp.status_code, 200) + executing_tasks = [task for task in _res['executing_tasks'] if + task['metadata'] == task_metadata] + finished_tasks = [task for task in _res['finished_tasks'] if + task['metadata'] == task_metadata] + if not executing_tasks and finished_tasks: + res_task = finished_tasks[0] + + if retries <= 0: + raise Exception("Waiting for task ({}, {}) to finish timed out. 
{}" + .format(task_name, task_metadata, _res)) + + log.debug("task (%s, %s) finished", task_name, task_metadata) + if res_task['success']: + if method == 'POST': + cls._resp.status_code = 201 + elif method == 'PUT': + cls._resp.status_code = 200 + elif method == 'DELETE': + cls._resp.status_code = 204 + return res_task['ret_value'] + + if 'status' in res_task['exception']: + cls._resp.status_code = res_task['exception']['status'] + else: + cls._resp.status_code = 500 + return res_task['exception'] + + @classmethod + def _task_post(cls, url, data=None, timeout=60, version=DEFAULT_API_VERSION, set_cookies=False): + return cls._task_request('POST', url, data, timeout, version=version, + set_cookies=set_cookies) + + @classmethod + def _task_delete(cls, url, timeout=60, version=DEFAULT_API_VERSION, set_cookies=False): + return cls._task_request('DELETE', url, None, timeout, version=version, + set_cookies=set_cookies) + + @classmethod + def _task_put(cls, url, data=None, timeout=60, version=DEFAULT_API_VERSION, set_cookies=False): + return cls._task_request('PUT', url, data, timeout, version=version, + set_cookies=set_cookies) + + @classmethod + def cookies(cls): + return cls._resp.cookies + + @classmethod + def jsonBody(cls): + return cls._resp.json() + + @classmethod + def reset_session(cls): + cls._session = requests.Session() + + def assertSubset(self, data, biggerData): + for key, value in data.items(): + self.assertEqual(biggerData[key], value) + + def assertJsonBody(self, data): + body = self._resp.json() + self.assertEqual(body, data) + + def assertJsonSubset(self, data): + self.assertSubset(data, self._resp.json()) + + def assertSchema(self, data, schema): + try: + return _validate_json(data, schema) + except _ValError as e: + self.assertEqual(data, str(e)) + + def assertSchemaBody(self, schema): + self.assertSchema(self.jsonBody(), schema) + + def assertBody(self, body): + self.assertEqual(self._resp.text, body) + + def assertStatus(self, status): + if isinstance(status, list): + self.assertIn(self._resp.status_code, status) + else: + self.assertEqual(self._resp.status_code, status) + + def assertHeaders(self, headers): + for name, value in headers.items(): + self.assertIn(name, self._resp.headers) + self.assertEqual(self._resp.headers[name], value) + + def assertError(self, code=None, component=None, detail=None): + body = self._resp.json() + if code: + self.assertEqual(body['code'], code) + if component: + self.assertEqual(body['component'], component) + if detail: + self.assertEqual(body['detail'], detail) + + @classmethod + def _ceph_cmd(cls, cmd): + res = cls.mgr_cluster.mon_manager.raw_cluster_cmd(*cmd) + log.debug("command result: %s", res) + return res + + @classmethod + def _ceph_cmd_result(cls, cmd): + exitstatus = cls.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd) + log.debug("command exit status: %d", exitstatus) + return exitstatus + + @classmethod + def _ceph_cmd_with_secret(cls, cmd: List[str], secret: str, return_exit_code: bool = False): + cmd.append('-i') + cmd.append('{}'.format(cls._ceph_create_tmp_file(secret))) + if return_exit_code: + return cls._ceph_cmd_result(cmd) + return cls._ceph_cmd(cmd) + + @classmethod + def _ceph_create_tmp_file(cls, content: str) -> str: + """Create a temporary file in the remote cluster""" + file_name = ''.join(random.choices(string.ascii_letters + string.digits, k=20)) + file_path = '/tmp/{}'.format(file_name) + cls._cmd(['sh', '-c', 'echo -n {} > {}'.format(content, file_path)]) + return file_path + + def set_config_key(self, 
key, value): + self._ceph_cmd(['config-key', 'set', key, value]) + + def get_config_key(self, key): + return self._ceph_cmd(['config-key', 'get', key]) + + @classmethod + def _cmd(cls, args): + return cls.mgr_cluster.admin_remote.run(args=args) + + @classmethod + def _rbd_cmd(cls, cmd): + args = ['rbd'] + args.extend(cmd) + cls._cmd(args) + + @classmethod + def _radosgw_admin_cmd(cls, cmd): + args = ['radosgw-admin'] + args.extend(cmd) + cls._cmd(args) + + @classmethod + def _rados_cmd(cls, cmd): + args = ['rados'] + args.extend(cmd) + cls._cmd(args) + + @classmethod + def mons(cls): + out = cls.ceph_cluster.mon_manager.raw_cluster_cmd('quorum_status') + j = json.loads(out) + return [mon['name'] for mon in j['monmap']['mons']] + + @classmethod + def find_object_in_list(cls, key, value, iterable): + """ + Get the first occurrence of an object within a list with + the specified key/value. + :param key: The name of the key. + :param value: The value to search for. + :param iterable: The list to process. + :return: Returns the found object or None. + """ + for obj in iterable: + if key in obj and obj[key] == value: + return obj + return None + + +# TODP: pass defaults=(False,) to namedtuple() if python3.7 +class JLeaf(namedtuple('JLeaf', ['typ', 'none'])): + def __new__(cls, typ, none=False): + return super().__new__(cls, typ, none) + + +JList = namedtuple('JList', ['elem_typ']) + +JTuple = namedtuple('JTuple', ['elem_typs']) + +JUnion = namedtuple('JUnion', ['elem_typs']) + + +class JObj(namedtuple('JObj', ['sub_elems', 'allow_unknown', 'none', 'unknown_schema'])): + def __new__(cls, sub_elems, allow_unknown=False, none=False, unknown_schema=None): + """ + :type sub_elems: dict[str, JAny | JLeaf | JList | JObj | type] + :type allow_unknown: bool + :type none: bool + :type unknown_schema: int, str, JAny | JLeaf | JList | JObj + :return: + """ + return super(JObj, cls).__new__(cls, sub_elems, allow_unknown, none, unknown_schema) + + +JAny = namedtuple('JAny', ['none']) + +module_options_object_schema = JObj({ + 'name': str, + 'type': str, + 'level': str, + 'flags': int, + 'default_value': JAny(none=True), + 'min': JAny(none=False), + 'max': JAny(none=False), + 'enum_allowed': JList(str), + 'see_also': JList(str), + 'desc': str, + 'long_desc': str, + 'tags': JList(str), +}) + +module_options_schema = JObj( + {}, + allow_unknown=True, + unknown_schema=module_options_object_schema) + +addrvec_schema = JList(JObj({ + 'addr': str, + 'nonce': int, + 'type': str +})) + +devices_schema = JList(JObj({ + 'daemons': JList(str), + 'devid': str, + 'location': JList(JObj({ + 'host': str, + 'dev': str, + 'path': str + })) +}, allow_unknown=True)) + + +class _ValError(Exception): + def __init__(self, msg, path): + path_str = ''.join('[{}]'.format(repr(p)) for p in path) + super(_ValError, self).__init__('In `input{}`: {}'.format(path_str, msg)) + + +# pylint: disable=dangerous-default-value,inconsistent-return-statements,too-many-branches +def _validate_json(val, schema, path=[]): + """ + >>> d = {'a': 1, 'b': 'x', 'c': range(10)} + ... ds = JObj({'a': int, 'b': str, 'c': JList(int)}) + ... 
_validate_json(d, ds) + True + >>> _validate_json({'num': 1}, JObj({'num': JUnion([int,float])})) + True + >>> _validate_json({'num': 'a'}, JObj({'num': JUnion([int,float])})) + False + """ + if isinstance(schema, JAny): + if not schema.none and val is None: + raise _ValError('val is None', path) + return True + if isinstance(schema, JLeaf): + if schema.none and val is None: + return True + if not isinstance(val, schema.typ): + raise _ValError('val not of type {}'.format(schema.typ), path) + return True + if isinstance(schema, JList): + if not isinstance(val, list): + raise _ValError('val="{}" is not a list'.format(val), path) + return all(_validate_json(e, schema.elem_typ, path + [i]) for i, e in enumerate(val)) + if isinstance(schema, JTuple): + return all(_validate_json(val[i], typ, path + [i]) + for i, typ in enumerate(schema.elem_typs)) + if isinstance(schema, JUnion): + for typ in schema.elem_typs: + try: + if _validate_json(val, typ, path): + return True + except _ValError: + pass + return False + if isinstance(schema, JObj): + if val is None and schema.none: + return True + if val is None: + raise _ValError('val is None', path) + if not hasattr(val, 'keys'): + raise _ValError('val="{}" is not a dict'.format(val), path) + missing_keys = set(schema.sub_elems.keys()).difference(set(val.keys())) + if missing_keys: + raise _ValError('missing keys: {}'.format(missing_keys), path) + unknown_keys = set(val.keys()).difference(set(schema.sub_elems.keys())) + if not schema.allow_unknown and unknown_keys: + raise _ValError('unknown keys: {}'.format(unknown_keys), path) + result = all( + _validate_json(val[key], sub_schema, path + [key]) + for key, sub_schema in schema.sub_elems.items() + ) + if unknown_keys and schema.allow_unknown and schema.unknown_schema: + result += all( + _validate_json(val[key], schema.unknown_schema, path + [key]) + for key in unknown_keys + ) + return result + if schema in [str, int, float, bool]: + return _validate_json(val, JLeaf(schema), path) + + assert False, str(path) diff --git a/qa/tasks/mgr/dashboard/test_api.py b/qa/tasks/mgr/dashboard/test_api.py new file mode 100644 index 000000000..22f235698 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_api.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +import unittest + +from . 
import DEFAULT_API_VERSION +from .helper import DashboardTestCase + + +class VersionReqTest(DashboardTestCase, unittest.TestCase): + def test_version(self): + for (version, expected_status) in [ + (DEFAULT_API_VERSION, 200), + (None, 415), + ("99.99", 415) + ]: + with self.subTest(version=version): + self._get('/api/summary', version=version) + self.assertStatus(expected_status) diff --git a/qa/tasks/mgr/dashboard/test_auth.py b/qa/tasks/mgr/dashboard/test_auth.py new file mode 100644 index 000000000..a2266229b --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_auth.py @@ -0,0 +1,352 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +import time + +import jwt +from teuthology.orchestra.run import \ + CommandFailedError # pylint: disable=import-error + +from .helper import DashboardTestCase, JLeaf, JObj + + +class AuthTest(DashboardTestCase): + + AUTO_AUTHENTICATE = False + + def setUp(self): + super(AuthTest, self).setUp() + self.reset_session() + + def _validate_jwt_token(self, token, username, permissions): + payload = jwt.decode(token, options={'verify_signature': False}) + self.assertIn('username', payload) + self.assertEqual(payload['username'], username) + + for scope, perms in permissions.items(): + self.assertIsNotNone(scope) + self.assertIn('read', perms) + self.assertIn('update', perms) + self.assertIn('create', perms) + self.assertIn('delete', perms) + + def test_login_without_password(self): + with self.assertRaises(CommandFailedError): + self.create_user('admin2', '', ['administrator'], force_password=True) + + def test_a_set_login_credentials(self): + # test with Authorization header + self.create_user('admin2', 'admin2', ['administrator']) + self._post("/api/auth", {'username': 'admin2', 'password': 'admin2'}) + self.assertStatus(201) + data = self.jsonBody() + self._validate_jwt_token(data['token'], "admin2", data['permissions']) + self.delete_user('admin2') + + # test with Cookies set + self.create_user('admin2', 'admin2', ['administrator']) + self._post("/api/auth", {'username': 'admin2', 'password': 'admin2'}, set_cookies=True) + self.assertStatus(201) + data = self.jsonBody() + self._validate_jwt_token(data['token'], "admin2", data['permissions']) + self.delete_user('admin2') + + def test_login_valid(self): + # test with Authorization header + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'token': JLeaf(str), + 'username': JLeaf(str), + 'permissions': JObj(sub_elems={}, allow_unknown=True), + 'sso': JLeaf(bool), + 'pwdExpirationDate': JLeaf(int, none=True), + 'pwdUpdateRequired': JLeaf(bool) + }, allow_unknown=False)) + self._validate_jwt_token(data['token'], "admin", data['permissions']) + + # test with Cookies set + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'token': JLeaf(str), + 'username': JLeaf(str), + 'permissions': JObj(sub_elems={}, allow_unknown=True), + 'sso': JLeaf(bool), + 'pwdExpirationDate': JLeaf(int, none=True), + 'pwdUpdateRequired': JLeaf(bool) + }, allow_unknown=False)) + self._validate_jwt_token(data['token'], "admin", data['permissions']) + + def test_login_invalid(self): + # test with Authorization header + self._post("/api/auth", {'username': 'admin', 'password': 'inval'}) + self.assertStatus(400) + self.assertJsonBody({ + "component": "auth", + "code": "invalid_credentials", + 
"detail": "Invalid credentials" + }) + + def test_lockout_user(self): + # test with Authorization header + self._ceph_cmd(['dashboard', 'set-account-lockout-attempts', '3']) + for _ in range(3): + self._post("/api/auth", {'username': 'admin', 'password': 'inval'}) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(400) + self.assertJsonBody({ + "component": "auth", + "code": "invalid_credentials", + "detail": "Invalid credentials" + }) + self._ceph_cmd(['dashboard', 'ac-user-enable', 'admin']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'token': JLeaf(str), + 'username': JLeaf(str), + 'permissions': JObj(sub_elems={}, allow_unknown=True), + 'sso': JLeaf(bool), + 'pwdExpirationDate': JLeaf(int, none=True), + 'pwdUpdateRequired': JLeaf(bool) + }, allow_unknown=False)) + self._validate_jwt_token(data['token'], "admin", data['permissions']) + + # test with Cookies set + self._ceph_cmd(['dashboard', 'set-account-lockout-attempts', '3']) + for _ in range(3): + self._post("/api/auth", {'username': 'admin', 'password': 'inval'}, set_cookies=True) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(400) + self.assertJsonBody({ + "component": "auth", + "code": "invalid_credentials", + "detail": "Invalid credentials" + }) + self._ceph_cmd(['dashboard', 'ac-user-enable', 'admin']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'token': JLeaf(str), + 'username': JLeaf(str), + 'permissions': JObj(sub_elems={}, allow_unknown=True), + 'sso': JLeaf(bool), + 'pwdExpirationDate': JLeaf(int, none=True), + 'pwdUpdateRequired': JLeaf(bool) + }, allow_unknown=False)) + self._validate_jwt_token(data['token'], "admin", data['permissions']) + + def test_logout(self): + # test with Authorization header + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + data = self.jsonBody() + self._validate_jwt_token(data['token'], "admin", data['permissions']) + self.set_jwt_token(data['token']) + self._post("/api/auth/logout") + self.assertStatus(200) + self.assertJsonBody({ + "redirect_url": "#/login" + }) + self._get("/api/host", version='1.1') + self.assertStatus(401) + self.set_jwt_token(None) + + # test with Cookies set + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + data = self.jsonBody() + self._validate_jwt_token(data['token'], "admin", data['permissions']) + self.set_jwt_token(data['token']) + self._post("/api/auth/logout", set_cookies=True) + self.assertStatus(200) + self.assertJsonBody({ + "redirect_url": "#/login" + }) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + self.set_jwt_token(None) + + def test_token_ttl(self): + # test with Authorization header + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", version='1.1') + self.assertStatus(200) + time.sleep(6) + self._get("/api/host", version='1.1') + self.assertStatus(401) + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800']) + self.set_jwt_token(None) + + # test with Cookies set + 
self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(200) + time.sleep(6) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800']) + self.set_jwt_token(None) + + def test_remove_from_blocklist(self): + # test with Authorization header + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + # the following call adds the token to the blocklist + self._post("/api/auth/logout") + self.assertStatus(200) + self._get("/api/host", version='1.1') + self.assertStatus(401) + time.sleep(6) + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800']) + self.set_jwt_token(None) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + # the following call removes expired tokens from the blocklist + self._post("/api/auth/logout") + self.assertStatus(200) + + # test with Cookies set + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '5']) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + # the following call adds the token to the blocklist + self._post("/api/auth/logout", set_cookies=True) + self.assertStatus(200) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + time.sleep(6) + self._ceph_cmd(['dashboard', 'set-jwt-token-ttl', '28800']) + self.set_jwt_token(None) + self._post("/api/auth", {'username': 'admin', 'password': 'admin'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + # the following call removes expired tokens from the blocklist + self._post("/api/auth/logout", set_cookies=True) + self.assertStatus(200) + + def test_unauthorized(self): + # test with Authorization header + self._get("/api/host", version='1.1') + self.assertStatus(401) + + # test with Cookies set + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + + def test_invalidate_token_by_admin(self): + # test with Authorization header + self._get("/api/host", version='1.1') + self.assertStatus(401) + self.create_user('user', 'user', ['read-only']) + time.sleep(1) + self._post("/api/auth", {'username': 'user', 'password': 'user'}) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", version='1.1') + self.assertStatus(200) + time.sleep(1) + self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', '--force-password', + 'user'], + 'user2') + time.sleep(1) + self._get("/api/host", version='1.1') + self.assertStatus(401) + self.set_jwt_token(None) + self._post("/api/auth", {'username': 'user', 'password': 'user2'}) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", version='1.1') + self.assertStatus(200) + self.delete_user("user") + + # test with Cookies set + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + self.create_user('user', 'user', ['read-only']) + time.sleep(1) + self._post("/api/auth", {'username': 'user', 'password': 'user'}, 
set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(200) + time.sleep(1) + self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', '--force-password', + 'user'], + 'user2') + time.sleep(1) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(401) + self.set_jwt_token(None) + self._post("/api/auth", {'username': 'user', 'password': 'user2'}, set_cookies=True) + self.assertStatus(201) + self.set_jwt_token(self.jsonBody()['token']) + self._get("/api/host", set_cookies=True, version='1.1') + self.assertStatus(200) + self.delete_user("user") + + def test_check_token(self): + # test with Authorization header + self.login("admin", "admin") + self._post("/api/auth/check", {"token": self.jsonBody()["token"]}) + self.assertStatus(200) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + "username": JLeaf(str), + "permissions": JObj(sub_elems={}, allow_unknown=True), + "sso": JLeaf(bool), + "pwdUpdateRequired": JLeaf(bool) + }, allow_unknown=False)) + self.logout() + + # test with Cookies set + self.login("admin", "admin", set_cookies=True) + self._post("/api/auth/check", {"token": self.jsonBody()["token"]}, set_cookies=True) + self.assertStatus(200) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + "username": JLeaf(str), + "permissions": JObj(sub_elems={}, allow_unknown=True), + "sso": JLeaf(bool), + "pwdUpdateRequired": JLeaf(bool) + }, allow_unknown=False)) + self.logout(set_cookies=True) + + def test_check_wo_token(self): + # test with Authorization header + self.login("admin", "admin") + self._post("/api/auth/check", {"token": ""}) + self.assertStatus(200) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + "login_url": JLeaf(str), + "cluster_status": JLeaf(str) + }, allow_unknown=False)) + self.logout() + + # test with Cookies set + self.login("admin", "admin", set_cookies=True) + self._post("/api/auth/check", {"token": ""}, set_cookies=True) + self.assertStatus(200) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + "login_url": JLeaf(str), + "cluster_status": JLeaf(str) + }, allow_unknown=False)) + self.logout(set_cookies=True) diff --git a/qa/tasks/mgr/dashboard/test_cephfs.py b/qa/tasks/mgr/dashboard/test_cephfs.py new file mode 100644 index 000000000..4295b580f --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_cephfs.py @@ -0,0 +1,292 @@ +# -*- coding: utf-8 -*- +# pylint: disable=too-many-public-methods + +from contextlib import contextmanager + +from .helper import DashboardTestCase, JLeaf, JList, JObj + + +class CephfsTest(DashboardTestCase): + CEPHFS = True + + AUTH_ROLES = ['cephfs-manager'] + + QUOTA_PATH = '/quotas' + + def assertToHave(self, data, key): + self.assertIn(key, data) + self.assertIsNotNone(data[key]) + + def get_fs_id(self): + return self.fs.get_namespace_id() + + def mk_dirs(self, path, expectedStatus=200): + self._post("/api/cephfs/{}/tree".format(self.get_fs_id()), + params={'path': path}) + self.assertStatus(expectedStatus) + + def rm_dir(self, path, expectedStatus=200): + self._delete("/api/cephfs/{}/tree".format(self.get_fs_id()), + params={'path': path}) + self.assertStatus(expectedStatus) + + def get_root_directory(self, expectedStatus=200): + data = self._get("/api/cephfs/{}/get_root_directory".format(self.get_fs_id())) + self.assertStatus(expectedStatus) + self.assertIsInstance(data, dict) + return data + + def ls_dir(self, path, 
expectedLength, depth=None): + return self._ls_dir(path, expectedLength, depth, "api") + + def ui_ls_dir(self, path, expectedLength, depth=None): + return self._ls_dir(path, expectedLength, depth, "ui-api") + + def _ls_dir(self, path, expectedLength, depth, baseApiPath): + params = {'path': path} + if depth is not None: + params['depth'] = depth + data = self._get("/{}/cephfs/{}/ls_dir".format(baseApiPath, self.get_fs_id()), + params=params) + self.assertStatus(200) + self.assertIsInstance(data, list) + self.assertEqual(len(data), expectedLength) + return data + + def set_quotas(self, max_bytes=None, max_files=None): + quotas = { + 'max_bytes': max_bytes, + 'max_files': max_files + } + self._put("/api/cephfs/{}/quota".format(self.get_fs_id()), data=quotas, + params={'path': self.QUOTA_PATH}) + self.assertStatus(200) + + def assert_quotas(self, max_bytes, files): + data = self.ls_dir('/', 1)[0] + self.assertEqual(data['quotas']['max_bytes'], max_bytes) + self.assertEqual(data['quotas']['max_files'], files) + + @contextmanager + def new_quota_dir(self): + self.mk_dirs(self.QUOTA_PATH) + self.set_quotas(1024 ** 3, 1024) + yield 1 + self.rm_dir(self.QUOTA_PATH) + + @DashboardTestCase.RunAs('test', 'test', ['block-manager']) + def test_access_permissions(self): + fs_id = self.get_fs_id() + self._get("/api/cephfs/{}/clients".format(fs_id)) + self.assertStatus(403) + self._get("/api/cephfs/{}".format(fs_id)) + self.assertStatus(403) + self._get("/api/cephfs/{}/mds_counters".format(fs_id)) + self.assertStatus(403) + self._get("/ui-api/cephfs/{}/tabs".format(fs_id)) + self.assertStatus(403) + + def test_cephfs_clients(self): + fs_id = self.get_fs_id() + data = self._get("/api/cephfs/{}/clients".format(fs_id)) + self.assertStatus(200) + + self.assertIn('status', data) + self.assertIn('data', data) + + def test_cephfs_evict_client_does_not_exist(self): + fs_id = self.get_fs_id() + self._delete("/api/cephfs/{}/client/1234".format(fs_id)) + self.assertStatus(404) + + def test_cephfs_evict_invalid_client_id(self): + fs_id = self.get_fs_id() + self._delete("/api/cephfs/{}/client/xyz".format(fs_id)) + self.assertStatus(400) + self.assertJsonBody({ + "component": 'cephfs', + "code": "invalid_cephfs_client_id", + "detail": "Invalid cephfs client ID xyz" + }) + + def test_cephfs_get(self): + fs_id = self.get_fs_id() + data = self._get("/api/cephfs/{}/".format(fs_id)) + self.assertStatus(200) + + self.assertToHave(data, 'cephfs') + self.assertToHave(data, 'standbys') + self.assertToHave(data, 'versions') + + def test_cephfs_mds_counters(self): + fs_id = self.get_fs_id() + data = self._get("/api/cephfs/{}/mds_counters".format(fs_id)) + self.assertStatus(200) + + self.assertIsInstance(data, dict) + self.assertIsNotNone(data) + + def test_cephfs_mds_counters_wrong(self): + self._get("/api/cephfs/baadbaad/mds_counters") + self.assertStatus(400) + self.assertJsonBody({ + "component": 'cephfs', + "code": "invalid_cephfs_id", + "detail": "Invalid cephfs ID baadbaad" + }) + + def test_cephfs_list(self): + data = self._get("/api/cephfs/") + self.assertStatus(200) + + self.assertIsInstance(data, list) + cephfs = data[0] + self.assertToHave(cephfs, 'id') + self.assertToHave(cephfs, 'mdsmap') + + def test_cephfs_get_quotas(self): + fs_id = self.get_fs_id() + data = self._get("/api/cephfs/{}/quota?path=/".format(fs_id)) + self.assertStatus(200) + self.assertSchema(data, JObj({ + 'max_bytes': int, + 'max_files': int + })) + + def test_cephfs_tabs(self): + fs_id = self.get_fs_id() + data = 
self._get("/ui-api/cephfs/{}/tabs".format(fs_id)) + self.assertStatus(200) + self.assertIsInstance(data, dict) + + # Pools + pools = data['pools'] + self.assertIsInstance(pools, list) + self.assertGreater(len(pools), 0) + for pool in pools: + self.assertEqual(pool['size'], pool['used'] + pool['avail']) + + # Ranks + self.assertToHave(data, 'ranks') + self.assertIsInstance(data['ranks'], list) + + # Name + self.assertToHave(data, 'name') + self.assertIsInstance(data['name'], str) + + # Standbys + self.assertToHave(data, 'standbys') + self.assertIsInstance(data['standbys'], str) + + # MDS counters + counters = data['mds_counters'] + self.assertIsInstance(counters, dict) + self.assertGreater(len(counters.keys()), 0) + for k, v in counters.items(): + self.assertEqual(v['name'], k) + + # Clients + self.assertToHave(data, 'clients') + clients = data['clients'] + self.assertToHave(clients, 'data') + self.assertIsInstance(clients['data'], list) + self.assertToHave(clients, 'status') + self.assertIsInstance(clients['status'], int) + + def test_ls_mk_rm_dir(self): + self.ls_dir('/', 0) + + self.mk_dirs('/pictures/birds') + self.ls_dir('/', 2, 3) + self.ls_dir('/pictures', 1) + + self.rm_dir('/pictures', 500) + self.rm_dir('/pictures/birds') + self.rm_dir('/pictures') + + self.ls_dir('/', 0) + + def test_snapshots(self): + fs_id = self.get_fs_id() + self.mk_dirs('/movies/dune/extended_version') + + self._post("/api/cephfs/{}/snapshot".format(fs_id), + params={'path': '/movies/dune', 'name': 'test'}) + self.assertStatus(200) + + data = self.ls_dir('/movies', 1) + self.assertSchema(data[0], JObj(sub_elems={ + 'name': JLeaf(str), + 'path': JLeaf(str), + 'parent': JLeaf(str), + 'snapshots': JList(JObj(sub_elems={ + 'name': JLeaf(str), + 'path': JLeaf(str), + 'created': JLeaf(str) + })), + 'quotas': JObj(sub_elems={ + 'max_bytes': JLeaf(int), + 'max_files': JLeaf(int) + }) + })) + snapshots = data[0]['snapshots'] + self.assertEqual(len(snapshots), 1) + snapshot = snapshots[0] + self.assertEqual(snapshot['name'], "test") + self.assertEqual(snapshot['path'], "/movies/dune/.snap/test") + + # Should have filtered out "_test_$timestamp" + data = self.ls_dir('/movies/dune', 1) + snapshots = data[0]['snapshots'] + self.assertEqual(len(snapshots), 0) + + self._delete("/api/cephfs/{}/snapshot".format(fs_id), + params={'path': '/movies/dune', 'name': 'test'}) + self.assertStatus(200) + + data = self.ls_dir('/movies', 1) + self.assertEqual(len(data[0]['snapshots']), 0) + + # Cleanup. Note, the CephFS Python extension (and therefor the Dashboard + # REST API) does not support recursive deletion of a directory. 
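+ # The tree is therefore removed leaf-first, deepest directory first.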
+ self.rm_dir('/movies/dune/extended_version') + self.rm_dir('/movies/dune') + self.rm_dir('/movies') + + def test_quotas_default(self): + self.mk_dirs(self.QUOTA_PATH) + self.assert_quotas(0, 0) + self.rm_dir(self.QUOTA_PATH) + + def test_quotas_set_both(self): + with self.new_quota_dir(): + self.assert_quotas(1024 ** 3, 1024) + + def test_quotas_set_only_bytes(self): + with self.new_quota_dir(): + self.set_quotas(2048 ** 3) + self.assert_quotas(2048 ** 3, 1024) + + def test_quotas_set_only_files(self): + with self.new_quota_dir(): + self.set_quotas(None, 2048) + self.assert_quotas(1024 ** 3, 2048) + + def test_quotas_unset_both(self): + with self.new_quota_dir(): + self.set_quotas(0, 0) + self.assert_quotas(0, 0) + + def test_listing_of_root_dir(self): + self.ls_dir('/', 0) # Should not list root + ui_root = self.ui_ls_dir('/', 1)[0] # Should list root by default + root = self.get_root_directory() + self.assertEqual(ui_root, root) + + def test_listing_of_ui_api_ls_on_deeper_levels(self): + # The UI-API and API ls_dir methods should behave the same way on deeper levels + self.mk_dirs('/pictures') + api_ls = self.ls_dir('/pictures', 0) + ui_api_ls = self.ui_ls_dir('/pictures', 0) + self.assertEqual(api_ls, ui_api_ls) + self.rm_dir('/pictures') diff --git a/qa/tasks/mgr/dashboard/test_cluster.py b/qa/tasks/mgr/dashboard/test_cluster.py new file mode 100644 index 000000000..14f854279 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_cluster.py @@ -0,0 +1,23 @@ +from .helper import DashboardTestCase, JLeaf, JObj + + +class ClusterTest(DashboardTestCase): + + def setUp(self): + super().setUp() + self.reset_session() + + def test_get_status(self): + data = self._get('/api/cluster', version='0.1') + self.assertStatus(200) + self.assertSchema(data, JObj(sub_elems={ + "status": JLeaf(str) + }, allow_unknown=False)) + + def test_update_status(self): + req = {'status': 'POST_INSTALLED'} + self._put('/api/cluster', req, version='0.1') + self.assertStatus(200) + data = self._get('/api/cluster', version='0.1') + self.assertStatus(200) + self.assertEqual(data, req) diff --git a/qa/tasks/mgr/dashboard/test_cluster_configuration.py b/qa/tasks/mgr/dashboard/test_cluster_configuration.py new file mode 100644 index 000000000..9c8245d23 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_cluster_configuration.py @@ -0,0 +1,398 @@ +from __future__ import absolute_import + +from .helper import DashboardTestCase + + +class ClusterConfigurationTest(DashboardTestCase): + + def test_list(self): + data = self._get('/api/cluster_conf') + self.assertStatus(200) + self.assertIsInstance(data, list) + self.assertGreater(len(data), 1000) + for conf in data: + self._validate_single(conf) + + def test_get(self): + data = self._get('/api/cluster_conf/admin_socket') + self.assertStatus(200) + self._validate_single(data) + self.assertIn('enum_values', data) + + data = self._get('/api/cluster_conf/fantasy_name') + self.assertStatus(404) + + def test_get_specific_db_config_option(self): + config_name = 'mon_allow_pool_delete' + + orig_value = self._get_config_by_name(config_name) + + self._ceph_cmd(['config', 'set', 'mon', config_name, 'true']) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + [{'section': 'mon', 'value': 'true'}], + timeout=30, + period=1) + + self._ceph_cmd(['config', 'set', 'mon', config_name, 'false']) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + [{'section': 'mon', 'value': 'false'}], + timeout=30, + period=1) + + # restore value + if orig_value: + 
self._ceph_cmd(['config', 'set', 'mon', config_name, orig_value[0]['value']]) + + def test_filter_config_options(self): + config_names = ['osd_scrub_during_recovery', 'osd_scrub_begin_hour', 'osd_scrub_end_hour'] + data = self._get('/api/cluster_conf/filter?names={}'.format(','.join(config_names))) + self.assertStatus(200) + self.assertIsInstance(data, list) + self.assertEqual(len(data), 3) + for conf in data: + self._validate_single(conf) + self.assertIn(conf['name'], config_names) + + def test_filter_config_options_empty_names(self): + self._get('/api/cluster_conf/filter?names=') + self.assertStatus(404) + self.assertEqual(self._resp.json()['detail'], 'Config options `` not found') + + def test_filter_config_options_unknown_name(self): + self._get('/api/cluster_conf/filter?names=abc') + self.assertStatus(404) + self.assertEqual(self._resp.json()['detail'], 'Config options `abc` not found') + + def test_filter_config_options_contains_unknown_name(self): + config_names = ['osd_scrub_during_recovery', 'osd_scrub_begin_hour', 'abc'] + data = self._get('/api/cluster_conf/filter?names={}'.format(','.join(config_names))) + self.assertStatus(200) + self.assertIsInstance(data, list) + self.assertEqual(len(data), 2) + for conf in data: + self._validate_single(conf) + self.assertIn(conf['name'], config_names) + + def test_create(self): + config_name = 'debug_ms' + orig_value = self._get_config_by_name(config_name) + + # remove all existing settings for equal preconditions + self._clear_all_values_for_config_option(config_name) + + expected_result = [{'section': 'mon', 'value': '0/3'}] + + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': expected_result + }) + self.assertStatus(201) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + expected_result, + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_value) + + def test_delete(self): + config_name = 'debug_ms' + orig_value = self._get_config_by_name(config_name) + + # set a config option + expected_result = [{'section': 'mon', 'value': '0/3'}] + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': expected_result + }) + self.assertStatus(201) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + expected_result, + timeout=30, + period=1) + + # delete it and check if it's deleted + self._delete('/api/cluster_conf/{}?section={}'.format(config_name, 'mon')) + self.assertStatus(204) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + None, + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_value) + + def test_create_cant_update_at_runtime(self): + config_name = 'public_bind_addr' # not updatable + config_value = [{'section': 'global', 'value': 'true'}] + orig_value = self._get_config_by_name(config_name) + + # try to set config option and check if it fails + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': config_value + }) + self.assertStatus(400) + self.assertError(code='config_option_not_updatable_at_runtime', + component='cluster_configuration', + detail='Config option {} is/are not updatable at runtime'.format( + config_name)) + + # check if config option value is still the original one + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + orig_value, + timeout=30, + period=1) + + def 
test_create_two_values(self): + config_name = 'debug_ms' + orig_value = self._get_config_by_name(config_name) + + # remove all existing settings for equal preconditions + self._clear_all_values_for_config_option(config_name) + + expected_result = [{'section': 'mon', 'value': '0/3'}, + {'section': 'osd', 'value': '0/5'}] + + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': expected_result + }) + self.assertStatus(201) + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + expected_result, + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_value) + + def test_create_can_handle_none_values(self): + config_name = 'debug_ms' + orig_value = self._get_config_by_name(config_name) + + # remove all existing settings for equal preconditions + self._clear_all_values_for_config_option(config_name) + + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': [{'section': 'mon', 'value': '0/3'}, + {'section': 'osd', 'value': None}] + }) + self.assertStatus(201) + + expected_result = [{'section': 'mon', 'value': '0/3'}] + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + expected_result, + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_value) + + def test_create_can_handle_boolean_values(self): + config_name = 'mon_allow_pool_delete' + orig_value = self._get_config_by_name(config_name) + + # remove all existing settings for equal preconditions + self._clear_all_values_for_config_option(config_name) + + expected_result = [{'section': 'mon', 'value': 'true'}] + + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': [{'section': 'mon', 'value': True}]}) + self.assertStatus(201) + + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + expected_result, + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_value) + + def test_bulk_set(self): + expected_result = { + 'osd_max_backfills': {'section': 'osd', 'value': '1'}, + 'osd_recovery_max_active': {'section': 'osd', 'value': '3'}, + 'osd_recovery_max_single_start': {'section': 'osd', 'value': '1'}, + 'osd_recovery_sleep': {'section': 'osd', 'value': '2.000000'} + } + orig_values = dict() + + for config_name in expected_result: + orig_values[config_name] = self._get_config_by_name(config_name) + + # remove all existing settings for equal preconditions + self._clear_all_values_for_config_option(config_name) + + self._put('/api/cluster_conf', {'options': expected_result}) + self.assertStatus(200) + + for config_name, value in expected_result.items(): + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + [value], + timeout=30, + period=1) + + # reset original value + self._clear_all_values_for_config_option(config_name) + self._reset_original_values(config_name, orig_values[config_name]) + + def test_bulk_set_cant_update_at_runtime(self): + config_options = { + 'public_bind_addr': {'section': 'global', 'value': '1.2.3.4:567'}, # not updatable + 'public_network': {'section': 'global', 'value': '10.0.0.0/8'} # not updatable + } + orig_values = dict() + + for config_name in config_options: + orig_values[config_name] = self._get_config_by_name(config_name) + + # try to set config options and see if it fails + 
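+ # Both options are flagged as not updatable at runtime, so the whole bulk
+ # PUT is expected to be rejected with a 400 and the original values
+ # (re-checked below) must remain unchanged.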
self._put('/api/cluster_conf', {'options': config_options}) + self.assertStatus(400) + self.assertError(code='config_option_not_updatable_at_runtime', + component='cluster_configuration', + detail='Config option {} is/are not updatable at runtime'.format( + ', '.join(config_options.keys()))) + + # check if config option values are still the original ones + for config_name, value in orig_values.items(): + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + value, + timeout=30, + period=1) + + def test_bulk_set_cant_update_at_runtime_partial(self): + config_options = { + 'public_bind_addr': {'section': 'global', 'value': 'true'}, # not updatable + 'log_to_stderr': {'section': 'global', 'value': 'true'} # updatable + } + orig_values = dict() + + for config_name in config_options: + orig_values[config_name] = self._get_config_by_name(config_name) + + # try to set config options and see if it fails + self._put('/api/cluster_conf', {'options': config_options}) + self.assertStatus(400) + self.assertError(code='config_option_not_updatable_at_runtime', + component='cluster_configuration', + detail='Config option {} is/are not updatable at runtime'.format( + 'public_bind_addr')) + + # check if config option values are still the original ones + for config_name, value in orig_values.items(): + self.wait_until_equal( + lambda: self._get_config_by_name(config_name), + value, + timeout=30, + period=1) + + def test_check_existence(self): + """ + This test case is intended to check the existence of all hard coded config options used by + the dashboard. + If you include further hard coded options in the dashboard, feel free to add them to the + list. + """ + hard_coded_options = [ + 'osd_max_backfills', # osd-recv-speed + 'osd_recovery_max_active', # osd-recv-speed + 'osd_recovery_max_single_start', # osd-recv-speed + 'osd_recovery_sleep', # osd-recv-speed + 'osd_scrub_during_recovery', # osd-pg-scrub + 'osd_scrub_begin_hour', # osd-pg-scrub + 'osd_scrub_end_hour', # osd-pg-scrub + 'osd_scrub_begin_week_day', # osd-pg-scrub + 'osd_scrub_end_week_day', # osd-pg-scrub + 'osd_scrub_min_interval', # osd-pg-scrub + 'osd_scrub_max_interval', # osd-pg-scrub + 'osd_deep_scrub_interval', # osd-pg-scrub + 'osd_scrub_auto_repair', # osd-pg-scrub + 'osd_max_scrubs', # osd-pg-scrub + 'osd_scrub_priority', # osd-pg-scrub + 'osd_scrub_sleep', # osd-pg-scrub + 'osd_scrub_auto_repair_num_errors', # osd-pg-scrub + 'osd_debug_deep_scrub_sleep', # osd-pg-scrub + 'osd_deep_scrub_keys', # osd-pg-scrub + 'osd_deep_scrub_large_omap_object_key_threshold', # osd-pg-scrub + 'osd_deep_scrub_large_omap_object_value_sum_threshold', # osd-pg-scrub + 'osd_deep_scrub_randomize_ratio', # osd-pg-scrub + 'osd_deep_scrub_stride', # osd-pg-scrub + 'osd_deep_scrub_update_digest_min_age', # osd-pg-scrub + 'osd_requested_scrub_priority', # osd-pg-scrub + 'osd_scrub_backoff_ratio', # osd-pg-scrub + 'osd_scrub_chunk_max', # osd-pg-scrub + 'osd_scrub_chunk_min', # osd-pg-scrub + 'osd_scrub_cost', # osd-pg-scrub + 'osd_scrub_interval_randomize_ratio', # osd-pg-scrub + 'osd_scrub_invalid_stats', # osd-pg-scrub + 'osd_scrub_load_threshold', # osd-pg-scrub + 'osd_scrub_max_preemptions', # osd-pg-scrub + 'mon_allow_pool_delete' # pool-list + ] + + for config_option in hard_coded_options: + self._get('/api/cluster_conf/{}'.format(config_option)) + self.assertStatus(200) + + def _validate_single(self, data): + self.assertIn('name', data) + self.assertIn('daemon_default', data) + self.assertIn('long_desc', data) + self.assertIn('level', 
data) + self.assertIn('default', data) + self.assertIn('see_also', data) + self.assertIn('tags', data) + self.assertIn('min', data) + self.assertIn('max', data) + self.assertIn('services', data) + self.assertIn('type', data) + self.assertIn('desc', data) + self.assertIn(data['type'], ['str', 'bool', 'float', 'int', 'size', 'uint', 'addr', + 'addrvec', 'uuid', 'secs', 'millisecs']) + + if 'value' in data: + self.assertIn('source', data) + self.assertIsInstance(data['value'], list) + + for entry in data['value']: + self.assertIsInstance(entry, dict) + self.assertIn('section', entry) + self.assertIn('value', entry) + + def _get_config_by_name(self, conf_name): + data = self._get('/api/cluster_conf/{}'.format(conf_name)) + if 'value' in data: + return data['value'] + return None + + def _clear_all_values_for_config_option(self, config_name): + values = self._get_config_by_name(config_name) + if values: + for value in values: + self._ceph_cmd(['config', 'rm', value['section'], config_name]) + + def _reset_original_values(self, config_name, orig_values): + if orig_values: + for value in orig_values: + self._ceph_cmd(['config', 'set', value['section'], config_name, value['value']]) diff --git a/qa/tasks/mgr/dashboard/test_crush_rule.py b/qa/tasks/mgr/dashboard/test_crush_rule.py new file mode 100644 index 000000000..aa2250b1d --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_crush_rule.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from .helper import DashboardTestCase, JList, JObj + + +class CrushRuleTest(DashboardTestCase): + + AUTH_ROLES = ['pool-manager'] + + rule_schema = JObj(sub_elems={ + 'rule_id': int, + 'rule_name': str, + 'steps': JList(JObj({}, allow_unknown=True)) + }, allow_unknown=True) + + def create_and_delete_rule(self, data): + name = data['name'] + # Creates rule + self._post('/api/crush_rule', data) + self.assertStatus(201) + # Makes sure rule exists + rule = self._get('/api/crush_rule/{}'.format(name), version='2.0') + self.assertStatus(200) + self.assertSchemaBody(self.rule_schema) + self.assertEqual(rule['rule_name'], name) + # Deletes rule + self._delete('/api/crush_rule/{}'.format(name)) + self.assertStatus(204) + + @DashboardTestCase.RunAs('test', 'test', ['rgw-manager']) + def test_read_access_permissions(self): + self._get('/api/crush_rule', version='2.0') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', ['read-only']) + def test_write_access_permissions(self): + self._get('/api/crush_rule', version='2.0') + self.assertStatus(200) + data = {'name': 'some_rule', 'root': 'default', 'failure_domain': 'osd'} + self._post('/api/crush_rule', data) + self.assertStatus(403) + self._delete('/api/crush_rule/default') + self.assertStatus(403) + + @classmethod + def tearDownClass(cls): + super(CrushRuleTest, cls).tearDownClass() + cls._ceph_cmd(['osd', 'crush', 'rule', 'rm', 'some_rule']) + cls._ceph_cmd(['osd', 'crush', 'rule', 'rm', 'another_rule']) + + def test_list(self): + self._get('/api/crush_rule', version='2.0') + self.assertStatus(200) + self.assertSchemaBody(JList(self.rule_schema)) + + def test_create(self): + self.create_and_delete_rule({ + 'name': 'some_rule', + 'root': 'default', + 'failure_domain': 'osd' + }) + + @DashboardTestCase.RunAs('test', 'test', ['pool-manager', 'cluster-manager']) + def test_create_with_ssd(self): + data = self._get('/api/osd/0') + self.assertStatus(200) + device_class = data['osd_metadata']['default_device_class'] + self.create_and_delete_rule({ + 'name': 'another_rule', + 
'root': 'default', + 'failure_domain': 'osd', + 'device_class': device_class + }) + + def test_crush_rule_info(self): + self._get('/ui-api/crush_rule/info') + self.assertStatus(200) + self.assertSchemaBody(JObj({ + 'names': JList(str), + 'nodes': JList(JObj({}, allow_unknown=True)), + 'roots': JList(int) + })) diff --git a/qa/tasks/mgr/dashboard/test_erasure_code_profile.py b/qa/tasks/mgr/dashboard/test_erasure_code_profile.py new file mode 100644 index 000000000..7fb7c1c82 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_erasure_code_profile.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from .helper import DashboardTestCase, JList, JObj + + +class ECPTest(DashboardTestCase): + + AUTH_ROLES = ['pool-manager'] + + @DashboardTestCase.RunAs('test', 'test', ['rgw-manager']) + def test_read_access_permissions(self): + self._get('/api/erasure_code_profile') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', ['read-only']) + def test_write_access_permissions(self): + self._get('/api/erasure_code_profile') + self.assertStatus(200) + data = {'name': 'ecp32', 'k': 3, 'm': 2} + self._post('/api/erasure_code_profile', data) + self.assertStatus(403) + self._delete('/api/erasure_code_profile/default') + self.assertStatus(403) + + @classmethod + def tearDownClass(cls): + super(ECPTest, cls).tearDownClass() + cls._ceph_cmd(['osd', 'erasure-code-profile', 'rm', 'ecp32']) + cls._ceph_cmd(['osd', 'erasure-code-profile', 'rm', 'lrc']) + + def test_list(self): + data = self._get('/api/erasure_code_profile') + self.assertStatus(200) + + default = [p for p in data if p['name'] == 'default'] + if default: + default_ecp = { + 'k': 2, + 'technique': 'reed_sol_van', + 'm': 1, + 'name': 'default', + 'plugin': 'jerasure' + } + if 'crush-failure-domain' in default[0]: + default_ecp['crush-failure-domain'] = default[0]['crush-failure-domain'] + self.assertSubset(default_ecp, default[0]) + get_data = self._get('/api/erasure_code_profile/default') + self.assertEqual(get_data, default[0]) + + def test_create(self): + data = {'name': 'ecp32', 'k': 3, 'm': 2} + self._post('/api/erasure_code_profile', data) + self.assertStatus(201) + + self._get('/api/erasure_code_profile/ecp32') + self.assertJsonSubset({ + 'crush-device-class': '', + 'crush-failure-domain': 'osd', + 'crush-root': 'default', + 'jerasure-per-chunk-alignment': 'false', + 'k': 3, + 'm': 2, + 'name': 'ecp32', + 'plugin': 'jerasure', + 'technique': 'reed_sol_van', + }) + + self.assertStatus(200) + + self._delete('/api/erasure_code_profile/ecp32') + self.assertStatus(204) + + def test_create_plugin(self): + data = {'name': 'lrc', 'k': '2', 'm': '2', 'l': '2', 'plugin': 'lrc'} + self._post('/api/erasure_code_profile', data) + self.assertJsonBody(None) + self.assertStatus(201) + + self._get('/api/erasure_code_profile/lrc') + self.assertJsonBody({ + 'crush-device-class': '', + 'crush-failure-domain': 'host', + 'crush-root': 'default', + 'k': 2, + 'l': '2', + 'm': 2, + 'name': 'lrc', + 'plugin': 'lrc' + }) + + self.assertStatus(200) + + self._delete('/api/erasure_code_profile/lrc') + self.assertStatus(204) + + def test_ecp_info(self): + self._get('/ui-api/erasure_code_profile/info') + self.assertSchemaBody(JObj({ + 'names': JList(str), + 'plugins': JList(str), + 'directory': str, + 'nodes': JList(JObj({}, allow_unknown=True)) + })) diff --git a/qa/tasks/mgr/dashboard/test_feedback.py b/qa/tasks/mgr/dashboard/test_feedback.py new file mode 100644 index 000000000..0ec5ac318 --- /dev/null +++ 
b/qa/tasks/mgr/dashboard/test_feedback.py @@ -0,0 +1,36 @@ +import time + +from .helper import DashboardTestCase + + +class FeedbackTest(DashboardTestCase): + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls._ceph_cmd(['mgr', 'module', 'enable', 'feedback']) + time.sleep(10) + + def test_create_api_key(self): + self._post('/api/feedback/api_key', {'api_key': 'testapikey'}, version='0.1') + self.assertStatus(201) + + def test_get_api_key(self): + response = self._get('/api/feedback/api_key', version='0.1') + self.assertStatus(200) + self.assertEqual(response, 'testapikey') + + def test_remove_api_key(self): + self._delete('/api/feedback/api_key', version='0.1') + self.assertStatus(204) + + def test_issue_tracker_create_with_invalid_key(self): + self._post('/api/feedback', {'api_key': 'invalidapikey', 'description': 'test', + 'project': 'dashboard', 'subject': 'sub', 'tracker': 'bug'}, + version='0.1') + self.assertStatus(400) + + def test_issue_tracker_create_with_invalid_params(self): + self._post('/api/feedback', {'api_key': '', 'description': 'test', 'project': 'xyz', + 'subject': 'testsub', 'tracker': 'invalid'}, version='0.1') + self.assertStatus(400) diff --git a/qa/tasks/mgr/dashboard/test_health.py b/qa/tasks/mgr/dashboard/test_health.py new file mode 100644 index 000000000..b6ffade4c --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_health.py @@ -0,0 +1,309 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from .helper import (DashboardTestCase, JAny, JLeaf, JList, JObj, + addrvec_schema, module_options_schema) + + +class HealthTest(DashboardTestCase): + CEPHFS = True + + __pg_info_schema = JObj({ + 'object_stats': JObj({ + 'num_objects': int, + 'num_object_copies': int, + 'num_objects_degraded': int, + 'num_objects_misplaced': int, + 'num_objects_unfound': int + }), + 'pgs_per_osd': float, + 'statuses': JObj({}, allow_unknown=True, unknown_schema=int) + }) + + __mdsmap_schema = JObj({ + 'session_autoclose': int, + 'balancer': str, + 'bal_rank_mask': str, + 'up': JObj({}, allow_unknown=True), + 'last_failure_osd_epoch': int, + 'in': JList(int), + 'last_failure': int, + 'max_file_size': int, + 'explicitly_allowed_features': int, + 'damaged': JList(int), + 'tableserver': int, + 'failed': JList(int), + 'metadata_pool': int, + 'epoch': int, + 'stopped': JList(int), + 'max_mds': int, + 'compat': JObj({ + 'compat': JObj({}, allow_unknown=True), + 'ro_compat': JObj({}, allow_unknown=True), + 'incompat': JObj({}, allow_unknown=True) + }), + 'required_client_features': JObj({}, allow_unknown=True), + 'data_pools': JList(int), + 'info': JObj({}, allow_unknown=True), + 'fs_name': str, + 'created': str, + 'standby_count_wanted': int, + 'enabled': bool, + 'modified': str, + 'session_timeout': int, + 'flags': int, + 'flags_state': JObj({ + 'joinable': bool, + 'allow_snaps': bool, + 'allow_multimds_snaps': bool, + 'allow_standby_replay': bool, + 'refuse_client_session': bool + }), + 'ever_allowed_features': int, + 'root': int + }) + + def test_minimal_health(self): + data = self._get('/api/health/minimal') + self.assertStatus(200) + schema = JObj({ + 'client_perf': JObj({ + 'read_bytes_sec': int, + 'read_op_per_sec': int, + 'recovering_bytes_per_sec': int, + 'write_bytes_sec': int, + 'write_op_per_sec': int + }), + 'df': JObj({ + 'stats': JObj({ + 'total_avail_bytes': int, + 'total_bytes': int, + 'total_used_raw_bytes': int, + }) + }), + 'fs_map': JObj({ + 'filesystems': JList( + JObj({ + 'mdsmap': self.__mdsmap_schema + }), + ), + 'standbys': JList(JObj({}, 
allow_unknown=True)), + }), + 'health': JObj({ + 'checks': JList(JObj({}, allow_unknown=True)), + 'mutes': JList(JObj({}, allow_unknown=True)), + 'status': str, + }), + 'hosts': int, + 'iscsi_daemons': JObj({ + 'up': int, + 'down': int + }), + 'mgr_map': JObj({ + 'active_name': str, + 'standbys': JList(JLeaf(dict)) + }), + 'mon_status': JObj({ + 'monmap': JObj({ + 'mons': JList(JLeaf(dict)), + }), + 'quorum': JList(int) + }), + 'osd_map': JObj({ + 'osds': JList( + JObj({ + 'in': int, + 'up': int, + 'state': JList(str) + })), + }), + 'pg_info': self.__pg_info_schema, + 'pools': JList(JLeaf(dict)), + 'rgw': int, + 'scrub_status': str + }) + self.assertSchema(data, schema) + + def test_full_health(self): + data = self._get('/api/health/full') + self.assertStatus(200) + module_info_schema = JObj({ + 'can_run': bool, + 'error_string': str, + 'name': str, + 'module_options': module_options_schema + }) + schema = JObj({ + 'client_perf': JObj({ + 'read_bytes_sec': int, + 'read_op_per_sec': int, + 'recovering_bytes_per_sec': int, + 'write_bytes_sec': int, + 'write_op_per_sec': int + }), + 'df': JObj({ + 'pools': JList(JObj({ + 'stats': JObj({ + 'stored': int, + 'stored_data': int, + 'stored_omap': int, + 'objects': int, + 'kb_used': int, + 'bytes_used': int, + 'data_bytes_used': int, + 'omap_bytes_used': int, + 'percent_used': float, + 'max_avail': int, + 'quota_objects': int, + 'quota_bytes': int, + 'dirty': int, + 'rd': int, + 'rd_bytes': int, + 'wr': int, + 'wr_bytes': int, + 'compress_bytes_used': int, + 'compress_under_bytes': int, + 'stored_raw': int, + 'avail_raw': int + }), + 'name': str, + 'id': int + })), + 'stats': JObj({ + 'total_avail_bytes': int, + 'total_bytes': int, + 'total_used_bytes': int, + 'total_used_raw_bytes': int, + 'total_used_raw_ratio': float, + 'num_osds': int, + 'num_per_pool_osds': int, + 'num_per_pool_omap_osds': int + }) + }), + 'fs_map': JObj({ + 'compat': JObj({ + 'compat': JObj({}, allow_unknown=True, unknown_schema=str), + 'incompat': JObj( + {}, allow_unknown=True, unknown_schema=str), + 'ro_compat': JObj( + {}, allow_unknown=True, unknown_schema=str) + }), + 'default_fscid': int, + 'epoch': int, + 'feature_flags': JObj( + {}, allow_unknown=True, unknown_schema=bool), + 'filesystems': JList( + JObj({ + 'id': int, + 'mdsmap': self.__mdsmap_schema + }), + ), + 'standbys': JList(JObj({}, allow_unknown=True)), + }), + 'health': JObj({ + 'checks': JList(JObj({}, allow_unknown=True)), + 'mutes': JList(JObj({}, allow_unknown=True)), + 'status': str, + }), + 'hosts': int, + 'iscsi_daemons': JObj({ + 'up': int, + 'down': int + }), + 'mgr_map': JObj({ + 'active_addr': str, + 'active_addrs': JObj({ + 'addrvec': addrvec_schema + }), + 'active_change': str, # timestamp + 'active_mgr_features': int, + 'active_gid': int, + 'active_name': str, + 'always_on_modules': JObj({}, allow_unknown=True), + 'available': bool, + 'available_modules': JList(module_info_schema), + 'epoch': int, + 'modules': JList(str), + 'services': JObj( + {'dashboard': str}, # This module should always be present + allow_unknown=True, unknown_schema=str + ), + 'standbys': JList(JObj({ + 'available_modules': JList(module_info_schema), + 'gid': int, + 'name': str, + 'mgr_features': int + }, allow_unknown=True)) + }, allow_unknown=True), + 'mon_status': JObj({ + 'election_epoch': int, + 'extra_probe_peers': JList(JAny(none=True)), + 'feature_map': JObj( + {}, allow_unknown=True, unknown_schema=JList(JObj({ + 'features': str, + 'num': int, + 'release': str + })) + ), + 'features': JObj({ + 'quorum_con': str, 
+ 'quorum_mon': JList(str), + 'required_con': str, + 'required_mon': JList(str) + }), + 'monmap': JObj({ + # @TODO: expand on monmap schema + 'mons': JList(JLeaf(dict)), + }, allow_unknown=True), + 'name': str, + 'outside_quorum': JList(int), + 'quorum': JList(int), + 'quorum_age': int, + 'rank': int, + 'state': str, + # @TODO: What type should be expected here? + 'sync_provider': JList(JAny(none=True)), + 'stretch_mode': bool + }), + 'osd_map': JObj({ + # @TODO: define schema for crush map and osd_metadata, among + # others + 'osds': JList( + JObj({ + 'in': int, + 'up': int, + }, allow_unknown=True)), + }, allow_unknown=True), + 'pg_info': self.__pg_info_schema, + 'pools': JList(JLeaf(dict)), + 'rgw': int, + 'scrub_status': str + }) + self.assertSchema(data, schema) + + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + self.assertEqual(len(cluster_pools), len(data['pools'])) + for pool in data['pools']: + self.assertIn(pool['pool_name'], cluster_pools) + + @DashboardTestCase.RunAs('test', 'test', ['pool-manager']) + def test_health_permissions(self): + data = self._get('/api/health/full') + self.assertStatus(200) + + schema = JObj({ + 'client_perf': JObj({}, allow_unknown=True), + 'df': JObj({}, allow_unknown=True), + 'health': JObj({ + 'checks': JList(JObj({}, allow_unknown=True)), + 'mutes': JList(JObj({}, allow_unknown=True)), + 'status': str + }), + 'pools': JList(JLeaf(dict)), + }) + self.assertSchema(data, schema) + + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + self.assertEqual(len(cluster_pools), len(data['pools'])) + for pool in data['pools']: + self.assertIn(pool['pool_name'], cluster_pools) diff --git a/qa/tasks/mgr/dashboard/test_host.py b/qa/tasks/mgr/dashboard/test_host.py new file mode 100644 index 000000000..78d784473 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_host.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from .helper import DashboardTestCase, JList, JObj, devices_schema + + +class HostControllerTest(DashboardTestCase): + + AUTH_ROLES = ['read-only'] + + URL_HOST = '/api/host' + URL_UI_HOST = '/ui-api/host' + + ORCHESTRATOR = True + + @classmethod + def setUpClass(cls): + super(HostControllerTest, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + cmd = ['test_orchestrator', 'load_data', '-i', '-'] + cls.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd, stdin='{}') + + @property + def test_data_inventory(self): + return self.ORCHESTRATOR_TEST_DATA['inventory'] + + @property + def test_data_daemons(self): + return self.ORCHESTRATOR_TEST_DATA['daemons'] + + @DashboardTestCase.RunAs('test', 'test', ['block-manager']) + def test_access_permissions(self): + self._get(self.URL_HOST, version='1.1') + self.assertStatus(403) + + def test_host_list(self): + data = self._get(self.URL_HOST, version='1.1') + self.assertStatus(200) + + orch_hostnames = {inventory_node['name'] for inventory_node in + self.ORCHESTRATOR_TEST_DATA['inventory']} + + for server in data: + self.assertIn('services', server) + self.assertIn('hostname', server) + self.assertIn('ceph_version', server) + self.assertIsNotNone(server['hostname']) + self.assertIsNotNone(server['ceph_version']) + for service in server['services']: + self.assertIn('type', service) + self.assertIn('id', service) + self.assertIsNotNone(service['type']) + self.assertIsNotNone(service['id']) + + self.assertIn('sources', server) + in_ceph, in_orchestrator = server['sources']['ceph'], server['sources']['orchestrator'] + if in_ceph: + 
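+ # a host reported by Ceph itself should list at least one running service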
self.assertGreaterEqual(len(server['services']), 1) + if not in_orchestrator: + self.assertNotIn(server['hostname'], orch_hostnames) + if in_orchestrator: + self.assertEqual(len(server['services']), 0) + self.assertIn(server['hostname'], orch_hostnames) + + def test_host_list_with_sources(self): + data = self._get('{}?sources=orchestrator'.format(self.URL_HOST), version='1.1') + self.assertStatus(200) + test_hostnames = {inventory_node['name'] for inventory_node in + self.ORCHESTRATOR_TEST_DATA['inventory']} + resp_hostnames = {host['hostname'] for host in data} + self.assertEqual(test_hostnames, resp_hostnames) + + data = self._get('{}?sources=ceph'.format(self.URL_HOST), version='1.1') + self.assertStatus(200) + test_hostnames = {inventory_node['name'] for inventory_node in + self.ORCHESTRATOR_TEST_DATA['inventory']} + resp_hostnames = {host['hostname'] for host in data} + self.assertEqual(len(test_hostnames.intersection(resp_hostnames)), 0) + + def test_host_devices(self): + hosts = self._get('{}'.format(self.URL_HOST), version='1.1') + hosts = [host['hostname'] for host in hosts if host['hostname'] != ''] + assert hosts[0] + data = self._get('{}/devices'.format('{}/{}'.format(self.URL_HOST, hosts[0]))) + self.assertStatus(200) + self.assertSchema(data, devices_schema) + + def test_host_daemons(self): + hosts = self._get('{}'.format(self.URL_HOST), version='1.1') + hosts = [host['hostname'] for host in hosts if host['hostname'] != ''] + assert hosts[0] + data = self._get('{}/daemons'.format('{}/{}'.format(self.URL_HOST, hosts[0]))) + self.assertStatus(200) + self.assertSchema(data, JList(JObj({ + 'hostname': str, + 'daemon_id': str, + 'daemon_type': str + }))) + + def test_host_smart(self): + hosts = self._get('{}'.format(self.URL_HOST), version='1.1') + hosts = [host['hostname'] for host in hosts if host['hostname'] != ''] + assert hosts[0] + self._get('{}/smart'.format('{}/{}'.format(self.URL_HOST, hosts[0]))) + self.assertStatus(200) + + def _validate_inventory(self, data, resp_data): + self.assertEqual(data['name'], resp_data['name']) + self.assertEqual(len(data['devices']), len(resp_data['devices'])) + + if not data['devices']: + return + test_devices = sorted(data['devices'], key=lambda d: d['path']) + resp_devices = sorted(resp_data['devices'], key=lambda d: d['path']) + + for test, resp in zip(test_devices, resp_devices): + self._validate_device(test, resp) + + def _validate_device(self, data, resp_data): + for key, value in data.items(): + self.assertEqual(value, resp_data[key]) + + def test_inventory_get(self): + # get a inventory + node = self.test_data_inventory[0] + resp = self._get('{}/{}/inventory'.format(self.URL_HOST, node['name'])) + self.assertStatus(200) + self._validate_inventory(node, resp) + + def test_inventory_list(self): + # get all inventory + data = self._get('{}/inventory'.format(self.URL_UI_HOST)) + self.assertStatus(200) + + def sorting_key(node): + return node['name'] + + test_inventory = sorted(self.test_data_inventory, key=sorting_key) + resp_inventory = sorted(data, key=sorting_key) + self.assertEqual(len(test_inventory), len(resp_inventory)) + for test, resp in zip(test_inventory, resp_inventory): + self._validate_inventory(test, resp) + + +class HostControllerNoOrchestratorTest(DashboardTestCase): + def test_host_create(self): + self._post('/api/host?hostname=foo', {'status': ''}, version='0.1') + self.assertStatus(503) + self.assertError(code='orchestrator_status_unavailable', + component='orchestrator') + + def test_host_delete(self): + 
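+ # without a configured orchestrator backend the delete request must fail with 503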
self._delete('/api/host/bar') + self.assertStatus(503) + self.assertError(code='orchestrator_status_unavailable', + component='orchestrator') diff --git a/qa/tasks/mgr/dashboard/test_logs.py b/qa/tasks/mgr/dashboard/test_logs.py new file mode 100644 index 000000000..63f6e16ed --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_logs.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from .helper import DashboardTestCase, JList, JObj, addrvec_schema + + +class LogsTest(DashboardTestCase): + CEPHFS = True + + def test_logs(self): + data = self._get("/api/logs/all") + self.assertStatus(200) + log_entry_schema = JList(JObj({ + 'addrs': JObj({ + 'addrvec': addrvec_schema + }), + 'channel': str, + 'message': str, + 'name': str, + 'priority': str, + 'rank': str, + 'seq': int, + 'stamp': str + })) + schema = JObj({ + 'audit_log': log_entry_schema, + 'clog': log_entry_schema + }) + self.assertSchema(data, schema) + + @DashboardTestCase.RunAs('test', 'test', ['pool-manager']) + def test_log_perms(self): + self._get("/api/logs/all") + self.assertStatus(403) diff --git a/qa/tasks/mgr/dashboard/test_mgr_module.py b/qa/tasks/mgr/dashboard/test_mgr_module.py new file mode 100644 index 000000000..c196c7124 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_mgr_module.py @@ -0,0 +1,154 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +import logging + +import requests + +from .helper import (DashboardTestCase, JLeaf, JList, JObj, + module_options_object_schema, module_options_schema) + +logger = logging.getLogger(__name__) + + +class MgrModuleTestCase(DashboardTestCase): + MGRS_REQUIRED = 1 + + def wait_until_rest_api_accessible(self): + """ + Wait until the REST API is accessible. + """ + + def _check_connection(): + try: + # Try reaching an API endpoint successfully. 
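+ # a 200 response from the module list endpoint means the dashboard REST API is serving requests again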
+ self._get('/api/mgr/module') + if self._resp.status_code == 200: + return True + except requests.ConnectionError: + pass + return False + + self.wait_until_true(_check_connection, timeout=30) + + +class MgrModuleTest(MgrModuleTestCase): + + def test_list_disabled_module(self): + self._ceph_cmd(['mgr', 'module', 'disable', 'iostat']) + self.wait_until_rest_api_accessible() + data = self._get('/api/mgr/module') + self.assertStatus(200) + self.assertSchema( + data, + JList( + JObj(sub_elems={ + 'name': JLeaf(str), + 'enabled': JLeaf(bool), + 'always_on': JLeaf(bool), + 'options': module_options_schema + }))) + module_info = self.find_object_in_list('name', 'iostat', data) + self.assertIsNotNone(module_info) + self.assertFalse(module_info['enabled']) + + def test_list_enabled_module(self): + self._ceph_cmd(['mgr', 'module', 'enable', 'iostat']) + self.wait_until_rest_api_accessible() + data = self._get('/api/mgr/module') + self.assertStatus(200) + self.assertSchema( + data, + JList( + JObj(sub_elems={ + 'name': JLeaf(str), + 'enabled': JLeaf(bool), + 'always_on': JLeaf(bool), + 'options': module_options_schema + }))) + module_info = self.find_object_in_list('name', 'iostat', data) + self.assertIsNotNone(module_info) + self.assertTrue(module_info['enabled']) + + def test_get(self): + data = self._get('/api/mgr/module/telemetry') + self.assertStatus(200) + self.assertSchema( + data, + JObj( + allow_unknown=True, + sub_elems={ + 'channel_basic': bool, + 'channel_ident': bool, + 'channel_crash': bool, + 'channel_device': bool, + 'channel_perf': bool, + 'contact': str, + 'description': str, + 'enabled': bool, + 'interval': int, + 'last_opt_revision': int, + 'leaderboard': bool, + 'leaderboard_description': str, + 'organization': str, + 'proxy': str, + 'url': str + })) + + def test_module_options(self): + data = self._get('/api/mgr/module/telemetry/options') + self.assertStatus(200) + schema = JObj({ + 'channel_basic': module_options_object_schema, + 'channel_crash': module_options_object_schema, + 'channel_device': module_options_object_schema, + 'channel_ident': module_options_object_schema, + 'channel_perf': module_options_object_schema, + 'contact': module_options_object_schema, + 'description': module_options_object_schema, + 'device_url': module_options_object_schema, + 'enabled': module_options_object_schema, + 'interval': module_options_object_schema, + 'last_opt_revision': module_options_object_schema, + 'leaderboard': module_options_object_schema, + 'leaderboard_description': module_options_object_schema, + 'log_level': module_options_object_schema, + 'log_to_cluster': module_options_object_schema, + 'log_to_cluster_level': module_options_object_schema, + 'log_to_file': module_options_object_schema, + 'organization': module_options_object_schema, + 'proxy': module_options_object_schema, + 'url': module_options_object_schema + }) + self.assertSchema(data, schema) + + def test_module_enable(self): + self._post('/api/mgr/module/telemetry/enable') + self.assertStatus(200) + + def test_disable(self): + self._post('/api/mgr/module/iostat/disable') + self.assertStatus(200) + + def test_put(self): + self.set_config_key('config/mgr/mgr/iostat/log_level', 'critical') + self.set_config_key('config/mgr/mgr/iostat/log_to_cluster', 'False') + self.set_config_key('config/mgr/mgr/iostat/log_to_cluster_level', 'info') + self.set_config_key('config/mgr/mgr/iostat/log_to_file', 'True') + self._put( + '/api/mgr/module/iostat', + data={ + 'config': { + 'log_level': 'debug', + 'log_to_cluster': True, + 
'log_to_cluster_level': 'warning', + 'log_to_file': False + } + }) + self.assertStatus(200) + data = self._get('/api/mgr/module/iostat') + self.assertStatus(200) + self.assertEqual(data['log_level'], 'debug') + self.assertTrue(data['log_to_cluster']) + self.assertEqual(data['log_to_cluster_level'], 'warning') + self.assertFalse(data['log_to_file']) diff --git a/qa/tasks/mgr/dashboard/test_monitor.py b/qa/tasks/mgr/dashboard/test_monitor.py new file mode 100644 index 000000000..e32c2c10c --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_monitor.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from .helper import DashboardTestCase + + +class MonitorTest(DashboardTestCase): + AUTH_ROLES = ['cluster-manager'] + + @DashboardTestCase.RunAs('test', 'test', ['block-manager']) + def test_access_permissions(self): + self._get('/api/monitor') + self.assertStatus(403) + + def test_monitor_default(self): + data = self._get("/api/monitor") + self.assertStatus(200) + + self.assertIn('mon_status', data) + self.assertIn('in_quorum', data) + self.assertIn('out_quorum', data) + self.assertIsNotNone(data['mon_status']) + self.assertIsNotNone(data['in_quorum']) + self.assertIsNotNone(data['out_quorum']) diff --git a/qa/tasks/mgr/dashboard/test_motd.py b/qa/tasks/mgr/dashboard/test_motd.py new file mode 100644 index 000000000..2edbf36ba --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_motd.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# pylint: disable=too-many-public-methods + +from __future__ import absolute_import + +import time + +from .helper import DashboardTestCase + + +class MotdTest(DashboardTestCase): + @classmethod + def tearDownClass(cls): + cls._ceph_cmd(['dashboard', 'motd', 'clear']) + super(MotdTest, cls).tearDownClass() + + def setUp(self): + super(MotdTest, self).setUp() + self._ceph_cmd(['dashboard', 'motd', 'clear']) + + def test_none(self): + data = self._get('/ui-api/motd') + self.assertStatus(200) + self.assertIsNone(data) + + def test_set(self): + self._ceph_cmd(['dashboard', 'motd', 'set', 'info', '0', 'foo bar baz']) + data = self._get('/ui-api/motd') + self.assertStatus(200) + self.assertIsInstance(data, dict) + + def test_expired(self): + self._ceph_cmd(['dashboard', 'motd', 'set', 'info', '2s', 'foo bar baz']) + time.sleep(5) + data = self._get('/ui-api/motd') + self.assertStatus(200) + self.assertIsNone(data) diff --git a/qa/tasks/mgr/dashboard/test_orchestrator.py b/qa/tasks/mgr/dashboard/test_orchestrator.py new file mode 100644 index 000000000..2a804c4c2 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_orchestrator.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from .helper import DashboardTestCase + + +class OrchestratorControllerTest(DashboardTestCase): + + AUTH_ROLES = ['cluster-manager'] + + URL_STATUS = '/ui-api/orchestrator/status' + + ORCHESTRATOR = True + + @classmethod + def setUpClass(cls): + super(OrchestratorControllerTest, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + cmd = ['test_orchestrator', 'load_data', '-i', '-'] + cls.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd, stdin='{}') + + def test_status_get(self): + data = self._get(self.URL_STATUS) + self.assertStatus(200) + self.assertTrue(data['available']) diff --git a/qa/tasks/mgr/dashboard/test_osd.py b/qa/tasks/mgr/dashboard/test_osd.py new file mode 100644 index 000000000..71cf3d871 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_osd.py @@ -0,0 +1,368 @@ +# -*- coding: utf-8 -*- + +from __future__ import 
absolute_import + +import json + +from .helper import (DashboardTestCase, JAny, JLeaf, JList, JObj, JTuple, + devices_schema) + + +class OsdTest(DashboardTestCase): + + AUTH_ROLES = ['cluster-manager'] + + @classmethod + def setUpClass(cls): + super(OsdTest, cls).setUpClass() + cls._load_module('test_orchestrator') + cmd = ['orch', 'set', 'backend', 'test_orchestrator'] + cls.mgr_cluster.mon_manager.raw_cluster_cmd(*cmd) + + def tearDown(self): + self._put('/api/osd/0/mark', data={'action': 'in'}) + + @DashboardTestCase.RunAs('test', 'test', ['block-manager']) + def test_access_permissions(self): + self._get('/api/osd') + self.assertStatus(403) + self._get('/api/osd/0') + self.assertStatus(403) + + def assert_in_and_not_none(self, data, properties): + self.assertSchema(data, JObj({p: JAny(none=False) for p in properties}, allow_unknown=True)) + + def test_list(self): + data = self._get('/api/osd') + self.assertStatus(200) + + self.assertGreaterEqual(len(data), 1) + data = data[0] + self.assert_in_and_not_none(data, ['host', 'tree', 'state', 'stats', 'stats_history']) + self.assert_in_and_not_none(data['host'], ['name']) + self.assert_in_and_not_none(data['tree'], ['id']) + self.assert_in_and_not_none(data['stats'], ['numpg', 'stat_bytes_used', 'stat_bytes', + 'op_r', 'op_w']) + self.assert_in_and_not_none(data['stats_history'], ['op_out_bytes', 'op_in_bytes']) + self.assertSchema(data['stats_history']['op_out_bytes'], + JList(JTuple([JLeaf(float), JLeaf(float)]))) + + def test_details(self): + data = self._get('/api/osd/0') + self.assertStatus(200) + self.assert_in_and_not_none(data, ['osd_metadata']) + + def test_histogram(self): + data = self._get('/api/osd/0/histogram') + self.assertStatus(200) + self.assert_in_and_not_none(data['osd'], ['op_w_latency_in_bytes_histogram', + 'op_r_latency_out_bytes_histogram']) + + def test_scrub(self): + self._post('/api/osd/0/scrub?deep=False') + self.assertStatus(200) + + self._post('/api/osd/0/scrub?deep=True') + self.assertStatus(200) + + def test_safe_to_delete(self): + data = self._get('/api/osd/safe_to_delete?svc_ids=0') + self.assertStatus(200) + self.assertSchema(data, JObj({ + 'is_safe_to_delete': JAny(none=True), + 'message': str + })) + self.assertTrue(data['is_safe_to_delete']) + + def test_osd_smart(self): + self._get('/api/osd/0/smart') + self.assertStatus(200) + + def test_mark_out_and_in(self): + self._put('/api/osd/0/mark', data={'action': 'out'}) + self.assertStatus(200) + + self._put('/api/osd/0/mark', data={'action': 'in'}) + self.assertStatus(200) + + def test_mark_down(self): + self._put('/api/osd/0/mark', data={'action': 'down'}) + self.assertStatus(200) + + def test_reweight(self): + self._post('/api/osd/0/reweight', {'weight': 0.4}) + self.assertStatus(200) + + def get_reweight_value(): + self._get('/api/osd/0') + response = self.jsonBody() + if 'osd_map' in response and 'weight' in response['osd_map']: + return round(response['osd_map']['weight'], 1) + return None + self.wait_until_equal(get_reweight_value, 0.4, 10) + self.assertStatus(200) + + # Undo + self._post('/api/osd/0/reweight', {'weight': 1}) + + def test_create_lost_destroy_remove(self): + sample_data = { + 'uuid': 'f860ca2e-757d-48ce-b74a-87052cad563f', + 'svc_id': 5 + } + + # Create + self._task_post('/api/osd', { + 'method': 'bare', + 'data': sample_data, + 'tracking_id': 'bare-5' + }) + self.assertStatus(201) + + # invalid method + self._task_post('/api/osd', { + 'method': 'xyz', + 'data': { + 'uuid': 'f860ca2e-757d-48ce-b74a-87052cad563f', + 'svc_id': 5 + }, + 
'tracking_id': 'bare-5' + }) + self.assertStatus(400) + + # Lost + self._put('/api/osd/5/mark', data={'action': 'lost'}) + self.assertStatus(200) + # Destroy + self._post('/api/osd/5/destroy') + self.assertStatus(200) + # Purge + self._post('/api/osd/5/purge') + self.assertStatus(200) + + def test_create_with_drive_group(self): + data = { + 'method': 'drive_groups', + 'data': [ + { + 'service_type': 'osd', + 'service_id': 'test', + 'host_pattern': '*', + 'data_devices': { + 'vendor': 'abc', + 'model': 'cba', + 'rotational': True, + 'size': '4 TB' + }, + 'wal_devices': { + 'vendor': 'def', + 'model': 'fed', + 'rotational': False, + 'size': '1 TB' + }, + 'db_devices': { + 'vendor': 'ghi', + 'model': 'ihg', + 'rotational': False, + 'size': '512 GB' + }, + 'wal_slots': 5, + 'db_slots': 5, + 'encrypted': True + } + ], + 'tracking_id': 'test' + } + self._post('/api/osd', data) + self.assertStatus(201) + + def test_safe_to_destroy(self): + osd_dump = json.loads(self._ceph_cmd(['osd', 'dump', '-f', 'json'])) + max_id = max(map(lambda e: e['osd'], osd_dump['osds'])) + + def get_pg_status_equal_unknown(osd_ids): + self._get('/api/osd/safe_to_destroy?ids={}'.format(osd_ids)) + if 'message' in self.jsonBody(): + return 'pgs have unknown state' in self.jsonBody()['message'] + return False + + # 1 OSD safe to destroy + unused_osd_id = max_id + 10 + self.wait_until_equal( + lambda: get_pg_status_equal_unknown(unused_osd_id), False, 30) + self.assertStatus(200) + self.assertJsonBody({ + 'is_safe_to_destroy': True, + 'active': [], + 'missing_stats': [], + 'safe_to_destroy': [unused_osd_id], + 'stored_pgs': [], + }) + + # multiple OSDs safe to destroy + unused_osd_ids = [max_id + 11, max_id + 12] + self.wait_until_equal( + lambda: get_pg_status_equal_unknown(str(unused_osd_ids)), False, 30) + self.assertStatus(200) + self.assertJsonBody({ + 'is_safe_to_destroy': True, + 'active': [], + 'missing_stats': [], + 'safe_to_destroy': unused_osd_ids, + 'stored_pgs': [], + }) + + # 1 OSD unsafe to destroy + def get_destroy_status(): + self._get('/api/osd/safe_to_destroy?ids=0') + if 'is_safe_to_destroy' in self.jsonBody(): + return self.jsonBody()['is_safe_to_destroy'] + return None + self.wait_until_equal(get_destroy_status, False, 10) + self.assertStatus(200) + + def test_osd_devices(self): + data = self._get('/api/osd/0/devices') + self.assertStatus(200) + self.assertSchema(data, devices_schema) + + +class OsdFlagsTest(DashboardTestCase): + def __init__(self, *args, **kwargs): + super(OsdFlagsTest, self).__init__(*args, **kwargs) + self._initial_flags = ['sortbitwise', 'recovery_deletes', 'purged_snapdirs', + 'pglog_hardlimit'] # These flags cannot be unset + + @classmethod + def _put_flags(cls, flags, ids=None): + url = '/api/osd/flags' + data = {'flags': flags} + + if ids: + url = url + '/individual' + data['ids'] = ids + + cls._put(url, data=data) + return cls._resp.json() + + def test_list_osd_flags(self): + flags = self._get('/api/osd/flags') + self.assertStatus(200) + self.assertEqual(len(flags), 4) + self.assertCountEqual(flags, self._initial_flags) + + def test_add_osd_flag(self): + flags = self._put_flags([ + 'sortbitwise', 'recovery_deletes', 'purged_snapdirs', 'noout', + 'pause', 'pglog_hardlimit' + ]) + self.assertCountEqual(flags, [ + 'sortbitwise', 'recovery_deletes', 'purged_snapdirs', 'noout', + 'pause', 'pglog_hardlimit' + ]) + + # Restore flags + self._put_flags(self._initial_flags) + + def test_get_indiv_flag(self): + initial = self._get('/api/osd/flags/individual') + self.assertStatus(200) + 
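+ # each entry pairs an OSD id with the list of flags currently set on that OSD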
self.assertSchema(initial, JList(JObj({ + 'osd': int, + 'flags': JList(str) + }))) + + self._ceph_cmd(['osd', 'set-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2']) + flags_added = self._get('/api/osd/flags/individual') + self.assertStatus(200) + for osd in flags_added: + if osd['osd'] in [0, 1, 2]: + self.assertIn('noout', osd['flags']) + self.assertIn('noin', osd['flags']) + for osd_initial in initial: + if osd['osd'] == osd_initial['osd']: + self.assertGreater(len(osd['flags']), len(osd_initial['flags'])) + + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2']) + flags_removed = self._get('/api/osd/flags/individual') + self.assertStatus(200) + for osd in flags_removed: + if osd['osd'] in [0, 1, 2]: + self.assertNotIn('noout', osd['flags']) + self.assertNotIn('noin', osd['flags']) + + def test_add_indiv_flag(self): + flags_update = {'noup': None, 'nodown': None, 'noin': None, 'noout': True} + svc_id = 0 + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, [svc_id], ['noout'], [], ['noup', 'nodown', 'noin']) + self._check_indiv_flags_osd([svc_id], ['noout'], ['noup', 'nodown', 'noin']) + + self._ceph_cmd(['osd', 'unset-group', 'noout', 'osd.{}'.format(svc_id)]) + + def test_add_multiple_indiv_flags(self): + flags_update = {'noup': None, 'nodown': None, 'noin': True, 'noout': True} + svc_id = 0 + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, [svc_id], ['noout', 'noin'], [], ['noup', 'nodown']) + self._check_indiv_flags_osd([svc_id], ['noout', 'noin'], ['noup', 'nodown']) + + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.{}'.format(svc_id)]) + + def test_add_multiple_indiv_flags_multiple_osds(self): + flags_update = {'noup': None, 'nodown': None, 'noin': True, 'noout': True} + svc_id = [0, 1, 2] + + resp = self._put_flags(flags_update, svc_id) + self._check_indiv_flags_resp(resp, svc_id, ['noout', 'noin'], [], ['noup', 'nodown']) + self._check_indiv_flags_osd([svc_id], ['noout', 'noin'], ['noup', 'nodown']) + + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2']) + + def test_remove_indiv_flag(self): + flags_update = {'noup': None, 'nodown': None, 'noin': None, 'noout': False} + svc_id = 0 + self._ceph_cmd(['osd', 'set-group', 'noout', 'osd.{}'.format(svc_id)]) + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, [svc_id], [], ['noout'], ['noup', 'nodown', 'noin']) + self._check_indiv_flags_osd([svc_id], [], ['noup', 'nodown', 'noin', 'noout']) + + def test_remove_multiple_indiv_flags(self): + flags_update = {'noup': None, 'nodown': None, 'noin': False, 'noout': False} + svc_id = 0 + self._ceph_cmd(['osd', 'set-group', 'noout,noin', 'osd.{}'.format(svc_id)]) + + resp = self._put_flags(flags_update, [svc_id]) + self._check_indiv_flags_resp(resp, [svc_id], [], ['noout', 'noin'], ['noup', 'nodown']) + self._check_indiv_flags_osd([svc_id], [], ['noout', 'noin', 'noup', 'nodown']) + + def test_remove_multiple_indiv_flags_multiple_osds(self): + flags_update = {'noup': None, 'nodown': None, 'noin': False, 'noout': False} + svc_id = [0, 1, 2] + self._ceph_cmd(['osd', 'unset-group', 'noout,noin', 'osd.0', 'osd.1', 'osd.2']) + + resp = self._put_flags(flags_update, svc_id) + self._check_indiv_flags_resp(resp, svc_id, [], ['noout', 'noin'], ['noup', 'nodown']) + self._check_indiv_flags_osd([svc_id], [], ['noout', 'noin', 'noup', 'nodown']) + + def _check_indiv_flags_resp(self, resp, ids, added, removed, ignored): + 
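+ # the response must list exactly the expected ids, added and removed flags; ignored flags must appear in neither list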
self.assertStatus(200) + self.assertCountEqual(resp['ids'], ids) + self.assertCountEqual(resp['added'], added) + self.assertCountEqual(resp['removed'], removed) + + for flag in ignored: + self.assertNotIn(flag, resp['added']) + self.assertNotIn(flag, resp['removed']) + + def _check_indiv_flags_osd(self, ids, activated_flags, deactivated_flags): + osds = json.loads(self._ceph_cmd(['osd', 'dump', '--format=json']))['osds'] + for osd in osds: + if osd['osd'] in ids: + for flag in activated_flags: + self.assertIn(flag, osd['state']) + for flag in deactivated_flags: + self.assertNotIn(flag, osd['state']) diff --git a/qa/tasks/mgr/dashboard/test_perf_counters.py b/qa/tasks/mgr/dashboard/test_perf_counters.py new file mode 100644 index 000000000..c01368bce --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_perf_counters.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +from .helper import DashboardTestCase, JObj + + +class PerfCountersControllerTest(DashboardTestCase): + + def test_perf_counters_list(self): + data = self._get('/api/perf_counters') + self.assertStatus(200) + + self.assertIsInstance(data, dict) + for mon in self.mons(): + self.assertIn('mon.{}'.format(mon), data) + + osds = self.ceph_cluster.mon_manager.get_osd_dump() + for osd in osds: + self.assertIn('osd.{}'.format(osd['osd']), data) + + def _validate_perf(self, srv_id, srv_type, data, allow_empty): + self.assertIsInstance(data, dict) + self.assertEqual(srv_type, data['service']['type']) + self.assertEqual(str(srv_id), data['service']['id']) + self.assertIsInstance(data['counters'], list) + if not allow_empty: + self.assertGreater(len(data['counters']), 0) + for counter in data['counters'][0:1]: + self.assertIsInstance(counter, dict) + self.assertIn('description', counter) + self.assertIn('name', counter) + self.assertIn('unit', counter) + self.assertIn('value', counter) + + def test_perf_counters_mon_get(self): + mon = self.mons()[0] + data = self._get('/api/perf_counters/mon/{}'.format(mon)) + self.assertStatus(200) + self._validate_perf(mon, 'mon', data, allow_empty=False) + + def test_perf_counters_mgr_get(self): + mgr = list(self.mgr_cluster.mgr_ids)[0] + data = self._get('/api/perf_counters/mgr/{}'.format(mgr)) + self.assertStatus(200) + self._validate_perf(mgr, 'mgr', data, allow_empty=False) + + def test_perf_counters_mds_get(self): + for mds in self.mds_cluster.mds_ids: + data = self._get('/api/perf_counters/mds/{}'.format(mds)) + self.assertStatus(200) + self._validate_perf(mds, 'mds', data, allow_empty=True) + + def test_perf_counters_osd_get(self): + for osd in self.ceph_cluster.mon_manager.get_osd_dump(): + osd = osd['osd'] + data = self._get('/api/perf_counters/osd/{}'.format(osd)) + self.assertStatus(200) + self._validate_perf(osd, 'osd', data, allow_empty=False) + + def test_perf_counters_not_found(self): + osds = self.ceph_cluster.mon_manager.get_osd_dump() + unused_id = int(list(map(lambda o: o['osd'], osds)).pop()) + 1 + + self._get('/api/perf_counters/osd/{}'.format(unused_id)) + self.assertStatus(404) + schema = JObj(sub_elems={ + 'status': str, + 'detail': str, + }, allow_unknown=True) + self.assertEqual(self._resp.json()['detail'], "'osd.{}' not found".format(unused_id)) + self.assertSchemaBody(schema) diff --git a/qa/tasks/mgr/dashboard/test_pool.py b/qa/tasks/mgr/dashboard/test_pool.py new file mode 100644 index 000000000..0699be48c --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_pool.py @@ -0,0 +1,435 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import 
+ +import logging +import time +from contextlib import contextmanager + +from .helper import DashboardTestCase, JAny, JList, JObj, JUnion + +log = logging.getLogger(__name__) + + +class PoolTest(DashboardTestCase): + AUTH_ROLES = ['pool-manager'] + + pool_schema = JObj(sub_elems={ + 'pool_name': str, + 'type': str, + 'application_metadata': JList(str), + 'flags': int, + 'flags_names': str, + }, allow_unknown=True) + + pool_list_stat_schema = JObj(sub_elems={ + 'latest': JUnion([int, float]), + 'rate': float, + 'rates': JList(JAny(none=False)), + }) + + pool_list_stats_schema = JObj(sub_elems={ + 'avail_raw': pool_list_stat_schema, + 'bytes_used': pool_list_stat_schema, + 'max_avail': pool_list_stat_schema, + 'percent_used': pool_list_stat_schema, + 'rd_bytes': pool_list_stat_schema, + 'wr_bytes': pool_list_stat_schema, + 'rd': pool_list_stat_schema, + 'wr': pool_list_stat_schema, + }, allow_unknown=True) + + pool_rbd_conf_schema = JList(JObj(sub_elems={ + 'name': str, + 'value': str, + 'source': int + })) + + @contextmanager + def __yield_pool(self, name=None, data=None, deletion_name=None): + """ + Use either just a name or whole description of a pool to create one. + This also validates the correct creation and deletion after the pool was used. + + :param name: Name of the pool + :param data: Describes the pool in full length + :param deletion_name: Only needed if the pool was renamed + :return: + """ + data = self._create_pool(name, data) + yield data + self._delete_pool(deletion_name or data['pool']) + + def _create_pool(self, name, data): + data = data or { + 'pool': name, + 'pg_num': '32', + 'pool_type': 'replicated', + 'compression_algorithm': 'snappy', + 'compression_mode': 'passive', + 'compression_max_blob_size': '131072', + 'compression_required_ratio': '0.875', + 'application_metadata': ['rbd'], + 'configuration': { + 'rbd_qos_bps_limit': 1024000, + 'rbd_qos_iops_limit': 5000, + } + } + self._task_post('/api/pool/', data) + self.assertStatus(201) + self._validate_pool_properties(data, self._get_pool(data['pool'])) + return data + + def _delete_pool(self, name): + self._task_delete('/api/pool/' + name) + self.assertStatus(204) + + def _validate_pool_properties(self, data, pool, timeout=DashboardTestCase.TIMEOUT_HEALTH_CLEAR): + # pylint: disable=too-many-branches + for prop, value in data.items(): + if prop == 'pool_type': + self.assertEqual(pool['type'], value) + elif prop == 'size': + self.assertEqual(pool[prop], int(value), + '{}: {} != {}'.format(prop, pool[prop], value)) + elif prop == 'pg_num': + self._check_pg_num(pool['pool_name'], int(value)) + elif prop == 'application_metadata': + self.assertIsInstance(pool[prop], list) + self.assertEqual(value, pool[prop]) + elif prop == 'pool': + self.assertEqual(pool['pool_name'], value) + elif prop.startswith('compression'): + if value is not None: + if prop.endswith('size'): + value = int(value) + elif prop.endswith('ratio'): + value = float(value) + self.assertEqual(pool['options'][prop], value) + else: + self.assertEqual(pool['options'], {}) + elif prop == 'configuration': + # configuration cannot really be checked here for two reasons: + # 1. The default value cannot be given to this method, which becomes relevant + # when resetting a value, because it's not always zero. + # 2. 
The expected `source` cannot be given to this method, and it cannot + # reliably be determined (see 1) + pass + else: + self.assertEqual(pool[prop], value, '{}: {} != {}'.format(prop, pool[prop], value)) + + self.wait_for_health_clear(timeout) + + def _get_pool(self, pool_name): + pool = self._get("/api/pool/" + pool_name) + self.assertStatus(200) + self.assertSchemaBody(self.pool_schema) + return pool + + def _check_pg_num(self, pool_name, pg_num): + """ + If both properties have not the same value, the cluster goes into a warning state, which + will only happen during a pg update on an existing pool. The test that does that is + currently commented out because our QA systems can't deal with the change. Feel free to test + it locally. + """ + self.wait_until_equal( + lambda: self._get_pool(pool_name)['pg_placement_num'], + expect_val=pg_num, + timeout=180 + ) + + pool = self._get_pool(pool_name) + + for prop in ['pg_num', 'pg_placement_num']: + self.assertEqual(pool[prop], int(pg_num), + '{}: {} != {}'.format(prop, pool[prop], pg_num)) + + @DashboardTestCase.RunAs('test', 'test', [{'pool': ['create', 'update', 'delete']}]) + def test_read_access_permissions(self): + self._get('/api/pool') + self.assertStatus(403) + self._get('/api/pool/bla') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'pool': ['read', 'update', 'delete']}]) + def test_create_access_permissions(self): + self._task_post('/api/pool/', {}) + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'pool': ['read', 'create', 'update']}]) + def test_delete_access_permissions(self): + self._delete('/api/pool/ddd') + self.assertStatus(403) + + def test_pool_configuration(self): + pool_name = '.mgr' + data = self._get('/api/pool/{}/configuration'.format(pool_name)) + self.assertStatus(200) + self.assertSchema(data, JList(JObj({ + 'name': str, + 'value': str, + 'source': int + }))) + + def test_pool_list(self): + data = self._get("/api/pool") + self.assertStatus(200) + + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + self.assertEqual(len(cluster_pools), len(data)) + self.assertSchemaBody(JList(self.pool_schema)) + for pool in data: + self.assertNotIn('pg_status', pool) + self.assertNotIn('stats', pool) + self.assertIn(pool['pool_name'], cluster_pools) + + def test_pool_list_attrs(self): + data = self._get("/api/pool?attrs=type,flags") + self.assertStatus(200) + + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + self.assertEqual(len(cluster_pools), len(data)) + for pool in data: + self.assertIn('pool_name', pool) + self.assertIn('type', pool) + self.assertIn('flags', pool) + self.assertNotIn('flags_names', pool) + self.assertNotIn('pg_status', pool) + self.assertNotIn('stats', pool) + self.assertIn(pool['pool_name'], cluster_pools) + + def test_pool_list_stats(self): + data = self._get("/api/pool?stats=true") + self.assertStatus(200) + + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + self.assertEqual(len(cluster_pools), len(data)) + self.assertSchemaBody(JList(self.pool_schema)) + for pool in data: + self.assertIn('pool_name', pool) + self.assertIn('type', pool) + self.assertIn('application_metadata', pool) + self.assertIn('flags', pool) + self.assertIn('pg_status', pool) + self.assertSchema(pool['stats'], self.pool_list_stats_schema) + self.assertIn('flags_names', pool) + self.assertIn(pool['pool_name'], cluster_pools) + + def test_pool_get(self): + cluster_pools = self.ceph_cluster.mon_manager.list_pools() + pool = 
self._get("/api/pool/{}?stats=true&attrs=type,flags,stats" + .format(cluster_pools[0])) + self.assertEqual(pool['pool_name'], cluster_pools[0]) + self.assertIn('type', pool) + self.assertIn('flags', pool) + self.assertNotIn('pg_status', pool) + self.assertSchema(pool['stats'], self.pool_list_stats_schema) + self.assertNotIn('flags_names', pool) + self.assertSchema(pool['configuration'], self.pool_rbd_conf_schema) + + def test_pool_create_with_two_applications(self): + self.__yield_pool(None, { + 'pool': 'dashboard_pool1', + 'pg_num': '32', + 'pool_type': 'replicated', + 'application_metadata': ['rbd', 'sth'], + }) + + def test_pool_create_with_ecp_and_rule(self): + self._ceph_cmd(['osd', 'crush', 'rule', 'create-erasure', 'ecrule']) + self._ceph_cmd( + ['osd', 'erasure-code-profile', 'set', 'ecprofile', 'crush-failure-domain=osd']) + self.__yield_pool(None, { + 'pool': 'dashboard_pool2', + 'pg_num': '32', + 'pool_type': 'erasure', + 'application_metadata': ['rbd'], + 'erasure_code_profile': 'ecprofile', + 'crush_rule': 'ecrule', + }) + self._ceph_cmd(['osd', 'erasure-code-profile', 'rm', 'ecprofile']) + + def test_pool_create_with_compression(self): + pool = { + 'pool': 'dashboard_pool3', + 'pg_num': '32', + 'pool_type': 'replicated', + 'compression_algorithm': 'zstd', + 'compression_mode': 'aggressive', + 'compression_max_blob_size': '10000000', + 'compression_required_ratio': '0.8', + 'application_metadata': ['rbd'], + 'configuration': { + 'rbd_qos_bps_limit': 2048, + 'rbd_qos_iops_limit': None, + }, + } + with self.__yield_pool(None, pool): + expected_configuration = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 1, + 'value': '2048', + }, { + 'name': 'rbd_qos_iops_limit', + 'source': 0, + 'value': '0', + }] + new_pool = self._get_pool(pool['pool']) + for conf in expected_configuration: + self.assertIn(conf, new_pool['configuration']) + + def test_pool_create_with_quotas(self): + pools = [ + { + 'pool_data': { + 'pool': 'dashboard_pool_quota1', + 'pg_num': '32', + 'pool_type': 'replicated', + 'application_metadata': ['rbd'], + }, + 'pool_quotas_to_check': { + 'quota_max_objects': 0, + 'quota_max_bytes': 0, + } + }, + { + 'pool_data': { + 'pool': 'dashboard_pool_quota2', + 'pg_num': '32', + 'pool_type': 'replicated', + 'application_metadata': ['rbd'], + 'quota_max_objects': 1024, + 'quota_max_bytes': 1000, + }, + 'pool_quotas_to_check': { + 'quota_max_objects': 1024, + 'quota_max_bytes': 1000, + } + } + ] + + for pool in pools: + pool_name = pool['pool_data']['pool'] + with self.__yield_pool(pool_name, pool['pool_data']): + self._validate_pool_properties(pool['pool_quotas_to_check'], + self._get_pool(pool_name)) + + def test_pool_update_name(self): + name = 'pool_update' + updated_name = 'pool_updated_name' + with self.__yield_pool(name, None, updated_name): + props = {'pool': updated_name} + self._task_put('/api/pool/{}'.format(name), props) + time.sleep(5) + self.assertStatus(200) + self._validate_pool_properties(props, self._get_pool(updated_name)) + + def test_pool_update_metadata(self): + pool_name = 'pool_update_metadata' + with self.__yield_pool(pool_name): + props = {'application_metadata': ['rbd', 'sth']} + self._task_put('/api/pool/{}'.format(pool_name), props) + self._validate_pool_properties(props, self._get_pool(pool_name), + self.TIMEOUT_HEALTH_CLEAR * 2) + + properties = {'application_metadata': ['rgw']} + self._task_put('/api/pool/' + pool_name, properties) + self._validate_pool_properties(properties, self._get_pool(pool_name), + self.TIMEOUT_HEALTH_CLEAR * 2) + + 
properties = {'application_metadata': ['rbd', 'sth']} + self._task_put('/api/pool/' + pool_name, properties) + self._validate_pool_properties(properties, self._get_pool(pool_name), + self.TIMEOUT_HEALTH_CLEAR * 2) + + properties = {'application_metadata': ['rgw']} + self._task_put('/api/pool/' + pool_name, properties) + self._validate_pool_properties(properties, self._get_pool(pool_name), + self.TIMEOUT_HEALTH_CLEAR * 2) + + def test_pool_update_configuration(self): + pool_name = 'pool_update_configuration' + with self.__yield_pool(pool_name): + configuration = { + 'rbd_qos_bps_limit': 1024, + 'rbd_qos_iops_limit': None, + } + expected_configuration = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 1, + 'value': '1024', + }, { + 'name': 'rbd_qos_iops_limit', + 'source': 0, + 'value': '0', + }] + self._task_put('/api/pool/' + pool_name, {'configuration': configuration}) + time.sleep(5) + pool_config = self._get_pool(pool_name)['configuration'] + for conf in expected_configuration: + self.assertIn(conf, pool_config) + + def test_pool_update_compression(self): + pool_name = 'pool_update_compression' + with self.__yield_pool(pool_name): + properties = { + 'compression_algorithm': 'zstd', + 'compression_mode': 'aggressive', + 'compression_max_blob_size': '10000000', + 'compression_required_ratio': '0.8', + } + self._task_put('/api/pool/' + pool_name, properties) + time.sleep(5) + self._validate_pool_properties(properties, self._get_pool(pool_name)) + + def test_pool_update_unset_compression(self): + pool_name = 'pool_update_unset_compression' + with self.__yield_pool(pool_name): + self._task_put('/api/pool/' + pool_name, {'compression_mode': 'unset'}) + time.sleep(5) + self._validate_pool_properties({ + 'compression_algorithm': None, + 'compression_mode': None, + 'compression_max_blob_size': None, + 'compression_required_ratio': None, + }, self._get_pool(pool_name)) + + def test_pool_update_quotas(self): + pool_name = 'pool_update_quotas' + with self.__yield_pool(pool_name): + properties = { + 'quota_max_objects': 1024, + 'quota_max_bytes': 1000, + } + self._task_put('/api/pool/' + pool_name, properties) + time.sleep(5) + self._validate_pool_properties(properties, self._get_pool(pool_name)) + + def test_pool_create_fail(self): + data = {'pool_type': u'replicated', 'rule_name': u'dnf', 'pg_num': u'8', 'pool': u'sadfs'} + self._task_post('/api/pool/', data) + self.assertStatus(400) + self.assertJsonBody({ + 'component': 'pool', + 'code': "2", + 'detail': "[errno -2] specified rule dnf doesn't exist" + }) + + def test_pool_info(self): + self._get("/ui-api/pool/info") + self.assertSchemaBody(JObj({ + 'pool_names': JList(str), + 'compression_algorithms': JList(str), + 'compression_modes': JList(str), + 'is_all_bluestore': bool, + 'bluestore_compression_algorithm': str, + 'osd_count': int, + 'crush_rules_replicated': JList(JObj({}, allow_unknown=True)), + 'crush_rules_erasure': JList(JObj({}, allow_unknown=True)), + 'pg_autoscale_default_mode': str, + 'pg_autoscale_modes': JList(str), + 'erasure_code_profiles': JList(JObj({}, allow_unknown=True)), + 'used_rules': JObj({}, allow_unknown=True), + 'used_profiles': JObj({}, allow_unknown=True), + 'nodes': JList(JObj({}, allow_unknown=True)), + })) diff --git a/qa/tasks/mgr/dashboard/test_rbd.py b/qa/tasks/mgr/dashboard/test_rbd.py new file mode 100644 index 000000000..c2ffbd48e --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_rbd.py @@ -0,0 +1,978 @@ +# -*- coding: utf-8 -*- +# pylint: disable=too-many-public-methods + +from __future__ import 
absolute_import + +import time + +from .helper import DashboardTestCase, JLeaf, JList, JObj + + +class RbdTest(DashboardTestCase): + AUTH_ROLES = ['pool-manager', 'block-manager', 'cluster-manager'] + LIST_VERSION = '2.0' + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['create', 'update', 'delete']}]) + def test_read_access_permissions(self): + self._get('/api/block/image?offset=0&limit=-1&search=&sort=+name', + version=RbdTest.LIST_VERSION) + self.assertStatus(403) + self.get_image('pool', None, 'image') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['read', 'update', 'delete']}]) + def test_create_access_permissions(self): + self.create_image('pool', None, 'name', 0) + self.assertStatus(403) + self.create_snapshot('pool', None, 'image', 'snapshot', False) + self.assertStatus(403) + self.copy_image('src_pool', None, 'src_image', 'dest_pool', None, 'dest_image') + self.assertStatus(403) + self.clone_image('parent_pool', None, 'parent_image', 'parent_snap', 'pool', None, 'name') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['read', 'create', 'delete']}]) + def test_update_access_permissions(self): + self.edit_image('pool', None, 'image') + self.assertStatus(403) + self.update_snapshot('pool', None, 'image', 'snapshot', None, None) + self.assertStatus(403) + self.rollback_snapshot('rbd', None, 'rollback_img', 'snap1') + self.assertStatus(403) + self.flatten_image('pool', None, 'image') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-image': ['read', 'create', 'update']}]) + def test_delete_access_permissions(self): + self.remove_image('pool', None, 'image') + self.assertStatus(403) + self.remove_snapshot('pool', None, 'image', 'snapshot') + self.assertStatus(403) + + @classmethod + def create_namespace(cls, pool, namespace): + data = {'namespace': namespace} + return cls._post('/api/block/pool/{}/namespace'.format(pool), data) + + @classmethod + def remove_namespace(cls, pool, namespace): + return cls._delete('/api/block/pool/{}/namespace/{}'.format(pool, namespace)) + + @classmethod + def create_image(cls, pool, namespace, name, size, **kwargs): + data = {'name': name, 'pool_name': pool, 'namespace': namespace, 'size': size} + data.update(kwargs) + return cls._task_post('/api/block/image', data) + + @classmethod + def get_image(cls, pool, namespace, name): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._get('/api/block/image/{}%2F{}{}'.format(pool, namespace, name)) + + @classmethod + def clone_image(cls, parent_pool, parent_namespace, parent_image, parent_snap, pool, namespace, + name, **kwargs): + # pylint: disable=too-many-arguments + data = {'child_image_name': name, 'child_namespace': namespace, 'child_pool_name': pool} + data.update(kwargs) + parent_namespace = '{}%2F'.format(parent_namespace) if parent_namespace else '' + return cls._task_post('/api/block/image/{}%2F{}{}/snap/{}/clone' + .format(parent_pool, parent_namespace, parent_image, parent_snap), + data) + + @classmethod + def copy_image(cls, src_pool, src_namespace, src_image, dest_pool, dest_namespace, dest_image, + **kwargs): + # pylint: disable=too-many-arguments + data = {'dest_image_name': dest_image, + 'dest_pool_name': dest_pool, + 'dest_namespace': dest_namespace} + data.update(kwargs) + src_namespace = '{}%2F'.format(src_namespace) if src_namespace else '' + return cls._task_post('/api/block/image/{}%2F{}{}/copy' + .format(src_pool, src_namespace, src_image), data) + + 
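+ # Note: the helpers in this class address an image as 'pool[/namespace]/image' and + # URL-encode the '/' separators as %2F when building request paths. For example + # (hypothetical values), get_image('rbd', 'ns', 'img1') issues GET /api/block/image/rbd%2Fns%2Fimg1. +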
@classmethod + def remove_image(cls, pool, namespace, image): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_delete('/api/block/image/{}%2F{}{}'.format(pool, namespace, image)) + + # pylint: disable=too-many-arguments + @classmethod + def edit_image(cls, pool, namespace, image, name=None, size=None, features=None, **kwargs): + kwargs.update({'name': name, 'size': size, 'features': features}) + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_put('/api/block/image/{}%2F{}{}'.format(pool, namespace, image), kwargs) + + @classmethod + def flatten_image(cls, pool, namespace, image): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_post('/api/block/image/{}%2F{}{}/flatten'.format(pool, namespace, image)) + + @classmethod + def create_snapshot(cls, pool, namespace, image, snapshot, mirrorImageSnapshot): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_post('/api/block/image/{}%2F{}{}/snap'.format(pool, namespace, image), + {'snapshot_name': snapshot, 'mirrorImageSnapshot': mirrorImageSnapshot}) # noqa E501 #pylint: disable=line-too-long + + @classmethod + def remove_snapshot(cls, pool, namespace, image, snapshot): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_delete('/api/block/image/{}%2F{}{}/snap/{}'.format(pool, namespace, image, + snapshot)) + + @classmethod + def update_snapshot(cls, pool, namespace, image, snapshot, new_name, is_protected): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_put('/api/block/image/{}%2F{}{}/snap/{}'.format(pool, namespace, image, + snapshot), + {'new_snap_name': new_name, 'is_protected': is_protected}) + + @classmethod + def rollback_snapshot(cls, pool, namespace, image, snapshot): + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_post('/api/block/image/{}%2F{}{}/snap/{}/rollback'.format(pool, + namespace, + image, + snapshot)) + + @classmethod + def setUpClass(cls): + super(RbdTest, cls).setUpClass() + cls.create_pool('rbd', 2**3, 'replicated') + cls.create_pool('rbd_iscsi', 2**3, 'replicated') + + cls.create_image('rbd', None, 'img1', 2**30) + cls.create_image('rbd', None, 'img2', 2*2**30) + cls.create_image('rbd_iscsi', None, 'img1', 2**30) + cls.create_image('rbd_iscsi', None, 'img2', 2*2**30) + + osd_metadata = cls.ceph_cluster.mon_manager.get_osd_metadata() + cls.bluestore_support = True + for osd in osd_metadata: + if osd['osd_objectstore'] != 'bluestore': + cls.bluestore_support = False + break + + @classmethod + def tearDownClass(cls): + super(RbdTest, cls).tearDownClass() + cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd', 'rbd', '--yes-i-really-really-mean-it']) + cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd_iscsi', 'rbd_iscsi', + '--yes-i-really-really-mean-it']) + cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd_data', 'rbd_data', + '--yes-i-really-really-mean-it']) + + def create_image_in_trash(self, pool, name, delay=0): + self.create_image(pool, None, name, 10240) + img = self._get('/api/block/image/{}%2F{}'.format(pool, name)) + + self._task_post("/api/block/image/{}%2F{}/move_trash".format(pool, name), + {'delay': delay}) + self.assertStatus([200, 201]) + return img['id'] + + @classmethod + def remove_trash(cls, pool, image_id, force=False): + return cls._task_delete('/api/block/image/trash/{}%2F{}/?force={}'.format( + pool, image_id, force)) + + @classmethod + def restore_trash(cls, pool, namespace, image_id, new_image_name): + data 
= {'new_image_name': new_image_name} + namespace = '{}%2F'.format(namespace) if namespace else '' + return cls._task_post('/api/block/image/trash/{}%2F{}{}/restore'.format(pool, + namespace, + image_id), data) + + @classmethod + def purge_trash(cls, pool): + return cls._task_post('/api/block/image/trash/purge?pool_name={}'.format(pool)) + + @classmethod + def get_trash(cls, pool, image_id): + trash = cls._get('/api/block/image/trash/?pool_name={}'.format(pool)) + if isinstance(trash, list): + for trash_pool in trash: + for image in trash_pool['value']: + if image['id'] == image_id: + return image + + return None + + def _validate_image(self, img, **kwargs): + """ + Example of an RBD image json: + + { + "size": 1073741824, + "obj_size": 4194304, + "mirror_mode": "journal", + "num_objs": 256, + "order": 22, + "block_name_prefix": "rbd_data.10ae2ae8944a", + "name": "img1", + "pool_name": "rbd", + "features": 61, + "primary": true, + "features_name": ["deep-flatten", "exclusive-lock", "fast-diff", "layering", + "object-map"] + } + """ + schema = JObj(sub_elems={ + 'size': JLeaf(int), + 'obj_size': JLeaf(int), + 'num_objs': JLeaf(int), + 'order': JLeaf(int), + 'block_name_prefix': JLeaf(str), + 'name': JLeaf(str), + 'id': JLeaf(str), + 'unique_id': JLeaf(str), + 'image_format': JLeaf(int), + 'pool_name': JLeaf(str), + 'namespace': JLeaf(str, none=True), + 'primary': JLeaf(bool, none=True), + 'features': JLeaf(int), + 'features_name': JList(JLeaf(str)), + 'stripe_count': JLeaf(int, none=True), + 'stripe_unit': JLeaf(int, none=True), + 'parent': JObj(sub_elems={'pool_name': JLeaf(str), + 'pool_namespace': JLeaf(str, none=True), + 'image_name': JLeaf(str), + 'snap_name': JLeaf(str)}, none=True), + 'data_pool': JLeaf(str, none=True), + 'snapshots': JList(JLeaf(dict)), + 'timestamp': JLeaf(str, none=True), + 'disk_usage': JLeaf(int, none=True), + 'total_disk_usage': JLeaf(int, none=True), + 'configuration': JList(JObj(sub_elems={ + 'name': JLeaf(str), + 'source': JLeaf(int), + 'value': JLeaf(str), + })), + 'metadata': JObj({}, allow_unknown=True), + 'mirror_mode': JLeaf(str), + }) + self.assertSchema(img, schema) + + for k, v in kwargs.items(): + if isinstance(v, list): + self.assertSetEqual(set(img[k]), set(v)) + else: + self.assertEqual(img[k], v) + + def _validate_snapshot(self, snap, **kwargs): + self.assertIn('id', snap) + self.assertIn('name', snap) + self.assertIn('is_protected', snap) + self.assertIn('timestamp', snap) + self.assertIn('size', snap) + self.assertIn('children', snap) + + for k, v in kwargs.items(): + if isinstance(v, list): + self.assertSetEqual(set(snap[k]), set(v)) + else: + self.assertEqual(snap[k], v) + + def _validate_snapshot_list(self, snap_list, snap_name=None, **kwargs): + found = False + for snap in snap_list: + self.assertIn('name', snap) + if snap_name and snap['name'] == snap_name: + found = True + self._validate_snapshot(snap, **kwargs) + break + if snap_name and not found: + self.fail("Snapshot {} not found".format(snap_name)) + + def test_list(self): + data = self._get('/api/block/image?offset=0&limit=-1&search=&sort=+name', + version=RbdTest.LIST_VERSION) + self.assertStatus(200) + self.assertEqual(len(data), 2) + + for pool_view in data: + self.assertIsNotNone(pool_view['value']) + self.assertIn('pool_name', pool_view) + self.assertIn(pool_view['pool_name'], ['rbd', 'rbd_iscsi']) + image_list = pool_view['value'] + self.assertEqual(len(image_list), 2) + + for img in image_list: + self.assertIn('name', img) + self.assertIn('pool_name', img) + 
self.assertIn(img['pool_name'], ['rbd', 'rbd_iscsi']) + if img['name'] == 'img1': + self._validate_image(img, size=1073741824, + num_objs=256, obj_size=4194304, + features_name=['deep-flatten', + 'exclusive-lock', + 'fast-diff', + 'layering', + 'object-map']) + elif img['name'] == 'img2': + self._validate_image(img, size=2147483648, + num_objs=512, obj_size=4194304, + features_name=['deep-flatten', + 'exclusive-lock', + 'fast-diff', + 'layering', + 'object-map']) + else: + assert False, "Unexpected image '{}' in result list".format(img['name']) + + def test_create(self): + rbd_name = 'test_rbd' + self.create_image('rbd', None, rbd_name, 10240) + self.assertStatus(201) + + img = self.get_image('rbd', None, 'test_rbd') + self.assertStatus(200) + + self._validate_image(img, name=rbd_name, size=10240, + num_objs=1, obj_size=4194304, + features_name=['deep-flatten', + 'exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + + self.remove_image('rbd', None, rbd_name) + + def test_create_with_configuration(self): + pool = 'rbd' + image_name = 'image_with_config' + size = 10240 + configuration = { + 'rbd_qos_bps_limit': 10240, + 'rbd_qos_bps_burst': 10240 * 2, + } + expected = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 2, + 'value': str(10240), + }, { + 'name': 'rbd_qos_bps_burst', + 'source': 2, + 'value': str(10240 * 2), + }] + + self.create_image(pool, None, image_name, size, configuration=configuration) + self.assertStatus(201) + img = self.get_image('rbd', None, image_name) + self.assertStatus(200) + for conf in expected: + self.assertIn(conf, img['configuration']) + + self.remove_image(pool, None, image_name) + + def test_create_with_metadata(self): + pool = 'rbd' + image_name = 'image_with_meta' + size = 10240 + metadata = { + 'test1': 'test', + 'test2': 'value', + } + + self.create_image(pool, None, image_name, size, metadata=metadata) + self.assertStatus(201) + img = self.get_image('rbd', None, image_name) + self.assertStatus(200) + self.assertEqual(len(metadata), len(img['metadata'])) + for meta in metadata: + self.assertIn(meta, img['metadata']) + + self.remove_image(pool, None, image_name) + + def test_create_rbd_in_data_pool(self): + if not self.bluestore_support: + self.skipTest('requires bluestore cluster') + + self.create_pool('data_pool', 2**4, 'erasure') + + rbd_name = 'test_rbd_in_data_pool' + self.create_image('rbd', None, rbd_name, 10240, data_pool='data_pool') + self.assertStatus(201) + + img = self.get_image('rbd', None, 'test_rbd_in_data_pool') + self.assertStatus(200) + + self._validate_image(img, name=rbd_name, size=10240, + num_objs=1, obj_size=4194304, + data_pool='data_pool', + features_name=['data-pool', 'deep-flatten', + 'exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + + self.remove_image('rbd', None, rbd_name) + self.assertStatus(204) + self._ceph_cmd(['osd', 'pool', 'delete', 'data_pool', 'data_pool', + '--yes-i-really-really-mean-it']) + + def test_create_rbd_twice(self): + res = self.create_image('rbd', None, 'test_rbd_twice', 10240) + + res = self.create_image('rbd', None, 'test_rbd_twice', 10240) + self.assertStatus(400) + self.assertEqual(res, {"code": '17', 'status': 400, "component": "rbd", + "detail": "[errno 17] RBD image already exists (error creating " + "image)", + 'task': {'name': 'rbd/create', + 'metadata': {'pool_name': 'rbd', 'namespace': None, + 'image_name': 'test_rbd_twice'}}}) + self.remove_image('rbd', None, 'test_rbd_twice') + self.assertStatus(204) + + def test_snapshots_and_clone_info(self): + 
self.create_snapshot('rbd', None, 'img1', 'snap1', False) + self.create_snapshot('rbd', None, 'img1', 'snap2', False) + self._rbd_cmd(['snap', 'protect', 'rbd/img1@snap1']) + self._rbd_cmd(['clone', 'rbd/img1@snap1', 'rbd_iscsi/img1_clone']) + + img = self.get_image('rbd', None, 'img1') + self.assertStatus(200) + self._validate_image(img, name='img1', size=1073741824, + num_objs=256, obj_size=4194304, parent=None, + features_name=['deep-flatten', 'exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + for snap in img['snapshots']: + if snap['name'] == 'snap1': + self._validate_snapshot(snap, is_protected=True) + self.assertEqual(len(snap['children']), 1) + self.assertDictEqual(snap['children'][0], + {'pool_name': 'rbd_iscsi', + 'image_name': 'img1_clone'}) + elif snap['name'] == 'snap2': + self._validate_snapshot(snap, is_protected=False) + + img = self.get_image('rbd_iscsi', None, 'img1_clone') + self.assertStatus(200) + self._validate_image(img, name='img1_clone', size=1073741824, + num_objs=256, obj_size=4194304, + parent={'pool_name': 'rbd', 'pool_namespace': '', + 'image_name': 'img1', 'snap_name': 'snap1'}, + features_name=['deep-flatten', 'exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + self.remove_image('rbd_iscsi', None, 'img1_clone') + self.assertStatus(204) + + def test_disk_usage(self): + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '50M', 'rbd/img2']) + self.create_snapshot('rbd', None, 'img2', 'snap1', False) + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '20M', 'rbd/img2']) + self.create_snapshot('rbd', None, 'img2', 'snap2', False) + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '10M', 'rbd/img2']) + self.create_snapshot('rbd', None, 'img2', 'snap3', False) + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '5M', 'rbd/img2']) + img = self.get_image('rbd', None, 'img2') + self.assertStatus(200) + self._validate_image(img, name='img2', size=2147483648, + total_disk_usage=268435456, disk_usage=67108864) + + def test_delete_non_existent_image(self): + res = self.remove_image('rbd', None, 'i_dont_exist') + self.assertStatus(404) + self.assertEqual(res, {u'code': 404, "status": 404, "component": None, + "detail": "(404, 'Image not found')", + 'task': {'name': 'rbd/delete', + 'metadata': {'image_spec': 'rbd/i_dont_exist'}}}) + + def test_image_delete(self): + self.create_image('rbd', None, 'delete_me', 2**30) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'delete_me', 'snap1', False) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'delete_me', 'snap2', False) + self.assertStatus(201) + + img = self.get_image('rbd', None, 'delete_me') + self.assertStatus(200) + self._validate_image(img, name='delete_me', size=2**30) + self.assertEqual(len(img['snapshots']), 2) + + self.remove_snapshot('rbd', None, 'delete_me', 'snap1') + self.assertStatus(204) + self.remove_snapshot('rbd', None, 'delete_me', 'snap2') + self.assertStatus(204) + + img = self.get_image('rbd', None, 'delete_me') + self.assertStatus(200) + self._validate_image(img, name='delete_me', size=2**30) + self.assertEqual(len(img['snapshots']), 0) + + self.remove_image('rbd', None, 'delete_me') + self.assertStatus(204) + + def test_image_delete_with_snapshot(self): + self.create_image('rbd', None, 'delete_me', 2**30) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'delete_me', 'snap1', False) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'delete_me', 'snap2', False) + self.assertStatus(201) + + img 
= self.get_image('rbd', None, 'delete_me') + self.assertStatus(200) + self._validate_image(img, name='delete_me', size=2**30) + self.assertEqual(len(img['snapshots']), 2) + + self.remove_image('rbd', None, 'delete_me') + self.assertStatus(204) + + def test_image_rename(self): + self.create_image('rbd', None, 'edit_img', 2**30) + self.assertStatus(201) + self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self.edit_image('rbd', None, 'edit_img', 'new_edit_img') + self.assertStatus(200) + self.get_image('rbd', None, 'edit_img') + self.assertStatus(404) + self.get_image('rbd', None, 'new_edit_img') + self.assertStatus(200) + self.remove_image('rbd', None, 'new_edit_img') + self.assertStatus(204) + + def test_image_resize(self): + self.create_image('rbd', None, 'edit_img', 2**30) + self.assertStatus(201) + img = self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self._validate_image(img, size=2**30) + self.edit_image('rbd', None, 'edit_img', size=2*2**30) + self.assertStatus(200) + img = self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self._validate_image(img, size=2*2**30) + self.remove_image('rbd', None, 'edit_img') + self.assertStatus(204) + + def test_image_change_features(self): + self.create_image('rbd', None, 'edit_img', 2**30, features=["layering"]) + self.assertStatus(201) + img = self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self._validate_image(img, features_name=["layering"]) + self.edit_image('rbd', None, 'edit_img', + features=["fast-diff", "object-map", "exclusive-lock"]) + self.assertStatus(200) + img = self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self._validate_image(img, features_name=['exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + self.edit_image('rbd', None, 'edit_img', + features=["journaling", "exclusive-lock"]) + self.assertStatus(200) + img = self.get_image('rbd', None, 'edit_img') + self.assertStatus(200) + self._validate_image(img, features_name=['exclusive-lock', + 'journaling', 'layering']) + self.remove_image('rbd', None, 'edit_img') + self.assertStatus(204) + + def test_image_change_config(self): + pool = 'rbd' + image = 'image_with_config' + initial_conf = { + 'rbd_qos_bps_limit': 10240, + 'rbd_qos_write_iops_limit': None + } + initial_expect = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 2, + 'value': '10240', + }, { + 'name': 'rbd_qos_write_iops_limit', + 'source': 0, + 'value': '0', + }] + new_conf = { + 'rbd_qos_bps_limit': 0, + 'rbd_qos_bps_burst': 20480, + 'rbd_qos_write_iops_limit': None + } + new_expect = [{ + 'name': 'rbd_qos_bps_limit', + 'source': 2, + 'value': '0', + }, { + 'name': 'rbd_qos_bps_burst', + 'source': 2, + 'value': '20480', + }, { + 'name': 'rbd_qos_write_iops_limit', + 'source': 0, + 'value': '0', + }] + + self.create_image(pool, None, image, 2**30, configuration=initial_conf) + self.assertStatus(201) + img = self.get_image(pool, None, image) + self.assertStatus(200) + for conf in initial_expect: + self.assertIn(conf, img['configuration']) + + self.edit_image(pool, None, image, configuration=new_conf) + img = self.get_image(pool, None, image) + self.assertStatus(200) + for conf in new_expect: + self.assertIn(conf, img['configuration']) + + self.remove_image(pool, None, image) + self.assertStatus(204) + + def test_image_change_meta(self): + pool = 'rbd' + image = 'image_with_meta' + initial_meta = { + 'test1': 'test', + 'test2': 'value', + 'test3': None, + } + initial_expect = { + 'test1': 'test', + 'test2': 'value', + } + 
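+ # Note: judging by the expected values in this test, metadata keys set to None are + # dropped when editing - 'test3' is absent from initial_expect above, and 'test1' and + # 'test4' will be absent from new_expect below.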
new_meta = { + 'test1': None, + 'test2': 'new_value', + 'test3': 'value', + 'test4': None, + } + new_expect = { + 'test2': 'new_value', + 'test3': 'value', + } + + self.create_image(pool, None, image, 2**30, metadata=initial_meta) + self.assertStatus(201) + img = self.get_image(pool, None, image) + self.assertStatus(200) + self.assertEqual(len(initial_expect), len(img['metadata'])) + for meta in initial_expect: + self.assertIn(meta, img['metadata']) + + self.edit_image(pool, None, image, metadata=new_meta) + img = self.get_image(pool, None, image) + self.assertStatus(200) + self.assertEqual(len(new_expect), len(img['metadata'])) + for meta in new_expect: + self.assertIn(meta, img['metadata']) + + self.remove_image(pool, None, image) + self.assertStatus(204) + + def test_update_snapshot(self): + self.create_snapshot('rbd', None, 'img1', 'snap5', False) + self.assertStatus(201) + img = self.get_image('rbd', None, 'img1') + self._validate_snapshot_list(img['snapshots'], 'snap5', is_protected=False) + + self.update_snapshot('rbd', None, 'img1', 'snap5', 'snap6', None) + self.assertStatus(200) + img = self.get_image('rbd', None, 'img1') + self._validate_snapshot_list(img['snapshots'], 'snap6', is_protected=False) + + self.update_snapshot('rbd', None, 'img1', 'snap6', None, True) + self.assertStatus(200) + img = self.get_image('rbd', None, 'img1') + self._validate_snapshot_list(img['snapshots'], 'snap6', is_protected=True) + + self.update_snapshot('rbd', None, 'img1', 'snap6', 'snap5', False) + self.assertStatus(200) + img = self.get_image('rbd', None, 'img1') + self._validate_snapshot_list(img['snapshots'], 'snap5', is_protected=False) + + self.remove_snapshot('rbd', None, 'img1', 'snap5') + self.assertStatus(204) + + def test_snapshot_rollback(self): + self.create_image('rbd', None, 'rollback_img', 2**30, + features=["layering", "exclusive-lock", "fast-diff", + "object-map"]) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'rollback_img', 'snap1', False) + self.assertStatus(201) + + img = self.get_image('rbd', None, 'rollback_img') + self.assertStatus(200) + self.assertEqual(img['disk_usage'], 0) + + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '5M', + 'rbd/rollback_img']) + + img = self.get_image('rbd', None, 'rollback_img') + self.assertStatus(200) + self.assertGreater(img['disk_usage'], 0) + + self.rollback_snapshot('rbd', None, 'rollback_img', 'snap1') + self.assertStatus([201, 200]) + + img = self.get_image('rbd', None, 'rollback_img') + self.assertStatus(200) + self.assertEqual(img['disk_usage'], 0) + + self.remove_snapshot('rbd', None, 'rollback_img', 'snap1') + self.assertStatus(204) + self.remove_image('rbd', None, 'rollback_img') + self.assertStatus(204) + + def test_clone(self): + self.create_image('rbd', None, 'cimg', 2**30, features=["layering"], + metadata={'key1': 'val1'}) + self.assertStatus(201) + self.create_snapshot('rbd', None, 'cimg', 'snap1', False) + self.assertStatus(201) + self.update_snapshot('rbd', None, 'cimg', 'snap1', None, True) + self.assertStatus(200) + self.clone_image('rbd', None, 'cimg', 'snap1', 'rbd', None, 'cimg-clone', + features=["layering", "exclusive-lock", "fast-diff", + "object-map"], + metadata={'key1': None, 'key2': 'val2'}) + self.assertStatus([200, 201]) + + img = self.get_image('rbd', None, 'cimg-clone') + self.assertStatus(200) + self._validate_image(img, features_name=['exclusive-lock', + 'fast-diff', 'layering', + 'object-map'], + parent={'pool_name': 'rbd', 'pool_namespace': '', + 'image_name': 'cimg', 
'snap_name': 'snap1'}, + metadata={'key2': 'val2'}) + + res = self.remove_image('rbd', None, 'cimg') + self.assertStatus(400) + self.assertIn('code', res) + self.assertEqual(res['code'], '16') + + self.remove_image('rbd', None, 'cimg-clone') + self.assertStatus(204) + self.remove_image('rbd', None, 'cimg') + self.assertStatus(204) + + def test_copy(self): + self.create_image('rbd', None, 'coimg', 2**30, + features=["layering", "exclusive-lock", "fast-diff", + "object-map"], + metadata={'key1': 'val1'}) + self.assertStatus(201) + + self._rbd_cmd(['bench', '--io-type', 'write', '--io-total', '5M', + 'rbd/coimg']) + + self.copy_image('rbd', None, 'coimg', 'rbd_iscsi', None, 'coimg-copy', + features=["layering", "fast-diff", "exclusive-lock", + "object-map"], + metadata={'key1': None, 'key2': 'val2'}) + self.assertStatus([200, 201]) + + img = self.get_image('rbd', None, 'coimg') + self.assertStatus(200) + self._validate_image(img, features_name=['layering', 'exclusive-lock', + 'fast-diff', 'object-map'], + metadata={'key1': 'val1'}) + + img_copy = self.get_image('rbd_iscsi', None, 'coimg-copy') + self._validate_image(img_copy, features_name=['exclusive-lock', + 'fast-diff', 'layering', + 'object-map'], + metadata={'key2': 'val2'}, + disk_usage=img['disk_usage']) + + self.remove_image('rbd', None, 'coimg') + self.assertStatus(204) + self.remove_image('rbd_iscsi', None, 'coimg-copy') + self.assertStatus(204) + + def test_flatten(self): + self.create_snapshot('rbd', None, 'img1', 'snapf', False) + self.update_snapshot('rbd', None, 'img1', 'snapf', None, True) + self.clone_image('rbd', None, 'img1', 'snapf', 'rbd_iscsi', None, 'img1_snapf_clone') + + img = self.get_image('rbd_iscsi', None, 'img1_snapf_clone') + self.assertStatus(200) + self.assertIsNotNone(img['parent']) + + self.flatten_image('rbd_iscsi', None, 'img1_snapf_clone') + self.assertStatus([200, 201]) + + img = self.get_image('rbd_iscsi', None, 'img1_snapf_clone') + self.assertStatus(200) + self.assertIsNone(img['parent']) + + self.update_snapshot('rbd', None, 'img1', 'snapf', None, False) + self.remove_snapshot('rbd', None, 'img1', 'snapf') + self.assertStatus(204) + + self.remove_image('rbd_iscsi', None, 'img1_snapf_clone') + self.assertStatus(204) + + def test_default_features(self): + default_features = self._get('/api/block/image/default_features') + self.assertEqual(default_features, [ + 'deep-flatten', 'exclusive-lock', 'fast-diff', 'layering', 'object-map']) + + def test_clone_format_version(self): + config_name = 'rbd_default_clone_format' + + def _get_config_by_name(conf_name): + data = self._get('/api/cluster_conf/{}'.format(conf_name)) + if 'value' in data: + return data['value'] + return None + + # with rbd_default_clone_format = auto + clone_format_version = self._get('/api/block/image/clone_format_version') + self.assertEqual(clone_format_version, 1) + self.assertStatus(200) + + # with rbd_default_clone_format = 1 + value = [{'section': "global", 'value': "1"}] + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': value + }) + self.wait_until_equal( + lambda: _get_config_by_name(config_name), + value, + timeout=60) + clone_format_version = self._get('/api/block/image/clone_format_version') + self.assertEqual(clone_format_version, 1) + self.assertStatus(200) + + # with rbd_default_clone_format = 2 + value = [{'section': "global", 'value': "2"}] + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': value + }) + self.wait_until_equal( + lambda: _get_config_by_name(config_name), + value, + 
timeout=60) + clone_format_version = self._get('/api/block/image/clone_format_version') + self.assertEqual(clone_format_version, 2) + self.assertStatus(200) + + value = [] + self._post('/api/cluster_conf', { + 'name': config_name, + 'value': value + }) + self.wait_until_equal( + lambda: _get_config_by_name(config_name), + None, + timeout=60) + + def test_image_with_namespace(self): + self.create_namespace('rbd', 'ns') + self.create_image('rbd', 'ns', 'test', 10240) + self.assertStatus(201) + + img = self.get_image('rbd', 'ns', 'test') + self.assertStatus(200) + + self._validate_image(img, name='test', size=10240, + pool_name='rbd', namespace='ns', + num_objs=1, obj_size=4194304, + features_name=['deep-flatten', + 'exclusive-lock', + 'fast-diff', 'layering', + 'object-map']) + + self.remove_image('rbd', 'ns', 'test') + self.remove_namespace('rbd', 'ns') + + def test_move_image_to_trash(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd') + + self.get_image('rbd', None, 'test_rbd') + self.assertStatus(404) + + time.sleep(1) + + image = self.get_trash('rbd', img_id) + self.assertIsNotNone(image) + + self.remove_trash('rbd', img_id) + + def test_list_trash(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd', 0) + data = self._get('/api/block/image/trash/?pool_name={}'.format('rbd')) + self.assertStatus(200) + self.assertIsInstance(data, list) + self.assertIsNotNone(data) + + self.remove_trash('rbd', img_id) + self.assertStatus(204) + + def test_restore_trash(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd') + + self.restore_trash('rbd', None, img_id, 'test_rbd') + + self.get_image('rbd', None, 'test_rbd') + self.assertStatus(200) + + image = self.get_trash('rbd', img_id) + self.assertIsNone(image) + + self.remove_image('rbd', None, 'test_rbd') + + def test_remove_expired_trash(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd', 0) + self.remove_trash('rbd', img_id, False) + self.assertStatus(204) + + image = self.get_trash('rbd', img_id) + self.assertIsNone(image) + + def test_remove_not_expired_trash(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd', 9999) + self.remove_trash('rbd', img_id, False) + self.assertStatus(400) + + time.sleep(1) + + image = self.get_trash('rbd', img_id) + self.assertIsNotNone(image) + + self.remove_trash('rbd', img_id, True) + + def test_remove_not_expired_trash_with_force(self): + img_id = self.create_image_in_trash('rbd', 'test_rbd', 9999) + self.remove_trash('rbd', img_id, True) + self.assertStatus(204) + + image = self.get_trash('rbd', img_id) + self.assertIsNone(image) + + def test_purge_trash(self): + id_expired = self.create_image_in_trash('rbd', 'test_rbd_expired', 0) + id_not_expired = self.create_image_in_trash('rbd', 'test_rbd', 9999) + + time.sleep(1) + + self.purge_trash('rbd') + self.assertStatus([200, 201]) + + time.sleep(1) + + trash_not_expired = self.get_trash('rbd', id_not_expired) + self.assertIsNotNone(trash_not_expired) + + self.wait_until_equal(lambda: self.get_trash('rbd', id_expired), None, 60) + + def test_list_namespaces(self): + self.create_namespace('rbd', 'ns') + + namespaces = self._get('/api/block/pool/rbd/namespace') + self.assertStatus(200) + self.assertEqual(len(namespaces), 1) + + self.remove_namespace('rbd', 'ns') diff --git a/qa/tasks/mgr/dashboard/test_rbd_mirroring.py b/qa/tasks/mgr/dashboard/test_rbd_mirroring.py new file mode 100644 index 000000000..b6a86e405 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_rbd_mirroring.py @@ -0,0 +1,195 @@ +# -*- coding: utf-8 
-*- +# pylint: disable=too-many-public-methods + +from __future__ import absolute_import + +from .helper import DashboardTestCase + + +class RbdMirroringTest(DashboardTestCase): + AUTH_ROLES = ['pool-manager', 'block-manager'] + + @classmethod + def get_pool(cls, pool): + data = cls._get('/api/block/mirroring/pool/{}'.format(pool)) + if isinstance(data, dict): + return data + return {} + + @classmethod + def update_pool(cls, pool, mirror_mode): + data = {'mirror_mode': mirror_mode} + return cls._task_put('/api/block/mirroring/pool/{}'.format(pool), + data) + + @classmethod + def list_peers(cls, pool): + data = cls._get('/api/block/mirroring/pool/{}/peer'.format(pool)) + if isinstance(data, list): + return data + return [] + + @classmethod + def get_peer(cls, pool, peer_uuid): + data = cls._get('/api/block/mirroring/pool/{}/peer/{}'.format(pool, peer_uuid)) + if isinstance(data, dict): + return data + return {} + + @classmethod + def create_peer(cls, pool, cluster_name, client_id, **kwargs): + data = {'cluster_name': cluster_name, 'client_id': client_id} + data.update(kwargs) + return cls._task_post('/api/block/mirroring/pool/{}/peer'.format(pool), + data) + + @classmethod + def update_peer(cls, pool, peer_uuid, **kwargs): + return cls._task_put('/api/block/mirroring/pool/{}/peer/{}'.format(pool, peer_uuid), + kwargs) + + @classmethod + def delete_peer(cls, pool, peer_uuid): + return cls._task_delete('/api/block/mirroring/pool/{}/peer/{}'.format(pool, peer_uuid)) + + @classmethod + def setUpClass(cls): + super(RbdMirroringTest, cls).setUpClass() + cls.create_pool('rbd', 2**3, 'replicated') + + @classmethod + def tearDownClass(cls): + super(RbdMirroringTest, cls).tearDownClass() + cls._ceph_cmd(['osd', 'pool', 'delete', 'rbd', 'rbd', '--yes-i-really-really-mean-it']) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['create', 'update', 'delete']}]) + def test_read_access_permissions(self): + self.get_pool('rbd') + self.assertStatus(403) + self.list_peers('rbd') + self.assertStatus(403) + self.get_peer('rbd', '123') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['read', 'update', 'delete']}]) + def test_create_access_permissions(self): + self.create_peer('rbd', 'remote', 'id') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['read', 'create', 'delete']}]) + def test_update_access_permissions(self): + self.update_peer('rbd', '123') + self.assertStatus(403) + + @DashboardTestCase.RunAs('test', 'test', [{'rbd-mirroring': ['read', 'create', 'update']}]) + def test_delete_access_permissions(self): + self.delete_peer('rbd', '123') + self.assertStatus(403) + + def test_mirror_mode(self): + self.update_pool('rbd', 'disabled') + mode = self.get_pool('rbd').get('mirror_mode') + self.assertEqual(mode, 'disabled') + + self.update_pool('rbd', 'image') + mode = self.get_pool('rbd').get('mirror_mode') + self.assertEqual(mode, 'image') + + self.update_pool('rbd', 'pool') + mode = self.get_pool('rbd').get('mirror_mode') + self.assertEqual(mode, 'pool') + + self.update_pool('rbd', 'disabled') + mode = self.get_pool('rbd').get('mirror_mode') + self.assertEqual(mode, 'disabled') + + def test_set_invalid_mirror_mode(self): + self.update_pool('rbd', 'invalid') + self.assertStatus(400) + + def test_set_same_mirror_mode(self): + self.update_pool('rbd', 'disabled') + self.update_pool('rbd', 'disabled') + self.assertStatus(200) + + def test_peer(self): + self.update_pool('rbd', 'image') + self.assertStatus(200) + + peers 
= self.list_peers('rbd') + self.assertStatus(200) + self.assertEqual([], peers) + + uuid = self.create_peer('rbd', 'remote', 'admin')['uuid'] + self.assertStatus(201) + + peers = self.list_peers('rbd') + self.assertStatus(200) + self.assertEqual([uuid], peers) + + expected_peer = { + 'uuid': uuid, + 'cluster_name': 'remote', + 'site_name': 'remote', + 'client_id': 'admin', + 'mon_host': '', + 'key': '', + 'direction': 'rx-tx', + 'mirror_uuid': '' + } + peer = self.get_peer('rbd', uuid) + self.assertEqual(expected_peer, peer) + + self.update_peer('rbd', uuid, mon_host='1.2.3.4') + self.assertStatus(200) + + expected_peer['mon_host'] = '1.2.3.4' + peer = self.get_peer('rbd', uuid) + self.assertEqual(expected_peer, peer) + + self.delete_peer('rbd', uuid) + self.assertStatus(204) + + self.update_pool('rbd', 'disabled') + self.assertStatus(200) + + def test_disable_mirror_with_peers(self): + self.update_pool('rbd', 'image') + self.assertStatus(200) + + uuid = self.create_peer('rbd', 'remote', 'admin')['uuid'] + self.assertStatus(201) + + self.update_pool('rbd', 'disabled') + self.assertStatus(400) + + self.delete_peer('rbd', uuid) + self.assertStatus(204) + + self.update_pool('rbd', 'disabled') + self.assertStatus(200) + + def test_site_name(self): + expected_site_name = {'site_name': 'site-a'} + self._task_put('/api/block/mirroring/site_name', expected_site_name) + self.assertStatus(200) + + site_name = self._get('/api/block/mirroring/site_name') + self.assertStatus(200) + self.assertEqual(expected_site_name, site_name) + + def test_bootstrap(self): + self.update_pool('rbd', 'image') + token_data = self._task_post('/api/block/mirroring/pool/rbd/bootstrap/token', {}) + self.assertStatus(200) + + import_data = { + 'token': token_data['token'], + 'direction': 'invalid'} + self._task_post('/api/block/mirroring/pool/rbd/bootstrap/peer', import_data) + self.assertStatus(400) + + # cannot import "yourself" as a peer + import_data['direction'] = 'rx' + self._task_post('/api/block/mirroring/pool/rbd/bootstrap/peer', import_data) + self.assertStatus(400) diff --git a/qa/tasks/mgr/dashboard/test_requests.py b/qa/tasks/mgr/dashboard/test_requests.py new file mode 100644 index 000000000..834ba174a --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_requests.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from . 
import DEFAULT_API_VERSION +from .helper import DashboardTestCase + + +class RequestsTest(DashboardTestCase): + def test_gzip(self): + self._get('/api/summary') + self.assertHeaders({ + 'Content-Encoding': 'gzip', + 'Content-Type': 'application/vnd.ceph.api.v{}+json'.format(DEFAULT_API_VERSION) + }) + + def test_force_no_gzip(self): + self._get('/api/summary', headers={'Accept-Encoding': 'identity'}) + self.assertNotIn('Content-Encoding', self._resp.headers) + self.assertHeaders({ + 'Content-Type': 'application/vnd.ceph.api.v{}+json'.format(DEFAULT_API_VERSION) + }) + + def test_server(self): + self._get('/api/summary') + self.assertHeaders({ + 'server': 'Ceph-Dashboard', + 'Content-Type': 'application/vnd.ceph.api.v{}+json'.format(DEFAULT_API_VERSION), + 'Content-Security-Policy': "frame-ancestors 'self';", + 'X-Content-Type-Options': 'nosniff', + 'Strict-Transport-Security': 'max-age=63072000; includeSubDomains; preload' + }) diff --git a/qa/tasks/mgr/dashboard/test_rgw.py b/qa/tasks/mgr/dashboard/test_rgw.py new file mode 100644 index 000000000..01dbae59f --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_rgw.py @@ -0,0 +1,868 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +import base64 +import logging +import time +from urllib import parse + +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives.hashes import SHA1 +from cryptography.hazmat.primitives.twofactor.totp import TOTP + +from .helper import DashboardTestCase, JLeaf, JList, JObj + +logger = logging.getLogger(__name__) + + +class RgwTestCase(DashboardTestCase): + + maxDiff = None + create_test_user = False + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + super(RgwTestCase, cls).setUpClass() + # Create the administrator account. + cls._radosgw_admin_cmd([ + 'user', 'create', '--uid', 'admin', '--display-name', 'admin', + '--system', '--access-key', 'admin', '--secret', 'admin' + ]) + # Update the dashboard configuration. + cls._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-secret-key'], 'admin') + cls._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-access-key'], 'admin') + # Create a test user? + if cls.create_test_user: + cls._radosgw_admin_cmd([ + 'user', 'create', '--uid', 'teuth-test-user', '--display-name', + 'teuth-test-user' + ]) + cls._radosgw_admin_cmd([ + 'caps', 'add', '--uid', 'teuth-test-user', '--caps', + 'metadata=write' + ]) + cls._radosgw_admin_cmd([ + 'subuser', 'create', '--uid', 'teuth-test-user', '--subuser', + 'teuth-test-subuser', '--access', 'full', '--key-type', 's3', + '--access-key', 'xyz123' + ]) + cls._radosgw_admin_cmd([ + 'subuser', 'create', '--uid', 'teuth-test-user', '--subuser', + 'teuth-test-subuser2', '--access', 'full', '--key-type', + 'swift' + ]) + + @classmethod + def tearDownClass(cls): + # Delete administrator account. 
+ cls._radosgw_admin_cmd(['user', 'rm', '--uid', 'admin']) + if cls.create_test_user: + cls._radosgw_admin_cmd(['user', 'rm', '--uid=teuth-test-user', '--purge-data']) + super(RgwTestCase, cls).tearDownClass() + + def get_rgw_user(self, uid, stats=True): + return self._get('/api/rgw/user/{}?stats={}'.format(uid, stats)) + + +class RgwApiCredentialsTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + def test_invalid_credentials(self): + self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-secret-key'], 'invalid') + self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-access-key'], 'invalid') + resp = self._get('/api/rgw/user') + self.assertStatus(404) + self.assertIn('detail', resp) + self.assertIn('component', resp) + self.assertIn('Error connecting to Object Gateway', resp['detail']) + self.assertEqual(resp['component'], 'rgw') + + def test_success(self): + # Set the default credentials. + self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-secret-key'], 'admin') + self._ceph_cmd_with_secret(['dashboard', 'set-rgw-api-access-key'], 'admin') + data = self._get('/ui-api/rgw/status') + self.assertStatus(200) + self.assertIn('available', data) + self.assertIn('message', data) + self.assertTrue(data['available']) + + +class RgwSiteTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + def test_get_placement_targets(self): + data = self._get('/api/rgw/site?query=placement-targets') + self.assertStatus(200) + self.assertSchema(data, JObj({ + 'zonegroup': str, + 'placement_targets': JList(JObj({ + 'name': str, + 'data_pool': str + })) + })) + + def test_get_realms(self): + data = self._get('/api/rgw/site?query=realms') + self.assertStatus(200) + self.assertSchema(data, JList(str)) + + +class RgwBucketTest(RgwTestCase): + + _mfa_token_serial = '1' + _mfa_token_seed = '23456723' + _mfa_token_time_step = 2 + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + cls.create_test_user = True + super(RgwBucketTest, cls).setUpClass() + # Create MFA TOTP token for test user. + cls._radosgw_admin_cmd([ + 'mfa', 'create', '--uid', 'teuth-test-user', '--totp-serial', cls._mfa_token_serial, + '--totp-seed', cls._mfa_token_seed, '--totp-seed-type', 'base32', + '--totp-seconds', str(cls._mfa_token_time_step), '--totp-window', '1' + ]) + # Create tenanted users. + cls._radosgw_admin_cmd([ + 'user', 'create', '--tenant', 'testx', '--uid', 'teuth-test-user', + '--display-name', 'tenanted teuth-test-user' + ]) + cls._radosgw_admin_cmd([ + 'user', 'create', '--tenant', 'testx2', '--uid', 'teuth-test-user2', + '--display-name', 'tenanted teuth-test-user 2' + ]) + + @classmethod + def tearDownClass(cls): + cls._radosgw_admin_cmd( + ['user', 'rm', '--tenant', 'testx', '--uid=teuth-test-user', '--purge-data']) + cls._radosgw_admin_cmd( + ['user', 'rm', '--tenant', 'testx2', '--uid=teuth-test-user2', '--purge-data']) + super(RgwBucketTest, cls).tearDownClass() + + def _get_mfa_token_pin(self): + totp_key = base64.b32decode(self._mfa_token_seed) + totp = TOTP(totp_key, 6, SHA1(), self._mfa_token_time_step, backend=default_backend(), + enforce_key_length=False) + time_value = int(time.time()) + return totp.generate(time_value) + + def test_all(self): + # Create a new bucket. 
+ self._post( + '/api/rgw/bucket', + params={ + 'bucket': 'teuth-test-bucket', + 'uid': 'admin', + 'zonegroup': 'default', + 'placement_target': 'default-placement' + }) + self.assertStatus(201) + data = self.jsonBody() + self.assertSchema(data, JObj(sub_elems={ + 'bucket_info': JObj(sub_elems={ + 'bucket': JObj(allow_unknown=True, sub_elems={ + 'name': JLeaf(str), + 'bucket_id': JLeaf(str), + 'tenant': JLeaf(str) + }), + 'quota': JObj(sub_elems={}, allow_unknown=True), + 'creation_time': JLeaf(str) + }, allow_unknown=True) + }, allow_unknown=True)) + data = data['bucket_info']['bucket'] + self.assertEqual(data['name'], 'teuth-test-bucket') + self.assertEqual(data['tenant'], '') + + # List all buckets. + data = self._get('/api/rgw/bucket', version='1.1') + self.assertStatus(200) + self.assertEqual(len(data), 1) + self.assertIn('teuth-test-bucket', data) + + # List all buckets with stats. + data = self._get('/api/rgw/bucket?stats=true', version='1.1') + self.assertStatus(200) + self.assertEqual(len(data), 1) + self.assertSchema(data[0], JObj(sub_elems={ + 'bid': JLeaf(str), + 'bucket': JLeaf(str), + 'bucket_quota': JObj(sub_elems={}, allow_unknown=True), + 'id': JLeaf(str), + 'owner': JLeaf(str), + 'usage': JObj(sub_elems={}, allow_unknown=True), + 'tenant': JLeaf(str), + }, allow_unknown=True)) + + # List all buckets names without stats. + data = self._get('/api/rgw/bucket?stats=false', version='1.1') + self.assertStatus(200) + self.assertEqual(data, ['teuth-test-bucket']) + + # Get the bucket. + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertSchema(data, JObj(sub_elems={ + 'id': JLeaf(str), + 'bid': JLeaf(str), + 'tenant': JLeaf(str), + 'bucket': JLeaf(str), + 'bucket_quota': JObj(sub_elems={}, allow_unknown=True), + 'owner': JLeaf(str), + 'mfa_delete': JLeaf(str), + 'usage': JObj(sub_elems={}, allow_unknown=True), + 'versioning': JLeaf(str) + }, allow_unknown=True)) + self.assertEqual(data['bucket'], 'teuth-test-bucket') + self.assertEqual(data['owner'], 'admin') + self.assertEqual(data['placement_rule'], 'default-placement') + self.assertEqual(data['versioning'], 'Suspended') + + # Update bucket: change owner, enable versioning. + self._put( + '/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'teuth-test-user', + 'versioning_state': 'Enabled' + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertSchema(data, JObj(sub_elems={ + 'owner': JLeaf(str), + 'bid': JLeaf(str), + 'tenant': JLeaf(str) + }, allow_unknown=True)) + self.assertEqual(data['owner'], 'teuth-test-user') + self.assertEqual(data['versioning'], 'Enabled') + + # Update bucket: enable MFA Delete. + self._put( + '/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'teuth-test-user', + 'versioning_state': 'Enabled', + 'mfa_delete': 'Enabled', + 'mfa_token_serial': self._mfa_token_serial, + 'mfa_token_pin': self._get_mfa_token_pin() + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertEqual(data['versioning'], 'Enabled') + self.assertEqual(data['mfa_delete'], 'Enabled') + + # Update bucket: disable versioning & MFA Delete. + time.sleep(self._mfa_token_time_step * 3) # Required to get new TOTP pin. 
+ self._put( + '/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'teuth-test-user', + 'versioning_state': 'Suspended', + 'mfa_delete': 'Disabled', + 'mfa_token_serial': self._mfa_token_serial, + 'mfa_token_pin': self._get_mfa_token_pin() + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertEqual(data['versioning'], 'Suspended') + self.assertEqual(data['mfa_delete'], 'Disabled') + + # Delete the bucket. + self._delete('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(204) + data = self._get('/api/rgw/bucket', version='1.1') + self.assertStatus(200) + self.assertEqual(len(data), 0) + + def test_crud_w_tenant(self): + # Create a new bucket. The tenant of the user is used when + # the bucket is created. + self._post( + '/api/rgw/bucket', + params={ + 'bucket': 'teuth-test-bucket', + 'uid': 'testx$teuth-test-user', + 'zonegroup': 'default', + 'placement_target': 'default-placement' + }) + self.assertStatus(201) + # It's not possible to validate the result because there + # IS NO result object returned by the RGW Admin OPS API + # when a tenanted bucket is created. + data = self.jsonBody() + self.assertIsNone(data) + + # List all buckets. + data = self._get('/api/rgw/bucket', version='1.1') + self.assertStatus(200) + self.assertEqual(len(data), 1) + self.assertIn('testx/teuth-test-bucket', data) + + def _verify_tenant_bucket(bucket, tenant, uid): + full_bucket_name = '{}/{}'.format(tenant, bucket) + _data = self._get('/api/rgw/bucket/{}'.format( + parse.quote_plus(full_bucket_name))) + self.assertStatus(200) + self.assertSchema(_data, JObj(sub_elems={ + 'owner': JLeaf(str), + 'bucket': JLeaf(str), + 'tenant': JLeaf(str), + 'bid': JLeaf(str) + }, allow_unknown=True)) + self.assertEqual(_data['owner'], '{}${}'.format(tenant, uid)) + self.assertEqual(_data['bucket'], bucket) + self.assertEqual(_data['tenant'], tenant) + self.assertEqual(_data['bid'], full_bucket_name) + return _data + + # Get the bucket. + data = _verify_tenant_bucket('teuth-test-bucket', 'testx', 'teuth-test-user') + self.assertEqual(data['placement_rule'], 'default-placement') + self.assertEqual(data['versioning'], 'Suspended') + + # Update bucket: different user with different tenant, enable versioning. + self._put( + '/api/rgw/bucket/{}'.format( + parse.quote_plus('testx/teuth-test-bucket')), + params={ + 'bucket_id': data['id'], + 'uid': 'testx2$teuth-test-user2', + 'versioning_state': 'Enabled' + }) + data = _verify_tenant_bucket('teuth-test-bucket', 'testx2', 'teuth-test-user2') + self.assertEqual(data['versioning'], 'Enabled') + + # Change owner to a non-tenanted user + self._put( + '/api/rgw/bucket/{}'.format( + parse.quote_plus('testx2/teuth-test-bucket')), + params={ + 'bucket_id': data['id'], + 'uid': 'admin' + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertIn('owner', data) + self.assertEqual(data['owner'], 'admin') + self.assertEqual(data['tenant'], '') + self.assertEqual(data['bucket'], 'teuth-test-bucket') + self.assertEqual(data['bid'], 'teuth-test-bucket') + self.assertEqual(data['versioning'], 'Enabled') + + # Change owner back to tenanted user, suspend versioning. 
+ self._put( + '/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'testx$teuth-test-user', + 'versioning_state': 'Suspended' + }) + self.assertStatus(200) + data = _verify_tenant_bucket('teuth-test-bucket', 'testx', 'teuth-test-user') + self.assertEqual(data['versioning'], 'Suspended') + + # Delete the bucket. + self._delete('/api/rgw/bucket/{}'.format( + parse.quote_plus('testx/teuth-test-bucket'))) + self.assertStatus(204) + data = self._get('/api/rgw/bucket', version='1.1') + self.assertStatus(200) + self.assertEqual(len(data), 0) + + def test_crud_w_locking(self): + # Create + self._post('/api/rgw/bucket', + params={ + 'bucket': 'teuth-test-bucket', + 'uid': 'teuth-test-user', + 'zonegroup': 'default', + 'placement_target': 'default-placement', + 'lock_enabled': 'true', + 'lock_mode': 'GOVERNANCE', + 'lock_retention_period_days': '0', + 'lock_retention_period_years': '1' + }) + self.assertStatus(201) + # Read + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(200) + self.assertSchema( + data, + JObj(sub_elems={ + 'lock_enabled': JLeaf(bool), + 'lock_mode': JLeaf(str), + 'lock_retention_period_days': JLeaf(int), + 'lock_retention_period_years': JLeaf(int) + }, + allow_unknown=True)) + self.assertTrue(data['lock_enabled']) + self.assertEqual(data['lock_mode'], 'GOVERNANCE') + self.assertEqual(data['lock_retention_period_days'], 0) + self.assertEqual(data['lock_retention_period_years'], 1) + # Update + self._put('/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'teuth-test-user', + 'lock_mode': 'COMPLIANCE', + 'lock_retention_period_days': '15', + 'lock_retention_period_years': '0' + }) + self.assertStatus(200) + data = self._get('/api/rgw/bucket/teuth-test-bucket') + self.assertTrue(data['lock_enabled']) + self.assertEqual(data['lock_mode'], 'COMPLIANCE') + self.assertEqual(data['lock_retention_period_days'], 15) + self.assertEqual(data['lock_retention_period_years'], 0) + self.assertStatus(200) + + # Update: Disabling bucket versioning should fail if object locking enabled + self._put('/api/rgw/bucket/teuth-test-bucket', + params={ + 'bucket_id': data['id'], + 'uid': 'teuth-test-user', + 'versioning_state': 'Suspended' + }) + self.assertStatus(409) + + # Delete + self._delete('/api/rgw/bucket/teuth-test-bucket') + self.assertStatus(204) + + +class RgwDaemonTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @DashboardTestCase.RunAs('test', 'test', [{ + 'rgw': ['create', 'update', 'delete'] + }]) + def test_read_access_permissions(self): + self._get('/api/rgw/daemon') + self.assertStatus(403) + self._get('/api/rgw/daemon/id') + self.assertStatus(403) + + def test_list(self): + data = self._get('/api/rgw/daemon') + self.assertStatus(200) + self.assertEqual(len(data), 1) + data = data[0] + self.assertIn('id', data) + self.assertIn('version', data) + self.assertIn('server_hostname', data) + self.assertIn('zonegroup_name', data) + self.assertIn('zone_name', data) + self.assertIn('port', data) + + def test_get(self): + data = self._get('/api/rgw/daemon') + self.assertStatus(200) + + data = self._get('/api/rgw/daemon/{}'.format(data[0]['id'])) + self.assertStatus(200) + self.assertIn('rgw_metadata', data) + self.assertIn('rgw_id', data) + self.assertIn('rgw_status', data) + self.assertTrue(data['rgw_metadata']) + + def test_status(self): + data = self._get('/ui-api/rgw/status') + self.assertStatus(200) + self.assertIn('available', data) + self.assertIn('message', data) + 
self.assertTrue(data['available']) + + +class RgwUserTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + super(RgwUserTest, cls).setUpClass() + + def _assert_user_data(self, data): + self.assertSchema(data, JObj(sub_elems={ + 'caps': JList(JObj(sub_elems={}, allow_unknown=True)), + 'display_name': JLeaf(str), + 'email': JLeaf(str), + 'keys': JList(JObj(sub_elems={}, allow_unknown=True)), + 'max_buckets': JLeaf(int), + 'subusers': JList(JLeaf(str)), + 'suspended': JLeaf(int), + 'swift_keys': JList(JObj(sub_elems={}, allow_unknown=True)), + 'tenant': JLeaf(str), + 'user_id': JLeaf(str), + 'uid': JLeaf(str) + }, allow_unknown=True)) + self.assertGreaterEqual(len(data['keys']), 1) + + def test_get(self): + data = self.get_rgw_user('admin') + self.assertStatus(200) + self._assert_user_data(data) + self.assertEqual(data['user_id'], 'admin') + self.assertTrue(data['stats']) + self.assertIsInstance(data['stats'], dict) + # Test without stats. + data = self.get_rgw_user('admin', False) + self.assertStatus(200) + self._assert_user_data(data) + self.assertEqual(data['user_id'], 'admin') + + def test_list(self): + data = self._get('/api/rgw/user') + self.assertStatus(200) + self.assertGreaterEqual(len(data), 1) + self.assertIn('admin', data) + + def test_get_emails(self): + data = self._get('/api/rgw/user/get_emails') + self.assertStatus(200) + self.assertSchema(data, JList(str)) + + def test_create_get_update_delete(self): + # Create a new user. + self._post('/api/rgw/user', params={ + 'uid': 'teuth-test-user', + 'display_name': 'display name' + }) + self.assertStatus(201) + data = self.jsonBody() + self._assert_user_data(data) + self.assertEqual(data['user_id'], 'teuth-test-user') + self.assertEqual(data['display_name'], 'display name') + + # Get the user. + data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + self._assert_user_data(data) + self.assertEqual(data['tenant'], '') + self.assertEqual(data['user_id'], 'teuth-test-user') + self.assertEqual(data['uid'], 'teuth-test-user') + + # Update the user. + self._put( + '/api/rgw/user/teuth-test-user', + params={'display_name': 'new name'}) + self.assertStatus(200) + data = self.jsonBody() + self._assert_user_data(data) + self.assertEqual(data['display_name'], 'new name') + + # Delete the user. + self._delete('/api/rgw/user/teuth-test-user') + self.assertStatus(204) + self.get_rgw_user('teuth-test-user') + self.assertStatus(500) + resp = self.jsonBody() + self.assertIn('detail', resp) + self.assertIn('failed request with status code 404', resp['detail']) + self.assertIn('"Code":"NoSuchUser"', resp['detail']) + self.assertIn('"HostId"', resp['detail']) + self.assertIn('"RequestId"', resp['detail']) + + def test_create_get_update_delete_w_tenant(self): + # Create a new user. + self._post( + '/api/rgw/user', + params={ + 'uid': 'test01$teuth-test-user', + 'display_name': 'display name' + }) + self.assertStatus(201) + data = self.jsonBody() + self._assert_user_data(data) + self.assertEqual(data['user_id'], 'teuth-test-user') + self.assertEqual(data['display_name'], 'display name') + + # Get the user. + data = self.get_rgw_user('test01$teuth-test-user') + self.assertStatus(200) + self._assert_user_data(data) + self.assertEqual(data['tenant'], 'test01') + self.assertEqual(data['user_id'], 'teuth-test-user') + self.assertEqual(data['uid'], 'test01$teuth-test-user') + + # Update the user. 
+ self._put( + '/api/rgw/user/test01$teuth-test-user', + params={'display_name': 'new name'}) + self.assertStatus(200) + data = self.jsonBody() + self._assert_user_data(data) + self.assertEqual(data['display_name'], 'new name') + + # Delete the user. + self._delete('/api/rgw/user/test01$teuth-test-user') + self.assertStatus(204) + self.get_rgw_user('test01$teuth-test-user') + self.assertStatus(500) + resp = self.jsonBody() + self.assertIn('detail', resp) + self.assertIn('failed request with status code 404', resp['detail']) + self.assertIn('"Code":"NoSuchUser"', resp['detail']) + self.assertIn('"HostId"', resp['detail']) + self.assertIn('"RequestId"', resp['detail']) + + +class RgwUserCapabilityTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + cls.create_test_user = True + super(RgwUserCapabilityTest, cls).setUpClass() + + def test_set(self): + self._post( + '/api/rgw/user/teuth-test-user/capability', + params={ + 'type': 'usage', + 'perm': 'read' + }) + self.assertStatus(201) + data = self.jsonBody() + self.assertEqual(len(data), 1) + data = data[0] + self.assertEqual(data['type'], 'usage') + self.assertEqual(data['perm'], 'read') + + # Get the user data to validate the capabilities. + data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + self.assertGreaterEqual(len(data['caps']), 1) + self.assertEqual(data['caps'][0]['type'], 'usage') + self.assertEqual(data['caps'][0]['perm'], 'read') + + def test_delete(self): + self._delete( + '/api/rgw/user/teuth-test-user/capability', + params={ + 'type': 'metadata', + 'perm': 'write' + }) + self.assertStatus(204) + + # Get the user data to validate the capabilities. + data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + self.assertEqual(len(data['caps']), 0) + + +class RgwUserKeyTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + cls.create_test_user = True + super(RgwUserKeyTest, cls).setUpClass() + + def test_create_s3(self): + self._post( + '/api/rgw/user/teuth-test-user/key', + params={ + 'key_type': 's3', + 'generate_key': 'false', + 'access_key': 'abc987', + 'secret_key': 'aaabbbccc' + }) + data = self.jsonBody() + self.assertStatus(201) + self.assertGreaterEqual(len(data), 3) + key = self.find_object_in_list('access_key', 'abc987', data) + self.assertIsInstance(key, object) + self.assertEqual(key['secret_key'], 'aaabbbccc') + + def test_create_swift(self): + self._post( + '/api/rgw/user/teuth-test-user/key', + params={ + 'key_type': 'swift', + 'subuser': 'teuth-test-subuser', + 'generate_key': 'false', + 'secret_key': 'xxxyyyzzz' + }) + data = self.jsonBody() + self.assertStatus(201) + self.assertGreaterEqual(len(data), 2) + key = self.find_object_in_list('secret_key', 'xxxyyyzzz', data) + self.assertIsInstance(key, object) + + def test_delete_s3(self): + self._delete( + '/api/rgw/user/teuth-test-user/key', + params={ + 'key_type': 's3', + 'access_key': 'xyz123' + }) + self.assertStatus(204) + + def test_delete_swift(self): + self._delete( + '/api/rgw/user/teuth-test-user/key', + params={ + 'key_type': 'swift', + 'subuser': 'teuth-test-user:teuth-test-subuser2' + }) + self.assertStatus(204) + + +class RgwUserQuotaTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + cls.create_test_user = True + super(RgwUserQuotaTest, cls).setUpClass() + + def _assert_quota(self, data): + self.assertIn('user_quota', data) + self.assertIn('max_objects', data['user_quota']) + self.assertIn('enabled', 
data['user_quota']) + self.assertIn('max_size_kb', data['user_quota']) + self.assertIn('max_size', data['user_quota']) + self.assertIn('bucket_quota', data) + self.assertIn('max_objects', data['bucket_quota']) + self.assertIn('enabled', data['bucket_quota']) + self.assertIn('max_size_kb', data['bucket_quota']) + self.assertIn('max_size', data['bucket_quota']) + + def test_get_quota(self): + data = self._get('/api/rgw/user/teuth-test-user/quota') + self.assertStatus(200) + self._assert_quota(data) + + def test_set_user_quota(self): + self._put( + '/api/rgw/user/teuth-test-user/quota', + params={ + 'quota_type': 'user', + 'enabled': 'true', + 'max_size_kb': 2048, + 'max_objects': 101 + }) + self.assertStatus(200) + + data = self._get('/api/rgw/user/teuth-test-user/quota') + self.assertStatus(200) + self._assert_quota(data) + self.assertEqual(data['user_quota']['max_objects'], 101) + self.assertTrue(data['user_quota']['enabled']) + self.assertEqual(data['user_quota']['max_size_kb'], 2048) + + def test_set_bucket_quota(self): + self._put( + '/api/rgw/user/teuth-test-user/quota', + params={ + 'quota_type': 'bucket', + 'enabled': 'false', + 'max_size_kb': 4096, + 'max_objects': 2000 + }) + self.assertStatus(200) + + data = self._get('/api/rgw/user/teuth-test-user/quota') + self.assertStatus(200) + self._assert_quota(data) + self.assertEqual(data['bucket_quota']['max_objects'], 2000) + self.assertFalse(data['bucket_quota']['enabled']) + self.assertEqual(data['bucket_quota']['max_size_kb'], 4096) + + +class RgwUserSubuserTest(RgwTestCase): + + AUTH_ROLES = ['rgw-manager'] + + @classmethod + def setUpClass(cls): + cls.create_test_user = True + super(RgwUserSubuserTest, cls).setUpClass() + + def test_create_swift(self): + self._post( + '/api/rgw/user/teuth-test-user/subuser', + params={ + 'subuser': 'tux', + 'access': 'readwrite', + 'key_type': 'swift' + }) + self.assertStatus(200) + data = self.jsonBody() + subuser = self.find_object_in_list('id', 'teuth-test-user:tux', data) + self.assertIsInstance(subuser, object) + self.assertEqual(subuser['permissions'], 'read-write') + + # Get the user data to validate the keys. + data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + key = self.find_object_in_list('user', 'teuth-test-user:tux', + data['swift_keys']) + self.assertIsInstance(key, object) + + def test_create_s3(self): + self._post( + '/api/rgw/user/teuth-test-user/subuser', + params={ + 'subuser': 'hugo', + 'access': 'write', + 'generate_secret': 'false', + 'access_key': 'yyy', + 'secret_key': 'xxx' + }) + self.assertStatus(200) + data = self.jsonBody() + subuser = self.find_object_in_list('id', 'teuth-test-user:hugo', data) + self.assertIsInstance(subuser, object) + self.assertEqual(subuser['permissions'], 'write') + + # Get the user data to validate the keys. + data = self.get_rgw_user('teuth-test-user') + self.assertStatus(200) + key = self.find_object_in_list('user', 'teuth-test-user:hugo', + data['keys']) + self.assertIsInstance(key, object) + self.assertEqual(key['secret_key'], 'xxx') + + def test_delete_w_purge(self): + self._delete( + '/api/rgw/user/teuth-test-user/subuser/teuth-test-subuser2') + self.assertStatus(204) + + # Get the user data to check that the keys don't exist anymore. 
+        data = self.get_rgw_user('teuth-test-user')
+        self.assertStatus(200)
+        key = self.find_object_in_list(
+            'user', 'teuth-test-user:teuth-test-subuser2', data['swift_keys'])
+        self.assertIsNone(key)
+
+    def test_delete_wo_purge(self):
+        self._delete(
+            '/api/rgw/user/teuth-test-user/subuser/teuth-test-subuser',
+            params={'purge_keys': 'false'})
+        self.assertStatus(204)
+
+        # Get the user data to check whether the keys still exist.
+        data = self.get_rgw_user('teuth-test-user')
+        self.assertStatus(200)
+        key = self.find_object_in_list(
+            'user', 'teuth-test-user:teuth-test-subuser', data['keys'])
+        self.assertIsInstance(key, object)
diff --git a/qa/tasks/mgr/dashboard/test_role.py b/qa/tasks/mgr/dashboard/test_role.py
new file mode 100644
index 000000000..dbfaea9e4
--- /dev/null
+++ b/qa/tasks/mgr/dashboard/test_role.py
@@ -0,0 +1,145 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+
+from .helper import DashboardTestCase
+
+
+class RoleTest(DashboardTestCase):
+    @classmethod
+    def _create_role(cls, name=None, description=None, scopes_permissions=None):
+        data = {}
+        if name:
+            data['name'] = name
+        if description:
+            data['description'] = description
+        if scopes_permissions:
+            data['scopes_permissions'] = scopes_permissions
+        cls._post('/api/role', data)
+
+    def test_crud_role(self):
+        self._create_role(name='role1',
+                          description='Description 1',
+                          scopes_permissions={'osd': ['read']})
+        self.assertStatus(201)
+        self.assertJsonBody({
+            'name': 'role1',
+            'description': 'Description 1',
+            'scopes_permissions': {'osd': ['read']},
+            'system': False
+        })
+
+        self._get('/api/role/role1')
+        self.assertStatus(200)
+        self.assertJsonBody({
+            'name': 'role1',
+            'description': 'Description 1',
+            'scopes_permissions': {'osd': ['read']},
+            'system': False
+        })
+
+        self._put('/api/role/role1', {
+            'description': 'Description 2',
+            'scopes_permissions': {'osd': ['read', 'update']},
+        })
+        self.assertStatus(200)
+        self.assertJsonBody({
+            'name': 'role1',
+            'description': 'Description 2',
+            'scopes_permissions': {'osd': ['read', 'update']},
+            'system': False
+        })
+
+        self._delete('/api/role/role1')
+        self.assertStatus(204)
+
+    def test_list_roles(self):
+        roles = self._get('/api/role')
+        self.assertStatus(200)
+
+        self.assertGreaterEqual(len(roles), 1)
+        for role in roles:
+            self.assertIn('name', role)
+            self.assertIn('description', role)
+            self.assertIn('scopes_permissions', role)
+            self.assertIn('system', role)
+
+    def test_get_role_does_not_exist(self):
+        self._get('/api/role/role2')
+        self.assertStatus(404)
+
+    def test_create_role_already_exists(self):
+        self._create_role(name='read-only',
+                          description='Description 1',
+                          scopes_permissions={'osd': ['read']})
+        self.assertStatus(400)
+        self.assertError(code='role_already_exists',
+                         component='role')
+
+    def test_create_role_no_name(self):
+        self._create_role(description='Description 1',
+                          scopes_permissions={'osd': ['read']})
+        self.assertStatus(400)
+        self.assertError(code='name_required',
+                         component='role')
+
+    def test_create_role_invalid_scope(self):
+        self._create_role(name='role1',
+                          description='Description 1',
+                          scopes_permissions={'invalid-scope': ['read']})
+        self.assertStatus(400)
+        self.assertError(code='invalid_scope',
+                         component='role')
+
+    def test_create_role_invalid_permission(self):
+        self._create_role(name='role1',
+                          description='Description 1',
+                          scopes_permissions={'osd': ['invalid-permission']})
+        self.assertStatus(400)
+        self.assertError(code='invalid_permission',
+                         component='role')
+
+    def 
test_delete_role_does_not_exist(self): + self._delete('/api/role/role2') + self.assertStatus(404) + + def test_delete_system_role(self): + self._delete('/api/role/read-only') + self.assertStatus(400) + self.assertError(code='cannot_delete_system_role', + component='role') + + def test_delete_role_associated_with_user(self): + self.create_user("user", "user", ['read-only']) + self._create_role(name='role1', + description='Description 1', + scopes_permissions={'user': ['create', 'read', 'update', 'delete']}) + self.assertStatus(201) + self._put('/api/user/user', {'roles': ['role1']}) + self.assertStatus(200) + + self._delete('/api/role/role1') + self.assertStatus(400) + self.assertError(code='role_is_associated_with_user', + component='role') + + self._put('/api/user/user', {'roles': ['administrator']}) + self.assertStatus(200) + self._delete('/api/role/role1') + self.assertStatus(204) + self.delete_user("user") + + def test_update_role_does_not_exist(self): + self._put('/api/role/role2', {}) + self.assertStatus(404) + + def test_update_system_role(self): + self._put('/api/role/read-only', {}) + self.assertStatus(400) + self.assertError(code='cannot_update_system_role', + component='role') + + def test_clone_role(self): + self._post('/api/role/read-only/clone', {'new_name': 'foo'}) + self.assertStatus(201) + self._delete('/api/role/foo') diff --git a/qa/tasks/mgr/dashboard/test_settings.py b/qa/tasks/mgr/dashboard/test_settings.py new file mode 100644 index 000000000..d6ad1e762 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_settings.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- + +from __future__ import absolute_import + +from .helper import DashboardTestCase, JAny, JList, JObj + + +class SettingsTest(DashboardTestCase): + def setUp(self): + super(SettingsTest, self).setUp() + self.settings = self._get('/api/settings') + + def tearDown(self): + self._put( + '/api/settings', + {setting['name']: setting['value'] + for setting in self.settings}) + + def test_list_settings(self): + settings = self._get('/api/settings') + self.assertGreater(len(settings), 10) + self.assertSchema( + settings, + JList( + JObj({ + 'default': JAny(none=False), + 'name': str, + 'type': str, + 'value': JAny(none=False) + }))) + self.assertStatus(200) + + def test_get_setting(self): + setting = self._get('/api/settings/rgw-api-access-key') + self.assertSchema( + setting, + JObj({ + 'default': JAny(none=False), + 'name': str, + 'type': str, + 'value': JAny(none=False) + })) + self.assertStatus(200) + + def test_set_setting(self): + self._put('/api/settings/rgw-api-access-key', {'value': 'foo'}) + self.assertStatus(200) + + value = self._get('/api/settings/rgw-api-access-key')['value'] + self.assertEqual('foo', value) + + def test_bulk_set(self): + self._put('/api/settings', { + 'RGW_API_ACCESS_KEY': 'dummy-key', + 'RGW_API_SECRET_KEY': 'dummy-secret', + }) + self.assertStatus(200) + + access_key = self._get('/api/settings/rgw-api-access-key')['value'] + self.assertStatus(200) + self.assertEqual('dummy-key', access_key) + + secret_key = self._get('/api/settings/rgw-api-secret-key')['value'] + self.assertStatus(200) + self.assertEqual('dummy-secret', secret_key) diff --git a/qa/tasks/mgr/dashboard/test_summary.py b/qa/tasks/mgr/dashboard/test_summary.py new file mode 100644 index 000000000..a31f89146 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_summary.py @@ -0,0 +1,39 @@ +from __future__ import absolute_import + +from .helper import DashboardTestCase + + +class SummaryTest(DashboardTestCase): + CEPHFS = True + + def 
test_summary(self): + data = self._get("/api/summary") + self.assertStatus(200) + + self.assertIn('health_status', data) + self.assertIn('mgr_id', data) + self.assertIn('have_mon_connection', data) + self.assertIn('rbd_mirroring', data) + self.assertIn('executing_tasks', data) + self.assertIn('finished_tasks', data) + self.assertIn('version', data) + self.assertIsNotNone(data['health_status']) + self.assertIsNotNone(data['mgr_id']) + self.assertIsNotNone(data['have_mon_connection']) + self.assertEqual(data['rbd_mirroring'], {'errors': 0, 'warnings': 0}) + + @DashboardTestCase.RunAs('test', 'test', ['pool-manager']) + def test_summary_permissions(self): + data = self._get("/api/summary") + self.assertStatus(200) + + self.assertIn('health_status', data) + self.assertIn('mgr_id', data) + self.assertIn('have_mon_connection', data) + self.assertNotIn('rbd_mirroring', data) + self.assertIn('executing_tasks', data) + self.assertIn('finished_tasks', data) + self.assertIn('version', data) + self.assertIsNotNone(data['health_status']) + self.assertIsNotNone(data['mgr_id']) + self.assertIsNotNone(data['have_mon_connection']) diff --git a/qa/tasks/mgr/dashboard/test_telemetry.py b/qa/tasks/mgr/dashboard/test_telemetry.py new file mode 100644 index 000000000..65c62c748 --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_telemetry.py @@ -0,0 +1,98 @@ +from .helper import DashboardTestCase, JObj + + +class TelemetryTest(DashboardTestCase): + + pre_enabled_status = True + + @classmethod + def setUpClass(cls): + super(TelemetryTest, cls).setUpClass() + data = cls._get('/api/mgr/module/telemetry') + cls.pre_enabled_status = data['enabled'] + + # identify ourselves so we can filter these reports out on the server side + cls._put( + '/api/settings', + { + 'mgr/telemetry/channel_ident': True, + 'mgr/telemetry/organization': 'ceph-qa', + } + ) + + @classmethod + def tearDownClass(cls): + if cls.pre_enabled_status: + cls._enable_module() + else: + cls._disable_module() + super(TelemetryTest, cls).tearDownClass() + + def test_disable_module(self): + self._enable_module() + self._check_telemetry_enabled(True) + self._disable_module() + self._check_telemetry_enabled(False) + + def test_enable_module_correct_license(self): + self._disable_module() + self._check_telemetry_enabled(False) + + self._put('/api/telemetry', { + 'enable': True, + 'license_name': 'sharing-1-0' + }) + self.assertStatus(200) + self._check_telemetry_enabled(True) + + def test_enable_module_empty_license(self): + self._disable_module() + self._check_telemetry_enabled(False) + + self._put('/api/telemetry', { + 'enable': True, + 'license_name': '' + }) + self.assertStatus(400) + self.assertError(code='telemetry_enable_license_missing') + self._check_telemetry_enabled(False) + + def test_enable_module_invalid_license(self): + self._disable_module() + self._check_telemetry_enabled(False) + + self._put('/api/telemetry', { + 'enable': True, + 'license_name': 'invalid-license' + }) + self.assertStatus(400) + self.assertError(code='telemetry_enable_license_missing') + self._check_telemetry_enabled(False) + + def test_get_report(self): + self._enable_module() + data = self._get('/api/telemetry/report') + self.assertStatus(200) + schema = JObj({ + 'report': JObj({}, allow_unknown=True), + 'device_report': JObj({}, allow_unknown=True) + }) + self.assertSchema(data, schema) + + @classmethod + def _enable_module(cls): + cls._put('/api/telemetry', { + 'enable': True, + 'license_name': 'sharing-1-0' + }) + + @classmethod + def _disable_module(cls): + 
cls._put('/api/telemetry', { + 'enable': False + }) + + def _check_telemetry_enabled(self, enabled): + data = self._get('/api/mgr/module/telemetry') + self.assertStatus(200) + self.assertEqual(data['enabled'], enabled) diff --git a/qa/tasks/mgr/dashboard/test_user.py b/qa/tasks/mgr/dashboard/test_user.py new file mode 100644 index 000000000..3a6464f5a --- /dev/null +++ b/qa/tasks/mgr/dashboard/test_user.py @@ -0,0 +1,565 @@ +# -*- coding: utf-8 -*- +# pylint: disable=too-many-public-methods + +from __future__ import absolute_import + +import time +from datetime import datetime, timedelta + +from .helper import DashboardTestCase + + +class UserTest(DashboardTestCase): + @classmethod + def setUpClass(cls): + super(UserTest, cls).setUpClass() + cls._ceph_cmd(['dashboard', 'set-pwd-policy-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-length-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-oldpwd-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-username-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-exclusion-list-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-complexity-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-sequential-chars-enabled', 'true']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-repetitive-chars-enabled', 'true']) + + @classmethod + def tearDownClass(cls): + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-username-enabled', 'false']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-exclusion-list-enabled', 'false']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-complexity-enabled', 'false']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-sequential-chars-enabled', 'false']) + cls._ceph_cmd(['dashboard', 'set-pwd-policy-check-repetitive-chars-enabled', 'false']) + super(UserTest, cls).tearDownClass() + + @classmethod + def _create_user(cls, username=None, password=None, name=None, email=None, roles=None, + enabled=True, pwd_expiration_date=None, pwd_update_required=False): + data = {} + if username: + data['username'] = username + if password: + data['password'] = password + if name: + data['name'] = name + if email: + data['email'] = email + if roles: + data['roles'] = roles + if pwd_expiration_date: + data['pwdExpirationDate'] = pwd_expiration_date + data['pwdUpdateRequired'] = pwd_update_required + data['enabled'] = enabled + cls._post("/api/user", data) + + @classmethod + def _reset_login_to_admin(cls, username=None): + cls.logout() + if username: + cls.delete_user(username) + cls.login('admin', 'admin') + + def test_crud_user(self): + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator']) + self.assertStatus(201) + user = self.jsonBody() + + self._get('/api/user/user1') + self.assertStatus(200) + self.assertJsonBody({ + 'username': 'user1', + 'name': 'My Name', + 'email': 'my@email.com', + 'roles': ['administrator'], + 'lastUpdate': user['lastUpdate'], + 'enabled': True, + 'pwdExpirationDate': None, + 'pwdUpdateRequired': False + }) + + self._put('/api/user/user1', { + 'name': 'My New Name', + 'email': 'mynew@email.com', + 'roles': ['block-manager'], + }) + self.assertStatus(200) + user = self.jsonBody() + self.assertJsonBody({ + 'username': 'user1', + 'name': 'My New Name', + 'email': 'mynew@email.com', + 'roles': ['block-manager'], + 'lastUpdate': user['lastUpdate'], + 'enabled': True, + 'pwdExpirationDate': None, + 
'pwdUpdateRequired': False + }) + + self._delete('/api/user/user1') + self.assertStatus(204) + + def test_crd_disabled_user(self): + self._create_user(username='klara', + password='mypassword10#', + name='Klara Musterfrau', + email='klara@musterfrau.com', + roles=['administrator'], + enabled=False) + self.assertStatus(201) + user = self.jsonBody() + + # Restart dashboard module. + self._unload_module('dashboard') + self._load_module('dashboard') + time.sleep(10) + + self._get('/api/user/klara') + self.assertStatus(200) + self.assertJsonBody({ + 'username': 'klara', + 'name': 'Klara Musterfrau', + 'email': 'klara@musterfrau.com', + 'roles': ['administrator'], + 'lastUpdate': user['lastUpdate'], + 'enabled': False, + 'pwdExpirationDate': None, + 'pwdUpdateRequired': False + }) + + self._delete('/api/user/klara') + self.assertStatus(204) + + def test_list_users(self): + self._get('/api/user') + self.assertStatus(200) + user = self.jsonBody() + self.assertEqual(len(user), 1) + user = user[0] + self.assertJsonBody([{ + 'username': 'admin', + 'name': None, + 'email': None, + 'roles': ['administrator'], + 'lastUpdate': user['lastUpdate'], + 'enabled': True, + 'pwdExpirationDate': None, + 'pwdUpdateRequired': False + }]) + + def test_create_user_already_exists(self): + self._create_user(username='admin', + password='mypassword10#', + name='administrator', + email='my@email.com', + roles=['administrator']) + self.assertStatus(400) + self.assertError(code='username_already_exists', + component='user') + + def test_create_user_invalid_role(self): + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['invalid-role']) + self.assertStatus(400) + self.assertError(code='role_does_not_exist', + component='user') + + def test_create_user_invalid_chars_in_name(self): + self._create_user(username='userö', + password='mypassword10#', + name='administrator', + email='my@email.com', + roles=['administrator']) + self.assertStatus(400) + self.assertError(code='ceph_type_not_valid', + component='user') + + def test_delete_user_does_not_exist(self): + self._delete('/api/user/user2') + self.assertStatus(404) + + @DashboardTestCase.RunAs('test', 'test', [{'user': ['create', 'read', 'update', 'delete']}]) + def test_delete_current_user(self): + self._delete('/api/user/test') + self.assertStatus(400) + self.assertError(code='cannot_delete_current_user', + component='user') + + @DashboardTestCase.RunAs('test', 'test', [{'user': ['create', 'read', 'update', 'delete']}]) + def test_disable_current_user(self): + self._put('/api/user/test', {'enabled': False}) + self.assertStatus(400) + self.assertError(code='cannot_disable_current_user', + component='user') + + def test_update_user_does_not_exist(self): + self._put('/api/user/user2', {'name': 'My New Name'}) + self.assertStatus(404) + + def test_update_user_invalid_role(self): + self._put('/api/user/admin', {'roles': ['invalid-role']}) + self.assertStatus(400) + self.assertError(code='role_does_not_exist', + component='user') + + def test_change_password_from_other_user(self): + self._post('/api/user/test2/change_password', { + 'old_password': 'abc', + 'new_password': 'xyz' + }) + self.assertStatus(400) + self.assertError(code='invalid_user_context', component='user') + + def test_change_password_old_not_match(self): + self._post('/api/user/admin/change_password', { + 'old_password': 'foo', + 'new_password': 'bar' + }) + self.assertStatus(400) + self.assertError(code='invalid_old_password', component='user') + + 
def test_change_password_as_old_password(self): + self.create_user('test1', 'mypassword10#', ['read-only'], force_password=False) + self.login('test1', 'mypassword10#') + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'mypassword10#' + }) + self.assertStatus(400) + self.assertError('password_policy_validation_failed', 'user', + 'Password must not be the same as the previous one.') + self._reset_login_to_admin('test1') + + def test_change_password_contains_username(self): + self.create_user('test1', 'mypassword10#', ['read-only'], force_password=False) + self.login('test1', 'mypassword10#') + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'mypasstest1@#' + }) + self.assertStatus(400) + self.assertError('password_policy_validation_failed', 'user', + 'Password must not contain username.') + self._reset_login_to_admin('test1') + + def test_change_password_contains_forbidden_words(self): + self.create_user('test1', 'mypassword10#', ['read-only'], force_password=False) + self.login('test1', 'mypassword10#') + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'mypassOSD01' + }) + self.assertStatus(400) + self.assertError('password_policy_validation_failed', 'user', + 'Password must not contain the keyword "OSD".') + self._reset_login_to_admin('test1') + + def test_change_password_contains_sequential_characters(self): + self.create_user('test1', 'mypassword10#', ['read-only'], force_password=False) + self.login('test1', 'mypassword10#') + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'mypass123456!@$' + }) + self.assertStatus(400) + self.assertError('password_policy_validation_failed', 'user', + 'Password must not contain sequential characters.') + self._reset_login_to_admin('test1') + + def test_change_password_contains_repetetive_characters(self): + self.create_user('test1', 'mypassword10#', ['read-only'], force_password=False) + self.login('test1', 'mypassword10#') + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'aaaaA1@!#' + }) + self.assertStatus(400) + self.assertError('password_policy_validation_failed', 'user', + 'Password must not contain repetitive characters.') + self._reset_login_to_admin('test1') + + @DashboardTestCase.RunAs('test1', 'mypassword10#', ['read-only'], False) + def test_change_password(self): + self._post('/api/user/test1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'newpassword01#' + }) + self.assertStatus(200) + self.logout() + self._post('/api/auth', {'username': 'test1', 'password': 'mypassword10#'}) + self.assertStatus(400) + self.assertError(code='invalid_credentials', component='auth') + + def test_create_user_password_cli(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-create', + 'test1'], + 'mypassword10#', + return_exit_code=True) + self.assertEqual(exitcode, 0) + self.delete_user('test1') + + @DashboardTestCase.RunAs('test2', 'foo_bar_10#', force_password=False, login=False) + def test_change_user_password_cli(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', + 'test2'], + 'foo_new-password01#', + return_exit_code=True) + self.assertEqual(exitcode, 0) + + def test_create_user_password_force_cli(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-create', + '--force-password', 'test11'], + 'bar', 
+ return_exit_code=True) + self.assertEqual(exitcode, 0) + self.delete_user('test11') + + @DashboardTestCase.RunAs('test22', 'foo_bar_10#', force_password=False, login=False) + def test_change_user_password_force_cli(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', + '--force-password', 'test22'], + 'bar', + return_exit_code=True) + self.assertEqual(exitcode, 0) + + def test_create_user_password_cli_fail(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-create', + 'test3'], + 'foo', + return_exit_code=True) + self.assertNotEqual(exitcode, 0) + + @DashboardTestCase.RunAs('test4', 'x1z_tst+_10#', force_password=False, login=False) + def test_change_user_password_cli_fail(self): + exitcode = self._ceph_cmd_with_secret(['dashboard', 'ac-user-set-password', + 'test4'], + 'bar', + return_exit_code=True) + self.assertNotEqual(exitcode, 0) + + def test_create_user_with_pwd_expiration_date(self): + future_date = datetime.utcnow() + timedelta(days=10) + future_date = int(time.mktime(future_date.timetuple())) + + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator'], + pwd_expiration_date=future_date) + self.assertStatus(201) + user = self.jsonBody() + + self._get('/api/user/user1') + self.assertStatus(200) + self.assertJsonBody({ + 'username': 'user1', + 'name': 'My Name', + 'email': 'my@email.com', + 'roles': ['administrator'], + 'lastUpdate': user['lastUpdate'], + 'enabled': True, + 'pwdExpirationDate': future_date, + 'pwdUpdateRequired': False + }) + self._delete('/api/user/user1') + + def test_create_with_pwd_expiration_date_not_valid(self): + past_date = datetime.utcnow() - timedelta(days=10) + past_date = int(time.mktime(past_date.timetuple())) + + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator'], + pwd_expiration_date=past_date) + self.assertStatus(400) + self.assertError(code='pwd_past_expiration_date', component='user') + + def test_create_with_default_expiration_date(self): + future_date_1 = datetime.utcnow() + timedelta(days=9) + future_date_1 = int(time.mktime(future_date_1.timetuple())) + future_date_2 = datetime.utcnow() + timedelta(days=11) + future_date_2 = int(time.mktime(future_date_2.timetuple())) + + self._ceph_cmd(['dashboard', 'set-user-pwd-expiration-span', '10']) + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator']) + self.assertStatus(201) + + user = self._get('/api/user/user1') + self.assertStatus(200) + self.assertIsNotNone(user['pwdExpirationDate']) + self.assertGreater(user['pwdExpirationDate'], future_date_1) + self.assertLess(user['pwdExpirationDate'], future_date_2) + + self._delete('/api/user/user1') + self._ceph_cmd(['dashboard', 'set-user-pwd-expiration-span', '0']) + + def test_pwd_expiration_date_update(self): + self._ceph_cmd(['dashboard', 'set-user-pwd-expiration-span', '10']) + self.create_user('user1', 'mypassword10#', ['administrator']) + + user_1 = self._get('/api/user/user1') + self.assertStatus(200) + + # Let's wait 1 s to ensure pwd expiration date is not the same + time.sleep(1) + + self.login('user1', 'mypassword10#') + self._post('/api/user/user1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'newpassword01#' + }) + self.assertStatus(200) + + # Compare password expiration dates. 
+ self._reset_login_to_admin() + user_1_pwd_changed = self._get('/api/user/user1') + self.assertStatus(200) + self.assertLess(user_1['pwdExpirationDate'], user_1_pwd_changed['pwdExpirationDate']) + + # Cleanup + self.delete_user('user1') + self._ceph_cmd(['dashboard', 'set-user-pwd-expiration-span', '0']) + + def test_pwd_update_required(self): + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator'], + pwd_update_required=True) + self.assertStatus(201) + + user_1 = self._get('/api/user/user1') + self.assertStatus(200) + self.assertEqual(user_1['pwdUpdateRequired'], True) + + self.login('user1', 'mypassword10#') + self.assertStatus(201) + + self._get('/api/osd') + self.assertStatus(403) + self._reset_login_to_admin('user1') + + def test_pwd_update_required_change_pwd(self): + self._create_user(username='user1', + password='mypassword10#', + name='My Name', + email='my@email.com', + roles=['administrator'], + pwd_update_required=True) + self.assertStatus(201) + + self.login('user1', 'mypassword10#') + self._post('/api/user/user1/change_password', { + 'old_password': 'mypassword10#', + 'new_password': 'newpassword01#' + }) + + self.login('user1', 'newpassword01#') + user_1 = self._get('/api/user/user1') + self.assertStatus(200) + self.assertEqual(user_1['pwdUpdateRequired'], False) + self._get('/api/osd') + self.assertStatus(200) + self._reset_login_to_admin('user1') + + def test_validate_password_weak(self): + self._post('/api/user/validate_password', { + 'password': 'mypassword1' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': True, + 'credits': 11, + 'valuation': 'Weak' + }) + + def test_validate_password_ok(self): + self._post('/api/user/validate_password', { + 'password': 'mypassword1!@' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': True, + 'credits': 17, + 'valuation': 'OK' + }) + + def test_validate_password_strong(self): + self._post('/api/user/validate_password', { + 'password': 'testpassword0047!@' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': True, + 'credits': 22, + 'valuation': 'Strong' + }) + + def test_validate_password_very_strong(self): + self._post('/api/user/validate_password', { + 'password': 'testpassword#!$!@$' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': True, + 'credits': 30, + 'valuation': 'Very strong' + }) + + def test_validate_password_fail(self): + self._post('/api/user/validate_password', { + 'password': 'foo' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': False, + 'credits': 0, + 'valuation': 'Password is too weak.' + }) + + def test_validate_password_fail_name(self): + self._post('/api/user/validate_password', { + 'password': 'x1zhugo_10', + 'username': 'hugo' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': False, + 'credits': 0, + 'valuation': 'Password must not contain username.' + }) + + def test_validate_password_fail_oldpwd(self): + self._post('/api/user/validate_password', { + 'password': 'x1zt-st10', + 'old_password': 'x1zt-st10' + }) + self.assertStatus(200) + self.assertJsonBody({ + 'valid': False, + 'credits': 0, + 'valuation': 'Password must not be the same as the previous one.' 
+ }) + + def test_create_user_pwd_update_required(self): + self.create_user('foo', 'bar', cmd_args=['--pwd_update_required']) + self._get('/api/user/foo') + self.assertStatus(200) + self.assertJsonSubset({ + 'username': 'foo', + 'pwdUpdateRequired': True + }) + self.delete_user('foo') diff --git a/qa/tasks/mgr/mgr_test_case.py b/qa/tasks/mgr/mgr_test_case.py new file mode 100644 index 000000000..94a230c8d --- /dev/null +++ b/qa/tasks/mgr/mgr_test_case.py @@ -0,0 +1,228 @@ +import json +import logging + +from unittest import SkipTest + +from teuthology import misc +from tasks.ceph_test_case import CephTestCase + +# TODO move definition of CephCluster away from the CephFS stuff +from tasks.cephfs.filesystem import CephCluster + + +log = logging.getLogger(__name__) + + +class MgrCluster(CephCluster): + def __init__(self, ctx): + super(MgrCluster, self).__init__(ctx) + self.mgr_ids = list(misc.all_roles_of_type(ctx.cluster, 'mgr')) + + if len(self.mgr_ids) == 0: + raise RuntimeError( + "This task requires at least one manager daemon") + + self.mgr_daemons = dict( + [(mgr_id, self._ctx.daemons.get_daemon('mgr', mgr_id)) for mgr_id + in self.mgr_ids]) + + def mgr_stop(self, mgr_id): + self.mgr_daemons[mgr_id].stop() + + def mgr_fail(self, mgr_id): + self.mon_manager.raw_cluster_cmd("mgr", "fail", mgr_id) + + def mgr_restart(self, mgr_id): + self.mgr_daemons[mgr_id].restart() + + def get_mgr_map(self): + return json.loads( + self.mon_manager.raw_cluster_cmd("mgr", "dump", "--format=json-pretty")) + + def get_registered_clients(self, name, mgr_map = None): + if mgr_map is None: + mgr_map = self.get_mgr_map() + for c in mgr_map['active_clients']: + if c['name'] == name: + return c['addrvec'] + return None + + def get_active_id(self): + return self.get_mgr_map()["active_name"] + + def get_standby_ids(self): + return [s['name'] for s in self.get_mgr_map()["standbys"]] + + def set_module_conf(self, module, key, val): + self.mon_manager.raw_cluster_cmd("config", "set", "mgr", + "mgr/{0}/{1}".format( + module, key + ), val) + + def set_module_localized_conf(self, module, mgr_id, key, val, force): + cmd = ["config", "set", "mgr", + "/".join(["mgr", module, mgr_id, key]), + val] + if force: + cmd.append("--force") + self.mon_manager.raw_cluster_cmd(*cmd) + + +class MgrTestCase(CephTestCase): + MGRS_REQUIRED = 1 + + @classmethod + def setup_mgrs(cls): + # Stop all the daemons + for daemon in cls.mgr_cluster.mgr_daemons.values(): + daemon.stop() + + for mgr_id in cls.mgr_cluster.mgr_ids: + cls.mgr_cluster.mgr_fail(mgr_id) + + # Unload all non-default plugins + loaded = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "ls", "--format=json-pretty"))['enabled_modules'] + unload_modules = set(loaded) - {"cephadm", "restful"} + + for m in unload_modules: + cls.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "disable", m) + + # Start all the daemons + for daemon in cls.mgr_cluster.mgr_daemons.values(): + daemon.restart() + + # Wait for an active to come up + cls.wait_until_true(lambda: cls.mgr_cluster.get_active_id() != "", + timeout=20) + + expect_standbys = set(cls.mgr_cluster.mgr_ids) \ + - {cls.mgr_cluster.get_active_id()} + cls.wait_until_true( + lambda: set(cls.mgr_cluster.get_standby_ids()) == expect_standbys, + timeout=20) + + @classmethod + def setUpClass(cls): + # The test runner should have populated this + assert cls.mgr_cluster is not None + + if len(cls.mgr_cluster.mgr_ids) < cls.MGRS_REQUIRED: + raise SkipTest( + "Only have {0} manager daemons, {1} are 
required".format( + len(cls.mgr_cluster.mgr_ids), cls.MGRS_REQUIRED)) + + cls.setup_mgrs() + + @classmethod + def _unload_module(cls, module_name): + def is_disabled(): + enabled_modules = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd( + 'mgr', 'module', 'ls', "--format=json-pretty"))['enabled_modules'] + return module_name not in enabled_modules + + if is_disabled(): + return + + log.debug("Unloading Mgr module %s ...", module_name) + cls.mgr_cluster.mon_manager.raw_cluster_cmd('mgr', 'module', 'disable', module_name) + cls.wait_until_true(is_disabled, timeout=30) + + @classmethod + def _load_module(cls, module_name): + loaded = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "ls", "--format=json-pretty"))['enabled_modules'] + if module_name in loaded: + # The enable command is idempotent, but our wait for a restart + # isn't, so let's return now if it's already loaded + return + + initial_mgr_map = cls.mgr_cluster.get_mgr_map() + + # check if the the module is configured as an always on module + mgr_daemons = json.loads(cls.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "metadata")) + + for daemon in mgr_daemons: + if daemon["name"] == initial_mgr_map["active_name"]: + ceph_version = daemon["ceph_release"] + always_on = initial_mgr_map["always_on_modules"].get(ceph_version, []) + if module_name in always_on: + return + + log.debug("Loading Mgr module %s ...", module_name) + initial_gid = initial_mgr_map['active_gid'] + cls.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "enable", module_name, "--force") + + # Wait for the module to load + def has_restarted(): + mgr_map = cls.mgr_cluster.get_mgr_map() + done = mgr_map['active_gid'] != initial_gid and mgr_map['available'] + if done: + log.debug("Restarted after module load (new active {0}/{1})".format( + mgr_map['active_name'], mgr_map['active_gid'])) + return done + cls.wait_until_true(has_restarted, timeout=30) + + + @classmethod + def _get_uri(cls, service_name): + # Little dict hack so that I can assign into this from + # the get_or_none function + mgr_map = {'x': None} + + def _get_or_none(): + mgr_map['x'] = cls.mgr_cluster.get_mgr_map() + result = mgr_map['x']['services'].get(service_name, None) + return result + + cls.wait_until_true(lambda: _get_or_none() is not None, 30) + + uri = mgr_map['x']['services'][service_name] + + log.debug("Found {0} at {1} (daemon {2}/{3})".format( + service_name, uri, mgr_map['x']['active_name'], + mgr_map['x']['active_gid'])) + + return uri + + @classmethod + def _assign_ports(cls, module_name, config_name, min_port=7789): + """ + To avoid the need to run lots of hosts in teuthology tests to + get different URLs per mgr, we will hand out different ports + to each mgr here. + + This is already taken care of for us when running in a vstart + environment. + """ + # Start handing out ports well above Ceph's range. 
+ assign_port = min_port + + for mgr_id in cls.mgr_cluster.mgr_ids: + cls.mgr_cluster.mgr_stop(mgr_id) + cls.mgr_cluster.mgr_fail(mgr_id) + + for mgr_id in cls.mgr_cluster.mgr_ids: + log.debug("Using port {0} for {1} on mgr.{2}".format( + assign_port, module_name, mgr_id + )) + cls.mgr_cluster.set_module_localized_conf(module_name, mgr_id, + config_name, + str(assign_port), + force=True) + assign_port += 1 + + for mgr_id in cls.mgr_cluster.mgr_ids: + cls.mgr_cluster.mgr_restart(mgr_id) + + def is_available(): + mgr_map = cls.mgr_cluster.get_mgr_map() + done = mgr_map['available'] + if done: + log.debug("Available after assign ports (new active {0}/{1})".format( + mgr_map['active_name'], mgr_map['active_gid'])) + return done + cls.wait_until_true(is_available, timeout=30) diff --git a/qa/tasks/mgr/test_cache.py b/qa/tasks/mgr/test_cache.py new file mode 100644 index 000000000..71131cbc6 --- /dev/null +++ b/qa/tasks/mgr/test_cache.py @@ -0,0 +1,83 @@ +import json + +from .mgr_test_case import MgrTestCase + +class TestCache(MgrTestCase): + + def setUp(self): + super(TestCache, self).setUp() + self.setup_mgrs() + self._load_module("cli_api") + self.ttl = 10 + self.enable_cache(self.ttl) + + def tearDown(self): + self.disable_cache() + + def get_hit_miss_ratio(self): + perf_dump_command = f"daemon mgr.{self.mgr_cluster.get_active_id()} perf dump" + perf_dump_res = self.cluster_cmd(perf_dump_command) + perf_dump = json.loads(perf_dump_res) + h = perf_dump["mgr"]["cache_hit"] + m = perf_dump["mgr"]["cache_miss"] + return int(h), int(m) + + def enable_cache(self, ttl): + set_ttl = f"config set mgr mgr_ttl_cache_expire_seconds {ttl}" + self.cluster_cmd(set_ttl) + + def disable_cache(self): + set_ttl = "config set mgr mgr_ttl_cache_expire_seconds 0" + self.cluster_cmd(set_ttl) + + + def test_init_cache(self): + get_ttl = "config get mgr mgr_ttl_cache_expire_seconds" + res = self.cluster_cmd(get_ttl) + self.assertEquals(int(res), 10) + + def test_health_not_cached(self): + get_health = "mgr api get health" + + h_start, m_start = self.get_hit_miss_ratio() + self.cluster_cmd(get_health) + h, m = self.get_hit_miss_ratio() + + self.assertEquals(h, h_start) + self.assertEquals(m, m_start) + + def test_osdmap(self): + get_osdmap = "mgr api get osd_map" + + # store in cache + self.cluster_cmd(get_osdmap) + # get from cache + res = self.cluster_cmd(get_osdmap) + osd_map = json.loads(res) + self.assertIn("osds", osd_map) + self.assertGreater(len(osd_map["osds"]), 0) + self.assertIn("epoch", osd_map) + + + + def test_hit_miss_ratio(self): + get_osdmap = "mgr api get osd_map" + + hit_start, miss_start = self.get_hit_miss_ratio() + + def wait_miss(): + self.cluster_cmd(get_osdmap) + _, m = self.get_hit_miss_ratio() + return m == miss_start + 1 + + # Miss, add osd_map to cache + self.wait_until_true(wait_miss, self.ttl + 5) + h, m = self.get_hit_miss_ratio() + self.assertEquals(h, hit_start) + self.assertEquals(m, miss_start+1) + + # Hit, get osd_map from cache + self.cluster_cmd(get_osdmap) + h, m = self.get_hit_miss_ratio() + self.assertEquals(h, hit_start+1) + self.assertEquals(m, miss_start+1) diff --git a/qa/tasks/mgr/test_crash.py b/qa/tasks/mgr/test_crash.py new file mode 100644 index 000000000..49191127f --- /dev/null +++ b/qa/tasks/mgr/test_crash.py @@ -0,0 +1,108 @@ +import json +import logging +import datetime + +from .mgr_test_case import MgrTestCase + + +log = logging.getLogger(__name__) +UUID = 'd5775432-0742-44a3-a435-45095e32e6b1' +DATEFMT = '%Y-%m-%d %H:%M:%S.%f' + + +class 
TestCrash(MgrTestCase): + + def setUp(self): + super(TestCrash, self).setUp() + self.setup_mgrs() + self._load_module('crash') + + # Whip up some crash data + self.crashes = dict() + now = datetime.datetime.utcnow() + + for i in (0, 1, 3, 4, 8): + timestamp = now - datetime.timedelta(days=i) + timestamp = timestamp.strftime(DATEFMT) + 'Z' + crash_id = '_'.join((timestamp, UUID)).replace(' ', '_') + self.crashes[crash_id] = { + 'crash_id': crash_id, 'timestamp': timestamp, + } + + self.assertEqual( + 0, + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'post', '-i', '-', + stdin=json.dumps(self.crashes[crash_id]), + ) + ) + + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls', + ) + log.warning("setUp: crash ls returns %s" % retstr) + + self.oldest_crashid = crash_id + + def tearDown(self): + for crash in self.crashes.values(): + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'rm', crash['crash_id'] + ) + + def test_info(self): + for crash in self.crashes.values(): + log.warning('test_info: crash %s' % crash) + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls' + ) + log.warning('ls output: %s' % retstr) + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'info', crash['crash_id'], + ) + log.warning('crash info output: %s' % retstr) + crashinfo = json.loads(retstr) + self.assertIn('crash_id', crashinfo) + self.assertIn('timestamp', crashinfo) + + def test_ls(self): + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls', + ) + for crash in self.crashes.values(): + self.assertIn(crash['crash_id'], retstr) + + def test_rm(self): + crashid = next(iter(self.crashes.keys())) + self.assertEqual( + 0, + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'rm', crashid, + ) + ) + + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls', + ) + self.assertNotIn(crashid, retstr) + + def test_stat(self): + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'stat', + ) + self.assertIn('5 crashes recorded', retstr) + self.assertIn('4 older than 1 days old:', retstr) + self.assertIn('3 older than 3 days old:', retstr) + self.assertIn('1 older than 7 days old:', retstr) + + def test_prune(self): + self.assertEqual( + 0, + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'prune', '5' + ) + ) + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls', + ) + self.assertNotIn(self.oldest_crashid, retstr) diff --git a/qa/tasks/mgr/test_dashboard.py b/qa/tasks/mgr/test_dashboard.py new file mode 100644 index 000000000..c3459ec02 --- /dev/null +++ b/qa/tasks/mgr/test_dashboard.py @@ -0,0 +1,177 @@ +import logging +import ssl + +import requests +from requests.adapters import HTTPAdapter + +from .mgr_test_case import MgrTestCase + +log = logging.getLogger(__name__) + + +class TestDashboard(MgrTestCase): + MGRS_REQUIRED = 3 + + def setUp(self): + super(TestDashboard, self).setUp() + + self._assign_ports("dashboard", "ssl_server_port") + self._load_module("dashboard") + self.mgr_cluster.mon_manager.raw_cluster_cmd("dashboard", + "create-self-signed-cert") + + def tearDown(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr", + "mgr/dashboard/standby_behaviour", + "redirect") + self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr", + "mgr/dashboard/standby_error_status_code", + "500") + + def wait_until_webserver_available(self, url): + def _check_connection(): + try: + requests.get(url, 
allow_redirects=False, verify=False) + return True + except requests.ConnectionError: + pass + return False + self.wait_until_true(_check_connection, timeout=30) + + def test_standby(self): + # skip this test if mgr_standby_modules=false + if self.mgr_cluster.mon_manager.raw_cluster_cmd( + "config", "get", "mgr", "mgr_standby_modules").strip() == "false": + log.info("Skipping test_standby since mgr_standby_modules=false") + return + + original_active_id = self.mgr_cluster.get_active_id() + original_uri = self._get_uri("dashboard") + log.info("Originally running manager '{}' at {}".format( + original_active_id, original_uri)) + + # Force a failover and wait until the previously active manager + # is listed as standby. + self.mgr_cluster.mgr_fail(original_active_id) + self.wait_until_true( + lambda: original_active_id in self.mgr_cluster.get_standby_ids(), + timeout=30) + + failed_active_id = self.mgr_cluster.get_active_id() + failed_over_uri = self._get_uri("dashboard") + log.info("After failover running manager '{}' at {}".format( + failed_active_id, failed_over_uri)) + + self.assertNotEqual(original_uri, failed_over_uri) + + # Wait until web server of the standby node is settled. + self.wait_until_webserver_available(original_uri) + + # The original active daemon should have come back up as a standby + # and be doing redirects to the new active daemon. + r = requests.get(original_uri, allow_redirects=False, verify=False) + self.assertEqual(r.status_code, 303) + self.assertEqual(r.headers['Location'], failed_over_uri) + + # Ensure that every URL redirects to the active daemon. + r = requests.get("{}/runtime.js".format(original_uri.strip('/')), + allow_redirects=False, + verify=False) + self.assertEqual(r.status_code, 303) + self.assertEqual(r.headers['Location'], failed_over_uri) + + def test_standby_disable_redirect(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr", + "mgr/dashboard/standby_behaviour", + "error") + + original_active_id = self.mgr_cluster.get_active_id() + original_uri = self._get_uri("dashboard") + log.info("Originally running manager '{}' at {}".format( + original_active_id, original_uri)) + + # Force a failover and wait until the previously active manager + # is listed as standby. + self.mgr_cluster.mgr_fail(original_active_id) + self.wait_until_true( + lambda: original_active_id in self.mgr_cluster.get_standby_ids(), + timeout=30) + + failed_active_id = self.mgr_cluster.get_active_id() + failed_over_uri = self._get_uri("dashboard") + log.info("After failover running manager '{}' at {}".format( + failed_active_id, failed_over_uri)) + + self.assertNotEqual(original_uri, failed_over_uri) + + # Wait until web server of the standby node is settled. + self.wait_until_webserver_available(original_uri) + + # Redirection should be disabled now, instead a 500 must be returned. + r = requests.get(original_uri, allow_redirects=False, verify=False) + self.assertEqual(r.status_code, 500) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("config", "set", "mgr", + "mgr/dashboard/standby_error_status_code", + "503") + + # The customized HTTP status code (503) must be returned. + r = requests.get(original_uri, allow_redirects=False, verify=False) + self.assertEqual(r.status_code, 503) + + def test_urls(self): + base_uri = self._get_uri("dashboard") + + # This is a very simple smoke test to check that the dashboard can + # give us a 200 response to requests. We're not testing that + # the content is correct or even renders! 
+ + urls = [ + "/", + ] + + failures = [] + + for url in urls: + r = requests.get(base_uri + url, allow_redirects=False, + verify=False) + if r.status_code >= 300 and r.status_code < 400: + log.error("Unexpected redirect to: {0} (from {1})".format( + r.headers['Location'], base_uri)) + if r.status_code != 200: + failures.append(url) + + log.info("{0}: {1} ({2} bytes)".format( + url, r.status_code, len(r.content) + )) + + self.assertListEqual(failures, []) + + def test_tls(self): + class CustomHTTPAdapter(HTTPAdapter): + def __init__(self, ssl_version): + self.ssl_version = ssl_version + super().__init__() + + def init_poolmanager(self, *args, **kwargs): + kwargs['ssl_version'] = self.ssl_version + return super().init_poolmanager(*args, **kwargs) + + uri = self._get_uri("dashboard") + + # TLSv1 + with self.assertRaises(requests.exceptions.SSLError): + session = requests.Session() + session.mount(uri, CustomHTTPAdapter(ssl.PROTOCOL_TLSv1)) + session.get(uri, allow_redirects=False, verify=False) + + # TLSv1.1 + with self.assertRaises(requests.exceptions.SSLError): + session = requests.Session() + session.mount(uri, CustomHTTPAdapter(ssl.PROTOCOL_TLSv1_1)) + session.get(uri, allow_redirects=False, verify=False) + + session = requests.Session() + session.mount(uri, CustomHTTPAdapter(ssl.PROTOCOL_TLS)) + r = session.get(uri, allow_redirects=False, verify=False) + self.assertEqual(r.status_code, 200) diff --git a/qa/tasks/mgr/test_failover.py b/qa/tasks/mgr/test_failover.py new file mode 100644 index 000000000..bfff11262 --- /dev/null +++ b/qa/tasks/mgr/test_failover.py @@ -0,0 +1,182 @@ + +import logging +import json + +from .mgr_test_case import MgrTestCase + + +log = logging.getLogger(__name__) + + +class TestFailover(MgrTestCase): + MGRS_REQUIRED = 2 + + def setUp(self): + super(TestFailover, self).setUp() + self.setup_mgrs() + + def test_timeout(self): + """ + That when an active mgr stops responding, a standby is promoted + after mon_mgr_beacon_grace. + """ + + # Query which mgr is active + original_active = self.mgr_cluster.get_active_id() + original_standbys = self.mgr_cluster.get_standby_ids() + + # Stop that daemon + self.mgr_cluster.mgr_stop(original_active) + + # Assert that the other mgr becomes active + self.wait_until_true( + lambda: self.mgr_cluster.get_active_id() in original_standbys, + timeout=60 + ) + + self.mgr_cluster.mgr_restart(original_active) + self.wait_until_true( + lambda: original_active in self.mgr_cluster.get_standby_ids(), + timeout=10 + ) + + def test_timeout_nostandby(self): + """ + That when an active mgr stop responding, and no standby is + available, the active mgr is removed from the map anyway. + """ + # Query which mgr is active + original_active = self.mgr_cluster.get_active_id() + original_standbys = self.mgr_cluster.get_standby_ids() + + for s in original_standbys: + self.mgr_cluster.mgr_stop(s) + self.mgr_cluster.mgr_fail(s) + + self.assertListEqual(self.mgr_cluster.get_standby_ids(), []) + self.assertEqual(self.mgr_cluster.get_active_id(), original_active) + + grace = int(self.mgr_cluster.get_config("mon_mgr_beacon_grace")) + log.info("Should time out in about {0} seconds".format(grace)) + + self.mgr_cluster.mgr_stop(original_active) + + # Now wait for the mon to notice the mgr is gone and remove it + # from the map. 
+ self.wait_until_equal( + lambda: self.mgr_cluster.get_active_id(), + "", + timeout=grace * 2 + ) + + self.assertListEqual(self.mgr_cluster.get_standby_ids(), []) + self.assertEqual(self.mgr_cluster.get_active_id(), "") + + def test_explicit_fail(self): + """ + That when a user explicitly fails a daemon, a standby immediately + replaces it. + :return: + """ + # Query which mgr is active + original_active = self.mgr_cluster.get_active_id() + original_standbys = self.mgr_cluster.get_standby_ids() + + self.mgr_cluster.mgr_fail(original_active) + + # A standby should take over + self.wait_until_true( + lambda: self.mgr_cluster.get_active_id() in original_standbys, + timeout=60 + ) + + # The one we failed should come back as a standby (he isn't + # really dead) + self.wait_until_true( + lambda: original_active in self.mgr_cluster.get_standby_ids(), + timeout=10 + ) + + # Both daemons should have fully populated metadata + # (regression test for http://tracker.ceph.com/issues/21260) + meta = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "metadata")) + id_to_meta = dict([(i['name'], i) for i in meta]) + for i in [original_active] + original_standbys: + self.assertIn(i, id_to_meta) + self.assertIn('ceph_version', id_to_meta[i]) + + # We should be able to fail back over again: the exercises + # our re-initialization of the python runtime within + # a single process lifetime. + + # Get rid of any bystander standbys so that the original_active + # will be selected as next active. + new_active = self.mgr_cluster.get_active_id() + for daemon in original_standbys: + if daemon != new_active: + self.mgr_cluster.mgr_stop(daemon) + self.mgr_cluster.mgr_fail(daemon) + + self.assertListEqual(self.mgr_cluster.get_standby_ids(), + [original_active]) + + self.mgr_cluster.mgr_stop(new_active) + self.mgr_cluster.mgr_fail(new_active) + + self.assertEqual(self.mgr_cluster.get_active_id(), original_active) + self.assertEqual(self.mgr_cluster.get_standby_ids(), []) + + def test_standby_timeout(self): + """ + That when a standby daemon stops sending beacons, it is + removed from the list of standbys + :return: + """ + original_active = self.mgr_cluster.get_active_id() + original_standbys = self.mgr_cluster.get_standby_ids() + + victim = original_standbys[0] + self.mgr_cluster.mgr_stop(victim) + + expect_standbys = set(original_standbys) - {victim} + + self.wait_until_true( + lambda: set(self.mgr_cluster.get_standby_ids()) == expect_standbys, + timeout=60 + ) + self.assertEqual(self.mgr_cluster.get_active_id(), original_active) + +class TestLibCephSQLiteFailover(MgrTestCase): + MGRS_REQUIRED = 1 + + def setUp(self): + super(TestLibCephSQLiteFailover, self).setUp() + self.setup_mgrs() + + def get_libcephsqlite(self): + mgr_map = self.mgr_cluster.get_mgr_map() + addresses = self.mgr_cluster.get_registered_clients('libcephsqlite', mgr_map=mgr_map) + self.assertEqual(len(addresses), 1) + return addresses[0] + + def test_maybe_reonnect(self): + """ + That the devicehealth module can recover after losing its libcephsqlite lock. 
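The lock is broken by blocklisting the client address registered in the mgr map; the entry handed to 'osd blocklist add' is built roughly like this (field names as used below, values hypothetical):

    client = {"addr": "v2:192.168.0.1:0", "nonce": 1234567}    # hypothetical values
    spec = "{addr}/{nonce}".format(**client)                   # -> "v2:192.168.0.1:0/1234567"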
+ """ + + # make sure the database is populated and loaded by the module + self.mgr_cluster.mon_manager.ceph("device scrape-health-metrics") + + oldaddr = self.get_libcephsqlite() + self.mgr_cluster.mon_manager.ceph(f"osd blocklist add {oldaddr['addr']}/{oldaddr['nonce']}") + + def test(): + self.mgr_cluster.mon_manager.ceph("device scrape-health-metrics") + newaddr = self.get_libcephsqlite() + return oldaddr != newaddr + + self.wait_until_true( + test, + timeout=30 + ) diff --git a/qa/tasks/mgr/test_insights.py b/qa/tasks/mgr/test_insights.py new file mode 100644 index 000000000..aa2548881 --- /dev/null +++ b/qa/tasks/mgr/test_insights.py @@ -0,0 +1,192 @@ +import logging +import json +import datetime +import time + +from .mgr_test_case import MgrTestCase + + +log = logging.getLogger(__name__) +UUID = 'd5775432-0742-44a3-a435-45095e32e6b2' +DATEFMT = '%Y-%m-%d %H:%M:%S.%f' + +class TestInsights(MgrTestCase): + def setUp(self): + super(TestInsights, self).setUp() + self.setup_mgrs() + self._load_module("insights") + self._load_module("selftest") + self.crash_ids = [] + + def tearDown(self): + self._clear_crashes() + + def _insights(self): + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd("insights") + return json.loads(retstr) + + def _add_crash(self, hours, make_invalid = False): + now = datetime.datetime.utcnow() + timestamp = now - datetime.timedelta(hours = hours) + timestamp = timestamp.strftime(DATEFMT) + 'Z' + crash_id = '_'.join((timestamp, UUID)).replace(' ', '_') + crash = { + 'crash_id': crash_id, + 'timestamp': timestamp, + } + if make_invalid: + crash["timestamp"] = "not a timestamp" + + ret = self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'post', '-i', '-', + stdin=json.dumps(crash) + ) + self.crash_ids.append(crash_id) + self.assertEqual(0, ret) + + def _clear_crashes(self): + for crash_id in self.crash_ids: + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'rm', crash_id + ) + + def _wait_for_health_history_checks(self, *args): + """Wait for a set of health checks to appear in the health history""" + timeout = datetime.datetime.utcnow() + \ + datetime.timedelta(seconds = 15) + while True: + report = self._insights() + missing = False + for check in args: + if check not in report["health"]["history"]["checks"]: + missing = True + break + if not missing: + return + self.assertGreater(timeout, + datetime.datetime.utcnow()) + time.sleep(0.25) + + def _wait_for_curr_health_cleared(self, check): + timeout = datetime.datetime.utcnow() + \ + datetime.timedelta(seconds = 15) + while True: + report = self._insights() + if check not in report["health"]["current"]["checks"]: + return + self.assertGreater(timeout, + datetime.datetime.utcnow()) + time.sleep(0.25) + + def test_health_history(self): + # use empty health history as starting point + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "insights", "prune-health", "0") + report = self._insights() + self.assertFalse(report["health"]["history"]["checks"]) + + # generate health check history entries. we want to avoid the edge case + # of running these tests at _exactly_ the top of the hour so we can + # explicitly control when hourly work occurs. for this we use the + # current time offset to a half hour. + now = datetime.datetime.utcnow() + now = datetime.datetime( + year = now.year, + month = now.month, + day = now.day, + hour = now.hour, + minute = 30) + + check_names = set() + for hours in [-18, -11, -5, -1, 0]: + # change the insight module's perception of "now" ... 
+ self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "mgr", "self-test", "insights_set_now_offset", str(hours)) + + # ... to simulate health check arrivals in the past + unique_check_name = "insights_health_check_{}".format(hours) + health_check = { + unique_check_name: { + "severity": "warning", + "summary": "summary", + "detail": ["detail"] + } + } + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "mgr", "self-test", "health", "set", + json.dumps(health_check)) + + check_names.add(unique_check_name) + + # and also set the same health check to test deduplication + dupe_check_name = "insights_health_check" + health_check = { + dupe_check_name: { + "severity": "warning", + "summary": "summary", + "detail": ["detail"] + } + } + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "mgr", "self-test", "health", "set", + json.dumps(health_check)) + + check_names.add(dupe_check_name) + + # wait for the health check to show up in the history report + self._wait_for_health_history_checks(unique_check_name, dupe_check_name) + + # clear out the current health checks before moving on + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "mgr", "self-test", "health", "clear") + self._wait_for_curr_health_cleared(unique_check_name) + + report = self._insights() + for check in check_names: + self.assertIn(check, report["health"]["history"]["checks"]) + + # restart the manager + active_id = self.mgr_cluster.get_active_id() + self.mgr_cluster.mgr_restart(active_id) + + # pruning really removes history + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + "insights", "prune-health", "0") + report = self._insights() + self.assertFalse(report["health"]["history"]["checks"]) + + def test_schema(self): + """TODO: assert conformance to a full schema specification?""" + report = self._insights() + for key in ["osd_metadata", + "pg_summary", + "mon_status", + "manager_map", + "service_map", + "mon_map", + "crush_map", + "fs_map", + "osd_tree", + "df", + "osd_dump", + "config", + "health", + "crashes", + "version", + "errors"]: + self.assertIn(key, report) + + def test_crash_history(self): + self._clear_crashes() + report = self._insights() + self.assertFalse(report["crashes"]["summary"]) + self.assertFalse(report["errors"]) + + # crashes show up in the report + self._add_crash(1) + report = self._insights() + self.assertTrue(report["crashes"]["summary"]) + self.assertFalse(report["errors"]) + log.warning("{}".format(json.dumps(report["crashes"], indent=2))) + + self._clear_crashes() diff --git a/qa/tasks/mgr/test_module_selftest.py b/qa/tasks/mgr/test_module_selftest.py new file mode 100644 index 000000000..7ac296037 --- /dev/null +++ b/qa/tasks/mgr/test_module_selftest.py @@ -0,0 +1,254 @@ + +import time +import requests +import errno +import logging + +from teuthology.exceptions import CommandFailedError + +from .mgr_test_case import MgrTestCase + + +log = logging.getLogger(__name__) + + +class TestModuleSelftest(MgrTestCase): + """ + That modules with a self-test command can be loaded and execute it + without errors. + + This is not a substitute for really testing the modules, but it + is quick and is designed to catch regressions that could occur + if data structures change in a way that breaks how the modules + touch them. 
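Each module is driven through the same entry point; running, say, the telemetry module's self test boils down to (illustrative):

    self.mgr_cluster.mon_manager.raw_cluster_cmd(
        "mgr", "self-test", "module", "telemetry")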
+ """ + MGRS_REQUIRED = 1 + + def setUp(self): + super(TestModuleSelftest, self).setUp() + self.setup_mgrs() + + def _selftest_plugin(self, module_name): + self._load_module("selftest") + self._load_module(module_name) + + # Execute the module's self_test() method + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "module", module_name) + + def test_zabbix(self): + # Set these mandatory config fields so that the zabbix module + # won't trigger health/log errors on load/serve. + self.mgr_cluster.set_module_conf("zabbix", "zabbix_host", "localhost") + self.mgr_cluster.set_module_conf("zabbix", "identifier", "foo") + self._selftest_plugin("zabbix") + + def test_prometheus(self): + self._assign_ports("prometheus", "server_port", min_port=8100) + self._selftest_plugin("prometheus") + + def test_influx(self): + self._selftest_plugin("influx") + + def test_diskprediction_local(self): + self._load_module("selftest") + python_version = self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "python-version") + if tuple(int(v) for v in python_version.split('.')) == (3, 8): + # https://tracker.ceph.com/issues/45147 + self.skipTest(f'python {python_version} not compatible with ' + 'diskprediction_local') + self._selftest_plugin("diskprediction_local") + + def test_telegraf(self): + self._selftest_plugin("telegraf") + + def test_iostat(self): + self._selftest_plugin("iostat") + + def test_devicehealth(self): + self._selftest_plugin("devicehealth") + + def test_selftest_run(self): + self._load_module("selftest") + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", "run") + + def test_telemetry(self): + self._selftest_plugin("telemetry") + + def test_crash(self): + self._selftest_plugin("crash") + + def test_orchestrator(self): + self._selftest_plugin("orchestrator") + + + def test_selftest_config_update(self): + """ + That configuration updates are seen by running mgr modules + """ + self._load_module("selftest") + + def get_value(): + return self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "config", "get", "testkey").strip() + + self.assertEqual(get_value(), "None") + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "config", "set", "mgr", "mgr/selftest/testkey", "foo") + self.wait_until_equal(get_value, "foo", timeout=10) + + def get_localized_value(): + return self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "config", "get_localized", "testkey").strip() + + self.assertEqual(get_localized_value(), "foo") + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "config", "set", "mgr", "mgr/selftest/{}/testkey".format( + self.mgr_cluster.get_active_id()), + "bar") + self.wait_until_equal(get_localized_value, "bar", timeout=10) + + + def test_selftest_command_spam(self): + # Use the selftest module to stress the mgr daemon + self._load_module("selftest") + + # Use the dashboard to test that the mgr is still able to do its job + self._assign_ports("dashboard", "ssl_server_port") + self._load_module("dashboard") + self.mgr_cluster.mon_manager.raw_cluster_cmd("dashboard", + "create-self-signed-cert") + + original_active = self.mgr_cluster.get_active_id() + original_standbys = self.mgr_cluster.get_standby_ids() + + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", + "background", "start", + "command_spam") + + dashboard_uri = self._get_uri("dashboard") + + delay = 10 + periods = 10 + for i in range(0, periods): + t1 = time.time() + # Check that an HTTP module remains responsive + r = requests.get(dashboard_uri, 
verify=False) + self.assertEqual(r.status_code, 200) + + # Check that a native non-module command remains responsive + self.mgr_cluster.mon_manager.raw_cluster_cmd("osd", "df") + + time.sleep(delay - (time.time() - t1)) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", + "background", "stop") + + # Check that all mgr daemons are still running + self.assertEqual(original_active, self.mgr_cluster.get_active_id()) + self.assertEqual(original_standbys, self.mgr_cluster.get_standby_ids()) + + def test_module_commands(self): + """ + That module-handled commands have appropriate behavior on + disabled/failed/recently-enabled modules. + """ + + # Calling a command on a disabled module should return the proper + # error code. + self._load_module("selftest") + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "disable", "selftest") + with self.assertRaises(CommandFailedError) as exc_raised: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "run") + + self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP) + + # Calling a command that really doesn't exist should give me EINVAL. + with self.assertRaises(CommandFailedError) as exc_raised: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "osd", "albatross") + + self.assertEqual(exc_raised.exception.exitstatus, errno.EINVAL) + + # Enabling a module and then immediately using ones of its commands + # should work (#21683) + self._load_module("selftest") + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "config", "get", "testkey") + + # Calling a command for a failed module should return the proper + # error code. + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "background", "start", "throw_exception") + with self.assertRaises(CommandFailedError) as exc_raised: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "run" + ) + self.assertEqual(exc_raised.exception.exitstatus, errno.EIO) + + # A health alert should be raised for a module that has thrown + # an exception from its serve() method + self.wait_for_health( + "Module 'selftest' has failed: Synthetic exception in serve", + timeout=30) + # prune the crash reports, so that the health report is back to + # clean + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "crash", "prune", "0") + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "module", "disable", "selftest") + + self.wait_for_health_clear(timeout=30) + + def test_module_remote(self): + """ + Use the selftest module to exercise inter-module communication + """ + self._load_module("selftest") + # The "self-test remote" operation just happens to call into + # influx. + self._load_module("influx") + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "remote") + + def test_selftest_cluster_log(self): + """ + Use the selftest module to test the cluster/audit log interface. 
+ """ + priority_map = { + "info": "INF", + "security": "SEC", + "warning": "WRN", + "error": "ERR" + } + self._load_module("selftest") + for priority in priority_map.keys(): + message = "foo bar {}".format(priority) + log_message = "[{}] {}".format(priority_map[priority], message) + # Check for cluster/audit logs: + # 2018-09-24 09:37:10.977858 mgr.x [INF] foo bar info + # 2018-09-24 09:37:10.977860 mgr.x [SEC] foo bar security + # 2018-09-24 09:37:10.977863 mgr.x [WRN] foo bar warning + # 2018-09-24 09:37:10.977866 mgr.x [ERR] foo bar error + with self.assert_cluster_log(log_message): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "cluster-log", "cluster", + priority, message) + with self.assert_cluster_log(log_message, watch_channel="audit"): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "cluster-log", "audit", + priority, message) + + def test_selftest_cluster_log_unknown_channel(self): + """ + Use the selftest module to test the cluster/audit log interface. + """ + with self.assertRaises(CommandFailedError) as exc_raised: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + "mgr", "self-test", "cluster-log", "xyz", + "ERR", "The channel does not exist") + self.assertEqual(exc_raised.exception.exitstatus, errno.EOPNOTSUPP) diff --git a/qa/tasks/mgr/test_orchestrator_cli.py b/qa/tasks/mgr/test_orchestrator_cli.py new file mode 100644 index 000000000..3fccef9a6 --- /dev/null +++ b/qa/tasks/mgr/test_orchestrator_cli.py @@ -0,0 +1,250 @@ +import errno +import json +import logging + + +from .mgr_test_case import MgrTestCase + + +log = logging.getLogger(__name__) + + +class TestOrchestratorCli(MgrTestCase): + MGRS_REQUIRED = 1 + + def _cmd(self, module, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd(module, *args) + + def _orch_cmd(self, *args): + return self._cmd("orch", *args) + + def _progress_cmd(self, *args): + return self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", *args) + + def _orch_cmd_result(self, *args, **kwargs): + """ + raw_cluster_cmd doesn't support kwargs. 
+ """ + return self.mgr_cluster.mon_manager.raw_cluster_cmd_result("orch", *args, **kwargs) + + def _test_orchestrator_cmd_result(self, *args, **kwargs): + return self.mgr_cluster.mon_manager.raw_cluster_cmd_result("test_orchestrator", *args, **kwargs) + + def setUp(self): + super(TestOrchestratorCli, self).setUp() + + self._load_module("orchestrator") + self._load_module("test_orchestrator") + self._orch_cmd("set", "backend", "test_orchestrator") + + def test_status(self): + ret = self._orch_cmd("status") + self.assertIn("test_orchestrator", ret) + + def test_device_ls(self): + ret = self._orch_cmd("device", "ls") + self.assertIn("localhost", ret) + + def test_device_ls_refresh(self): + ret = self._orch_cmd("device", "ls", "--refresh") + self.assertIn("localhost", ret) + + def test_device_ls_hoshs(self): + ret = self._orch_cmd("device", "ls", "localhost", "host1") + self.assertIn("localhost", ret) + + + def test_device_ls_json(self): + ret = self._orch_cmd("device", "ls", "--format", "json") + self.assertIn("localhost", ret) + self.assertIsInstance(json.loads(ret), list) + + def test_ps(self): + ret = self._orch_cmd("ps") + self.assertIn("mgr", ret) + + def test_ps_json(self): + ret = self._orch_cmd("ps", "--format", "json") + self.assertIsInstance(json.loads(ret), list) + self.assertIn("mgr", ret) + + + def test_service_action(self): + self._orch_cmd("restart", "mds.cephfs") + self._orch_cmd("stop", "mds.cephfs") + self._orch_cmd("start", "mds.cephfs") + + def test_service_instance_action(self): + self._orch_cmd("daemon", "restart", "mds.a") + self._orch_cmd("daemon", "stop", "mds.a") + self._orch_cmd("daemon", "start", "mds.a") + + def test_osd_create(self): + drive_group = """ +service_type: osd +service_id: any.sda +placement: + host_pattern: '*' +data_devices: + all: True +""" + res = self._orch_cmd_result("apply", "osd", "-i", "-", + stdin=drive_group) + self.assertEqual(res, 0) + + def test_blink_device_light(self): + def _ls_lights(what): + return json.loads(self._cmd("device", "ls-lights"))[what] + + metadata = json.loads(self._cmd("osd", "metadata")) + dev_name_ids = [osd["device_ids"] for osd in metadata] + _, dev_id = [d.split('=') for d in dev_name_ids if len(d.split('=')) == 2][0] + + for t in ["ident", "fault"]: + self.assertNotIn(dev_id, _ls_lights(t)) + self._cmd("device", "light", "on", dev_id, t) + self.assertIn(dev_id, _ls_lights(t)) + + health = { + 'ident': 'DEVICE_IDENT_ON', + 'fault': 'DEVICE_FAULT_ON', + }[t] + self.wait_for_health(health, 30) + + self._cmd("device", "light", "off", dev_id, t) + self.assertNotIn(dev_id, _ls_lights(t)) + + self.wait_for_health_clear(30) + + def test_mds_add(self): + self._orch_cmd('daemon', 'add', 'mds', 'fsname') + + def test_rgw_add(self): + self._orch_cmd('daemon', 'add', 'rgw', 'realm', 'zone') + + def test_nfs_add(self): + self._orch_cmd('daemon', 'add', "nfs", "service_name") + + def test_osd_rm(self): + self._orch_cmd('daemon', "rm", "osd.0", '--force') + + def test_mds_rm(self): + self._orch_cmd("daemon", "rm", "mds.fsname") + + def test_rgw_rm(self): + self._orch_cmd("daemon", "rm", "rgw.myrealm.myzone") + + def test_nfs_rm(self): + self._orch_cmd("daemon", "rm", "nfs.service_name") + + def test_host_ls(self): + out = self._orch_cmd("host", "ls", "--format=json") + hosts = json.loads(out) + self.assertEqual(len(hosts), 1) + self.assertEqual(hosts[0]["hostname"], "localhost") + + def test_host_add(self): + self._orch_cmd("host", "add", "hostname") + + def test_host_rm(self): + self._orch_cmd("host", "rm", "hostname") + + 
def test_mon_update(self): + self._orch_cmd("apply", "mon", "3 host1:1.2.3.0/24 host2:1.2.3.0/24 host3:10.0.0.0/8") + self._orch_cmd("apply", "mon", "3 host1:1.2.3.4 host2:1.2.3.4 host3:10.0.0.1") + + def test_mgr_update(self): + self._orch_cmd("apply", "mgr", "3") + + def test_nfs_update(self): + self._orch_cmd("apply", "nfs", "service_name", "2") + + def test_error(self): + ret = self._orch_cmd_result("host", "add", "raise_validation_error") + self.assertEqual(ret, errno.EINVAL) + ret = self._orch_cmd_result("host", "add", "raise_error") + self.assertEqual(ret, errno.EINVAL) + ret = self._orch_cmd_result("host", "add", "raise_bug") + self.assertEqual(ret, errno.EINVAL) + ret = self._orch_cmd_result("host", "add", "raise_not_implemented") + self.assertEqual(ret, errno.ENOENT) + ret = self._orch_cmd_result("host", "add", "raise_no_orchestrator") + self.assertEqual(ret, errno.ENOENT) + ret = self._orch_cmd_result("host", "add", "raise_import_error") + self.assertEqual(ret, errno.ENOENT) + + def test_load_data(self): + data = { + 'inventory': [ + { + 'name': 'host0', + 'devices': [ + { + 'type': 'hdd', + 'id': '/dev/sda', + 'size': 1024**4 * 4, + 'rotates': True + } + ] + }, + { + 'name': 'host1', + 'devices': [ + { + 'type': 'hdd', + 'id': '/dev/sda', + 'size': 1024**4 * 4, + 'rotates': True + } + ] + } + ], + 'daemons': [ + { + 'hostname': 'host0', + 'daemon_type': 'mon', + 'daemon_id': 'a' + }, + { + 'hostname': 'host1', + 'daemon_type': 'osd', + 'daemon_id': '1' + } + ] + } + + ret = self._test_orchestrator_cmd_result('load_data', '-i', '-', stdin=json.dumps(data)) + self.assertEqual(ret, 0) + out = self._orch_cmd('device', 'ls', '--format=json') + inventory = data['inventory'] + inventory_result = json.loads(out) + self.assertEqual(len(inventory), len(inventory_result)) + + out = self._orch_cmd('device', 'ls', 'host0', '--format=json') + inventory_result = json.loads(out) + self.assertEqual(len(inventory_result), 1) + self.assertEqual(inventory_result[0]['name'], 'host0') + + out = self._orch_cmd('ps', '--format=json') + daemons = data['daemons'] + daemons_result = json.loads(out) + self.assertEqual(len(daemons), len(daemons_result)) + + out = self._orch_cmd('ps', 'host0', '--format=json') + daemons_result = json.loads(out) + self.assertEqual(len(daemons_result), 1) + self.assertEqual(daemons_result[0]['hostname'], 'host0') + + # test invalid input file: invalid json + json_str = '{ "inventory: ' + ret = self._test_orchestrator_cmd_result('load_data', '-i', '-', stdin=json_str) + self.assertEqual(ret, errno.EINVAL) + + # test invalid input file: missing key + json_str = '{ "inventory": [{"devices": []}] }' + ret = self._test_orchestrator_cmd_result('load_data', '-i', '-', stdin=json_str) + self.assertEqual(ret, errno.EINVAL) + + # load empty data for other tests + ret = self._test_orchestrator_cmd_result('load_data', '-i', '-', stdin='{}') + self.assertEqual(ret, 0) diff --git a/qa/tasks/mgr/test_progress.py b/qa/tasks/mgr/test_progress.py new file mode 100644 index 000000000..a80600c6a --- /dev/null +++ b/qa/tasks/mgr/test_progress.py @@ -0,0 +1,423 @@ + +import json +import logging +import time +from .mgr_test_case import MgrTestCase +from contextlib import contextmanager + +log = logging.getLogger(__name__) + + +class TestProgress(MgrTestCase): + POOL = "progress_data" + + # How long we expect to wait at most between taking an OSD out + # and seeing the progress event pop up. 
+ EVENT_CREATION_PERIOD = 60 + + WRITE_PERIOD = 30 + + # Generous period for OSD recovery, should be same order of magnitude + # to how long it took to write the data to begin with + RECOVERY_PERIOD = WRITE_PERIOD * 4 + + def _get_progress(self): + out = self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "json") + return json.loads(out) + + def _all_events(self): + """ + To avoid racing on completion, we almost always want to look + for events in the total list of active and complete, so + munge them into a single list. + """ + p = self._get_progress() + log.info(json.dumps(p, indent=2)) + return p['events'] + p['completed'] + + def _events_in_progress(self): + """ + this function returns all events that are in progress + """ + p = self._get_progress() + log.info(json.dumps(p, indent=2)) + return p['events'] + + def _completed_events(self): + """ + This function returns all events that are completed + """ + p = self._get_progress() + log.info(json.dumps(p, indent=2)) + return p['completed'] + + def is_osd_marked_out(self, ev): + return ev['message'].endswith('marked out') + + def is_osd_marked_in(self, ev): + return ev['message'].endswith('marked in') + + def _get_osd_in_out_events(self, marked='both'): + """ + Return the event that deals with OSDs being + marked in, out or both + """ + + marked_in_events = [] + marked_out_events = [] + + events_in_progress = self._events_in_progress() + for ev in events_in_progress: + if self.is_osd_marked_out(ev): + marked_out_events.append(ev) + elif self.is_osd_marked_in(ev): + marked_in_events.append(ev) + + if marked == 'both': + return [marked_in_events] + [marked_out_events] + elif marked == 'in': + return marked_in_events + else: + return marked_out_events + + def _osd_in_out_events_count(self, marked='both'): + """ + Count the number of on going recovery events that deals with + OSDs being marked in, out or both. + """ + events_in_progress = self._events_in_progress() + marked_in_count = 0 + marked_out_count = 0 + + for ev in events_in_progress: + if self.is_osd_marked_out(ev): + marked_out_count += 1 + elif self.is_osd_marked_in(ev): + marked_in_count += 1 + + if marked == 'both': + return marked_in_count + marked_out_count + elif marked == 'in': + return marked_in_count + else: + return marked_out_count + + def _setup_pool(self, size=None): + self.mgr_cluster.mon_manager.create_pool(self.POOL) + if size is not None: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'pool', 'set', self.POOL, 'size', str(size)) + + def _osd_in_out_completed_events_count(self, marked='both'): + """ + Count the number of completed recovery events that deals with + OSDs being marked in, out, or both. + """ + + completed_events = self._completed_events() + marked_in_count = 0 + marked_out_count = 0 + + for ev in completed_events: + if self.is_osd_marked_out(ev): + marked_out_count += 1 + elif self.is_osd_marked_in(ev): + marked_in_count += 1 + + if marked == 'both': + return marked_in_count + marked_out_count + elif marked == 'in': + return marked_in_count + else: + return marked_out_count + + def _write_some_data(self, t): + """ + To adapt to test systems of varying performance, we write + data for a defined time period, rather than to a defined + capacity. This will hopefully result in a similar timescale + for PG recovery after an OSD failure. 
+ """ + + args = [ + "rados", "-p", self.POOL, "bench", str(t), "write", "-t", "16"] + + self.mgr_cluster.admin_remote.run(args=args, wait=True) + + def _osd_count(self): + osd_map = self.mgr_cluster.mon_manager.get_osd_dump_json() + return len(osd_map['osds']) + + @contextmanager + def recovery_backfill_disabled(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'set', 'nobackfill') + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'set', 'norecover') + yield + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'unset', 'nobackfill') + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'unset', 'norecover') + + def setUp(self): + super(TestProgress, self).setUp() + # Ensure we have at least four OSDs + if self._osd_count() < 4: + self.skipTest("Not enough OSDS!") + + # Remove any filesystems so that we can remove their pools + if self.mds_cluster: + self.mds_cluster.mds_stop() + self.mds_cluster.mds_fail() + self.mds_cluster.delete_all_filesystems() + + # Remove all other pools + for pool in self.mgr_cluster.mon_manager.get_osd_dump_json()['pools']: + self.mgr_cluster.mon_manager.remove_pool(pool['pool_name']) + + self._load_module("progress") + self.mgr_cluster.mon_manager.raw_cluster_cmd('progress', 'clear') + + def _simulate_failure(self, osd_ids=None): + """ + Common lead-in to several tests: get some data in the cluster, + then mark an OSD out to trigger the start of a progress event. + + Return the JSON representation of the failure event. + """ + + if osd_ids is None: + osd_ids = [0] + + self._setup_pool() + self._write_some_data(self.WRITE_PERIOD) + with self.recovery_backfill_disabled(): + for osd_id in osd_ids: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'out', str(osd_id)) + + # Wait for a progress event to pop up + self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1, + timeout=self.EVENT_CREATION_PERIOD, + period=1) + + ev = self._get_osd_in_out_events('out')[0] + log.info(json.dumps(ev, indent=1)) + self.assertIn("Rebalancing after osd.0 marked out", ev['message']) + return ev + + def _simulate_back_in(self, osd_ids, initial_event): + for osd_id in osd_ids: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'in', str(osd_id)) + + # First Event should complete promptly + self.wait_until_true(lambda: self._is_complete(initial_event['id']), + timeout=self.RECOVERY_PERIOD) + + with self.recovery_backfill_disabled(): + + try: + # Wait for progress event marked in to pop up + self.wait_until_equal(lambda: self._osd_in_out_events_count('in'), 1, + timeout=self.EVENT_CREATION_PERIOD, + period=1) + except RuntimeError as ex: + if not "Timed out after" in str(ex): + raise ex + + log.info("There was no PGs affected by osd being marked in") + return None + + new_event = self._get_osd_in_out_events('in')[0] + return new_event + + def _no_events_anywhere(self): + """ + Whether there are any live or completed events + """ + p = self._get_progress() + total_events = len(p['events']) + len(p['completed']) + return total_events == 0 + + def _is_quiet(self): + """ + Whether any progress events are live. 
+ """ + return len(self._get_progress()['events']) == 0 + + def _is_complete(self, ev_id): + progress = self._get_progress() + live_ids = [ev['id'] for ev in progress['events']] + complete_ids = [ev['id'] for ev in progress['completed']] + if ev_id in complete_ids: + assert ev_id not in live_ids + return True + else: + assert ev_id in live_ids + return False + + def _is_inprogress_or_complete(self, ev_id): + for ev in self._events_in_progress(): + if ev['id'] == ev_id: + return ev['progress'] > 0 + # check if the event completed + return self._is_complete(ev_id) + + def tearDown(self): + if self.POOL in self.mgr_cluster.mon_manager.pools: + self.mgr_cluster.mon_manager.remove_pool(self.POOL) + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'unset', 'nobackfill') + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'unset', 'norecover') + + osd_map = self.mgr_cluster.mon_manager.get_osd_dump_json() + for osd in osd_map['osds']: + if osd['weight'] == 0.0: + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'in', str(osd['osd'])) + + # Unset allow_pg_recovery_event in case it's set to true + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'config', 'set', 'mgr', + 'mgr/progress/allow_pg_recovery_event', 'false') + + super(TestProgress, self).tearDown() + + def test_osd_healthy_recovery(self): + """ + The simple recovery case: an OSD goes down, its PGs get a new + placement, and we wait for the PG to get healthy in its new + locations. + """ + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'config', 'set', 'mgr', + 'mgr/progress/allow_pg_recovery_event', 'true') + + ev = self._simulate_failure() + + # Wait for progress event to ultimately reach completion + self.wait_until_true(lambda: self._is_complete(ev['id']), + timeout=self.RECOVERY_PERIOD) + self.assertEqual(self._osd_in_out_events_count(), 0) + + def test_pool_removal(self): + """ + That a pool removed during OSD recovery causes the + progress event to be correctly marked complete once there + is no more data to move. + """ + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'config', 'set', 'mgr', + 'mgr/progress/allow_pg_recovery_event', 'true') + + ev = self._simulate_failure() + + self.mgr_cluster.mon_manager.remove_pool(self.POOL) + + # Event should complete promptly + self.wait_until_true(lambda: self._is_complete(ev['id']), + timeout=self.RECOVERY_PERIOD) + self.assertEqual(self._osd_in_out_events_count(), 0) + + def test_osd_came_back(self): + """ + When a recovery is underway, but then the out OSD + comes back in, such that recovery is no longer necessary. + It should create another event for when osd is marked in + and cancel the one that is still ongoing. + """ + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'config', 'set', 'mgr', + 'mgr/progress/allow_pg_recovery_event', 'true') + + ev1 = self._simulate_failure() + + ev2 = self._simulate_back_in([0], ev1) + + if ev2 is not None: + # Wait for progress event to ultimately complete + self.wait_until_true(lambda: self._is_complete(ev2['id']), + timeout=self.RECOVERY_PERIOD) + + self.assertEqual(self._osd_in_out_events_count(), 0) + + def test_turn_off_module(self): + """ + When the the module is turned off, there should not + be any on going events or completed events. + Also module should not accept any kind of Remote Event + coming in from other module, however, once it is turned + back, on creating an event should be working as it is. 
+ """ + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'config', 'set', 'mgr', + 'mgr/progress/allow_pg_recovery_event', 'true') + + pool_size = 3 + self._setup_pool(size=pool_size) + self._write_some_data(self.WRITE_PERIOD) + self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "off") + + with self.recovery_backfill_disabled(): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'out', '0') + + time.sleep(self.EVENT_CREATION_PERIOD/2) + + with self.recovery_backfill_disabled(): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'in', '0') + + time.sleep(self.EVENT_CREATION_PERIOD/2) + + self.assertTrue(self._no_events_anywhere()) + + self.mgr_cluster.mon_manager.raw_cluster_cmd("progress", "on") + + self._write_some_data(self.WRITE_PERIOD) + + with self.recovery_backfill_disabled(): + + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'out', '0') + + # Wait for a progress event to pop up + self.wait_until_equal(lambda: self._osd_in_out_events_count('out'), 1, + timeout=self.EVENT_CREATION_PERIOD, + period=1) + + ev1 = self._get_osd_in_out_events('out')[0] + + log.info(json.dumps(ev1, indent=1)) + + self.wait_until_true(lambda: self._is_complete(ev1['id']), + check_fn=lambda: self._is_inprogress_or_complete(ev1['id']), + timeout=self.RECOVERY_PERIOD) + self.assertTrue(self._is_quiet()) + + def test_default_progress_test(self): + """ + progress module disabled the event of pg recovery event + by default, we test this to see if this holds true + """ + pool_size = 3 + self._setup_pool(size=pool_size) + self._write_some_data(self.WRITE_PERIOD) + + with self.recovery_backfill_disabled(): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'out', '0') + + time.sleep(self.EVENT_CREATION_PERIOD/2) + + with self.recovery_backfill_disabled(): + self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'osd', 'in', '0') + + time.sleep(self.EVENT_CREATION_PERIOD/2) + + self.assertEqual(self._osd_in_out_events_count(), 0) diff --git a/qa/tasks/mgr/test_prometheus.py b/qa/tasks/mgr/test_prometheus.py new file mode 100644 index 000000000..376556ab3 --- /dev/null +++ b/qa/tasks/mgr/test_prometheus.py @@ -0,0 +1,79 @@ +import json +import logging +import requests + +from .mgr_test_case import MgrTestCase + +log = logging.getLogger(__name__) + + +class TestPrometheus(MgrTestCase): + MGRS_REQUIRED = 3 + + def setUp(self): + super(TestPrometheus, self).setUp() + self.setup_mgrs() + + def test_file_sd_command(self): + self._assign_ports("prometheus", "server_port") + self._load_module("prometheus") + + result = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd( + "prometheus", "file_sd_config")) + mgr_map = self.mgr_cluster.get_mgr_map() + self.assertEqual(len(result[0]['targets']), len(mgr_map['standbys']) + 1) + + + + def test_standby(self): + self._assign_ports("prometheus", "server_port") + self._load_module("prometheus") + + original_active = self.mgr_cluster.get_active_id() + + original_uri = self._get_uri("prometheus") + log.info("Originally running at {0}".format(original_uri)) + + self.mgr_cluster.mgr_fail(original_active) + + failed_over_uri = self._get_uri("prometheus") + log.info("After failover running at {0}".format(failed_over_uri)) + + self.assertNotEqual(original_uri, failed_over_uri) + + # The original active daemon should have come back up as a standby + # and serve some html under "/" and an empty answer under /metrics + r = requests.get(original_uri, allow_redirects=False) + self.assertEqual(r.status_code, 200) + r = requests.get(original_uri + "metrics", 
allow_redirects=False) + self.assertEqual(r.status_code, 200) + self.assertEqual(r.headers["content-type"], "text/plain;charset=utf-8") + self.assertEqual(r.headers["server"], "Ceph-Prometheus") + + def test_urls(self): + self._assign_ports("prometheus", "server_port") + self._load_module("prometheus") + + base_uri = self._get_uri("prometheus") + + # This is a very simple smoke test to check that the module can + # give us a 200 response to requests. We're not testing that + # the content is correct or even renders! + + urls = [ + "/", + "/metrics" + ] + + failures = [] + + for url in urls: + r = requests.get(base_uri + url, allow_redirects=False) + if r.status_code != 200: + failures.append(url) + + log.info("{0}: {1} ({2} bytes)".format( + url, r.status_code, len(r.content) + )) + + self.assertListEqual(failures, []) diff --git a/qa/tasks/mon_clock_skew_check.py b/qa/tasks/mon_clock_skew_check.py new file mode 100644 index 000000000..59d4169d1 --- /dev/null +++ b/qa/tasks/mon_clock_skew_check.py @@ -0,0 +1,73 @@ +""" +Handle clock skews in monitors. +""" +import logging +import time +from tasks import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +class ClockSkewCheck: + """ + Check if there are any clock skews among the monitors in the + quorum. + + This task accepts the following options: + + interval amount of seconds to wait before check. (default: 30.0) + expect-skew 'true' or 'false', to indicate whether to expect a skew during + the run or not. If 'true', the test will fail if no skew is + found, and succeed if a skew is indeed found; if 'false', it's + the other way around. (default: false) + + - mon_clock_skew_check: + expect-skew: true + """ + + def __init__(self, ctx, manager, config, logger): + self.ctx = ctx + self.manager = manager + + self.stopping = False + self.logger = logger + self.config = config + + if self.config is None: + self.config = dict() + + +def task(ctx, config): + if config is None: + config = {} + assert isinstance(config, dict), \ + 'mon_clock_skew_check task only accepts a dict for configuration' + interval = float(config.get('interval', 30.0)) + expect_skew = config.get('expect-skew', False) + + log.info('Beginning mon_clock_skew_check...') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + quorum_size = len(teuthology.get_mon_names(ctx)) + manager.wait_for_mon_quorum_size(quorum_size) + + # wait a bit + log.info('sleeping for {s} seconds'.format( + s=interval)) + time.sleep(interval) + + health = manager.get_mon_health(True) + log.info('got health %s' % health) + if expect_skew: + if 'MON_CLOCK_SKEW' not in health['checks']: + raise RuntimeError('expected MON_CLOCK_SKEW but got none') + else: + if 'MON_CLOCK_SKEW' in health['checks']: + raise RuntimeError('got MON_CLOCK_SKEW but expected none') + diff --git a/qa/tasks/mon_recovery.py b/qa/tasks/mon_recovery.py new file mode 100644 index 000000000..fa7aa1a8d --- /dev/null +++ b/qa/tasks/mon_recovery.py @@ -0,0 +1,80 @@ +""" +Monitor recovery +""" +import logging +from tasks import ceph_manager +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test monitor recovery. 
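A typical invocation needs no options (illustrative yaml fragment, in the same style as the other task docstrings):

    tasks:
    - ceph:
    - mon_recovery: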
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + mons = [f.split('.')[1] for f in teuthology.get_mon_names(ctx)] + log.info("mon ids = %s" % mons) + + manager.wait_for_mon_quorum_size(len(mons)) + + log.info('verifying all monitors are in the quorum') + for m in mons: + s = manager.get_mon_status(m) + assert s['state'] == 'leader' or s['state'] == 'peon' + assert len(s['quorum']) == len(mons) + + log.info('restarting each monitor in turn') + for m in mons: + # stop a monitor + manager.kill_mon(m) + manager.wait_for_mon_quorum_size(len(mons) - 1) + + # restart + manager.revive_mon(m) + manager.wait_for_mon_quorum_size(len(mons)) + + # in forward and reverse order, + rmons = mons + rmons.reverse() + for mons in mons, rmons: + log.info('stopping all monitors') + for m in mons: + manager.kill_mon(m) + + log.info('forming a minimal quorum for %s, then adding monitors' % mons) + qnum = (len(mons) // 2) + 1 + num = 0 + for m in mons: + manager.revive_mon(m) + num += 1 + if num >= qnum: + manager.wait_for_mon_quorum_size(num) + + # on both leader and non-leader ranks... + for rank in [0, 1]: + # take one out + log.info('removing mon %s' % mons[rank]) + manager.kill_mon(mons[rank]) + manager.wait_for_mon_quorum_size(len(mons) - 1) + + log.info('causing some monitor log activity') + m = 30 + for n in range(1, m): + manager.raw_cluster_cmd('log', '%d of %d' % (n, m)) + + log.info('adding mon %s back in' % mons[rank]) + manager.revive_mon(mons[rank]) + manager.wait_for_mon_quorum_size(len(mons)) diff --git a/qa/tasks/mon_thrash.py b/qa/tasks/mon_thrash.py new file mode 100644 index 000000000..30a7555b5 --- /dev/null +++ b/qa/tasks/mon_thrash.py @@ -0,0 +1,420 @@ +""" +Monitor thrash +""" +import logging +import contextlib +import random +import time +import gevent +import json +import math +from teuthology import misc as teuthology +from teuthology.contextutil import safe_while +from tasks import ceph_manager +from tasks.cephfs.filesystem import MDSCluster +from tasks.thrasher import Thrasher + +log = logging.getLogger(__name__) + +def _get_mons(ctx): + """ + Get monitor names from the context value. + """ + mons = [f[len('mon.'):] for f in teuthology.get_mon_names(ctx)] + return mons + +class MonitorThrasher(Thrasher): + """ + How it works:: + + - pick a monitor + - kill it + - wait for quorum to be formed + - sleep for 'revive_delay' seconds + - revive monitor + - wait for quorum to be formed + - sleep for 'thrash_delay' seconds + + Options:: + + seed Seed to use on the RNG to reproduce a previous + behaviour (default: None; i.e., not set) + revive_delay Number of seconds to wait before reviving + the monitor (default: 10) + thrash_delay Number of seconds to wait in-between + test iterations (default: 0) + store_thrash Thrash monitor store before killing the monitor being thrashed (default: False) + store_thrash_probability Probability of thrashing a monitor's store + (default: 50) + thrash_many Thrash multiple monitors instead of just one. If + 'maintain_quorum' is set to False, then we will + thrash up to as many monitors as there are + available. (default: False) + maintain_quorum Always maintain quorum, taking care on how many + monitors we kill during the thrashing. 
If we + happen to only have one or two monitors configured, + if this option is set to True, then we won't run + this task as we cannot guarantee maintenance of + quorum. Setting it to false however would allow the + task to run with as many as just one single monitor. + (default: True) + freeze_mon_probability: how often to freeze the mon instead of killing it, + in % (default: 0) + freeze_mon_duration: how many seconds to freeze the mon (default: 15) + scrub Scrub after each iteration (default: True) + check_mds_failover Check if mds failover happened (default: False) + + Note: if 'store_thrash' is set to True, then 'maintain_quorum' must also + be set to True. + + For example:: + + tasks: + - ceph: + - mon_thrash: + revive_delay: 20 + thrash_delay: 1 + store_thrash: true + store_thrash_probability: 40 + seed: 31337 + maintain_quorum: true + thrash_many: true + check_mds_failover: True + - ceph-fuse: + - workunit: + clients: + all: + - mon/workloadgen.sh + """ + def __init__(self, ctx, manager, config, name, logger): + super(MonitorThrasher, self).__init__() + + self.ctx = ctx + self.manager = manager + self.manager.wait_for_clean() + + self.stopping = False + self.logger = logger + self.config = config + self.name = name + + if self.config is None: + self.config = dict() + + """ Test reproducibility """ + self.random_seed = self.config.get('seed', None) + + if self.random_seed is None: + self.random_seed = int(time.time()) + + self.rng = random.Random() + self.rng.seed(int(self.random_seed)) + + """ Monitor thrashing """ + self.revive_delay = float(self.config.get('revive_delay', 10.0)) + self.thrash_delay = float(self.config.get('thrash_delay', 0.0)) + + self.thrash_many = self.config.get('thrash_many', False) + self.maintain_quorum = self.config.get('maintain_quorum', True) + + self.scrub = self.config.get('scrub', True) + + self.freeze_mon_probability = float(self.config.get('freeze_mon_probability', 10)) + self.freeze_mon_duration = float(self.config.get('freeze_mon_duration', 15.0)) + + assert self.max_killable() > 0, \ + 'Unable to kill at least one monitor with the current config.' + + """ Store thrashing """ + self.store_thrash = self.config.get('store_thrash', False) + self.store_thrash_probability = int( + self.config.get('store_thrash_probability', 50)) + if self.store_thrash: + assert self.store_thrash_probability > 0, \ + 'store_thrash is set, probability must be > 0' + assert self.maintain_quorum, \ + 'store_thrash = true must imply maintain_quorum = true' + + #MDS failover + self.mds_failover = self.config.get('check_mds_failover', False) + + if self.mds_failover: + self.mds_cluster = MDSCluster(ctx) + + self.thread = gevent.spawn(self.do_thrash) + + def log(self, x): + """ + locally log info messages + """ + self.logger.info(x) + + def do_join(self): + """ + Break out of this processes thrashing loop. + """ + self.stopping = True + self.thread.get() + + def should_thrash_store(self): + """ + If allowed, indicate that we should thrash a certain percentage of + the time as determined by the store_thrash_probability value. + """ + if not self.store_thrash: + return False + return self.rng.randrange(0, 101) < self.store_thrash_probability + + def thrash_store(self, mon): + """ + Thrash the monitor specified. 
+ :param mon: monitor to thrash + """ + self.log('thrashing mon.{id} store'.format(id=mon)) + out = self.manager.raw_cluster_cmd( + 'tell', 'mon.%s' % mon, 'sync_force', + '--yes-i-really-mean-it') + j = json.loads(out) + assert j['ret'] == 0, \ + 'error forcing store sync on mon.{id}:\n{ret}'.format( + id=mon,ret=out) + + def should_freeze_mon(self): + """ + Indicate that we should freeze a certain percentago of the time + as determined by the freeze_mon_probability value. + """ + return self.rng.randrange(0, 101) < self.freeze_mon_probability + + def freeze_mon(self, mon): + """ + Send STOP signal to freeze the monitor. + """ + log.info('Sending STOP to mon %s', mon) + self.manager.signal_mon(mon, 19) # STOP + + def unfreeze_mon(self, mon): + """ + Send CONT signal to unfreeze the monitor. + """ + log.info('Sending CONT to mon %s', mon) + self.manager.signal_mon(mon, 18) # CONT + + def kill_mon(self, mon): + """ + Kill the monitor specified + """ + self.log('killing mon.{id}'.format(id=mon)) + self.manager.kill_mon(mon) + + def revive_mon(self, mon): + """ + Revive the monitor specified + """ + self.log('killing mon.{id}'.format(id=mon)) + self.log('reviving mon.{id}'.format(id=mon)) + self.manager.revive_mon(mon) + + def max_killable(self): + """ + Return the maximum number of monitors we can kill. + """ + m = len(_get_mons(self.ctx)) + if self.maintain_quorum: + return max(math.ceil(m/2.0)-1, 0) + else: + return m + + def _wait_until_quorum(self, mon, size, timeout=300): + """ + Wait until the monitor specified is in the quorum. + """ + self.log('waiting for quorum size %d for mon %s' % (size, mon)) + s = {} + + with safe_while(sleep=3, + tries=timeout // 3, + action=f'wait for quorum size {size} on mon {mon}') as proceed: + while proceed(): + s = self.manager.get_mon_status(mon) + if len(s['quorum']) == size: + break + self.log("quorum is size %d" % len(s['quorum'])) + + self.log("final quorum is size %d" % len(s['quorum'])) + return s + + def do_thrash(self): + """ + _do_thrash() wrapper. + """ + try: + self._do_thrash() + except Exception as e: + # See _run exception comment for MDSThrasher + self.set_thrasher_exception(e) + self.logger.exception("exception:") + # Allow successful completion so gevent doesn't see an exception. + # The DaemonWatchdog will observe the error and tear down the test. + + def _do_thrash(self): + """ + Continuously loop and thrash the monitors. 
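Each iteration kills between 1 and max_killable() monitors; with maintain_quorum that bound keeps a majority alive, for example:

    import math
    m = 5                                   # monitors in the cluster (example)
    max(math.ceil(m / 2.0) - 1, 0)          # -> 2; a quorum of 3 survives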
+ """ + #status before mon thrashing + if self.mds_failover: + oldstatus = self.mds_cluster.status() + + self.log('start thrashing') + self.log('seed: {s}, revive delay: {r}, thrash delay: {t} '\ + 'thrash many: {tm}, maintain quorum: {mq} '\ + 'store thrash: {st}, probability: {stp} '\ + 'freeze mon: prob {fp} duration {fd}'.format( + s=self.random_seed,r=self.revive_delay,t=self.thrash_delay, + tm=self.thrash_many, mq=self.maintain_quorum, + st=self.store_thrash,stp=self.store_thrash_probability, + fp=self.freeze_mon_probability,fd=self.freeze_mon_duration, + )) + + while not self.stopping: + mons = _get_mons(self.ctx) + self.manager.wait_for_mon_quorum_size(len(mons)) + self.log('making sure all monitors are in the quorum') + for m in mons: + try: + s = self._wait_until_quorum(m, len(mons), timeout=30) + except Exception as e: + self.log('mon.{m} is not in quorum size, exception: {e}'.format(m=m,e=e)) + self.log('mon_status: {s}'.format(s=s)) + assert s['state'] == 'leader' or s['state'] == 'peon' + assert len(s['quorum']) == len(mons) + + kill_up_to = self.rng.randrange(1, self.max_killable()+1) + mons_to_kill = self.rng.sample(mons, kill_up_to) + self.log('monitors to thrash: {m}'.format(m=mons_to_kill)) + + mons_to_freeze = [] + for mon in mons: + if mon in mons_to_kill: + continue + if self.should_freeze_mon(): + mons_to_freeze.append(mon) + self.log('monitors to freeze: {m}'.format(m=mons_to_freeze)) + + for mon in mons_to_kill: + self.log('thrashing mon.{m}'.format(m=mon)) + + """ we only thrash stores if we are maintaining quorum """ + if self.should_thrash_store() and self.maintain_quorum: + self.thrash_store(mon) + + self.kill_mon(mon) + + if mons_to_freeze: + for mon in mons_to_freeze: + self.freeze_mon(mon) + self.log('waiting for {delay} secs to unfreeze mons'.format( + delay=self.freeze_mon_duration)) + time.sleep(self.freeze_mon_duration) + for mon in mons_to_freeze: + self.unfreeze_mon(mon) + + if self.maintain_quorum: + self.manager.wait_for_mon_quorum_size(len(mons)-len(mons_to_kill)) + for m in mons: + if m in mons_to_kill: + continue + try: + s = self._wait_until_quorum(m, len(mons)-len(mons_to_kill), timeout=30) + except Exception as e: + self.log('mon.{m} is not in quorum size, exception: {e}'.format(m=m,e=e)) + self.log('mon_status: {s}'.format(s=s)) + + assert s['state'] == 'leader' or s['state'] == 'peon' + assert len(s['quorum']) == len(mons)-len(mons_to_kill) + + self.log('waiting for {delay} secs before reviving monitors'.format( + delay=self.revive_delay)) + time.sleep(self.revive_delay) + + for mon in mons_to_kill: + self.revive_mon(mon) + # do more freezes + if mons_to_freeze: + for mon in mons_to_freeze: + self.freeze_mon(mon) + self.log('waiting for {delay} secs to unfreeze mons'.format( + delay=self.freeze_mon_duration)) + time.sleep(self.freeze_mon_duration) + for mon in mons_to_freeze: + self.unfreeze_mon(mon) + + self.manager.wait_for_mon_quorum_size(len(mons)) + for m in mons: + try: + s = self._wait_until_quorum(m, len(mons), timeout=30) + except Exception as e: + self.log('mon.{m} is not in quorum size, exception: {e}'.format(m=m,e=e)) + self.log('mon_status: {s}'.format(s=s)) + + assert s['state'] == 'leader' or s['state'] == 'peon' + assert len(s['quorum']) == len(mons) + + if self.scrub: + self.log('triggering scrub') + try: + self.manager.raw_cluster_cmd('mon', 'scrub') + except Exception as e: + log.warning("Ignoring exception while triggering scrub: %s", e) + + if self.thrash_delay > 0.0: + self.log('waiting for {delay} secs before continuing 
thrashing'.format( + delay=self.thrash_delay)) + time.sleep(self.thrash_delay) + + #status after thrashing + if self.mds_failover: + status = self.mds_cluster.status() + assert not oldstatus.hadfailover(status), \ + 'MDS Failover' + + +@contextlib.contextmanager +def task(ctx, config): + """ + Stress test the monitor by thrashing them while another task/workunit + is running. + + Please refer to MonitorThrasher class for further information on the + available options. + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'mon_thrash task only accepts a dict for configuration' + assert len(_get_mons(ctx)) > 2, \ + 'mon_thrash task requires at least 3 monitors' + + if 'cluster' not in config: + config['cluster'] = 'ceph' + + log.info('Beginning mon_thrash...') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + thrash_proc = MonitorThrasher(ctx, + manager, config, "MonitorThrasher", + logger=log.getChild('mon_thrasher')) + ctx.ceph[config['cluster']].thrashers.append(thrash_proc) + try: + log.debug('Yielding') + yield + finally: + log.info('joining mon_thrasher') + thrash_proc.do_join() + mons = _get_mons(ctx) + manager.wait_for_mon_quorum_size(len(mons)) diff --git a/qa/tasks/multibench.py b/qa/tasks/multibench.py new file mode 100644 index 000000000..c2a7299f1 --- /dev/null +++ b/qa/tasks/multibench.py @@ -0,0 +1,61 @@ +""" +Multibench testing +""" +import contextlib +import logging +import time +import copy +import gevent + +from tasks import radosbench + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run multibench + + The config should be as follows: + + multibench: + time: <seconds to run total> + segments: <number of concurrent benches> + radosbench: <config for radosbench> + + example: + + tasks: + - ceph: + - multibench: + clients: [client.0] + time: 360 + - interactive: + """ + log.info('Beginning multibench...') + assert isinstance(config, dict), \ + "please list clients to run on" + + def run_one(num): + """Run test spawn from gevent""" + start = time.time() + if not config.get('radosbench'): + benchcontext = {} + else: + benchcontext = copy.copy(config.get('radosbench')) + iterations = 0 + while time.time() - start < int(config.get('time', 600)): + log.info("Starting iteration %s of segment %s"%(iterations, num)) + benchcontext['pool'] = str(num) + "-" + str(iterations) + with radosbench.task(ctx, benchcontext): + time.sleep() + iterations += 1 + log.info("Starting %s threads"%(str(config.get('segments', 3)),)) + segments = [ + gevent.spawn(run_one, i) + for i in range(0, int(config.get('segments', 3)))] + + try: + yield + finally: + [i.get() for i in segments] diff --git a/qa/tasks/netem.py b/qa/tasks/netem.py new file mode 100644 index 000000000..1d9fd98f7 --- /dev/null +++ b/qa/tasks/netem.py @@ -0,0 +1,268 @@ +""" +Task to run tests with network delay between two remotes using tc and netem. +Reference:https://wiki.linuxfoundation.org/networking/netem. 
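For a sense of what the helpers below execute, a static delay of 50ms on interface eth0 toward one peer amounts to roughly (values illustrative, <peer-ip> is a placeholder):

    sudo tc qdisc add dev eth0 root handle 1: prio
    sudo tc qdisc add dev eth0 parent 1:1 handle 2: netem delay 50ms 5ms distribution normal
    sudo tc filter add dev eth0 parent 1:0 protocol ip pref 55 handle ::55 u32 match ip dst <peer-ip> flowid 2:1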
+ +""" + +import logging +import contextlib +from paramiko import SSHException +import socket +import time +import gevent +import argparse + +log = logging.getLogger(__name__) + + +def set_priority(interface): + + # create a priority queueing discipline + return ['sudo', 'tc', 'qdisc', 'add', 'dev', interface, 'root', 'handle', '1:', 'prio'] + + +def show_tc(interface): + + # shows tc device present + return ['sudo', 'tc', 'qdisc', 'show', 'dev', interface] + + +def del_tc(interface): + + return ['sudo', 'tc', 'qdisc', 'del', 'dev', interface, 'root'] + + +def cmd_prefix(interface): + + # prepare command to set delay + cmd1 = ['sudo', 'tc', 'qdisc', 'add', 'dev', interface, 'parent', + '1:1', 'handle', '2:', 'netem', 'delay'] + + # prepare command to change delay + cmd2 = ['sudo', 'tc', 'qdisc', 'replace', 'dev', interface, 'root', 'netem', 'delay'] + + # prepare command to apply filter to the matched ip/host + + cmd3 = ['sudo', 'tc', 'filter', 'add', 'dev', interface, + 'parent', '1:0', 'protocol', 'ip', 'pref', '55', + 'handle', '::55', 'u32', 'match', 'ip', 'dst'] + + return cmd1, cmd2, cmd3 + + +def static_delay(remote, host, interface, delay): + + """ Sets a constant delay between two hosts to emulate network delays using tc qdisc and netem""" + + set_delay, change_delay, set_ip = cmd_prefix(interface) + + ip = socket.gethostbyname(host.hostname) + + tc = remote.sh(show_tc(interface)) + if tc.strip().find('refcnt') == -1: + # call set_priority() func to create priority queue + # if not already created(indicated by -1) + log.info('Create priority queue') + remote.run(args=set_priority(interface)) + + # set static delay, with +/- 5ms jitter with normal distribution as default + log.info('Setting delay to %s' % delay) + set_delay.extend(['%s' % delay, '5ms', 'distribution', 'normal']) + remote.run(args=set_delay) + + # set delay to a particular remote node via ip + log.info('Delay set on %s' % remote) + set_ip.extend(['%s' % ip, 'flowid', '2:1']) + remote.run(args=set_ip) + else: + # if the device is already created, only change the delay + log.info('Setting delay to %s' % delay) + change_delay.extend(['%s' % delay, '5ms', 'distribution', 'normal']) + remote.run(args=change_delay) + + +def variable_delay(remote, host, interface, delay_range=[]): + + """ Vary delay between two values""" + + set_delay, change_delay, set_ip = cmd_prefix(interface) + + ip = socket.gethostbyname(host.hostname) + + # delay1 has to be lower than delay2 + delay1 = delay_range[0] + delay2 = delay_range[1] + + tc = remote.sh(show_tc(interface)) + if tc.strip().find('refcnt') == -1: + # call set_priority() func to create priority queue + # if not already created(indicated by -1) + remote.run(args=set_priority(interface)) + + # set variable delay + log.info('Setting varying delay') + set_delay.extend(['%s' % delay1, '%s' % delay2]) + remote.run(args=set_delay) + + # set delay to a particular remote node via ip + log.info('Delay set on %s' % remote) + set_ip.extend(['%s' % ip, 'flowid', '2:1']) + remote.run(args=set_ip) + else: + # if the device is already created, only change the delay + log.info('Setting varying delay') + change_delay.extend(['%s' % delay1, '%s' % delay2]) + remote.run(args=change_delay) + + +def delete_dev(remote, interface): + + """ Delete the qdisc if present""" + + log.info('Delete tc') + tc = remote.sh(show_tc(interface)) + if tc.strip().find('refcnt') != -1: + remote.run(args=del_tc(interface)) + + +class Toggle: + + stop_event = gevent.event.Event() + + def __init__(self, ctx, remote, host, 
interface, interval): + self.ctx = ctx + self.remote = remote + self.host = host + self.interval = interval + self.interface = interface + self.ip = socket.gethostbyname(self.host.hostname) + + def packet_drop(self): + + """ Drop packets to the remote ip specified""" + + _, _, set_ip = cmd_prefix(self.interface) + + tc = self.remote.sh(show_tc(self.interface)) + if tc.strip().find('refcnt') == -1: + self.remote.run(args=set_priority(self.interface)) + # packet drop to specific ip + log.info('Drop all packets to %s' % self.host) + set_ip.extend(['%s' % self.ip, 'action', 'drop']) + self.remote.run(args=set_ip) + + def link_toggle(self): + + """ + For toggling packet drop and recovery in regular interval. + If interval is 5s, link is up for 5s and link is down for 5s + """ + + while not self.stop_event.is_set(): + self.stop_event.wait(timeout=self.interval) + # simulate link down + try: + self.packet_drop() + log.info('link down') + except SSHException: + log.debug('Failed to run command') + + self.stop_event.wait(timeout=self.interval) + # if qdisc exist,delete it. + try: + delete_dev(self.remote, self.interface) + log.info('link up') + except SSHException: + log.debug('Failed to run command') + + def begin(self, gname): + self.thread = gevent.spawn(self.link_toggle) + self.ctx.netem.names[gname] = self.thread + + def end(self, gname): + self.stop_event.set() + log.info('gname is {}'.format(self.ctx.netem.names[gname])) + self.ctx.netem.names[gname].get() + + def cleanup(self): + """ + Invoked during unwinding if the test fails or exits before executing task 'link_recover' + """ + log.info('Clean up') + self.stop_event.set() + self.thread.get() + + +@contextlib.contextmanager +def task(ctx, config): + + """ + - netem: + clients: [c1.rgw.0] + iface: eno1 + dst_client: [c2.rgw.1] + delay: 10ms + + - netem: + clients: [c1.rgw.0] + iface: eno1 + dst_client: [c2.rgw.1] + delay_range: [10ms, 20ms] # (min, max) + + - netem: + clients: [rgw.1, mon.0] + iface: eno1 + gname: t1 + dst_client: [c2.rgw.1] + link_toggle_interval: 10 # no unit mentioned. By default takes seconds. 
+ + - netem: + clients: [rgw.1, mon.0] + iface: eno1 + link_recover: [t1, t2] + + + """ + + log.info('config %s' % config) + + assert isinstance(config, dict), \ + "please list clients to run on" + if not hasattr(ctx, 'netem'): + ctx.netem = argparse.Namespace() + ctx.netem.names = {} + + if config.get('dst_client') is not None: + dst = config.get('dst_client') + (host,) = ctx.cluster.only(dst).remotes.keys() + + for role in config.get('clients', None): + (remote,) = ctx.cluster.only(role).remotes.keys() + ctx.netem.remote = remote + if config.get('delay', False): + static_delay(remote, host, config.get('iface'), config.get('delay')) + if config.get('delay_range', False): + variable_delay(remote, host, config.get('iface'), config.get('delay_range')) + if config.get('link_toggle_interval', False): + log.info('Toggling link for %s' % config.get('link_toggle_interval')) + global toggle + toggle = Toggle(ctx, remote, host, config.get('iface'), config.get('link_toggle_interval')) + toggle.begin(config.get('gname')) + if config.get('link_recover', False): + log.info('Recovering link') + for gname in config.get('link_recover'): + toggle.end(gname) + log.info('sleeping') + time.sleep(config.get('link_toggle_interval')) + delete_dev(ctx.netem.remote, config.get('iface')) + del ctx.netem.names[gname] + + try: + yield + finally: + if ctx.netem.names: + toggle.cleanup() + for role in config.get('clients'): + (remote,) = ctx.cluster.only(role).remotes.keys() + delete_dev(remote, config.get('iface')) + diff --git a/qa/tasks/netsplit.py b/qa/tasks/netsplit.py new file mode 100644 index 000000000..0a9484a89 --- /dev/null +++ b/qa/tasks/netsplit.py @@ -0,0 +1,73 @@ +""" +Functions to netsplit test machines. + +At present, you must specify monitors to disconnect, and it +drops those IP pairs. This means OSDs etc on the hosts which use +the same IP will also be blocked! If you are using multiple IPs on the +same host within the cluster, daemons on those other IPs will get +through. 
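+
+Disconnection works by adding a symmetric pair of iptables rules: each of
+the two hosts gets an INPUT rule that DROPs TCP traffic coming from the
+other monitor's IP, and reconnect() deletes the same rules again with
+'iptables -D'.
+
+A hypothetical call from another task could look like this (illustrative
+only; 'mon.a' and 'mon.b' are assumed role names):
+
+    from tasks import netsplit
+
+    netsplit.disconnect(ctx, ['mon.a', 'mon.b'])
+    # ... exercise the cluster while the two monitors cannot talk ...
+    netsplit.reconnect(ctx, ['mon.a', 'mon.b'])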
+""" +import logging +import re + +log = logging.getLogger(__name__) + +def get_ip_and_ports(ctx, daemon): + assert daemon.startswith('mon.') + addr = ctx.ceph['ceph'].mons['{a}'.format(a=daemon)] + ips = re.findall("[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+[:[0-9]*]*", addr) + assert len(ips) > 0 + plain_ip = re.match("[0-9\.]*", ips[0]).group() + assert plain_ip is not None + port_list = [] + for ip in ips: + ip_str, port_str = re.match("([0-9\.]*)([:[0-9]*]*)", ip).groups() + assert ip_str == plain_ip + if len(port_str) > 0: + port_list.append(port_str) + return (plain_ip, port_list) + +def disconnect(ctx, config): + assert len(config) == 2 # we can only disconnect pairs right now + # and we can only disconnect mons right now + assert config[0].startswith('mon.') + assert config[1].startswith('mon.') + (ip1, _) = get_ip_and_ports(ctx, config[0]) + (ip2, _) = get_ip_and_ports(ctx, config[1]) + + (host1,) = ctx.cluster.only(config[0]).remotes.keys() + (host2,) = ctx.cluster.only(config[1]).remotes.keys() + assert host1 is not None + assert host2 is not None + + host1.run( + args = ["sudo", "iptables", "-A", "INPUT", "-p", "tcp", "-s", + ip2, "-j", "DROP"] + ) + host2.run( + args = ["sudo", "iptables", "-A", "INPUT", "-p", "tcp", "-s", + ip1, "-j", "DROP"] + ) + +def reconnect(ctx, config): + assert len(config) == 2 # we can only disconnect pairs right now + # and we can only disconnect mons right now + assert config[0].startswith('mon.') + assert config[1].startswith('mon.') + + (ip1, _) = get_ip_and_ports(ctx, config[0]) + (ip2, _) = get_ip_and_ports(ctx, config[1]) + + (host1,) = ctx.cluster.only(config[0]).remotes.keys() + (host2,) = ctx.cluster.only(config[1]).remotes.keys() + assert host1 is not None + assert host2 is not None + + host1.run( + args = ["sudo", "iptables", "-D", "INPUT", "-p", "tcp", "-s", + ip2, "-j", "DROP"] + ) + host2.run( + args = ["sudo", "iptables", "-D", "INPUT", "-p", "tcp", "-s", + ip1, "-j", "DROP"] + ) diff --git a/qa/tasks/notification_tests.py b/qa/tasks/notification_tests.py new file mode 100644 index 000000000..7a3a401ab --- /dev/null +++ b/qa/tasks/notification_tests.py @@ -0,0 +1,320 @@ +""" +Run a set of bucket notification tests on rgw. 
+""" +from io import BytesIO +from configobj import ConfigObj +import base64 +import contextlib +import logging +import os +import random +import string + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def download(ctx, config): + assert isinstance(config, dict) + log.info('Downloading bucket-notifications-tests...') + testdir = teuthology.get_testdir(ctx) + branch = ctx.config.get('suite_branch') + repo = ctx.config.get('suite_repo') + log.info('Using branch %s from %s for bucket notifications tests', branch, repo) + for (client, client_config) in config.items(): + ctx.cluster.only(client).run( + args=['git', 'clone', '-b', branch, repo, '{tdir}/ceph'.format(tdir=testdir)], + ) + + sha1 = client_config.get('sha1') + + if sha1 is not None: + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/ceph'.format(tdir=testdir), + run.Raw('&&'), + 'git', 'reset', '--hard', sha1, + ], + ) + + try: + yield + finally: + log.info('Removing bucket-notifications-tests...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/ceph'.format(tdir=testdir), + ], + ) + +def _config_user(bntests_conf, section, user): + """ + Configure users for this section by stashing away keys, ids, and + email addresses. + """ + bntests_conf[section].setdefault('user_id', user) + bntests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user)) + bntests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user)) + bntests_conf[section].setdefault('access_key', + ''.join(random.choice(string.ascii_uppercase) for i in range(20))) + bntests_conf[section].setdefault('secret_key', + base64.b64encode(os.urandom(40)).decode()) + + +@contextlib.contextmanager +def pre_process(ctx, config): + """ + This function creates a directory which is required to run some AMQP tests. + """ + assert isinstance(config, dict) + log.info('Pre-processing...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + test_dir=teuthology.get_testdir(ctx) + + ctx.cluster.only(client).run( + args=[ + 'mkdir', '-p', '/home/ubuntu/.aws/models/s3/2006-03-01/', + ], + ) + + ctx.cluster.only(client).run( + args=[ + 'cd', '/home/ubuntu/.aws/models/s3/2006-03-01/', run.Raw('&&'), 'cp', '{tdir}/ceph/examples/rgw/boto3/service-2.sdk-extras.json'.format(tdir=test_dir), 'service-2.sdk-extras.json' + ], + ) + + try: + yield + finally: + log.info('Pre-processing completed...') + test_dir = teuthology.get_testdir(ctx) + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run( + args=[ + 'rm', '-rf', '/home/ubuntu/.aws/models/s3/2006-03-01/service-2.sdk-extras.json', + ], + ) + + ctx.cluster.only(client).run( + args=[ + 'cd', '/home/ubuntu/', run.Raw('&&'), 'rmdir', '-p', '.aws/models/s3/2006-03-01/', + ], + ) + + +@contextlib.contextmanager +def create_users(ctx, config): + """ + Create a main and an alternate s3 user. 
+ """ + assert isinstance(config, dict) + log.info('Creating rgw user...') + testdir = teuthology.get_testdir(ctx) + + users = {'s3 main': 'foo'} + for client in config['clients']: + bntests_conf = config['bntests_conf'][client] + for section, user in users.items(): + _config_user(bntests_conf, section, '{user}.{client}'.format(user=user, client=client)) + log.debug('Creating user {user} on {host}'.format(user=bntests_conf[section]['user_id'], host=client)) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'create', + '--uid', bntests_conf[section]['user_id'], + '--display-name', bntests_conf[section]['display_name'], + '--access-key', bntests_conf[section]['access_key'], + '--secret', bntests_conf[section]['secret_key'], + '--cluster', cluster_name, + ], + ) + + try: + yield + finally: + for client in config['clients']: + for user in users.values(): + uid = '{user}.{client}'.format(user=user, client=client) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'rm', + '--uid', uid, + '--purge-data', + '--cluster', cluster_name, + ], + ) + +@contextlib.contextmanager +def configure(ctx, config): + assert isinstance(config, dict) + log.info('Configuring bucket-notifications-tests...') + testdir = teuthology.get_testdir(ctx) + for client, properties in config['clients'].items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + bntests_conf = config['bntests_conf'][client] + + conf_fp = BytesIO() + bntests_conf.write(conf_fp) + remote.write_file( + path='{tdir}/ceph/src/test/rgw/bucket_notification/bn-tests.{client}.conf'.format(tdir=testdir, client=client), + data=conf_fp.getvalue(), + ) + + remote.run( + args=[ + 'cd', + '{tdir}/ceph/src/test/rgw/bucket_notification'.format(tdir=testdir), + run.Raw('&&'), + './bootstrap', + ], + ) + + try: + yield + finally: + log.info('Removing bn-tests.conf file...') + testdir = teuthology.get_testdir(ctx) + for client, properties in config['clients'].items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=['rm', '-f', + '{tdir}/ceph/src/test/rgw/bucket_notification/bn-tests.{client}.conf'.format(tdir=testdir,client=client), + ], + ) + +@contextlib.contextmanager +def run_tests(ctx, config): + """ + Run the bucket notifications tests after everything is set up. 
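+
+    By default the attribute filter below excludes the endpoint-specific
+    and manual tests (kafka_test, amqp_test, amqp_ssl_test, kafka_ssl_test,
+    modification_required, manual_test); supplying 'extra_attr' in the
+    client config replaces that filter.
+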
+ :param ctx: Context passed to task + :param config: specific configuration information + """ + assert isinstance(config, dict) + log.info('Running bucket-notifications-tests...') + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + attr = ["!kafka_test", "!amqp_test", "!amqp_ssl_test", "!kafka_ssl_test", "!modification_required", "!manual_test"] + + if 'extra_attr' in client_config: + attr = client_config.get('extra_attr') + + args = [ + 'BNTESTS_CONF={tdir}/ceph/src/test/rgw/bucket_notification/bn-tests.{client}.conf'.format(tdir=testdir, client=client), + '{tdir}/ceph/src/test/rgw/bucket_notification/virtualenv/bin/python'.format(tdir=testdir), + '-m', 'nose', + '-s', + '{tdir}/ceph/src/test/rgw/bucket_notification/test_bn.py'.format(tdir=testdir), + '-v', + '-a', ','.join(attr), + ] + + remote.run( + args=args, + label="bucket notification tests against different endpoints" + ) + yield + +@contextlib.contextmanager +def task(ctx,config): + """ + To run bucket notification tests under Kafka endpoint the prerequisite is to run the kafka server. Also you need to pass the + 'extra_attr' to the notification tests. Following is the way how to run kafka and finally bucket notification tests:: + + tasks: + - kafka: + client.0: + kafka_version: 2.6.0 + - notification_tests: + client.0: + extra_attr: ["kafka_test"] + + To run bucket notification tests under AMQP endpoint the prerequisite is to run the rabbitmq server. Also you need to pass the + 'extra_attr' to the notification tests. Following is the way how to run rabbitmq and finally bucket notification tests:: + + tasks: + - rabbitmq: + client.0: + - notification_tests: + client.0: + extra_attr: ["amqp_test"] + + If you want to run the tests against your changes pushed to your remote repo you can provide 'suite_branch' and 'suite_repo' + parameters in your teuthology-suite command. 
Example command for this is as follows:: + + teuthology-suite --ceph-repo https://github.com/ceph/ceph-ci.git -s rgw:notifications --ceph your_ceph_branch_name -m smithi --suite-repo https://github.com/your_name/ceph.git --suite-branch your_branch_name + + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task kafka only supports a list or dictionary for configuration" + + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients=config.keys() + + log.debug('Notifications config is %s', config) + + bntests_conf = {} + + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 'bntests: no rgw endpoint for {}'.format(client) + + bntests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port':endpoint.port, + 'host':endpoint.dns_name, + }, + 's3 main':{} + } + ) + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: pre_process(ctx=ctx, config=config), + lambda: create_users(ctx=ctx, config=dict( + clients=clients, + bntests_conf=bntests_conf, + )), + lambda: configure(ctx=ctx, config=dict( + clients=config, + bntests_conf=bntests_conf, + )), + lambda: run_tests(ctx=ctx, config=config), + ): + pass + yield diff --git a/qa/tasks/nvme_loop.py b/qa/tasks/nvme_loop.py new file mode 100644 index 000000000..c9d8f0dc7 --- /dev/null +++ b/qa/tasks/nvme_loop.py @@ -0,0 +1,106 @@ +import contextlib +import logging + +from io import StringIO +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + log.info('Setting up nvme_loop on scratch devices...') + host = 'hostnqn' + port = '1' + devs_by_remote = {} + old_scratch_by_remote = {} + for remote, roles in ctx.cluster.remotes.items(): + if remote.is_container: + continue + devs = teuthology.get_scratch_devices(remote) + devs_by_remote[remote] = devs + base = '/sys/kernel/config/nvmet' + remote.run( + args=[ + 'grep', '^nvme_loop', '/proc/modules', run.Raw('||'), + 'sudo', 'modprobe', 'nvme_loop', + run.Raw('&&'), + 'sudo', 'mkdir', '-p', f'{base}/hosts/{host}', + run.Raw('&&'), + 'sudo', 'mkdir', '-p', f'{base}/ports/{port}', + run.Raw('&&'), + 'echo', 'loop', run.Raw('|'), + 'sudo', 'tee', f'{base}/ports/{port}/addr_trtype', + ] + ) + for dev in devs: + short = dev.split('/')[-1] + log.info(f'Connecting nvme_loop {remote.shortname}:{dev}...') + remote.run( + args=[ + 'sudo', 'mkdir', '-p', f'{base}/subsystems/{short}', + run.Raw('&&'), + 'echo', '1', run.Raw('|'), + 'sudo', 'tee', f'{base}/subsystems/{short}/attr_allow_any_host', + run.Raw('&&'), + 'sudo', 'mkdir', '-p', f'{base}/subsystems/{short}/namespaces/1', + run.Raw('&&'), + 'echo', '-n', dev, run.Raw('|'), + 'sudo', 'tee', f'{base}/subsystems/{short}/namespaces/1/device_path', + run.Raw('&&'), + 'echo', '1', run.Raw('|'), + 'sudo', 'tee', f'{base}/subsystems/{short}/namespaces/1/enable', + run.Raw('&&'), + 'sudo', 'ln', '-s', f'{base}/subsystems/{short}', + f'{base}/ports/{port}/subsystems/{short}', + run.Raw('&&'), + 'sudo', 'nvme', 'connect', '-t', 'loop', '-n', short, '-q', host, + ] + ) + + # identify nvme_loops devices + old_scratch_by_remote[remote] = remote.read_file('/scratch_devs') + + with contextutil.safe_while(sleep=1, tries=15) as 
proceed: + while proceed(): + p = remote.run(args=['sudo', 'nvme', 'list'], stdout=StringIO()) + new_devs = [] + for line in p.stdout.getvalue().splitlines(): + dev, _, vendor = line.split()[0:3] + if dev.startswith('/dev/') and vendor == 'Linux': + new_devs.append(dev) + log.info(f'new_devs {new_devs}') + assert len(new_devs) <= len(devs) + if len(new_devs) == len(devs): + break + + remote.write_file( + path='/scratch_devs', + data='\n'.join(new_devs) + '\n', + sudo=True + ) + + try: + yield + + finally: + for remote, devs in devs_by_remote.items(): + if remote.is_container: + continue + for dev in devs: + short = dev.split('/')[-1] + log.info(f'Disconnecting nvme_loop {remote.shortname}:{dev}...') + remote.run( + args=[ + 'sudo', 'nvme', 'disconnect', '-n', short + ], + check_status=False, + ) + remote.write_file( + path='/scratch_devs', + data=old_scratch_by_remote[remote], + sudo=True + ) diff --git a/qa/tasks/object_source_down.py b/qa/tasks/object_source_down.py new file mode 100644 index 000000000..e4519bb6f --- /dev/null +++ b/qa/tasks/object_source_down.py @@ -0,0 +1,101 @@ +""" +Test Object locations going down +""" +import logging +import time +from teuthology import misc as teuthology +from tasks import ceph_manager +from tasks.util.rados import rados + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling of object location going down + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'lost_unfound task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.wait_for_clean() + + # something that is always there + dummyfile = '/etc/fstab' + + # take 0, 1 out + manager.mark_out_osd(0) + manager.mark_out_osd(1) + manager.wait_for_clean() + + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.0', + 'injectargs', + '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' + ) + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.1', + 'injectargs', + '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' + ) + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.2', + 'injectargs', + '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' + ) + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.3', + 'injectargs', + '--osd-recovery-delay-start 10000 --osd-min-pg-log-entries 100000000' + ) + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', 'data', 'put', 'dummy', dummyfile]) + + # create old objects + for f in range(1, 10): + rados(ctx, mon, ['-p', 'data', 'put', 'existing_%d' % f, dummyfile]) + + manager.mark_out_osd(3) + manager.wait_till_active() + + manager.mark_in_osd(0) + manager.wait_till_active() + + manager.flush_pg_stats([2, 0]) + + manager.mark_out_osd(2) + manager.wait_till_active() + + # bring up 1 + manager.mark_in_osd(1) + manager.wait_till_active() + + manager.flush_pg_stats([0, 1]) + log.info("Getting unfound objects") + unfound = manager.get_num_unfound_objects() + assert not unfound + + manager.kill_osd(2) + manager.mark_down_osd(2) + 
manager.kill_osd(3) + manager.mark_down_osd(3) + + manager.flush_pg_stats([0, 1]) + log.info("Getting unfound objects") + unfound = manager.get_num_unfound_objects() + assert unfound diff --git a/qa/tasks/omapbench.py b/qa/tasks/omapbench.py new file mode 100644 index 000000000..a5bd3a4df --- /dev/null +++ b/qa/tasks/omapbench.py @@ -0,0 +1,83 @@ +""" +Run omapbench executable within teuthology +""" +import contextlib +import logging + +from teuthology.orchestra import run +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run omapbench + + The config should be as follows:: + + omapbench: + clients: [client list] + threads: <threads at once> + objects: <number of objects to write> + entries: <number of entries per object map> + keysize: <number of characters per object map key> + valsize: <number of characters per object map val> + increment: <interval to show in histogram (in ms)> + omaptype: <how the omaps should be generated> + + example:: + + tasks: + - ceph: + - omapbench: + clients: [client.0] + threads: 30 + objects: 1000 + entries: 10 + keysize: 10 + valsize: 100 + increment: 100 + omaptype: uniform + - interactive: + """ + log.info('Beginning omapbench...') + assert isinstance(config, dict), \ + "please list clients to run on" + omapbench = {} + testdir = teuthology.get_testdir(ctx) + print(str(config.get('increment',-1))) + for role in config.get('clients', ['client.0']): + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.keys() + proc = remote.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'omapbench', + '--name', role[len(PREFIX):], + '-t', str(config.get('threads', 30)), + '-o', str(config.get('objects', 1000)), + '--entries', str(config.get('entries',10)), + '--keysize', str(config.get('keysize',10)), + '--valsize', str(config.get('valsize',1000)), + '--inc', str(config.get('increment',10)), + '--omaptype', str(config.get('omaptype','uniform')) + ]).format(tdir=testdir), + ], + logger=log.getChild('omapbench.{id}'.format(id=id_)), + stdin=run.PIPE, + wait=False + ) + omapbench[id_] = proc + + try: + yield + finally: + log.info('joining omapbench') + run.wait(omapbench.values()) diff --git a/qa/tasks/openssl_keys.py b/qa/tasks/openssl_keys.py new file mode 100644 index 000000000..2d26a8729 --- /dev/null +++ b/qa/tasks/openssl_keys.py @@ -0,0 +1,245 @@ +""" +Generates and installs a signed SSL certificate. +""" +import argparse +import logging +import os + +from teuthology import misc +from teuthology.exceptions import ConfigError +from teuthology.orchestra import run +from teuthology.task import Task + +log = logging.getLogger(__name__) + +class OpenSSLKeys(Task): + name = 'openssl_keys' + """ + Generates and installs a signed SSL certificate. + + To create a self-signed certificate: + + - openssl_keys: + # certificate name + root: # results in root.key and root.crt + + # [required] make the private key and certificate available in this client's test directory + client: client.0 + + # common name, defaults to `hostname`. 
chained certificates must not share a common name + cn: teuthology + + # private key type for -newkey, defaults to rsa:2048 + key-type: rsa:4096 + + # install the certificate as trusted on these clients: + install: [client.0, client.1] + + + To create a certificate signed by a ca certificate: + + - openssl_keys: + root: (self-signed certificate as above) + ... + + cert-for-client1: + client: client.1 + + # use another ssl certificate (by 'name') as the certificate authority + ca: root # --CAkey=root.key -CA=root.crt + + # embed the private key in the certificate file + embed-key: true + """ + + def __init__(self, ctx, config): + super(OpenSSLKeys, self).__init__(ctx, config) + self.certs = [] + self.installed = [] + + def setup(self): + # global dictionary allows other tasks to look up certificate paths + if not hasattr(self.ctx, 'ssl_certificates'): + self.ctx.ssl_certificates = {} + + # use testdir/ca as a working directory + self.cadir = '/'.join((misc.get_testdir(self.ctx), 'ca')) + # make sure self-signed certs get added first, they don't have 'ca' field + configs = sorted(self.config.items(), key=lambda x: 'ca' in x[1]) + for name, config in configs: + # names must be unique to avoid clobbering each others files + if name in self.ctx.ssl_certificates: + raise ConfigError('ssl: duplicate certificate name {}'.format(name)) + + # create the key and certificate + cert = self.create_cert(name, config) + + self.ctx.ssl_certificates[name] = cert + self.certs.append(cert) + + # install as trusted on the requested clients + for client in config.get('install', []): + installed = self.install_cert(cert, client) + self.installed.append(installed) + + def teardown(self): + """ + Clean up any created/installed certificate files. + """ + for cert in self.certs: + self.remove_cert(cert) + + for installed in self.installed: + self.uninstall_cert(installed) + + def create_cert(self, name, config): + """ + Create a certificate with the given configuration. 
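+
+        If the config names a 'ca', the private key and CSR are generated
+        on that CA certificate's client (optionally with a subjectAltName
+        extension file), signed with the CA's key, verified, and copied to
+        the target client.  Otherwise a self-signed certificate is produced
+        with 'openssl req -x509'.  With 'embed-key: true' the private key
+        is appended to the resulting .crt file.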
+ """ + cert = argparse.Namespace() + cert.name = name + cert.key_type = config.get('key-type', 'rsa:2048') + + cert.client = config.get('client', None) + if not cert.client: + raise ConfigError('ssl: missing required field "client"') + + (cert.remote,) = self.ctx.cluster.only(cert.client).remotes.keys() + + cert.remote.run(args=['mkdir', '-p', self.cadir]) + + cert.key = f'{self.cadir}/{cert.name}.key' + cert.certificate = f'{self.cadir}/{cert.name}.crt' + + san_ext = [] + add_san_default = False + cn = config.get('cn', '') + if cn == '': + cn = cert.remote.hostname + add_san_default = True + if config.get('add-san', add_san_default): + ext = f'{self.cadir}/{cert.name}.ext' + san_ext = ['-extfile', ext] + + # provide the common name in -subj to avoid the openssl command prompts + subject = f'/CN={cn}' + + # if a ca certificate is provided, use it to sign the new certificate + ca = config.get('ca', None) + if ca: + # the ca certificate must have been created by a prior ssl task + ca_cert = self.ctx.ssl_certificates.get(ca, None) + if not ca_cert: + raise ConfigError(f'ssl: ca {ca} not found for certificate {cert.name}') + + csr = f'{self.cadir}/{cert.name}.csr' + srl = f'{self.cadir}/{ca_cert.name}.srl' + remove_files = ['rm', '-f', csr, srl] + + # these commands are run on the ca certificate's client because + # they need access to its private key and cert + + # generate a private key and signing request + ca_cert.remote.run(args=['openssl', 'req', '-nodes', + '-newkey', cert.key_type, '-keyout', cert.key, + '-out', csr, '-subj', subject]) + + if san_ext: + remove_files.append(ext) + ca_cert.remote.write_file(path=ext, + data='subjectAltName = DNS:{},IP:{}'.format( + cn, + config.get('ip', cert.remote.ip_address))) + + # create the signed certificate + ca_cert.remote.run(args=['openssl', 'x509', '-req', '-in', csr, + '-CA', ca_cert.certificate, '-CAkey', ca_cert.key, '-CAcreateserial', + '-out', cert.certificate, '-days', '365', '-sha256'] + san_ext) + + ca_cert.remote.run(args=remove_files) # clean up the signing request and serial + + # verify the new certificate against its ca cert + ca_cert.remote.run(args=['openssl', 'verify', + '-CAfile', ca_cert.certificate, cert.certificate]) + + if cert.remote != ca_cert.remote: + # copy to remote client + self.remote_copy_file(ca_cert.remote, cert.certificate, cert.remote, cert.certificate) + self.remote_copy_file(ca_cert.remote, cert.key, cert.remote, cert.key) + # clean up the local copies + ca_cert.remote.run(args=['rm', cert.certificate, cert.key]) + # verify the remote certificate (requires ca to be in its trusted ca certificate store) + cert.remote.run(args=['openssl', 'verify', cert.certificate]) + else: + # otherwise, generate a private key and use it to self-sign a new certificate + cert.remote.run(args=['openssl', 'req', '-x509', '-nodes', + '-newkey', cert.key_type, '-keyout', cert.key, + '-days', '365', '-out', cert.certificate, '-subj', subject]) + + if config.get('embed-key', False): + # append the private key to the certificate file + cert.remote.run(args=['cat', cert.key, run.Raw('>>'), cert.certificate]) + + return cert + + def remove_cert(self, cert): + """ + Delete all of the files associated with the given certificate. 
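+
+        Only the .key and .crt files are removed here; the CSR and serial
+        files were already cleaned up during signing.  The shared ca
+        directory is removed once no other certificates remain in it.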
+ """ + # remove the private key and certificate + cert.remote.run(args=['rm', '-f', cert.certificate, cert.key]) + + # remove ca subdirectory if it's empty + cert.remote.run(args=['rmdir', '--ignore-fail-on-non-empty', self.cadir]) + + def install_cert(self, cert, client): + """ + Install as a trusted ca certificate on the given client. + """ + (remote,) = self.ctx.cluster.only(client).remotes.keys() + + installed = argparse.Namespace() + installed.remote = remote + + if remote.os.package_type == 'deb': + installed.path = '/usr/local/share/ca-certificates/{}.crt'.format(cert.name) + installed.command = ['sudo', 'update-ca-certificates'] + else: + installed.path = '/usr/share/pki/ca-trust-source/anchors/{}.crt'.format(cert.name) + installed.command = ['sudo', 'update-ca-trust'] + + cp_or_mv = 'cp' + if remote != cert.remote: + # copy into remote cadir (with mkdir if necessary) + remote.run(args=['mkdir', '-p', self.cadir]) + self.remote_copy_file(cert.remote, cert.certificate, remote, cert.certificate) + cp_or_mv = 'mv' # move this remote copy into the certificate store + + # install into certificate store as root + remote.run(args=['sudo', cp_or_mv, cert.certificate, installed.path]) + remote.run(args=installed.command) + + return installed + + def uninstall_cert(self, installed): + """ + Uninstall a certificate from the trusted certificate store. + """ + installed.remote.run(args=['sudo', 'rm', installed.path]) + installed.remote.run(args=installed.command) + + def remote_copy_file(self, from_remote, from_path, to_remote, to_path): + """ + Copies a file from one remote to another. + + The remotes don't have public-key auth for 'scp' or misc.copy_file(), + so this copies through an intermediate local tmp file. + """ + log.info('copying from {}:{} to {}:{}...'.format(from_remote, from_path, to_remote, to_path)) + local_path = from_remote.get_file(from_path) + try: + to_remote.put_file(local_path, to_path) + finally: + os.remove(local_path) + +task = OpenSSLKeys diff --git a/qa/tasks/osd_backfill.py b/qa/tasks/osd_backfill.py new file mode 100644 index 000000000..b33e1c912 --- /dev/null +++ b/qa/tasks/osd_backfill.py @@ -0,0 +1,104 @@ +""" +Osd backfill test +""" +import logging +import time +from tasks import ceph_manager +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + + +def rados_start(ctx, remote, cmd): + """ + Run a remote rados command (currently used to only write data) + """ + log.info("rados %s" % ' '.join(cmd)) + testdir = teuthology.get_testdir(ctx) + pre = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rados', + ]; + pre.extend(cmd) + proc = remote.run( + args=pre, + wait=False, + ) + return proc + +def task(ctx, config): + """ + Test backfill + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'thrashosds task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + assert num_osds == 3 + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.wait_for_clean() + + # write some data + p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096', + '--no-cleanup']) + err = p.wait() + log.info('err is %d' % 
err) + + # mark osd.0 out to trigger a rebalance/backfill + manager.mark_out_osd(0) + + # also mark it down to it won't be included in pg_temps + manager.kill_osd(0) + manager.mark_down_osd(0) + + # wait for everything to peer and be happy... + manager.flush_pg_stats([1, 2]) + manager.wait_for_recovery() + + # write some new data + p = rados_start(ctx, mon, ['-p', 'rbd', 'bench', '30', 'write', '-b', '4096', + '--no-cleanup']) + + time.sleep(15) + + # blackhole + restart osd.1 + # this triggers a divergent backfill target + manager.blackhole_kill_osd(1) + time.sleep(2) + manager.revive_osd(1) + + # wait for our writes to complete + succeed + err = p.wait() + log.info('err is %d' % err) + + # wait for osd.1 and osd.2 to be up + manager.wait_till_osd_is_up(1) + manager.wait_till_osd_is_up(2) + + # cluster must recover + manager.flush_pg_stats([1, 2]) + manager.wait_for_recovery() + + # re-add osd.0 + manager.revive_osd(0) + manager.flush_pg_stats([1, 2]) + manager.wait_for_clean() + + diff --git a/qa/tasks/osd_failsafe_enospc.py b/qa/tasks/osd_failsafe_enospc.py new file mode 100644 index 000000000..fe2996a78 --- /dev/null +++ b/qa/tasks/osd_failsafe_enospc.py @@ -0,0 +1,218 @@ +""" +Handle osdfailsafe configuration settings (nearfull ratio and full ratio) +""" +from io import StringIO +import logging +import time + +from teuthology.orchestra import run +from tasks.util.rados import rados +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio + configuration settings + + In order for test to pass must use log-ignorelist as follows + + tasks: + - chef: + - install: + - ceph: + log-ignorelist: ['OSD near full', 'OSD full dropping all updates'] + - osd_failsafe_enospc: + + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'osd_failsafe_enospc task only accepts a dict for configuration' + + # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 padding + sleep_time = 50 + + # something that is always there + dummyfile = '/etc/fstab' + dummyfile2 = '/etc/resolv.conf' + + manager = ctx.managers['ceph'] + + # create 1 pg pool with 1 rep which can only be on osd.0 + osds = manager.get_osd_dump() + for osd in osds: + if osd['osd'] != 0: + manager.mark_out_osd(osd['osd']) + + log.info('creating pool foo') + manager.create_pool("foo") + manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1') + + # State NONE -> NEAR + log.info('1. Verify warning messages when exceeding nearfull_ratio') + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + proc = mon.run( + args=[ + 'sudo', + 'daemon-helper', + 'kill', + 'ceph', '-w' + ], + stdin=run.PIPE, + stdout=StringIO(), + wait=False, + ) + + manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .00001') + + time.sleep(sleep_time) + proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w + proc.wait() + + lines = proc.stdout.getvalue().split('\n') + + count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) + assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count + count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) + assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count + + # State NEAR -> FULL + log.info('2. 
Verify error messages when exceeding full_ratio') + + proc = mon.run( + args=[ + 'sudo', + 'daemon-helper', + 'kill', + 'ceph', '-w' + ], + stdin=run.PIPE, + stdout=StringIO(), + wait=False, + ) + + manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001') + + time.sleep(sleep_time) + proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w + proc.wait() + + lines = proc.stdout.getvalue().split('\n') + + count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) + assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count + + log.info('3. Verify write failure when exceeding full_ratio') + + # Write data should fail + ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile]) + assert ret != 0, 'Expected write failure but it succeeded with exit status 0' + + # Put back default + manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97') + time.sleep(10) + + # State FULL -> NEAR + log.info('4. Verify write success when NOT exceeding full_ratio') + + # Write should succeed + ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2]) + assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret + + log.info('5. Verify warning messages again when exceeding nearfull_ratio') + + proc = mon.run( + args=[ + 'sudo', + 'daemon-helper', + 'kill', + 'ceph', '-w' + ], + stdin=run.PIPE, + stdout=StringIO(), + wait=False, + ) + + time.sleep(sleep_time) + proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w + proc.wait() + + lines = proc.stdout.getvalue().split('\n') + + count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) + assert count == 1 or count == 2, 'Incorrect number of warning messages expected 1 or 2 got %d' % count + count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) + assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count + + manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .90') + time.sleep(10) + + # State NONE -> FULL + log.info('6. Verify error messages again when exceeding full_ratio') + + proc = mon.run( + args=[ + 'sudo', + 'daemon-helper', + 'kill', + 'ceph', '-w' + ], + stdin=run.PIPE, + stdout=StringIO(), + wait=False, + ) + + manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001') + + time.sleep(sleep_time) + proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w + proc.wait() + + lines = proc.stdout.getvalue().split('\n') + + count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) + assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count + count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) + assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count + + # State FULL -> NONE + log.info('7. 
Verify no messages settings back to default') + + manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97') + time.sleep(10) + + proc = mon.run( + args=[ + 'sudo', + 'daemon-helper', + 'kill', + 'ceph', '-w' + ], + stdin=run.PIPE, + stdout=StringIO(), + wait=False, + ) + + time.sleep(sleep_time) + proc.stdin.close() # causes daemon-helper send SIGKILL to ceph -w + proc.wait() + + lines = proc.stdout.getvalue().split('\n') + + count = len(filter(lambda line: '[WRN] OSD near full' in line, lines)) + assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count + count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines)) + assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count + + log.info('Test Passed') + + # Bring all OSDs back in + manager.remove_pool("foo") + for osd in osds: + if osd['osd'] != 0: + manager.mark_in_osd(osd['osd']) diff --git a/qa/tasks/osd_max_pg_per_osd.py b/qa/tasks/osd_max_pg_per_osd.py new file mode 100644 index 000000000..6680fe6e3 --- /dev/null +++ b/qa/tasks/osd_max_pg_per_osd.py @@ -0,0 +1,126 @@ +import logging +import random + + +log = logging.getLogger(__name__) + + +def pg_num_in_all_states(pgs, *states): + return sum(1 for state in pgs.values() + if all(s in state for s in states)) + + +def pg_num_in_any_state(pgs, *states): + return sum(1 for state in pgs.values() + if any(s in state for s in states)) + + +def test_create_from_mon(ctx, config): + """ + osd should stop creating new pools if the number of pg it servers + exceeds the max-pg-per-osd setting, and it should resume the previously + suspended pg creations once the its pg number drops down below the setting + How it works:: + 1. set the hard limit of pg-per-osd to "2" + 2. create pool.a with pg_num=2 + # all pgs should be active+clean + 2. create pool.b with pg_num=2 + # new pgs belonging to this pool should be unknown (the primary osd + reaches the limit) or creating (replica osd reaches the limit) + 3. remove pool.a + 4. all pg belonging to pool.b should be active+clean + """ + pg_num = config.get('pg_num', 2) + manager = ctx.managers['ceph'] + log.info('1. creating pool.a') + pool_a = manager.create_pool_with_unique_name(pg_num) + pg_states = manager.wait_till_pg_convergence(300) + pg_created = pg_num_in_all_states(pg_states, 'active', 'clean') + assert pg_created == pg_num + + log.info('2. creating pool.b') + pool_b = manager.create_pool_with_unique_name(pg_num) + pg_states = manager.wait_till_pg_convergence(300) + pg_created = pg_num_in_all_states(pg_states, 'active', 'clean') + assert pg_created == pg_num + pg_pending = pg_num_in_any_state(pg_states, 'unknown', 'creating') + assert pg_pending == pg_num + + log.info('3. removing pool.a') + manager.remove_pool(pool_a) + pg_states = manager.wait_till_pg_convergence(300) + assert len(pg_states) == pg_num + pg_created = pg_num_in_all_states(pg_states, 'active', 'clean') + assert pg_created == pg_num + + # cleanup + manager.remove_pool(pool_b) + + +def test_create_from_peer(ctx, config): + """ + osd should stop creating new pools if the number of pg it servers + exceeds the max-pg-per-osd setting, and it should resume the previously + suspended pg creations once the its pg number drops down below the setting + + How it works:: + 0. create 4 OSDs. + 1. create pool.a with pg_num=1, size=2 + pg will be mapped to osd.0, and osd.1, and it should be active+clean + 2. create pool.b with pg_num=1, size=2. 
+ if the pgs stuck in creating, delete the pool since the pool and try + again, eventually we'll get the pool to land on the other 2 osds that + aren't occupied by pool.a. (this will also verify that pgs for deleted + pools get cleaned out of the creating wait list.) + 3. mark an osd out. verify that some pgs get stuck stale or peering. + 4. delete a pool, verify pgs go active. + """ + pg_num = config.get('pg_num', 1) + from_primary = config.get('from_primary', True) + + manager = ctx.managers['ceph'] + log.info('1. creating pool.a') + pool_a = manager.create_pool_with_unique_name(pg_num) + pg_states = manager.wait_till_pg_convergence(300) + pg_created = pg_num_in_all_states(pg_states, 'active', 'clean') + assert pg_created == pg_num + + log.info('2. creating pool.b') + while True: + pool_b = manager.create_pool_with_unique_name(pg_num) + pg_states = manager.wait_till_pg_convergence(300) + pg_created = pg_num_in_all_states(pg_states, 'active', 'clean') + assert pg_created >= pg_num + pg_pending = pg_num_in_any_state(pg_states, 'unknown', 'creating') + assert pg_pending == pg_num * 2 - pg_created + if pg_created == pg_num * 2: + break + manager.remove_pool(pool_b) + + log.info('3. mark an osd out') + pg_stats = manager.get_pg_stats() + pg = random.choice(pg_stats) + if from_primary: + victim = pg['acting'][-1] + else: + victim = pg['acting'][0] + manager.mark_out_osd(victim) + pg_states = manager.wait_till_pg_convergence(300) + pg_stuck = pg_num_in_any_state(pg_states, 'activating', 'stale', 'peering') + assert pg_stuck > 0 + + log.info('4. removing pool.b') + manager.remove_pool(pool_b) + manager.wait_for_clean(30) + + # cleanup + manager.remove_pool(pool_a) + + +def task(ctx, config): + assert isinstance(config, dict), \ + 'osd_max_pg_per_osd task only accepts a dict for config' + if config.get('test_create_from_mon', True): + test_create_from_mon(ctx, config) + else: + test_create_from_peer(ctx, config) diff --git a/qa/tasks/osd_recovery.py b/qa/tasks/osd_recovery.py new file mode 100644 index 000000000..b0623c21b --- /dev/null +++ b/qa/tasks/osd_recovery.py @@ -0,0 +1,193 @@ +""" +osd recovery +""" +import logging +import time +from tasks import ceph_manager +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + + +def rados_start(testdir, remote, cmd): + """ + Run a remote rados command (currently used to only write data) + """ + log.info("rados %s" % ' '.join(cmd)) + pre = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rados', + ]; + pre.extend(cmd) + proc = remote.run( + args=pre, + wait=False, + ) + return proc + +def task(ctx, config): + """ + Test (non-backfill) recovery + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + testdir = teuthology.get_testdir(ctx) + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + assert num_osds == 3 + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.wait_for_clean() + + # test some osdmap flags + manager.raw_cluster_cmd('osd', 'set', 'noin') + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'noup') + manager.raw_cluster_cmd('osd', 'set', 
'nodown') + manager.raw_cluster_cmd('osd', 'unset', 'noin') + manager.raw_cluster_cmd('osd', 'unset', 'noout') + manager.raw_cluster_cmd('osd', 'unset', 'noup') + manager.raw_cluster_cmd('osd', 'unset', 'nodown') + + # write some new data + p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '20', 'write', '-b', '4096', + '--no-cleanup']) + + time.sleep(15) + + # trigger a divergent target: + # blackhole + restart osd.1 (shorter log) + manager.blackhole_kill_osd(1) + # kill osd.2 (longer log... we'll make it divergent below) + manager.kill_osd(2) + time.sleep(2) + manager.revive_osd(1) + + # wait for our writes to complete + succeed + err = p.wait() + log.info('err is %d' % err) + + # cluster must repeer + manager.flush_pg_stats([0, 1]) + manager.wait_for_active_or_down() + + # write some more (make sure osd.2 really is divergent) + p = rados_start(testdir, mon, ['-p', 'rbd', 'bench', '15', 'write', '-b', '4096']) + p.wait() + + # revive divergent osd + manager.revive_osd(2) + + while len(manager.get_osd_status()['up']) < 3: + log.info('waiting a bit...') + time.sleep(2) + log.info('3 are up!') + + # cluster must recover + manager.flush_pg_stats([0, 1, 2]) + manager.wait_for_clean() + + +def test_incomplete_pgs(ctx, config): + """ + Test handling of incomplete pgs. Requires 4 osds. + """ + testdir = teuthology.get_testdir(ctx) + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + assert num_osds == 4 + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 4: + time.sleep(10) + + manager.flush_pg_stats([0, 1, 2, 3]) + manager.wait_for_clean() + + log.info('Testing incomplete pgs...') + + for i in range(4): + manager.set_config( + i, + osd_recovery_delay_start=1000) + + # move data off of osd.0, osd.1 + manager.raw_cluster_cmd('osd', 'out', '0', '1') + manager.flush_pg_stats([0, 1, 2, 3], [0, 1]) + manager.wait_for_clean() + + # lots of objects in rbd (no pg log, will backfill) + p = rados_start(testdir, mon, + ['-p', 'rbd', 'bench', '20', 'write', '-b', '1', + '--no-cleanup']) + p.wait() + + # few objects in rbd pool (with pg log, normal recovery) + for f in range(1, 20): + p = rados_start(testdir, mon, ['-p', 'rbd', 'put', + 'foo.%d' % f, '/etc/passwd']) + p.wait() + + # move it back + manager.raw_cluster_cmd('osd', 'in', '0', '1') + manager.raw_cluster_cmd('osd', 'out', '2', '3') + time.sleep(10) + manager.flush_pg_stats([0, 1, 2, 3], [2, 3]) + time.sleep(10) + manager.wait_for_active() + + assert not manager.is_clean() + assert not manager.is_recovered() + + # kill 2 + 3 + log.info('stopping 2,3') + manager.kill_osd(2) + manager.kill_osd(3) + log.info('...') + manager.raw_cluster_cmd('osd', 'down', '2', '3') + manager.flush_pg_stats([0, 1]) + manager.wait_for_active_or_down() + + assert manager.get_num_down() > 0 + + # revive 2 + 3 + manager.revive_osd(2) + manager.revive_osd(3) + while len(manager.get_osd_status()['up']) < 4: + log.info('waiting a bit...') + time.sleep(2) + log.info('all are up!') + + for i in range(4): + manager.kick_recovery_wq(i) + + # cluster must recover + manager.wait_for_clean() diff --git a/qa/tasks/peer.py b/qa/tasks/peer.py new file mode 100644 index 000000000..6b19096b1 --- /dev/null +++ 
b/qa/tasks/peer.py @@ -0,0 +1,90 @@ +""" +Peer test (Single test, not much configurable here) +""" +import logging +import json +import time + +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test peering. + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'peer task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.wait_for_clean() + + for i in range(3): + manager.set_config( + i, + osd_recovery_delay_start=120) + + # take on osd down + manager.kill_osd(2) + manager.mark_down_osd(2) + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', 'data', 'get', 'dummy', '-']) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_recovery() + + # kill another and revive 2, so that some pgs can't peer. + manager.kill_osd(1) + manager.mark_down_osd(1) + manager.revive_osd(2) + manager.wait_till_osd_is_up(2) + + manager.flush_pg_stats([0, 2]) + + manager.wait_for_active_or_down() + + manager.flush_pg_stats([0, 2]) + + # look for down pgs + num_down_pgs = 0 + pgs = manager.get_pg_stats() + for pg in pgs: + out = manager.raw_cluster_cmd('pg', pg['pgid'], 'query') + log.debug("out string %s",out) + j = json.loads(out) + log.info("pg is %s, query json is %s", pg, j) + + if pg['state'].count('down'): + num_down_pgs += 1 + # verify that it is blocked on osd.1 + rs = j['recovery_state'] + assert len(rs) >= 2 + assert rs[0]['name'] == 'Started/Primary/Peering/Down' + assert rs[1]['name'] == 'Started/Primary/Peering' + assert rs[1]['blocked'] + assert rs[1]['down_osds_we_would_probe'] == [1] + assert len(rs[1]['peering_blocked_by']) == 1 + assert rs[1]['peering_blocked_by'][0]['osd'] == 1 + + assert num_down_pgs > 0 + + # bring it all back + manager.revive_osd(1) + manager.wait_till_osd_is_up(1) + manager.flush_pg_stats([0, 1, 2]) + manager.wait_for_clean() diff --git a/qa/tasks/peering_speed_test.py b/qa/tasks/peering_speed_test.py new file mode 100644 index 000000000..9dc658361 --- /dev/null +++ b/qa/tasks/peering_speed_test.py @@ -0,0 +1,87 @@ +""" +Remotely run peering tests. +""" +import logging +import time + +log = logging.getLogger(__name__) + +from teuthology.task.args import argify + +POOLNAME = "POOLNAME" +ARGS = [ + ('num_pgs', 'number of pgs to create', 256, int), + ('max_time', 'seconds to complete peering', 0, int), + ('runs', 'trials to run', 10, int), + ('num_objects', 'objects to create', 256 * 1024, int), + ('object_size', 'size in bytes for objects', 64, int), + ('creation_time_limit', 'time limit for pool population', 60*60, int), + ('create_threads', 'concurrent writes for create', 256, int) + ] + +def setup(ctx, config): + """ + Setup peering test on remotes. + """ + manager = ctx.managers['ceph'] + manager.clear_pools() + manager.create_pool(POOLNAME, config.num_pgs) + log.info("populating pool") + manager.rados_write_objects( + POOLNAME, + config.num_objects, + config.object_size, + config.creation_time_limit, + config.create_threads) + log.info("done populating pool") + +def do_run(ctx, config): + """ + Perform the test. 
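+
+    Marks osd.0 back in, writes roughly one object per pg, and records two
+    durations: the time until those writes complete ('time_to_active') and
+    the time until the cluster is clean again ('time_to_clean').  If
+    'max_time' is set, the run asserts that peering finished within it.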
+ """ + start = time.time() + # mark in osd + manager = ctx.managers['ceph'] + manager.mark_in_osd(0) + log.info("writing out objects") + manager.rados_write_objects( + POOLNAME, + config.num_pgs, # write 1 object per pg or so + 1, + config.creation_time_limit, + config.num_pgs, # lots of concurrency + cleanup = True) + peering_end = time.time() + + log.info("peering done, waiting on recovery") + manager.wait_for_clean() + + log.info("recovery done") + recovery_end = time.time() + if config.max_time: + assert(peering_end - start < config.max_time) + manager.mark_out_osd(0) + manager.wait_for_clean() + return { + 'time_to_active': peering_end - start, + 'time_to_clean': recovery_end - start + } + +@argify("peering_speed_test", ARGS) +def task(ctx, config): + """ + Peering speed test + """ + setup(ctx, config) + manager = ctx.managers['ceph'] + manager.mark_out_osd(0) + manager.wait_for_clean() + ret = [] + for i in range(config.runs): + log.info("Run {i}".format(i = i)) + ret.append(do_run(ctx, config)) + + manager.mark_in_osd(0) + ctx.summary['recovery_times'] = { + 'runs': ret + } diff --git a/qa/tasks/populate_rbd_pool.py b/qa/tasks/populate_rbd_pool.py new file mode 100644 index 000000000..76395eb68 --- /dev/null +++ b/qa/tasks/populate_rbd_pool.py @@ -0,0 +1,82 @@ +""" +Populate rbd pools +""" +import contextlib +import logging + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Populate <num_pools> pools with prefix <pool_prefix> with <num_images> + rbd images at <num_snaps> snaps + + The config could be as follows:: + + populate_rbd_pool: + client: <client> + pool_prefix: foo + num_pools: 5 + num_images: 10 + num_snaps: 3 + image_size: 10737418240 + """ + if config is None: + config = {} + client = config.get("client", "client.0") + pool_prefix = config.get("pool_prefix", "foo") + num_pools = config.get("num_pools", 2) + num_images = config.get("num_images", 20) + num_snaps = config.get("num_snaps", 4) + image_size = config.get("image_size", 100) + write_size = config.get("write_size", 1024*1024) + write_threads = config.get("write_threads", 10) + write_total_per_snap = config.get("write_total_per_snap", 1024*1024*30) + + (remote,) = ctx.cluster.only(client).remotes.keys() + + for poolid in range(num_pools): + poolname = "%s-%s" % (pool_prefix, str(poolid)) + log.info("Creating pool %s" % (poolname,)) + ctx.managers['ceph'].create_pool(poolname) + for imageid in range(num_images): + imagename = "rbd-%s" % (str(imageid),) + log.info("Creating imagename %s" % (imagename,)) + remote.run( + args = [ + "rbd", + "create", + imagename, + "--image-format", "1", + "--size", str(image_size), + "--pool", str(poolname)]) + def bench_run(): + remote.run( + args = [ + "rbd", + "bench-write", + imagename, + "--pool", poolname, + "--io-size", str(write_size), + "--io-threads", str(write_threads), + "--io-total", str(write_total_per_snap), + "--io-pattern", "rand"]) + log.info("imagename %s first bench" % (imagename,)) + bench_run() + for snapid in range(num_snaps): + snapname = "snap-%s" % (str(snapid),) + log.info("imagename %s creating snap %s" % (imagename, snapname)) + remote.run( + args = [ + "rbd", "snap", "create", + "--pool", poolname, + "--snap", snapname, + imagename + ]) + bench_run() + + try: + yield + finally: + log.info('done') diff --git a/qa/tasks/pykmip.py b/qa/tasks/pykmip.py new file mode 100644 index 000000000..45a5af689 --- /dev/null +++ b/qa/tasks/pykmip.py @@ -0,0 +1,465 @@ +""" +Deploy and configure PyKMIP for Teuthology +""" 
+import argparse +import contextlib +import logging +import time +import tempfile +import json +import os +from io import BytesIO +from teuthology.orchestra.daemon import DaemonGroup +from teuthology.orchestra.remote import Remote + +import pprint + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.packaging import install_package +from teuthology.packaging import remove_package +from teuthology.exceptions import ConfigError +from tasks.util import get_remote_for_role + +log = logging.getLogger(__name__) + + +def get_pykmip_dir(ctx): + return '{tdir}/pykmip'.format(tdir=teuthology.get_testdir(ctx)) + +def run_in_pykmip_dir(ctx, client, args, **kwargs): + (remote,) = [client] if isinstance(client,Remote) else ctx.cluster.only(client).remotes.keys() + return remote.run( + args=['cd', get_pykmip_dir(ctx), run.Raw('&&'), ] + args, + **kwargs + ) + +def run_in_pykmip_venv(ctx, client, args, **kwargs): + return run_in_pykmip_dir(ctx, client, + args = ['.', '.pykmipenv/bin/activate', + run.Raw('&&') + ] + args, **kwargs) + +@contextlib.contextmanager +def download(ctx, config): + """ + Download PyKMIP from github. + Remove downloaded file upon exit. + + The context passed in should be identical to the context + passed in to the main task. + """ + assert isinstance(config, dict) + log.info('Downloading pykmip...') + pykmipdir = get_pykmip_dir(ctx) + + for (client, cconf) in config.items(): + branch = cconf.get('force-branch', 'master') + repo = cconf.get('force-repo', 'https://github.com/OpenKMIP/PyKMIP') + sha1 = cconf.get('sha1') + log.info("Using branch '%s' for pykmip", branch) + log.info('sha1=%s', sha1) + + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', '-b', branch, repo, + pykmipdir, + ], + ) + if sha1 is not None: + run_in_pykmip_dir(ctx, client, [ + 'git', 'reset', '--hard', sha1, + ], + ) + try: + yield + finally: + log.info('Removing pykmip...') + for client in config: + ctx.cluster.only(client).run( + args=[ 'rm', '-rf', pykmipdir ], + ) + +_bindep_txt = """# should be part of PyKMIP +libffi-dev [platform:dpkg] +libffi-devel [platform:rpm] +libssl-dev [platform:dpkg] +openssl-devel [platform:redhat] +libopenssl-devel [platform:suse] +libsqlite3-dev [platform:dpkg] +sqlite-devel [platform:rpm] +python-dev [platform:dpkg] +python-devel [(platform:redhat platform:base-py2)] +python3-dev [platform:dpkg] +python3-devel [(platform:redhat platform:base-py3) platform:suse] +python3 [platform:suse] +""" + +@contextlib.contextmanager +def install_packages(ctx, config): + """ + Download the packaged dependencies of PyKMIP. + Remove install packages upon exit. + + The context passed in should be identical to the context + passed in to the main task. 
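The install_packages() helper (continued below) resolves distro packages by feeding the bindep profile above to 'bindep --brief --file'. A local sketch of that resolution step, assuming bindep is installed in the current environment rather than in the task's .pykmipenv virtualenv, might look like:

import subprocess
import tempfile

# shortened stand-in for the _bindep_txt profile shipped with the task
BINDEP_TXT = """libffi-dev [platform:dpkg]
libffi-devel [platform:rpm]
"""

def resolve_bindep_packages(bindep_txt=BINDEP_TXT):
    """Return the distro packages bindep reports as missing.

    Mirrors install_packages(): bindep reads the profile file and, with
    --brief, prints one package name per line; it exits non-zero when
    packages are missing, so the return code is ignored here as well.
    """
    with tempfile.NamedTemporaryFile('w', suffix='.txt', prefix='bindep-') as f:
        f.write(bindep_txt)
        f.flush()
        proc = subprocess.run(['bindep', '--brief', '--file', f.name],
                              capture_output=True, text=True)
    return proc.stdout.splitlines()

# requires bindep on PATH (e.g. pip install bindep):
# print(resolve_bindep_packages())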
+ """ + assert isinstance(config, dict) + log.info('Installing system dependenies for PyKMIP...') + + packages = {} + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + # use bindep to read which dependencies we need from temp/bindep.txt + fd, local_temp_path = tempfile.mkstemp(suffix='.txt', + prefix='bindep-') + os.write(fd, _bindep_txt.encode()) + os.close(fd) + fd, remote_temp_path = tempfile.mkstemp(suffix='.txt', + prefix='bindep-') + os.close(fd) + remote.put_file(local_temp_path, remote_temp_path) + os.remove(local_temp_path) + run_in_pykmip_venv(ctx, remote, ['pip', 'install', 'bindep']) + r = run_in_pykmip_venv(ctx, remote, + ['bindep', '--brief', '--file', remote_temp_path], + stdout=BytesIO(), + check_status=False) # returns 1 on success? + packages[client] = r.stdout.getvalue().decode().splitlines() + for dep in packages[client]: + install_package(dep, remote) + try: + yield + finally: + log.info('Removing system dependencies of PyKMIP...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + for dep in packages[client]: + remove_package(dep, remote) + +@contextlib.contextmanager +def setup_venv(ctx, config): + """ + Setup the virtualenv for PyKMIP using pip. + """ + assert isinstance(config, dict) + log.info('Setting up virtualenv for pykmip...') + for (client, _) in config.items(): + run_in_pykmip_dir(ctx, client, ['python3', '-m', 'venv', '.pykmipenv']) + run_in_pykmip_venv(ctx, client, ['pip', 'install', '--upgrade', 'pip']) + run_in_pykmip_venv(ctx, client, ['pip', 'install', 'pytz', '-e', get_pykmip_dir(ctx)]) + yield + +def assign_ports(ctx, config, initial_port): + """ + Assign port numbers starting from @initial_port + """ + port = initial_port + role_endpoints = {} + for remote, roles_for_host in ctx.cluster.remotes.items(): + for role in roles_for_host: + if role in config: + r = get_remote_for_role(ctx, role) + role_endpoints[role] = r.ip_address, port, r.hostname + port += 1 + + return role_endpoints + +def copy_policy_json(ctx, cclient, cconfig): + run_in_pykmip_dir(ctx, cclient, + ['cp', + get_pykmip_dir(ctx)+'/examples/policy.json', + get_pykmip_dir(ctx)]) + +_pykmip_configuration = """# configuration for pykmip +[server] +hostname={ipaddr} +port={port} +certificate_path={servercert} +key_path={serverkey} +ca_path={clientca} +auth_suite=TLS1.2 +policy_path={confdir} +enable_tls_client_auth=False +tls_cipher_suites= + TLS_RSA_WITH_AES_128_CBC_SHA256 + TLS_RSA_WITH_AES_256_CBC_SHA256 + TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 +logging_level=DEBUG +database_path={confdir}/pykmip.sqlite +[client] +host={hostname} +port=5696 +certfile={clientcert} +keyfile={clientkey} +ca_certs={clientca} +ssl_version=PROTOCOL_TLSv1_2 +""" + +def create_pykmip_conf(ctx, cclient, cconfig): + log.info('#0 cclient={} cconfig={}'.format(pprint.pformat(cclient),pprint.pformat(cconfig))) + (remote,) = ctx.cluster.only(cclient).remotes.keys() + pykmip_ipaddr, pykmip_port, pykmip_hostname = ctx.pykmip.endpoints[cclient] + log.info('#1 ip,p,h {} {} {}'.format(pykmip_ipaddr, pykmip_port, pykmip_hostname)) + clientca = cconfig.get('clientca', None) + log.info('#2 clientca {}'.format(clientca)) + serverkey = None + servercert = cconfig.get('servercert', None) + log.info('#3 servercert {}'.format(servercert)) + servercert = ctx.ssl_certificates.get(servercert) + log.info('#4 servercert {}'.format(servercert)) + clientkey = None + clientcert = cconfig.get('clientcert', None) + log.info('#3 clientcert 
{}'.format(clientcert)) + clientcert = ctx.ssl_certificates.get(clientcert) + log.info('#4 clientcert {}'.format(clientcert)) + clientca = ctx.ssl_certificates.get(clientca) + log.info('#5 clientca {}'.format(clientca)) + if servercert != None: + serverkey = servercert.key + servercert = servercert.certificate + log.info('#6 serverkey {} servercert {}'.format(serverkey, servercert)) + if clientcert != None: + clientkey = clientcert.key + clientcert = clientcert.certificate + log.info('#6 clientkey {} clientcert {}'.format(clientkey, clientcert)) + if clientca != None: + clientca = clientca.certificate + log.info('#7 clientca {}'.format(clientca)) + if servercert == None or clientca == None or serverkey == None: + log.info('#8 clientca {} serverkey {} servercert {}'.format(clientca, serverkey, servercert)) + raise ConfigError('pykmip: Missing/bad servercert or clientca') + pykmipdir = get_pykmip_dir(ctx) + kmip_conf = _pykmip_configuration.format( + ipaddr=pykmip_ipaddr, + port=pykmip_port, + confdir=pykmipdir, + hostname=pykmip_hostname, + clientca=clientca, + clientkey=clientkey, + clientcert=clientcert, + serverkey=serverkey, + servercert=servercert + ) + fd, local_temp_path = tempfile.mkstemp(suffix='.conf', + prefix='pykmip') + os.write(fd, kmip_conf.encode()) + os.close(fd) + remote.put_file(local_temp_path, pykmipdir+'/pykmip.conf') + os.remove(local_temp_path) + +@contextlib.contextmanager +def configure_pykmip(ctx, config): + """ + Configure pykmip paste-api and pykmip-api. + """ + assert isinstance(config, dict) + (cclient, cconfig) = next(iter(config.items())) + + copy_policy_json(ctx, cclient, cconfig) + create_pykmip_conf(ctx, cclient, cconfig) + try: + yield + finally: + pass + +def has_ceph_task(tasks): + for task in tasks: + for name, conf in task.items(): + if name == 'ceph': + return True + return False + +@contextlib.contextmanager +def run_pykmip(ctx, config): + assert isinstance(config, dict) + if hasattr(ctx, 'daemons'): + pass + elif has_ceph_task(ctx.config['tasks']): + log.info('Delay start pykmip so ceph can do once-only daemon logic') + try: + yield + finally: + pass + else: + ctx.daemons = DaemonGroup() + log.info('Running pykmip...') + + pykmipdir = get_pykmip_dir(ctx) + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + cluster_name, _, client_id = teuthology.split_role(client) + + # start the public endpoint + client_public_with_id = 'pykmip.public' + '.' + client_id + + run_cmd = 'cd ' + pykmipdir + ' && ' + \ + '. 
.pykmipenv/bin/activate && ' + \ + 'HOME={}'.format(pykmipdir) + ' && ' + \ + 'exec pykmip-server -f pykmip.conf -l ' + \ + pykmipdir + '/pykmip.log & { read; kill %1; }' + + ctx.daemons.add_daemon( + remote, 'pykmip', client_public_with_id, + cluster=cluster_name, + args=['bash', '-c', run_cmd], + logger=log.getChild(client), + stdin=run.PIPE, + cwd=pykmipdir, + wait=False, + check_status=False, + ) + + # sleep driven synchronization + time.sleep(10) + try: + yield + finally: + log.info('Stopping PyKMIP instance') + ctx.daemons.get_daemon('pykmip', client_public_with_id, + cluster_name).stop() + +make_keys_template = """ +from kmip.pie import client +from kmip import enums +import ssl +import sys +import json +from io import BytesIO + +c = client.ProxyKmipClient(config_file="{replace-with-config-file-path}") + +rl=[] +for kwargs in {replace-with-secrets}: + with c: + key_id = c.create( + enums.CryptographicAlgorithm.AES, + 256, + operation_policy_name='default', + cryptographic_usage_mask=[ + enums.CryptographicUsageMask.ENCRYPT, + enums.CryptographicUsageMask.DECRYPT + ], + **kwargs + ) + c.activate(key_id) + attrs = c.get_attributes(uid=key_id) + r = {} + for a in attrs[1]: + r[str(a.attribute_name)] = str(a.attribute_value) + rl.append(r) +print(json.dumps(rl)) +""" + +@contextlib.contextmanager +def create_secrets(ctx, config): + """ + Create and activate any requested keys in kmip + """ + assert isinstance(config, dict) + + pykmipdir = get_pykmip_dir(ctx) + pykmip_conf_path = pykmipdir + '/pykmip.conf' + my_output = BytesIO() + for (client,cconf) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + secrets=cconf.get('secrets') + if secrets: + secrets_json = json.dumps(cconf['secrets']) + make_keys = make_keys_template \ + .replace("{replace-with-secrets}",secrets_json) \ + .replace("{replace-with-config-file-path}",pykmip_conf_path) + my_output.truncate() + remote.run(args=[run.Raw('. 
cephtest/pykmip/.pykmipenv/bin/activate;' \ + + 'python')], stdin=make_keys, stdout = my_output) + ctx.pykmip.keys[client] = json.loads(my_output.getvalue().decode()) + try: + yield + finally: + pass + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy and configure PyKMIP + + Example of configuration: + + tasks: + - install: + - ceph: + conf: + client: + rgw crypt s3 kms backend: kmip + rgw crypt kmip ca path: /home/ubuntu/cephtest/ca/kmiproot.crt + rgw crypt kmip client cert: /home/ubuntu/cephtest/ca/kmip-client.crt + rgw crypt kmip client key: /home/ubuntu/cephtest/ca/kmip-client.key + rgw crypt kmip kms key template: pykmip-$keyid + - openssl_keys: + kmiproot: + client: client.0 + cn: kmiproot + key-type: rsa:4096 + - openssl_keys: + kmip-server: + client: client.0 + ca: kmiproot + kmip-client: + client: client.0 + ca: kmiproot + cn: rgw-client + - pykmip: + client.0: + force-branch: master + clientca: kmiproot + servercert: kmip-server + clientcert: kmip-client + secrets: + - name: pykmip-key-1 + - name: pykmip-key-2 + - rgw: + client.0: + use-pykmip-role: client.0 + - s3tests: + client.0: + force-branch: master + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task pykmip only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('pykmip', {})) + + log.debug('PyKMIP config is %s', config) + + if not hasattr(ctx, 'ssl_certificates'): + raise ConfigError('pykmip must run after the openssl_keys task') + + + ctx.pykmip = argparse.Namespace() + ctx.pykmip.endpoints = assign_ports(ctx, config, 5696) + ctx.pykmip.keys = {} + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: setup_venv(ctx=ctx, config=config), + lambda: install_packages(ctx=ctx, config=config), + lambda: configure_pykmip(ctx=ctx, config=config), + lambda: run_pykmip(ctx=ctx, config=config), + lambda: create_secrets(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/python.py b/qa/tasks/python.py new file mode 100644 index 000000000..4ddb14f71 --- /dev/null +++ b/qa/tasks/python.py @@ -0,0 +1,45 @@ +import logging +from teuthology import misc as teuthology +from tasks.vip import subst_vip + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Execute some python code. + + tasks: + - python: + host.a: | + import boto3 + c = boto3.resource(...) + + The provided dict is normally indexed by role. You can also include a + 'sudo: false' key to run the code without sudo. + + tasks: + - python: + sudo: false + host.b: | + import boto3 + c = boto3.resource(...) 
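The python task whose docstring appears above pipes the given snippet to python3 over stdin on the remote, with TESTDIR exported and an optional sudo prefix. A rough local equivalent, assuming a stock 'env' binary, skipping the subst_vip() substitution, and using an illustrative testdir path, is:

import subprocess

def run_python_snippet(code, testdir='/home/ubuntu/cephtest', sudo=True):
    """Run a snippet roughly the way the python task does, on the local host.

    The task sends the code to 'python3' over stdin with TESTDIR set; here
    'env' provides the variable locally and sudo is optional, as in the task.
    """
    args = ['env', 'TESTDIR={}'.format(testdir), 'python3']
    if sudo:
        args = ['sudo'] + args
    return subprocess.run(args, input=code, text=True, check=True)

# example (without sudo so it runs unattended):
# run_python_snippet("print('hello from the python task sketch')", sudo=False)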
+ """ + assert isinstance(config, dict), "task python got invalid config" + + testdir = teuthology.get_testdir(ctx) + + sudo = config.pop('sudo', True) + + for role, code in config.items(): + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Running python on role %s host %s', role, remote.name) + log.info(code) + args=[ + 'TESTDIR={tdir}'.format(tdir=testdir), + 'python3', + ] + if sudo: + args = ['sudo'] + args + remote.run(args=args, stdin=subst_vip(ctx, code)) + diff --git a/qa/tasks/qemu.py b/qa/tasks/qemu.py new file mode 100644 index 000000000..6533026b4 --- /dev/null +++ b/qa/tasks/qemu.py @@ -0,0 +1,713 @@ +""" +Qemu task +""" + +import contextlib +import logging +import os +import yaml +import time + +from tasks import rbd +from tasks.util.workunit import get_refspec_after_overrides +from teuthology import contextutil +from teuthology import misc as teuthology +from teuthology.config import config as teuth_config +from teuthology.orchestra import run +from teuthology.packaging import install_package, remove_package + +log = logging.getLogger(__name__) + +DEFAULT_NUM_DISKS = 2 +DEFAULT_IMAGE_URL = 'http://download.ceph.com/qa/ubuntu-12.04.qcow2' +DEFAULT_IMAGE_SIZE = 10240 # in megabytes +ENCRYPTION_HEADER_SIZE = 16 # in megabytes +DEFAULT_CPUS = 1 +DEFAULT_MEM = 4096 # in megabytes + +def normalize_disks(config): + # normalize the 'disks' parameter into a list of dictionaries + for client, client_config in config.items(): + clone = client_config.get('clone', False) + image_url = client_config.get('image_url', DEFAULT_IMAGE_URL) + device_type = client_config.get('type', 'filesystem') + encryption_format = client_config.get('encryption_format', 'none') + parent_encryption_format = client_config.get( + 'parent_encryption_format', 'none') + + disks = client_config.get('disks', DEFAULT_NUM_DISKS) + if not isinstance(disks, list): + disks = [{'image_name': '{client}.{num}'.format(client=client, + num=i)} + for i in range(int(disks))] + client_config['disks'] = disks + + for i, disk in enumerate(disks): + if 'action' not in disk: + disk['action'] = 'create' + assert disk['action'] in ['none', 'create', 'clone'], 'invalid disk action' + assert disk['action'] != 'clone' or 'parent_name' in disk, 'parent_name required for clone' + + if 'image_size' not in disk: + disk['image_size'] = DEFAULT_IMAGE_SIZE + disk['image_size'] = int(disk['image_size']) + + if 'image_url' not in disk and i == 0: + disk['image_url'] = image_url + + if 'device_type' not in disk: + disk['device_type'] = device_type + + disk['device_letter'] = chr(ord('a') + i) + + if 'encryption_format' not in disk: + if clone: + disk['encryption_format'] = parent_encryption_format + else: + disk['encryption_format'] = encryption_format + assert disk['encryption_format'] in ['none', 'luks1', 'luks2'], 'invalid encryption format' + + assert disks, 'at least one rbd device must be used' + + if clone: + for disk in disks: + if disk['action'] != 'create': + continue + clone = dict(disk) + clone['action'] = 'clone' + clone['parent_name'] = clone['image_name'] + clone['image_name'] += '-clone' + del disk['device_letter'] + + clone['encryption_format'] = encryption_format + assert clone['encryption_format'] in ['none', 'luks1', 'luks2'], 'invalid encryption format' + + clone['parent_encryption_format'] = parent_encryption_format + assert clone['parent_encryption_format'] in ['none', 'luks1', 'luks2'], 'invalid encryption format' + + disks.append(clone) + +def create_images(ctx, config, managers): + for client, client_config 
in config.items(): + disks = client_config['disks'] + for disk in disks: + if disk.get('action') != 'create' or ( + 'image_url' in disk and + disk['encryption_format'] == 'none'): + continue + image_size = disk['image_size'] + if disk['encryption_format'] != 'none': + image_size += ENCRYPTION_HEADER_SIZE + create_config = { + client: { + 'image_name': disk['image_name'], + 'image_format': 2, + 'image_size': image_size, + 'encryption_format': disk['encryption_format'], + } + } + managers.append( + lambda create_config=create_config: + rbd.create_image(ctx=ctx, config=create_config) + ) + +def create_clones(ctx, config, managers): + for client, client_config in config.items(): + disks = client_config['disks'] + for disk in disks: + if disk['action'] != 'clone': + continue + + create_config = { + client: { + 'image_name': disk['image_name'], + 'parent_name': disk['parent_name'], + 'encryption_format': disk['encryption_format'], + } + } + managers.append( + lambda create_config=create_config: + rbd.clone_image(ctx=ctx, config=create_config) + ) + +def create_encrypted_devices(ctx, config, managers): + for client, client_config in config.items(): + disks = client_config['disks'] + for disk in disks: + if (disk['encryption_format'] == 'none' and + disk.get('parent_encryption_format', 'none') == 'none') or \ + 'device_letter' not in disk: + continue + + dev_config = {client: disk} + managers.append( + lambda dev_config=dev_config: + rbd.dev_create(ctx=ctx, config=dev_config) + ) + +@contextlib.contextmanager +def create_dirs(ctx, config): + """ + Handle directory creation and cleanup + """ + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.items(): + assert 'test' in client_config, 'You must specify a test to run' + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'install', '-d', '-m0755', '--', + '{tdir}/qemu'.format(tdir=testdir), + '{tdir}/archive/qemu'.format(tdir=testdir), + ] + ) + try: + yield + finally: + for client, client_config in config.items(): + assert 'test' in client_config, 'You must specify a test to run' + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'rmdir', '{tdir}/qemu'.format(tdir=testdir), run.Raw('||'), 'true', + ] + ) + +@contextlib.contextmanager +def install_block_rbd_driver(ctx, config): + """ + Make sure qemu rbd block driver (block-rbd.so) is installed + """ + packages = {} + for client, _ in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + if remote.os.package_type == 'rpm': + packages[client] = ['qemu-kvm-block-rbd'] + else: + packages[client] = ['qemu-block-extra', 'qemu-utils'] + for pkg in packages[client]: + install_package(pkg, remote) + try: + yield + finally: + for client, _ in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + for pkg in packages[client]: + remove_package(pkg, remote) + +@contextlib.contextmanager +def generate_iso(ctx, config): + """Execute system commands to generate iso""" + log.info('generating iso...') + testdir = teuthology.get_testdir(ctx) + + # use ctx.config instead of config, because config has been + # through teuthology.replace_all_with_clients() + refspec = get_refspec_after_overrides(ctx.config, {}) + + git_url = teuth_config.get_ceph_qa_suite_git_url() + log.info('Pulling tests from %s ref %s', git_url, refspec) + + for client, client_config in config.items(): + assert 'test' in client_config, 'You must specify a test to run' + test = client_config['test'] + + (remote,) = 
ctx.cluster.only(client).remotes.keys() + + clone_dir = '{tdir}/qemu_clone.{role}'.format(tdir=testdir, role=client) + remote.run(args=refspec.clone(git_url, clone_dir)) + + src_dir = os.path.dirname(__file__) + userdata_path = os.path.join(testdir, 'qemu', 'userdata.' + client) + metadata_path = os.path.join(testdir, 'qemu', 'metadata.' + client) + + with open(os.path.join(src_dir, 'userdata_setup.yaml')) as f: + test_setup = ''.join(f.readlines()) + # configuring the commands to setup the nfs mount + mnt_dir = "/export/{client}".format(client=client) + test_setup = test_setup.format( + mnt_dir=mnt_dir + ) + + with open(os.path.join(src_dir, 'userdata_teardown.yaml')) as f: + test_teardown = ''.join(f.readlines()) + + user_data = test_setup + + disks = client_config['disks'] + for disk in disks: + if disk['device_type'] != 'filesystem' or \ + 'device_letter' not in disk or \ + 'image_url' in disk: + continue + if disk['encryption_format'] == 'none' and \ + disk.get('parent_encryption_format', 'none') == 'none': + dev_name = 'vd' + disk['device_letter'] + else: + # encrypted disks use if=ide interface, instead of if=virtio + dev_name = 'sd' + disk['device_letter'] + user_data += """ +- | + #!/bin/bash + mkdir /mnt/test_{dev_name} + mkfs -t xfs /dev/{dev_name} + mount -t xfs /dev/{dev_name} /mnt/test_{dev_name} +""".format(dev_name=dev_name) + + user_data += """ +- | + #!/bin/bash + test -d /etc/ceph || mkdir /etc/ceph + cp /mnt/cdrom/ceph.* /etc/ceph/ +""" + + cloud_config_archive = client_config.get('cloud_config_archive', []) + if cloud_config_archive: + user_data += yaml.safe_dump(cloud_config_archive, default_style='|', + default_flow_style=False) + + # this may change later to pass the directories as args to the + # script or something. xfstests needs that. + user_data += """ +- | + #!/bin/bash + test -d /mnt/test_b && cd /mnt/test_b + /mnt/cdrom/test.sh > /mnt/log/test.log 2>&1 && touch /mnt/log/success +""" + test_teardown + + user_data = user_data.format( + ceph_branch=ctx.config.get('branch'), + ceph_sha1=ctx.config.get('sha1')) + remote.write_file(userdata_path, user_data) + + with open(os.path.join(src_dir, 'metadata.yaml'), 'rb') as f: + remote.write_file(metadata_path, f) + + test_file = '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client) + + log.info('fetching test %s for %s', test, client) + remote.run( + args=[ + 'cp', '--', os.path.join(clone_dir, test), test_file, + run.Raw('&&'), + 'chmod', '755', test_file, + ], + ) + remote.run( + args=[ + 'genisoimage', '-quiet', '-input-charset', 'utf-8', + '-volid', 'cidata', '-joliet', '-rock', + '-o', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client), + '-graft-points', + 'user-data={userdata}'.format(userdata=userdata_path), + 'meta-data={metadata}'.format(metadata=metadata_path), + 'ceph.conf=/etc/ceph/ceph.conf', + 'ceph.keyring=/etc/ceph/ceph.keyring', + 'test.sh={file}'.format(file=test_file), + ], + ) + try: + yield + finally: + for client in config.keys(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'rm', '-rf', + '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client), + os.path.join(testdir, 'qemu', 'userdata.' + client), + os.path.join(testdir, 'qemu', 'metadata.' 
+ client), + '{tdir}/qemu/{client}.test.sh'.format(tdir=testdir, client=client), + '{tdir}/qemu_clone.{client}'.format(tdir=testdir, client=client), + ], + ) + +@contextlib.contextmanager +def download_image(ctx, config): + """Downland base image, remove image file when done""" + log.info('downloading base image') + testdir = teuthology.get_testdir(ctx) + + client_base_files = {} + for client, client_config in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + client_base_files[client] = [] + disks = client_config['disks'] + for disk in disks: + if disk['action'] != 'create' or 'image_url' not in disk: + continue + + base_file = '{tdir}/qemu/base.{name}.qcow2'.format(tdir=testdir, + name=disk['image_name']) + client_base_files[client].append(base_file) + + remote.run( + args=[ + 'wget', '-nv', '-O', base_file, disk['image_url'], + ] + ) + + if disk['encryption_format'] == 'none': + remote.run( + args=[ + 'qemu-img', 'convert', '-f', 'qcow2', '-O', 'raw', + base_file, 'rbd:rbd/{image_name}'.format(image_name=disk['image_name']) + ] + ) + else: + dev_config = {client: {'image_name': disk['image_name'], + 'encryption_format': disk['encryption_format']}} + raw_file = '{tdir}/qemu/base.{name}.raw'.format( + tdir=testdir, name=disk['image_name']) + client_base_files[client].append(raw_file) + remote.run( + args=[ + 'qemu-img', 'convert', '-f', 'qcow2', '-O', 'raw', + base_file, raw_file + ] + ) + with rbd.dev_create(ctx, dev_config): + remote.run( + args=[ + 'dd', 'if={name}'.format(name=raw_file), + 'of={name}'.format(name=dev_config[client]['device_path']), + 'bs=4M', 'conv=fdatasync' + ] + ) + + for disk in disks: + if disk['action'] == 'clone' or \ + disk['encryption_format'] != 'none' or \ + (disk['action'] == 'create' and 'image_url' not in disk): + continue + + remote.run( + args=[ + 'rbd', 'resize', + '--size={image_size}M'.format(image_size=disk['image_size']), + disk['image_name'], run.Raw('||'), 'true' + ] + ) + + try: + yield + finally: + log.debug('cleaning up base image files') + for client, base_files in client_base_files.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + for base_file in base_files: + remote.run( + args=[ + 'rm', '-f', base_file, + ], + ) + + +def _setup_nfs_mount(remote, client, service_name, mount_dir): + """ + Sets up an nfs mount on the remote that the guest can use to + store logs. This nfs mount is also used to touch a file + at the end of the test to indicate if the test was successful + or not. 
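For an unencrypted disk with an image_url, download_image() above fetches the qcow2 base image and converts it straight into the 'rbd' pool. The sketch below simply rebuilds those two command lines; the testdir default and the example image name follow the conventions used elsewhere in the task and are otherwise assumptions:

def base_image_import_cmds(image_url, image_name, testdir='/home/ubuntu/cephtest'):
    """Return the commands download_image() issues for an unencrypted disk.

    The qcow2 base image is fetched once, then qemu-img converts it directly
    into the 'rbd' pool; path layout and pool name follow the task above.
    """
    base_file = '{tdir}/qemu/base.{name}.qcow2'.format(tdir=testdir, name=image_name)
    return [
        ['wget', '-nv', '-O', base_file, image_url],
        ['qemu-img', 'convert', '-f', 'qcow2', '-O', 'raw',
         base_file, 'rbd:rbd/{name}'.format(name=image_name)],
    ]

# example using the task's default image URL and default image naming scheme
for cmd in base_image_import_cmds(
        'http://download.ceph.com/qa/ubuntu-12.04.qcow2', 'client.0.0'):
    print(' '.join(cmd))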
+ """ + export_dir = "/export/{client}".format(client=client) + log.info("Creating the nfs export directory...") + remote.run(args=[ + 'sudo', 'mkdir', '-p', export_dir, + ]) + log.info("Mounting the test directory...") + remote.run(args=[ + 'sudo', 'mount', '--bind', mount_dir, export_dir, + ]) + log.info("Adding mount to /etc/exports...") + export = "{dir} *(rw,no_root_squash,no_subtree_check,insecure)".format( + dir=export_dir + ) + log.info("Deleting export from /etc/exports...") + remote.run(args=[ + 'sudo', 'sed', '-i', "\|{export_dir}|d".format(export_dir=export_dir), + '/etc/exports' + ]) + remote.run(args=[ + 'echo', export, run.Raw("|"), + 'sudo', 'tee', '-a', "/etc/exports", + ]) + log.info("Restarting NFS...") + if remote.os.package_type == "deb": + remote.run(args=['sudo', 'service', 'nfs-kernel-server', 'restart']) + else: + remote.run(args=['sudo', 'systemctl', 'restart', service_name]) + + +def _teardown_nfs_mount(remote, client, service_name): + """ + Tears down the nfs mount on the remote used for logging and reporting the + status of the tests being ran in the guest. + """ + log.info("Tearing down the nfs mount for {remote}".format(remote=remote)) + export_dir = "/export/{client}".format(client=client) + log.info("Stopping NFS...") + if remote.os.package_type == "deb": + remote.run(args=[ + 'sudo', 'service', 'nfs-kernel-server', 'stop' + ]) + else: + remote.run(args=[ + 'sudo', 'systemctl', 'stop', service_name + ]) + log.info("Unmounting exported directory...") + remote.run(args=[ + 'sudo', 'umount', export_dir + ]) + log.info("Deleting export from /etc/exports...") + remote.run(args=[ + 'sudo', 'sed', '-i', "\|{export_dir}|d".format(export_dir=export_dir), + '/etc/exports' + ]) + log.info("Starting NFS...") + if remote.os.package_type == "deb": + remote.run(args=[ + 'sudo', 'service', 'nfs-kernel-server', 'start' + ]) + else: + remote.run(args=[ + 'sudo', 'systemctl', 'start', service_name + ]) + + +@contextlib.contextmanager +def run_qemu(ctx, config): + """Setup kvm environment and start qemu""" + procs = [] + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + log_dir = '{tdir}/archive/qemu/{client}'.format(tdir=testdir, client=client) + remote.run( + args=[ + 'mkdir', log_dir, run.Raw('&&'), + 'sudo', 'modprobe', 'kvm', + ] + ) + + nfs_service_name = 'nfs' + if remote.os.name in ['rhel', 'centos'] and float(remote.os.version) >= 8: + nfs_service_name = 'nfs-server' + + # make an nfs mount to use for logging and to + # allow to test to tell teuthology the tests outcome + _setup_nfs_mount(remote, client, nfs_service_name, log_dir) + + # Hack to make sure /dev/kvm permissions are set correctly + # See http://tracker.ceph.com/issues/17977 and + # https://bugzilla.redhat.com/show_bug.cgi?id=1333159 + remote.run(args='sudo udevadm control --reload') + remote.run(args='sudo udevadm trigger /dev/kvm') + remote.run(args='ls -l /dev/kvm') + + qemu_cmd = 'qemu-system-x86_64' + if remote.os.package_type == "rpm": + qemu_cmd = "/usr/libexec/qemu-kvm" + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'daemon-helper', + 'term', + qemu_cmd, '-enable-kvm', '-nographic', '-cpu', 'host', + '-smp', str(client_config.get('cpus', DEFAULT_CPUS)), + '-m', str(client_config.get('memory', DEFAULT_MEM)), + # cd holding metadata for cloud-init + '-cdrom', '{tdir}/qemu/{client}.iso'.format(tdir=testdir, client=client), + ] + + cachemode = 'none' + 
ceph_config = ctx.ceph['ceph'].conf.get('global', {}) + ceph_config.update(ctx.ceph['ceph'].conf.get('client', {})) + ceph_config.update(ctx.ceph['ceph'].conf.get(client, {})) + if ceph_config.get('rbd cache', True): + if ceph_config.get('rbd cache max dirty', 1) > 0: + cachemode = 'writeback' + else: + cachemode = 'writethrough' + + disks = client_config['disks'] + for disk in disks: + if 'device_letter' not in disk: + continue + + if disk['encryption_format'] == 'none' and \ + disk.get('parent_encryption_format', 'none') == 'none': + interface = 'virtio' + disk_spec = 'rbd:rbd/{img}:id={id}'.format( + img=disk['image_name'], + id=client[len('client.'):] + ) + else: + # encrypted disks use ide as a temporary workaround for + # a bug in qemu when using virtio over nbd + # TODO: use librbd encryption directly via qemu (not via nbd) + interface = 'ide' + disk_spec = disk['device_path'] + + args.extend([ + '-drive', + 'file={disk_spec},format=raw,if={interface},cache={cachemode}'.format( + disk_spec=disk_spec, + interface=interface, + cachemode=cachemode, + ), + ]) + time_wait = client_config.get('time_wait', 0) + + log.info('starting qemu...') + procs.append( + remote.run( + args=args, + logger=log.getChild(client), + stdin=run.PIPE, + wait=False, + ) + ) + + try: + yield + finally: + log.info('waiting for qemu tests to finish...') + run.wait(procs) + + if time_wait > 0: + log.debug('waiting {time_wait} sec for workloads detect finish...'.format( + time_wait=time_wait)); + time.sleep(time_wait) + + log.debug('checking that qemu tests succeeded...') + for client in config.keys(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + # ensure we have permissions to all the logs + log_dir = '{tdir}/archive/qemu/{client}'.format(tdir=testdir, + client=client) + remote.run( + args=[ + 'sudo', 'chmod', 'a+rw', '-R', log_dir + ] + ) + + # teardown nfs mount + _teardown_nfs_mount(remote, client, nfs_service_name) + # check for test status + remote.run( + args=[ + 'test', '-f', + '{tdir}/archive/qemu/{client}/success'.format( + tdir=testdir, + client=client + ), + ], + ) + log.info("Deleting exported directory...") + for client in config.keys(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run(args=[ + 'sudo', 'rm', '-r', '/export' + ]) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run a test inside of QEMU on top of rbd. Only one test + is supported per client. + + For example, you can specify which clients to run on:: + + tasks: + - ceph: + - qemu: + client.0: + test: http://download.ceph.com/qa/test.sh + client.1: + test: http://download.ceph.com/qa/test2.sh + + Or use the same settings on all clients: + + tasks: + - ceph: + - qemu: + all: + test: http://download.ceph.com/qa/test.sh + + For tests that want to explicitly describe the RBD images to connect: + + tasks: + - ceph: + - qemu: + client.0: + test: http://download.ceph.com/qa/test.sh + clone: True/False (optionally clone all created disks), + image_url: <URL> (optional default image URL) + type: filesystem / block (optional default device type) + disks: [ + { + action: create / clone / none (optional, defaults to create) + image_name: <image name> (optional) + parent_name: <parent_name> (if action == clone), + type: filesystem / block (optional, defaults to fileystem) + image_url: <URL> (optional), + image_size: <MiB> (optional) + encryption_format: luks1 / luks2 / none (optional, defaults to none) + }, ... 
+ ] + + You can set the amount of CPUs and memory the VM has (default is 1 CPU and + 4096 MB):: + + tasks: + - ceph: + - qemu: + client.0: + test: http://download.ceph.com/qa/test.sh + cpus: 4 + memory: 512 # megabytes + + If you need to configure additional cloud-config options, set cloud_config + to the required data set:: + + tasks: + - ceph + - qemu: + client.0: + test: http://ceph.com/qa/test.sh + cloud_config_archive: + - | + #/bin/bash + touch foo1 + - content: | + test data + type: text/plain + filename: /tmp/data + """ + assert isinstance(config, dict), \ + "task qemu only supports a dictionary for configuration" + + config = teuthology.replace_all_with_clients(ctx.cluster, config) + normalize_disks(config) + + managers = [] + create_images(ctx=ctx, config=config, managers=managers) + managers.extend([ + lambda: create_dirs(ctx=ctx, config=config), + lambda: install_block_rbd_driver(ctx=ctx, config=config), + lambda: generate_iso(ctx=ctx, config=config), + lambda: download_image(ctx=ctx, config=config), + ]) + create_clones(ctx=ctx, config=config, managers=managers) + create_encrypted_devices(ctx=ctx, config=config, managers=managers) + managers.append( + lambda: run_qemu(ctx=ctx, config=config), + ) + + with contextutil.nested(*managers): + yield diff --git a/qa/tasks/rabbitmq.py b/qa/tasks/rabbitmq.py new file mode 100644 index 000000000..c78ac1e56 --- /dev/null +++ b/qa/tasks/rabbitmq.py @@ -0,0 +1,130 @@ +""" +Deploy and configure RabbitMQ for Teuthology +""" +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def install_rabbitmq(ctx, config): + """ + Downloading the RabbitMQ package. + """ + assert isinstance(config, dict) + log.info('Installing RabbitMQ...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run(args=[ + 'sudo', 'yum', '-y', 'install', 'epel-release' + ]) + + link1 = 'https://packagecloud.io/install/repositories/rabbitmq/erlang/script.rpm.sh' + + ctx.cluster.only(client).run(args=[ + 'curl', '-s', link1, run.Raw('|'), 'sudo', 'bash' + ]) + + ctx.cluster.only(client).run(args=[ + 'sudo', 'yum', '-y', 'install', 'erlang' + ]) + + link2 = 'https://packagecloud.io/install/repositories/rabbitmq/rabbitmq-server/script.rpm.sh' + + ctx.cluster.only(client).run(args=[ + 'curl', '-s', link2, run.Raw('|'), 'sudo', 'bash' + ]) + + ctx.cluster.only(client).run(args=[ + 'sudo', 'yum', '-y', 'install', 'rabbitmq-server' + ]) + + try: + yield + finally: + log.info('Removing packaged dependencies of RabbitMQ...') + + for (client, _) in config.items(): + ctx.cluster.only(client).run(args=[ + 'sudo', 'yum', '-y', 'remove', 'rabbitmq-server.noarch' + ]) + + +@contextlib.contextmanager +def run_rabbitmq(ctx, config): + """ + This includes two parts: + 1. Starting Daemon + 2. 
Starting RabbitMQ service + """ + assert isinstance(config, dict) + log.info('Bringing up Daemon and RabbitMQ service...') + for (client,_) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run(args=[ + 'sudo', 'chkconfig', 'rabbitmq-server', 'on' + ], + ) + + ctx.cluster.only(client).run(args=[ + 'sudo', '/sbin/service', 'rabbitmq-server', 'start' + ], + ) + + ''' + # To check whether rabbitmq-server is running or not + ctx.cluster.only(client).run(args=[ + 'sudo', '/sbin/service', 'rabbitmq-server', 'status' + ], + ) + ''' + + try: + yield + finally: + log.info('Stopping RabbitMQ Service...') + + for (client, _) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + + ctx.cluster.only(client).run(args=[ + 'sudo', '/sbin/service', 'rabbitmq-server', 'stop' + ], + ) + + +@contextlib.contextmanager +def task(ctx,config): + """ + To run rabbitmq the prerequisite is to run the tox task. Following is the way how to run + tox and then rabbitmq:: + tasks: + - rabbitmq: + client.0: + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task rabbitmq only supports a list or dictionary for configuration" + + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + log.debug('RabbitMQ config is %s', config) + + with contextutil.nested( + lambda: install_rabbitmq(ctx=ctx, config=config), + lambda: run_rabbitmq(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/rados.py b/qa/tasks/rados.py new file mode 100644 index 000000000..a730a7299 --- /dev/null +++ b/qa/tasks/rados.py @@ -0,0 +1,286 @@ +""" +Rados modle-based integration tests +""" +import contextlib +import logging +import gevent +from teuthology import misc as teuthology + + +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run RadosModel-based integration tests. + + The config should be as follows:: + + rados: + clients: [client list] + ops: <number of ops> + objects: <number of objects to use> + max_in_flight: <max number of operations in flight> + object_size: <size of objects in bytes> + min_stride_size: <minimum write stride size in bytes> + max_stride_size: <maximum write stride size in bytes> + op_weights: <dictionary mapping operation type to integer weight> + runs: <number of times to run> - the pool is remade between runs + ec_pool: use an ec pool + erasure_code_profile: profile to use with the erasure coded pool + fast_read: enable ec_pool's fast_read + min_size: set the min_size of created pool + pool_snaps: use pool snapshots instead of selfmanaged snapshots + write_fadvise_dontneed: write behavior like with LIBRADOS_OP_FLAG_FADVISE_DONTNEED. + This mean data don't access in the near future. + Let osd backend don't keep data in cache. 
+ + For example:: + + tasks: + - ceph: + - rados: + clients: [client.0] + ops: 1000 + max_seconds: 0 # 0 for no limit + objects: 25 + max_in_flight: 16 + object_size: 4000000 + min_stride_size: 1024 + max_stride_size: 4096 + op_weights: + read: 20 + write: 10 + delete: 2 + snap_create: 3 + rollback: 2 + snap_remove: 0 + ec_pool: create an ec pool, defaults to False + erasure_code_use_overwrites: test overwrites, default false + erasure_code_profile: + name: teuthologyprofile + k: 2 + m: 1 + crush-failure-domain: osd + pool_snaps: true + write_fadvise_dontneed: true + runs: 10 + - interactive: + + Optionally, you can provide the pool name to run against: + + tasks: + - ceph: + - exec: + client.0: + - ceph osd pool create foo + - rados: + clients: [client.0] + pools: [foo] + ... + + Alternatively, you can provide a pool prefix: + + tasks: + - ceph: + - exec: + client.0: + - ceph osd pool create foo.client.0 + - rados: + clients: [client.0] + pool_prefix: foo + ... + + The tests are run asynchronously, they are not complete when the task + returns. For instance: + + - rados: + clients: [client.0] + pools: [ecbase] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + - print: "**** done rados ec-cache-agent (part 2)" + + will run the print task immediately after the rados tasks begins but + not after it completes. To make the rados task a blocking / sequential + task, use: + + - sequential: + - rados: + clients: [client.0] + pools: [ecbase] + ops: 4000 + objects: 500 + op_weights: + read: 100 + write: 100 + delete: 50 + copy_from: 50 + - print: "**** done rados ec-cache-agent (part 2)" + + """ + log.info('Beginning rados...') + assert isinstance(config, dict), \ + "please list clients to run on" + + object_size = int(config.get('object_size', 4000000)) + op_weights = config.get('op_weights', {}) + testdir = teuthology.get_testdir(ctx) + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'ceph_test_rados'] + if config.get('ec_pool', False): + args.extend(['--no-omap']) + if not config.get('erasure_code_use_overwrites', False): + args.extend(['--ec-pool']) + if config.get('write_fadvise_dontneed', False): + args.extend(['--write-fadvise-dontneed']) + if config.get('set_redirect', False): + args.extend(['--set_redirect']) + if config.get('set_chunk', False): + args.extend(['--set_chunk']) + if config.get('enable_dedup', False): + args.extend(['--enable_dedup']) + if config.get('low_tier_pool', None): + args.extend(['--low_tier_pool', config.get('low_tier_pool', None)]) + if config.get('dedup_chunk_size', False): + args.extend(['--dedup_chunk_size', config.get('dedup_chunk_size', None)] ) + if config.get('dedup_chunk_algo', False): + args.extend(['--dedup_chunk_algo', config.get('dedup_chunk_algo', None)]) + if config.get('pool_snaps', False): + args.extend(['--pool-snaps']) + if config.get('balance_reads', False): + args.extend(['--balance-reads']) + if config.get('localize_reads', False): + args.extend(['--localize-reads']) + args.extend([ + '--max-ops', str(config.get('ops', 10000)), + '--objects', str(config.get('objects', 500)), + '--max-in-flight', str(config.get('max_in_flight', 16)), + '--size', str(object_size), + '--min-stride-size', str(config.get('min_stride_size', object_size // 10)), + '--max-stride-size', str(config.get('max_stride_size', object_size // 5)), + '--max-seconds', str(config.get('max_seconds', 0)) + ]) + + weights = {} + weights['read'] = 100 + weights['write'] = 100 + 
weights['delete'] = 10 + # Parallel of the op_types in test/osd/TestRados.cc + for field in [ + # read handled above + # write handled above + # delete handled above + "snap_create", + "snap_remove", + "rollback", + "setattr", + "rmattr", + "watch", + "copy_from", + "hit_set_list", + "is_dirty", + "undirty", + "cache_flush", + "cache_try_flush", + "cache_evict", + "append", + "write", + "read", + "delete", + "set_chunk", + "tier_promote", + "tier_evict", + "tier_promote", + "tier_flush" + ]: + if field in op_weights: + weights[field] = op_weights[field] + + if config.get('write_append_excl', True): + if 'write' in weights: + weights['write'] = weights['write'] // 2 + weights['write_excl'] = weights['write'] + + if 'append' in weights: + weights['append'] = weights['append'] // 2 + weights['append_excl'] = weights['append'] + + for op, weight in weights.items(): + args.extend([ + '--op', op, str(weight) + ]) + + + def thread(): + """Thread spawned by gevent""" + clients = ['client.{id}'.format(id=id_) for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + log.info('clients are %s' % clients) + manager = ctx.managers['ceph'] + if config.get('ec_pool', False): + profile = config.get('erasure_code_profile', {}) + profile_name = profile.get('name', 'teuthologyprofile') + manager.create_erasure_code_profile(profile_name, profile) + else: + profile_name = None + for i in range(int(config.get('runs', '1'))): + log.info("starting run %s out of %s", str(i), config.get('runs', '1')) + tests = {} + existing_pools = config.get('pools', []) + created_pools = [] + for role in config.get('clients', clients): + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + + pool = config.get('pool', None) + if not pool and existing_pools: + pool = existing_pools.pop() + else: + pool = manager.create_pool_with_unique_name( + erasure_code_profile_name=profile_name, + erasure_code_use_overwrites= + config.get('erasure_code_use_overwrites', False) + ) + created_pools.append(pool) + if config.get('fast_read', False): + manager.raw_cluster_cmd( + 'osd', 'pool', 'set', pool, 'fast_read', 'true') + min_size = config.get('min_size', None); + if min_size is not None: + manager.raw_cluster_cmd( + 'osd', 'pool', 'set', pool, 'min_size', str(min_size)) + + (remote,) = ctx.cluster.only(role).remotes.keys() + proc = remote.run( + args=["CEPH_CLIENT_ID={id_}".format(id_=id_)] + args + + ["--pool", pool], + logger=log.getChild("rados.{id}".format(id=id_)), + stdin=run.PIPE, + wait=False + ) + tests[id_] = proc + run.wait(tests.values()) + + for pool in created_pools: + manager.wait_snap_trimming_complete(pool); + manager.remove_pool(pool) + + running = gevent.spawn(thread) + + try: + yield + finally: + log.info('joining rados') + running.get() diff --git a/qa/tasks/radosbench.py b/qa/tasks/radosbench.py new file mode 100644 index 000000000..3a5aee2e2 --- /dev/null +++ b/qa/tasks/radosbench.py @@ -0,0 +1,144 @@ +""" +Rados benchmarking +""" +import contextlib +import logging + +from teuthology.orchestra import run +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run radosbench + + The config should be as follows: + + radosbench: + clients: [client list] + time: <seconds to run> + pool: <pool to use> + size: write size to use + concurrency: max number of outstanding writes (16) + objectsize: object size to use + unique_pool: use a unique pool, defaults to False + ec_pool: 
create an ec pool, defaults to False + create_pool: create pool, defaults to True + erasure_code_profile: + name: teuthologyprofile + k: 2 + m: 1 + crush-failure-domain: osd + cleanup: false (defaults to true) + type: <write|seq|rand> (defaults to write) + example: + + tasks: + - ceph: + - radosbench: + clients: [client.0] + time: 360 + - interactive: + """ + log.info('Beginning radosbench...') + assert isinstance(config, dict), \ + "please list clients to run on" + radosbench = {} + + testdir = teuthology.get_testdir(ctx) + manager = ctx.managers['ceph'] + runtype = config.get('type', 'write') + + create_pool = config.get('create_pool', True) + for role in config.get( + 'clients', + list(map(lambda x: 'client.' + x, + teuthology.all_roles_of_type(ctx.cluster, 'client')))): + assert isinstance(role, str) + (_, id_) = role.split('.', 1) + (remote,) = ctx.cluster.only(role).remotes.keys() + + if config.get('ec_pool', False): + profile = config.get('erasure_code_profile', {}) + profile_name = profile.get('name', 'teuthologyprofile') + manager.create_erasure_code_profile(profile_name, profile) + else: + profile_name = None + + cleanup = [] + if not config.get('cleanup', True): + cleanup = ['--no-cleanup'] + write_to_omap = [] + if config.get('write-omap', False): + write_to_omap = ['--write-omap'] + log.info('omap writes') + + pool = config.get('pool', 'data') + if create_pool: + if pool != 'data': + manager.create_pool(pool, erasure_code_profile_name=profile_name) + else: + pool = manager.create_pool_with_unique_name(erasure_code_profile_name=profile_name) + + concurrency = config.get('concurrency', 16) + osize = config.get('objectsize', 65536) + if osize == 0: + objectsize = [] + else: + objectsize = ['--object-size', str(osize)] + size = ['-b', str(config.get('size', 65536))] + # If doing a reading run then populate data + if runtype != "write": + proc = remote.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', role] + + ['-t', str(concurrency)] + + size + objectsize + + ['-p' , pool, + 'bench', str(60), "write", "--no-cleanup" + ]).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id=id_)), + wait=True + ) + size = [] + objectsize = [] + + proc = remote.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', role] + + size + objectsize + + ['-p' , pool, + 'bench', str(config.get('time', 360)), runtype, + ] + write_to_omap + cleanup).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id=id_)), + stdin=run.PIPE, + wait=False + ) + radosbench[id_] = proc + + try: + yield + finally: + timeout = config.get('time', 360) * 30 + 300 + log.info('joining radosbench (timing out after %ss)', timeout) + run.wait(radosbench.values(), timeout=timeout) + + if pool != 'data' and create_pool: + manager.remove_pool(pool) diff --git a/qa/tasks/radosbenchsweep.py b/qa/tasks/radosbenchsweep.py new file mode 100644 index 000000000..df0ba1ed1 --- /dev/null +++ b/qa/tasks/radosbenchsweep.py @@ -0,0 +1,222 @@ +""" +Rados benchmarking sweep +""" +import contextlib +import logging +import re + +from io import BytesIO +from itertools import product + +from teuthology.orchestra import run +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Execute a radosbench parameter sweep 
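The radosbench task above ultimately shells out to 'rados ... bench'. The helper below approximates the argument list it builds, keeping only the options visible in the task (block size, object size, concurrency, --no-cleanup) and dropping the adjust-ulimits/ceph-coverage wrappers and the --name credential; treat it as a sketch, not the exact command:

def radosbench_args(pool, seconds=360, op='write', size=65536,
                    objectsize=65536, concurrency=16, cleanup=True):
    """Approximate the 'rados bench' argument list assembled by the task.

    Defaults mirror the task's defaults; an objectsize of 0 omits the
    --object-size option, as in the task.
    """
    args = ['rados', '--no-log-to-stderr', '-t', str(concurrency),
            '-b', str(size)]
    if objectsize:
        args += ['--object-size', str(objectsize)]
    args += ['-p', pool, 'bench', str(seconds), op]
    if not cleanup:
        args.append('--no-cleanup')
    return args

print(' '.join(radosbench_args('data', seconds=60, cleanup=False)))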
+ + Puts radosbench in a loop, taking values from the given config at each + iteration. If given, the min and max values below create a range, e.g. + min_replicas=1 and max_replicas=3 implies executing with 1-3 replicas. + + Parameters: + + clients: [client list] + time: seconds to run (default=120) + sizes: [list of object sizes] (default=[4M]) + mode: <write|read|seq> (default=write) + repetitions: execute the same configuration multiple times (default=1) + min_num_replicas: minimum number of replicas to use (default = 3) + max_num_replicas: maximum number of replicas to use (default = 3) + min_num_osds: the minimum number of OSDs in a pool (default=all) + max_num_osds: the maximum number of OSDs in a pool (default=all) + file: name of CSV-formatted output file (default='radosbench.csv') + columns: columns to include (default=all) + - rep: execution number (takes values from 'repetitions') + - num_osd: number of osds for pool + - num_replica: number of replicas + - avg_throughput: throughput + - avg_latency: latency + - stdev_throughput: + - stdev_latency: + + Example: + - radsobenchsweep: + columns: [rep, num_osd, num_replica, avg_throughput, stdev_throughput] + """ + log.info('Beginning radosbenchsweep...') + assert isinstance(config, dict), 'expecting dictionary for configuration' + + # get and validate config values + # { + + # only one client supported for now + if len(config.get('clients', [])) != 1: + raise Exception("Only one client can be specified") + + # only write mode + if config.get('mode', 'write') != 'write': + raise Exception("Only 'write' mode supported for now.") + + # OSDs + total_osds_in_cluster = teuthology.num_instances_of_type(ctx.cluster, 'osd') + min_num_osds = config.get('min_num_osds', total_osds_in_cluster) + max_num_osds = config.get('max_num_osds', total_osds_in_cluster) + + if max_num_osds > total_osds_in_cluster: + raise Exception('max_num_osds cannot be greater than total in cluster') + if min_num_osds < 1: + raise Exception('min_num_osds cannot be less than 1') + if min_num_osds > max_num_osds: + raise Exception('min_num_osds cannot be greater than max_num_osd') + osds = range(0, (total_osds_in_cluster + 1)) + + # replicas + min_num_replicas = config.get('min_num_replicas', 3) + max_num_replicas = config.get('max_num_replicas', 3) + + if min_num_replicas < 1: + raise Exception('min_num_replicas cannot be less than 1') + if min_num_replicas > max_num_replicas: + raise Exception('min_num_replicas cannot be greater than max_replicas') + if max_num_replicas > max_num_osds: + raise Exception('max_num_replicas cannot be greater than max_num_osds') + replicas = range(min_num_replicas, (max_num_replicas + 1)) + + # object size + sizes = config.get('size', [4 << 20]) + + # repetitions + reps = range(config.get('repetitions', 1)) + + # file + fname = config.get('file', 'radosbench.csv') + f = open('{}/{}'.format(ctx.archive, fname), 'w') + f.write(get_csv_header(config) + '\n') + # } + + # set default pools size=1 to avoid 'unhealthy' issues + ctx.manager.set_pool_property('data', 'size', 1) + ctx.manager.set_pool_property('metadata', 'size', 1) + ctx.manager.set_pool_property('rbd', 'size', 1) + + current_osds_out = 0 + + # sweep through all parameters + for osds_out, size, replica, rep in product(osds, sizes, replicas, reps): + + osds_in = total_osds_in_cluster - osds_out + + if osds_in == 0: + # we're done + break + + if current_osds_out != osds_out: + # take an osd out + ctx.manager.raw_cluster_cmd( + 'osd', 'reweight', str(osds_out-1), '0.0') + 
wait_until_healthy(ctx, config) + current_osds_out = osds_out + + if osds_in not in range(min_num_osds, (max_num_osds + 1)): + # no need to execute with a number of osds that wasn't requested + continue + + if osds_in < replica: + # cannot execute with more replicas than available osds + continue + + run_radosbench(ctx, config, f, osds_in, size, replica, rep) + + f.close() + + yield + + +def get_csv_header(conf): + all_columns = [ + 'rep', 'num_osd', 'num_replica', 'avg_throughput', + 'avg_latency', 'stdev_throughput', 'stdev_latency' + ] + given_columns = conf.get('columns', None) + if given_columns and len(given_columns) != 0: + for column in given_columns: + if column not in all_columns: + raise Exception('Unknown column ' + column) + return ','.join(conf['columns']) + else: + conf['columns'] = all_columns + return ','.join(all_columns) + + +def run_radosbench(ctx, config, f, num_osds, size, replica, rep): + pool = ctx.manager.create_pool_with_unique_name() + + ctx.manager.set_pool_property(pool, 'size', replica) + + wait_until_healthy(ctx, config) + + log.info('Executing with parameters: ') + log.info(' num_osd =' + str(num_osds)) + log.info(' size =' + str(size)) + log.info(' num_replicas =' + str(replica)) + log.info(' repetition =' + str(rep)) + + for role in config.get('clients', ['client.0']): + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.keys() + + proc = remote.run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{}/archive/coverage'.format(teuthology.get_testdir(ctx)), + 'rados', + '--no-log-to-stderr', + '--name', role, + '-b', str(size), + '-p', pool, + 'bench', str(config.get('time', 120)), 'write', + ], + logger=log.getChild('radosbench.{id}'.format(id=id_)), + stdin=run.PIPE, + stdout=BytesIO(), + wait=False + ) + + # parse output to get summary and format it as CSV + proc.wait() + out = proc.stdout.getvalue() + all_values = { + 'stdev_throughput': re.sub(r'Stddev Bandwidth: ', '', re.search( + r'Stddev Bandwidth:.*', out).group(0)), + 'stdev_latency': re.sub(r'Stddev Latency: ', '', re.search( + r'Stddev Latency:.*', out).group(0)), + 'avg_throughput': re.sub(r'Bandwidth \(MB/sec\): ', '', re.search( + r'Bandwidth \(MB/sec\):.*', out).group(0)), + 'avg_latency': re.sub(r'Average Latency: ', '', re.search( + r'Average Latency:.*', out).group(0)), + 'rep': str(rep), + 'num_osd': str(num_osds), + 'num_replica': str(replica) + } + values_to_write = [] + for column in config['columns']: + values_to_write.extend([all_values[column]]) + f.write(','.join(values_to_write) + '\n') + + ctx.manager.remove_pool(pool) + + +def wait_until_healthy(ctx, config): + first_mon = teuthology.get_first_mon(ctx, config) + (mon_remote,) = ctx.cluster.only(first_mon).remotes.keys() + teuthology.wait_until_healthy(ctx, mon_remote) diff --git a/qa/tasks/radosgw_admin.py b/qa/tasks/radosgw_admin.py new file mode 100644 index 000000000..780dae1e1 --- /dev/null +++ b/qa/tasks/radosgw_admin.py @@ -0,0 +1,1148 @@ +""" +Rgw admin testing against a running instance +""" +# The test cases in this file have been annotated for inventory. 
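# Illustrative sketch (not part of the patch): how run_radosbench() above turns
# the `rados bench` summary into one CSV row. The sample text is made up but
# uses the same field labels the regular expressions look for.
import re

sample = """Bandwidth (MB/sec): 95.37
Stddev Bandwidth: 3.21
Average Latency: 0.67
Stddev Latency: 0.05"""

def summary_value(label, text):
    # keep only the value that follows "<label>:" on its line
    return re.search(label + r':\s*(\S+)', text).group(1)

row = [summary_value(r'Bandwidth \(MB/sec\)', sample),    # avg_throughput
       summary_value(r'Stddev Bandwidth', sample),        # stdev_throughput
       summary_value(r'Average Latency', sample),         # avg_latency
       summary_value(r'Stddev Latency', sample)]          # stdev_latency
print(','.join(row))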
+# To extract the inventory (in csv format) use the command: +# +# grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' +# +# to run this standalone: +# python qa/tasks/radosgw_admin.py [--user=uid] --host=host --port=port +# + +import json +import logging +import time +import datetime +import sys + +from io import StringIO +from queue import Queue + +import boto.exception +import boto.s3.connection +import boto.s3.acl + +import httplib2 + +#import pdb + +import tasks.vstart_runner +from tasks.rgw import RGWEndpoint +from tasks.util.rgw import rgwadmin as tasks_util_rgw_rgwadmin +from tasks.util.rgw import get_user_summary, get_user_successful_ops + +log = logging.getLogger(__name__) + +def rgwadmin(*args, **kwargs): + ctx = args[0] + # Is this a local runner? + omit_sudo = hasattr(ctx.rgw, 'omit_sudo') and ctx.rgw.omit_sudo == True + omit_tdir = hasattr(ctx.rgw, 'omit_tdir') and ctx.rgw.omit_tdir == True + return tasks_util_rgw_rgwadmin(*args, **kwargs, omit_sudo=omit_sudo, omit_tdir=omit_tdir) + +def usage_acc_findentry2(entries, user, add=True): + for e in entries: + if e['user'] == user: + return e + if not add: + return None + e = {'user': user, 'buckets': []} + entries.append(e) + return e +def usage_acc_findsum2(summaries, user, add=True): + for e in summaries: + if e['user'] == user: + return e + if not add: + return None + e = {'user': user, 'categories': [], + 'total': {'bytes_received': 0, + 'bytes_sent': 0, 'ops': 0, 'successful_ops': 0 }} + summaries.append(e) + return e +def usage_acc_update2(x, out, b_in, err): + x['bytes_sent'] += b_in + x['bytes_received'] += out + x['ops'] += 1 + if not err: + x['successful_ops'] += 1 +def usage_acc_validate_fields(r, x, x2, what): + q=[] + for field in ['bytes_sent', 'bytes_received', 'ops', 'successful_ops']: + try: + if x2[field] < x[field]: + q.append("field %s: %d < %d" % (field, x2[field], x[field])) + except Exception as ex: + r.append( "missing/bad field " + field + " in " + what + " " + str(ex)) + return + if len(q) > 0: + r.append("incomplete counts in " + what + ": " + ", ".join(q)) +class usage_acc: + def __init__(self): + self.results = {'entries': [], 'summary': []} + def findentry(self, user): + return usage_acc_findentry2(self.results['entries'], user) + def findsum(self, user): + return usage_acc_findsum2(self.results['summary'], user) + def e2b(self, e, bucket, add=True): + for b in e['buckets']: + if b['bucket'] == bucket: + return b + if not add: + return None + b = {'bucket': bucket, 'categories': []} + e['buckets'].append(b) + return b + def c2x(self, c, cat, add=True): + for x in c: + if x['category'] == cat: + return x + if not add: + return None + x = {'bytes_received': 0, 'category': cat, + 'bytes_sent': 0, 'ops': 0, 'successful_ops': 0 } + c.append(x) + return x + def update(self, c, cat, user, out, b_in, err): + x = self.c2x(c, cat) + usage_acc_update2(x, out, b_in, err) + if not err and cat == 'create_bucket' and 'owner' not in x: + x['owner'] = user + def make_entry(self, cat, bucket, user, out, b_in, err): + if cat == 'create_bucket' and err: + return + e = self.findentry(user) + b = self.e2b(e, bucket) + self.update(b['categories'], cat, user, out, b_in, err) + s = self.findsum(user) + x = self.c2x(s['categories'], cat) + usage_acc_update2(x, out, b_in, err) + x = s['total'] + usage_acc_update2(x, out, b_in, err) + def generate_make_entry(self): + return lambda cat,bucket,user,out,b_in,err: self.make_entry(cat, bucket, user, out, b_in, err) + def get_usage(self): + return self.results + def compare_results(self, 
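# Illustrative sketch (not part of the patch): the shape of the usage that
# usage_acc builds above. Each logged request bumps the per-bucket counters for
# its category; 'bytes_received' tallies request bodies (what RGW received) and
# 'bytes_sent' tallies response bodies. The values below are made up.
def record(acc, user, bucket, category, request_bytes, response_bytes, err):
    entry = next((e for e in acc['entries'] if e['user'] == user), None)
    if entry is None:
        entry = {'user': user, 'buckets': []}
        acc['entries'].append(entry)
    buck = next((b for b in entry['buckets'] if b['bucket'] == bucket), None)
    if buck is None:
        buck = {'bucket': bucket, 'categories': []}
        entry['buckets'].append(buck)
    cat = next((c for c in buck['categories'] if c['category'] == category), None)
    if cat is None:
        cat = {'category': category, 'bytes_sent': 0, 'bytes_received': 0,
               'ops': 0, 'successful_ops': 0}
        buck['categories'].append(cat)
    cat['bytes_received'] += request_bytes
    cat['bytes_sent'] += response_bytes
    cat['ops'] += 1
    if not err:
        cat['successful_ops'] += 1

acc = {'entries': [], 'summary': []}
record(acc, 'foo', 'myfoo', 'put_obj', request_bytes=128, response_bytes=0, err=False)
print(acc['entries'][0]['buckets'][0]['categories'][0])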
results): + if 'entries' not in results or 'summary' not in results: + return ['Missing entries or summary'] + r = [] + for e in self.results['entries']: + try: + e2 = usage_acc_findentry2(results['entries'], e['user'], False) + except Exception as ex: + r.append("malformed entry looking for user " + + e['user'] + " " + str(ex)) + break + if e2 == None: + r.append("missing entry for user " + e['user']) + continue + for b in e['buckets']: + c = b['categories'] + if b['bucket'] == 'nosuchbucket': + print("got here") + try: + b2 = self.e2b(e2, b['bucket'], False) + if b2 != None: + c2 = b2['categories'] + except Exception as ex: + r.append("malformed entry looking for bucket " + + b['bucket'] + " in user " + e['user'] + " " + str(ex)) + break + if b2 == None: + r.append("can't find bucket " + b['bucket'] + + " in user " + e['user']) + continue + for x in c: + try: + x2 = self.c2x(c2, x['category'], False) + except Exception as ex: + r.append("malformed entry looking for " + + x['category'] + " in bucket " + b['bucket'] + + " user " + e['user'] + " " + str(ex)) + break + usage_acc_validate_fields(r, x, x2, "entry: category " + + x['category'] + " bucket " + b['bucket'] + + " in user " + e['user']) + for s in self.results['summary']: + c = s['categories'] + try: + s2 = usage_acc_findsum2(results['summary'], s['user'], False) + except Exception as ex: + r.append("malformed summary looking for user " + e['user'] + + " " + str(ex)) + break + if s2 == None: + r.append("missing summary for user " + e['user'] + " " + str(ex)) + continue + try: + c2 = s2['categories'] + except Exception as ex: + r.append("malformed summary missing categories for user " + + e['user'] + " " + str(ex)) + break + for x in c: + try: + x2 = self.c2x(c2, x['category'], False) + except Exception as ex: + r.append("malformed summary looking for " + + x['category'] + " user " + e['user'] + " " + str(ex)) + break + usage_acc_validate_fields(r, x, x2, "summary: category " + + x['category'] + " in user " + e['user']) + x = s['total'] + try: + x2 = s2['total'] + except Exception as ex: + r.append("malformed summary looking for totals for user " + + e['user'] + " " + str(ex)) + break + usage_acc_validate_fields(r, x, x2, "summary: totals for user" + e['user']) + return r + +def ignore_this_entry(cat, bucket, user, out, b_in, err): + pass +class requestlog_queue(): + def __init__(self, add): + self.q = Queue(1000) + self.adder = add + def handle_request_data(self, request, response, error=False): + now = datetime.datetime.now() + if error: + pass + elif response.status < 200 or response.status >= 400: + error = True + self.q.put({'t': now, 'o': request, 'i': response, 'e': error}) + def clear(self): + with self.q.mutex: + self.q.queue.clear() + def log_and_clear(self, cat, bucket, user, add_entry = None): + while not self.q.empty(): + j = self.q.get() + bytes_out = 0 + if 'Content-Length' in j['o'].headers: + bytes_out = int(j['o'].headers['Content-Length']) + bytes_in = 0 + msg = j['i'].msg + if 'content-length'in msg: + bytes_in = int(msg['content-length']) + log.info('RL: %s %s %s bytes_out=%d bytes_in=%d failed=%r' + % (cat, bucket, user, bytes_out, bytes_in, j['e'])) + if add_entry == None: + add_entry = self.adder + add_entry(cat, bucket, user, bytes_out, bytes_in, j['e']) + +def create_presigned_url(conn, method, bucket_name, key_name, expiration): + return conn.generate_url(expires_in=expiration, + method=method, + bucket=bucket_name, + key=key_name, + query_auth=True, + ) + +def send_raw_http_request(conn, method, 
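# Illustrative sketch (not part of the patch): the field check used by
# compare_results() above accepts the server-side usage as long as every
# counter is at least what the test accumulated locally (unrelated traffic can
# only inflate the numbers, never shrink them).
def counts_complete(expected, observed):
    fields = ['bytes_sent', 'bytes_received', 'ops', 'successful_ops']
    return all(observed.get(f, 0) >= expected.get(f, 0) for f in fields)

print(counts_complete({'ops': 3, 'successful_ops': 3},
                      {'ops': 5, 'successful_ops': 4}))    # True
print(counts_complete({'ops': 3, 'successful_ops': 3},
                      {'ops': 2, 'successful_ops': 2}))    # False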
bucket_name, key_name, follow_redirects = False): + url = create_presigned_url(conn, method, bucket_name, key_name, 3600) + print(url) + h = httplib2.Http() + h.follow_redirects = follow_redirects + return h.request(url, method) + + +def get_acl(key): + """ + Helper function to get the xml acl from a key, ensuring that the xml + version tag is removed from the acl response + """ + raw_acl = key.get_xml_acl().decode() + + def remove_version(string): + return string.split( + '<?xml version="1.0" encoding="UTF-8"?>' + )[-1] + + def remove_newlines(string): + return string.strip('\n') + + return remove_version( + remove_newlines(raw_acl) + ) + +def cleanup(ctx, client): + # remove objects and buckets + (err, out) = rgwadmin(ctx, client, ['bucket', 'list'], check_status=True) + try: + for bucket in out: + (err, out) = rgwadmin(ctx, client, [ + 'bucket', 'rm', '--bucket', bucket, '--purge-objects'], + check_status=True) + except: + pass + + # remove test user(s) + users = ['foo', 'fud', 'bar', 'bud'] + users.reverse() + for user in users: + try: + (err, out) = rgwadmin(ctx, client, [ + 'user', 'rm', '--uid', user], + check_status=True) + except: + pass + + # remove custom placement + try: + zonecmd = ['zone', 'placement', 'rm', '--rgw-zone', 'default', + '--placement-id', 'new-placement'] + (err, out) = rgwadmin(ctx, client, zonecmd, check_status=True) + except: + pass + +def task(ctx, config): + """ + Test radosgw-admin functionality against a running rgw instance. + """ + global log + + assert ctx.rgw.config, \ + "radosgw_admin task needs a config passed from the rgw task" + config = ctx.rgw.config + log.debug('config is: %r', config) + + clients_from_config = config.keys() + + # choose first client as default + client = next(iter(clients_from_config)) + + # once the client is chosen, pull the host name and assigned port out of + # the role_endpoints that were assigned by the rgw task + endpoint = ctx.rgw.role_endpoints[client] + + cleanup(ctx, client) + + ## + user1='foo' + user2='fud' + user3='bar' + user4='bud' + subuser1='foo:foo1' + subuser2='foo:foo2' + display_name1='Foo' + display_name2='Fud' + display_name3='Bar' + email='foo@foo.com' + access_key='9te6NH5mcdcq0Tc5i8i1' + secret_key='Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu' + access_key2='p5YnriCv1nAtykxBrupQ' + secret_key2='Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh' + access_key3='NX5QOQKC6BH2IDN8HC7A' + secret_key3='LnEsqNNqZIpkzauboDcLXLcYaWwLQ3Kop0zAnKIn' + swift_secret1='gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL' + swift_secret2='ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy' + + bucket_name='myfoo' + bucket_name2='mybar' + + # connect to rgw + connection = boto.s3.connection.S3Connection( + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + is_secure=False, + port=endpoint.port, + host=endpoint.hostname, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + connection.auth_region_name='us-east-1' + + connection2 = boto.s3.connection.S3Connection( + aws_access_key_id=access_key2, + aws_secret_access_key=secret_key2, + is_secure=False, + port=endpoint.port, + host=endpoint.hostname, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + connection2.auth_region_name='us-east-1' + + connection3 = boto.s3.connection.S3Connection( + aws_access_key_id=access_key3, + aws_secret_access_key=secret_key3, + is_secure=False, + port=endpoint.port, + host=endpoint.hostname, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + connection3.auth_region_name='us-east-1' + + acc = usage_acc() + rl = 
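# Illustrative sketch (not part of the patch): what send_raw_http_request()
# above boils down to with a plain boto2 connection. It needs a reachable RGW
# endpoint and valid credentials, so treat it as a usage sketch rather than a
# ready-to-run test; the host/port below are hypothetical.
import boto.s3.connection
import httplib2

conn = boto.s3.connection.S3Connection(
    aws_access_key_id='9te6NH5mcdcq0Tc5i8i1',
    aws_secret_access_key='Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu',
    host='localhost', port=8000, is_secure=False,
    calling_format=boto.s3.connection.OrdinaryCallingFormat())

url = conn.generate_url(expires_in=3600, method='GET',
                        bucket='myfoo', key='four', query_auth=True)
http = httplib2.Http()
http.follow_redirects = False
response, body = http.request(url, 'GET')
print(response.status)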
requestlog_queue(acc.generate_make_entry()) + connection.set_request_hook(rl) + connection2.set_request_hook(rl) + connection3.set_request_hook(rl) + + # legend (test cases can be easily grep-ed out) + # TESTCASE 'testname','object','method','operation','assertion' + + # TESTCASE 'usage-show0' 'usage' 'show' 'all usage' 'succeeds' + (err, summary0) = rgwadmin(ctx, client, ['usage', 'show'], check_status=True) + + # TESTCASE 'info-nosuch','user','info','non-existent user','fails' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1]) + assert err + + # TESTCASE 'create-ok','user','create','w/all valid info','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user1, + '--display-name', display_name1, + '--email', email, + '--access-key', access_key, + '--secret', secret_key, + '--max-buckets', '4' + ], + check_status=True) + + # TESTCASE 'duplicate email','user','create','existing user email','fails' + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user2, + '--display-name', display_name2, + '--email', email, + ]) + assert err + + # TESTCASE 'info-existing','user','info','existing user','returns correct info' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert out['user_id'] == user1 + assert out['email'] == email + assert out['display_name'] == display_name1 + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + assert not out['suspended'] + + # TESTCASE 'suspend-ok','user','suspend','active user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1], + check_status=True) + + # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert out['suspended'] + + # TESTCASE 're-enable','user','enable','suspended user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1], check_status=True) + + # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert not out['suspended'] + + # TESTCASE 'add-keys','key','create','w/valid info','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'key', 'create', '--uid', user1, + '--access-key', access_key2, '--secret', secret_key2, + ], check_status=True) + + # TESTCASE 'info-new-key','user','info','after key addition','returns all keys' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], + check_status=True) + assert len(out['keys']) == 2 + assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2 + assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2 + + # TESTCASE 'rm-key','key','rm','newly added key','succeeds, key is removed' + (err, out) = rgwadmin(ctx, client, [ + 'key', 'rm', '--uid', user1, + '--access-key', access_key2, + ], check_status=True) + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + + # TESTCASE 'add-swift-key','key','create','swift key','succeeds' + subuser_access = 'full' + subuser_perm = 'full-control' + + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'create', '--subuser', subuser1, + '--access', subuser_access + ], check_status=True) + + # TESTCASE 
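# Illustrative sketch (not part of the patch): outside teuthology the same
# checks can be driven with the radosgw-admin CLI directly; the rgwadmin()
# helper above does essentially this plus the coverage/ulimit wrappers.
# Assumes a running cluster with radosgw-admin in PATH.
import json
import subprocess

def radosgw_admin(*args):
    out = subprocess.check_output(('radosgw-admin',) + args)
    return json.loads(out) if out.strip() else None

info = radosgw_admin('user', 'info', '--uid', 'foo')   # after 'user create'
assert info['user_id'] == 'foo'
assert not info['suspended']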
'add-swift-key','key','create','swift key','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'modify', '--subuser', subuser1, + '--secret', swift_secret1, + '--key-type', 'swift', + ], check_status=True) + + # TESTCASE 'subuser-perm-mask', 'subuser', 'info', 'test subuser perm mask durability', 'succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1]) + + assert out['subusers'][0]['permissions'] == subuser_perm + + # TESTCASE 'info-swift-key','user','info','after key addition','returns all keys' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert len(out['swift_keys']) == 1 + assert out['swift_keys'][0]['user'] == subuser1 + assert out['swift_keys'][0]['secret_key'] == swift_secret1 + + # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'create', '--subuser', subuser2, + '--secret', swift_secret2, + '--key-type', 'swift', + ], check_status=True) + + # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1], check_status=True) + assert len(out['swift_keys']) == 2 + assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2 + assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2 + + # TESTCASE 'rm-swift-key1','key','rm','subuser','succeeds, one key is removed' + (err, out) = rgwadmin(ctx, client, [ + 'key', 'rm', '--subuser', subuser1, + '--key-type', 'swift', + ], check_status=True) + assert len(out['swift_keys']) == 1 + + # TESTCASE 'rm-subuser','subuser','rm','subuser','success, subuser is removed' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'rm', '--subuser', subuser1, + ], check_status=True) + assert len(out['subusers']) == 1 + + # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subser and key is removed' + (err, out) = rgwadmin(ctx, client, [ + 'subuser', 'rm', '--subuser', subuser2, + '--key-type', 'swift', '--purge-keys', + ], check_status=True) + assert len(out['swift_keys']) == 0 + assert len(out['subusers']) == 0 + + # TESTCASE 'bucket-stats','bucket','stats','no session/buckets','succeeds, empty list' + (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1], + check_status=True) + assert len(out) == 0 + + # TESTCASE 'bucket-stats2','bucket','stats','no buckets','succeeds, empty list' + (err, out) = rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True) + assert len(out) == 0 + + # create a first bucket + bucket = connection.create_bucket(bucket_name) + + rl.log_and_clear("create_bucket", bucket_name, user1) + + # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list' + (err, out) = rgwadmin(ctx, client, ['bucket', 'list', '--uid', user1], check_status=True) + assert len(out) == 1 + assert out[0] == bucket_name + + bucket_list = connection.get_all_buckets() + assert len(bucket_list) == 1 + assert bucket_list[0].name == bucket_name + + rl.log_and_clear("list_buckets", '', user1) + + # TESTCASE 'bucket-list-all','bucket','list','all buckets','succeeds, expected list' + (err, out) = rgwadmin(ctx, client, ['bucket', 'list'], check_status=True) + assert len(out) >= 1 + assert bucket_name in out; + + # TESTCASE 'max-bucket-limit,'bucket','create','4 buckets','5th bucket fails due to max buckets == 4' + bucket2 = 
connection.create_bucket(bucket_name + '2') + rl.log_and_clear("create_bucket", bucket_name + '2', user1) + bucket3 = connection.create_bucket(bucket_name + '3') + rl.log_and_clear("create_bucket", bucket_name + '3', user1) + bucket4 = connection.create_bucket(bucket_name + '4') + rl.log_and_clear("create_bucket", bucket_name + '4', user1) + # the 5th should fail. + failed = False + try: + connection.create_bucket(bucket_name + '5') + except Exception: + failed = True + assert failed + rl.log_and_clear("create_bucket", bucket_name + '5', user1) + + # delete the buckets + bucket2.delete() + rl.log_and_clear("delete_bucket", bucket_name + '2', user1) + bucket3.delete() + rl.log_and_clear("delete_bucket", bucket_name + '3', user1) + bucket4.delete() + rl.log_and_clear("delete_bucket", bucket_name + '4', user1) + + # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list' + (err, out) = rgwadmin(ctx, client, [ + 'bucket', 'stats', '--bucket', bucket_name], check_status=True) + assert out['owner'] == user1 + bucket_id = out['id'] + + # TESTCASE 'bucket-stats4','bucket','stats','new empty bucket','succeeds, expected bucket ID' + (err, out) = rgwadmin(ctx, client, ['bucket', 'stats', '--uid', user1], check_status=True) + assert len(out) == 1 + assert out[0]['id'] == bucket_id # does it return the same ID twice in a row? + + # use some space + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('one') + rl.log_and_clear("put_obj", bucket_name, user1) + + # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object' + (err, out) = rgwadmin(ctx, client, [ + 'bucket', 'stats', '--bucket', bucket_name], check_status=True) + assert out['id'] == bucket_id + assert out['usage']['rgw.main']['num_objects'] == 1 + assert out['usage']['rgw.main']['size_kb'] > 0 + + #validate we have a positive user stats now + (err, out) = rgwadmin(ctx, client, + ['user', 'stats','--uid', user1, '--sync-stats'], + check_status=True) + assert out['stats']['size'] > 0 + + # reclaim it + key.delete() + rl.log_and_clear("delete_obj", bucket_name, user1) + + # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'fails', 'access denied error' + (err, out) = rgwadmin(ctx, client, + ['bucket', 'unlink', '--uid', user1, '--bucket', bucket_name], + check_status=True) + + # create a second user to link the bucket to + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user2, + '--display-name', display_name2, + '--access-key', access_key2, + '--secret', secret_key2, + '--max-buckets', '1', + ], + check_status=True) + + # try creating an object with the first user before the bucket is relinked + denied = False + key = boto.s3.key.Key(bucket) + + try: + key.set_contents_from_string('two') + except boto.exception.S3ResponseError: + denied = True + + assert not denied + rl.log_and_clear("put_obj", bucket_name, user1) + + # delete the object + key.delete() + rl.log_and_clear("delete_obj", bucket_name, user1) + + # link the bucket to another user + (err, out) = rgwadmin(ctx, client, ['metadata', 'get', 'bucket:{n}'.format(n=bucket_name)], + check_status=True) + + bucket_data = out['data'] + assert bucket_data['bucket']['name'] == bucket_name + + bucket_id = bucket_data['bucket']['bucket_id'] + + # link the bucket to another user + (err, out) = rgwadmin(ctx, client, ['bucket', 'link', '--uid', user2, '--bucket', bucket_name, '--bucket-id', bucket_id], + check_status=True) + + # try to remove user, should fail (has a linked 
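# Illustrative sketch (not part of the patch): the max-buckets check above in
# plain boto2 terms; with --max-buckets 4 the fifth create_bucket() is expected
# to fail. Assumes a live 'connection' like the one opened earlier in the task.
import boto.exception

def bucket_quota_enforced(connection, names):
    # try to create each bucket in turn; report whether the quota kicked in
    for name in names:
        try:
            connection.create_bucket(name)
        except boto.exception.S3ResponseError:
            return True
    return False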
bucket) + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2]) + assert err + + # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'succeeds, bucket unlinked' + (err, out) = rgwadmin(ctx, client, ['bucket', 'unlink', '--uid', user2, '--bucket', bucket_name], + check_status=True) + + # relink the bucket to the first user and delete the second user + (err, out) = rgwadmin(ctx, client, + ['bucket', 'link', '--uid', user1, '--bucket', bucket_name, '--bucket-id', bucket_id], + check_status=True) + + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user2], + check_status=True) + + #TESTCASE 'bucket link', 'bucket', 'tenanted user', 'succeeds' + tenant_name = "testx" + # create a tenanted user to link the bucket to + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--tenant', tenant_name, + '--uid', 'tenanteduser', + '--display-name', 'tenanted-user', + '--access-key', access_key2, + '--secret', secret_key2, + '--max-buckets', '1', + ], + check_status=True) + + # link the bucket to a tenanted user + (err, out) = rgwadmin(ctx, client, ['bucket', 'link', '--bucket', '/' + bucket_name, '--tenant', tenant_name, '--uid', 'tenanteduser'], + check_status=True) + + # check if the bucket name has tenant/ prefix + (err, out) = rgwadmin(ctx, client, ['metadata', 'get', 'bucket:{n}'.format(n= tenant_name + '/' + bucket_name)], + check_status=True) + + bucket_data = out['data'] + assert bucket_data['bucket']['name'] == bucket_name + assert bucket_data['bucket']['tenant'] == tenant_name + + # relink the bucket to the first user and delete the tenanted user + (err, out) = rgwadmin(ctx, client, + ['bucket', 'link', '--bucket', tenant_name + '/' + bucket_name, '--uid', user1], + check_status=True) + + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--tenant', tenant_name, '--uid', 'tenanteduser'], + check_status=True) + + # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed' + + # upload an object + object_name = 'four' + key = boto.s3.key.Key(bucket, object_name) + key.set_contents_from_string(object_name) + rl.log_and_clear("put_obj", bucket_name, user1) + + # fetch it too (for usage stats presently) + s = key.get_contents_as_string(encoding='ascii') + rl.log_and_clear("get_obj", bucket_name, user1) + assert s == object_name + # list bucket too (for usage stats presently) + keys = list(bucket.list()) + rl.log_and_clear("list_bucket", bucket_name, user1) + assert len(keys) == 1 + assert keys[0].name == object_name + + # now delete it + (err, out) = rgwadmin(ctx, client, + ['object', 'rm', '--bucket', bucket_name, '--object', object_name], + check_status=True) + + # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists one no objects' + (err, out) = rgwadmin(ctx, client, [ + 'bucket', 'stats', '--bucket', bucket_name], + check_status=True) + assert out['id'] == bucket_id + assert out['usage']['rgw.main']['num_objects'] == 0 + + # list log objects + # TESTCASE 'log-list','log','list','after activity','succeeds, lists one no objects' + (err, out) = rgwadmin(ctx, client, ['log', 'list'], check_status=True) + assert len(out) > 0 + + for obj in out: + # TESTCASE 'log-show','log','show','after activity','returns expected info' + if obj[:4] == 'meta' or obj[:4] == 'data' or obj[:18] == 'obj_delete_at_hint': + continue + + (err, rgwlog) = rgwadmin(ctx, client, ['log', 'show', '--object', obj], + check_status=True) + assert len(rgwlog) > 0 + + # skip any entry for which there is no bucket name--e.g., 
list_buckets, + # since that is valid but cannot pass the following checks + entry_bucket_name = rgwlog['bucket'] + if entry_bucket_name.strip() != "": + # exempt bucket_name2 from checking as it was only used for multi-region tests + assert rgwlog['bucket'].find(bucket_name) == 0 or rgwlog['bucket'].find(bucket_name2) == 0 + assert rgwlog['bucket'] != bucket_name or rgwlog['bucket_id'] == bucket_id + assert rgwlog['bucket_owner'] == user1 or rgwlog['bucket'] == bucket_name + '5' or rgwlog['bucket'] == bucket_name2 + for entry in rgwlog['log_entries']: + log.debug('checking log entry: ', entry) + assert entry['bucket'] == rgwlog['bucket'] + possible_buckets = [bucket_name + '5', bucket_name2] + user = entry['user'] + assert user == user1 or user.endswith('system-user') or \ + rgwlog['bucket'] in possible_buckets + + # TESTCASE 'log-rm','log','rm','delete log objects','succeeds' + (err, out) = rgwadmin(ctx, client, ['log', 'rm', '--object', obj], + check_status=True) + + # TODO: show log by bucket+date + + # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'suspend', '--uid', user1], + check_status=True) + + # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects' + denied = False + try: + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('five') + except boto.exception.S3ResponseError as e: + denied = True + assert e.status == 403 + + assert denied + rl.log_and_clear("put_obj", bucket_name, user1) + + # TESTCASE 'user-renable2','user','enable','suspended user','succeeds' + (err, out) = rgwadmin(ctx, client, ['user', 'enable', '--uid', user1], + check_status=True) + + # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects' + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('six') + rl.log_and_clear("put_obj", bucket_name, user1) + + # TESTCASE 'gc-list', 'gc', 'list', 'get list of objects ready for garbage collection' + + # create an object large enough to be split into multiple parts + test_string = 'foo'*10000000 + + big_key = boto.s3.key.Key(bucket) + big_key.set_contents_from_string(test_string) + rl.log_and_clear("put_obj", bucket_name, user1) + + # now delete the head + big_key.delete() + rl.log_and_clear("delete_obj", bucket_name, user1) + + # wait a bit to give the garbage collector time to cycle + time.sleep(15) + + (err, out) = rgwadmin(ctx, client, ['gc', 'list', '--include-all']) + assert len(out) > 0 + + # TESTCASE 'gc-process', 'gc', 'process', 'manually collect garbage' + (err, out) = rgwadmin(ctx, client, ['gc', 'process'], check_status=True) + + #confirm + (err, out) = rgwadmin(ctx, client, ['gc', 'list', '--include-all']) + + # don't assume rgw_gc_obj_min_wait has been overridden + omit_tdir = hasattr(ctx.rgw, 'omit_tdir') and ctx.rgw.omit_tdir == True + if omit_tdir==False: + assert len(out) == 0 + + # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets' + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1]) + assert err + + # delete should fail because ``key`` still exists + try: + bucket.delete() + except boto.exception.S3ResponseError as e: + assert e.status == 409 + rl.log_and_clear("delete_bucket", bucket_name, user1) + + key.delete() + rl.log_and_clear("delete_obj", bucket_name, user1) + bucket.delete() + rl.log_and_clear("delete_bucket", bucket_name, user1) + + # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy' + bucket = 
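# Illustrative sketch (not part of the patch): the suspend/enable round trip
# above, reduced to the check that a write from a suspended user is rejected
# with HTTP 403. Assumes a live boto2 'bucket' handle.
import boto.exception
import boto.s3.key

def put_denied(bucket, name, data):
    key = boto.s3.key.Key(bucket, name)
    try:
        key.set_contents_from_string(data)
    except boto.exception.S3ResponseError as e:
        return e.status == 403
    return False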
connection.create_bucket(bucket_name) + rl.log_and_clear("create_bucket", bucket_name, user1) + + # create an object + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('seven') + rl.log_and_clear("put_obj", bucket_name, user1) + + # should be private already but guarantee it + key.set_acl('private') + rl.log_and_clear("put_acls", bucket_name, user1) + + (err, out) = rgwadmin(ctx, client, + ['policy', '--bucket', bucket.name, '--object', key.key.decode()], + check_status=True, format='xml') + + acl = get_acl(key) + rl.log_and_clear("get_acls", bucket_name, user1) + + assert acl == out.strip('\n') + + # add another grantee by making the object public read + key.set_acl('public-read') + rl.log_and_clear("put_acls", bucket_name, user1) + + (err, out) = rgwadmin(ctx, client, + ['policy', '--bucket', bucket.name, '--object', key.key.decode()], + check_status=True, format='xml') + + acl = get_acl(key) + rl.log_and_clear("get_acls", bucket_name, user1) + + assert acl == out.strip('\n') + + # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds' + bucket = connection.create_bucket(bucket_name) + rl.log_and_clear("create_bucket", bucket_name, user1) + key_name = ['eight', 'nine', 'ten', 'eleven'] + for i in range(4): + key = boto.s3.key.Key(bucket) + key.set_contents_from_string(key_name[i]) + rl.log_and_clear("put_obj", bucket_name, user1) + + (err, out) = rgwadmin(ctx, client, + ['bucket', 'rm', '--bucket', bucket_name, '--purge-objects'], + check_status=True) + + # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds' + caps='user=read' + (err, out) = rgwadmin(ctx, client, ['caps', 'add', '--uid', user1, '--caps', caps]) + + assert out['caps'][0]['perm'] == 'read' + + # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds' + (err, out) = rgwadmin(ctx, client, ['caps', 'rm', '--uid', user1, '--caps', caps]) + + assert not out['caps'] + + # TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets' + bucket = connection.create_bucket(bucket_name) + rl.log_and_clear("create_bucket", bucket_name, user1) + key = boto.s3.key.Key(bucket) + + (err, out) = rgwadmin(ctx, client, ['user', 'rm', '--uid', user1]) + assert err + + # TESTCASE 'rm-user2', 'user', 'rm', 'user with data', 'succeeds' + bucket = connection.create_bucket(bucket_name) + rl.log_and_clear("create_bucket", bucket_name, user1) + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('twelve') + rl.log_and_clear("put_obj", bucket_name, user1) + + time.sleep(35) + + # need to wait for all usage data to get flushed, should take up to 30 seconds + timestamp = time.time() + while time.time() - timestamp <= (2 * 60): # wait up to 20 minutes + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--categories', 'delete_obj']) # one of the operations we did is delete_obj, should be present. 
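# Illustrative sketch (not part of the patch): the policy test above compares
# `radosgw-admin policy` output with the S3 ACL only after dropping the XML
# declaration and surrounding newlines, as get_acl() does:
def normalize_acl(raw_acl):
    return raw_acl.strip('\n').split('<?xml version="1.0" encoding="UTF-8"?>')[-1]

sample = '<?xml version="1.0" encoding="UTF-8"?><AccessControlPolicy/>\n'
print(normalize_acl(sample))    # -> <AccessControlPolicy/>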
+ if get_user_successful_ops(out, user1) > 0: + break + time.sleep(1) + + assert time.time() - timestamp <= (20 * 60) + + # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds' + (err, out) = rgwadmin(ctx, client, ['usage', 'show'], check_status=True) + assert len(out['entries']) > 0 + assert len(out['summary']) > 0 + + r = acc.compare_results(out) + if len(r) != 0: + sys.stderr.write(("\n".join(r))+"\n") + assert(len(r) == 0) + + user_summary = get_user_summary(out, user1) + + total = user_summary['total'] + assert total['successful_ops'] > 0 + + # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds' + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1], + check_status=True) + assert len(out['entries']) > 0 + assert len(out['summary']) > 0 + user_summary = out['summary'][0] + for entry in user_summary['categories']: + assert entry['successful_ops'] > 0 + assert user_summary['user'] == user1 + + # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds' + test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket'] + for cat in test_categories: + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1, '--categories', cat], + check_status=True) + assert len(out['summary']) > 0 + user_summary = out['summary'][0] + assert user_summary['user'] == user1 + assert len(user_summary['categories']) == 1 + entry = user_summary['categories'][0] + assert entry['category'] == cat + assert entry['successful_ops'] > 0 + + # TESTCASE 'user-rename', 'user', 'rename', 'existing user', 'new user', 'succeeds' + # create a new user user3 + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user3, + '--display-name', display_name3, + '--access-key', access_key3, + '--secret', secret_key3, + '--max-buckets', '4' + ], + check_status=True) + + # create a bucket + bucket = connection3.create_bucket(bucket_name + '6') + + rl.log_and_clear("create_bucket", bucket_name + '6', user3) + + # create object + object_name1 = 'thirteen' + key1 = boto.s3.key.Key(bucket, object_name1) + key1.set_contents_from_string(object_name1) + rl.log_and_clear("put_obj", bucket_name + '6', user3) + + # rename user3 + (err, out) = rgwadmin(ctx, client, ['user', 'rename', '--uid', user3, '--new-uid', user4], check_status=True) + assert out['user_id'] == user4 + assert out['keys'][0]['access_key'] == access_key3 + assert out['keys'][0]['secret_key'] == secret_key3 + + time.sleep(5) + + # get bucket and object to test if user keys are preserved + bucket = connection3.get_bucket(bucket_name + '6') + s = key1.get_contents_as_string(encoding='ascii') + rl.log_and_clear("get_obj", bucket_name + '6', user4) + assert s == object_name1 + + # TESTCASE 'user-rename', 'user', 'rename', 'existing user', 'another existing user', 'fails' + # create a new user user2 + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', user2, + '--display-name', display_name2, + '--access-key', access_key2, + '--secret', secret_key2, + '--max-buckets', '4' + ], + check_status=True) + + # create a bucket + bucket = connection2.create_bucket(bucket_name + '7') + + rl.log_and_clear("create_bucket", bucket_name + '7', user2) + + # create object + object_name2 = 'fourteen' + key2 = boto.s3.key.Key(bucket, object_name2) + key2.set_contents_from_string(object_name2) + rl.log_and_clear("put_obj", bucket_name + '7', user2) + + (err, out) = rgwadmin(ctx, client, ['user', 'rename', '--uid', user4, '--new-uid', user2]) + assert err + + # test if user 2 and user4 can still 
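# Illustrative sketch (not part of the patch): the wait above is a generic
# poll-until-true with a deadline; usage data is only flushed periodically, so
# the test polls for the delete_obj entry instead of sleeping a fixed time.
import time

def wait_for(predicate, timeout=120, interval=1):
    deadline = time.time() + timeout
    while time.time() < deadline:
        if predicate():
            return True
        time.sleep(interval)
    return False

# e.g. wait_for(lambda: get_user_successful_ops(show_usage(), 'foo') > 0)
# where show_usage() is a hypothetical wrapper around 'usage show'.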
access their bucket and objects after rename fails + bucket = connection3.get_bucket(bucket_name + '6') + s = key1.get_contents_as_string(encoding='ascii') + rl.log_and_clear("get_obj", bucket_name + '6', user4) + assert s == object_name1 + + bucket = connection2.get_bucket(bucket_name + '7') + s = key2.get_contents_as_string(encoding='ascii') + rl.log_and_clear("get_obj", bucket_name + '7', user2) + assert s == object_name2 + + (err, out) = rgwadmin(ctx, client, + ['user', 'rm', '--uid', user4, '--purge-data' ], + check_status=True) + + (err, out) = rgwadmin(ctx, client, + ['user', 'rm', '--uid', user2, '--purge-data' ], + check_status=True) + + time.sleep(5) + + # should be all through with connection. (anything using connection + # should be BEFORE the usage stuff above.) + rl.log_and_clear("(before-close)", '-', '-', ignore_this_entry) + connection.close() + connection = None + + # the usage flush interval is 30 seconds, wait that much an then some + # to make sure everything has been flushed + time.sleep(35) + + # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed' + (err, out) = rgwadmin(ctx, client, ['usage', 'trim', '--uid', user1], + check_status=True) + (err, out) = rgwadmin(ctx, client, ['usage', 'show', '--uid', user1], + check_status=True) + assert len(out['entries']) == 0 + assert len(out['summary']) == 0 + + (err, out) = rgwadmin(ctx, client, + ['user', 'rm', '--uid', user1, '--purge-data' ], + check_status=True) + + # TESTCASE 'rm-user3','user','rm','deleted user','fails' + (err, out) = rgwadmin(ctx, client, ['user', 'info', '--uid', user1]) + assert err + + # TESTCASE 'zone-info', 'zone', 'get', 'get zone info', 'succeeds, has default placement rule' + (err, out) = rgwadmin(ctx, client, ['zone', 'get','--rgw-zone','default']) + orig_placement_pools = len(out['placement_pools']) + + # removed this test, it is not correct to assume that zone has default placement, it really + # depends on how we set it up before + # + # assert len(out) > 0 + # assert len(out['placement_pools']) == 1 + + # default_rule = out['placement_pools'][0] + # assert default_rule['key'] == 'default-placement' + + rule={'key': 'new-placement', 'val': {'data_pool': '.rgw.buckets.2', 'index_pool': '.rgw.buckets.index.2'}} + + out['placement_pools'].append(rule) + + (err, out) = rgwadmin(ctx, client, ['zone', 'set'], + stdin=StringIO(json.dumps(out)), + check_status=True) + + (err, out) = rgwadmin(ctx, client, ['zone', 'get']) + assert len(out) > 0 + assert len(out['placement_pools']) == orig_placement_pools + 1 + + zonecmd = ['zone', 'placement', 'rm', + '--rgw-zone', 'default', + '--placement-id', 'new-placement'] + + (err, out) = rgwadmin(ctx, client, zonecmd, check_status=True) + + # TESTCASE 'zonegroup-info', 'zonegroup', 'get', 'get zonegroup info', 'succeeds' + (err, out) = rgwadmin(ctx, client, ['zonegroup', 'get'], check_status=True) + +from teuthology.config import config +from teuthology.orchestra import cluster + +import argparse; + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--uid') + parser.add_argument('--host', required=True) + parser.add_argument('--port', type=int) + + args = parser.parse_args() + host = args.host + if args.port: + port = args.port + else: + port = 80 + + client0 = tasks.vstart_runner.LocalRemote() + ctx = config + ctx.cluster=cluster.Cluster(remotes=[(client0, + [ 'ceph.client.rgw.%s' % (port), ]),]) + ctx.rgw = argparse.Namespace() + endpoints = {} + endpoints['ceph.client.rgw.%s' % port] = RGWEndpoint( + 
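# Illustrative sketch (not part of the patch): the placement test above edits
# the zone JSON in memory and feeds it back through `zone set` on stdin. The
# zone document here is a trimmed-down stand-in.
import json

zone = {'placement_pools': [
    {'key': 'default-placement',
     'val': {'data_pool': '.rgw.buckets',
             'index_pool': '.rgw.buckets.index'}}]}

zone['placement_pools'].append(
    {'key': 'new-placement',
     'val': {'data_pool': '.rgw.buckets.2',
             'index_pool': '.rgw.buckets.index.2'}})

print(json.dumps(zone, indent=2))   # this is what gets piped to 'zone set'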
hostname=host, + port=port) + ctx.rgw.role_endpoints = endpoints + ctx.rgw.realm = None + ctx.rgw.regions = {'region0': { 'api name': 'api1', + 'is master': True, 'master zone': 'r0z0', + 'zones': ['r0z0', 'r0z1'] }} + ctx.rgw.omit_sudo = True + ctx.rgw.omit_tdir = True + ctx.rgw.config = {'ceph.client.rgw.%s' % port: {'system user': {'name': '%s-system-user' % port}}} + task(config, None) + exit() + +if __name__ == '__main__': + main() diff --git a/qa/tasks/radosgw_admin_rest.py b/qa/tasks/radosgw_admin_rest.py new file mode 100644 index 000000000..3de4d6bc9 --- /dev/null +++ b/qa/tasks/radosgw_admin_rest.py @@ -0,0 +1,815 @@ +""" +Run a series of rgw admin commands through the rest interface. + +The test cases in this file have been annotated for inventory. +To extract the inventory (in csv format) use the command: + + grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' + +""" +import logging + + +import boto.exception +import boto.s3.connection +import boto.s3.acl + +import requests +import time + +from boto.connection import AWSAuthConnection +from teuthology import misc as teuthology +from tasks.util.rgw import get_user_summary, get_user_successful_ops, rgwadmin + +log = logging.getLogger(__name__) + +def rgwadmin_rest(connection, cmd, params=None, headers=None, raw=False): + """ + perform a rest command + """ + log.info('radosgw-admin-rest: %s %s' % (cmd, params)) + put_cmds = ['create', 'link', 'add'] + post_cmds = ['unlink', 'modify'] + delete_cmds = ['trim', 'rm', 'process'] + get_cmds = ['check', 'info', 'show', 'list', ''] + + bucket_sub_resources = ['object', 'policy', 'index'] + user_sub_resources = ['subuser', 'key', 'caps'] + zone_sub_resources = ['pool', 'log', 'garbage'] + + def get_cmd_method_and_handler(cmd): + """ + Get the rest command and handler from information in cmd and + from the imported requests object. + """ + if cmd[1] in put_cmds: + return 'PUT', requests.put + elif cmd[1] in delete_cmds: + return 'DELETE', requests.delete + elif cmd[1] in post_cmds: + return 'POST', requests.post + elif cmd[1] in get_cmds: + return 'GET', requests.get + + def get_resource(cmd): + """ + Get the name of the resource from information in cmd. + """ + if cmd[0] == 'bucket' or cmd[0] in bucket_sub_resources: + if cmd[0] == 'bucket': + return 'bucket', '' + else: + return 'bucket', cmd[0] + elif cmd[0] == 'user' or cmd[0] in user_sub_resources: + if cmd[0] == 'user': + return 'user', '' + else: + return 'user', cmd[0] + elif cmd[0] == 'usage': + return 'usage', '' + elif cmd[0] == 'info': + return 'info', '' + elif cmd[0] == 'ratelimit': + return 'ratelimit', '' + elif cmd[0] == 'zone' or cmd[0] in zone_sub_resources: + if cmd[0] == 'zone': + return 'zone', '' + else: + return 'zone', cmd[0] + + def build_admin_request(conn, method, resource = '', headers=None, data='', + query_args=None, params=None): + """ + Build an administative request adapted from the build_request() + method of boto.connection + """ + + path = conn.calling_format.build_path_base('admin', resource) + auth_path = conn.calling_format.build_auth_path('admin', resource) + host = conn.calling_format.build_host(conn.server_name(), 'admin') + if query_args: + path += '?' + query_args + boto.log.debug('path=%s' % path) + auth_path += '?' 
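# Illustrative sketch (not part of the patch): rgwadmin_rest() above picks the
# HTTP verb from the admin sub-command; the same mapping in isolation:
PUT_CMDS = {'create', 'link', 'add'}
POST_CMDS = {'unlink', 'modify'}
DELETE_CMDS = {'trim', 'rm', 'process'}
GET_CMDS = {'check', 'info', 'show', 'list', ''}

def http_method(cmd):
    action = cmd[1]
    if action in PUT_CMDS:
        return 'PUT'
    if action in POST_CMDS:
        return 'POST'
    if action in DELETE_CMDS:
        return 'DELETE'
    if action in GET_CMDS:
        return 'GET'
    raise ValueError('unknown admin action: %r' % action)

print(http_method(['user', 'create']))   # PUT
print(http_method(['bucket', 'info']))   # GET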
+ query_args + boto.log.debug('auth_path=%s' % auth_path) + return AWSAuthConnection.build_base_http_request(conn, method, path, + auth_path, params, headers, data, host) + + method, handler = get_cmd_method_and_handler(cmd) + resource, query_args = get_resource(cmd) + request = build_admin_request(connection, method, resource, + query_args=query_args, headers=headers) + + url = '{protocol}://{host}{path}'.format(protocol=request.protocol, + host=request.host, path=request.path) + + request.authorize(connection=connection) + result = handler(url, params=params, headers=request.headers) + + if raw: + log.info(' text result: %s' % result.text) + return result.status_code, result.text + elif len(result.content) == 0: + # many admin requests return no body, so json() throws a JSONDecodeError + log.info(' empty result') + return result.status_code, None + else: + log.info(' json result: %s' % result.json()) + return result.status_code, result.json() + + +def task(ctx, config): + """ + Test radosgw-admin functionality through the RESTful interface + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task s3tests only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + # just use the first client... + client = next(iter(clients)) + + ## + admin_user = 'ada' + admin_display_name = 'Ms. Admin User' + admin_access_key = 'MH1WC2XQ1S8UISFDZC8W' + admin_secret_key = 'dQyrTPA0s248YeN5bBv4ukvKU0kh54LWWywkrpoG' + admin_caps = 'users=read, write; usage=read, write; buckets=read, write; zone=read, write; info=read;ratelimit=read, write' + + user1 = 'foo' + user2 = 'fud' + ratelimit_user = 'ratelimit_user' + subuser1 = 'foo:foo1' + subuser2 = 'foo:foo2' + display_name1 = 'Foo' + display_name2 = 'Fud' + email = 'foo@foo.com' + access_key = '9te6NH5mcdcq0Tc5i8i1' + secret_key = 'Ny4IOauQoL18Gp2zM7lC1vLmoawgqcYP/YGcWfXu' + access_key2 = 'p5YnriCv1nAtykxBrupQ' + secret_key2 = 'Q8Tk6Q/27hfbFSYdSkPtUqhqx1GgzvpXa4WARozh' + swift_secret1 = 'gpS2G9RREMrnbqlp29PP2D36kgPR1tm72n5fPYfL' + swift_secret2 = 'ri2VJQcKSYATOY6uaDUX7pxgkW+W1YmC6OCxPHwy' + + bucket_name = 'myfoo' + + # legend (test cases can be easily grep-ed out) + # TESTCASE 'testname','object','method','operation','assertion' + # TESTCASE 'create-admin-user','user','create','administrative user','succeeds' + (err, out) = rgwadmin(ctx, client, [ + 'user', 'create', + '--uid', admin_user, + '--display-name', admin_display_name, + '--access-key', admin_access_key, + '--secret', admin_secret_key, + '--max-buckets', '0', + '--caps', admin_caps + ]) + logging.error(out) + logging.error(err) + assert not err + + assert hasattr(ctx, 'rgw'), 'radosgw-admin-rest must run after the rgw task' + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 'no rgw endpoint for {}'.format(client) + + admin_conn = boto.s3.connection.S3Connection( + aws_access_key_id=admin_access_key, + aws_secret_access_key=admin_secret_key, + is_secure=True if endpoint.cert else False, + port=endpoint.port, + host=endpoint.hostname, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + + # TESTCASE 'info-nosuch','user','info','non-existent user','fails' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {"uid": user1}) + assert ret == 404 + + # TESTCASE 
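# Illustrative sketch (not part of the patch): the admin requests built above
# target /admin/<resource>, with any sub-resource passed as a bare query
# string (e.g. /admin/user and /admin/user?subuser). Hypothetical helper:
def admin_path(resource, query_args=''):
    path = '/admin/%s' % resource
    if query_args:
        path += '?' + query_args
    return path

print(admin_path('user'))              # /admin/user
print(admin_path('user', 'subuser'))   # /admin/user?subuser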
'create-ok','user','create','w/all valid info','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, + ['user', 'create'], + {'uid' : user1, + 'display-name' : display_name1, + 'email' : email, + 'access-key' : access_key, + 'secret-key' : secret_key, + 'max-buckets' : '4' + }) + + assert ret == 200 + + # TESTCASE 'list-no-user','user','list','list user keys','user list object' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'list'], {'list' : '', 'max-entries' : 0}) + assert ret == 200 + assert out['count'] == 0 + assert out['truncated'] == True + assert len(out['keys']) == 0 + assert len(out['marker']) > 0 + + # TESTCASE 'list-user-without-marker','user','list','list user keys','user list object' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'list'], {'list' : '', 'max-entries' : 1}) + assert ret == 200 + assert out['count'] == 1 + assert out['truncated'] == True + assert len(out['keys']) == 1 + assert len(out['marker']) > 0 + marker = out['marker'] + + # TESTCASE 'list-user-with-marker','user','list','list user keys','user list object' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'list'], {'list' : '', 'max-entries' : 1, 'marker': marker}) + assert ret == 200 + assert out['count'] == 1 + assert out['truncated'] == False + assert len(out['keys']) == 1 + + # TESTCASE 'info-existing','user','info','existing user','returns correct info' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + + assert out['user_id'] == user1 + assert out['email'] == email + assert out['display_name'] == display_name1 + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + assert not out['suspended'] + assert out['tenant'] == '' + assert out['max_buckets'] == 4 + assert out['caps'] == [] + assert out['op_mask'] == 'read, write, delete' + assert out['default_placement'] == '' + assert out['default_storage_class'] == '' + assert out['placement_tags'] == [] + assert not out['bucket_quota']['enabled'] + assert not out['bucket_quota']['check_on_raw'] + assert out['bucket_quota']['max_size'] == -1 + assert out['bucket_quota']['max_size_kb'] == 0 + assert out['bucket_quota']['max_objects'] == -1 + assert not out['user_quota']['enabled'] + assert not out['user_quota']['check_on_raw'] + assert out['user_quota']['max_size'] == -1 + assert out['user_quota']['max_size_kb'] == 0 + assert out['user_quota']['max_objects'] == -1 + assert out['temp_url_keys'] == [] + assert out['type'] == 'rgw' + assert out['mfa_ids'] == [] + # TESTCASE 'info-existing','user','info','existing user query with wrong uid but correct access key','returns correct info' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'access-key' : access_key, 'uid': 'uid_not_exist'}) + + assert out['user_id'] == user1 + assert out['email'] == email + assert out['display_name'] == display_name1 + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + assert not out['suspended'] + assert out['tenant'] == '' + assert out['max_buckets'] == 4 + assert out['caps'] == [] + assert out['op_mask'] == "read, write, delete" + assert out['default_placement'] == '' + assert out['default_storage_class'] == '' + assert out['placement_tags'] == [] + assert not out['bucket_quota']['enabled'] + assert not out['bucket_quota']['check_on_raw'] + assert out ['bucket_quota']['max_size'] == -1 + assert out ['bucket_quota']['max_size_kb'] == 0 + assert out ['bucket_quota']['max_objects'] 
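# Illustrative sketch (not part of the patch): the 'user list' checks above
# page through results with max-entries/marker until 'truncated' is False;
# 'list_users_page' below is a hypothetical callable standing in for the
# signed REST call.
def all_users(list_users_page, page_size=1):
    users, marker = [], None
    while True:
        params = {'list': '', 'max-entries': page_size}
        if marker:
            params['marker'] = marker
        out = list_users_page(params)
        users.extend(out['keys'])
        if not out['truncated']:
            return users
        marker = out['marker']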
== -1 + assert not out['user_quota']['enabled'] + assert not out['user_quota']['check_on_raw'] + assert out['user_quota']['max_size'] == -1 + assert out['user_quota']['max_size_kb'] == 0 + assert out['user_quota']['max_objects'] == -1 + assert out['temp_url_keys'] == [] + assert out['type'] == 'rgw' + assert out['mfa_ids'] == [] + + # TESTCASE 'suspend-ok','user','suspend','active user','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : True}) + assert ret == 200 + + # TESTCASE 'suspend-suspended','user','suspend','suspended user','succeeds w/advisory' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 200 + assert out['suspended'] + assert out['email'] == email + + # TESTCASE 're-enable','user','enable','suspended user','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : 'false'}) + assert not err + + # TESTCASE 'info-re-enabled','user','info','re-enabled user','no longer suspended' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 200 + assert not out['suspended'] + + # TESTCASE 'add-keys','key','create','w/valid info','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, + ['key', 'create'], + {'uid' : user1, + 'access-key' : access_key2, + 'secret-key' : secret_key2 + }) + + + assert ret == 200 + + # TESTCASE 'info-new-key','user','info','after key addition','returns all keys' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 200 + assert len(out['keys']) == 2 + assert out['keys'][0]['access_key'] == access_key2 or out['keys'][1]['access_key'] == access_key2 + assert out['keys'][0]['secret_key'] == secret_key2 or out['keys'][1]['secret_key'] == secret_key2 + + # TESTCASE 'rm-key','key','rm','newly added key','succeeds, key is removed' + (ret, out) = rgwadmin_rest(admin_conn, + ['key', 'rm'], + {'uid' : user1, + 'access-key' : access_key2 + }) + + assert ret == 200 + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + + assert len(out['keys']) == 1 + assert out['keys'][0]['access_key'] == access_key + assert out['keys'][0]['secret_key'] == secret_key + + # TESTCASE 'add-swift-key','key','create','swift key','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, + ['subuser', 'create'], + {'subuser' : subuser1, + 'secret-key' : swift_secret1, + 'key-type' : 'swift' + }) + + assert ret == 200 + + # TESTCASE 'info-swift-key','user','info','after key addition','returns all keys' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 200 + assert len(out['swift_keys']) == 1 + assert out['swift_keys'][0]['user'] == subuser1 + assert out['swift_keys'][0]['secret_key'] == swift_secret1 + + # TESTCASE 'add-swift-subuser','key','create','swift sub-user key','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, + ['subuser', 'create'], + {'subuser' : subuser2, + 'secret-key' : swift_secret2, + 'key-type' : 'swift' + }) + + assert ret == 200 + + # TESTCASE 'info-swift-subuser','user','info','after key addition','returns all sub-users/keys' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 200 + assert len(out['swift_keys']) == 2 + assert out['swift_keys'][0]['user'] == subuser2 or out['swift_keys'][1]['user'] == subuser2 + assert out['swift_keys'][0]['secret_key'] == swift_secret2 or out['swift_keys'][1]['secret_key'] == swift_secret2 + + # TESTCASE 
'rm-swift-key1','key','rm','subuser','succeeds, one key is removed' + (ret, out) = rgwadmin_rest(admin_conn, + ['key', 'rm'], + {'subuser' : subuser1, + 'key-type' :'swift' + }) + + assert ret == 200 + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert len(out['swift_keys']) == 1 + + # TESTCASE 'rm-subuser','subuser','rm','subuser','success, subuser is removed' + (ret, out) = rgwadmin_rest(admin_conn, + ['subuser', 'rm'], + {'subuser' : subuser1 + }) + + assert ret == 200 + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert len(out['subusers']) == 1 + + # TESTCASE 'rm-subuser-with-keys','subuser','rm','subuser','succeeds, second subser and key is removed' + (ret, out) = rgwadmin_rest(admin_conn, + ['subuser', 'rm'], + {'subuser' : subuser2, + 'key-type' : 'swift', + '{purge-keys' :True + }) + + assert ret == 200 + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert len(out['swift_keys']) == 0 + assert len(out['subusers']) == 0 + + # TESTCASE 'bucket-stats','bucket','info','no session/buckets','succeeds, empty list' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1}) + assert ret == 200 + assert len(out) == 0 + + # connect to rgw + connection = boto.s3.connection.S3Connection( + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + is_secure=True if endpoint.cert else False, + port=endpoint.port, + host=endpoint.hostname, + calling_format=boto.s3.connection.OrdinaryCallingFormat(), + ) + + # TESTCASE 'bucket-stats2','bucket','stats','no buckets','succeeds, empty list' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1, 'stats' : True}) + assert ret == 200 + assert len(out) == 0 + + # create a first bucket + bucket = connection.create_bucket(bucket_name) + + # TESTCASE 'bucket-list','bucket','list','one bucket','succeeds, expected list' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1}) + assert ret == 200 + assert len(out) == 1 + assert out[0] == bucket_name + + # TESTCASE 'bucket-stats3','bucket','stats','new empty bucket','succeeds, empty list' + (ret, out) = rgwadmin_rest(admin_conn, + ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True}) + + assert ret == 200 + assert out['owner'] == user1 + assert out['tenant'] == '' + bucket_id = out['id'] + + # TESTCASE 'bucket-stats4','bucket','stats','new empty bucket','succeeds, expected bucket ID' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'uid' : user1, 'stats' : True}) + assert ret == 200 + assert len(out) == 1 + assert out[0]['id'] == bucket_id # does it return the same ID twice in a row? 
+ + # use some space + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('one') + + # TESTCASE 'bucket-stats5','bucket','stats','after creating key','succeeds, lists one non-empty object' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True}) + assert ret == 200 + assert out['id'] == bucket_id + assert out['usage']['rgw.main']['num_objects'] == 1 + assert out['usage']['rgw.main']['size_kb'] > 0 + + # TESTCASE 'bucket-stats6', 'bucket', 'stats', 'non-existent bucket', 'fails, 'bucket not found error' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : 'doesnotexist'}) + assert ret == 404 + assert out['Code'] == 'NoSuchBucket' + + # reclaim it + key.delete() + + # TESTCASE 'bucket unlink', 'bucket', 'unlink', 'unlink bucket from user', 'fails', 'access denied error' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'unlink'], {'uid' : user1, 'bucket' : bucket_name}) + + assert ret == 200 + + # create a second user to link the bucket to + (ret, out) = rgwadmin_rest(admin_conn, + ['user', 'create'], + {'uid' : user2, + 'display-name' : display_name2, + 'access-key' : access_key2, + 'secret-key' : secret_key2, + 'max-buckets' : '1', + }) + + assert ret == 200 + + # try creating an object with the first user before the bucket is relinked + denied = False + key = boto.s3.key.Key(bucket) + + try: + key.set_contents_from_string('two') + except boto.exception.S3ResponseError: + denied = True + + assert not denied + + # delete the object + key.delete() + + # link the bucket to another user + (ret, out) = rgwadmin_rest(admin_conn, + ['bucket', 'link'], + {'uid' : user2, + 'bucket' : bucket_name, + 'bucket-id' : bucket_id, + }) + + assert ret == 200 + + # try creating an object with the first user which should cause an error + key = boto.s3.key.Key(bucket) + + try: + key.set_contents_from_string('three') + except boto.exception.S3ResponseError: + denied = True + + assert denied + + # relink the bucket to the first user and delete the second user + (ret, out) = rgwadmin_rest(admin_conn, + ['bucket', 'link'], + {'uid' : user1, + 'bucket' : bucket_name, + 'bucket-id' : bucket_id, + }) + assert ret == 200 + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user2}) + assert ret == 200 + + # TESTCASE 'object-rm', 'object', 'rm', 'remove object', 'succeeds, object is removed' + + # upload an object + object_name = 'four' + key = boto.s3.key.Key(bucket, object_name) + key.set_contents_from_string(object_name) + + # now delete it + (ret, out) = rgwadmin_rest(admin_conn, ['object', 'rm'], {'bucket' : bucket_name, 'object' : object_name}) + assert ret == 200 + + # TESTCASE 'bucket-stats6','bucket','stats','after deleting key','succeeds, lists one no objects' + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'info'], {'bucket' : bucket_name, 'stats' : True}) + assert ret == 200 + assert out['id'] == bucket_id + assert out['usage']['rgw.main']['num_objects'] == 0 + + # create a bucket for deletion stats + useless_bucket = connection.create_bucket('useless-bucket') + useless_key = useless_bucket.new_key('useless_key') + useless_key.set_contents_from_string('useless string') + + # delete it + useless_key.delete() + useless_bucket.delete() + + # wait for the statistics to flush + time.sleep(60) + + # need to wait for all usage data to get flushed, should take up to 30 seconds + timestamp = time.time() + while time.time() - timestamp <= (20 * 60): # wait up to 20 minutes + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 
'show'], {'categories' : 'delete_obj'}) # last operation we did is delete obj, wait for it to flush + + if get_user_successful_ops(out, user1) > 0: + break + time.sleep(1) + + assert time.time() - timestamp <= (20 * 60) + + # TESTCASE 'usage-show' 'usage' 'show' 'all usage' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show']) + assert ret == 200 + assert len(out['entries']) > 0 + assert len(out['summary']) > 0 + user_summary = get_user_summary(out, user1) + total = user_summary['total'] + assert total['successful_ops'] > 0 + + # TESTCASE 'usage-show2' 'usage' 'show' 'user usage' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1}) + assert ret == 200 + assert len(out['entries']) > 0 + assert len(out['summary']) > 0 + user_summary = out['summary'][0] + for entry in user_summary['categories']: + assert entry['successful_ops'] > 0 + assert user_summary['user'] == user1 + + # TESTCASE 'usage-show3' 'usage' 'show' 'user usage categories' 'succeeds' + test_categories = ['create_bucket', 'put_obj', 'delete_obj', 'delete_bucket'] + for cat in test_categories: + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1, 'categories' : cat}) + assert ret == 200 + assert len(out['summary']) > 0 + user_summary = out['summary'][0] + assert user_summary['user'] == user1 + assert len(user_summary['categories']) == 1 + entry = user_summary['categories'][0] + assert entry['category'] == cat + assert entry['successful_ops'] > 0 + + # TESTCASE 'usage-trim' 'usage' 'trim' 'user usage' 'succeeds, usage removed' + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'trim'], {'uid' : user1}) + assert ret == 200 + (ret, out) = rgwadmin_rest(admin_conn, ['usage', 'show'], {'uid' : user1}) + assert ret == 200 + assert len(out['entries']) == 0 + assert len(out['summary']) == 0 + + # TESTCASE 'user-suspend2','user','suspend','existing user','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : True}) + assert ret == 200 + + # TESTCASE 'user-suspend3','user','suspend','suspended user','cannot write objects' + try: + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('five') + except boto.exception.S3ResponseError as e: + assert e.status == 403 + + # TESTCASE 'user-renable2','user','enable','suspended user','succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'modify'], {'uid' : user1, 'suspended' : 'false'}) + assert ret == 200 + + # TESTCASE 'user-renable3','user','enable','reenabled user','can write objects' + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('six') + + # TESTCASE 'garbage-list', 'garbage', 'list', 'get list of objects ready for garbage collection' + + # create an object large enough to be split into multiple parts + test_string = 'foo'*10000000 + + big_key = boto.s3.key.Key(bucket) + big_key.set_contents_from_string(test_string) + + # now delete the head + big_key.delete() + + # TESTCASE 'rm-user-buckets','user','rm','existing user','fails, still has buckets' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1}) + assert ret == 409 + + # delete should fail because ``key`` still exists + try: + bucket.delete() + except boto.exception.S3ResponseError as e: + assert e.status == 409 + + key.delete() + bucket.delete() + + # TESTCASE 'policy', 'bucket', 'policy', 'get bucket policy', 'returns S3 policy' + bucket = connection.create_bucket(bucket_name) + + # create an object + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('seven') 
+ + # should be private already but guarantee it + key.set_acl('private') + + (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket' : bucket.name, 'object' : key.key}) + assert ret == 200 + assert len(out['acl']['grant_map']) == 1 + + # add another grantee by making the object public read + key.set_acl('public-read') + + (ret, out) = rgwadmin_rest(admin_conn, ['policy', 'show'], {'bucket' : bucket.name, 'object' : key.key}) + assert ret == 200 + assert len(out['acl']['grant_map']) == 2 + + # TESTCASE 'rm-bucket', 'bucket', 'rm', 'bucket with objects', 'succeeds' + bucket = connection.create_bucket(bucket_name) + key_name = ['eight', 'nine', 'ten', 'eleven'] + for i in range(4): + key = boto.s3.key.Key(bucket) + key.set_contents_from_string(key_name[i]) + + (ret, out) = rgwadmin_rest(admin_conn, ['bucket', 'rm'], {'bucket' : bucket_name, 'purge-objects' : True}) + assert ret == 200 + + # TESTCASE 'caps-add', 'caps', 'add', 'add user cap', 'succeeds' + caps = 'usage=read' + (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'add'], {'uid' : user1, 'user-caps' : caps}) + assert ret == 200 + assert out[0]['perm'] == 'read' + + # TESTCASE 'caps-rm', 'caps', 'rm', 'remove existing cap from user', 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['caps', 'rm'], {'uid' : user1, 'user-caps' : caps}) + assert ret == 200 + assert not out + + # TESTCASE 'rm-user','user','rm','existing user','fails, still has buckets' + bucket = connection.create_bucket(bucket_name) + key = boto.s3.key.Key(bucket) + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1}) + assert ret == 409 + + # TESTCASE 'rm-user2', 'user', 'rm', user with data', 'succeeds' + bucket = connection.create_bucket(bucket_name) + key = boto.s3.key.Key(bucket) + key.set_contents_from_string('twelve') + + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'rm'], {'uid' : user1, 'purge-data' : True}) + assert ret == 200 + + # TESTCASE 'rm-user3','user','info','deleted user','fails' + (ret, out) = rgwadmin_rest(admin_conn, ['user', 'info'], {'uid' : user1}) + assert ret == 404 + + # TESTCASE 'info' 'display info' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['info', '']) + assert ret == 200 + info = out['info'] + backends = info['storage_backends'] + name = backends[0]['name'] + fsid = backends[0]['cluster_id'] + # name is always "rados" at time of writing, but zipper would allow + # other backends, at some point + assert len(name) > 0 + # fsid is a uuid, but I'm not going to try to parse it + assert len(fsid) > 0 + + # TESTCASE 'ratelimit' 'user' 'info' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, + ['user', 'create'], + {'uid' : ratelimit_user, + 'display-name' : display_name1, + 'email' : email, + 'access-key' : access_key, + 'secret-key' : secret_key, + 'max-buckets' : '1000' + }) + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'ratelimit-scope' : 'user', 'uid' : ratelimit_user}) + assert ret == 200 + + # TESTCASE 'ratelimit' 'user' 'info' 'not existing user' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'ratelimit-scope' : 'user', 'uid' : ratelimit_user + 'string'}) + assert ret == 404 + + # TESTCASE 'ratelimit' 'user' 'info' 'uid not specified' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'ratelimit-scope' : 'user'}) + assert ret == 400 + + # TESTCASE 'ratelimit' 'bucket' 'info' 'succeeds' + ratelimit_bucket = 'ratelimitbucket' + connection.create_bucket(ratelimit_bucket) + (ret, out) = rgwadmin_rest(admin_conn, 
['ratelimit', 'info'], {'ratelimit-scope' : 'bucket', 'bucket' : ratelimit_bucket}) + assert ret == 200 + + # TESTCASE 'ratelimit' 'bucket' 'info' 'not existing bucket' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'ratelimit-scope' : 'bucket', 'bucket' : ratelimit_bucket + 'string'}) + assert ret == 404 + + # TESTCASE 'ratelimit' 'bucket' 'info' 'bucket not specified' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'ratelimit-scope' : 'bucket'}) + assert ret == 400 + + # TESTCASE 'ratelimit' 'global' 'info' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'global' : 'true'}) + assert ret == 200 + + # TESTCASE 'ratelimit' 'user' 'modify' 'not existing user' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'modify'], {'ratelimit-scope' : 'user', 'uid' : ratelimit_user + 'string', 'enabled' : 'true'}) + assert ret == 404 + + # TESTCASE 'ratelimit' 'user' 'modify' 'uid not specified' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'modify'], {'ratelimit-scope' : 'user'}) + assert ret == 400 + + # TESTCASE 'ratelimit' 'bucket' 'modify' 'not existing bucket' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'modify'], {'ratelimit-scope' : 'bucket', 'bucket' : ratelimit_bucket + 'string', 'enabled' : 'true'}) + assert ret == 404 + + # TESTCASE 'ratelimit' 'bucket' 'modify' 'bucket not specified' 'fails' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'modify'], {'ratelimit-scope' : 'bucket', 'enabled' : 'true'}) + assert ret == 400 + + # TESTCASE 'ratelimit' 'user' 'modifiy' 'enabled' 'max-read-bytes = 2' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'modify'], {'ratelimit-scope' : 'user', 'uid' : ratelimit_user, 'enabled' : 'true', 'max-read-bytes' : '2'}) + assert ret == 200 + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'ratelimit-scope' : 'user', 'uid' : ratelimit_user}) + assert ret == 200 + user_ratelimit = out['user_ratelimit'] + assert user_ratelimit['enabled'] == True + assert user_ratelimit['max_read_bytes'] == 2 + + # TESTCASE 'ratelimit' 'bucket' 'modifiy' 'enabled' 'max-write-bytes = 2' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'modify'], {'ratelimit-scope' : 'bucket', 'bucket' : ratelimit_bucket, 'enabled' : 'true', 'max-write-bytes' : '2'}) + assert ret == 200 + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'info'], {'ratelimit-scope' : 'bucket', 'bucket' : ratelimit_bucket}) + assert ret == 200 + bucket_ratelimit = out['bucket_ratelimit'] + assert bucket_ratelimit['enabled'] == True + assert bucket_ratelimit['max_write_bytes'] == 2 + + # TESTCASE 'ratelimit' 'global' 'modify' 'anonymous' 'enabled' 'succeeds' + (ret, out) = rgwadmin_rest(admin_conn, ['ratelimit', 'modify'], {'ratelimit-scope' : 'bucket', 'global': 'true', 'enabled' : 'true'}) + assert ret == 200
\ No newline at end of file diff --git a/qa/tasks/ragweed.py b/qa/tasks/ragweed.py new file mode 100644 index 000000000..e2b33527a --- /dev/null +++ b/qa/tasks/ragweed.py @@ -0,0 +1,372 @@ +""" +Run a set of s3 tests on rgw. +""" +from io import BytesIO +from configobj import ConfigObj +import base64 +import contextlib +import logging +import os +import random +import string + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.config import config as teuth_config +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +def get_ragweed_branches(config, client_conf): + """ + figure out the ragweed branch according to the per-client settings + + use force-branch is specified, and fall back to the ones deduced using ceph + branch under testing + """ + force_branch = client_conf.get('force-branch', None) + if force_branch: + return [force_branch] + else: + S3_BRANCHES = ['master', 'nautilus', 'mimic', + 'luminous', 'kraken', 'jewel'] + ceph_branch = config.get('branch') + suite_branch = config.get('suite_branch', ceph_branch) + if suite_branch in S3_BRANCHES: + branch = client_conf.get('branch', 'ceph-' + suite_branch) + else: + branch = client_conf.get('branch', suite_branch) + default_branch = client_conf.get('default-branch', None) + if default_branch: + return [branch, default_branch] + else: + return [branch] + +def get_ragweed_dir(testdir, client): + return '{}/ragweed.{}'.format(testdir, client) + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the s3 tests from the git builder. + Remove downloaded s3 file upon exit. + + The context passed in should be identical to the context + passed in to the main task. + """ + assert isinstance(config, dict) + log.info('Downloading ragweed...') + testdir = teuthology.get_testdir(ctx) + for (client, cconf) in config.items(): + ragweed_dir = get_ragweed_dir(testdir, client) + ragweed_repo = ctx.config.get('ragweed_repo', + teuth_config.ceph_git_base_url + 'ragweed.git') + for branch in get_ragweed_branches(ctx.config, cconf): + log.info("Using branch '%s' for ragweed", branch) + try: + ctx.cluster.only(client).sh( + script=f'git clone -b {branch} {ragweed_repo} {ragweed_dir}') + break + except Exception as e: + exc = e + else: + raise exc + + sha1 = cconf.get('sha1') + if sha1 is not None: + ctx.cluster.only(client).run( + args=[ + 'cd', ragweed_dir, + run.Raw('&&'), + 'git', 'reset', '--hard', sha1, + ], + ) + try: + yield + finally: + log.info('Removing ragweed...') + for client in config: + ragweed_dir = get_ragweed_dir(testdir, client) + ctx.cluster.only(client).run( + args=['rm', '-rf', ragweed_dir] + ) + + +def _config_user(ragweed_conf, section, user): + """ + Configure users for this section by stashing away keys, ids, and + email addresses. + """ + ragweed_conf[section].setdefault('user_id', user) + ragweed_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user)) + ragweed_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user)) + ragweed_conf[section].setdefault('access_key', ''.join(random.choice(string.ascii_uppercase) for i in range(20))) + ragweed_conf[section].setdefault('secret_key', base64.b64encode(os.urandom(40)).decode('ascii')) + + +@contextlib.contextmanager +def create_users(ctx, config, run_stages): + """ + Create a main and an alternate s3 user. 
+ """ + assert isinstance(config, dict) + + for client, properties in config['config'].items(): + run_stages[client] = properties.get('stages', 'prepare,check').split(',') + + log.info('Creating rgw users...') + testdir = teuthology.get_testdir(ctx) + users = {'user regular': 'ragweed', 'user system': 'sysuser'} + for client in config['clients']: + if not 'prepare' in run_stages[client]: + # should have been prepared in a previous run + continue + + ragweed_conf = config['ragweed_conf'][client] + ragweed_conf.setdefault('fixtures', {}) + ragweed_conf['rgw'].setdefault('bucket_prefix', 'test-' + client) + for section, user in users.items(): + _config_user(ragweed_conf, section, '{user}.{client}'.format(user=user, client=client)) + log.debug('Creating user {user} on {host}'.format(user=ragweed_conf[section]['user_id'], host=client)) + if user == 'sysuser': + sys_str = 'true' + else: + sys_str = 'false' + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client, + 'user', 'create', + '--uid', ragweed_conf[section]['user_id'], + '--display-name', ragweed_conf[section]['display_name'], + '--access-key', ragweed_conf[section]['access_key'], + '--secret', ragweed_conf[section]['secret_key'], + '--email', ragweed_conf[section]['email'], + '--system', sys_str, + ], + ) + try: + yield + finally: + for client in config['clients']: + if not 'check' in run_stages[client]: + # only remove user if went through the check stage + continue + for user in users.values(): + uid = '{user}.{client}'.format(user=user, client=client) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client, + 'user', 'rm', + '--uid', uid, + '--purge-data', + ], + ) + + +@contextlib.contextmanager +def configure(ctx, config, run_stages): + """ + Configure the local config files. 
+ """ + assert isinstance(config, dict) + log.info('Configuring ragweed...') + testdir = teuthology.get_testdir(ctx) + for client, properties in config['clients'].items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + preparing = 'prepare' in run_stages[client] + if not preparing: + # should have been prepared in a previous run + continue + + ragweed_conf = config['ragweed_conf'][client] + if properties is not None and 'slow_backend' in properties: + ragweed_conf['fixtures']['slow backend'] = properties['slow_backend'] + + conf_fp = BytesIO() + ragweed_conf.write(conf_fp) + remote.write_file( + path='{tdir}/archive/ragweed.{client}.conf'.format(tdir=testdir, client=client), + data=conf_fp.getvalue(), + ) + + log.info('Configuring boto...') + boto_src = os.path.join(os.path.dirname(__file__), 'boto.cfg.template') + for client, properties in config['clients'].items(): + with open(boto_src, 'r') as f: + (remote,) = ctx.cluster.only(client).remotes.keys() + conf = f.read().format( + idle_timeout=config.get('idle_timeout', 30) + ) + remote.write_file('{tdir}/boto.cfg'.format(tdir=testdir), conf) + + try: + yield + + finally: + log.info('Cleaning up boto...') + for client, properties in config['clients'].items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'rm', '-f', + '{tdir}/boto.cfg'.format(tdir=testdir), + ], + ) + +def get_toxvenv_dir(ctx): + return ctx.tox.venv_path + +def toxvenv_sh(ctx, remote, args, **kwargs): + activate = get_toxvenv_dir(ctx) + '/bin/activate' + return remote.sh(['source', activate, run.Raw('&&')] + args, **kwargs) + +@contextlib.contextmanager +def run_tests(ctx, config, run_stages): + """ + Run the ragweed after everything is set up. + + :param ctx: Context passed to task + :param config: specific configuration information + """ + assert isinstance(config, dict) + testdir = teuthology.get_testdir(ctx) + attrs = ["not fails_on_rgw"] + for client, client_config in config.items(): + ragweed_dir = get_ragweed_dir(testdir, client) + stages = ','.join(run_stages[client]) + args = [ + 'cd', ragweed_dir, run.Raw('&&'), + 'RAGWEED_CONF={tdir}/archive/ragweed.{client}.conf'.format(tdir=testdir, client=client), + 'RAGWEED_STAGES={stages}'.format(stages=stages), + 'BOTO_CONFIG={tdir}/boto.cfg'.format(tdir=testdir), + 'tox', + '--sitepackages', + '--', + '-v', + '-m', ' and '.join(attrs), + ] + if client_config is not None and 'extra_args' in client_config: + args.extend(client_config['extra_args']) + + (remote,) = ctx.cluster.only(client).remotes.keys() + toxvenv_sh(ctx, remote, args, label="ragweed tests against rgw") + yield + +@contextlib.contextmanager +def task(ctx, config): + """ + Run the ragweed suite against rgw. + + To run all tests on all clients:: + + tasks: + - ceph: + - rgw: + - ragweed: + + To restrict testing to particular clients:: + + tasks: + - ceph: + - rgw: [client.0] + - ragweed: [client.0] + + To run against a server on client.1 and increase the boto timeout to 10m:: + + tasks: + - ceph: + - rgw: [client.1] + - ragweed: + client.0: + rgw_server: client.1 + idle_timeout: 600 + stages: prepare,check + + To pass extra arguments to nose (e.g. 
to run a certain test):: + + tasks: + - ceph: + - rgw: [client.0] + - ragweed: + client.0: + extra_args: ['test_s3:test_object_acl_grand_public_read'] + client.1: + extra_args: ['--exclude', 'test_100_continue'] + """ + assert hasattr(ctx, 'rgw'), 'ragweed must run after the rgw task' + assert hasattr(ctx, 'tox'), 'ragweed must run after the tox task' + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task ragweed only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('ragweed', {})) + + log.debug('ragweed config is %s', config) + + ragweed_conf = {} + for client in clients: + # use rgw_server endpoint if given, or default to same client + target = config[client].get('rgw_server', client) + + endpoint = ctx.rgw.role_endpoints.get(target) + assert endpoint, 'ragweed: no rgw endpoint for {}'.format(target) + + ragweed_conf[client] = ConfigObj( + indent_type='', + infile={ + 'rgw': + { + 'host' : endpoint.dns_name, + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + }, + 'fixtures' : {}, + 'user system' : {}, + 'user regular' : {}, + 'rados': + { + 'ceph_conf' : '/etc/ceph/ceph.conf', + }, + } + ) + + run_stages = {} + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: create_users(ctx=ctx, config=dict( + clients=clients, + ragweed_conf=ragweed_conf, + config=config, + ), + run_stages=run_stages), + lambda: configure(ctx=ctx, config=dict( + clients=config, + ragweed_conf=ragweed_conf, + ), + run_stages=run_stages), + lambda: run_tests(ctx=ctx, config=config, run_stages=run_stages), + ): + pass + yield diff --git a/qa/tasks/rbd.py b/qa/tasks/rbd.py new file mode 100644 index 000000000..b0ffaba83 --- /dev/null +++ b/qa/tasks/rbd.py @@ -0,0 +1,747 @@ +""" +Rbd testing task +""" +import contextlib +import logging +import os +import tempfile +import sys + +from io import StringIO +from teuthology.orchestra import run +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.parallel import parallel +from teuthology.task.common_fs_utils import generic_mkfs +from teuthology.task.common_fs_utils import generic_mount +from teuthology.task.common_fs_utils import default_image_name + + +#V1 image unsupported but required for testing purposes +os.environ["RBD_FORCE_ALLOW_V1"] = "1" + +log = logging.getLogger(__name__) + +ENCRYPTION_PASSPHRASE = "password" +CLONE_ENCRYPTION_PASSPHRASE = "password2" + +@contextlib.contextmanager +def create_image(ctx, config): + """ + Create an rbd image. + + For example:: + + tasks: + - ceph: + - rbd.create_image: + client.0: + image_name: testimage + image_size: 100 + image_format: 1 + encryption_format: luks2 + client.1: + + Image size is expressed as a number of megabytes; default value + is 10240. + + Image format value must be either 1 or 2; default value is 1. 
+ + """ + assert isinstance(config, dict) or isinstance(config, list), \ + "task create_image only supports a list or dictionary for configuration" + + if isinstance(config, dict): + images = config.items() + else: + images = [(role, None) for role in config] + + testdir = teuthology.get_testdir(ctx) + passphrase_file = '{tdir}/passphrase'.format(tdir=testdir) + for role, properties in images: + if properties is None: + properties = {} + name = properties.get('image_name', default_image_name(role)) + size = properties.get('image_size', 10240) + fmt = properties.get('image_format', 1) + encryption_format = properties.get('encryption_format', 'none') + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Creating image {name} with size {size}'.format(name=name, + size=size)) + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', + '-p', 'rbd', + 'create', + '--size', str(size), + name, + ] + # omit format option if using the default (format 1) + # since old versions of don't support it + if int(fmt) != 1: + args += ['--image-format', str(fmt)] + remote.run(args=args) + + if encryption_format != 'none': + remote.run( + args=[ + 'echo', + ENCRYPTION_PASSPHRASE, + run.Raw('>'), + passphrase_file + ] + ) + remote.run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', + 'encryption', + 'format', + name, + encryption_format, + passphrase_file, + '-p', + 'rbd' + ] + ) + try: + yield + finally: + log.info('Deleting rbd images...') + remote.run(args=['rm', '-f', passphrase_file]) + for role, properties in images: + if properties is None: + properties = {} + name = properties.get('image_name', default_image_name(role)) + (remote,) = ctx.cluster.only(role).remotes.keys() + remote.run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', + '-p', 'rbd', + 'rm', + name, + ], + ) + +@contextlib.contextmanager +def clone_image(ctx, config): + """ + Clones a parent imag + + For example:: + + tasks: + - ceph: + - rbd.clone_image: + client.0: + parent_name: testimage + image_name: cloneimage + encryption_format: luks2 + """ + assert isinstance(config, dict) or isinstance(config, list), \ + "task clone_image only supports a list or dictionary for configuration" + + if isinstance(config, dict): + images = config.items() + else: + images = [(role, None) for role in config] + + testdir = teuthology.get_testdir(ctx) + clone_passphrase_file = '{tdir}/clone-passphrase'.format(tdir=testdir) + for role, properties in images: + if properties is None: + properties = {} + + name = properties.get('image_name', default_image_name(role)) + parent_name = properties.get('parent_name') + assert parent_name is not None, \ + "parent_name is required" + parent_spec = '{name}@{snap}'.format(name=parent_name, snap=name) + + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Clone image {parent} to {child}'.format(parent=parent_name, + child=name)) + + commands = [('snap', 'create', parent_spec), + ('snap', 'protect', parent_spec), + ('clone', parent_spec, name) + ] + + encryption_format = properties.get('encryption_format', 'none') + if encryption_format != 'none': + remote.run( + args=[ + 'echo', + CLONE_ENCRYPTION_PASSPHRASE, + run.Raw('>'), + clone_passphrase_file + ] + ) + + commands.append( + ('encryption', 'format', name, encryption_format, + clone_passphrase_file) + ) + + for cmd in commands: + args = [ + 'adjust-ulimits', + 'ceph-coverage', + 
'{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', '-p', 'rbd' + ] + args.extend(cmd) + remote.run(args=args) + + try: + yield + finally: + log.info('Deleting rbd clones...') + remote.run(args=['rm', '-f', clone_passphrase_file]) + for role, properties in images: + if properties is None: + properties = {} + name = properties.get('image_name', default_image_name(role)) + parent_name = properties.get('parent_name') + parent_spec = '{name}@{snap}'.format(name=parent_name, snap=name) + + (remote,) = ctx.cluster.only(role).remotes.keys() + + for cmd in [('rm', name), + ('snap', 'unprotect', parent_spec), + ('snap', 'rm', parent_spec)]: + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', '-p', 'rbd' + ] + args.extend(cmd) + remote.run(args=args) + +@contextlib.contextmanager +def modprobe(ctx, config): + """ + Load the rbd kernel module.. + + For example:: + + tasks: + - ceph: + - rbd.create_image: [client.0] + - rbd.modprobe: [client.0] + """ + log.info('Loading rbd kernel module...') + for role in config: + (remote,) = ctx.cluster.only(role).remotes.keys() + remote.run( + args=[ + 'sudo', + 'modprobe', + 'rbd', + ], + ) + try: + yield + finally: + log.info('Unloading rbd kernel module...') + for role in config: + (remote,) = ctx.cluster.only(role).remotes.keys() + remote.run( + args=[ + 'sudo', + 'modprobe', + '-r', + 'rbd', + # force errors to be ignored; necessary if more + # than one device was created, which may mean + # the module isn't quite ready to go the first + # time through. + run.Raw('||'), + 'true', + ], + ) + +@contextlib.contextmanager +def dev_create(ctx, config): + """ + Map block devices to rbd images. + + For example:: + + tasks: + - ceph: + - rbd.create_image: [client.0] + - rbd.modprobe: [client.0] + - rbd.dev_create: + client.0: + image_name: testimage.client.0 + encryption_format: luks2 + parent_encryption_format: luks1 + """ + assert isinstance(config, dict) or isinstance(config, list), \ + "task dev_create only supports a list or dictionary for configuration" + + if isinstance(config, dict): + images = config.items() + else: + images = [(role, None) for role in config] + + log.info('Creating rbd block devices...') + + testdir = teuthology.get_testdir(ctx) + passphrase_file = '{tdir}/passphrase'.format(tdir=testdir) + clone_passphrase_file = '{tdir}/clone-passphrase'.format(tdir=testdir) + device_path = {} + + for role, properties in images: + if properties is None: + properties = {} + name = properties.get('image_name', default_image_name(role)) + parent_encryption_format = properties.get('parent_encryption_format', + 'none') + encryption_format = properties.get('encryption_format', + parent_encryption_format) + (remote,) = ctx.cluster.only(role).remotes.keys() + + if encryption_format == 'none' and parent_encryption_format == 'none': + device_path[role] = '/dev/rbd/rbd/{image}'.format(image=name) + device_specific_args = [] + else: + device_specific_args = ['-t', 'nbd', '-o'] + + is_cloned = properties.get('parent_name') is not None + encryption_args = "" + if is_cloned and properties.get('encryption_format') != 'none': + remote.run( + args=[ + 'echo', + CLONE_ENCRYPTION_PASSPHRASE, + run.Raw('>'), + clone_passphrase_file + ] + ) + + encryption_args = \ + 'encryption-format=%s,encryption-passphrase-file=%s' % ( + encryption_format, clone_passphrase_file) + + if not is_cloned or parent_encryption_format != 'none': + remote.run( + args=[ + 'echo', + ENCRYPTION_PASSPHRASE, + run.Raw('>'), + passphrase_file + 
] + ) + + if is_cloned and properties.get('encryption_format') != 'none': + encryption_args += "," + + if parent_encryption_format != 'none': + encryption_args += \ + 'encryption-format=%s,encryption-passphrase-file=%s' % ( + parent_encryption_format, passphrase_file) + else: + encryption_args += \ + 'encryption-format=%s,encryption-passphrase-file=%s' % ( + encryption_format, passphrase_file) + + device_specific_args.append(encryption_args) + + map_fp = StringIO() + remote.run( + args=[ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', + '--id', role.rsplit('.')[-1], + '-p', 'rbd', + 'map', + name] + device_specific_args, + stdout=map_fp, + ) + + if encryption_format != 'none' or parent_encryption_format != 'none': + device_path[role] = map_fp.getvalue().rstrip() + properties['device_path'] = device_path[role] + remote.run(args=['sudo', 'chmod', '666', device_path[role]]) + try: + yield + finally: + log.info('Unmapping rbd devices...') + remote.run(args=['rm', '-f', passphrase_file, clone_passphrase_file]) + for role, properties in images: + if not device_path.get(role): + continue + + if properties is None: + properties = {} + encryption_format = properties.get('encryption_format', 'none') + parent_encryption_format = properties.get( + 'parent_encryption_format', 'none') + (remote,) = ctx.cluster.only(role).remotes.keys() + + if encryption_format == 'none' and \ + parent_encryption_format == 'none': + device_specific_args = [] + else: + device_specific_args = ['-t', 'nbd'] + + remote.run( + args=[ + 'LD_LIBRARY_PATH={tdir}/binary/usr/local/lib'.format(tdir=testdir), + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rbd', + '-p', 'rbd', + 'unmap', + device_path[role], + ] + device_specific_args, + ) + + +def rbd_devname_rtn(ctx, image): + return '/dev/rbd/rbd/{image}'.format(image=image) + +def canonical_path(ctx, role, path): + """ + Determine the canonical path for a given path on the host + representing the given role. A canonical path contains no + . or .. components, and includes no symbolic links. + """ + version_fp = StringIO() + ctx.cluster.only(role).run( + args=[ 'readlink', '-f', path ], + stdout=version_fp, + ) + canonical_path = version_fp.getvalue().rstrip('\n') + version_fp.close() + return canonical_path + +@contextlib.contextmanager +def run_xfstests(ctx, config): + """ + Run xfstests over specified devices. + + Warning: both the test and scratch devices specified will be + overwritten. Normally xfstests modifies (but does not destroy) + the test device, but for now the run script used here re-makes + both filesystems. + + Note: Only one instance of xfstests can run on a single host at + a time, although this is not enforced. + + This task in its current form needs some improvement. For + example, it assumes all roles provided in the config are + clients, and that the config provided is a list of key/value + pairs. For now please use the xfstests() interface, below. 
+ + For example:: + + tasks: + - ceph: + - rbd.run_xfstests: + client.0: + count: 2 + test_dev: 'test_dev' + scratch_dev: 'scratch_dev' + fs_type: 'xfs' + tests: 'generic/100 xfs/003 xfs/005 xfs/006 generic/015' + exclude: + - generic/42 + randomize: true + """ + with parallel() as p: + for role, properties in config.items(): + p.spawn(run_xfstests_one_client, ctx, role, properties) + exc = None + while True: + try: + p.next() + except StopIteration: + break + except: + exc = sys.exc_info()[1] + if exc is not None: + raise exc + yield + +def run_xfstests_one_client(ctx, role, properties): + """ + Spawned routine to handle xfs tests for a single client + """ + testdir = teuthology.get_testdir(ctx) + try: + count = properties.get('count') + test_dev = properties.get('test_dev') + assert test_dev is not None, \ + "task run_xfstests requires test_dev to be defined" + test_dev = canonical_path(ctx, role, test_dev) + + scratch_dev = properties.get('scratch_dev') + assert scratch_dev is not None, \ + "task run_xfstests requires scratch_dev to be defined" + scratch_dev = canonical_path(ctx, role, scratch_dev) + + fs_type = properties.get('fs_type') + tests = properties.get('tests') + exclude_list = properties.get('exclude') + randomize = properties.get('randomize') + + (remote,) = ctx.cluster.only(role).remotes.keys() + + # Fetch the test script + test_root = teuthology.get_testdir(ctx) + test_script = 'run_xfstests.sh' + test_path = os.path.join(test_root, test_script) + + xfstests_url = properties.get('xfstests_url') + assert xfstests_url is not None, \ + "task run_xfstests requires xfstests_url to be defined" + + xfstests_krbd_url = xfstests_url + '/' + test_script + + log.info('Fetching {script} for {role} from {url}'.format( + script=test_script, + role=role, + url=xfstests_krbd_url)) + + args = [ 'wget', '-O', test_path, '--', xfstests_krbd_url ] + remote.run(args=args) + + log.info('Running xfstests on {role}:'.format(role=role)) + log.info(' iteration count: {count}:'.format(count=count)) + log.info(' test device: {dev}'.format(dev=test_dev)) + log.info(' scratch device: {dev}'.format(dev=scratch_dev)) + log.info(' using fs_type: {fs_type}'.format(fs_type=fs_type)) + log.info(' tests to run: {tests}'.format(tests=tests)) + log.info(' exclude list: {}'.format(' '.join(exclude_list))) + log.info(' randomize: {randomize}'.format(randomize=randomize)) + + if exclude_list: + with tempfile.NamedTemporaryFile(mode='w', prefix='exclude') as exclude_file: + for test in exclude_list: + exclude_file.write("{}\n".format(test)) + exclude_file.flush() + remote.put_file(exclude_file.name, exclude_file.name) + + # Note that the device paths are interpreted using + # readlink -f <path> in order to get their canonical + # pathname (so it matches what the kernel remembers). + args = [ + '/usr/bin/sudo', + 'TESTDIR={tdir}'.format(tdir=testdir), + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + '/bin/bash', + test_path, + '-c', str(count), + '-f', fs_type, + '-t', test_dev, + '-s', scratch_dev, + ] + if exclude_list: + args.extend(['-x', exclude_file.name]) + if randomize: + args.append('-r') + if tests: + args.extend(['--', tests]) + remote.run(args=args, logger=log.getChild(role)) + finally: + log.info('Removing {script} on {role}'.format(script=test_script, + role=role)) + remote.run(args=['rm', '-f', test_path]) + +@contextlib.contextmanager +def xfstests(ctx, config): + """ + Run xfstests over rbd devices. 
This interface sets up all + required configuration automatically if not otherwise specified. + Note that only one instance of xfstests can run on a single host + at a time. By default, the set of tests specified is run once. + If a (non-zero) count value is supplied, the complete set of + tests will be run that number of times. + + For example:: + + tasks: + - ceph: + # Image sizes are in MB + - rbd.xfstests: + client.0: + count: 3 + test_image: 'test_image' + test_size: 250 + test_format: 2 + scratch_image: 'scratch_image' + scratch_size: 250 + scratch_format: 1 + fs_type: 'xfs' + tests: 'generic/100 xfs/003 xfs/005 xfs/006 generic/015' + exclude: + - generic/42 + randomize: true + xfstests_url: 'https://raw.github.com/ceph/ceph-ci/wip-55555/qa' + """ + if config is None: + config = { 'all': None } + assert isinstance(config, dict) or isinstance(config, list), \ + "task xfstests only supports a list or dictionary for configuration" + if isinstance(config, dict): + config = teuthology.replace_all_with_clients(ctx.cluster, config) + runs = config.items() + else: + runs = [(role, None) for role in config] + + running_xfstests = {} + for role, properties in runs: + assert role.startswith('client.'), \ + "task xfstests can only run on client nodes" + for host, roles_for_host in ctx.cluster.remotes.items(): + if role in roles_for_host: + assert host not in running_xfstests, \ + "task xfstests allows only one instance at a time per host" + running_xfstests[host] = True + + images_config = {} + scratch_config = {} + modprobe_config = {} + image_map_config = {} + scratch_map_config = {} + xfstests_config = {} + for role, properties in runs: + if properties is None: + properties = {} + + test_image = properties.get('test_image', 'test_image.{role}'.format(role=role)) + test_size = properties.get('test_size', 10000) # 10G + test_fmt = properties.get('test_format', 1) + scratch_image = properties.get('scratch_image', 'scratch_image.{role}'.format(role=role)) + scratch_size = properties.get('scratch_size', 10000) # 10G + scratch_fmt = properties.get('scratch_format', 1) + + images_config[role] = dict( + image_name=test_image, + image_size=test_size, + image_format=test_fmt, + ) + + scratch_config[role] = dict( + image_name=scratch_image, + image_size=scratch_size, + image_format=scratch_fmt, + ) + + xfstests_branch = properties.get('xfstests_branch', 'master') + xfstests_url = properties.get('xfstests_url', 'https://raw.github.com/ceph/ceph/{branch}/qa'.format(branch=xfstests_branch)) + + xfstests_config[role] = dict( + count=properties.get('count', 1), + test_dev='/dev/rbd/rbd/{image}'.format(image=test_image), + scratch_dev='/dev/rbd/rbd/{image}'.format(image=scratch_image), + fs_type=properties.get('fs_type', 'xfs'), + randomize=properties.get('randomize', False), + tests=properties.get('tests'), + exclude=properties.get('exclude', []), + xfstests_url=xfstests_url, + ) + + log.info('Setting up xfstests using RBD images:') + log.info(' test ({size} MB): {image}'.format(size=test_size, + image=test_image)) + log.info(' scratch ({size} MB): {image}'.format(size=scratch_size, + image=scratch_image)) + modprobe_config[role] = None + image_map_config[role] = {'image_name': test_image} + scratch_map_config[role] = {'image_name': scratch_image} + + with contextutil.nested( + lambda: create_image(ctx=ctx, config=images_config), + lambda: create_image(ctx=ctx, config=scratch_config), + lambda: modprobe(ctx=ctx, config=modprobe_config), + lambda: dev_create(ctx=ctx, config=image_map_config), + lambda: 
dev_create(ctx=ctx, config=scratch_map_config), + lambda: run_xfstests(ctx=ctx, config=xfstests_config), + ): + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Create and mount an rbd image. + + For example, you can specify which clients to run on:: + + tasks: + - ceph: + - rbd: [client.0, client.1] + + There are a few image options:: + + tasks: + - ceph: + - rbd: + client.0: # uses defaults + client.1: + image_name: foo + image_size: 2048 + image_format: 2 + fs_type: xfs + + To use default options on all clients:: + + tasks: + - ceph: + - rbd: + all: + + To create 20GiB images and format them with xfs on all clients:: + + tasks: + - ceph: + - rbd: + all: + image_size: 20480 + fs_type: xfs + """ + if config is None: + config = { 'all': None } + norm_config = config + if isinstance(config, dict): + norm_config = teuthology.replace_all_with_clients(ctx.cluster, config) + if isinstance(norm_config, dict): + role_images = {} + for role, properties in norm_config.items(): + if properties is None: + properties = {} + role_images[role] = properties.get('image_name') + else: + role_images = norm_config + + log.debug('rbd config is: %s', norm_config) + + with contextutil.nested( + lambda: create_image(ctx=ctx, config=norm_config), + lambda: modprobe(ctx=ctx, config=norm_config), + lambda: dev_create(ctx=ctx, config=norm_config), + lambda: generic_mkfs(ctx=ctx, config=norm_config, + devname_rtn=rbd_devname_rtn), + lambda: generic_mount(ctx=ctx, config=role_images, + devname_rtn=rbd_devname_rtn), + ): + yield diff --git a/qa/tasks/rbd_fio.py b/qa/tasks/rbd_fio.py new file mode 100644 index 000000000..959d07d49 --- /dev/null +++ b/qa/tasks/rbd_fio.py @@ -0,0 +1,225 @@ +""" + Long running fio tests on rbd mapped devices for format/features provided in config + Many fio parameters can be configured so that this task can be used along with thrash/power-cut tests + and exercise IO on full disk for all format/features + - This test should not be run on VM due to heavy use of resource + +""" +import contextlib +import json +import logging +import os + +from teuthology.parallel import parallel +from teuthology import misc as teuthology +from tempfile import NamedTemporaryFile +from teuthology.orchestra import run +from teuthology.packaging import install_package, remove_package + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + client.0: + fio-io-size: 100g or 80% or 100m + fio-version: 2.2.9 + formats: [2] + features: [[layering],[striping],[layering,exclusive-lock,object-map]] + test-clone-io: 1 #remove this option to not run create rbd clone and not run io on clone + io-engine: "sync or rbd or any io-engine" + rw: randrw + client.1: + fio-io-size: 100g + fio-version: 2.2.9 + rw: read + image-size:20480 + +or + all: + fio-io-size: 400g + rw: randrw + formats: [2] + features: [[layering],[striping]] + io-engine: libaio + + Create rbd image + device and exercise IO for format/features provided in config file + Config can be per client or one config can be used for all clients, fio jobs are run in parallel for client provided + + """ + if config.get('all'): + client_config = config['all'] + clients = ctx.cluster.only(teuthology.is_type('client')) + rbd_test_dir = teuthology.get_testdir(ctx) + "/rbd_fio_test" + for remote,role in clients.remotes.items(): + if 'client_config' in locals(): + with parallel() as p: + p.spawn(run_fio, remote, client_config, rbd_test_dir) + else: + for client_config in config: + if client_config in role: + with 
parallel() as p: + p.spawn(run_fio, remote, config[client_config], rbd_test_dir) + + yield + + +def get_ioengine_package_name(ioengine, remote): + system_type = teuthology.get_system_type(remote) + if ioengine == 'rbd': + return 'librbd1-devel' if system_type == 'rpm' else 'librbd-dev' + elif ioengine == 'libaio': + return 'libaio-devel' if system_type == 'rpm' else 'libaio-dev' + else: + return None + + +def run_rbd_map(remote, image, iodepth): + iodepth = max(iodepth, 128) # RBD_QUEUE_DEPTH_DEFAULT + dev = remote.sh(['sudo', 'rbd', 'device', 'map', '-o', + 'queue_depth={}'.format(iodepth), image]).rstrip('\n') + remote.sudo_write_file( + '/sys/block/{}/queue/nr_requests'.format(os.path.basename(dev)), + str(iodepth)) + return dev + + +def run_fio(remote, config, rbd_test_dir): + """ + create fio config file with options based on above config + get the fio from github, generate binary, and use it to run on + the generated fio config file + """ + fio_config=NamedTemporaryFile(mode='w', prefix='fio_rbd_', dir='/tmp/', delete=False) + fio_config.write('[global]\n') + if config.get('io-engine'): + ioengine=config['io-engine'] + fio_config.write('ioengine={ioe}\n'.format(ioe=ioengine)) + else: + fio_config.write('ioengine=sync\n') + if config.get('bs'): + bs=config['bs'] + fio_config.write('bs={bs}\n'.format(bs=bs)) + else: + fio_config.write('bs=4k\n') + iodepth = config.get('io-depth', 2) + fio_config.write('iodepth={iod}\n'.format(iod=iodepth)) + if config.get('fio-io-size'): + size=config['fio-io-size'] + fio_config.write('size={size}\n'.format(size=size)) + else: + fio_config.write('size=100m\n') + + fio_config.write('time_based\n') + if config.get('runtime'): + runtime=config['runtime'] + fio_config.write('runtime={runtime}\n'.format(runtime=runtime)) + else: + fio_config.write('runtime=1800\n') + fio_config.write('allow_file_create=0\n') + image_size=10240 + if config.get('image_size'): + image_size=config['image_size'] + + formats=[1,2] + features=[['layering'],['striping'],['exclusive-lock','object-map']] + fio_version='3.32' + if config.get('formats'): + formats=config['formats'] + if config.get('features'): + features=config['features'] + if config.get('fio-version'): + fio_version=config['fio-version'] + + # handle package required for ioengine, if any + sn=remote.shortname + ioengine_pkg = get_ioengine_package_name(ioengine, remote) + if ioengine_pkg: + install_package(ioengine_pkg, remote) + + fio_config.write('norandommap\n') + if ioengine == 'rbd': + fio_config.write('clientname=admin\n') + fio_config.write('pool=rbd\n') + fio_config.write('invalidate=0\n') + elif ioengine == 'libaio': + fio_config.write('direct=1\n') + for frmt in formats: + for feature in features: + log.info("Creating rbd images on {sn}".format(sn=sn)) + feature_name = '-'.join(feature) + rbd_name = 'i{i}f{f}{sn}'.format(i=frmt,f=feature_name,sn=sn) + rbd_snap_name = 'i{i}f{f}{sn}@i{i}f{f}{sn}Snap'.format(i=frmt,f=feature_name,sn=sn) + rbd_clone_name = 'i{i}f{f}{sn}Clone'.format(i=frmt,f=feature_name,sn=sn) + create_args=['rbd', 'create', + '--size', '{size}'.format(size=image_size), + '--image', rbd_name, + '--image-format', '{f}'.format(f=frmt)] + map(lambda x: create_args.extend(['--image-feature', x]), feature) + if config.get('thick-provision'): + create_args.append('--thick-provision') + remote.run(args=create_args) + remote.run(args=['rbd', 'info', rbd_name]) + if ioengine != 'rbd': + rbd_dev = run_rbd_map(remote, rbd_name, iodepth) + if config.get('test-clone-io'): + log.info("Testing clones using 
fio") + remote.run(args=['rbd', 'snap', 'create', rbd_snap_name]) + remote.run(args=['rbd', 'snap', 'protect', rbd_snap_name]) + remote.run(args=['rbd', 'clone', rbd_snap_name, rbd_clone_name]) + rbd_clone_dev = run_rbd_map(remote, rbd_clone_name, iodepth) + fio_config.write('[{rbd_dev}]\n'.format(rbd_dev=rbd_dev)) + if config.get('rw'): + rw=config['rw'] + fio_config.write('rw={rw}\n'.format(rw=rw)) + else: + fio_config .write('rw=randrw\n') + fio_config.write('filename={rbd_dev}\n'.format(rbd_dev=rbd_dev)) + if config.get('test-clone-io'): + fio_config.write('[{rbd_clone_dev}]\n'.format(rbd_clone_dev=rbd_clone_dev)) + fio_config.write('rw={rw}\n'.format(rw=rw)) + fio_config.write('filename={rbd_clone_dev}\n'.format(rbd_clone_dev=rbd_clone_dev)) + else: + if config.get('test-clone-io'): + log.info("Testing clones using fio") + remote.run(args=['rbd', 'snap', 'create', rbd_snap_name]) + remote.run(args=['rbd', 'snap', 'protect', rbd_snap_name]) + remote.run(args=['rbd', 'clone', rbd_snap_name, rbd_clone_name]) + fio_config.write('[{img_name}]\n'.format(img_name=rbd_name)) + if config.get('rw'): + rw=config['rw'] + fio_config.write('rw={rw}\n'.format(rw=rw)) + else: + fio_config.write('rw=randrw\n') + fio_config.write('rbdname={img_name}\n'.format(img_name=rbd_name)) + if config.get('test-clone-io'): + fio_config.write('[{clone_img_name}]\n'.format(clone_img_name=rbd_clone_name)) + fio_config.write('rw={rw}\n'.format(rw=rw)) + fio_config.write('rbdname={clone_img_name}\n'.format(clone_img_name=rbd_clone_name)) + + + fio_config.close() + remote.put_file(fio_config.name,fio_config.name) + try: + log.info("Running rbd feature - fio test on {sn}".format(sn=sn)) + fio = "https://github.com/axboe/fio/archive/fio-" + fio_version + ".tar.gz" + remote.run(args=['mkdir', run.Raw(rbd_test_dir),]) + remote.run(args=['cd' , run.Raw(rbd_test_dir), + run.Raw(';'), 'wget', fio, run.Raw(';'), run.Raw('tar -xvf fio*tar.gz'), run.Raw(';'), + run.Raw('cd fio-fio*'), run.Raw(';'), './configure', run.Raw(';'), 'make']) + remote.run(args=['ceph', '-s']) + remote.run(args=[run.Raw('{tdir}/fio-fio-{v}/fio --showcmd {f}'.format(tdir=rbd_test_dir,v=fio_version,f=fio_config.name))]) + remote.run(args=['sudo', run.Raw('{tdir}/fio-fio-{v}/fio {f}'.format(tdir=rbd_test_dir,v=fio_version,f=fio_config.name))]) + remote.run(args=['ceph', '-s']) + finally: + out = remote.sh('rbd device list --format=json') + mapped_images = json.loads(out) + if mapped_images: + log.info("Unmapping rbd images on {sn}".format(sn=sn)) + for image in mapped_images: + remote.run(args=['sudo', 'rbd', 'device', 'unmap', + str(image['device'])]) + log.info("Cleaning up fio install") + remote.run(args=['rm','-rf', run.Raw(rbd_test_dir)]) + if ioengine_pkg: + remove_package(ioengine_pkg, remote) diff --git a/qa/tasks/rbd_fsx.py b/qa/tasks/rbd_fsx.py new file mode 100644 index 000000000..efea7208e --- /dev/null +++ b/qa/tasks/rbd_fsx.py @@ -0,0 +1,115 @@ +""" +Run fsx on an rbd image +""" +import contextlib +import logging + +from teuthology.exceptions import ConfigError +from teuthology.parallel import parallel +from teuthology import misc as teuthology +from tasks.ceph_manager import get_valgrind_args + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run fsx on an rbd image. + + Currently this requires running as client.admin + to create a pool. 
+ + Specify which clients to run on as a list:: + + tasks: + ceph: + rbd_fsx: + clients: [client.0, client.1] + + You can optionally change some properties of fsx: + + tasks: + ceph: + rbd_fsx: + clients: <list of clients> + seed: <random seed number, or 0 to use the time> + ops: <number of operations to do> + size: <maximum image size in bytes> + valgrind: [--tool=<valgrind tool>] + """ + log.info('starting rbd_fsx...') + with parallel() as p: + for role in config['clients']: + p.spawn(_run_one_client, ctx, config, role) + yield + +def _run_one_client(ctx, config, role): + """Spawned task that runs the client""" + krbd = config.get('krbd', False) + nbd = config.get('nbd', False) + testdir = teuthology.get_testdir(ctx) + (remote,) = ctx.cluster.only(role).remotes.keys() + + args = [] + if krbd or nbd: + args.append('sudo') # rbd(-nbd) map/unmap need privileges + args.extend([ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir) + ]) + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('rbd_fsx', {})) + + if config.get('valgrind'): + args = get_valgrind_args( + testdir, + 'fsx_{id}'.format(id=role), + args, + config.get('valgrind') + ) + + cluster_name, type_, client_id = teuthology.split_role(role) + if type_ != 'client': + msg = 'client role ({0}) must be a client'.format(role) + raise ConfigError(msg) + + args.extend([ + 'ceph_test_librbd_fsx', + '--cluster', cluster_name, + '--id', client_id, + '-d', # debug output for all operations + '-W', '-R', # mmap doesn't work with rbd + '-p', str(config.get('progress_interval', 100)), # show progress + '-P', '{tdir}/archive'.format(tdir=testdir), + '-r', str(config.get('readbdy',1)), + '-w', str(config.get('writebdy',1)), + '-t', str(config.get('truncbdy',1)), + '-h', str(config.get('holebdy',1)), + '-l', str(config.get('size', 250000000)), + '-S', str(config.get('seed', 0)), + '-N', str(config.get('ops', 1000)), + ]) + if krbd: + args.append('-K') # -K enables krbd mode + if nbd: + args.append('-M') # -M enables nbd mode + if config.get('direct_io', False): + args.append('-Z') # -Z use direct IO + if not config.get('randomized_striping', True): + args.append('-U') # -U disables randomized striping + if not config.get('punch_holes', True): + args.append('-H') # -H disables discard ops + if config.get('deep_copy', False): + args.append('-g') # -g deep copy instead of clone + if config.get('journal_replay', False): + args.append('-j') # -j replay all IO events from journal + if config.get('keep_images', False): + args.append('-k') # -k keep images on success + args.extend([ + config.get('pool_name', 'pool_{pool}'.format(pool=role)), + 'image_{image}'.format(image=role), + ]) + + remote.run(args=args) diff --git a/qa/tasks/rbd_mirror.py b/qa/tasks/rbd_mirror.py new file mode 100644 index 000000000..5da252560 --- /dev/null +++ b/qa/tasks/rbd_mirror.py @@ -0,0 +1,120 @@ +""" +Task for running rbd mirroring daemons and configuring mirroring +""" + +import logging + +from teuthology.orchestra import run +from teuthology import misc +from teuthology.exceptions import ConfigError +from teuthology.task import Task +from tasks.ceph_manager import get_valgrind_args +from tasks.util import get_remote_for_role + +log = logging.getLogger(__name__) + + +class RBDMirror(Task): + """ + Run an rbd-mirror daemon to sync rbd images between clusters. + + This requires two clients (one from each cluster) on the same host + to connect with. 
The pool configuration should be adjusted by later + test scripts to include the remote client and cluster name. This task + just needs to know how to connect to the local cluster. + + For example: + + roles: + - [primary.mon.a, primary.osd.0, primary.osd.1, primary.osd.2] + - [secondary.mon.a, secondary.osd.0, secondary.osd.1, secondary.osd.2] + - [primary.client.mirror, secondary.client.mirror] + tasks: + - ceph: + cluster: primary + - ceph: + cluster: secondary + - rbd-mirror: + client: primary.client.mirror + + To mirror back to the primary cluster as well, add another + rbd_mirror instance: + + - rbd-mirror: + client: secondary.client.mirror + + Possible options for this task are: + + client: role - ceph client to connect as + valgrind: [--tool=<valgrind tool>] - none by default + coverage: bool - whether this run may be collecting coverage data + thrash: bool - whether this run may be thrashed + """ + def __init__(self, ctx, config): + super(RBDMirror, self).__init__(ctx, config) + self.log = log + + def setup(self): + super(RBDMirror, self).setup() + try: + self.client = self.config['client'] + except KeyError: + raise ConfigError('rbd-mirror requires a client to connect with') + + self.cluster_name, type_, self.client_id = misc.split_role(self.client) + + if type_ != 'client': + msg = 'client role ({0}) must be a client'.format(self.client) + raise ConfigError(msg) + + self.remote = get_remote_for_role(self.ctx, self.client) + + def begin(self): + super(RBDMirror, self).begin() + testdir = misc.get_testdir(self.ctx) + daemon_signal = 'kill' + if 'coverage' in self.config or 'valgrind' in self.config or \ + self.config.get('thrash', False): + daemon_signal = 'term' + + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'daemon-helper', + daemon_signal, + ] + + if 'valgrind' in self.config: + args = get_valgrind_args( + testdir, + 'rbd-mirror-{id}'.format(id=self.client), + args, + self.config.get('valgrind') + ) + + args.extend([ + 'rbd-mirror', '--foreground', + '--cluster', + self.cluster_name, + '--id', + self.client_id, + ]) + + self.ctx.daemons.add_daemon( + self.remote, 'rbd-mirror', self.client, + cluster=self.cluster_name, + args=args, + logger=self.log.getChild(self.client), + stdin=run.PIPE, + wait=False, + ) + + def end(self): + mirror_daemon = self.ctx.daemons.get_daemon('rbd-mirror', + self.client, + self.cluster_name) + mirror_daemon.stop() + super(RBDMirror, self).end() + +task = RBDMirror diff --git a/qa/tasks/rbd_mirror_thrash.py b/qa/tasks/rbd_mirror_thrash.py new file mode 100644 index 000000000..a42d19e70 --- /dev/null +++ b/qa/tasks/rbd_mirror_thrash.py @@ -0,0 +1,218 @@ +""" +Task for thrashing rbd-mirror daemons +""" + +import contextlib +import logging +import random +import signal +import socket +import time + +from gevent import sleep +from gevent.greenlet import Greenlet +from gevent.event import Event + +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra import run +from tasks.thrasher import Thrasher + +log = logging.getLogger(__name__) + + +class RBDMirrorThrasher(Thrasher, Greenlet): + """ + RBDMirrorThrasher:: + + The RBDMirrorThrasher thrashes rbd-mirror daemons during execution of other + tasks (workunits, etc). + + The config is optional. Many of the config parameters are a maximum value + to use when selecting a random value from a range. 
The config is a dict + containing some or all of: + + cluster: [default: ceph] cluster to thrash + + max_thrash: [default: 1] the maximum number of active rbd-mirror daemons per + cluster will be thrashed at any given time. + + min_thrash_delay: [default: 60] minimum number of seconds to delay before + thrashing again. + + max_thrash_delay: [default: 120] maximum number of seconds to delay before + thrashing again. + + max_revive_delay: [default: 10] maximum number of seconds to delay before + bringing back a thrashed rbd-mirror daemon. + + randomize: [default: true] enables randomization and use the max/min values + + seed: [no default] seed the random number generator + + Examples:: + + The following example disables randomization, and uses the max delay + values: + + tasks: + - ceph: + - rbd_mirror_thrash: + randomize: False + max_thrash_delay: 10 + """ + + def __init__(self, ctx, config, cluster, daemons): + super(RBDMirrorThrasher, self).__init__() + + self.ctx = ctx + self.config = config + self.cluster = cluster + self.daemons = daemons + + self.logger = log + self.name = 'thrasher.rbd_mirror.[{cluster}]'.format(cluster = cluster) + self.stopping = Event() + + self.randomize = bool(self.config.get('randomize', True)) + self.max_thrash = int(self.config.get('max_thrash', 1)) + self.min_thrash_delay = float(self.config.get('min_thrash_delay', 60.0)) + self.max_thrash_delay = float(self.config.get('max_thrash_delay', 120.0)) + self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0)) + + def _run(self): + try: + self.do_thrash() + except Exception as e: + # See _run exception comment for MDSThrasher + self.set_thrasher_exception(e) + self.logger.exception("exception:") + # Allow successful completion so gevent doesn't see an exception. + # The DaemonWatchdog will observe the error and tear down the test. 
+ + def log(self, x): + """Write data to logger assigned to this RBDMirrorThrasher""" + self.logger.info(x) + + def stop(self): + self.stopping.set() + + def do_thrash(self): + """ + Perform the random thrashing action + """ + + self.log('starting thrash for cluster {cluster}'.format(cluster=self.cluster)) + stats = { + "kill": 0, + } + + while not self.stopping.is_set(): + delay = self.max_thrash_delay + if self.randomize: + delay = random.randrange(self.min_thrash_delay, self.max_thrash_delay) + + if delay > 0.0: + self.log('waiting for {delay} secs before thrashing'.format(delay=delay)) + self.stopping.wait(delay) + if self.stopping.is_set(): + continue + + killed_daemons = [] + + weight = 1.0 / len(self.daemons) + count = 0 + for daemon in self.daemons: + skip = random.uniform(0.0, 1.0) + if weight <= skip: + self.log('skipping daemon {label} with skip ({skip}) > weight ({weight})'.format( + label=daemon.id_, skip=skip, weight=weight)) + continue + + self.log('kill {label}'.format(label=daemon.id_)) + try: + daemon.signal(signal.SIGTERM) + except socket.error: + pass + killed_daemons.append(daemon) + stats['kill'] += 1 + + # if we've reached max_thrash, we're done + count += 1 + if count >= self.max_thrash: + break + + if killed_daemons: + # wait for a while before restarting + delay = self.max_revive_delay + if self.randomize: + delay = random.randrange(0.0, self.max_revive_delay) + + self.log('waiting for {delay} secs before reviving daemons'.format(delay=delay)) + sleep(delay) + + for daemon in killed_daemons: + self.log('waiting for {label}'.format(label=daemon.id_)) + try: + run.wait([daemon.proc], timeout=600) + except CommandFailedError: + pass + except: + self.log('Failed to stop {label}'.format(label=daemon.id_)) + + try: + # try to capture a core dump + daemon.signal(signal.SIGABRT) + except socket.error: + pass + raise + finally: + daemon.reset() + + for daemon in killed_daemons: + self.log('reviving {label}'.format(label=daemon.id_)) + daemon.start() + + for stat in stats: + self.log("stat['{key}'] = {value}".format(key = stat, value = stats[stat])) + +@contextlib.contextmanager +def task(ctx, config): + """ + Stress test the rbd-mirror by thrashing while another task/workunit + is running. + + Please refer to RBDMirrorThrasher class for further information on the + available options. 
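+
+    Each thrash cycle walks the rbd-mirror daemons in order and kills each one
+    with probability 1/len(daemons), stopping once max_thrash daemons have
+    been killed in that cycle; with three daemons and max_thrash: 1, for
+    example, each daemon has roughly a one-in-three chance of being picked.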
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'rbd_mirror_thrash task only accepts a dict for configuration' + + cluster = config.get('cluster', 'ceph') + daemons = list(ctx.daemons.iter_daemons_of_role('rbd-mirror', cluster)) + assert len(daemons) > 0, \ + 'rbd_mirror_thrash task requires at least 1 rbd-mirror daemon' + + # choose random seed + if 'seed' in config: + seed = int(config['seed']) + else: + seed = int(time.time()) + log.info('rbd_mirror_thrash using random seed: {seed}'.format(seed=seed)) + random.seed(seed) + + thrasher = RBDMirrorThrasher(ctx, config, cluster, daemons) + thrasher.start() + ctx.ceph[cluster].thrashers.append(thrasher) + + try: + log.debug('Yielding') + yield + finally: + log.info('joining rbd_mirror_thrash') + thrasher.stop() + if thrasher.exception is not None: + raise RuntimeError('error during thrashing') + thrasher.join() + log.info('done joining') diff --git a/qa/tasks/rbd_pwl_cache_recovery.py b/qa/tasks/rbd_pwl_cache_recovery.py new file mode 100644 index 000000000..e13c1f664 --- /dev/null +++ b/qa/tasks/rbd_pwl_cache_recovery.py @@ -0,0 +1,96 @@ +""" +persistent write log cache recovery task +""" +import contextlib +import logging +import random +import json +import time + +from teuthology import misc as teuthology +from teuthology import contextutil + +DEFAULT_NUM_ITERATIONS = 20 +IO_PATTERNS = ("full-seq", "rand") +IO_SIZES = ('4K', '16K', '128K', '1024K') + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def thrashes_rbd_bench_on_persistent_cache(ctx, config): + """ + thrashes rbd bench on persistent write log cache. + It can test recovery feature of persistent write log cache. + """ + log.info("thrashes rbd bench on persistent write log cache") + + client, client_config = list(config.items())[0] + (remote,) = ctx.cluster.only(client).remotes.keys() + client_config = client_config if client_config is not None else dict() + image_name = client_config.get('image_name', 'testimage') + num_iterations = client_config.get('num_iterations', DEFAULT_NUM_ITERATIONS) + + for i in range(num_iterations): + log.info("start rbd bench") + # rbd bench could not specify the run time so set a large enough test size. 
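+        # The args below compose a command equivalent to, for example:
+        #   rbd bench --io-type write --io-pattern rand --io-size 16K \
+        #       --io-total 100G testimage
+        # with the pattern and size picked at random from IO_PATTERNS/IO_SIZES
+        # and the image name taken from the client config.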
+ remote.run( + args=[ + 'rbd', 'bench', + '--io-type', 'write', + '--io-pattern', random.choice(IO_PATTERNS), + '--io-size', random.choice(IO_SIZES), + '--io-total', '100G', + image_name, + ], + wait=False, + ) + # Wait a few seconds for the rbd bench process to run + # and complete the pwl cache initialization + time.sleep(10) + log.info("dump cache state when rbd bench running.") + remote.sh(['rbd', 'status', image_name, '--format=json']) + log.info("sleep...") + time.sleep(random.randint(10, 60)) + log.info("rbd bench crash.") + remote.run( + args=[ + 'killall', '-9', 'rbd', + ], + check_status=False, + ) + log.info("wait for watch timeout.") + time.sleep(40) + log.info("check cache state after crash.") + out = remote.sh(['rbd', 'status', image_name, '--format=json']) + rbd_status = json.loads(out) + assert len(rbd_status['watchers']) == 0 + assert rbd_status['persistent_cache']['present'] == True + assert rbd_status['persistent_cache']['empty'] == False + assert rbd_status['persistent_cache']['clean'] == False + log.info("check dirty cache file.") + remote.run( + args=[ + 'test', '-e', rbd_status['persistent_cache']['path'], + ] + ) + try: + yield + finally: + log.info("cleanup") + +@contextlib.contextmanager +def task(ctx, config): + """ + This is task for testing persistent write log cache recovery. + """ + assert isinstance(config, dict), \ + "task rbd_pwl_cache_recovery only supports a dictionary for configuration" + + managers = [] + config = teuthology.replace_all_with_clients(ctx.cluster, config) + managers.append( + lambda: thrashes_rbd_bench_on_persistent_cache(ctx=ctx, config=config) + ) + + with contextutil.nested(*managers): + yield diff --git a/qa/tasks/rebuild_mondb.py b/qa/tasks/rebuild_mondb.py new file mode 100644 index 000000000..bbf6383b1 --- /dev/null +++ b/qa/tasks/rebuild_mondb.py @@ -0,0 +1,228 @@ +""" +Test if we can recover the leveldb from OSD after where all leveldbs are +corrupted +""" + +import logging +import os.path +import shutil +import tempfile + +from tasks import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + + +def _push_directory(path, remote, remote_dir): + """ + local_temp_path=`mktemp` + tar czf $local_temp_path $path + ssh remote mkdir -p remote_dir + remote_temp_path=`mktemp` + scp $local_temp_path $remote_temp_path + rm $local_temp_path + tar xzf $remote_temp_path -C $remote_dir + ssh remote:$remote_temp_path + """ + fd, local_temp_path = tempfile.mkstemp(suffix='.tgz', + prefix='rebuild_mondb-') + os.close(fd) + cmd = ' '.join(['tar', 'cz', + '-f', local_temp_path, + '-C', path, + '--', '.']) + teuthology.sh(cmd) + _, fname = os.path.split(local_temp_path) + fd, remote_temp_path = tempfile.mkstemp(suffix='.tgz', + prefix='rebuild_mondb-') + os.close(fd) + remote.put_file(local_temp_path, remote_temp_path) + os.remove(local_temp_path) + remote.run(args=['sudo', + 'tar', 'xz', + '-C', remote_dir, + '-f', remote_temp_path]) + remote.run(args=['sudo', 'rm', '-fr', remote_temp_path]) + + +def _nuke_mons(manager, mons, mon_id): + assert mons + is_mon = teuthology.is_type('mon') + for remote, roles in mons.remotes.items(): + for role in roles: + if not is_mon(role): + continue + cluster, _, m = teuthology.split_role(role) + log.info('killing {cluster}:mon.{mon}'.format( + cluster=cluster, + mon=m)) + manager.kill_mon(m) + mon_data = os.path.join('/var/lib/ceph/mon/', + '{0}-{1}'.format(cluster, m)) + if m == mon_id: + # so we will only need to recreate the store.db for the + # first mon, would be 
easier than mkfs on it then replace + # the its store.db with the recovered one + store_dir = os.path.join(mon_data, 'store.db') + remote.run(args=['sudo', 'rm', '-r', store_dir]) + # we need to remove the external_log_to file too, since it + # references a version number inside store.db + remote.run(args=['sudo', 'rm', '-r', os.path.join(mon_data, + 'external_log_to')]) + else: + remote.run(args=['sudo', 'rm', '-r', mon_data]) + + +def _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path): + local_mstore = tempfile.mkdtemp() + + # collect the maps from all OSDs + is_osd = teuthology.is_type('osd') + osds = ctx.cluster.only(is_osd) + assert osds + for osd, roles in osds.remotes.items(): + for role in roles: + if not is_osd(role): + continue + cluster, _, osd_id = teuthology.split_role(role) + assert cluster_name == cluster + log.info('collecting maps from {cluster}:osd.{osd}'.format( + cluster=cluster, + osd=osd_id)) + # push leveldb to OSD + osd_mstore = os.path.join(teuthology.get_testdir(ctx), 'mon-store') + osd.run(args=['sudo', 'mkdir', '-m', 'o+x', '-p', osd_mstore]) + + _push_directory(local_mstore, osd, osd_mstore) + log.info('rm -rf {0}'.format(local_mstore)) + shutil.rmtree(local_mstore) + # update leveldb with OSD data + options = '--no-mon-config --op update-mon-db --mon-store-path {0}' + log.info('cot {0}'.format(osd_mstore)) + manager.objectstore_tool(pool=None, + options=options.format(osd_mstore), + args='', + osd=osd_id, + do_revive=False) + # pull the updated mon db + log.info('pull dir {0} -> {1}'.format(osd_mstore, local_mstore)) + local_mstore = tempfile.mkdtemp() + teuthology.pull_directory(osd, osd_mstore, local_mstore) + log.info('rm -rf osd:{0}'.format(osd_mstore)) + osd.run(args=['sudo', 'rm', '-fr', osd_mstore]) + + # recover the first_mon with re-built mon db + # pull from recovered leveldb from client + mon_store_dir = os.path.join('/var/lib/ceph/mon', + '{0}-{1}'.format(cluster_name, mon_id)) + _push_directory(local_mstore, mon, mon_store_dir) + mon.run(args=['sudo', 'chown', '-R', 'ceph:ceph', mon_store_dir]) + shutil.rmtree(local_mstore) + + # fill up the caps in the keyring file + mon.run(args=['sudo', + 'ceph-authtool', keyring_path, + '-n', 'mon.', + '--cap', 'mon', 'allow *']) + mon.run(args=['sudo', + 'ceph-authtool', keyring_path, + '-n', 'client.admin', + '--cap', 'mon', 'allow *', + '--cap', 'osd', 'allow *', + '--cap', 'mds', 'allow *', + '--cap', 'mgr', 'allow *']) + mon.run(args=['sudo', '-u', 'ceph', + 'CEPH_ARGS=--no-mon-config', + 'ceph-monstore-tool', mon_store_dir, + 'rebuild', '--', + '--keyring', keyring_path, + '--monmap', '/tmp/monmap', + ]) + + +def _revive_mons(manager, mons, recovered, keyring_path): + # revive monitors + # the initial monmap is in the ceph.conf, so we are good. 
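+    # (For reference, the rebuild done in _rebuild_db above boils down to
+    #  running 'ceph-objectstore-tool ... --no-mon-config --op update-mon-db
+    #  --mon-store-path <dir>' against every osd, accumulating the result, and
+    #  then 'ceph-monstore-tool <dir> rebuild -- --keyring <keyring> --monmap
+    #  <monmap>' on the recovered mon; paths here are placeholders.)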
+ n_mons = 0 + is_mon = teuthology.is_type('mon') + for remote, roles in mons.remotes.items(): + for role in roles: + if not is_mon(role): + continue + cluster, _, m = teuthology.split_role(role) + if recovered != m: + log.info('running mkfs on {cluster}:mon.{mon}'.format( + cluster=cluster, + mon=m)) + remote.run( + args=[ + 'sudo', + 'ceph-mon', + '--cluster', cluster, + '--mkfs', + '-i', m, + '--keyring', keyring_path, + '--monmap', '/tmp/monmap']) + log.info('reviving mon.{0}'.format(m)) + manager.revive_mon(m) + n_mons += 1 + manager.wait_for_mon_quorum_size(n_mons, timeout=30) + + +def _revive_mgrs(ctx, manager): + is_mgr = teuthology.is_type('mgr') + mgrs = ctx.cluster.only(is_mgr) + for _, roles in mgrs.remotes.items(): + for role in roles: + if not is_mgr(role): + continue + _, _, mgr_id = teuthology.split_role(role) + log.info('reviving mgr.{0}'.format(mgr_id)) + manager.revive_mgr(mgr_id) + + +def _revive_osds(ctx, manager): + is_osd = teuthology.is_type('osd') + osds = ctx.cluster.only(is_osd) + for _, roles in osds.remotes.items(): + for role in roles: + if not is_osd(role): + continue + _, _, osd_id = teuthology.split_role(role) + log.info('reviving osd.{0}'.format(osd_id)) + manager.revive_osd(osd_id) + + +def task(ctx, config): + """ + Test monitor recovery from OSD + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'task only accepts a dict for configuration' + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + # stash a monmap for later + mon.run(args=['ceph', 'mon', 'getmap', '-o', '/tmp/monmap']) + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager')) + + mons = ctx.cluster.only(teuthology.is_type('mon')) + # note down the first cluster_name and mon_id + # we will recover it later on + cluster_name, _, mon_id = teuthology.split_role(first_mon) + _nuke_mons(manager, mons, mon_id) + default_keyring = '/etc/ceph/{cluster}.keyring'.format( + cluster=cluster_name) + keyring_path = config.get('keyring_path', default_keyring) + _rebuild_db(ctx, manager, cluster_name, mon, mon_id, keyring_path) + _revive_mons(manager, mons, mon_id, keyring_path) + _revive_mgrs(ctx, manager) + _revive_osds(ctx, manager) diff --git a/qa/tasks/reg11184.py b/qa/tasks/reg11184.py new file mode 100644 index 000000000..86cfbf39a --- /dev/null +++ b/qa/tasks/reg11184.py @@ -0,0 +1,242 @@ +""" +Special regression test for tracker #11184 + +Synopsis: osd/SnapMapper.cc: 282: FAILED assert(check(oid)) + +This is accomplished by moving a pg that wasn't part of split and still include +divergent priors. +""" +import logging +import time + +from teuthology.exceptions import CommandFailedError +from teuthology.orchestra import run +from teuthology import misc as teuthology +from tasks.util.rados import rados +import os + + +log = logging.getLogger(__name__) + + +def task(ctx, config): + """ + Test handling of divergent entries during export / import + to regression test tracker #11184 + + overrides: + ceph: + conf: + osd: + debug osd: 5 + + Requires 3 osds on a single test node. 
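+
+    A typical invocation looks like (illustrative):
+
+        tasks:
+        - install:
+        - ceph:
+        - reg11184: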
+ """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'divergent_priors task only accepts a dict for configuration' + + manager = ctx.managers['ceph'] + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + osds = [0, 1, 2] + manager.flush_pg_stats(osds) + manager.raw_cluster_cmd('osd', 'set', 'noout') + manager.raw_cluster_cmd('osd', 'set', 'noin') + manager.raw_cluster_cmd('osd', 'set', 'nodown') + manager.wait_for_clean() + + # something that is always there + dummyfile = '/etc/fstab' + dummyfile2 = '/etc/resolv.conf' + testdir = teuthology.get_testdir(ctx) + + # create 1 pg pool + log.info('creating foo') + manager.raw_cluster_cmd('osd', 'pool', 'create', 'foo', '1') + manager.raw_cluster_cmd( + 'osd', 'pool', 'application', 'enable', + 'foo', 'rados', run.Raw('||'), 'true') + + # Remove extra pool to simlify log output + manager.raw_cluster_cmd('osd', 'pool', 'delete', 'rbd', 'rbd', '--yes-i-really-really-mean-it') + + for i in osds: + manager.set_config(i, osd_min_pg_log_entries=10) + manager.set_config(i, osd_max_pg_log_entries=10) + manager.set_config(i, osd_pg_log_trim_min=5) + + # determine primary + divergent = manager.get_pg_primary('foo', 0) + log.info("primary and soon to be divergent is %d", divergent) + non_divergent = list(osds) + non_divergent.remove(divergent) + + log.info('writing initial objects') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + # write 100 objects + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) + + manager.wait_for_clean() + + # blackhole non_divergent + log.info("blackholing osds %s", str(non_divergent)) + for i in non_divergent: + manager.set_config(i, objectstore_blackhole=1) + + DIVERGENT_WRITE = 5 + DIVERGENT_REMOVE = 5 + # Write some soon to be divergent + log.info('writing divergent objects') + for i in range(DIVERGENT_WRITE): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, + dummyfile2], wait=False) + # Remove some soon to be divergent + log.info('remove divergent objects') + for i in range(DIVERGENT_REMOVE): + rados(ctx, mon, ['-p', 'foo', 'rm', + 'existing_%d' % (i + DIVERGENT_WRITE)], wait=False) + time.sleep(10) + mon.run( + args=['killall', '-9', 'rados'], + wait=True, + check_status=False) + + # kill all the osds but leave divergent in + log.info('killing all the osds') + for i in osds: + manager.kill_osd(i) + for i in osds: + manager.mark_down_osd(i) + for i in non_divergent: + manager.mark_out_osd(i) + + # bring up non-divergent + log.info("bringing up non_divergent %s", str(non_divergent)) + for i in non_divergent: + manager.revive_osd(i) + for i in non_divergent: + manager.mark_in_osd(i) + + # write 1 non-divergent object (ensure that old divergent one is divergent) + objname = "existing_%d" % (DIVERGENT_WRITE + DIVERGENT_REMOVE) + log.info('writing non-divergent object ' + objname) + rados(ctx, mon, ['-p', 'foo', 'put', objname, dummyfile2]) + + manager.wait_for_recovery() + + # ensure no recovery of up osds first + log.info('delay recovery') + for i in non_divergent: + manager.wait_run_admin_socket( + 'osd', i, ['set_recovery_delay', '100000']) + + # bring in our divergent friend + log.info("revive divergent %d", divergent) + manager.raw_cluster_cmd('osd', 'set', 'noup') + manager.revive_osd(divergent) + + log.info('delay recovery divergent') + manager.wait_run_admin_socket( + 'osd', divergent, ['set_recovery_delay', '100000']) + + manager.raw_cluster_cmd('osd', 'unset', 'noup') + 
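+    # 'noup' was set before reviving the divergent osd so that its recovery
+    # delay could be configured first; clearing it lets the osd be marked up
+    # and peer with the rest of the cluster.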
while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + # At this point the divergent_priors should have been detected + + log.info("killing divergent %d", divergent) + manager.kill_osd(divergent) + + # Split pgs for pool foo + manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'pg_num', '2') + time.sleep(5) + + manager.raw_cluster_cmd('pg','dump') + + # Export a pg + (exp_remote,) = ctx.\ + cluster.only('osd.{o}'.format(o=divergent)).remotes.keys() + FSPATH = manager.get_filepath() + JPATH = os.path.join(FSPATH, "journal") + prefix = ("sudo adjust-ulimits ceph-objectstore-tool " + "--data-path {fpath} --journal-path {jpath} " + "--log-file=" + "/var/log/ceph/objectstore_tool.$$.log ". + format(fpath=FSPATH, jpath=JPATH)) + pid = os.getpid() + expfile = os.path.join(testdir, "exp.{pid}.out".format(pid=pid)) + cmd = ((prefix + "--op export-remove --pgid 2.0 --file {file}"). + format(id=divergent, file=expfile)) + try: + exp_remote.sh(cmd, wait=True) + except CommandFailedError as e: + assert e.exitstatus == 0 + + # Kill one of non-divergent OSDs + log.info('killing osd.%d' % non_divergent[0]) + manager.kill_osd(non_divergent[0]) + manager.mark_down_osd(non_divergent[0]) + # manager.mark_out_osd(non_divergent[0]) + + # An empty collection for pg 2.0 might need to be cleaned up + cmd = ((prefix + "--force --op remove --pgid 2.0"). + format(id=non_divergent[0])) + exp_remote.sh(cmd, wait=True, check_status=False) + + cmd = ((prefix + "--op import --file {file}"). + format(id=non_divergent[0], file=expfile)) + try: + exp_remote.sh(cmd, wait=True) + except CommandFailedError as e: + assert e.exitstatus == 0 + + # bring in our divergent friend and other node + log.info("revive divergent %d", divergent) + manager.revive_osd(divergent) + manager.mark_in_osd(divergent) + log.info("revive %d", non_divergent[0]) + manager.revive_osd(non_divergent[0]) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + log.info('delay recovery divergent') + manager.set_config(divergent, osd_recovery_delay_start=100000) + log.info('mark divergent in') + manager.mark_in_osd(divergent) + + log.info('wait for peering') + rados(ctx, mon, ['-p', 'foo', 'put', 'foo', dummyfile]) + + log.info("killing divergent %d", divergent) + manager.kill_osd(divergent) + log.info("reviving divergent %d", divergent) + manager.revive_osd(divergent) + time.sleep(3) + + log.info('allowing recovery') + # Set osd_recovery_delay_start back to 0 and kick the queue + for i in osds: + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'debug', + 'kick_recovery_wq', ' 0') + + log.info('reading divergent objects') + for i in range(DIVERGENT_WRITE + DIVERGENT_REMOVE): + exit_status = rados(ctx, mon, ['-p', 'foo', 'get', 'existing_%d' % i, + '/tmp/existing']) + assert exit_status == 0 + + (remote,) = ctx.\ + cluster.only('osd.{o}'.format(o=divergent)).remotes.keys() + cmd = 'rm {file}'.format(file=expfile) + remote.run(args=cmd, wait=True) + log.info("success") diff --git a/qa/tasks/rep_lost_unfound_delete.py b/qa/tasks/rep_lost_unfound_delete.py new file mode 100644 index 000000000..8e99ade27 --- /dev/null +++ b/qa/tasks/rep_lost_unfound_delete.py @@ -0,0 +1,179 @@ +""" +Lost_unfound +""" +import logging +import time + +from tasks import ceph_manager +from tasks.util.rados import rados +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + 
Test handling of lost objects. + + A pretty rigid cluster is brought up and tested by this task + """ + POOL = 'unfounddel_pool' + if config is None: + config = {} + assert isinstance(config, dict), \ + 'lost_unfound task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + manager.flush_pg_stats([0, 1, 2]) + manager.wait_for_clean() + + manager.create_pool(POOL) + + # something that is always there + dummyfile = '/etc/fstab' + + # take an osd out until the very end + manager.kill_osd(2) + manager.mark_down_osd(2) + manager.mark_out_osd(2) + + # kludge to make sure they get a map + rados(ctx, mon, ['-p', POOL, 'put', 'dummy', dummyfile]) + + manager.flush_pg_stats([0, 1]) + manager.wait_for_recovery() + + # create old objects + for f in range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'rm', 'existed_%d' % f]) + + # delay recovery, and make the pg log very long (to prevent backfill) + manager.raw_cluster_cmd( + 'tell', 'osd.1', + 'injectargs', + '--osd-recovery-delay-start 1000 --osd-min-pg-log-entries 100000000' + ) + + manager.kill_osd(0) + manager.mark_down_osd(0) + + for f in range(1, 10): + rados(ctx, mon, ['-p', POOL, 'put', 'new_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existed_%d' % f, dummyfile]) + rados(ctx, mon, ['-p', POOL, 'put', 'existing_%d' % f, dummyfile]) + + # bring osd.0 back up, let it peer, but don't replicate the new + # objects... + log.info('osd.0 command_args is %s' % 'foo') + log.info(ctx.daemons.get_daemon('osd', 0).command_args) + ctx.daemons.get_daemon('osd', 0).command_kwargs['args'].extend([ + '--osd-recovery-delay-start', '1000' + ]) + manager.revive_osd(0) + manager.mark_in_osd(0) + manager.wait_till_osd_is_up(0) + + manager.flush_pg_stats([0, 1]) + manager.wait_till_active() + + # take out osd.1 and the only copy of those objects. 
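+    # (osd.2 has been out since before the new objects were written and osd.0
+    # was down while they were written, so osd.1 holds the only copy; losing
+    # it leaves those objects unfound)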
+ manager.kill_osd(1) + manager.mark_down_osd(1) + manager.mark_out_osd(1) + manager.raw_cluster_cmd('osd', 'lost', '1', '--yes-i-really-mean-it') + + # bring up osd.2 so that things would otherwise, in theory, recovery fully + manager.revive_osd(2) + manager.mark_in_osd(2) + manager.wait_till_osd_is_up(2) + + manager.flush_pg_stats([0, 2]) + manager.wait_till_active() + manager.flush_pg_stats([0, 2]) + + # verify that there are unfound objects + unfound = manager.get_num_unfound_objects() + log.info("there are %d unfound objects" % unfound) + assert unfound + + testdir = teuthology.get_testdir(ctx) + procs = [] + if config.get('parallel_bench', True): + procs.append(mon.run( + args=[ + "/bin/sh", "-c", + " ".join(['adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage', + 'rados', + '--no-log-to-stderr', + '--name', 'client.admin', + '-b', str(4<<10), + '-p' , POOL, + '-t', '20', + 'bench', '240', 'write', + ]).format(tdir=testdir), + ], + logger=log.getChild('radosbench.{id}'.format(id='client.admin')), + stdin=run.PIPE, + wait=False + )) + time.sleep(10) + + # mark stuff lost + pgs = manager.get_pg_stats() + for pg in pgs: + if pg['stat_sum']['num_objects_unfound'] > 0: + primary = 'osd.%d' % pg['acting'][0] + + # verify that i can list them direct from the osd + log.info('listing missing/lost in %s state %s', pg['pgid'], + pg['state']); + m = manager.list_pg_unfound(pg['pgid']) + #log.info('%s' % m) + assert m['num_unfound'] == pg['stat_sum']['num_objects_unfound'] + num_unfound=0 + for o in m['objects']: + if len(o['locations']) == 0: + num_unfound += 1 + assert m['num_unfound'] == num_unfound + + log.info("reverting unfound in %s on %s", pg['pgid'], primary) + manager.raw_cluster_cmd('pg', pg['pgid'], + 'mark_unfound_lost', 'delete') + else: + log.info("no unfound in %s", pg['pgid']) + + manager.raw_cluster_cmd('tell', 'osd.0', 'debug', 'kick_recovery_wq', '5') + manager.raw_cluster_cmd('tell', 'osd.2', 'debug', 'kick_recovery_wq', '5') + manager.flush_pg_stats([0, 2]) + manager.wait_for_recovery() + + # verify result + for f in range(1, 10): + err = rados(ctx, mon, ['-p', POOL, 'get', 'new_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existed_%d' % f, '-']) + assert err + err = rados(ctx, mon, ['-p', POOL, 'get', 'existing_%d' % f, '-']) + assert err + + # see if osd.1 can cope + manager.mark_in_osd(1) + manager.revive_osd(1) + manager.wait_till_osd_is_up(1) + manager.wait_for_clean() + run.wait(procs) + manager.wait_for_clean() + diff --git a/qa/tasks/repair_test.py b/qa/tasks/repair_test.py new file mode 100644 index 000000000..cfd6ef791 --- /dev/null +++ b/qa/tasks/repair_test.py @@ -0,0 +1,303 @@ +""" +Test pool repairing after objects are damaged. +""" +import logging +import time + +log = logging.getLogger(__name__) + + +def choose_primary(manager, pool, num): + """ + Return primary to test on. + """ + log.info("Choosing primary") + return manager.get_pg_primary(pool, num) + + +def choose_replica(manager, pool, num): + """ + Return replica to test on. 
+ """ + log.info("Choosing replica") + return manager.get_pg_replica(pool, num) + + +def trunc(manager, osd, pool, obj): + """ + truncate an object + """ + log.info("truncating object") + return manager.osd_admin_socket( + osd, + ['truncobj', pool, obj, '1']) + + +def dataerr(manager, osd, pool, obj): + """ + cause an error in the data + """ + log.info("injecting data err on object") + return manager.osd_admin_socket( + osd, + ['injectdataerr', pool, obj]) + + +def mdataerr(manager, osd, pool, obj): + """ + cause an error in the mdata + """ + log.info("injecting mdata err on object") + return manager.osd_admin_socket( + osd, + ['injectmdataerr', pool, obj]) + + +def omaperr(manager, osd, pool, obj): + """ + Cause an omap error. + """ + log.info("injecting omap err on object") + return manager.osd_admin_socket(osd, ['setomapval', pool, obj, + 'badkey', 'badval']) + + +def repair_test_1(manager, corrupter, chooser, scrub_type): + """ + Creates an object in the pool, corrupts it, + scrubs it, and verifies that the pool is inconsistent. It then repairs + the pool, rescrubs it, and verifies that the pool is consistent + + :param corrupter: error generating function (truncate, data-error, or + meta-data error, for example). + :param chooser: osd type chooser (primary or replica) + :param scrub_type: regular scrub or deep-scrub + """ + pool = "repair_pool_1" + manager.wait_for_clean() + with manager.pool(pool, 1): + + log.info("starting repair test type 1") + victim_osd = chooser(manager, pool, 0) + + # create object + log.info("doing put") + manager.do_put(pool, 'repair_test_obj', '/etc/hosts') + + # corrupt object + log.info("corrupting object") + corrupter(manager, victim_osd, pool, 'repair_test_obj') + + # verify inconsistent + log.info("scrubbing") + manager.do_pg_scrub(pool, 0, scrub_type) + + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s) + + # repair + log.info("repairing") + manager.do_pg_scrub(pool, 0, "repair") + + log.info("re-scrubbing") + manager.do_pg_scrub(pool, 0, scrub_type) + + # verify consistent + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s) + log.info("done") + + +def repair_test_2(ctx, manager, config, chooser): + """ + First creates a set of objects and + sets the omap value. It then corrupts an object, does both a scrub + and a deep-scrub, and then corrupts more objects. After that, it + repairs the pool and makes sure that the pool is consistent some + time after a deep-scrub. + + :param chooser: primary or replica selection routine. 
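+
+    The corruption helpers above act through the victim osd's admin socket
+    (truncobj, injectdataerr, injectmdataerr, setomapval); the data/metadata
+    injections only take effect when the cluster runs with
+    'filestore debug inject read err: true', as shown in the task docstring
+    below.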
+ """ + pool = "repair_pool_2" + manager.wait_for_clean() + with manager.pool(pool, 1): + log.info("starting repair test type 2") + victim_osd = chooser(manager, pool, 0) + + # create object + log.info("doing put and setomapval") + manager.do_put(pool, 'file1', '/etc/hosts') + manager.do_rados(['setomapval', 'file1', 'key', 'val'], pool=pool) + manager.do_put(pool, 'file2', '/etc/hosts') + manager.do_put(pool, 'file3', '/etc/hosts') + manager.do_put(pool, 'file4', '/etc/hosts') + manager.do_put(pool, 'file5', '/etc/hosts') + manager.do_rados(['setomapval', 'file5', 'key', 'val'], pool=pool) + manager.do_put(pool, 'file6', '/etc/hosts') + + # corrupt object + log.info("corrupting object") + omaperr(manager, victim_osd, pool, 'file1') + + # verify inconsistent + log.info("scrubbing") + manager.do_pg_scrub(pool, 0, 'deep-scrub') + + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s) + + # Regression test for bug #4778, should still + # be inconsistent after scrub + manager.do_pg_scrub(pool, 0, 'scrub') + + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s) + + # Additional corruptions including 2 types for file1 + log.info("corrupting more objects") + dataerr(manager, victim_osd, pool, 'file1') + mdataerr(manager, victim_osd, pool, 'file2') + trunc(manager, victim_osd, pool, 'file3') + omaperr(manager, victim_osd, pool, 'file6') + + # see still inconsistent + log.info("scrubbing") + manager.do_pg_scrub(pool, 0, 'deep-scrub') + + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s) + + # repair + log.info("repairing") + manager.do_pg_scrub(pool, 0, "repair") + + # Let repair clear inconsistent flag + time.sleep(10) + + # verify consistent + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s) + + # In the future repair might determine state of + # inconsistency itself, verify with a deep-scrub + log.info("scrubbing") + manager.do_pg_scrub(pool, 0, 'deep-scrub') + + # verify consistent + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s) + + log.info("done") + + +def hinfoerr(manager, victim, pool, obj): + """ + cause an error in the hinfo_key + """ + log.info("remove the hinfo_key") + manager.objectstore_tool(pool, + options='', + args='rm-attr hinfo_key', + object_name=obj, + osd=victim) + + +def repair_test_erasure_code(manager, corrupter, victim, scrub_type): + """ + Creates an object in the pool, corrupts it, + scrubs it, and verifies that the pool is inconsistent. It then repairs + the pool, rescrubs it, and verifies that the pool is consistent + + :param corrupter: error generating function. 
+ :param chooser: osd type chooser (primary or replica) + :param scrub_type: regular scrub or deep-scrub + """ + pool = "repair_pool_3" + manager.wait_for_clean() + with manager.pool(pool_name=pool, pg_num=1, + erasure_code_profile_name='default'): + + log.info("starting repair test for erasure code") + + # create object + log.info("doing put") + manager.do_put(pool, 'repair_test_obj', '/etc/hosts') + + # corrupt object + log.info("corrupting object") + corrupter(manager, victim, pool, 'repair_test_obj') + + # verify inconsistent + log.info("scrubbing") + manager.do_pg_scrub(pool, 0, scrub_type) + + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' in s) + + # repair + log.info("repairing") + manager.do_pg_scrub(pool, 0, "repair") + + log.info("re-scrubbing") + manager.do_pg_scrub(pool, 0, scrub_type) + + # verify consistent + manager.with_pg_state(pool, 0, lambda s: 'inconsistent' not in s) + log.info("done") + + +def task(ctx, config): + """ + Test [deep] repair in several situations: + Repair [Truncate, Data EIO, MData EIO] on [Primary|Replica] + + The config should be as follows: + + Must include the log-ignorelist below + Must enable filestore_debug_inject_read_err config + + example: + + tasks: + - chef: + - install: + - ceph: + log-ignorelist: + - 'candidate had a stat error' + - 'candidate had a read error' + - 'deep-scrub 0 missing, 1 inconsistent objects' + - 'deep-scrub 0 missing, 4 inconsistent objects' + - 'deep-scrub [0-9]+ errors' + - '!= omap_digest' + - '!= data_digest' + - 'repair 0 missing, 1 inconsistent objects' + - 'repair 0 missing, 4 inconsistent objects' + - 'repair [0-9]+ errors, [0-9]+ fixed' + - 'scrub 0 missing, 1 inconsistent objects' + - 'scrub [0-9]+ errors' + - 'size 1 != size' + - 'attr name mismatch' + - 'Regular scrub request, deep-scrub details will be lost' + - 'candidate size [0-9]+ info size [0-9]+ mismatch' + conf: + osd: + filestore debug inject read err: true + - repair_test: + + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'repair_test task only accepts a dict for config' + + manager = ctx.managers['ceph'] + manager.wait_for_all_osds_up() + + manager.raw_cluster_cmd('osd', 'set', 'noscrub') + manager.raw_cluster_cmd('osd', 'set', 'nodeep-scrub') + + repair_test_1(manager, mdataerr, choose_primary, "scrub") + repair_test_1(manager, mdataerr, choose_replica, "scrub") + repair_test_1(manager, dataerr, choose_primary, "deep-scrub") + repair_test_1(manager, dataerr, choose_replica, "deep-scrub") + repair_test_1(manager, trunc, choose_primary, "scrub") + repair_test_1(manager, trunc, choose_replica, "scrub") + repair_test_2(ctx, manager, config, choose_primary) + repair_test_2(ctx, manager, config, choose_replica) + + repair_test_erasure_code(manager, hinfoerr, 'primary', "deep-scrub") + + manager.raw_cluster_cmd('osd', 'unset', 'noscrub') + manager.raw_cluster_cmd('osd', 'unset', 'nodeep-scrub') diff --git a/qa/tasks/resolve_stuck_peering.py b/qa/tasks/resolve_stuck_peering.py new file mode 100644 index 000000000..d140544c4 --- /dev/null +++ b/qa/tasks/resolve_stuck_peering.py @@ -0,0 +1,112 @@ +""" +Resolve stuck peering +""" +import logging +import time + +from teuthology import misc as teuthology +from tasks.util.rados import rados + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Test handling resolve stuck peering + + requires 3 osds on a single test node + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'Resolve stuck peering only accepts a dict for config' 
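+
+    # Outline: write to a one-PG pool with min_size 1, kill the two
+    # non-primary osds, write more objects that only the primary sees, then
+    # kill the primary and revive the others; the PG stays 'down' until the
+    # primary is marked lost, after which it peers and recovers.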
+ + manager = ctx.managers['ceph'] + + while len(manager.get_osd_status()['up']) < 3: + time.sleep(10) + + + manager.wait_for_clean() + + dummyfile = '/etc/fstab' + dummyfile1 = '/etc/resolv.conf' + + #create 1 PG pool + pool='foo' + log.info('creating pool foo') + manager.raw_cluster_cmd('osd', 'pool', 'create', '%s' % pool, '1') + + #set min_size of the pool to 1 + #so that we can continue with I/O + #when 2 osds are down + manager.set_pool_property(pool, "min_size", 1) + + osds = [0, 1, 2] + + primary = manager.get_pg_primary('foo', 0) + log.info("primary osd is %d", primary) + + others = list(osds) + others.remove(primary) + + log.info('writing initial objects') + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + #create few objects + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'existing_%d' % i, dummyfile]) + + manager.wait_for_clean() + + #kill other osds except primary + log.info('killing other osds except primary') + for i in others: + manager.kill_osd(i) + for i in others: + manager.mark_down_osd(i) + + + for i in range(100): + rados(ctx, mon, ['-p', 'foo', 'put', 'new_%d' % i, dummyfile1]) + + #kill primary osd + manager.kill_osd(primary) + manager.mark_down_osd(primary) + + #revive other 2 osds + for i in others: + manager.revive_osd(i) + + #make sure that pg is down + #Assuming pg number for single pg pool will start from 0 + pgnum=0 + pgstr = manager.get_pgid(pool, pgnum) + stats = manager.get_single_pg_stats(pgstr) + print(stats['state']) + + timeout=60 + start=time.time() + + while 'down' not in stats['state']: + assert time.time() - start < timeout, \ + 'failed to reach down state before timeout expired' + stats = manager.get_single_pg_stats(pgstr) + + #mark primary as lost + manager.raw_cluster_cmd('osd', 'lost', '%d' % primary,\ + '--yes-i-really-mean-it') + + + #expect the pg status to be active+undersized+degraded + #pg should recover and become active+clean within timeout + stats = manager.get_single_pg_stats(pgstr) + print(stats['state']) + + timeout=10 + start=time.time() + + while manager.get_num_down(): + assert time.time() - start < timeout, \ + 'failed to recover before timeout expired' + + manager.revive_osd(primary) diff --git a/qa/tasks/rgw.py b/qa/tasks/rgw.py new file mode 100644 index 000000000..61bcea3a5 --- /dev/null +++ b/qa/tasks/rgw.py @@ -0,0 +1,472 @@ +""" +rgw routines +""" +import argparse +import contextlib +import logging + +from teuthology.orchestra import run +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.exceptions import ConfigError +from tasks.ceph_manager import get_valgrind_args +from tasks.util import get_remote_for_role +from tasks.util.rgw import rgwadmin, wait_for_radosgw +from tasks.util.rados import (create_ec_pool, + create_replicated_pool, + create_cache_pool) + +log = logging.getLogger(__name__) + +class RGWEndpoint: + def __init__(self, hostname=None, port=None, cert=None, dns_name=None, website_dns_name=None): + self.hostname = hostname + self.port = port + self.cert = cert + self.dns_name = dns_name + self.website_dns_name = website_dns_name + + def url(self): + proto = 'https' if self.cert else 'http' + return '{proto}://{hostname}:{port}/'.format(proto=proto, hostname=self.hostname, port=self.port) + +@contextlib.contextmanager +def start_rgw(ctx, config, clients): + """ + Start rgw on remote sites. 
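+
+    For each listed client a radosgw process is launched under daemon-helper,
+    wrapped in adjust-ulimits and ceph-coverage (and optionally valgrind), with
+    the frontend, port/SSL settings and any keystone/barbican/vault/pykmip
+    integration taken from the client's config and the role's endpoint.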
+ """ + log.info('Starting rgw...') + testdir = teuthology.get_testdir(ctx) + for client in clients: + (remote,) = ctx.cluster.only(client).remotes.keys() + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + client_with_cluster = cluster_name + '.' + client_with_id + + client_config = config.get(client) + if client_config is None: + client_config = {} + log.info("rgw %s config is %s", client, client_config) + cmd_prefix = [ + 'sudo', + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'daemon-helper', + 'term', + ] + + rgw_cmd = ['radosgw'] + + log.info("Using %s as radosgw frontend", ctx.rgw.frontend) + + endpoint = ctx.rgw.role_endpoints[client] + frontends = ctx.rgw.frontend + frontend_prefix = client_config.get('frontend_prefix', None) + if frontend_prefix: + frontends += ' prefix={pfx}'.format(pfx=frontend_prefix) + + if endpoint.cert: + # add the ssl certificate path + frontends += ' ssl_certificate={}'.format(endpoint.cert.certificate) + frontends += ' ssl_port={}'.format(endpoint.port) + else: + frontends += ' port={}'.format(endpoint.port) + + rgw_cmd.extend([ + '--rgw-frontends', frontends, + '-n', client_with_id, + '--cluster', cluster_name, + '-k', '/etc/ceph/{client_with_cluster}.keyring'.format(client_with_cluster=client_with_cluster), + '--log-file', + '/var/log/ceph/rgw.{client_with_cluster}.log'.format(client_with_cluster=client_with_cluster), + '--rgw_ops_log_socket_path', + '{tdir}/rgw.opslog.{client_with_cluster}.sock'.format(tdir=testdir, + client_with_cluster=client_with_cluster), + ]) + + keystone_role = client_config.get('use-keystone-role', None) + if keystone_role is not None: + if not ctx.keystone: + raise ConfigError('rgw must run after the keystone task') + url = 'http://{host}:{port}/v1/KEY_$(tenant_id)s'.format(host=endpoint.hostname, + port=endpoint.port) + ctx.keystone.create_endpoint(ctx, keystone_role, 'swift', url) + + keystone_host, keystone_port = \ + ctx.keystone.public_endpoints[keystone_role] + rgw_cmd.extend([ + '--rgw_keystone_url', + 'http://{khost}:{kport}'.format(khost=keystone_host, + kport=keystone_port), + ]) + + + if client_config.get('dns-name') is not None: + rgw_cmd.extend(['--rgw-dns-name', endpoint.dns_name]) + if client_config.get('dns-s3website-name') is not None: + rgw_cmd.extend(['--rgw-dns-s3website-name', endpoint.website_dns_name]) + + + vault_role = client_config.get('use-vault-role', None) + barbican_role = client_config.get('use-barbican-role', None) + pykmip_role = client_config.get('use-pykmip-role', None) + + token_path = '/etc/ceph/vault-root-token' + if barbican_role is not None: + if not hasattr(ctx, 'barbican'): + raise ConfigError('rgw must run after the barbican task') + + barbican_host, barbican_port = \ + ctx.barbican.endpoints[barbican_role] + log.info("Use barbican url=%s:%s", barbican_host, barbican_port) + + rgw_cmd.extend([ + '--rgw_barbican_url', + 'http://{bhost}:{bport}'.format(bhost=barbican_host, + bport=barbican_port), + ]) + elif vault_role is not None: + if not ctx.vault.root_token: + raise ConfigError('vault: no "root_token" specified') + # create token on file + ctx.rgw.vault_role = vault_role + ctx.cluster.only(client).run(args=['sudo', 'echo', '-n', ctx.vault.root_token, run.Raw('|'), 'sudo', 'tee', token_path]) + log.info("Token file content") + ctx.cluster.only(client).run(args=['cat', token_path]) + log.info("Restrict access to token file") + ctx.cluster.only(client).run(args=['sudo', 
'chmod', '600', token_path]) + ctx.cluster.only(client).run(args=['sudo', 'chown', 'ceph', token_path]) + + vault_addr = "{}:{}".format(*ctx.vault.endpoints[vault_role]) + rgw_cmd.extend([ + '--rgw_crypt_vault_addr', vault_addr, + '--rgw_crypt_vault_token_file', token_path, + '--rgw_crypt_sse_s3_vault_addr', vault_addr, + '--rgw_crypt_sse_s3_vault_token_file', token_path, + ]) + elif pykmip_role is not None: + if not hasattr(ctx, 'pykmip'): + raise ConfigError('rgw must run after the pykmip task') + ctx.rgw.pykmip_role = pykmip_role + rgw_cmd.extend([ + '--rgw_crypt_kmip_addr', "{}:{}".format(*ctx.pykmip.endpoints[pykmip_role]), + ]) + + clientcert = ctx.ssl_certificates.get('kmip-client') + servercert = ctx.ssl_certificates.get('kmip-server') + clientca = ctx.ssl_certificates.get('kmiproot') + + clientkey = clientcert.key + clientcert = clientcert.certificate + serverkey = servercert.key + servercert = servercert.certificate + rootkey = clientca.key + rootcert = clientca.certificate + + cert_path = '/etc/ceph/' + ctx.cluster.only(client).run(args=['sudo', 'cp', clientcert, cert_path]) + ctx.cluster.only(client).run(args=['sudo', 'cp', clientkey, cert_path]) + ctx.cluster.only(client).run(args=['sudo', 'cp', servercert, cert_path]) + ctx.cluster.only(client).run(args=['sudo', 'cp', serverkey, cert_path]) + ctx.cluster.only(client).run(args=['sudo', 'cp', rootkey, cert_path]) + ctx.cluster.only(client).run(args=['sudo', 'cp', rootcert, cert_path]) + + clientcert = cert_path + 'kmip-client.crt' + clientkey = cert_path + 'kmip-client.key' + servercert = cert_path + 'kmip-server.crt' + serverkey = cert_path + 'kmip-server.key' + rootkey = cert_path + 'kmiproot.key' + rootcert = cert_path + 'kmiproot.crt' + + ctx.cluster.only(client).run(args=['sudo', 'chmod', '600', clientcert, clientkey, servercert, serverkey, rootkey, rootcert]) + ctx.cluster.only(client).run(args=['sudo', 'chown', 'ceph', clientcert, clientkey, servercert, serverkey, rootkey, rootcert]) + + rgw_cmd.extend([ + '--foreground', + run.Raw('|'), + 'sudo', + 'tee', + '/var/log/ceph/rgw.{client_with_cluster}.stdout'.format(client_with_cluster=client_with_cluster), + run.Raw('2>&1'), + ]) + + if client_config.get('valgrind'): + cmd_prefix = get_valgrind_args( + testdir, + client_with_cluster, + cmd_prefix, + client_config.get('valgrind'), + # see https://github.com/ceph/teuthology/pull/1600 + exit_on_first_error=False + ) + + run_cmd = list(cmd_prefix) + run_cmd.extend(rgw_cmd) + + ctx.daemons.add_daemon( + remote, 'rgw', client_with_id, + cluster=cluster_name, + fsid=ctx.ceph[cluster_name].fsid, + args=run_cmd, + logger=log.getChild(client), + stdin=run.PIPE, + wait=False, + ) + + # XXX: add_daemon() doesn't let us wait until radosgw finishes startup + for client in clients: + endpoint = ctx.rgw.role_endpoints[client] + url = endpoint.url() + log.info('Polling {client} until it starts accepting connections on {url}'.format(client=client, url=url)) + (remote,) = ctx.cluster.only(client).remotes.keys() + wait_for_radosgw(url, remote) + + try: + yield + finally: + for client in clients: + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + client_with_cluster = cluster_name + '.' 
+ client_with_id + ctx.daemons.get_daemon('rgw', client_with_id, cluster_name).stop() + ctx.cluster.only(client).run( + args=[ + 'rm', + '-f', + '{tdir}/rgw.opslog.{client}.sock'.format(tdir=testdir, + client=client_with_cluster), + ], + ) + ctx.cluster.only(client).run(args=['sudo', 'rm', '-f', token_path]) + +def assign_endpoints(ctx, config, default_cert): + role_endpoints = {} + for role, client_config in config.items(): + client_config = client_config or {} + remote = get_remote_for_role(ctx, role) + + cert = client_config.get('ssl certificate', default_cert) + if cert: + # find the certificate created by the ssl task + if not hasattr(ctx, 'ssl_certificates'): + raise ConfigError('rgw: no ssl task found for option "ssl certificate"') + ssl_certificate = ctx.ssl_certificates.get(cert, None) + if not ssl_certificate: + raise ConfigError('rgw: missing ssl certificate "{}"'.format(cert)) + else: + ssl_certificate = None + + port = client_config.get('port', 443 if ssl_certificate else 80) + + # if dns-name is given, use it as the hostname (or as a prefix) + dns_name = client_config.get('dns-name', '') + if len(dns_name) == 0 or dns_name.endswith('.'): + dns_name += remote.hostname + + website_dns_name = client_config.get('dns-s3website-name') + if website_dns_name is not None and (len(website_dns_name) == 0 or website_dns_name.endswith('.')): + website_dns_name += remote.hostname + + role_endpoints[role] = RGWEndpoint(remote.hostname, port, ssl_certificate, dns_name, website_dns_name) + + return role_endpoints + +@contextlib.contextmanager +def create_pools(ctx, clients): + """Create replicated or erasure coded data pools for rgw.""" + + log.info('Creating data pools') + for client in clients: + log.debug("Obtaining remote for client {}".format(client)) + (remote,) = ctx.cluster.only(client).remotes.keys() + data_pool = 'default.rgw.buckets.data' + cluster_name, daemon_type, client_id = teuthology.split_role(client) + + if ctx.rgw.ec_data_pool: + create_ec_pool(remote, data_pool, client, ctx.rgw.data_pool_pg_size, + ctx.rgw.erasure_code_profile, cluster_name, 'rgw') + else: + create_replicated_pool(remote, data_pool, ctx.rgw.data_pool_pg_size, cluster_name, 'rgw') + + index_pool = 'default.rgw.buckets.index' + create_replicated_pool(remote, index_pool, ctx.rgw.index_pool_pg_size, cluster_name, 'rgw') + + if ctx.rgw.cache_pools: + create_cache_pool(remote, data_pool, data_pool + '.cache', 64, + 64*1024*1024, cluster_name) + log.debug('Pools created') + yield + +@contextlib.contextmanager +def configure_compression(ctx, clients, compression): + """ set a compression type in the default zone placement """ + log.info('Configuring compression type = %s', compression) + for client in clients: + # XXX: the 'default' zone and zonegroup aren't created until we run RGWRados::init_complete(). + # issue a 'radosgw-admin user list' command to trigger this + rgwadmin(ctx, client, cmd=['user', 'list'], check_status=True) + + rgwadmin(ctx, client, + cmd=['zone', 'placement', 'modify', '--rgw-zone', 'default', + '--placement-id', 'default-placement', + '--compression', compression], + check_status=True) + yield + +@contextlib.contextmanager +def disable_inline_data(ctx, clients): + for client in clients: + # XXX: the 'default' zone and zonegroup aren't created until we run RGWRados::init_complete(). 
+ # issue a 'radosgw-admin user list' command to trigger this + rgwadmin(ctx, client, cmd=['user', 'list'], check_status=True) + + rgwadmin(ctx, client, + cmd=['zone', 'placement', 'modify', '--rgw-zone', 'default', + '--placement-id', 'default-placement', + '--placement-inline-data', 'false'], + check_status=True) + yield + +@contextlib.contextmanager +def configure_datacache(ctx, clients, datacache_path): + """ create directory for rgw datacache """ + log.info('Preparing directory for rgw datacache at %s', datacache_path) + for client in clients: + if(datacache_path != None): + ctx.cluster.only(client).run(args=['mkdir', '-p', datacache_path]) + ctx.cluster.only(client).run(args=['sudo', 'chmod', 'a+rwx', datacache_path]) + else: + log.info('path for datacache was not provided') + yield + +@contextlib.contextmanager +def configure_storage_classes(ctx, clients, storage_classes): + """ set a compression type in the default zone placement """ + + sc = [s.strip() for s in storage_classes.split(',')] + + for client in clients: + # XXX: the 'default' zone and zonegroup aren't created until we run RGWRados::init_complete(). + # issue a 'radosgw-admin user list' command to trigger this + rgwadmin(ctx, client, cmd=['user', 'list'], check_status=True) + + for storage_class in sc: + log.info('Configuring storage class type = %s', storage_class) + rgwadmin(ctx, client, + cmd=['zonegroup', 'placement', 'add', + '--rgw-zone', 'default', + '--placement-id', 'default-placement', + '--storage-class', storage_class], + check_status=True) + rgwadmin(ctx, client, + cmd=['zone', 'placement', 'add', + '--rgw-zone', 'default', + '--placement-id', 'default-placement', + '--storage-class', storage_class, + '--data-pool', 'default.rgw.buckets.data.' + storage_class.lower()], + check_status=True) + yield + +@contextlib.contextmanager +def task(ctx, config): + """ + For example, to run rgw on all clients:: + + tasks: + - ceph: + - rgw: + + To only run on certain clients:: + + tasks: + - ceph: + - rgw: [client.0, client.3] + + or + + tasks: + - ceph: + - rgw: + client.0: + client.3: + + To run radosgw through valgrind: + + tasks: + - ceph: + - rgw: + client.0: + valgrind: [--tool=memcheck] + client.3: + valgrind: [--tool=memcheck] + + To configure data or index pool pg_size: + + overrides: + rgw: + data_pool_pg_size: 256 + index_pool_pg_size: 128 + """ + if config is None: + config = dict(('client.{id}'.format(id=id_), None) + for id_ in teuthology.all_roles_of_type( + ctx.cluster, 'client')) + elif isinstance(config, list): + config = dict((name, None) for name in config) + + clients = config.keys() # http://tracker.ceph.com/issues/20417 + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('rgw', {})) + + ctx.rgw = argparse.Namespace() + ctx.rgw_cloudtier = None + + ctx.rgw.ec_data_pool = bool(config.pop('ec-data-pool', False)) + ctx.rgw.erasure_code_profile = config.pop('erasure_code_profile', {}) + ctx.rgw.cache_pools = bool(config.pop('cache-pools', False)) + ctx.rgw.frontend = config.pop('frontend', 'beast') + ctx.rgw.compression_type = config.pop('compression type', None) + ctx.rgw.inline_data = config.pop('inline data', True) + ctx.rgw.storage_classes = config.pop('storage classes', None) + default_cert = config.pop('ssl certificate', None) + ctx.rgw.data_pool_pg_size = config.pop('data_pool_pg_size', 64) + ctx.rgw.index_pool_pg_size = config.pop('index_pool_pg_size', 64) + ctx.rgw.datacache = bool(config.pop('datacache', False)) + ctx.rgw.datacache_path = 
config.pop('datacache_path', None) + ctx.rgw.config = config + + log.debug("config is {}".format(config)) + log.debug("client list is {}".format(clients)) + + ctx.rgw.role_endpoints = assign_endpoints(ctx, config, default_cert) + + subtasks = [ + lambda: create_pools(ctx=ctx, clients=clients), + ] + if ctx.rgw.compression_type: + subtasks.extend([ + lambda: configure_compression(ctx=ctx, clients=clients, + compression=ctx.rgw.compression_type), + ]) + if not ctx.rgw.inline_data: + subtasks.extend([ + lambda: disable_inline_data(ctx=ctx, clients=clients), + ]) + if ctx.rgw.datacache: + subtasks.extend([ + lambda: configure_datacache(ctx=ctx, clients=clients, + datacache_path=ctx.rgw.datacache_path), + ]) + if ctx.rgw.storage_classes: + subtasks.extend([ + lambda: configure_storage_classes(ctx=ctx, clients=clients, + storage_classes=ctx.rgw.storage_classes), + ]) + subtasks.extend([ + lambda: start_rgw(ctx=ctx, config=config, clients=clients), + ]) + + with contextutil.nested(*subtasks): + yield diff --git a/qa/tasks/rgw_cloudtier.py b/qa/tasks/rgw_cloudtier.py new file mode 100644 index 000000000..88f7d0dc2 --- /dev/null +++ b/qa/tasks/rgw_cloudtier.py @@ -0,0 +1,122 @@ +""" +rgw_cloudtier configuration routines +""" +import argparse +import logging + +from teuthology import misc as teuthology +from teuthology.exceptions import ConfigError +from tasks.util.rgw import rgwadmin, wait_for_radosgw +from teuthology.task import Task + +log = logging.getLogger(__name__) + +class RGWCloudTier(Task): + """ + Configure CloudTier storage class. + + To configure cloudtiering on any client:: + + tasks: + - ceph: + - rgw: + - rgw-cloudtier: + client.0: + cloud_storage_class: + cloud_client: + cloud_regular_storage_class: + cloud_target_storage_class: + cloud_retain_head_object: + cloud_target_path: + cloudtier_user: + cloud_secret: + cloud_access_key: + + """ + def __init__(self, ctx, config): + super(RGWCloudTier, self).__init__(ctx, config) + + def setup(self): + super(RGWCloudTier, self).setup() + + overrides = self.ctx.config.get('overrides', {}) + teuthology.deep_merge(self.config, overrides.get('rgw-cloudtier', {})) + + if not self.ctx.rgw: + raise ConfigError('rgw-cloudtier must run after the rgw task') + + self.ctx.rgw_cloudtier = argparse.Namespace() + self.ctx.rgw_cloudtier.config = self.config + + log.info('Configuring rgw cloudtier ...') + clients = self.config.keys() # http://tracker.ceph.com/issues/20417 + for client in clients: + client_config = self.config.get(client) + if client_config is None: + client_config = {} + + if client_config is not None: + log.info('client %s - cloudtier config is -----------------%s ', client, client_config) + # configuring cloudtier + + cloud_client = client_config.get('cloud_client') + cloud_storage_class = client_config.get('cloud_storage_class') + cloud_target_path = client_config.get('cloud_target_path') + cloud_target_storage_class = client_config.get('cloud_target_storage_class') + cloud_retain_head_object = client_config.get('cloud_retain_head_object') + + cloudtier_user = client_config.get('cloudtier_user') + cloud_access_key = cloudtier_user.get('cloud_access_key') + cloud_secret = cloudtier_user.get('cloud_secret') + + # XXX: the 'default' zone and zonegroup aren't created until we run RGWRados::init_complete(). 
+ # issue a 'radosgw-admin user list' command to trigger this + rgwadmin(self.ctx, client, cmd=['user', 'list'], check_status=True) + + endpoint = self.ctx.rgw.role_endpoints[cloud_client] + + # create cloudtier storage class + tier_config_params = "endpoint=" + endpoint.url() + \ + ",access_key=" + cloud_access_key + \ + ",secret=" + cloud_secret + \ + ",retain_head_object=" + cloud_retain_head_object + + if (cloud_target_path != None): + tier_config_params += ",target_path=" + cloud_target_path + if (cloud_target_storage_class != None): + tier_config_params += ",target_storage_class=" + cloud_target_storage_class + + log.info('Configuring cloud-s3 tier storage class type = %s', cloud_storage_class) + + rgwadmin(self.ctx, client, + cmd=['zonegroup', 'placement', 'add', + '--rgw-zone', 'default', + '--placement-id', 'default-placement', + '--storage-class', cloud_storage_class, + '--tier-type', 'cloud-s3', + '--tier-config', tier_config_params], + check_status=True) + + ## create cloudtier user with the access keys given on the cloud client + cloud_tier_user_id = "cloud-tier-user-" + cloud_client + cloud_tier_user_name = "CLOUD TIER USER - " + cloud_client + rgwadmin(self.ctx, cloud_client, + cmd=['user', 'create', '--uid', cloud_tier_user_id, + '--display-name', cloud_tier_user_name, + '--access-key', cloud_access_key, + '--secret', cloud_secret, + '--caps', 'user-policy=*'], + check_status=True) + + log.info('Finished Configuring rgw cloudtier ...') + + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + self.ctx.daemons.get_daemon('rgw', client_with_id, cluster_name).restart() + log.info('restarted rgw daemon ...') + + (remote,) = self.ctx.cluster.only(client).remotes.keys() + wait_for_radosgw(endpoint.url(), remote) + + +task = RGWCloudTier diff --git a/qa/tasks/rgw_logsocket.py b/qa/tasks/rgw_logsocket.py new file mode 100644 index 000000000..d76e59d7f --- /dev/null +++ b/qa/tasks/rgw_logsocket.py @@ -0,0 +1,165 @@ +""" +rgw s3tests logging wrappers +""" +from io import BytesIO +from configobj import ConfigObj +import contextlib +import logging +from tasks import s3tests + +from teuthology import misc as teuthology +from teuthology import contextutil + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def download(ctx, config): + """ + Run s3tests download function + """ + return s3tests.download(ctx, config) + +def _config_user(s3tests_conf, section, user): + """ + Run s3tests user config function + """ + return s3tests._config_user(s3tests_conf, section, user) + +@contextlib.contextmanager +def create_users(ctx, config): + """ + Run s3tests user create function + """ + return s3tests.create_users(ctx, config) + +@contextlib.contextmanager +def configure(ctx, config): + """ + Run s3tests user configure function + """ + return s3tests.configure(ctx, config) + +@contextlib.contextmanager +def run_tests(ctx, config): + """ + Run remote netcat tests + """ + assert isinstance(config, dict) + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.items(): + client_config['extra_args'] = [ + 's3tests.functional.test_s3:test_bucket_list_return_data', + ] +# args = [ +# 'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client), +# '{tdir}/s3-tests/virtualenv/bin/nosetests'.format(tdir=testdir), +# '-w', +# '{tdir}/s3-tests'.format(tdir=testdir), +# '-v', +# 's3tests.functional.test_s3:test_bucket_list_return_data', +# ] +# if client_config is not None and 
'extra_args' in client_config: +# args.extend(client_config['extra_args']) +# +# ctx.cluster.only(client).run( +# args=args, +# ) + + s3tests.run_tests(ctx, config) + + netcat_out = BytesIO() + + for client, client_config in config.items(): + ctx.cluster.only(client).run( + args = [ + 'netcat', + '-w', '5', + '-U', '{tdir}/rgw.opslog.sock'.format(tdir=testdir), + ], + stdout = netcat_out, + ) + + out = netcat_out.getvalue() + + assert len(out) > 100 + + log.info('Received', out) + + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run some s3-tests suite against rgw, verify opslog socket returns data + + Must restrict testing to a particular client:: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: [client.0] + + To pass extra arguments to nose (e.g. to run a certain test):: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: + client.0: + extra_args: ['test_s3:test_object_acl_grand_public_read'] + client.1: + extra_args: ['--exclude', 'test_100_continue'] + """ + assert hasattr(ctx, 'rgw'), 'rgw-logsocket must run after the rgw task' + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task rgw-logsocket only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for (client, cconf) in config.items(): + teuthology.deep_merge(cconf, overrides.get('rgw-logsocket', {})) + + log.debug('config is %s', config) + + s3tests_conf = {} + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 'rgw-logsocket: no rgw endpoint for {}'.format(client) + + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 alt' : {}, + } + ) + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: create_users(ctx=ctx, config=dict( + clients=clients, + s3tests_conf=s3tests_conf, + )), + lambda: configure(ctx=ctx, config=dict( + clients=config, + s3tests_conf=s3tests_conf, + )), + lambda: run_tests(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/rgw_module.py b/qa/tasks/rgw_module.py new file mode 100644 index 000000000..0d2ca9094 --- /dev/null +++ b/qa/tasks/rgw_module.py @@ -0,0 +1,53 @@ +import logging +import yaml + +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + + +def _shell(ctx, cluster_name, remote, args, extra_cephadm_args=[], **kwargs): + teuthology.get_testdir(ctx) + return remote.run( + args=[ + 'sudo', + ctx.cephadm, + '--image', ctx.ceph[cluster_name].image, + 'shell', + '-c', '/etc/ceph/{}.conf'.format(cluster_name), + '-k', '/etc/ceph/{}.client.admin.keyring'.format(cluster_name), + '--fsid', ctx.ceph[cluster_name].fsid, + ] + extra_cephadm_args + [ + '--', + ] + args, + **kwargs + ) + + +def apply(ctx, config): + """ + Apply spec + + tasks: + - rgw_module.apply: + specs: + - rgw_realm: myrealm1 + rgw_zonegroup: myzonegroup1 + rgw_zone: myzone1 + placement: + hosts: + - ceph-node-0 + - ceph-node-1 + spec: + rgw_frontend_port: 5500 + """ + cluster_name = config.get('cluster', 'ceph') + specs = config.get('specs', []) + y = yaml.dump_all(specs) + 
log.info(f'Applying spec(s):\n{y}') + _shell( + ctx, cluster_name, ctx.ceph[cluster_name].bootstrap_remote, + ['ceph', 'rgw', 'realm', 'bootstrap', '-i', '-'], + stdin=y, + ) diff --git a/qa/tasks/rgw_multi b/qa/tasks/rgw_multi new file mode 120000 index 000000000..abfc703b9 --- /dev/null +++ b/qa/tasks/rgw_multi @@ -0,0 +1 @@ +../../src/test/rgw/rgw_multi
\ No newline at end of file diff --git a/qa/tasks/rgw_multisite.py b/qa/tasks/rgw_multisite.py new file mode 100644 index 000000000..f5a6f5a26 --- /dev/null +++ b/qa/tasks/rgw_multisite.py @@ -0,0 +1,428 @@ +""" +rgw multisite configuration routines +""" +import argparse +import logging +import random +import string +from copy import deepcopy +from tasks.util.rgw import rgwadmin, wait_for_radosgw +from tasks.util.rados import create_ec_pool, create_replicated_pool +from tasks.rgw_multi import multisite +from tasks.rgw_multi.zone_rados import RadosZone as RadosZone + +from teuthology.orchestra import run +from teuthology import misc +from teuthology.exceptions import ConfigError +from teuthology.task import Task + +log = logging.getLogger(__name__) + +class RGWMultisite(Task): + """ + Performs rgw multisite configuration to match the given realm definition. + + - rgw-multisite: + realm: + name: test-realm + is_default: true + + List one or more zonegroup definitions. These are provided as json + input to `radosgw-admin zonegroup set`, with the exception of these keys: + + * 'is_master' is passed on the command line as --master + * 'is_default' is passed on the command line as --default + * 'endpoints' given as client names are replaced with actual endpoints + + zonegroups: + - name: test-zonegroup + api_name: test-api + is_master: true + is_default: true + endpoints: [c1.client.0] + + List each of the zones to be created in this zonegroup. + + zones: + - name: test-zone1 + is_master: true + is_default: true + endpoints: [c1.client.0] + - name: test-zone2 + is_default: true + endpoints: [c2.client.0] + + A complete example: + + tasks: + - install: + - ceph: {cluster: c1} + - ceph: {cluster: c2} + - rgw: + c1.client.0: + c2.client.0: + - rgw-multisite: + realm: + name: test-realm + is_default: true + zonegroups: + - name: test-zonegroup + is_master: true + is_default: true + zones: + - name: test-zone1 + is_master: true + is_default: true + endpoints: [c1.client.0] + - name: test-zone2 + is_default: true + endpoints: [c2.client.0] + + """ + def __init__(self, ctx, config): + super(RGWMultisite, self).__init__(ctx, config) + + def setup(self): + super(RGWMultisite, self).setup() + + overrides = self.ctx.config.get('overrides', {}) + misc.deep_merge(self.config, overrides.get('rgw-multisite', {})) + + if not self.ctx.rgw: + raise ConfigError('rgw-multisite must run after the rgw task') + role_endpoints = self.ctx.rgw.role_endpoints + + # construct Clusters and Gateways for each client in the rgw task + clusters, gateways = extract_clusters_and_gateways(self.ctx, + role_endpoints) + + # get the master zone and zonegroup configuration + mz, mzg = extract_master_zone_zonegroup(self.config['zonegroups']) + cluster1 = cluster_for_zone(clusters, mz) + + # create the realm and period on the master zone's cluster + log.info('creating realm..') + realm = create_realm(cluster1, self.config['realm']) + period = realm.current_period + + creds = gen_credentials() + + # create the master zonegroup and its master zone + log.info('creating master zonegroup..') + master_zonegroup = create_zonegroup(cluster1, gateways, period, + deepcopy(mzg)) + period.master_zonegroup = master_zonegroup + + log.info('creating master zone..') + master_zone = create_zone(self.ctx, cluster1, gateways, creds, + master_zonegroup, deepcopy(mz)) + master_zonegroup.master_zone = master_zone + + period.update(master_zone, commit=True) + restart_zone_gateways(master_zone) # restart with --rgw-zone + + # create the admin user on the 
master zone + log.info('creating admin user..') + user_args = ['--display-name', 'Realm Admin', '--system'] + user_args += creds.credential_args() + admin_user = multisite.User('realm-admin') + admin_user.create(master_zone, user_args) + + # process 'zonegroups' + for zg_config in self.config['zonegroups']: + zones_config = zg_config.pop('zones') + + zonegroup = None + for zone_config in zones_config: + # get the cluster for this zone + cluster = cluster_for_zone(clusters, zone_config) + + if cluster != cluster1: # already created on master cluster + log.info('pulling realm configuration to %s', cluster.name) + realm.pull(cluster, master_zone.gateways[0], creds) + + # use the first zone's cluster to create the zonegroup + if not zonegroup: + if zg_config['name'] == master_zonegroup.name: + zonegroup = master_zonegroup + else: + log.info('creating zonegroup..') + zonegroup = create_zonegroup(cluster, gateways, + period, zg_config) + + if zone_config['name'] == master_zone.name: + # master zone was already created + zone = master_zone + else: + # create the zone and commit the period + log.info('creating zone..') + zone = create_zone(self.ctx, cluster, gateways, creds, + zonegroup, zone_config) + period.update(zone, commit=True) + + restart_zone_gateways(zone) # restart with --rgw-zone + + # attach configuration to the ctx for other tasks + self.ctx.rgw_multisite = argparse.Namespace() + self.ctx.rgw_multisite.clusters = clusters + self.ctx.rgw_multisite.gateways = gateways + self.ctx.rgw_multisite.realm = realm + self.ctx.rgw_multisite.admin_user = admin_user + + log.info('rgw multisite configuration completed') + + def end(self): + del self.ctx.rgw_multisite + +class Cluster(multisite.Cluster): + """ Issues 'radosgw-admin' commands with the rgwadmin() helper """ + def __init__(self, ctx, name, client): + super(Cluster, self).__init__() + self.ctx = ctx + self.name = name + self.client = client + + def admin(self, args = None, **kwargs): + """ radosgw-admin command """ + args = args or [] + args += ['--cluster', self.name] + args += ['--debug-rgw', str(kwargs.pop('debug_rgw', 0))] + args += ['--debug-ms', str(kwargs.pop('debug_ms', 0))] + if kwargs.pop('read_only', False): + args += ['--rgw-cache-enabled', 'false'] + kwargs['decode'] = False + check_retcode = kwargs.pop('check_retcode', True) + r, s = rgwadmin(self.ctx, self.client, args, **kwargs) + if check_retcode: + assert r == 0 + return s, r + +class Gateway(multisite.Gateway): + """ Controls a radosgw instance using its daemon """ + def __init__(self, role, remote, daemon, *args, **kwargs): + super(Gateway, self).__init__(*args, **kwargs) + self.role = role + self.remote = remote + self.daemon = daemon + + def set_zone(self, zone): + """ set the zone and add its args to the daemon's command line """ + assert self.zone is None, 'zone can only be set once' + self.zone = zone + # daemon.restart_with_args() would be perfect for this, except that + # radosgw args likely include a pipe and redirect. 
zone arguments at + # the end won't actually apply to radosgw + args = self.daemon.command_kwargs.get('args', []) + try: + # insert zone args before the first | + pipe = args.index(run.Raw('|')) + args = args[0:pipe] + zone.zone_args() + args[pipe:] + except ValueError: + args += zone.zone_args() + self.daemon.command_kwargs['args'] = args + + def start(self, args = None): + """ (re)start the daemon """ + self.daemon.restart() + # wait until startup completes + wait_for_radosgw(self.endpoint(), self.remote) + + def stop(self): + """ stop the daemon """ + self.daemon.stop() + +def extract_clusters_and_gateways(ctx, role_endpoints): + """ create cluster and gateway instances for all of the radosgw roles """ + clusters = {} + gateways = {} + for role, endpoint in role_endpoints.items(): + cluster_name, daemon_type, client_id = misc.split_role(role) + # find or create the cluster by name + cluster = clusters.get(cluster_name) + if not cluster: + clusters[cluster_name] = cluster = Cluster(ctx, cluster_name, role) + # create a gateway for this daemon + client_with_id = daemon_type + '.' + client_id # match format from rgw.py + daemon = ctx.daemons.get_daemon('rgw', client_with_id, cluster_name) + if not daemon: + raise ConfigError('no daemon for role=%s cluster=%s type=rgw id=%s' % \ + (role, cluster_name, client_id)) + (remote,) = ctx.cluster.only(role).remotes.keys() + gateways[role] = Gateway(role, remote, daemon, endpoint.hostname, + endpoint.port, cluster) + return clusters, gateways + +def create_realm(cluster, config): + """ create a realm from configuration and initialize its first period """ + realm = multisite.Realm(config['name']) + args = [] + if config.get('is_default', False): + args += ['--default'] + realm.create(cluster, args) + realm.current_period = multisite.Period(realm) + return realm + +def extract_user_credentials(config): + """ extract keys from configuration """ + return multisite.Credentials(config['access_key'], config['secret_key']) + +def extract_master_zone(zonegroup_config): + """ find and return the master zone definition """ + master = None + for zone in zonegroup_config['zones']: + if not zone.get('is_master', False): + continue + if master: + raise ConfigError('zones %s and %s cannot both set \'is_master\'' % \ + (master['name'], zone['name'])) + master = zone + # continue the loop so we can detect duplicates + if not master: + raise ConfigError('one zone must set \'is_master\' in zonegroup %s' % \ + zonegroup_config['name']) + return master + +def extract_master_zone_zonegroup(zonegroups_config): + """ find and return the master zone and zonegroup definitions """ + master_zone, master_zonegroup = (None, None) + for zonegroup in zonegroups_config: + # verify that all zonegroups have a master zone set, even if they + # aren't in the master zonegroup + zone = extract_master_zone(zonegroup) + if not zonegroup.get('is_master', False): + continue + if master_zonegroup: + raise ConfigError('zonegroups %s and %s cannot both set \'is_master\'' % \ + (master_zonegroup['name'], zonegroup['name'])) + master_zonegroup = zonegroup + master_zone = zone + # continue the loop so we can detect duplicates + if not master_zonegroup: + raise ConfigError('one zonegroup must set \'is_master\'') + return master_zone, master_zonegroup + +def extract_zone_cluster_name(zone_config): + """ return the cluster (must be common to all zone endpoints) """ + cluster_name = None + endpoints = zone_config.get('endpoints') + if not endpoints: + raise ConfigError('zone %s missing \'endpoints\' 
list' % \ + zone_config['name']) + for role in endpoints: + name, _, _ = misc.split_role(role) + if not cluster_name: + cluster_name = name + elif cluster_name != name: + raise ConfigError('all zone %s endpoints must be in the same cluster' % \ + zone_config['name']) + return cluster_name + +def cluster_for_zone(clusters, zone_config): + """ return the cluster entry for the given zone """ + name = extract_zone_cluster_name(zone_config) + try: + return clusters[name] + except KeyError: + raise ConfigError('no cluster %s found' % name) + +def gen_access_key(): + return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(16)) + +def gen_secret(): + return ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(32)) + +def gen_credentials(): + return multisite.Credentials(gen_access_key(), gen_secret()) + +def extract_gateway_endpoints(gateways, endpoints_config): + """ return a list of gateway endpoints associated with the given roles """ + endpoints = [] + for role in endpoints_config: + try: + # replace role names with their gateway's endpoint + endpoints.append(gateways[role].endpoint()) + except KeyError: + raise ConfigError('no radosgw endpoint found for role %s' % role) + return endpoints + +def is_default_arg(config): + return ['--default'] if config.pop('is_default', False) else [] + +def is_master_arg(config): + return ['--master'] if config.pop('is_master', False) else [] + +def create_zonegroup(cluster, gateways, period, config): + """ pass the zonegroup configuration to `zonegroup set` """ + config.pop('zones', None) # remove 'zones' from input to `zonegroup set` + endpoints = config.get('endpoints') + if endpoints: + # replace client names with their gateway endpoints + config['endpoints'] = extract_gateway_endpoints(gateways, endpoints) + zonegroup = multisite.ZoneGroup(config['name'], period) + # `zonegroup set` needs --default on command line, and 'is_master' in json + args = is_default_arg(config) + zonegroup.set(cluster, config, args) + period.zonegroups.append(zonegroup) + return zonegroup + +def create_zone(ctx, cluster, gateways, creds, zonegroup, config): + """ create a zone with the given configuration """ + zone = multisite.Zone(config['name'], zonegroup, cluster) + zone = RadosZone(config['name'], zonegroup, cluster) + + # collect Gateways for the zone's endpoints + endpoints = config.get('endpoints') + if not endpoints: + raise ConfigError('no \'endpoints\' for zone %s' % config['name']) + zone.gateways = [gateways[role] for role in endpoints] + for gateway in zone.gateways: + gateway.set_zone(zone) + + # format the gateway endpoints + endpoints = [g.endpoint() for g in zone.gateways] + + args = is_default_arg(config) + args += is_master_arg(config) + args += creds.credential_args() + if len(endpoints): + args += ['--endpoints', ','.join(endpoints)] + zone.create(cluster, args) + zonegroup.zones.append(zone) + + create_zone_pools(ctx, zone) + if ctx.rgw.compression_type: + configure_zone_compression(zone, ctx.rgw.compression_type) + + zonegroup.zones_by_type.setdefault(zone.tier_type(), []).append(zone) + + if zone.is_read_only(): + zonegroup.ro_zones.append(zone) + else: + zonegroup.rw_zones.append(zone) + + return zone + +def create_zone_pools(ctx, zone): + """ Create the data_pool for each placement type """ + gateway = zone.gateways[0] + cluster = zone.cluster + for pool_config in zone.data.get('placement_pools', []): + pool_name = pool_config['val']['storage_classes']['STANDARD']['data_pool'] 
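
The loop above reads the zone's placement configuration, as reported by `radosgw-admin zone get`, and pulls out the STANDARD data pool for each placement target. A rough sketch of the structure being indexed (field names follow the zone JSON; the pool names here are invented for illustration):

    # Indicative shape only; fields mirror `radosgw-admin zone get` output
    # and the pool names are placeholders.
    placement_pools = [
        {
            'key': 'default-placement',
            'val': {
                'index_pool': 'test-zone1.rgw.buckets.index',
                'storage_classes': {
                    'STANDARD': {'data_pool': 'test-zone1.rgw.buckets.data'},
                },
            },
        },
    ]
    data_pools = [p['val']['storage_classes']['STANDARD']['data_pool']
                  for p in placement_pools]
    assert data_pools == ['test-zone1.rgw.buckets.data']
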
+ if ctx.rgw.ec_data_pool: + create_ec_pool(gateway.remote, pool_name, zone.name, 64, + ctx.rgw.erasure_code_profile, cluster.name, 'rgw') + else: + create_replicated_pool(gateway.remote, pool_name, 64, cluster.name, 'rgw') + +def configure_zone_compression(zone, compression): + """ Set compression type in the zone's default-placement """ + zone.json_command(zone.cluster, 'placement', ['modify', + '--placement-id', 'default-placement', + '--compression', compression + ]) + +def restart_zone_gateways(zone): + zone.stop() + zone.start() + +task = RGWMultisite diff --git a/qa/tasks/rgw_multisite_tests.py b/qa/tasks/rgw_multisite_tests.py new file mode 100644 index 000000000..888a37181 --- /dev/null +++ b/qa/tasks/rgw_multisite_tests.py @@ -0,0 +1,120 @@ +""" +rgw multisite testing +""" +import importlib.util +import logging +import nose.core +import nose.config +import sys + +from nose.plugins.manager import DefaultPluginManager +from teuthology.config import config as teuth_config +from teuthology.exceptions import ConfigError +from teuthology.repo_utils import fetch_repo +from teuthology.task import Task +from teuthology import misc + +log = logging.getLogger(__name__) + + +class RGWMultisiteTests(Task): + """ + Runs the rgw_multi tests against a multisite configuration created by the + rgw-multisite task. Tests are run with nose, using any additional 'args' + provided. Overrides for tests.Config can be set in 'config'. The 'branch' + and 'repo' can be overridden to clone the rgw_multi tests from another + release. + + - rgw-multisite-tests: + args: + - tests.py:test_object_sync + config: + reconfigure_delay: 60 + branch: octopus + repo: https://github.com/ceph/ceph.git + + """ + def __init__(self, ctx, config): + super(RGWMultisiteTests, self).__init__(ctx, config) + + def setup(self): + super(RGWMultisiteTests, self).setup() + + overrides = self.ctx.config.get('overrides', {}) + misc.deep_merge(self.config, overrides.get('rgw-multisite-tests', {})) + + if not self.ctx.rgw_multisite: + raise ConfigError('rgw-multisite-tests must run after the rgw-multisite task') + realm = self.ctx.rgw_multisite.realm + master_zone = realm.meta_master_zone() + + branch = self.config.get('branch') + if not branch: + # run from suite_path + suite_path = self.ctx.config.get('suite_path') + self.module_path = suite_path + '/../src/test/rgw/rgw_multi' + else: + # clone the qa branch + repo = self.config.get('repo', teuth_config.get_ceph_qa_suite_git_url()) + log.info("cloning suite branch %s from %s...", branch, repo) + clonedir = fetch_repo(repo, branch) + # import its version of rgw_multi + self.module_path = clonedir + '/src/test/rgw/rgw_multi' + + log.info("importing tests from %s", self.module_path) + spec = importlib.util.spec_from_file_location('rgw_multi', self.module_path + '/__init__.py') + module = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = module + spec.loader.exec_module(module) + + from rgw_multi import multisite, tests + + # create the test user + log.info('creating test user..') + user = multisite.User('rgw-multisite-test-user') + user.create(master_zone, ['--display-name', 'Multisite Test User', + '--gen-access-key', '--gen-secret', '--caps', 'roles=*']) + + config = self.config.get('config', {}) + tests.init_multi(realm, user, tests.Config(**config)) + tests.realm_meta_checkpoint(realm) + + def begin(self): + # extra arguments for nose can be passed as a string or list + extra_args = self.config.get('args', []) + if not isinstance(extra_args, list): + extra_args = 
[extra_args] + argv = [__name__] + extra_args + + log.info("running rgw multisite tests on '%s' with args=%r", + self.module_path, extra_args) + + # run nose tests in the module path + conf = nose.config.Config(stream=get_log_stream(), verbosity=2, workingDir=self.module_path) + conf.plugins = DefaultPluginManager() # overrides default = NoPlugins() + assert nose.run(argv=argv, config=conf), 'rgw multisite test failures' + + +def get_log_stream(): + """ return a log stream for nose output """ + # XXX: this is a workaround for IOErrors when nose writes to stderr, + # copied from vstart_runner.py + class LogStream(object): + def __init__(self): + self.buffer = "" + + def write(self, data): + self.buffer += data + if "\n" in self.buffer: + lines = self.buffer.split("\n") + for line in lines[:-1]: + log.info(line) + self.buffer = lines[-1] + + def flush(self): + pass + + return LogStream() + + +task = RGWMultisiteTests diff --git a/qa/tasks/rook-ceph.conf b/qa/tasks/rook-ceph.conf new file mode 100644 index 000000000..38ac11e41 --- /dev/null +++ b/qa/tasks/rook-ceph.conf @@ -0,0 +1,41 @@ +[global] + +log to file = true + +mon clock drift allowed = 1.000 + +# replicate across OSDs, not hosts +osd crush chooseleaf type = 0 + +# enable some debugging +auth debug = true +ms die on old message = true +ms die on bug = true +debug asserts on shutdown = true + + +[osd] +# debugging +osd debug shutdown = true +osd debug op order = true +osd debug verify stray on activate = true +osd debug pg log writeout = true +osd debug verify cached snaps = true +osd debug verify missing on start = true +osd debug misdirected ops = true +osd op queue = debug_random +osd op queue cut off = debug_random +osd shutdown pgref assert = true +bdev debug aio = true +osd sloppy crc = true + + +[mon] +# rotate auth tickets quickly to exercise renewal paths +auth mon ticket ttl = 660 # 11m +auth service ticket ttl = 240 # 4m + +# don't complain about global id reclaim +mon_warn_on_insecure_global_id_reclaim = false +mon_warn_on_insecure_global_id_reclaim_allowed = false + diff --git a/qa/tasks/rook.py b/qa/tasks/rook.py new file mode 100644 index 000000000..427f8324e --- /dev/null +++ b/qa/tasks/rook.py @@ -0,0 +1,654 @@ +""" +Rook cluster task +""" +import argparse +import configobj +import contextlib +import json +import logging +import os +import yaml +from io import BytesIO + +from tarfile import ReadError +from tasks.ceph_manager import CephManager +from teuthology import misc as teuthology +from teuthology.config import config as teuth_config +from teuthology.contextutil import safe_while +from teuthology.orchestra import run +from teuthology import contextutil +from tasks.ceph import healthy +from tasks.cephadm import update_archive_setting + +log = logging.getLogger(__name__) + +def path_to_examples(ctx, cluster_name : str) -> str: + for p in ['rook/deploy/examples/', 'rook/cluster/examples/kubernetes/ceph/']: + try: + ctx.rook[cluster_name].remote.get_file(p + 'operator.yaml') + return p + except: + pass + assert False, 'Path to examples not found' + +def _kubectl(ctx, config, args, **kwargs): + cluster_name = config.get('cluster', 'ceph') + return ctx.rook[cluster_name].remote.run( + args=['kubectl'] + args, + **kwargs + ) + + +def shell(ctx, config): + """ + Run command(s) inside the rook tools container. 
+ + tasks: + - kubeadm: + - rook: + - rook.shell: + - ceph -s + + or + + tasks: + - kubeadm: + - rook: + - rook.shell: + commands: + - ceph -s + + """ + if isinstance(config, list): + config = {'commands': config} + for cmd in config.get('commands', []): + if isinstance(cmd, str): + _shell(ctx, config, cmd.split(' ')) + else: + _shell(ctx, config, cmd) + + +def _shell(ctx, config, args, **kwargs): + cluster_name = config.get('cluster', 'ceph') + return _kubectl( + ctx, config, + [ + '-n', 'rook-ceph', + 'exec', + ctx.rook[cluster_name].toolbox, '--' + ] + args, + **kwargs + ) + + +@contextlib.contextmanager +def rook_operator(ctx, config): + cluster_name = config['cluster'] + rook_branch = config.get('rook_branch', 'master') + rook_git_url = config.get('rook_git_url', 'https://github.com/rook/rook') + + log.info(f'Cloning {rook_git_url} branch {rook_branch}') + ctx.rook[cluster_name].remote.run( + args=[ + 'rm', '-rf', 'rook', + run.Raw('&&'), + 'git', + 'clone', + '--single-branch', + '--branch', rook_branch, + rook_git_url, + 'rook', + ] + ) + + # operator.yaml + log.info(os.path.abspath(os.getcwd())) + object_methods = [method_name for method_name in dir(ctx.rook[cluster_name].remote) + if callable(getattr(ctx.rook[cluster_name].remote, method_name))] + log.info(object_methods) + operator_yaml = ctx.rook[cluster_name].remote.read_file( + (path_to_examples(ctx, cluster_name) + 'operator.yaml') + ) + rook_image = config.get('rook_image') + if rook_image: + log.info(f'Patching operator to use image {rook_image}') + crs = list(yaml.load_all(operator_yaml, Loader=yaml.FullLoader)) + assert len(crs) == 2 + crs[1]['spec']['template']['spec']['containers'][0]['image'] = rook_image + operator_yaml = yaml.dump_all(crs) + ctx.rook[cluster_name].remote.write_file('operator.yaml', operator_yaml) + + op_job = None + try: + log.info('Deploying operator') + _kubectl(ctx, config, [ + 'create', + '-f', (path_to_examples(ctx, cluster_name) + 'crds.yaml'), + '-f', (path_to_examples(ctx, cluster_name) + 'common.yaml'), + '-f', 'operator.yaml', + ]) + + # on centos: + if teuthology.get_distro(ctx) == 'centos': + _kubectl(ctx, config, [ + '-n', 'rook-ceph', + 'set', 'env', 'deploy/rook-ceph-operator', + 'ROOK_HOSTPATH_REQUIRES_PRIVILEGED=true' + ]) + + # wait for operator + op_name = None + with safe_while(sleep=10, tries=90, action="wait for operator") as proceed: + while not op_name and proceed(): + p = _kubectl( + ctx, config, + ['-n', 'rook-ceph', 'get', 'pods', '-l', 'app=rook-ceph-operator'], + stdout=BytesIO(), + ) + for line in p.stdout.getvalue().decode('utf-8').strip().splitlines(): + name, ready, status, _ = line.split(None, 3) + if status == 'Running': + op_name = name + break + + # log operator output + op_job = _kubectl( + ctx, + config, + ['-n', 'rook-ceph', 'logs', '-f', op_name], + wait=False, + logger=log.getChild('operator'), + ) + + yield + + except Exception as e: + log.exception(e) + raise + + finally: + log.info('Cleaning up rook operator') + _kubectl(ctx, config, [ + 'delete', + '-f', 'operator.yaml', + ]) + if False: + # don't bother since we'll tear down k8s anyway (and this mysteriously + # fails sometimes when deleting some of the CRDs... not sure why!) 
+ _kubectl(ctx, config, [ + 'delete', + '-f', (path_to_examples() + 'common.yaml'), + ]) + _kubectl(ctx, config, [ + 'delete', + '-f', (path_to_examples() + 'crds.yaml'), + ]) + ctx.rook[cluster_name].remote.run(args=['rm', '-rf', 'rook', 'operator.yaml']) + if op_job: + op_job.wait() + run.wait( + ctx.cluster.run( + args=[ + 'sudo', 'rm', '-rf', '/var/lib/rook' + ] + ) + ) + + +@contextlib.contextmanager +def ceph_log(ctx, config): + cluster_name = config['cluster'] + + log_dir = '/var/lib/rook/rook-ceph/log' + update_archive_setting(ctx, 'log', log_dir) + + try: + yield + + except Exception: + # we need to know this below + ctx.summary['success'] = False + raise + + finally: + log.info('Checking cluster log for badness...') + def first_in_ceph_log(pattern, excludes): + """ + Find the first occurrence of the pattern specified in the Ceph log, + Returns None if none found. + + :param pattern: Pattern scanned for. + :param excludes: Patterns to ignore. + :return: First line of text (or None if not found) + """ + args = [ + 'sudo', + 'egrep', pattern, + f'{log_dir}/ceph.log', + ] + if excludes: + for exclude in excludes: + args.extend([run.Raw('|'), 'egrep', '-v', exclude]) + args.extend([ + run.Raw('|'), 'head', '-n', '1', + ]) + r = ctx.rook[cluster_name].remote.run( + stdout=BytesIO(), + args=args, + ) + stdout = r.stdout.getvalue().decode() + if stdout: + return stdout + return None + + if first_in_ceph_log('\[ERR\]|\[WRN\]|\[SEC\]', + config.get('log-ignorelist')) is not None: + log.warning('Found errors (ERR|WRN|SEC) in cluster log') + ctx.summary['success'] = False + # use the most severe problem as the failure reason + if 'failure_reason' not in ctx.summary: + for pattern in ['\[SEC\]', '\[ERR\]', '\[WRN\]']: + match = first_in_ceph_log(pattern, config['log-ignorelist']) + if match is not None: + ctx.summary['failure_reason'] = \ + '"{match}" in cluster log'.format( + match=match.rstrip('\n'), + ) + break + + if ctx.archive is not None and \ + not (ctx.config.get('archive-on-error') and ctx.summary['success']): + # and logs + log.info('Compressing logs...') + run.wait( + ctx.cluster.run( + args=[ + 'sudo', + 'find', + log_dir, + '-name', + '*.log', + '-print0', + run.Raw('|'), + 'sudo', + 'xargs', + '-0', + '--no-run-if-empty', + '--', + 'gzip', + '--', + ], + wait=False, + ), + ) + + log.info('Archiving logs...') + path = os.path.join(ctx.archive, 'remote') + try: + os.makedirs(path) + except OSError: + pass + for remote in ctx.cluster.remotes.keys(): + sub = os.path.join(path, remote.name) + try: + os.makedirs(sub) + except OSError: + pass + try: + teuthology.pull_directory(remote, log_dir, + os.path.join(sub, 'log')) + except ReadError: + pass + + +def build_initial_config(ctx, config): + path = os.path.join(os.path.dirname(__file__), 'rook-ceph.conf') + conf = configobj.ConfigObj(path, file_error=True) + + # overrides + for section, keys in config.get('conf',{}).items(): + for key, value in keys.items(): + log.info(" override: [%s] %s = %s" % (section, key, value)) + if section not in conf: + conf[section] = {} + conf[section][key] = value + + return conf + + +@contextlib.contextmanager +def rook_cluster(ctx, config): + cluster_name = config['cluster'] + + # count how many OSDs we'll create + num_devs = 0 + num_hosts = 0 + for remote in ctx.cluster.remotes.keys(): + ls = remote.read_file('/scratch_devs').decode('utf-8').strip().splitlines() + num_devs += len(ls) + num_hosts += 1 + ctx.rook[cluster_name].num_osds = num_devs + + # config + ceph_conf = build_initial_config(ctx, 
config) + ceph_conf_fp = BytesIO() + ceph_conf.write(ceph_conf_fp) + log.info(f'Config:\n{ceph_conf_fp.getvalue()}') + _kubectl(ctx, ceph_conf, ['create', '-f', '-'], stdin=yaml.dump({ + 'apiVersion': 'v1', + 'kind': 'ConfigMap', + 'metadata': { + 'name': 'rook-config-override', + 'namespace': 'rook-ceph'}, + 'data': { + 'config': ceph_conf_fp.getvalue() + } + })) + + # cluster + cluster = { + 'apiVersion': 'ceph.rook.io/v1', + 'kind': 'CephCluster', + 'metadata': {'name': 'rook-ceph', 'namespace': 'rook-ceph'}, + 'spec': { + 'cephVersion': { + 'image': ctx.rook[cluster_name].image, + 'allowUnsupported': True, + }, + 'dataDirHostPath': '/var/lib/rook', + 'skipUpgradeChecks': True, + 'mgr': { + 'count': 1, + 'modules': [ + { 'name': 'rook', 'enabled': True }, + ], + }, + 'mon': { + 'count': num_hosts, + 'allowMultiplePerNode': True, + }, + 'storage': { + 'storageClassDeviceSets': [ + { + 'name': 'scratch', + 'count': num_devs, + 'portable': False, + 'volumeClaimTemplates': [ + { + 'metadata': {'name': 'data'}, + 'spec': { + 'resources': { + 'requests': { + 'storage': '10Gi' # <= (lte) the actual PV size + } + }, + 'storageClassName': 'scratch', + 'volumeMode': 'Block', + 'accessModes': ['ReadWriteOnce'], + }, + }, + ], + } + ], + }, + } + } + teuthology.deep_merge(cluster['spec'], config.get('spec', {})) + + cluster_yaml = yaml.dump(cluster) + log.info(f'Cluster:\n{cluster_yaml}') + try: + ctx.rook[cluster_name].remote.write_file('cluster.yaml', cluster_yaml) + _kubectl(ctx, config, ['create', '-f', 'cluster.yaml']) + yield + + except Exception as e: + log.exception(e) + raise + + finally: + _kubectl(ctx, config, ['delete', '-f', 'cluster.yaml'], check_status=False) + + # wait for cluster to shut down + log.info('Waiting for cluster to stop') + running = True + with safe_while(sleep=5, tries=100, action="wait for teardown") as proceed: + while running and proceed(): + p = _kubectl( + ctx, config, + ['-n', 'rook-ceph', 'get', 'pods'], + stdout=BytesIO(), + ) + running = False + for line in p.stdout.getvalue().decode('utf-8').strip().splitlines(): + name, ready, status, _ = line.split(None, 3) + if ( + name != 'NAME' + and not name.startswith('csi-') + and not name.startswith('rook-ceph-operator-') + and not name.startswith('rook-ceph-tools-') + ): + running = True + break + + _kubectl( + ctx, config, + ['-n', 'rook-ceph', 'delete', 'configmap', 'rook-config-override'], + check_status=False, + ) + ctx.rook[cluster_name].remote.run(args=['rm', '-f', 'cluster.yaml']) + + +@contextlib.contextmanager +def rook_toolbox(ctx, config): + cluster_name = config['cluster'] + try: + _kubectl(ctx, config, [ + 'create', + '-f', (path_to_examples(ctx, cluster_name) + 'toolbox.yaml'), + ]) + + log.info('Waiting for tools container to start') + toolbox = None + with safe_while(sleep=5, tries=100, action="wait for toolbox") as proceed: + while not toolbox and proceed(): + p = _kubectl( + ctx, config, + ['-n', 'rook-ceph', 'get', 'pods', '-l', 'app=rook-ceph-tools'], + stdout=BytesIO(), + ) + _kubectl( + ctx, config, + ['-n', 'rook-ceph', 'get', 'pods'], + stdout=BytesIO(), + ) + for line in p.stdout.getvalue().decode('utf-8').strip().splitlines(): + name, ready, status, _ = line.split(None, 3) + if status == 'Running': + toolbox = name + break + ctx.rook[cluster_name].toolbox = toolbox + yield + + except Exception as e: + log.exception(e) + raise + + finally: + _kubectl(ctx, config, [ + 'delete', + '-f', (path_to_examples(ctx, cluster_name) + 'toolbox.yaml'), + ], check_status=False) + + 
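
The readiness and teardown checks in this file share one polling idiom: teuthology's safe_while yields a proceed() callable that sleeps between attempts and gives up after a fixed number of tries. A minimal sketch of that pattern, with condition and action as placeholder arguments:

    from teuthology.contextutil import safe_while

    def wait_for(condition, action):
        """Poll condition() every 10s, up to 90 attempts, as the checks above do."""
        with safe_while(sleep=10, tries=90, action=action) as proceed:
            while proceed():   # raises when the attempt budget is exhausted
                if condition():
                    return

In this file the condition is typically a kubectl or `ceph osd stat` probe whose stdout is captured into a BytesIO and parsed.
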
+@contextlib.contextmanager +def wait_for_osds(ctx, config): + cluster_name = config.get('cluster', 'ceph') + + want = ctx.rook[cluster_name].num_osds + log.info(f'Waiting for {want} OSDs') + with safe_while(sleep=10, tries=90, action="check osd count") as proceed: + while proceed(): + p = _shell(ctx, config, ['ceph', 'osd', 'stat', '-f', 'json'], + stdout=BytesIO(), + check_status=False) + if p.exitstatus == 0: + r = json.loads(p.stdout.getvalue().decode('utf-8')) + have = r.get('num_up_osds', 0) + if have == want: + break + log.info(f' have {have}/{want} OSDs') + + yield + +@contextlib.contextmanager +def ceph_config_keyring(ctx, config): + # get config and push to hosts + log.info('Distributing ceph config and client.admin keyring') + p = _shell(ctx, config, ['cat', '/etc/ceph/ceph.conf'], stdout=BytesIO()) + conf = p.stdout.getvalue() + p = _shell(ctx, config, ['cat', '/etc/ceph/keyring'], stdout=BytesIO()) + keyring = p.stdout.getvalue() + ctx.cluster.run(args=['sudo', 'mkdir', '-p', '/etc/ceph']) + for remote in ctx.cluster.remotes.keys(): + remote.write_file( + '/etc/ceph/ceph.conf', + conf, + sudo=True, + ) + remote.write_file( + '/etc/ceph/keyring', + keyring, + sudo=True, + ) + + try: + yield + + except Exception as e: + log.exception(e) + raise + + finally: + log.info('Cleaning up config and client.admin keyring') + ctx.cluster.run(args=[ + 'sudo', 'rm', '-f', + '/etc/ceph/ceph.conf', + '/etc/ceph/ceph.client.admin.keyring' + ]) + + +@contextlib.contextmanager +def ceph_clients(ctx, config): + cluster_name = config['cluster'] + + log.info('Setting up client nodes...') + clients = ctx.cluster.only(teuthology.is_type('client', cluster_name)) + for remote, roles_for_host in clients.remotes.items(): + for role in teuthology.cluster_roles_of_type(roles_for_host, 'client', + cluster_name): + name = teuthology.ceph_role(role) + client_keyring = '/etc/ceph/{0}.{1}.keyring'.format(cluster_name, + name) + r = _shell(ctx, config, + args=[ + 'ceph', 'auth', + 'get-or-create', name, + 'mon', 'allow *', + 'osd', 'allow *', + 'mds', 'allow *', + 'mgr', 'allow *', + ], + stdout=BytesIO(), + ) + keyring = r.stdout.getvalue() + remote.write_file(client_keyring, keyring, sudo=True, mode='0644') + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy rook-ceph cluster + + tasks: + - kubeadm: + - rook: + branch: wip-foo + spec: + mon: + count: 1 + + The spec item is deep-merged against the cluster.yaml. The branch, sha1, or + image items are used to determine the Ceph container image. 
+ """ + if not config: + config = {} + assert isinstance(config, dict), \ + "task only supports a dictionary for configuration" + + log.info('Rook start') + + overrides = ctx.config.get('overrides', {}) + teuthology.deep_merge(config, overrides.get('ceph', {})) + teuthology.deep_merge(config, overrides.get('rook', {})) + log.info('Config: ' + str(config)) + + # set up cluster context + if not hasattr(ctx, 'rook'): + ctx.rook = {} + if 'cluster' not in config: + config['cluster'] = 'ceph' + cluster_name = config['cluster'] + if cluster_name not in ctx.rook: + ctx.rook[cluster_name] = argparse.Namespace() + + ctx.rook[cluster_name].remote = list(ctx.cluster.remotes.keys())[0] + + # image + teuth_defaults = teuth_config.get('defaults', {}) + cephadm_defaults = teuth_defaults.get('cephadm', {}) + containers_defaults = cephadm_defaults.get('containers', {}) + container_image_name = containers_defaults.get('image', None) + if 'image' in config: + ctx.rook[cluster_name].image = config.get('image') + else: + sha1 = config.get('sha1') + flavor = config.get('flavor', 'default') + if sha1: + if flavor == "crimson": + ctx.rook[cluster_name].image = container_image_name + ':' + sha1 + '-' + flavor + else: + ctx.rook[cluster_name].image = container_image_name + ':' + sha1 + else: + # hmm, fall back to branch? + branch = config.get('branch', 'master') + ctx.rook[cluster_name].image = container_image_name + ':' + branch + log.info('Ceph image is %s' % ctx.rook[cluster_name].image) + + with contextutil.nested( + lambda: rook_operator(ctx, config), + lambda: ceph_log(ctx, config), + lambda: rook_cluster(ctx, config), + lambda: rook_toolbox(ctx, config), + lambda: wait_for_osds(ctx, config), + lambda: ceph_config_keyring(ctx, config), + lambda: ceph_clients(ctx, config), + ): + if not hasattr(ctx, 'managers'): + ctx.managers = {} + ctx.managers[cluster_name] = CephManager( + ctx.rook[cluster_name].remote, + ctx=ctx, + logger=log.getChild('ceph_manager.' 
+ cluster_name), + cluster=cluster_name, + rook=True, + ) + try: + if config.get('wait-for-healthy', True): + healthy(ctx=ctx, config=config) + log.info('Rook complete, yielding') + yield + + finally: + log.info('Tearing down rook') diff --git a/qa/tasks/s3a_hadoop.py b/qa/tasks/s3a_hadoop.py new file mode 100644 index 000000000..7b77359fc --- /dev/null +++ b/qa/tasks/s3a_hadoop.py @@ -0,0 +1,285 @@ +import contextlib +import logging +from teuthology import misc +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run Hadoop S3A tests using Ceph + usage: + -tasks: + ceph-ansible: + s3a-hadoop: + maven-version: '3.6.3' (default) + hadoop-version: '2.9.2' + bucket-name: 's3atest' (default) + access-key: 'anykey' (uses a default value) + secret-key: 'secretkey' ( uses a default value) + role: client.0 + """ + if config is None: + config = {} + + assert isinstance(config, dict), \ + "task only supports a dictionary for configuration" + + assert hasattr(ctx, 'rgw'), 's3a-hadoop must run after the rgw task' + + overrides = ctx.config.get('overrides', {}) + misc.deep_merge(config, overrides.get('s3a-hadoop', {})) + testdir = misc.get_testdir(ctx) + + role = config.get('role') + (remote,) = ctx.cluster.only(role).remotes.keys() + endpoint = ctx.rgw.role_endpoints.get(role) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(role) + + # get versions + maven_major = config.get('maven-major', 'maven-3') + maven_version = config.get('maven-version', '3.6.3') + hadoop_ver = config.get('hadoop-version', '2.9.2') + bucket_name = config.get('bucket-name', 's3atest') + access_key = config.get('access-key', 'EGAQRD2ULOIFKFSKCT4F') + secret_key = config.get( + 'secret-key', + 'zi816w1vZKfaSM85Cl0BxXTwSLyN7zB4RbTswrGb') + + # set versions for cloning the repo + apache_maven = 'apache-maven-{maven_version}-bin.tar.gz'.format( + maven_version=maven_version) + maven_link = 'http://archive.apache.org/dist/maven/' + \ + '{maven_major}/{maven_version}/binaries/'.format(maven_major=maven_major, maven_version=maven_version) + apache_maven + hadoop_git = 'https://github.com/apache/hadoop' + hadoop_rel = 'hadoop-{ver} rel/release-{ver}'.format(ver=hadoop_ver) + if hadoop_ver == 'trunk': + # just checkout a new branch out of trunk + hadoop_rel = 'hadoop-ceph-trunk' + install_prereq(remote) + remote.run( + args=[ + 'cd', + testdir, + run.Raw('&&'), + 'wget', + maven_link, + run.Raw('&&'), + 'tar', + '-xvf', + apache_maven, + run.Raw('&&'), + 'git', + 'clone', + run.Raw(hadoop_git), + run.Raw('&&'), + 'cd', + 'hadoop', + run.Raw('&&'), + 'git', + 'checkout', + '-b', + run.Raw(hadoop_rel) + ] + ) + configure_s3a(remote, endpoint.dns_name, access_key, secret_key, bucket_name, testdir) + setup_user_bucket(remote, endpoint.dns_name, access_key, secret_key, bucket_name, testdir) + if hadoop_ver.startswith('2.8'): + # test all ITtests but skip AWS test using public bucket landsat-pds + # which is not available from within this test + test_options = '-Dit.test=ITestS3A* -Dparallel-tests -Dscale \ + -Dfs.s3a.scale.test.timeout=1200 \ + -Dfs.s3a.scale.test.huge.filesize=256M verify' + else: + test_options = 'test -Dtest=S3a*,TestS3A*' + try: + run_s3atest(remote, maven_version, testdir, test_options) + yield + finally: + log.info("Done s3a testing, Cleaning up") + for fil in ['apache*', 'hadoop*', 'venv*', 'create*']: + remote.run(args=['rm', run.Raw('-rf'), run.Raw('{tdir}/{file}'.format(tdir=testdir, file=fil))]) + + +def 
install_prereq(client): + """ + Install pre requisites for RHEL and CentOS + TBD: Ubuntu + """ + if client.os.name == 'rhel' or client.os.name == 'centos': + client.run( + args=[ + 'sudo', + 'yum', + 'install', + '-y', + 'protobuf-c.x86_64', + 'java', + 'java-1.8.0-openjdk-devel', + 'dnsmasq' + ] + ) + + +def setup_user_bucket(client, dns_name, access_key, secret_key, bucket_name, testdir): + """ + Create user with access_key and secret_key that will be + used for the s3a testdir + """ + client.run( + args=[ + 'sudo', + 'radosgw-admin', + 'user', + 'create', + run.Raw('--uid'), + 's3a', + run.Raw('--display-name="s3a cephtests"'), + run.Raw('--access-key={access_key}'.format(access_key=access_key)), + run.Raw('--secret-key={secret_key}'.format(secret_key=secret_key)), + run.Raw('--email=s3a@ceph.com'), + ] + ) + client.run( + args=[ + 'python3', + '-m', + 'venv', + '{testdir}/venv'.format(testdir=testdir), + run.Raw('&&'), + run.Raw('{testdir}/venv/bin/pip'.format(testdir=testdir)), + 'install', + 'boto' + ] + ) + create_bucket = """ +#!/usr/bin/env python +import boto +import boto.s3.connection +access_key = '{access_key}' +secret_key = '{secret_key}' + +conn = boto.connect_s3( + aws_access_key_id = access_key, + aws_secret_access_key = secret_key, + host = '{dns_name}', + is_secure=False, + calling_format = boto.s3.connection.OrdinaryCallingFormat(), + ) +bucket = conn.create_bucket('{bucket_name}') +for bucket in conn.get_all_buckets(): + print(bucket.name + "\t" + bucket.creation_date) +""".format(access_key=access_key, secret_key=secret_key, dns_name=dns_name, bucket_name=bucket_name) + py_bucket_file = '{testdir}/create_bucket.py'.format(testdir=testdir) + client.sudo_write_file(py_bucket_file, create_bucket, mode='0744') + client.run( + args=[ + 'cat', + '{testdir}/create_bucket.py'.format(testdir=testdir), + ] + ) + client.run( + args=[ + '{testdir}/venv/bin/python'.format(testdir=testdir), + '{testdir}/create_bucket.py'.format(testdir=testdir), + ] + ) + + +def run_s3atest(client, maven_version, testdir, test_options): + """ + Finally run the s3a test + """ + aws_testdir = '{testdir}/hadoop/hadoop-tools/hadoop-aws/'.format(testdir=testdir) + run_test = '{testdir}/apache-maven-{maven_version}/bin/mvn'.format(testdir=testdir, maven_version=maven_version) + # Remove AWS CredentialsProvider tests as it hits public bucket from AWS + # better solution is to create the public bucket on local server and test + rm_test = 'rm src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java' + client.run( + args=[ + 'cd', + run.Raw(aws_testdir), + run.Raw('&&'), + run.Raw(rm_test), + run.Raw('&&'), + run.Raw(run_test), + run.Raw(test_options) + ] + ) + + +def configure_s3a(client, dns_name, access_key, secret_key, bucket_name, testdir): + """ + Use the template to configure s3a test, Fill in access_key, secret_key + and other details required for test. 
+ """ + config_template = """<configuration> +<property> +<name>fs.s3a.endpoint</name> +<value>{name}</value> +</property> + +<property> +<name>fs.contract.test.fs.s3a</name> +<value>s3a://{bucket_name}/</value> +</property> + +<property> +<name>fs.s3a.connection.ssl.enabled</name> +<value>false</value> +</property> + +<property> +<name>test.fs.s3n.name</name> +<value>s3n://{bucket_name}/</value> +</property> + +<property> +<name>test.fs.s3a.name</name> +<value>s3a://{bucket_name}/</value> +</property> + +<property> +<name>test.fs.s3.name</name> +<value>s3://{bucket_name}/</value> +</property> + +<property> +<name>fs.s3.awsAccessKeyId</name> +<value>{access_key}</value> +</property> + +<property> +<name>fs.s3.awsSecretAccessKey</name> +<value>{secret_key}</value> +</property> + +<property> +<name>fs.s3n.awsAccessKeyId</name> +<value>{access_key}</value> +</property> + +<property> +<name>fs.s3n.awsSecretAccessKey</name> +<value>{secret_key}</value> +</property> + +<property> +<name>fs.s3a.access.key</name> +<description>AWS access key ID. Omit for Role-based authentication.</description> +<value>{access_key}</value> +</property> + +<property> +<name>fs.s3a.secret.key</name> +<description>AWS secret key. Omit for Role-based authentication.</description> +<value>{secret_key}</value> +</property> +</configuration> +""".format(name=dns_name, bucket_name=bucket_name, access_key=access_key, secret_key=secret_key) + config_path = testdir + '/hadoop/hadoop-tools/hadoop-aws/src/test/resources/auth-keys.xml' + client.write_file(config_path, config_template) + # output for debug + client.run(args=['cat', config_path]) diff --git a/qa/tasks/s3tests.py b/qa/tasks/s3tests.py new file mode 100644 index 000000000..3856f8fad --- /dev/null +++ b/qa/tasks/s3tests.py @@ -0,0 +1,648 @@ +""" +Run a set of s3 tests on rgw. +""" +from io import BytesIO +from configobj import ConfigObj +import base64 +import contextlib +import logging +import os +import random +import string + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.config import config as teuth_config +from teuthology.orchestra import run +from teuthology.exceptions import ConfigError + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the s3 tests from the git builder. + Remove downloaded s3 file upon exit. + + The context passed in should be identical to the context + passed in to the main task. + """ + assert isinstance(config, dict) + log.info('Downloading s3-tests...') + testdir = teuthology.get_testdir(ctx) + for (client, client_config) in config.items(): + s3tests_branch = client_config.get('force-branch', None) + if not s3tests_branch: + raise ValueError( + "Could not determine what branch to use for s3-tests. 
Please add 'force-branch: {s3-tests branch name}' to the .yaml config for this s3tests task.") + + log.info("Using branch '%s' for s3tests", s3tests_branch) + sha1 = client_config.get('sha1') + git_remote = client_config.get('git_remote', teuth_config.ceph_git_base_url) + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', s3tests_branch, + git_remote + 's3-tests.git', + '{tdir}/s3-tests-{client}'.format(tdir=testdir, client=client), + ], + ) + if sha1 is not None: + ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/s3-tests-{client}'.format(tdir=testdir, client=client), + run.Raw('&&'), + 'git', 'reset', '--hard', sha1, + ], + ) + try: + yield + finally: + log.info('Removing s3-tests...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/s3-tests-{client}'.format(tdir=testdir, client=client), + ], + ) + + +def _config_user(s3tests_conf, section, user): + """ + Configure users for this section by stashing away keys, ids, and + email addresses. + """ + s3tests_conf[section].setdefault('user_id', user) + s3tests_conf[section].setdefault('email', '{user}+test@test.test'.format(user=user)) + s3tests_conf[section].setdefault('display_name', 'Mr. {user}'.format(user=user)) + s3tests_conf[section].setdefault('access_key', + ''.join(random.choice(string.ascii_uppercase) for i in range(20))) + s3tests_conf[section].setdefault('secret_key', + base64.b64encode(os.urandom(40)).decode()) + s3tests_conf[section].setdefault('totp_serial', + ''.join(random.choice(string.digits) for i in range(10))) + s3tests_conf[section].setdefault('totp_seed', + base64.b32encode(os.urandom(40)).decode()) + s3tests_conf[section].setdefault('totp_seconds', '5') + + +@contextlib.contextmanager +def create_users(ctx, config): + """ + Create a main and an alternate s3 user. + """ + assert isinstance(config, dict) + log.info('Creating rgw users...') + testdir = teuthology.get_testdir(ctx) + + users = {'s3 main': 'foo', 's3 alt': 'bar', 's3 tenant': 'testx$tenanteduser', 'iam': 'foobar'} + for client in config['clients']: + s3tests_conf = config['s3tests_conf'][client] + s3tests_conf.setdefault('fixtures', {}) + s3tests_conf['fixtures'].setdefault('bucket prefix', 'test-' + client + '-{random}-') + for section, user in users.items(): + _config_user(s3tests_conf, section, '{user}.{client}'.format(user=user, client=client)) + log.debug('Creating user {user} on {host}'.format(user=s3tests_conf[section]['user_id'], host=client)) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' 
+ client_id + # create user + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--display-name', s3tests_conf[section]['display_name'], + '--email', s3tests_conf[section]['email'], + '--caps', 'user-policy=*', + '--access-key', s3tests_conf[section]['access_key'], + '--secret', s3tests_conf[section]['secret_key'], + '--cluster', cluster_name, + ], + ) + + if not ctx.dbstore_variable: + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'mfa', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--totp-serial', s3tests_conf[section]['totp_serial'], + '--totp-seed', s3tests_conf[section]['totp_seed'], + '--totp-seconds', s3tests_conf[section]['totp_seconds'], + '--totp-window', '8', + '--totp-seed-type', 'base32', + '--cluster', cluster_name, + ], + ) + + # add/configure caps for iam user + if section=='iam': + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'caps', 'add', + '--uid', s3tests_conf[section]['user_id'], + '--caps', 'roles=*', + '--cluster', cluster_name, + ], + ) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'caps', 'add', + '--uid', s3tests_conf[section]['user_id'], + '--caps', 'oidc-provider=*', + '--cluster', cluster_name, + ], + ) + + if "TOKEN" in os.environ: + s3tests_conf.setdefault('webidentity', {}) + s3tests_conf['webidentity'].setdefault('token',os.environ['TOKEN']) + s3tests_conf['webidentity'].setdefault('aud',os.environ['AUD']) + s3tests_conf['webidentity'].setdefault('sub',os.environ['SUB']) + s3tests_conf['webidentity'].setdefault('azp',os.environ['AZP']) + s3tests_conf['webidentity'].setdefault('user_token',os.environ['USER_TOKEN']) + s3tests_conf['webidentity'].setdefault('thumbprint',os.environ['THUMBPRINT']) + s3tests_conf['webidentity'].setdefault('KC_REALM',os.environ['KC_REALM']) + + try: + yield + finally: + for client in config['clients']: + for user in users.values(): + uid = '{user}.{client}'.format(user=user, client=client) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' + client_id + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'rm', + '--uid', uid, + '--purge-data', + '--cluster', cluster_name, + ], + ) + + +@contextlib.contextmanager +def configure(ctx, config): + """ + Create the config files for s3tests an boto. 
+ """ + assert isinstance(config, dict) + log.info('Configuring s3-tests...') + testdir = teuthology.get_testdir(ctx) + for client, properties in config['clients'].items(): + properties = properties or {} + s3tests_conf = config['s3tests_conf'][client] + s3tests_conf['DEFAULT']['calling_format'] = properties.get('calling-format', 'ordinary') + + # use rgw_server if given, or default to local client + role = properties.get('rgw_server', client) + + endpoint = ctx.rgw.role_endpoints.get(role) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(role) + + s3tests_conf['DEFAULT']['host'] = endpoint.dns_name + + website_role = properties.get('rgw_website_server') + if website_role: + website_endpoint = ctx.rgw.role_endpoints.get(website_role) + assert website_endpoint, \ + 's3tests: no rgw endpoint for rgw_website_server {}'.format(website_role) + assert website_endpoint.website_dns_name, \ + 's3tests: no dns-s3website-name for rgw_website_server {}'.format(website_role) + s3tests_conf['DEFAULT']['s3website_domain'] = website_endpoint.website_dns_name + + if hasattr(ctx, 'barbican'): + properties = properties['barbican'] + if properties is not None and 'kms_key' in properties: + if not (properties['kms_key'] in ctx.barbican.keys): + raise ConfigError('Key '+properties['kms_key']+' not defined') + + if not (properties['kms_key2'] in ctx.barbican.keys): + raise ConfigError('Key '+properties['kms_key2']+' not defined') + + key = ctx.barbican.keys[properties['kms_key']] + s3tests_conf['DEFAULT']['kms_keyid'] = key['id'] + + key = ctx.barbican.keys[properties['kms_key2']] + s3tests_conf['DEFAULT']['kms_keyid2'] = key['id'] + + elif hasattr(ctx, 'vault'): + engine_or_flavor = vars(ctx.vault).get('flavor',ctx.vault.engine) + keys=[] + for name in (x['Path'] for x in vars(ctx.vault).get('keys', {}).get(ctx.rgw.vault_role)): + keys.append(name) + + keys.extend(['testkey-1','testkey-2']) + if engine_or_flavor == "old": + keys=[keys[i] + "/1" for i in range(len(keys))] + + properties = properties.get('vault_%s' % engine_or_flavor, {}) + s3tests_conf['DEFAULT']['kms_keyid'] = properties.get('key_path', keys[0]) + s3tests_conf['DEFAULT']['kms_keyid2'] = properties.get('key_path2', keys[1]) + elif hasattr(ctx.rgw, 'pykmip_role'): + keys=[] + for name in (x['Name'] for x in ctx.pykmip.keys[ctx.rgw.pykmip_role]): + p=name.partition('-') + keys.append(p[2] if p[2] else p[0]) + keys.extend(['testkey-1', 'testkey-2']) + s3tests_conf['DEFAULT']['kms_keyid'] = properties.get('kms_key', keys[0]) + s3tests_conf['DEFAULT']['kms_keyid2'] = properties.get('kms_key2', keys[1]) + else: + # Fallback scenario where it's the local (ceph.conf) kms being tested + s3tests_conf['DEFAULT']['kms_keyid'] = 'testkey-1' + s3tests_conf['DEFAULT']['kms_keyid2'] = 'testkey-2' + + slow_backend = properties.get('slow_backend') + if slow_backend: + s3tests_conf['fixtures']['slow backend'] = slow_backend + + storage_classes = properties.get('storage classes') + if storage_classes: + s3tests_conf['s3 main']['storage_classes'] = storage_classes + + lc_debug_interval = properties.get('lc_debug_interval') + if lc_debug_interval: + s3tests_conf['s3 main']['lc_debug_interval'] = lc_debug_interval + + if ctx.rgw_cloudtier is not None: + log.info(' ctx.rgw_cloudtier config is %s ...', ctx.rgw_cloudtier.config) + client_rgw_config = ctx.rgw_cloudtier.config.get(client) + if client_rgw_config: + log.info(' ctx.rgw_cloudtier config is %s ...', client_rgw_config) + cloudtier_user = client_rgw_config.get('cloudtier_user') + cloud_client = 
client_rgw_config.get('cloud_client') + endpoint = ctx.rgw.role_endpoints.get(cloud_client) + s3tests_conf['s3 cloud']['host'] = endpoint.dns_name + s3tests_conf['s3 cloud']['port'] = endpoint.port + s3tests_conf['s3 cloud']['access_key'] = cloudtier_user.get('cloud_access_key') + s3tests_conf['s3 cloud']['secret_key'] = cloudtier_user.get('cloud_secret') + s3tests_conf['s3 cloud']['cloud_storage_class'] = client_rgw_config.get('cloud_storage_class') + s3tests_conf['s3 cloud']['storage_class'] = client_rgw_config.get('cloud_regular_storage_class') + s3tests_conf['s3 cloud']['retain_head_object'] = client_rgw_config.get('cloud_retain_head_object') + cloud_target_path = client_rgw_config.get('cloud_target_path') + cloud_target_storage_class = client_rgw_config.get('cloud_target_storage_class') + if (cloud_target_path != None): + s3tests_conf['s3 cloud']['target_path'] = cloud_target_path + if (cloud_target_storage_class != None): + s3tests_conf['s3 cloud']['target_storage_class'] = cloud_target_storage_class + + (remote,) = ctx.cluster.only(client).remotes.keys() + conf_fp = BytesIO() + s3tests_conf.write(conf_fp) + remote.write_file( + path='{tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client), + data=conf_fp.getvalue(), + ) + + log.info('Configuring boto...') + boto_src = os.path.join(os.path.dirname(__file__), 'boto.cfg.template') + for client, properties in config['clients'].items(): + with open(boto_src) as f: + (remote,) = ctx.cluster.only(client).remotes.keys() + conf = f.read().format( + idle_timeout=config.get('idle_timeout', 30) + ) + remote.write_file('{tdir}/boto-{client}.cfg'.format(tdir=testdir, client=client), conf) + + try: + yield + + finally: + log.info('Cleaning up boto...') + for client, properties in config['clients'].items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'rm', + '{tdir}/boto-{client}.cfg'.format(tdir=testdir, client=client), + ], + ) + +def get_toxvenv_dir(ctx): + return ctx.tox.venv_path + +def toxvenv_sh(ctx, remote, args, **kwargs): + activate = get_toxvenv_dir(ctx) + '/bin/activate' + return remote.sh(['source', activate, run.Raw('&&')] + args, **kwargs) + +@contextlib.contextmanager +def run_tests(ctx, config): + """ + Run the s3tests after everything is set up. 
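    On each client this amounts to roughly the following invocation inside the
    tox virtualenv (a sketch; the marker expression and attrs are assembled from
    the client config as shown in the code below):

        cd {tdir}/s3-tests-{client} && \
          S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf \
          BOTO_CONFIG={tdir}/boto-{client}.cfg \
          tox -- -v -m 'not fails_on_rgw and not lifecycle_expiration and ...'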
+ + :param ctx: Context passed to task + :param config: specific configuration information + """ + assert isinstance(config, dict) + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.items(): + client_config = client_config or {} + (remote,) = ctx.cluster.only(client).remotes.keys() + args = [ + 'cd', '{tdir}/s3-tests-{client}'.format(tdir=testdir, client=client), run.Raw('&&'), + 'S3TEST_CONF={tdir}/archive/s3-tests.{client}.conf'.format(tdir=testdir, client=client), + 'BOTO_CONFIG={tdir}/boto-{client}.cfg'.format(tdir=testdir, client=client) + ] + # the 'requests' library comes with its own ca bundle to verify ssl + # certificates - override that to use the system's ca bundle, which + # is where the ssl task installed this certificate + if remote.os.package_type == 'deb': + args += ['REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt'] + else: + args += ['REQUESTS_CA_BUNDLE=/etc/pki/tls/certs/ca-bundle.crt'] + # civetweb > 1.8 && beast parsers are strict on rfc2616 + attrs = ["not fails_on_rgw", "not lifecycle_expiration", "not test_of_sts", "not webidentity_test"] + if client_config.get('calling-format') != 'ordinary': + attrs += ['not fails_with_subdomain'] + if not client_config.get('with-sse-s3'): + attrs += ['not sse_s3'] + + if 'extra_attrs' in client_config: + attrs = client_config.get('extra_attrs') + args += ['tox', '--', '-v', '-m', ' and '.join(attrs)] + if 'extra_args' in client_config: + args.append(client_config['extra_args']) + + toxvenv_sh(ctx, remote, args, label="s3 tests against rgw") + yield + +@contextlib.contextmanager +def scan_for_leaked_encryption_keys(ctx, config): + """ + Scan radosgw logs for the encryption keys used by s3tests to + verify that we're not leaking secrets. + + :param ctx: Context passed to task + :param config: specific configuration information + """ + assert isinstance(config, dict) + + try: + yield + finally: + # x-amz-server-side-encryption-customer-key + s3test_customer_key = 'pO3upElrwuEXSoFwCfnZPdSsmt/xWeFa0N9KgDijwVs=' + + log.debug('Scanning radosgw logs for leaked encryption keys...') + procs = list() + for client, client_config in config.items(): + if not client_config.get('scan_for_encryption_keys', True): + continue + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_cluster = '.'.join((cluster_name, daemon_type, client_id)) + (remote,) = ctx.cluster.only(client).remotes.keys() + proc = remote.run( + args=[ + 'grep', + '--binary-files=text', + s3test_customer_key, + '/var/log/ceph/rgw.{client}.log'.format(client=client_with_cluster), + ], + wait=False, + check_status=False, + ) + procs.append(proc) + + for proc in procs: + proc.wait() + if proc.returncode == 1: # 1 means no matches + continue + log.error('radosgw log is leaking encryption keys!') + raise Exception('radosgw log is leaking encryption keys') + +@contextlib.contextmanager +def task(ctx, config): + """ + Run the s3-tests suite against rgw. + + To run all tests on all clients:: + + tasks: + - ceph: + - rgw: + - s3tests: + + To restrict testing to particular clients:: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: [client.0] + + To run against a server on client.1 and increase the boto timeout to 10m:: + + tasks: + - ceph: + - rgw: [client.1] + - s3tests: + client.0: + rgw_server: client.1 + idle_timeout: 600 + + To pass extra arguments to pytest (e.g. 
to run a certain test):: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: + client.0: + extra_args: ['test_s3:test_object_acl_grand_public_read'] + client.1: + extra_args: ['--exclude', 'test_100_continue'] + + To run any sts-tests don't forget to set a config variable named 'sts_tests' to 'True' as follows:: + + tasks: + - ceph: + - rgw: [client.0] + - s3tests: + client.0: + sts_tests: True + rgw_server: client.0 + + To run any cloud-transition tests don't forget to set a config variable named 'cloudtier_tests' to 'True' as follows:: + + tasks: + - ceph: + - rgw: [client.0 client.1] + - s3tests: + client.0: + cloudtier_tests: True + rgw_server: client.0 + + """ + assert hasattr(ctx, 'rgw'), 's3tests must run after the rgw task' + assert hasattr(ctx, 'tox'), 's3tests must run after the tox task' + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task s3tests only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('s3tests', {})) + + log.debug('s3tests config is %s', config) + + s3tests_conf = {} + + for client, client_config in config.items(): + if 'sts_tests' in client_config: + ctx.sts_variable = True + else: + ctx.sts_variable = False + + if 'cloudtier_tests' in client_config: + ctx.cloudtier_variable = True + else: + ctx.cloudtier_variable = False + + if 'dbstore_tests' in client_config: + ctx.dbstore_variable = True + else: + ctx.dbstore_variable = False + + #This will be the structure of config file when you want to run webidentity_test (sts-test) + if ctx.sts_variable and "TOKEN" in os.environ: + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(client) + + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + 'api_name' : 'default', + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 alt' : {}, + 's3 tenant' : {}, + 'iam' : {}, + 'webidentity': {}, + } + ) + + elif ctx.sts_variable: + #This will be the structure of config file when you want to run assume_role_test and get_session_token_test (sts-test) or iam-tests + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(client) + + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + 'api_name' : 'default', + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 alt' : {}, + 'iam' : {}, + 's3 tenant' : {}, + } + ) + + elif ctx.cloudtier_variable: + #This will be the structure of config file when you want to run normal s3-tests + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(client) + + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + 'api_name' : 'default', + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 
alt' : {}, + 's3 tenant' : {}, + 's3 cloud' : {}, + 'iam' : {}, + } + ) + else: + #This will be the structure of config file when you want to run normal s3-tests + for client in clients: + endpoint = ctx.rgw.role_endpoints.get(client) + assert endpoint, 's3tests: no rgw endpoint for {}'.format(client) + + s3tests_conf[client] = ConfigObj( + indent_type='', + infile={ + 'DEFAULT': + { + 'port' : endpoint.port, + 'is_secure' : endpoint.cert is not None, + 'api_name' : 'default', + }, + 'fixtures' : {}, + 's3 main' : {}, + 's3 alt' : {}, + 's3 tenant' : {}, + 'iam' : {}, + } + ) + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: create_users(ctx=ctx, config=dict( + clients=clients, + s3tests_conf=s3tests_conf, + )), + lambda: configure(ctx=ctx, config=dict( + clients=config, + s3tests_conf=s3tests_conf, + )), + lambda: run_tests(ctx=ctx, config=config), + lambda: scan_for_leaked_encryption_keys(ctx=ctx, config=config), + ): + pass + yield diff --git a/qa/tasks/s3tests_java.py b/qa/tasks/s3tests_java.py new file mode 100644 index 000000000..dbe03921c --- /dev/null +++ b/qa/tasks/s3tests_java.py @@ -0,0 +1,402 @@ +""" +Task for running RGW S3 tests with the AWS Java SDK +""" +from io import BytesIO +import logging + +import base64 +import os +import random +import string +import yaml +import getpass + +from teuthology import misc as teuthology +from teuthology.task import Task +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +""" + Task for running RGW S3 tests with the AWS Java SDK + + Tests run only on clients specified in the s3tests-java config section. + If no client is given a default 'client.0' is chosen. + If such does not match the rgw client the task will fail. + + tasks: + - ceph: + - rgw: [client.0] + - s3tests-java: + client.0: + + Extra arguments can be passed by adding options to the corresponding client + section under the s3tests-java task (e.g. 
to run a certain test, + specify a different repository and branch for the test suite, + run in info/debug mode (for the java suite) or forward the gradle output to a log file): + + tasks: + - ceph: + - rgw: [client.0] + - s3tests-java: + client.0: + force-branch: wip + force-repo: 'https://github.com/adamyanova/java_s3tests.git' + log-fwd: '../s3tests-java.log' + log-level: info + extra-args: ['--tests', 'ObjectTest.testEncryptionKeySSECInvalidMd5'] + + To run a specific test, provide its name to the extra-args section e.g.: + - s3tests-java: + client.0: + extra-args: ['--tests', 'ObjectTest.testEncryptionKeySSECInvalidMd5'] + +""" + + +class S3tests_java(Task): + """ + Download and install S3 tests in Java + This will require openjdk and gradle + """ + + def __init__(self, ctx, config): + super(S3tests_java, self).__init__(ctx, config) + self.log = log + log.debug('S3 Tests Java: __INIT__ ') + assert hasattr(ctx, 'rgw'), 'S3tests_java must run after the rgw task' + clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(self.ctx.cluster, 'client')] + self.all_clients = [] + for client in clients: + if client in self.config: + self.all_clients.extend([client]) + if self.all_clients is None: + self.all_clients = 'client.0' + self.users = {'s3main': 'tester', + 's3alt': 'johndoe', 'tenanted': 'testx$tenanteduser'} + + def setup(self): + super(S3tests_java, self).setup() + log.debug('S3 Tests Java: SETUP') + for client in self.all_clients: + self.download_test_suite(client) + self.install_required_packages(client) + + def begin(self): + super(S3tests_java, self).begin() + log.debug('S3 Tests Java: BEGIN') + for (host, roles) in self.ctx.cluster.remotes.items(): + log.debug( + 'S3 Tests Java: Cluster config is: {cfg}'.format(cfg=roles)) + log.debug('S3 Tests Java: Host is: {host}'.format(host=host)) + self.create_users() + self.run_tests() + + def end(self): + super(S3tests_java, self).end() + log.debug('S3 Tests Java: END') + for client in self.all_clients: + self.remove_tests(client) + self.delete_users(client) + + def download_test_suite(self, client): + log.info("S3 Tests Java: Downloading test suite...") + testdir = teuthology.get_testdir(self.ctx) + branch = 'master' + repo = 'https://github.com/ceph/java_s3tests.git' + if client in self.config and self.config[client] is not None: + if 'force-branch' in self.config[client] and self.config[client]['force-branch'] is not None: + branch = self.config[client]['force-branch'] + if 'force-repo' in self.config[client] and self.config[client]['force-repo'] is not None: + repo = self.config[client]['force-repo'] + self.ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', branch, + repo, + '{tdir}/s3-tests-java'.format(tdir=testdir), + ], + stdout=BytesIO() + ) + if client in self.config and self.config[client] is not None: + if 'sha1' in self.config[client] and self.config[client]['sha1'] is not None: + self.ctx.cluster.only(client).run( + args=[ + 'cd', '{tdir}/s3-tests-java'.format(tdir=testdir), + run.Raw('&&'), + 'git', 'reset', '--hard', self.config[client]['sha1'], + ], + ) + + if 'log-level' in self.config[client]: + if self.config[client]['log-level'] == 'info': + self.ctx.cluster.only(client).run( + args=[ + 'sed', '-i', '\'s/log4j.rootLogger=WARN/log4j.rootLogger=INFO/g\'', + '{tdir}/s3-tests-java/src/main/resources/log4j.properties'.format( + tdir=testdir) + ] + ) + if self.config[client]['log-level'] == 'debug': + self.ctx.cluster.only(client).run( + args=[ + 'sed', '-i', 
'\'s/log4j.rootLogger=WARN/log4j.rootLogger=DEBUG/g\'', + '{tdir}/s3-tests-java/src/main/resources/log4j.properties'.format( + tdir=testdir) + ] + ) + + def install_required_packages(self, client): + """ + Run bootstrap script to install openjdk and gradle. + Add certificates to java keystore + """ + log.info("S3 Tests Java: Installing required packages...") + testdir = teuthology.get_testdir(self.ctx) + self.ctx.cluster.only(client).run( + args=['{tdir}/s3-tests-java/bootstrap.sh'.format(tdir=testdir)], + stdout=BytesIO() + ) + + endpoint = self.ctx.rgw.role_endpoints[client] + if endpoint.cert: + path = 'lib/security/cacerts' + self.ctx.cluster.only(client).run( + args=['sudo', + 'keytool', + '-import', '-alias', '{alias}'.format( + alias=endpoint.hostname), + '-keystore', + run.Raw( + '$(readlink -e $(dirname $(readlink -e $(which keytool)))/../{path})'.format(path=path)), + '-file', endpoint.cert.certificate, + '-storepass', 'changeit', + ], + stdout=BytesIO() + ) + + def create_users(self): + """ + Create a main and an alternative s3 user. + Configuration is read from a skelethon config file + s3tests.teuth.config.yaml in the java-s3tests repository + and missing information is added from the task. + Existing values are NOT overriden unless they are empty! + """ + log.info("S3 Tests Java: Creating S3 users...") + testdir = teuthology.get_testdir(self.ctx) + for client in self.all_clients: + endpoint = self.ctx.rgw.role_endpoints.get(client) + local_user = getpass.getuser() + remote_user = teuthology.get_test_user() + os.system("scp {remote}@{host}:{tdir}/s3-tests-java/s3tests.teuth.config.yaml /home/{local}/".format( + host=endpoint.hostname, tdir=testdir, remote=remote_user, local=local_user)) + s3tests_conf = teuthology.config_file( + '/home/{local}/s3tests.teuth.config.yaml'.format(local=local_user)) + log.debug("S3 Tests Java: s3tests_conf is {s3cfg}".format( + s3cfg=s3tests_conf)) + for section, user in list(self.users.items()): + if section in s3tests_conf: + s3_user_id = '{user}.{client}'.format( + user=user, client=client) + log.debug( + 'S3 Tests Java: Creating user {s3_user_id}'.format(s3_user_id=s3_user_id)) + self._config_user(s3tests_conf=s3tests_conf, + section=section, user=s3_user_id, client=client) + cluster_name, daemon_type, client_id = teuthology.split_role( + client) + client_with_id = daemon_type + '.' + client_id + args = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client_with_id, + 'user', 'create', + '--uid', s3tests_conf[section]['user_id'], + '--display-name', s3tests_conf[section]['display_name'], + '--access-key', s3tests_conf[section]['access_key'], + '--secret', s3tests_conf[section]['access_secret'], + '--email', s3tests_conf[section]['email'], + '--cluster', cluster_name, + ] + log.info('{args}'.format(args=args)) + self.ctx.cluster.only(client).run( + args=args, + stdout=BytesIO() + ) + else: + self.users.pop(section) + self._write_cfg_file(s3tests_conf, client) + os.system( + "rm -rf /home/{local}/s3tests.teuth.config.yaml".format(local=local_user)) + + def _config_user(self, s3tests_conf, section, user, client): + """ + Generate missing users data for this section by stashing away keys, ids, and + email addresses. 
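        Roughly the following keys are filled in per section when they are
        missing or empty (placeholder values shown; endpoint data is taken from
        the client's rgw role endpoint):

            user_id        <user>.<client>
            email          <user>_test@test.test
            access_key     20 random uppercase letters
            access_secret  base64 of 40 random bytes
            endpoint       <host>:<port>, plus host/port/region/is_secure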
+ """ + access_key = ''.join(random.choice(string.ascii_uppercase) + for i in range(20)) + access_secret = base64.b64encode(os.urandom(40)).decode('ascii') + endpoint = self.ctx.rgw.role_endpoints.get(client) + + self._set_cfg_entry( + s3tests_conf[section], 'user_id', '{user}'.format(user=user)) + self._set_cfg_entry( + s3tests_conf[section], 'email', '{user}_test@test.test'.format(user=user)) + self._set_cfg_entry( + s3tests_conf[section], 'display_name', 'Ms. {user}'.format(user=user)) + self._set_cfg_entry( + s3tests_conf[section], 'access_key', '{ak}'.format(ak=access_key)) + self._set_cfg_entry( + s3tests_conf[section], 'access_secret', '{asc}'.format(asc=access_secret)) + self._set_cfg_entry( + s3tests_conf[section], 'region', 'us-east-1') + self._set_cfg_entry( + s3tests_conf[section], 'endpoint', '{ip}:{port}'.format( + ip=endpoint.hostname, port=endpoint.port)) + self._set_cfg_entry( + s3tests_conf[section], 'host', endpoint.hostname) + self._set_cfg_entry( + s3tests_conf[section], 'port', endpoint.port) + self._set_cfg_entry( + s3tests_conf[section], 'is_secure', True if endpoint.cert else False) + + log.debug("S3 Tests Java: s3tests_conf[{sect}] is {s3cfg}".format( + sect=section, s3cfg=s3tests_conf[section])) + log.debug('S3 Tests Java: Setion, User = {sect}, {user}'.format( + sect=section, user=user)) + + def _write_cfg_file(self, cfg_dict, client): + """ + Write s3 tests java config file on the remote node. + """ + testdir = teuthology.get_testdir(self.ctx) + (remote,) = self.ctx.cluster.only(client).remotes.keys() + data = yaml.safe_dump(cfg_dict, default_flow_style=False) + path = testdir + '/archive/s3-tests-java.' + client + '.conf' + remote.write_file(path, data) + + def _set_cfg_entry(self, cfg_dict, key, value): + if not (key in cfg_dict): + cfg_dict.setdefault(key, value) + elif cfg_dict[key] is None: + cfg_dict[key] = value + + def run_tests(self): + log.info("S3 Tests Java: Running tests...") + testdir = teuthology.get_testdir(self.ctx) + for client in self.all_clients: + self.ctx.cluster.only(client).run( + args=['cp', + '{tdir}/archive/s3-tests-java.{client}.conf'.format( + tdir=testdir, client=client), + '{tdir}/s3-tests-java/config.properties'.format( + tdir=testdir) + ], + stdout=BytesIO() + ) + args = ['cd', + '{tdir}/s3-tests-java'.format(tdir=testdir), + run.Raw('&&'), + '/opt/gradle/gradle/bin/gradle', 'clean', 'test', + '--rerun-tasks', '--no-build-cache', + ] + extra_args = [] + suppress_groups = False + self.log_fwd = False + self.log_name = '' + if client in self.config and self.config[client] is not None: + if 'extra-args' in self.config[client]: + extra_args.extend(self.config[client]['extra-args']) + suppress_groups = True + if 'log-level' in self.config[client] and self.config[client]['log-level'] == 'debug': + extra_args += ['--debug'] + if 'log-fwd' in self.config[client]: + self.log_fwd = True + self.log_name = '{tdir}/s3tests_log.txt'.format( + tdir=testdir) + if self.config[client]['log-fwd'] is not None: + self.log_name = self.config[client]['log-fwd'] + extra_args += [run.Raw('>>'), + self.log_name] + + if not suppress_groups: + test_groups = ['AWS4Test', 'BucketTest', 'ObjectTest'] + else: + test_groups = ['All'] + + for gr in test_groups: + for i in range(2): + self.ctx.cluster.only(client).run( + args=['radosgw-admin', 'gc', + 'process', '--include-all'], + stdout=BytesIO() + ) + + if gr != 'All': + self.ctx.cluster.only(client).run( + args=args + ['--tests'] + [gr] + extra_args, + stdout=BytesIO() + ) + else: + 
self.ctx.cluster.only(client).run( + args=args + extra_args, + stdout=BytesIO() + ) + + for i in range(2): + self.ctx.cluster.only(client).run( + args=['radosgw-admin', 'gc', + 'process', '--include-all'], + stdout=BytesIO() + ) + + def remove_tests(self, client): + log.info('S3 Tests Java: Cleaning up s3-tests-java...') + testdir = teuthology.get_testdir(self.ctx) + + if self.log_fwd: + self.ctx.cluster.only(client).run( + args=['cd', + '{tdir}/s3-tests-java'.format(tdir=testdir), + run.Raw('&&'), + 'cat', self.log_name, + run.Raw('&&'), + 'rm', self.log_name], + stdout=BytesIO() + ) + + self.ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/s3-tests-java'.format(tdir=testdir), + ], + stdout=BytesIO() + ) + + def delete_users(self, client): + log.info("S3 Tests Java: Deleting S3 users...") + testdir = teuthology.get_testdir(self.ctx) + for section, user in self.users.items(): + s3_user_id = '{user}.{client}'.format(user=user, client=client) + self.ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client, + 'user', 'rm', + '--uid', s3_user_id, + '--purge-data', + '--cluster', 'ceph', + ], + stdout=BytesIO() + ) + + +task = S3tests_java diff --git a/qa/tasks/samba.py b/qa/tasks/samba.py new file mode 100644 index 000000000..bcc247697 --- /dev/null +++ b/qa/tasks/samba.py @@ -0,0 +1,244 @@ +""" +Samba +""" +import contextlib +import logging +import time + +from teuthology import misc as teuthology +from teuthology.orchestra import run +from teuthology.orchestra.daemon import DaemonGroup + +log = logging.getLogger(__name__) + + +def get_sambas(ctx, roles): + """ + Scan for roles that are samba. Yield the id of the the samba role + (samba.0, samba.1...) and the associated remote site + + :param ctx: Context + :param roles: roles for this test (extracted from yaml files) + """ + for role in roles: + assert isinstance(role, str) + PREFIX = 'samba.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.keys() + yield (id_, remote) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Setup samba smbd with ceph vfs module. This task assumes the samba + package has already been installed via the install task. + + The config is optional and defaults to starting samba on all nodes. + If a config is given, it is expected to be a list of + samba nodes to start smbd servers on. + + Example that starts smbd on all samba nodes:: + + tasks: + - install: + - install: + project: samba + extra_packages: ['samba'] + - ceph: + - samba: + - interactive: + + Example that starts smbd on just one of the samba nodes and cifs on the other:: + + tasks: + - samba: [samba.0] + - cifs: [samba.1] + + An optional backend can be specified, and requires a path which smbd will + use as the backend storage location: + + roles: + - [osd.0, osd.1, osd.2, mon.0, mon.1, mon.2, mds.a] + - [client.0, samba.0] + + tasks: + - ceph: + - ceph-fuse: [client.0] + - samba: + samba.0: + cephfuse: "{testdir}/mnt.0" + + This mounts ceph to {testdir}/mnt.0 using fuse, and starts smbd with + a UNC of //localhost/cephfuse. Access through that UNC will be on + the ceph fuse mount point. + + If no arguments are specified in the samba + role, the default behavior is to enable the ceph UNC //localhost/ceph + and use the ceph vfs module as the smbd backend. 
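    In that default case the share written to /usr/local/samba/etc/smb.conf
    looks roughly like this (sketch only; when a backend is configured in the
    role, the vfs lines are dropped and the given path is used instead):

        [ceph]
            path = /
            vfs objects = ceph
            ceph:config_file = /etc/ceph/ceph.conf
            writeable = yes
            valid users = ubuntu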
+ + :param ctx: Context + :param config: Configuration + """ + log.info("Setting up smbd with ceph vfs...") + assert config is None or isinstance(config, list) or isinstance(config, dict), \ + "task samba got invalid config" + + if config is None: + config = dict(('samba.{id}'.format(id=id_), None) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'samba')) + elif isinstance(config, list): + config = dict((name, None) for name in config) + + samba_servers = list(get_sambas(ctx=ctx, roles=config.keys())) + + testdir = teuthology.get_testdir(ctx) + + if not hasattr(ctx, 'daemons'): + ctx.daemons = DaemonGroup() + + for id_, remote in samba_servers: + + rolestr = "samba.{id_}".format(id_=id_) + + confextras = """vfs objects = ceph + ceph:config_file = /etc/ceph/ceph.conf""" + + unc = "ceph" + backend = "/" + + if config[rolestr] is not None: + # verify that there's just one parameter in role + if len(config[rolestr]) != 1: + log.error("samba config for role samba.{id_} must have only one parameter".format(id_=id_)) + raise Exception('invalid config') + confextras = "" + (unc, backendstr) = config[rolestr].items()[0] + backend = backendstr.format(testdir=testdir) + + # on first samba role, set ownership and permissions of ceph root + # so that samba tests succeed + if config[rolestr] is None and id_ == samba_servers[0][0]: + remote.run( + args=[ + 'mkdir', '-p', '/tmp/cmnt', run.Raw('&&'), + 'sudo', 'ceph-fuse', '/tmp/cmnt', run.Raw('&&'), + 'sudo', 'chown', 'ubuntu:ubuntu', '/tmp/cmnt/', run.Raw('&&'), + 'sudo', 'chmod', '1777', '/tmp/cmnt/', run.Raw('&&'), + 'sudo', 'umount', '/tmp/cmnt/', run.Raw('&&'), + 'rm', '-rf', '/tmp/cmnt', + ], + ) + else: + remote.run( + args=[ + 'sudo', 'chown', 'ubuntu:ubuntu', backend, run.Raw('&&'), + 'sudo', 'chmod', '1777', backend, + ], + ) + + remote.sudo_write_file("/usr/local/samba/etc/smb.conf", """ +[global] + workgroup = WORKGROUP + netbios name = DOMAIN + +[{unc}] + path = {backend} + {extras} + writeable = yes + valid users = ubuntu +""".format(extras=confextras, unc=unc, backend=backend)) + + # create ubuntu user + remote.run( + args=[ + 'sudo', '/usr/local/samba/bin/smbpasswd', '-e', 'ubuntu', + run.Raw('||'), + 'printf', run.Raw('"ubuntu\nubuntu\n"'), + run.Raw('|'), + 'sudo', '/usr/local/samba/bin/smbpasswd', '-s', '-a', 'ubuntu' + ]) + + smbd_cmd = [ + 'sudo', + 'daemon-helper', + 'term', + 'nostdin', + '/usr/local/samba/sbin/smbd', + '-F', + ] + ctx.daemons.add_daemon(remote, 'smbd', id_, + args=smbd_cmd, + logger=log.getChild("smbd.{id_}".format(id_=id_)), + stdin=run.PIPE, + wait=False, + ) + + # let smbd initialize, probably a better way... + seconds_to_sleep = 100 + log.info('Sleeping for %s seconds...' 
% seconds_to_sleep) + time.sleep(seconds_to_sleep) + log.info('Sleeping stopped...') + + try: + yield + finally: + log.info('Stopping smbd processes...') + exc = None + for d in ctx.daemons.iter_daemons_of_role('smbd'): + try: + d.stop() + except (run.CommandFailedError, + run.CommandCrashedError, + run.ConnectionLostError) as e: + exc = e + log.exception('Saw exception from %s.%s', d.role, d.id_) + if exc is not None: + raise exc + + for id_, remote in samba_servers: + remote.run( + args=[ + 'sudo', + 'rm', '-rf', + '/usr/local/samba/etc/smb.conf', + '/usr/local/samba/private/*', + '/usr/local/samba/var/run/', + '/usr/local/samba/var/locks', + '/usr/local/samba/var/lock', + ], + ) + # make sure daemons are gone + try: + remote.run( + args=[ + 'while', + 'sudo', 'killall', '-9', 'smbd', + run.Raw(';'), + 'do', 'sleep', '1', + run.Raw(';'), + 'done', + ], + ) + + remote.run( + args=[ + 'sudo', + 'lsof', + backend, + ], + check_status=False + ) + remote.run( + args=[ + 'sudo', + 'fuser', + '-M', + backend, + ], + check_status=False + ) + except Exception: + log.exception("Saw exception") + pass diff --git a/qa/tasks/scrub.py b/qa/tasks/scrub.py new file mode 100644 index 000000000..ddc1a9164 --- /dev/null +++ b/qa/tasks/scrub.py @@ -0,0 +1,117 @@ +""" +Scrub osds +""" +import contextlib +import gevent +import logging +import random +import time + +from tasks import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + Run scrub periodically. Randomly chooses an OSD to scrub. + + The config should be as follows: + + scrub: + frequency: <seconds between scrubs> + deep: <bool for deepness> + + example: + + tasks: + - ceph: + - scrub: + frequency: 30 + deep: 0 + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'scrub task only accepts a dict for configuration' + + log.info('Beginning scrub...') + + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + while len(manager.get_osd_status()['up']) < num_osds: + time.sleep(10) + + scrub_proc = Scrubber( + manager, + config, + ) + try: + yield + finally: + log.info('joining scrub') + scrub_proc.do_join() + +class Scrubber: + """ + Scrubbing is actually performed during initialization + """ + def __init__(self, manager, config): + """ + Spawn scrubbing thread upon completion. 
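        The background thread picks a random 'up' OSD every `frequency` seconds
        and issues the equivalent of

            ceph osd scrub <osd-id>      # or 'osd deep-scrub' when deep is set

        through the cluster manager, until do_join() stops it.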
+ """ + self.ceph_manager = manager + self.ceph_manager.wait_for_clean() + + osd_status = self.ceph_manager.get_osd_status() + self.osds = osd_status['up'] + + self.config = config + if self.config is None: + self.config = dict() + + else: + def tmp(x): + """Local display""" + print(x) + self.log = tmp + + self.stopping = False + + log.info("spawning thread") + + self.thread = gevent.spawn(self.do_scrub) + + def do_join(self): + """Scrubbing thread finished""" + self.stopping = True + self.thread.get() + + def do_scrub(self): + """Perform the scrub operation""" + frequency = self.config.get("frequency", 30) + deep = self.config.get("deep", 0) + + log.info("stopping %s" % self.stopping) + + while not self.stopping: + osd = str(random.choice(self.osds)) + + if deep: + cmd = 'deep-scrub' + else: + cmd = 'scrub' + + log.info('%sbing %s' % (cmd, osd)) + self.ceph_manager.raw_cluster_cmd('osd', cmd, osd) + + time.sleep(frequency) diff --git a/qa/tasks/scrub_test.py b/qa/tasks/scrub_test.py new file mode 100644 index 000000000..edf106952 --- /dev/null +++ b/qa/tasks/scrub_test.py @@ -0,0 +1,413 @@ +"""Scrub testing""" + +import contextlib +import json +import logging +import os +import time +import tempfile + +from tasks import ceph_manager +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + + +def wait_for_victim_pg(manager, poolid): + """Return a PG with some data and its acting set""" + # wait for some PG to have data that we can mess with + victim = None + while victim is None: + stats = manager.get_pg_stats() + for pg in stats: + pgid = str(pg['pgid']) + pgpool = int(pgid.split('.')[0]) + if poolid != pgpool: + continue + size = pg['stat_sum']['num_bytes'] + if size > 0: + victim = pg['pgid'] + acting = pg['acting'] + return victim, acting + time.sleep(3) + + +def find_victim_object(ctx, pg, osd): + """Return a file to be fuzzed""" + (osd_remote,) = ctx.cluster.only('osd.%d' % osd).remotes.keys() + data_path = os.path.join( + '/var/lib/ceph/osd', + 'ceph-{id}'.format(id=osd), + 'fuse', + '{pg}_head'.format(pg=pg), + 'all', + ) + + # fuzz time + ls_out = osd_remote.sh('sudo ls %s' % data_path) + + # find an object file we can mess with (and not the pg info object) + osdfilename = next(line for line in ls_out.split('\n') + if not line.endswith('::::head#')) + assert osdfilename is not None + + # Get actual object name from osd stored filename + objname = osdfilename.split(':')[4] + return osd_remote, os.path.join(data_path, osdfilename), objname + + +def corrupt_file(osd_remote, path): + # put a single \0 at the beginning of the file + osd_remote.run( + args=['sudo', 'dd', + 'if=/dev/zero', + 'of=%s/data' % path, + 'bs=1', 'count=1', 'conv=notrunc'] + ) + + +def get_pgnum(pgid): + pos = pgid.find('.') + assert pos != -1 + return pgid[pos+1:] + + +def deep_scrub(manager, victim, pool): + # scrub, verify inconsistent + pgnum = get_pgnum(victim) + manager.do_pg_scrub(pool, pgnum, 'deep-scrub') + + stats = manager.get_single_pg_stats(victim) + inconsistent = stats['state'].find('+inconsistent') != -1 + assert inconsistent + + +def repair(manager, victim, pool): + # repair, verify no longer inconsistent + pgnum = get_pgnum(victim) + manager.do_pg_scrub(pool, pgnum, 'repair') + + stats = manager.get_single_pg_stats(victim) + inconsistent = stats['state'].find('+inconsistent') != -1 + assert not inconsistent + + +def test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, pool): + corrupt_file(osd_remote, obj_path) + deep_scrub(manager, pg, pool) + 
repair(manager, pg, pool) + + +def test_repair_bad_omap(ctx, manager, pg, osd, objname): + # Test deep-scrub with various omap modifications + # Modify omap on specific osd + log.info('fuzzing omap of %s' % objname) + manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'key']) + manager.osd_admin_socket(osd, ['setomapval', 'rbd', objname, + 'badkey', 'badval']) + manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'badhdr']) + + deep_scrub(manager, pg, 'rbd') + # please note, the repair here is errnomous, it rewrites the correct omap + # digest and data digest on the replicas with the corresponding digests + # from the primary osd which is hosting the victim object, see + # find_victim_object(). + # so we need to either put this test and the end of this task or + # undo the mess-up manually before the "repair()" that just ensures + # the cleanup is sane, otherwise the succeeding tests will fail. if they + # try set "badkey" in hope to get an "inconsistent" pg with a deep-scrub. + manager.osd_admin_socket(osd, ['setomapheader', 'rbd', objname, 'hdr']) + manager.osd_admin_socket(osd, ['rmomapkey', 'rbd', objname, 'badkey']) + manager.osd_admin_socket(osd, ['setomapval', 'rbd', objname, + 'key', 'val']) + repair(manager, pg, 'rbd') + + +class MessUp: + def __init__(self, manager, osd_remote, pool, osd_id, + obj_name, obj_path, omap_key, omap_val): + self.manager = manager + self.osd = osd_remote + self.pool = pool + self.osd_id = osd_id + self.obj = obj_name + self.path = obj_path + self.omap_key = omap_key + self.omap_val = omap_val + + @contextlib.contextmanager + def _test_with_file(self, messup_cmd, *checks): + temp = tempfile.mktemp() + backup_cmd = ['sudo', 'cp', os.path.join(self.path, 'data'), temp] + self.osd.run(args=backup_cmd) + self.osd.run(args=messup_cmd.split()) + yield checks + create_cmd = ['sudo', 'mkdir', self.path] + self.osd.run(args=create_cmd, check_status=False) + restore_cmd = ['sudo', 'cp', temp, os.path.join(self.path, 'data')] + self.osd.run(args=restore_cmd) + + def remove(self): + cmd = 'sudo rmdir {path}'.format(path=self.path) + return self._test_with_file(cmd, 'missing') + + def append(self): + cmd = 'sudo dd if=/dev/zero of={path}/data bs=1 count=1 ' \ + 'conv=notrunc oflag=append'.format(path=self.path) + return self._test_with_file(cmd, + 'data_digest_mismatch', + 'size_mismatch') + + def truncate(self): + cmd = 'sudo dd if=/dev/null of={path}/data'.format(path=self.path) + return self._test_with_file(cmd, + 'data_digest_mismatch', + 'size_mismatch') + + def change_obj(self): + cmd = 'sudo dd if=/dev/zero of={path}/data bs=1 count=1 ' \ + 'conv=notrunc'.format(path=self.path) + return self._test_with_file(cmd, + 'data_digest_mismatch') + + @contextlib.contextmanager + def rm_omap(self): + cmd = ['rmomapkey', self.pool, self.obj, self.omap_key] + self.manager.osd_admin_socket(self.osd_id, cmd) + yield ('omap_digest_mismatch',) + cmd = ['setomapval', self.pool, self.obj, + self.omap_key, self.omap_val] + self.manager.osd_admin_socket(self.osd_id, cmd) + + @contextlib.contextmanager + def add_omap(self): + cmd = ['setomapval', self.pool, self.obj, 'badkey', 'badval'] + self.manager.osd_admin_socket(self.osd_id, cmd) + yield ('omap_digest_mismatch',) + cmd = ['rmomapkey', self.pool, self.obj, 'badkey'] + self.manager.osd_admin_socket(self.osd_id, cmd) + + @contextlib.contextmanager + def change_omap(self): + cmd = ['setomapval', self.pool, self.obj, self.omap_key, 'badval'] + self.manager.osd_admin_socket(self.osd_id, cmd) + yield 
('omap_digest_mismatch',) + cmd = ['setomapval', self.pool, self.obj, self.omap_key, self.omap_val] + self.manager.osd_admin_socket(self.osd_id, cmd) + + +class InconsistentObjChecker: + """Check the returned inconsistents/inconsistent info""" + + def __init__(self, osd, acting, obj_name): + self.osd = osd + self.acting = acting + self.obj = obj_name + assert self.osd in self.acting + + def basic_checks(self, inc): + assert inc['object']['name'] == self.obj + assert inc['object']['snap'] == "head" + assert len(inc['shards']) == len(self.acting), \ + "the number of returned shard does not match with the acting set" + + def run(self, check, inc): + func = getattr(self, check) + func(inc) + + def _check_errors(self, inc, err_name): + bad_found = False + good_found = False + for shard in inc['shards']: + log.info('shard = %r' % shard) + log.info('err = %s' % err_name) + assert 'osd' in shard + osd = shard['osd'] + err = err_name in shard['errors'] + if osd == self.osd: + assert bad_found is False, \ + "multiple entries found for the given OSD" + assert err is True, \ + "Didn't find '{err}' in errors".format(err=err_name) + bad_found = True + else: + assert osd in self.acting, "shard not in acting set" + assert err is False, \ + "Expected '{err}' in errors".format(err=err_name) + good_found = True + assert bad_found is True, \ + "Shard for osd.{osd} not found".format(osd=self.osd) + assert good_found is True, \ + "No other acting shards found" + + def _check_attrs(self, inc, attr_name): + bad_attr = None + good_attr = None + for shard in inc['shards']: + log.info('shard = %r' % shard) + log.info('attr = %s' % attr_name) + assert 'osd' in shard + osd = shard['osd'] + attr = shard.get(attr_name, False) + if osd == self.osd: + assert bad_attr is None, \ + "multiple entries found for the given OSD" + bad_attr = attr + else: + assert osd in self.acting, "shard not in acting set" + assert good_attr is None or good_attr == attr, \ + "multiple good attrs found" + good_attr = attr + assert bad_attr is not None, \ + "bad {attr} not found".format(attr=attr_name) + assert good_attr is not None, \ + "good {attr} not found".format(attr=attr_name) + assert good_attr != bad_attr, \ + "bad attr is identical to the good ones: " \ + "{0} == {1}".format(good_attr, bad_attr) + + def data_digest_mismatch(self, inc): + assert 'data_digest_mismatch' in inc['errors'] + self._check_attrs(inc, 'data_digest') + + def missing(self, inc): + assert 'missing' in inc['union_shard_errors'] + self._check_errors(inc, 'missing') + + def size_mismatch(self, inc): + assert 'size_mismatch' in inc['errors'] + self._check_attrs(inc, 'size') + + def omap_digest_mismatch(self, inc): + assert 'omap_digest_mismatch' in inc['errors'] + self._check_attrs(inc, 'omap_digest') + + +def test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd_id, + obj_name, obj_path): + mon = manager.controller + pool = 'rbd' + omap_key = 'key' + omap_val = 'val' + manager.do_rados(['setomapval', obj_name, omap_key, omap_val], pool=pool) + # Update missing digests, requires "osd deep scrub update digest min age: 0" + pgnum = get_pgnum(pg) + manager.do_pg_scrub(pool, pgnum, 'deep-scrub') + + messup = MessUp(manager, osd_remote, pool, osd_id, obj_name, obj_path, + omap_key, omap_val) + for test in [messup.rm_omap, messup.add_omap, messup.change_omap, + messup.append, messup.truncate, messup.change_obj, + messup.remove]: + with test() as checks: + deep_scrub(manager, pg, pool) + cmd = 'rados list-inconsistent-pg {pool} ' \ + 
'--format=json'.format(pool=pool) + pgs = json.loads(mon.sh(cmd)) + assert pgs == [pg] + + cmd = 'rados list-inconsistent-obj {pg} ' \ + '--format=json'.format(pg=pg) + objs = json.loads(mon.sh(cmd)) + assert len(objs['inconsistents']) == 1 + + checker = InconsistentObjChecker(osd_id, acting, obj_name) + inc_obj = objs['inconsistents'][0] + log.info('inc = %r', inc_obj) + checker.basic_checks(inc_obj) + for check in checks: + checker.run(check, inc_obj) + + +def task(ctx, config): + """ + Test [deep] scrub + + tasks: + - chef: + - install: + - ceph: + log-ignorelist: + - '!= data_digest' + - '!= omap_digest' + - '!= size' + - deep-scrub 0 missing, 1 inconsistent objects + - deep-scrub [0-9]+ errors + - repair 0 missing, 1 inconsistent objects + - repair [0-9]+ errors, [0-9]+ fixed + - shard [0-9]+ .* : missing + - deep-scrub 1 missing, 1 inconsistent objects + - does not match object info size + - attr name mistmatch + - deep-scrub 1 missing, 0 inconsistent objects + - failed to pick suitable auth object + - candidate size [0-9]+ info size [0-9]+ mismatch + conf: + osd: + osd deep scrub update digest min age: 0 + - scrub_test: + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'scrub_test task only accepts a dict for configuration' + first_mon = teuthology.get_first_mon(ctx, config) + (mon,) = ctx.cluster.only(first_mon).remotes.keys() + + num_osds = teuthology.num_instances_of_type(ctx.cluster, 'osd') + log.info('num_osds is %s' % num_osds) + + manager = ceph_manager.CephManager( + mon, + ctx=ctx, + logger=log.getChild('ceph_manager'), + ) + + while len(manager.get_osd_status()['up']) < num_osds: + time.sleep(10) + + for i in range(num_osds): + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs', + '--', '--osd-objectstore-fuse') + manager.flush_pg_stats(range(num_osds)) + manager.wait_for_clean() + + osd_dump = manager.get_osd_dump_json() + poolid = -1 + for p in osd_dump['pools']: + if p['pool_name'] == 'rbd': + poolid = p['pool'] + break + assert poolid != -1 + + # write some data + p = manager.do_rados(['bench', '--no-cleanup', '1', 'write', '-b', '4096'], pool='rbd') + log.info('err is %d' % p.exitstatus) + + # wait for some PG to have data that we can mess with + pg, acting = wait_for_victim_pg(manager, poolid) + osd = acting[0] + + osd_remote, obj_path, obj_name = find_victim_object(ctx, pg, osd) + manager.do_rados(['setomapval', obj_name, 'key', 'val'], pool='rbd') + log.info('err is %d' % p.exitstatus) + manager.do_rados(['setomapheader', obj_name, 'hdr'], pool='rbd') + log.info('err is %d' % p.exitstatus) + + # Update missing digests, requires "osd deep scrub update digest min age: 0" + pgnum = get_pgnum(pg) + manager.do_pg_scrub('rbd', pgnum, 'deep-scrub') + + log.info('messing with PG %s on osd %d' % (pg, osd)) + test_repair_corrupted_obj(ctx, manager, pg, osd_remote, obj_path, 'rbd') + test_repair_bad_omap(ctx, manager, pg, osd, obj_name) + test_list_inconsistent_obj(ctx, manager, osd_remote, pg, acting, osd, + obj_name, obj_path) + log.info('test successful!') + + # shut down fuse mount + for i in range(num_osds): + manager.raw_cluster_cmd('tell', 'osd.%d' % i, 'injectargs', + '--', '--no-osd-objectstore-fuse') + time.sleep(5) + log.info('done') diff --git a/qa/tasks/systemd.py b/qa/tasks/systemd.py new file mode 100644 index 000000000..1728b920f --- /dev/null +++ b/qa/tasks/systemd.py @@ -0,0 +1,135 @@ +""" +Systemd test +""" +import contextlib +import logging +import re +import time + +from teuthology.orchestra import run +from 
teuthology.misc import reconnect, get_first_mon, wait_until_healthy + +log = logging.getLogger(__name__) + +def _remote_service_status(remote, service): + status = remote.sh('sudo systemctl status %s' % service, + check_status=False) + return status + +@contextlib.contextmanager +def task(ctx, config): + """ + - tasks: + ceph-deploy: + systemd: + + Test ceph systemd services can start, stop and restart and + check for any failed services and report back errors + """ + for remote, roles in ctx.cluster.remotes.items(): + remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'), + 'grep', 'ceph']) + units = remote.sh('sudo systemctl list-units | grep ceph', + check_status=False) + log.info(units) + if units.find('failed'): + log.info("Ceph services in failed state") + + # test overall service stop and start using ceph.target + # ceph.target tests are meant for ceph systemd tests + # and not actual process testing using 'ps' + log.info("Stopping all Ceph services") + remote.run(args=['sudo', 'systemctl', 'stop', 'ceph.target']) + status = _remote_service_status(remote, 'ceph.target') + log.info(status) + log.info("Checking process status") + ps_eaf = remote.sh('sudo ps -eaf | grep ceph') + if ps_eaf.find('Active: inactive'): + log.info("Successfully stopped all ceph services") + else: + log.info("Failed to stop ceph services") + + log.info("Starting all Ceph services") + remote.run(args=['sudo', 'systemctl', 'start', 'ceph.target']) + status = _remote_service_status(remote, 'ceph.target') + log.info(status) + if status.find('Active: active'): + log.info("Successfully started all Ceph services") + else: + log.info("info", "Failed to start Ceph services") + ps_eaf = remote.sh('sudo ps -eaf | grep ceph') + log.info(ps_eaf) + time.sleep(4) + + # test individual services start stop + name = remote.shortname + mon_name = 'ceph-mon@' + name + '.service' + mds_name = 'ceph-mds@' + name + '.service' + mgr_name = 'ceph-mgr@' + name + '.service' + mon_role_name = 'mon.' + name + mds_role_name = 'mds.' + name + mgr_role_name = 'mgr.' 
+ name + m_osd = re.search('--id (\d+) --setuser ceph', ps_eaf) + if m_osd: + osd_service = 'ceph-osd@{m}.service'.format(m=m_osd.group(1)) + remote.run(args=['sudo', 'systemctl', 'status', + osd_service]) + remote.run(args=['sudo', 'systemctl', 'stop', + osd_service]) + time.sleep(4) # immediate check will result in deactivating state + status = _remote_service_status(remote, osd_service) + log.info(status) + if status.find('Active: inactive'): + log.info("Successfully stopped single osd ceph service") + else: + log.info("Failed to stop ceph osd services") + remote.sh(['sudo', 'systemctl', 'start', osd_service]) + time.sleep(4) + if mon_role_name in roles: + remote.run(args=['sudo', 'systemctl', 'status', mon_name]) + remote.run(args=['sudo', 'systemctl', 'stop', mon_name]) + time.sleep(4) # immediate check will result in deactivating state + status = _remote_service_status(remote, mon_name) + if status.find('Active: inactive'): + log.info("Successfully stopped single mon ceph service") + else: + log.info("Failed to stop ceph mon service") + remote.run(args=['sudo', 'systemctl', 'start', mon_name]) + time.sleep(4) + if mgr_role_name in roles: + remote.run(args=['sudo', 'systemctl', 'status', mgr_name]) + remote.run(args=['sudo', 'systemctl', 'stop', mgr_name]) + time.sleep(4) # immediate check will result in deactivating state + status = _remote_service_status(remote, mgr_name) + if status.find('Active: inactive'): + log.info("Successfully stopped single ceph mgr service") + else: + log.info("Failed to stop ceph mgr service") + remote.run(args=['sudo', 'systemctl', 'start', mgr_name]) + time.sleep(4) + if mds_role_name in roles: + remote.run(args=['sudo', 'systemctl', 'status', mds_name]) + remote.run(args=['sudo', 'systemctl', 'stop', mds_name]) + time.sleep(4) # immediate check will result in deactivating state + status = _remote_service_status(remote, mds_name) + if status.find('Active: inactive'): + log.info("Successfully stopped single ceph mds service") + else: + log.info("Failed to stop ceph mds service") + remote.run(args=['sudo', 'systemctl', 'start', mds_name]) + time.sleep(4) + + # reboot all nodes and verify the systemd units restart + # workunit that runs would fail if any of the systemd unit doesnt start + ctx.cluster.run(args='sudo reboot', wait=False, check_status=False) + # avoid immediate reconnect + time.sleep(120) + reconnect(ctx, 480) # reconnect all nodes + # for debug info + ctx.cluster.run(args=['sudo', 'ps', '-eaf', run.Raw('|'), + 'grep', 'ceph']) + # wait for HEALTH_OK + mon = get_first_mon(ctx, config) + (mon_remote,) = ctx.cluster.only(mon).remotes.keys() + wait_until_healthy(ctx, mon_remote, use_sudo=True) + yield diff --git a/qa/tasks/tempest.py b/qa/tasks/tempest.py new file mode 100644 index 000000000..142c097cd --- /dev/null +++ b/qa/tasks/tempest.py @@ -0,0 +1,263 @@ +""" +Deploy and configure Tempest for Teuthology +""" +import configparser +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.exceptions import ConfigError +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +def get_tempest_dir(ctx): + return '{tdir}/tempest'.format(tdir=teuthology.get_testdir(ctx)) + +def run_in_tempest_dir(ctx, client, cmdargs, **kwargs): + ctx.cluster.only(client).run( + args=[ 'cd', get_tempest_dir(ctx), run.Raw('&&'), ] + cmdargs, + **kwargs + ) + +def run_in_tempest_rgw_dir(ctx, client, cmdargs, **kwargs): + ctx.cluster.only(client).run( + args=[ 'cd', 
get_tempest_dir(ctx) + '/rgw', run.Raw('&&'), ] + cmdargs, + **kwargs + ) + +def run_in_tempest_venv(ctx, client, cmdargs, **kwargs): + run_in_tempest_dir(ctx, client, + [ 'source', + '.tox/venv/bin/activate', + run.Raw('&&') + ] + cmdargs, **kwargs) + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the Tempest from github. + Remove downloaded file upon exit. + + The context passed in should be identical to the context + passed in to the main task. + """ + assert isinstance(config, dict) + log.info('Downloading Tempest...') + for (client, cconf) in config.items(): + ctx.cluster.only(client).run( + args=[ + 'git', 'clone', + '-b', cconf.get('force-branch', 'master'), + 'https://github.com/openstack/tempest.git', + get_tempest_dir(ctx) + ], + ) + + sha1 = cconf.get('sha1') + if sha1 is not None: + run_in_tempest_dir(ctx, client, [ 'git', 'reset', '--hard', sha1 ]) + try: + yield + finally: + log.info('Removing Tempest...') + for client in config: + ctx.cluster.only(client).run( + args=[ 'rm', '-rf', get_tempest_dir(ctx) ], + ) + +def get_toxvenv_dir(ctx): + return ctx.tox.venv_path + +@contextlib.contextmanager +def setup_venv(ctx, config): + """ + Setup the virtualenv for Tempest using tox. + """ + assert isinstance(config, dict) + log.info('Setting up virtualenv for Tempest') + for (client, _) in config.items(): + run_in_tempest_dir(ctx, client, + [ '{tvdir}/bin/tox'.format(tvdir=get_toxvenv_dir(ctx)), + '-e', 'venv', '--notest' + ]) + yield + +def setup_logging(ctx, cpar): + cpar.set('DEFAULT', 'log_dir', teuthology.get_archive_dir(ctx)) + cpar.set('DEFAULT', 'log_file', 'tempest.log') + +def to_config(config, params, section, cpar): + for (k, v) in config[section].items(): + if isinstance(v, str): + v = v.format(**params) + elif isinstance(v, bool): + v = 'true' if v else 'false' + else: + v = str(v) + cpar.set(section, k, v) + +@contextlib.contextmanager +def configure_instance(ctx, config): + assert isinstance(config, dict) + log.info('Configuring Tempest') + + for (client, cconfig) in config.items(): + run_in_tempest_venv(ctx, client, + [ + 'tempest', + 'init', + '--workspace-path', + get_tempest_dir(ctx) + '/workspace.yaml', + 'rgw' + ]) + + # prepare the config file + tetcdir = '{tdir}/rgw/etc'.format(tdir=get_tempest_dir(ctx)) + (remote,) = ctx.cluster.only(client).remotes.keys() + local_conf = remote.get_file(tetcdir + '/tempest.conf.sample') + + # fill the params dictionary which allows to use templatized configs + keystone_role = cconfig.get('use-keystone-role', None) + if keystone_role is None \ + or keystone_role not in ctx.keystone.public_endpoints: + raise ConfigError('the use-keystone-role is misconfigured') + public_host, public_port = ctx.keystone.public_endpoints[keystone_role] + params = { + 'keystone_public_host': public_host, + 'keystone_public_port': str(public_port), + } + + cpar = configparser.ConfigParser() + cpar.read(local_conf) + setup_logging(ctx, cpar) + to_config(cconfig, params, 'auth', cpar) + to_config(cconfig, params, 'identity', cpar) + to_config(cconfig, params, 'object-storage', cpar) + to_config(cconfig, params, 'object-storage-feature-enabled', cpar) + cpar.write(open(local_conf, 'w+')) + + remote.put_file(local_conf, tetcdir + '/tempest.conf') + yield + +@contextlib.contextmanager +def run_tempest(ctx, config): + assert isinstance(config, dict) + log.info('Configuring Tempest') + + for (client, cconf) in config.items(): + blocklist = cconf.get('blocklist', []) + assert isinstance(blocklist, list) + run_in_tempest_venv(ctx, 
client, + [ + 'tempest', + 'run', + '--workspace-path', + get_tempest_dir(ctx) + '/workspace.yaml', + '--workspace', + 'rgw', + '--regex', '^tempest.api.object_storage', + '--black-regex', '|'.join(blocklist) + ]) + try: + yield + finally: + pass + + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy and run Tempest's object storage campaign + + Example of configuration: + + overrides: + ceph: + conf: + client: + rgw keystone api version: 3 + rgw keystone accepted roles: admin,member + rgw keystone implicit tenants: true + rgw keystone accepted admin roles: admin + rgw swift enforce content length: true + rgw swift account in url: true + rgw swift versioning enabled: true + rgw keystone admin domain: Default + rgw keystone admin user: admin + rgw keystone admin password: ADMIN + rgw keystone admin project: admin + tasks: + # typically, the task should be preceded with install, ceph, tox, + # keystone and rgw. Tox and Keystone are specific requirements + # of tempest.py. + - rgw: + # it's important to match the prefix with the endpoint's URL + # in Keystone. Additionally, if we want to test /info and its + # accompanying stuff, the whole Swift API must be put in root + # of the whole URL hierarchy (read: frontend_prefix == /swift). + frontend_prefix: /swift + client.0: + use-keystone-role: client.0 + - tempest: + client.0: + force-branch: master + use-keystone-role: client.0 + auth: + admin_username: admin + admin_project_name: admin + admin_password: ADMIN + admin_domain_name: Default + identity: + uri: http://{keystone_public_host}:{keystone_public_port}/v2.0/ + uri_v3: http://{keystone_public_host}:{keystone_public_port}/v3/ + admin_role: admin + object-storage: + reseller_admin_role: admin + object-storage-feature-enabled: + container_sync: false + discoverability: false + blocklist: + # please strip half of these items after merging PRs #15369 + # and #12704 + - .*test_list_containers_reverse_order.* + - .*test_list_container_contents_with_end_marker.* + - .*test_delete_non_empty_container.* + - .*test_container_synchronization.* + - .*test_get_object_after_expiration_time.* + - .*test_create_object_with_transfer_encoding.* + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + 'task tempest only supports a list or dictionary for configuration' + + if not ctx.tox: + raise ConfigError('tempest must run after the tox task') + if not ctx.keystone: + raise ConfigError('tempest must run after the keystone task') + + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. 
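+    # Illustrative (hypothetical) values: with
+    #     overrides: {'keystone': {'force-branch': 'stable'}}
+    # and config == {'client.0': {'use-keystone-role': 'client.0'}},
+    # the deep_merge() below keeps the per-client keys and only fills in
+    # settings that the client section did not specify itself.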
+ for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('keystone', {})) + + log.debug('Tempest config is %s', config) + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: setup_venv(ctx=ctx, config=config), + lambda: configure_instance(ctx=ctx, config=config), + lambda: run_tempest(ctx=ctx, config=config), + ): + yield diff --git a/qa/tasks/tests/__init__.py b/qa/tasks/tests/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/tasks/tests/__init__.py diff --git a/qa/tasks/tests/conftest.py b/qa/tasks/tests/conftest.py new file mode 100644 index 000000000..7cc617a41 --- /dev/null +++ b/qa/tasks/tests/conftest.py @@ -0,0 +1,12 @@ +import glob + + +def pytest_addoption(parser): + parser.addoption("--suite-dir", help="suite dir") + +def pytest_generate_tests(metafunc): + if "yaml_file" in metafunc.fixturenames: + suite_dir = metafunc.config.getoption("--suite-dir") + files = glob.glob(f"{suite_dir}/**/*.yaml", recursive=True) + + metafunc.parametrize("yaml_file", list(set(files))) diff --git a/qa/tasks/tests/test_devstack.py b/qa/tasks/tests/test_devstack.py new file mode 100644 index 000000000..39b94a64c --- /dev/null +++ b/qa/tasks/tests/test_devstack.py @@ -0,0 +1,48 @@ +from textwrap import dedent + +from tasks import devstack + + +class TestDevstack(object): + def test_parse_os_table(self): + table_str = dedent(""" + +---------------------+--------------------------------------+ + | Property | Value | + +---------------------+--------------------------------------+ + | attachments | [] | + | availability_zone | nova | + | bootable | false | + | created_at | 2014-02-21T17:14:47.548361 | + | display_description | None | + | display_name | NAME | + | id | ffdbd1bb-60dc-4d95-acfe-88774c09ad3e | + | metadata | {} | + | size | 1 | + | snapshot_id | None | + | source_volid | None | + | status | creating | + | volume_type | None | + +---------------------+--------------------------------------+ + """).strip() + expected = { + 'Property': 'Value', + 'attachments': '[]', + 'availability_zone': 'nova', + 'bootable': 'false', + 'created_at': '2014-02-21T17:14:47.548361', + 'display_description': 'None', + 'display_name': 'NAME', + 'id': 'ffdbd1bb-60dc-4d95-acfe-88774c09ad3e', + 'metadata': '{}', + 'size': '1', + 'snapshot_id': 'None', + 'source_volid': 'None', + 'status': 'creating', + 'volume_type': 'None'} + + vol_info = devstack.parse_os_table(table_str) + assert vol_info == expected + + + + diff --git a/qa/tasks/tests/test_import_yaml.py b/qa/tasks/tests/test_import_yaml.py new file mode 100644 index 000000000..d6e0e2640 --- /dev/null +++ b/qa/tasks/tests/test_import_yaml.py @@ -0,0 +1,5 @@ +import yaml + + +def test_load_yaml(yaml_file): + yaml.safe_load(open(yaml_file)) diff --git a/qa/tasks/tests/test_radosgw_admin.py b/qa/tasks/tests/test_radosgw_admin.py new file mode 100644 index 000000000..8506eda7b --- /dev/null +++ b/qa/tasks/tests/test_radosgw_admin.py @@ -0,0 +1,31 @@ +from unittest.mock import Mock + +from tasks import radosgw_admin + +acl_with_version = b"""<?xml version="1.0" encoding="UTF-8"?><AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>foo</ID><DisplayName>Foo</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:type="CanonicalUser"><ID>foo</ID><DisplayName>Foo</DisplayName></Grantee><Permission>FULL_CONTROL</Permission></Grant></AccessControlList></AccessControlPolicy> +""" # noqa + + +acl_without_version = b"""<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>foo</ID><DisplayName>Foo</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>foo</ID><DisplayName>Foo</DisplayName></Grantee><Permission>FULL_CONTROL</Permission></Grant></AccessControlList></AccessControlPolicy> +""" # noqa + + +class TestGetAcl(object): + + def setup(self): + self.key = Mock() + + def test_removes_xml_version(self): + self.key.get_xml_acl = Mock(return_value=acl_with_version) + result = radosgw_admin.get_acl(self.key) + assert result.startswith('<AccessControlPolicy') + + def test_xml_version_is_already_removed(self): + self.key.get_xml_acl = Mock(return_value=acl_without_version) + result = radosgw_admin.get_acl(self.key) + assert result.startswith('<AccessControlPolicy') + + def test_newline_gets_trimmed(self): + self.key.get_xml_acl = Mock(return_value=acl_without_version) + result = radosgw_admin.get_acl(self.key) + assert result.endswith('\n') is False diff --git a/qa/tasks/teuthology_integration.py b/qa/tasks/teuthology_integration.py new file mode 100644 index 000000000..b5a2278eb --- /dev/null +++ b/qa/tasks/teuthology_integration.py @@ -0,0 +1,19 @@ +import logging +from teuthology import misc +from teuthology.task import Task + +log = logging.getLogger(__name__) + + +class TeuthologyIntegration(Task): + + def begin(self): + misc.sh(""" + set -x + pip install tox + tox + # tox -e py27-integration + tox -e openstack-integration + """) + +task = TeuthologyIntegration diff --git a/qa/tasks/tgt.py b/qa/tasks/tgt.py new file mode 100644 index 000000000..a0758f472 --- /dev/null +++ b/qa/tasks/tgt.py @@ -0,0 +1,177 @@ +""" +Task to handle tgt + +Assumptions made: + The ceph-extras tgt package may need to get installed. + The open-iscsi package needs to get installed. 
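+
+In outline, for each selected client the task creates an rbd image named
+'iscsi-image', exports it through tgtd as LUN 1 of an iSCSI target named
+'rbd', and tears both down again on exit. The initiator side is handled by
+the separate iscsi task.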
+""" +import logging +import contextlib + +from teuthology import misc as teuthology +from teuthology import contextutil + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def start_tgt_remotes(ctx, start_tgtd): + """ + This subtask starts up a tgtd on the clients specified + """ + remotes = ctx.cluster.only(teuthology.is_type('client')).remotes + tgtd_list = [] + for rem, roles in remotes.items(): + for _id in roles: + if _id in start_tgtd: + if not rem in tgtd_list: + tgtd_list.append(rem) + size = ctx.config.get('image_size', 10240) + rem.run( + args=[ + 'rbd', + 'create', + 'iscsi-image', + '--size', + str(size), + ]) + rem.run( + args=[ + 'sudo', + 'tgtadm', + '--lld', + 'iscsi', + '--mode', + 'target', + '--op', + 'new', + '--tid', + '1', + '--targetname', + 'rbd', + ]) + rem.run( + args=[ + 'sudo', + 'tgtadm', + '--lld', + 'iscsi', + '--mode', + 'logicalunit', + '--op', + 'new', + '--tid', + '1', + '--lun', + '1', + '--backing-store', + 'iscsi-image', + '--bstype', + 'rbd', + ]) + rem.run( + args=[ + 'sudo', + 'tgtadm', + '--lld', + 'iscsi', + '--op', + 'bind', + '--mode', + 'target', + '--tid', + '1', + '-I', + 'ALL', + ]) + try: + yield + + finally: + for rem in tgtd_list: + rem.run( + args=[ + 'sudo', + 'tgtadm', + '--lld', + 'iscsi', + '--mode', + 'target', + '--op', + 'delete', + '--force', + '--tid', + '1', + ]) + rem.run( + args=[ + 'rbd', + 'snap', + 'purge', + 'iscsi-image', + ]) + rem.run( + args=[ + 'sudo', + 'rbd', + 'rm', + 'iscsi-image', + ]) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Start up tgt. + + To start on on all clients:: + + tasks: + - ceph: + - tgt: + + To start on certain clients:: + + tasks: + - ceph: + - tgt: [client.0, client.3] + + or + + tasks: + - ceph: + - tgt: + client.0: + client.3: + + An image blocksize size can also be specified:: + + tasks: + - ceph: + - tgt: + image_size = 20480 + + The general flow of things here is: + 1. Find clients on which tgt is supposed to run (start_tgtd) + 2. Remotely start up tgt daemon + On cleanup: + 3. Stop tgt daemon + + The iscsi administration is handled by the iscsi task. + """ + if config: + config = {key : val for key, val in config.items() + if key.startswith('client')} + # config at this point should only contain keys starting with 'client' + start_tgtd = [] + remotes = ctx.cluster.only(teuthology.is_type('client')).remotes + log.info(remotes) + if not config: + start_tgtd = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + else: + start_tgtd = config + log.info(start_tgtd) + with contextutil.nested( + lambda: start_tgt_remotes(ctx=ctx, start_tgtd=start_tgtd),): + yield diff --git a/qa/tasks/thrash_pool_snaps.py b/qa/tasks/thrash_pool_snaps.py new file mode 100644 index 000000000..c71c9ce8d --- /dev/null +++ b/qa/tasks/thrash_pool_snaps.py @@ -0,0 +1,61 @@ +""" +Thrash -- Simulate random osd failures. 
+""" +import contextlib +import logging +import gevent +import time +import random + + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + "Thrash" snap creation and removal on the listed pools + + Example: + + thrash_pool_snaps: + pools: [.rgw.buckets, .rgw.buckets.index] + max_snaps: 10 + min_snaps: 5 + period: 10 + """ + stopping = False + def do_thrash(): + pools = config.get('pools', []) + max_snaps = config.get('max_snaps', 10) + min_snaps = config.get('min_snaps', 5) + period = config.get('period', 30) + snaps = [] + manager = ctx.managers['ceph'] + def remove_snap(): + assert len(snaps) > 0 + snap = random.choice(snaps) + log.info("Removing snap %s" % (snap,)) + for pool in pools: + manager.remove_pool_snap(pool, str(snap)) + snaps.remove(snap) + def add_snap(snap): + log.info("Adding snap %s" % (snap,)) + for pool in pools: + manager.add_pool_snap(pool, str(snap)) + snaps.append(snap) + index = 0 + while not stopping: + index += 1 + time.sleep(period) + if len(snaps) <= min_snaps: + add_snap(index) + elif len(snaps) >= max_snaps: + remove_snap() + else: + random.choice([lambda: add_snap(index), remove_snap])() + log.info("Stopping") + thread = gevent.spawn(do_thrash) + yield + stopping = True + thread.join() + diff --git a/qa/tasks/thrasher.py b/qa/tasks/thrasher.py new file mode 100644 index 000000000..0ea1bf0ee --- /dev/null +++ b/qa/tasks/thrasher.py @@ -0,0 +1,15 @@ +""" +Thrasher base class +""" +class Thrasher(object): + + def __init__(self): + super(Thrasher, self).__init__() + self._exception = None + + @property + def exception(self): + return self._exception + + def set_thrasher_exception(self, e): + self._exception = e diff --git a/qa/tasks/thrashosds-health.yaml b/qa/tasks/thrashosds-health.yaml new file mode 100644 index 000000000..1405f4740 --- /dev/null +++ b/qa/tasks/thrashosds-health.yaml @@ -0,0 +1,21 @@ +overrides: + ceph: + conf: + osd: + osd max markdown count: 1000 + osd blocked scrub grace period: 3600 + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request + - timeout on replica + - late reservation from diff --git a/qa/tasks/thrashosds.py b/qa/tasks/thrashosds.py new file mode 100644 index 000000000..aa7ec437a --- /dev/null +++ b/qa/tasks/thrashosds.py @@ -0,0 +1,221 @@ +""" +Thrash -- Simulate random osd failures. +""" +import contextlib +import logging +from tasks import ceph_manager +from teuthology import misc as teuthology + + +log = logging.getLogger(__name__) + +@contextlib.contextmanager +def task(ctx, config): + """ + "Thrash" the OSDs by randomly marking them out/down (and then back + in) until the task is ended. This loops, and every op_delay + seconds it randomly chooses to add or remove an OSD (even odds) + unless there are fewer than min_out OSDs out of the cluster, or + more than min_in OSDs in the cluster. + + All commands are run on mon0 and it stops when __exit__ is called. + + The config is optional, and is a dict containing some or all of: + + cluster: (default 'ceph') the name of the cluster to thrash + + min_in: (default 4) the minimum number of OSDs to keep in the + cluster + + min_out: (default 0) the minimum number of OSDs to keep out of the + cluster + + op_delay: (5) the length of time to sleep between changing an + OSD's status + + min_dead: (0) minimum number of osds to leave down/dead. 
+ + max_dead: (0) maximum number of osds to leave down/dead before waiting + for clean. This should probably be num_replicas - 1. + + clean_interval: (60) the approximate length of time to loop before + waiting until the cluster goes clean. (In reality this is used + to probabilistically choose when to wait, and the method used + makes it closer to -- but not identical to -- the half-life.) + + scrub_interval: (-1) the approximate length of time to loop before + waiting until a scrub is performed while cleaning. (In reality + this is used to probabilistically choose when to wait, and it + only applies to the cases where cleaning is being performed). + -1 is used to indicate that no scrubbing will be done. + + chance_down: (0.4) the probability that the thrasher will mark an + OSD down rather than marking it out. (The thrasher will not + consider that OSD out of the cluster, since presently an OSD + wrongly marked down will mark itself back up again.) This value + can be either an integer (eg, 75) or a float probability (eg + 0.75). + + chance_test_min_size: (0) chance to run test_pool_min_size, + which: + - kills all but one osd + - waits + - kills that osd + - revives all other osds + - verifies that the osds fully recover + + timeout: (360) the number of seconds to wait for the cluster + to become clean after each cluster change. If this doesn't + happen within the timeout, an exception will be raised. + + revive_timeout: (150) number of seconds to wait for an osd asok to + appear after attempting to revive the osd + + thrash_primary_affinity: (true) randomly adjust primary-affinity + + chance_pgnum_grow: (0) chance to increase a pool's size + chance_pgpnum_fix: (0) chance to adjust pgpnum to pg for a pool + pool_grow_by: (10) amount to increase pgnum by + chance_pgnum_shrink: (0) chance to decrease a pool's size + pool_shrink_by: (10) amount to decrease pgnum by + max_pgs_per_pool_osd: (1200) don't expand pools past this size per osd + + pause_short: (3) duration of short pause + pause_long: (80) duration of long pause + pause_check_after: (50) assert osd down after this long + chance_inject_pause_short: (1) chance of injecting short stall + chance_inject_pause_long: (0) chance of injecting long stall + + clean_wait: (0) duration to wait before resuming thrashing once clean + + sighup_delay: (0.1) duration to delay between sending signal.SIGHUP to a + random live osd + + powercycle: (false) whether to power cycle the node instead + of just the osd process. Note that this assumes that a single + osd is the only important process on the node. + + bdev_inject_crash: (0) seconds to delay while inducing a synthetic crash. + the delay lets the BlockDevice "accept" more aio operations but blocks + any flush, and then eventually crashes (losing some or all ios). If 0, + no bdev failure injection is enabled. + + bdev_inject_crash_probability: (.5) probability of doing a bdev failure + injection crash vs a normal OSD kill. 
+ + chance_test_backfill_full: (0) chance to simulate full disks stopping + backfill + + chance_test_map_discontinuity: (0) chance to test map discontinuity + map_discontinuity_sleep_time: (40) time to wait for map trims + + ceph_objectstore_tool: (true) whether to export/import a pg while an osd is down + chance_move_pg: (1.0) chance of moving a pg if more than 1 osd is down (default 100%) + + optrack_toggle_delay: (2.0) duration to delay between toggling op tracker + enablement to all osds + + dump_ops_enable: (true) continuously dump ops on all live osds + + noscrub_toggle_delay: (2.0) duration to delay between toggling noscrub + + disable_objectstore_tool_tests: (false) disable ceph_objectstore_tool based + tests + + chance_thrash_cluster_full: .05 + + chance_thrash_pg_upmap: 1.0 + chance_thrash_pg_upmap_items: 1.0 + + aggressive_pg_num_changes: (true) whether we should bypass the careful throttling of pg_num and pgp_num changes in mgr's adjust_pgs() controller + + example: + + tasks: + - ceph: + - thrashosds: + cluster: ceph + chance_down: 10 + op_delay: 3 + min_in: 1 + timeout: 600 + - interactive: + """ + if config is None: + config = {} + assert isinstance(config, dict), \ + 'thrashosds task only accepts a dict for configuration' + # add default value for sighup_delay + config['sighup_delay'] = config.get('sighup_delay', 0.1) + # add default value for optrack_toggle_delay + config['optrack_toggle_delay'] = config.get('optrack_toggle_delay', 2.0) + # add default value for dump_ops_enable + config['dump_ops_enable'] = config.get('dump_ops_enable', "true") + # add default value for noscrub_toggle_delay + config['noscrub_toggle_delay'] = config.get('noscrub_toggle_delay', 2.0) + # add default value for random_eio + config['random_eio'] = config.get('random_eio', 0.0) + aggro = config.get('aggressive_pg_num_changes', True) + + log.info("config is {config}".format(config=str(config))) + + overrides = ctx.config.get('overrides', {}) + log.info("overrides is {overrides}".format(overrides=str(overrides))) + teuthology.deep_merge(config, overrides.get('thrashosds', {})) + cluster = config.get('cluster', 'ceph') + + log.info("config is {config}".format(config=str(config))) + + if 'powercycle' in config: + + # sync everyone first to avoid collateral damage to / etc. 
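+        # Power cycling kills the node with no clean shutdown, so dirty pages
+        # belonging to the root filesystem (logs, package state, etc.) would
+        # simply be lost; flushing them with 'sync' first confines the damage
+        # to the OSDs we actually intend to hurt.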
+ log.info('Doing preliminary sync to avoid collateral damage...') + ctx.cluster.run(args=['sync']) + + if 'ipmi_user' in ctx.teuthology_config: + for remote in ctx.cluster.remotes.keys(): + log.debug('checking console status of %s' % remote.shortname) + if not remote.console.check_status(): + log.warning('Failed to get console status for %s', + remote.shortname) + + # check that all osd remotes have a valid console + osds = ctx.cluster.only(teuthology.is_type('osd', cluster)) + for remote in osds.remotes.keys(): + if not remote.console.has_ipmi_credentials: + raise Exception( + 'IPMI console required for powercycling, ' + 'but not available on osd role: {r}'.format( + r=remote.name)) + + cluster_manager = ctx.managers[cluster] + for f in ['powercycle', 'bdev_inject_crash']: + if config.get(f): + cluster_manager.config[f] = config.get(f) + + if aggro: + cluster_manager.raw_cluster_cmd( + 'config', 'set', 'mgr', + 'mgr_debug_aggressive_pg_num_changes', + 'true') + + log.info('Beginning thrashosds...') + thrash_proc = ceph_manager.OSDThrasher( + cluster_manager, + config, + "OSDThrasher", + logger=log.getChild('thrasher') + ) + ctx.ceph[cluster].thrashers.append(thrash_proc) + try: + yield + finally: + log.info('joining thrashosds') + thrash_proc.do_join() + cluster_manager.wait_for_all_osds_up() + cluster_manager.flush_all_pg_stats() + cluster_manager.wait_for_recovery(config.get('timeout', 360)) + if aggro: + cluster_manager.raw_cluster_cmd( + 'config', 'rm', 'mgr', + 'mgr_debug_aggressive_pg_num_changes') diff --git a/qa/tasks/tox.py b/qa/tasks/tox.py new file mode 100644 index 000000000..61c5b7411 --- /dev/null +++ b/qa/tasks/tox.py @@ -0,0 +1,50 @@ +import argparse +import contextlib +import logging + +from teuthology import misc as teuthology +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + + +def get_toxvenv_dir(ctx): + return '{tdir}/tox-venv'.format(tdir=teuthology.get_testdir(ctx)) + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy tox from pip. It's a dependency for both Keystone and Tempest. + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task tox only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + log.info('Deploying tox from pip...') + for (client, _) in config.items(): + # yup, we have to deploy tox first. The packaged one, available + # on Sepia's Ubuntu machines, is outdated for Keystone/Tempest. 
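+        # The virtualenv is created under {testdir}/tox-venv and pins
+        # tox==3.15.0; later tasks (e.g. tempest.py) find it again through
+        # ctx.tox.venv_path, which is exported just below.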
+ tvdir = get_toxvenv_dir(ctx) + ctx.cluster.only(client).run(args=['python3', '-m', 'venv', tvdir]) + ctx.cluster.only(client).run(args=[ + 'source', '{tvdir}/bin/activate'.format(tvdir=tvdir), + run.Raw('&&'), + 'pip', 'install', 'tox==3.15.0' + ]) + + # export the path Keystone and Tempest + ctx.tox = argparse.Namespace() + ctx.tox.venv_path = get_toxvenv_dir(ctx) + + try: + yield + finally: + for (client, _) in config.items(): + ctx.cluster.only(client).run( + args=[ 'rm', '-rf', get_toxvenv_dir(ctx) ]) diff --git a/qa/tasks/userdata_setup.yaml b/qa/tasks/userdata_setup.yaml new file mode 100644 index 000000000..afcc08e22 --- /dev/null +++ b/qa/tasks/userdata_setup.yaml @@ -0,0 +1,36 @@ +#cloud-config-archive + +- type: text/cloud-config + content: | + output: + all: '| tee -a /var/log/cloud-init-output.log' + +# allow passwordless access for debugging +- | + #!/usr/bin/env bash + exec passwd -d ubuntu + +- | + #!/usr/bin/env bash + + # mount a NFS share for storing logs + sed -i 's/archive.ubuntu.com/old-releases.ubuntu.com/' /etc/apt/sources.list + sed -i 's/security.ubuntu.com/old-releases.ubuntu.com/' /etc/apt/sources.list + apt-get update + + # DST Root CA X3 certificate expired on Sep 30, 2021. It was used by + # Let's Encrypt, which is what git.ceph.com relies on for HTTPS. Get the + # new Let's Encrypt root certificate in place and deactivate the old one + # (lines that begin with "!" are deselected). + apt-get install --only-upgrade ca-certificates libssl1.0.0 + sed -i 's/mozilla\/DST_Root_CA_X3\.crt/!mozilla\/DST_Root_CA_X3\.crt/' /etc/ca-certificates.conf + update-ca-certificates + + apt-get -y install nfs-common + mkdir /mnt/log + # 10.0.2.2 is the host + mount -v -t nfs -o proto=tcp 10.0.2.2:{mnt_dir} /mnt/log + + # mount the iso image that has the test script + mkdir /mnt/cdrom + mount -t auto /dev/cdrom /mnt/cdrom diff --git a/qa/tasks/userdata_teardown.yaml b/qa/tasks/userdata_teardown.yaml new file mode 100644 index 000000000..731d769f0 --- /dev/null +++ b/qa/tasks/userdata_teardown.yaml @@ -0,0 +1,11 @@ +- | + #!/usr/bin/env bash + cp /var/log/cloud-init-output.log /mnt/log + +- | + #!/usr/bin/env bash + umount /mnt/log + +- | + #!/usr/bin/env bash + shutdown -h -P now diff --git a/qa/tasks/util/__init__.py b/qa/tasks/util/__init__.py new file mode 100644 index 000000000..5b8575ed9 --- /dev/null +++ b/qa/tasks/util/__init__.py @@ -0,0 +1,26 @@ +from teuthology import misc + +def get_remote(ctx, cluster, service_type, service_id): + """ + Get the Remote for the host where a particular role runs. + + :param cluster: name of the cluster the service is part of + :param service_type: e.g. 'mds', 'osd', 'client' + :param service_id: The third part of a role, e.g. 
'0' for + the role 'ceph.client.0' + :return: a Remote instance for the host where the + requested role is placed + """ + def _is_instance(role): + role_tuple = misc.split_role(role) + return role_tuple == (cluster, service_type, str(service_id)) + try: + (remote,) = ctx.cluster.only(_is_instance).remotes.keys() + except ValueError: + raise KeyError("Service {0}.{1}.{2} not found".format(cluster, + service_type, + service_id)) + return remote + +def get_remote_for_role(ctx, role): + return get_remote(ctx, *misc.split_role(role)) diff --git a/qa/tasks/util/chacra.py b/qa/tasks/util/chacra.py new file mode 100644 index 000000000..ed9358a59 --- /dev/null +++ b/qa/tasks/util/chacra.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 + +import argparse +import logging +import requests +import sys + +from pathlib import Path +from urllib.parse import urlparse + +log = logging.getLogger(__name__) + +SHAMAN_SEARCH_URL = 'https://shaman.ceph.com/api/search' + +PROJECT = 'ceph' +DISTRO = 'ubuntu' +RELEASE = 'focal' +ARCH='x86_64' +BRANCH = 'main' +SHA1 = 'latest' +FLAVOR = 'default' +FILENAME = 'cephadm' + + +def search(*args, **kwargs): + ''' + Query shaman for a build result + ''' + resp = requests.get(SHAMAN_SEARCH_URL, params=kwargs) + resp.raise_for_status() + return resp + +def _get_distros(distro, release, arch=None): + ret = f'{distro}/{release}' + if arch: + ret = f'{ret}/{arch}' + return ret + +def _get_binary_url(host, project, ref, sha1, distro, release, arch, flavor, filename): + return f'https://{host}/binaries/{project}/{ref}/{sha1}/{distro}/{release}/{arch}/flavors/{flavor}/{filename}' + +def get_binary_url( + filename, + project=None, + distro=None, + release=None, + arch=None, + flavor=None, + branch=None, + sha1=None +): + ''' + Return the chacra url for a build result + ''' + # query shaman for the built binary + s = {} + if project: + s['project'] = project + if distro: + s['distros'] = _get_distros(distro, release, arch) + if flavor: + s['flavor'] = flavor + if branch: + s['ref'] = branch + if sha1: + s['sha1'] = sha1 + + resp = search(**s) + result = resp.json() + + if len(result) == 0: + raise RuntimeError(f'no results found at {resp.url}') + + # TODO: filter the result down to the correct arch etc.? 
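+    # Each shaman record used below carries 'status', 'url', 'ref', 'sha1',
+    # 'distro', 'flavor', 'project' and an 'archs' list (plus, when present,
+    # 'distro_codename'/'distro_version'); for now we simply take the first
+    # match rather than filtering further.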
+ result = result[0] + + status = result['status'] + if status != 'ready': + raise RuntimeError(f'cannot pull file with status: {status}') + + # build the chacra url + chacra_host = urlparse(result['url']).netloc + chacra_ref = result['ref'] + chacra_sha1 = result['sha1'] + log.info(f'got chacra host {chacra_host}, ref {chacra_ref}, sha1 {chacra_sha1} from {resp.url}') + + # prefer codename if a release is not specified + if result.get('distro_codename'): + release = result.get('distro_codename') + elif result.get('distro_version'): + release = result.get('distro_version') + elif not release: + raise RuntimeError('cannot determine distro release!') + + if not arch: + if ARCH in result['archs']: + arch = ARCH + elif len(result['archs']) > 0: + arch = result['archs'][0] + else: + raise RuntimeError('cannot determine the arch type!') + + # build the url to the binary + url = _get_binary_url( + chacra_host, + result['project'], + chacra_ref, + chacra_sha1, + result['distro'], + release, + arch, + result['flavor'], + filename, + ) + + return url + +def pull( + filename, + project=None, + distro=None, + release=None, + arch=None, + flavor=None, + branch=None, + sha1=None +): + ''' + Pull a build result from chacra + ''' + url = get_binary_url( + filename, + project=project, + distro=distro, + release=release, + arch=arch, + flavor=flavor, + branch=branch, + sha1=sha1 + ) + resp = requests.get(url, stream=True) + resp.raise_for_status() + log.info(f'got file from {resp.url}') + + return resp + +def main(): + handler = logging.StreamHandler(sys.stdout) + log.addHandler(handler) + log.setLevel(logging.INFO) + + parser = argparse.ArgumentParser() + parser.add_argument('--project', default=PROJECT) + parser.add_argument('--distro', default=DISTRO) + parser.add_argument('--release', default=RELEASE) + parser.add_argument('--arch', default=ARCH) + parser.add_argument('--branch', default=BRANCH) + parser.add_argument('--sha1', default=SHA1) + parser.add_argument('--flavor', default=FLAVOR) + parser.add_argument('--src', default=FILENAME) + parser.add_argument('--dest', default=FILENAME) + args = parser.parse_args() + + resp = pull( + args.src, + project=args.project, + distro=args.distro, + release=args.release, + arch=args.arch, + flavor=args.flavor, + branch=args.branch, + sha1=args.sha1 + ) + + dest = Path(args.dest).absolute() + with open(dest, 'wb') as f: + for chunk in resp.iter_content(chunk_size=None, decode_unicode=True): + log.info('.',) + f.write(chunk) + log.info(f'wrote binary file: {dest}') + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/qa/tasks/util/rados.py b/qa/tasks/util/rados.py new file mode 100644 index 000000000..a0c54ce4e --- /dev/null +++ b/qa/tasks/util/rados.py @@ -0,0 +1,87 @@ +import logging + +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def rados(ctx, remote, cmd, wait=True, check_status=False): + testdir = teuthology.get_testdir(ctx) + log.info("rados %s" % ' '.join(cmd)) + pre = [ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'rados', + ]; + pre.extend(cmd) + proc = remote.run( + args=pre, + check_status=check_status, + wait=wait, + ) + if wait: + return proc.exitstatus + else: + return proc + +def create_ec_pool(remote, name, profile_name, pgnum, profile={}, cluster_name="ceph", application=None): + remote.run(args=['sudo', 'ceph'] + + cmd_erasure_code_profile(profile_name, profile) + ['--cluster', cluster_name]) + remote.run(args=[ + 'sudo', 'ceph', 'osd', 
'pool', 'create', name, + str(pgnum), str(pgnum), 'erasure', profile_name, '--cluster', cluster_name + ]) + if application: + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name + ], check_status=False) # may fail as EINVAL when run in jewel upgrade test + +def create_replicated_pool(remote, name, pgnum, cluster_name="ceph", application=None): + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'create', name, str(pgnum), str(pgnum), '--cluster', cluster_name + ]) + if application: + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'application', 'enable', name, application, '--cluster', cluster_name + ], check_status=False) + +def create_cache_pool(remote, base_name, cache_name, pgnum, size, cluster_name="ceph"): + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'pool', 'create', cache_name, str(pgnum), '--cluster', cluster_name + ]) + remote.run(args=[ + 'sudo', 'ceph', 'osd', 'tier', 'add-cache', base_name, cache_name, + str(size), '--cluster', cluster_name + ]) + +def cmd_erasure_code_profile(profile_name, profile): + """ + Return the shell command to run to create the erasure code profile + described by the profile parameter. + + :param profile_name: a string matching [A-Za-z0-9-_.]+ + :param profile: a map whose semantic depends on the erasure code plugin + :returns: a shell command as an array suitable for Remote.run + + If profile is {}, it is replaced with + + { 'k': '2', 'm': '1', 'crush-failure-domain': 'osd'} + + for backward compatibility. In previous versions of teuthology, + these values were hardcoded as function arguments and some yaml + files were designed with these implicit values. The teuthology + code should not know anything about the erasure code profile + content or semantic. The valid values and parameters are outside + its scope. + """ + + if profile == {}: + profile = { + 'k': '2', + 'm': '1', + 'crush-failure-domain': 'osd' + } + return [ + 'osd', 'erasure-code-profile', 'set', + profile_name + ] + [ str(key) + '=' + str(value) for key, value in profile.items() ] diff --git a/qa/tasks/util/rgw.py b/qa/tasks/util/rgw.py new file mode 100644 index 000000000..59c801028 --- /dev/null +++ b/qa/tasks/util/rgw.py @@ -0,0 +1,99 @@ +import logging +import json +import time + +from io import StringIO + +from teuthology import misc as teuthology + +log = logging.getLogger(__name__) + +def rgwadmin(ctx, client, cmd, stdin=StringIO(), check_status=False, + omit_sudo=False, omit_tdir=False, format='json', decode=True, + log_level=logging.DEBUG): + log.info('rgwadmin: {client} : {cmd}'.format(client=client,cmd=cmd)) + testdir = teuthology.get_testdir(ctx) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + client_with_id = daemon_type + '.' 
+ client_id + pre = [ + 'adjust-ulimits', + 'ceph-coverage'] + if not omit_tdir: + pre.append( + '{tdir}/archive/coverage'.format(tdir=testdir)) + pre.extend([ + 'radosgw-admin', + '--log-to-stderr', + '--format', format, + '-n', client_with_id, + '--cluster', cluster_name, + ]) + pre.extend(cmd) + log.log(log_level, 'rgwadmin: cmd=%s' % pre) + (remote,) = ctx.cluster.only(client).remotes.keys() + proc = remote.run( + args=pre, + check_status=check_status, + omit_sudo=omit_sudo, + stdout=StringIO(), + stderr=StringIO(), + stdin=stdin, + ) + r = proc.exitstatus + out = proc.stdout.getvalue() + if not decode: + return (r, out) + j = None + if not r and out != '': + try: + j = json.loads(out) + log.log(log_level, ' json result: %s' % j) + except ValueError: + j = out + log.log(log_level, ' raw result: %s' % j) + return (r, j) + +def get_user_summary(out, user): + """Extract the summary for a given user""" + user_summary = None + for summary in out['summary']: + if summary.get('user') == user: + user_summary = summary + + if not user_summary: + raise AssertionError('No summary info found for user: %s' % user) + + return user_summary + +def get_user_successful_ops(out, user): + summary = out['summary'] + if len(summary) == 0: + return 0 + return get_user_summary(out, user)['total']['successful_ops'] + +def wait_for_radosgw(url, remote): + """ poll the given url until it starts accepting connections + + add_daemon() doesn't wait until radosgw finishes startup, so this is used + to avoid racing with later tasks that expect radosgw to be up and listening + """ + # TODO: use '--retry-connrefused --retry 8' when teuthology is running on + # Centos 8 and other OS's with an updated version of curl + curl_cmd = ['curl', + url] + exit_status = 0 + num_retries = 8 + for seconds in range(num_retries): + proc = remote.run( + args=curl_cmd, + check_status=False, + stdout=StringIO(), + stderr=StringIO(), + stdin=StringIO(), + ) + exit_status = proc.exitstatus + if exit_status == 0: + break + time.sleep(2**seconds) + + assert exit_status == 0 diff --git a/qa/tasks/util/test/__init__.py b/qa/tasks/util/test/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/tasks/util/test/__init__.py diff --git a/qa/tasks/util/test/test_rados.py b/qa/tasks/util/test/test_rados.py new file mode 100644 index 000000000..a8f4cb02d --- /dev/null +++ b/qa/tasks/util/test/test_rados.py @@ -0,0 +1,40 @@ +# +# The MIT License +# +# Copyright (C) 2014 Cloudwatt <libre.licensing@cloudwatt.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +from tasks.util import rados + +class TestRados(object): + + def test_cmd_erasure_code_profile(self): + name = 'NAME' + cmd = rados.cmd_erasure_code_profile(name, {}) + assert 'k=2' in cmd + assert name in cmd + cmd = rados.cmd_erasure_code_profile(name, { 'k': '88' }) + assert 'k=88' in cmd + assert name in cmd diff --git a/qa/tasks/util/workunit.py b/qa/tasks/util/workunit.py new file mode 100644 index 000000000..1f5623af8 --- /dev/null +++ b/qa/tasks/util/workunit.py @@ -0,0 +1,78 @@ +import copy + +from teuthology import misc +from teuthology.orchestra import run + +class Refspec: + def __init__(self, refspec): + self.refspec = refspec + + def __str__(self): + return self.refspec + + def _clone(self, git_url, clonedir, opts=None): + if opts is None: + opts = [] + return (['rm', '-rf', clonedir] + + [run.Raw('&&')] + + ['git', 'clone'] + opts + + [git_url, clonedir]) + + def _cd(self, clonedir): + return ['cd', clonedir] + + def _checkout(self): + return ['git', 'checkout', self.refspec] + + def clone(self, git_url, clonedir): + return (self._clone(git_url, clonedir) + + [run.Raw('&&')] + + self._cd(clonedir) + + [run.Raw('&&')] + + self._checkout()) + + +class Branch(Refspec): + def __init__(self, tag): + Refspec.__init__(self, tag) + + def clone(self, git_url, clonedir): + opts = ['--depth', '1', + '--branch', self.refspec] + return (self._clone(git_url, clonedir, opts) + + [run.Raw('&&')] + + self._cd(clonedir)) + + +class Head(Refspec): + def __init__(self): + Refspec.__init__(self, 'HEAD') + + def clone(self, git_url, clonedir): + opts = ['--depth', '1'] + return (self._clone(git_url, clonedir, opts) + + [run.Raw('&&')] + + self._cd(clonedir)) + + +def get_refspec_after_overrides(config, overrides): + # mimic the behavior of the "install" task, where the "overrides" are + # actually the defaults of that task. in other words, if none of "sha1", + # "tag", or "branch" is specified by a "workunit" tasks, we will update + # it with the information in the "workunit" sub-task nested in "overrides". 
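+    # Illustrative (hypothetical) values: config == {'branch': 'wip-foo'}
+    # causes any refspec keys in the overrides to be dropped and yields
+    # Branch('wip-foo'); if neither config nor overrides names a refspec,
+    # the function falls back to Head().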
+ overrides = copy.deepcopy(overrides.get('workunit', {})) + refspecs = {'suite_sha1': Refspec, 'suite_branch': Branch, + 'sha1': Refspec, 'tag': Refspec, 'branch': Branch} + if any(map(lambda i: i in config, refspecs.keys())): + for i in refspecs.keys(): + overrides.pop(i, None) + misc.deep_merge(config, overrides) + + for spec, cls in refspecs.items(): + refspec = config.get(spec) + if refspec: + refspec = cls(refspec) + break + if refspec is None: + refspec = Head() + return refspec diff --git a/qa/tasks/vault.py b/qa/tasks/vault.py new file mode 100644 index 000000000..2ff008c4d --- /dev/null +++ b/qa/tasks/vault.py @@ -0,0 +1,288 @@ +""" +Deploy and configure Vault for Teuthology +""" + +import argparse +import contextlib +import logging +import time +import json +from os import path +from http import client as http_client +from urllib.parse import urljoin + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.orchestra import run +from teuthology.exceptions import ConfigError, CommandFailedError + + +log = logging.getLogger(__name__) + + +def assign_ports(ctx, config, initial_port): + """ + Assign port numbers starting from @initial_port + """ + port = initial_port + role_endpoints = {} + for remote, roles_for_host in ctx.cluster.remotes.items(): + for role in roles_for_host: + if role in config: + role_endpoints[role] = (remote.name.split('@')[1], port) + port += 1 + + return role_endpoints + + +@contextlib.contextmanager +def download(ctx, config): + """ + Download Vault Release from Hashicopr website. + Remove downloaded file upon exit. + """ + assert isinstance(config, dict) + log.info('Downloading Vault...') + testdir = teuthology.get_testdir(ctx) + + for (client, cconf) in config.items(): + install_url = cconf.get('install_url') + install_sha256 = cconf.get('install_sha256') + if not install_url or not install_sha256: + raise ConfigError("Missing Vault install_url and/or install_sha256") + install_zip = path.join(testdir, 'vault.zip') + install_dir = path.join(testdir, 'vault') + + log.info('Downloading Vault...') + ctx.cluster.only(client).run( + args=['curl', '-L', install_url, '-o', install_zip]) + + log.info('Verifying SHA256 signature...') + ctx.cluster.only(client).run( + args=['echo', ' '.join([install_sha256, install_zip]), run.Raw('|'), + 'sha256sum', '--check', '--status']) + + log.info('Extracting vault...') + ctx.cluster.only(client).run(args=['mkdir', '-p', install_dir]) + # Using python in case unzip is not installed on hosts + # Using python3 in case python is not installed on hosts + failed=True + for f in [ + lambda z,d: ['unzip', z, '-d', d], + lambda z,d: ['python3', '-m', 'zipfile', '-e', z, d], + lambda z,d: ['python', '-m', 'zipfile', '-e', z, d]]: + try: + ctx.cluster.only(client).run(args=f(install_zip, install_dir)) + failed = False + break + except CommandFailedError as e: + failed = e + if failed: + raise failed + + try: + yield + finally: + log.info('Removing Vault...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=['rm', '-rf', install_dir, install_zip]) + + +def get_vault_dir(ctx): + return '{tdir}/vault'.format(tdir=teuthology.get_testdir(ctx)) + + +@contextlib.contextmanager +def run_vault(ctx, config): + assert isinstance(config, dict) + + for (client, cconf) in config.items(): + (remote,) = ctx.cluster.only(client).remotes.keys() + cluster_name, _, client_id = teuthology.split_role(client) + + _, port = ctx.vault.endpoints[client] + listen_addr = 
"0.0.0.0:{}".format(port) + + root_token = ctx.vault.root_token = cconf.get('root_token', 'root') + + log.info("Starting Vault listening on %s ...", listen_addr) + v_params = [ + '-dev', + '-dev-listen-address={}'.format(listen_addr), + '-dev-no-store-token', + '-dev-root-token-id={}'.format(root_token) + ] + + cmd = "chmod +x {vdir}/vault && {vdir}/vault server {vargs}".format(vdir=get_vault_dir(ctx), vargs=" ".join(v_params)) + + ctx.daemons.add_daemon( + remote, 'vault', client_id, + cluster=cluster_name, + args=['bash', '-c', cmd, run.Raw('& { read; kill %1; }')], + logger=log.getChild(client), + stdin=run.PIPE, + cwd=get_vault_dir(ctx), + wait=False, + check_status=False, + ) + time.sleep(10) + try: + yield + finally: + log.info('Stopping Vault instance') + ctx.daemons.get_daemon('vault', client_id, cluster_name).stop() + + +@contextlib.contextmanager +def setup_vault(ctx, config): + """ + Mount Transit or KV version 2 secrets engine + """ + (cclient, cconfig) = next(iter(config.items())) + engine = cconfig.get('engine') + + if engine == 'kv': + log.info('Mounting kv version 2 secrets engine') + mount_path = '/v1/sys/mounts/kv' + data = { + "type": "kv", + "options": { + "version": "2" + } + } + elif engine == 'transit': + log.info('Mounting transit secrets engine') + mount_path = '/v1/sys/mounts/transit' + data = { + "type": "transit" + } + else: + raise Exception("Unknown or missing secrets engine") + + send_req(ctx, cconfig, cclient, mount_path, json.dumps(data)) + yield + + +def send_req(ctx, cconfig, client, path, body, method='POST'): + host, port = ctx.vault.endpoints[client] + req = http_client.HTTPConnection(host, port, timeout=30) + token = cconfig.get('root_token', 'atoken') + log.info("Send request to Vault: %s:%s at %s with token: %s", host, port, path, token) + headers = {'X-Vault-Token': token} + req.request(method, path, headers=headers, body=body) + resp = req.getresponse() + log.info(resp.read()) + if not (resp.status >= 200 and resp.status < 300): + raise Exception("Request to Vault server failed with status %d" % resp.status) + return resp + + +@contextlib.contextmanager +def create_secrets(ctx, config): + (cclient, cconfig) = next(iter(config.items())) + engine = cconfig.get('engine') + prefix = cconfig.get('prefix') + secrets = cconfig.get('secrets') + flavor = cconfig.get('flavor') + if secrets is None: + raise ConfigError("No secrets specified, please specify some.") + + ctx.vault.keys[cclient] = [] + for secret in secrets: + try: + path = secret['path'] + except KeyError: + raise ConfigError('Missing "path" field in secret') + exportable = secret.get("exportable", flavor == "old") + + if engine == 'kv': + try: + data = { + "data": { + "key": secret['secret'] + } + } + except KeyError: + raise ConfigError('Missing "secret" field in secret') + elif engine == 'transit': + data = {"exportable": "true" if exportable else "false"} + else: + raise Exception("Unknown or missing secrets engine") + + send_req(ctx, cconfig, cclient, urljoin(prefix, path), json.dumps(data)) + + ctx.vault.keys[cclient].append({ 'Path': path }); + + log.info("secrets created") + yield + + +@contextlib.contextmanager +def task(ctx, config): + """ + Deploy and configure Vault + + Example of configuration: + + tasks: + - vault: + client.0: + install_url: http://my.special.place/vault.zip + install_sha256: zipfiles-sha256-sum-much-larger-than-this + root_token: test_root_token + engine: transit + flavor: old + prefix: /v1/transit/keys + secrets: + - path: kv/teuthology/key_a + secret: 
base64_only_if_using_kv_aWxkCmNlcGguY29uZgo= + exportable: true + - path: kv/teuthology/key_b + secret: base64_only_if_using_kv_dApzcmMKVGVzdGluZwo= + + engine can be 'kv' or 'transit' + prefix should be /v1/kv/data/ for kv, /v1/transit/keys/ for transit + flavor should be 'old' only if testing the original transit logic + otherwise omit. + for kv only: 256-bit key value should be specified via secret, + otherwise should omit. + for transit: exportable may be used to make individual keys exportable. + flavor may be set to 'old' to make all keys exportable by default, + which is required by the original transit logic. + """ + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + + overrides = ctx.config.get('overrides', {}) + # merge each client section, not the top level. + for client in config.keys(): + if not config[client]: + config[client] = {} + teuthology.deep_merge(config[client], overrides.get('vault', {})) + + log.debug('Vault config is %s', config) + + ctx.vault = argparse.Namespace() + ctx.vault.endpoints = assign_ports(ctx, config, 8200) + ctx.vault.root_token = None + ctx.vault.prefix = config[client].get('prefix') + ctx.vault.engine = config[client].get('engine') + ctx.vault.keys = {} + q=config[client].get('flavor') + if q: + ctx.vault.flavor = q + + with contextutil.nested( + lambda: download(ctx=ctx, config=config), + lambda: run_vault(ctx=ctx, config=config), + lambda: setup_vault(ctx=ctx, config=config), + lambda: create_secrets(ctx=ctx, config=config) + ): + yield + diff --git a/qa/tasks/vip.py b/qa/tasks/vip.py new file mode 100644 index 000000000..52114b104 --- /dev/null +++ b/qa/tasks/vip.py @@ -0,0 +1,205 @@ +import contextlib +import ipaddress +import logging +import re + +from teuthology import misc as teuthology +from teuthology.config import config as teuth_config + +log = logging.getLogger(__name__) + + +def subst_vip(ctx, cmd): + p = re.compile(r'({{VIP(\d+)}})') + for m in p.findall(cmd): + n = int(m[1]) + if n >= len(ctx.vip["vips"]): + log.warning(f'no VIP{n} (we have {len(ctx.vip["vips"])})') + else: + cmd = cmd.replace(m[0], str(ctx.vip["vips"][n])) + + if '{{VIPPREFIXLEN}}' in cmd: + cmd = cmd.replace('{{VIPPREFIXLEN}}', str(ctx.vip["vnet"].prefixlen)) + + if '{{VIPSUBNET}}' in cmd: + cmd = cmd.replace('{{VIPSUBNET}}', str(ctx.vip["vnet"].network_address)) + + return cmd + + +def echo(ctx, config): + """ + This is mostly for debugging + """ + for remote in ctx.cluster.remotes.keys(): + log.info(subst_vip(ctx, config)) + + +def exec(ctx, config): + """ + This is similar to the standard 'exec' task, but does the VIP substitutions. 
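+
+    An illustrative (hypothetical) fragment:
+
+        tasks:
+        - vip.exec:
+            host.a:
+            - echo {{VIP0}}/{{VIPPREFIXLEN}}
+
+    Each {{VIP<n>}}, {{VIPSUBNET}} and {{VIPPREFIXLEN}} token is expanded by
+    subst_vip() before the command is run.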
+ """ + assert isinstance(config, dict), "task exec got invalid config" + + testdir = teuthology.get_testdir(ctx) + + if 'all-roles' in config and len(config) == 1: + a = config['all-roles'] + roles = teuthology.all_roles(ctx.cluster) + config = dict((id_, a) for id_ in roles if not id_.startswith('host.')) + elif 'all-hosts' in config and len(config) == 1: + a = config['all-hosts'] + roles = teuthology.all_roles(ctx.cluster) + config = dict((id_, a) for id_ in roles if id_.startswith('host.')) + + for role, ls in config.items(): + (remote,) = ctx.cluster.only(role).remotes.keys() + log.info('Running commands on role %s host %s', role, remote.name) + for c in ls: + c.replace('$TESTDIR', testdir) + remote.run( + args=[ + 'sudo', + 'TESTDIR={tdir}'.format(tdir=testdir), + 'bash', + '-ex', + '-c', + subst_vip(ctx, c)], + ) + + +def map_vips(mip, count): + for mapping in teuth_config.get('vip', []): + mnet = ipaddress.ip_network(mapping['machine_subnet']) + vnet = ipaddress.ip_network(mapping['virtual_subnet']) + if vnet.prefixlen >= mnet.prefixlen: + log.error(f"virtual_subnet {vnet} prefix >= machine_subnet {mnet} prefix") + return None + if mip in mnet: + pos = list(mnet.hosts()).index(mip) + log.info(f"{mip} in {mnet}, pos {pos}") + r = [] + for sub in vnet.subnets(new_prefix=mnet.prefixlen): + r += [list(sub.hosts())[pos]] + count -= 1 + if count == 0: + break + return vnet, r + return None + + +@contextlib.contextmanager +def task(ctx, config): + """ + Set up a virtual network and allocate virtual IP(s) for each machine. + + The strategy here is to set up a private virtual subnet that is larger than + the subnet the machine(s) exist in, and allocate virtual IPs from that pool. + + - The teuthology.yaml must include a section like:: + + vip: + - machine_subnet: 172.21.0.0/20 + virtual_subnet: 10.0.0.0/16 + + At least one item's machine_subnet should map the subnet the test machine's + primary IP lives in (the one DNS resolves to). The virtual_subnet must have a + shorter prefix (i.e., larger than the machine_subnet). If there are multiple + machine_subnets, they cannot map into the same virtual_subnet. + + - Each machine gets an IP in the virtual_subset statically configured by the vip + task. This lets all test machines reach each other and (most importantly) any + virtual IPs. + + - 1 or more virtual IPs are then mapped for the task. These IPs are chosen based + on one of the remotes. This uses a lot of network space but it avoids any + conflicts between tests. + + To use a virtual IP, the {{VIP0}}, {{VIP1}}, etc. substitutions can be used. + + {{VIPSUBNET}} is the virtual_subnet address (10.0.0.0 in the example). + + {{VIPPREFIXLEN}} is the virtual_subnet prefix (16 in the example. + + These substitutions work for vip.echo, and (at the time of writing) cephadm.apply + and cephadm.shell. + """ + if config is None: + config = {} + count = config.get('count', 1) + + ctx.vip_static = {} + ctx.vip = {} + + log.info("Allocating static IPs for each host...") + for remote in ctx.cluster.remotes.keys(): + ip = remote.ssh.get_transport().getpeername()[0] + log.info(f'peername {ip}') + mip = ipaddress.ip_address(ip) + vnet, vips = map_vips(mip, count + 1) + static = vips.pop(0) + log.info(f"{remote.hostname} static {static}, vnet {vnet}") + + if not ctx.vip: + # do this only once (use the first remote we see), since we only need 1 + # set of virtual IPs, regardless of how many remotes we have. 
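+            # ctx.vip records the shared virtual subnet plus the pool of spare
+            # VIPs; subst_vip() above reads ctx.vip['vips'] and ctx.vip['vnet']
+            # when expanding the {{VIP*}} tokens.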
+ log.info("VIPs are {map(str, vips)}") + ctx.vip = { + 'vnet': vnet, + 'vips': vips, + } + else: + # all remotes must be in the same virtual network... + assert vnet == ctx.vip['vnet'] + + # pick interface + p = re.compile(r'^(\S+) dev (\S+) (.*)scope link (.*)src (\S+)') + iface = None + for line in remote.sh(['sudo', 'ip','route','ls']).splitlines(): + m = p.findall(line) + if not m: + continue + route_iface = m[0][1] + route_ip = m[0][4] + if route_ip == ip: + iface = route_iface + break + + if not iface: + log.error(f"Unable to find {remote.hostname} interface for {ip}") + continue + + # configure + log.info(f"Configuring {static} on {remote.hostname} iface {iface}...") + remote.sh(['sudo', + 'ip', 'addr', 'add', + str(static) + '/' + str(vnet.prefixlen), + 'dev', iface]) + + ctx.vip_static[remote] = { + "iface": iface, + "static": static, + } + + try: + yield + + finally: + for remote, m in ctx.vip_static.items(): + log.info(f"Removing {m['static']} (and any VIPs) on {remote.hostname} iface {m['iface']}...") + remote.sh(['sudo', + 'ip', 'addr', 'del', + str(m['static']) + '/' + str(ctx.vip['vnet'].prefixlen), + 'dev', m['iface']]) + + for vip in ctx.vip['vips']: + remote.sh( + [ + 'sudo', + 'ip', 'addr', 'del', + str(vip) + '/' + str(ctx.vip['vnet'].prefixlen), + 'dev', m['iface'] + ], + check_status=False, + ) + diff --git a/qa/tasks/vstart_runner.py b/qa/tasks/vstart_runner.py new file mode 100644 index 000000000..df4886fb6 --- /dev/null +++ b/qa/tasks/vstart_runner.py @@ -0,0 +1,1516 @@ +""" +vstart_runner: override Filesystem and Mount interfaces to run a CephFSTestCase against a vstart +ceph instance instead of a packaged/installed cluster. Use this to turn around test cases +quickly during development. + +Simple usage (assuming teuthology and ceph checked out in ~/git): + + # Activate the teuthology virtualenv + source ~/git/teuthology/virtualenv/bin/activate + # Go into your ceph build directory + cd ~/git/ceph/build + # Invoke a test using this script + python ~/git/ceph/qa/tasks/vstart_runner.py --create tasks.cephfs.test_data_scan + +Alternative usage: + + # Alternatively, if you use different paths, specify them as follows: + LD_LIBRARY_PATH=`pwd`/lib PYTHONPATH=~/git/teuthology:~/git/ceph/qa:`pwd`/../src/pybind:`pwd`/lib/cython_modules/lib.3 python ~/git/ceph/qa/tasks/vstart_runner.py + + # If you wish to drop to a python shell on failures, use --interactive: + python ~/git/ceph/qa/tasks/vstart_runner.py --interactive + + # If you wish to run a named test case, pass it as an argument: + python ~/git/ceph/qa/tasks/vstart_runner.py tasks.cephfs.test_data_scan + + # Also, you can create the cluster once and then run named test cases against it: + python ~/git/ceph/qa/tasks/vstart_runner.py --create-cluster-only + python ~/git/ceph/qa/tasks/vstart_runner.py tasks.mgr.dashboard.test_health + python ~/git/ceph/qa/tasks/vstart_runner.py tasks.mgr.dashboard.test_rgw + +Following are few important notes that might save some investigation around +vstart_runner.py - + +* If using the FUSE client, ensure that the fuse package is installed and + enabled on the system and that "user_allow_other" is added to /etc/fuse.conf. + +* If using the kernel client, the user must have the ability to run commands + with passwordless sudo access. + +* A failure on the kernel client may crash the host, so it's recommended to + use this functionality within a virtual machine. + +* "adjust-ulimits", "ceph-coverage" and "sudo" in command arguments are + overridden by vstart_runner.py. 
Former two usually have no applicability + for test runs on developer's machines and see note point on "omit_sudo" + to know more about overriding of "sudo". + +* "omit_sudo" is re-set to False unconditionally in cases of commands + "passwd" and "chown". + +* The presence of binary file named after the first argument in the command + arguments received by the method LocalRemote.run() is checked for in + <ceph-repo-root>/build/bin/. If present, the first argument is replaced with + the path to binary file. +""" + +from io import StringIO +from json import loads +from collections import defaultdict +import getpass +import signal +import tempfile +import threading +import datetime +import shutil +import re +import os +import time +import sys +import errno +from IPy import IP +import unittest +import platform +import logging +from argparse import Namespace + +from unittest import suite, loader + +from teuthology.orchestra.run import quote, PIPE +from teuthology.orchestra.daemon import DaemonGroup +from teuthology.orchestra.remote import RemoteShell +from teuthology.config import config as teuth_config +from teuthology.contextutil import safe_while +from teuthology.contextutil import MaxWhileTries +from teuthology.exceptions import CommandFailedError +try: + import urllib3 + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) +except: + pass + +def init_log(log_level=logging.INFO): + global log + if log is not None: + del log + log = logging.getLogger(__name__) + + global logpath + logpath = './vstart_runner.log' + + handler = logging.FileHandler(logpath) + formatter = logging.Formatter( + fmt=u'%(asctime)s.%(msecs)03d %(levelname)s:%(name)s:%(message)s', + datefmt='%Y-%m-%dT%H:%M:%S') + handler.setFormatter(formatter) + log.addHandler(handler) + log.setLevel(log_level) + +log = None +init_log() + + +def respawn_in_path(lib_path, python_paths): + execv_cmd = ['python'] + if platform.system() == "Darwin": + lib_path_var = "DYLD_LIBRARY_PATH" + else: + lib_path_var = "LD_LIBRARY_PATH" + + py_binary = os.environ.get("PYTHON", sys.executable) + + if lib_path_var in os.environ: + if lib_path not in os.environ[lib_path_var]: + os.environ[lib_path_var] += ':' + lib_path + os.execvp(py_binary, execv_cmd + sys.argv) + else: + os.environ[lib_path_var] = lib_path + os.execvp(py_binary, execv_cmd + sys.argv) + + for p in python_paths: + sys.path.insert(0, p) + + +def launch_subprocess(args, cwd=None, env=None, shell=True, + executable='/bin/bash'): + return subprocess.Popen(args, cwd=cwd, env=env, shell=shell, + executable=executable, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, stdin=subprocess.PIPE) + + +# Let's use some sensible defaults +if os.path.exists("./CMakeCache.txt") and os.path.exists("./bin"): + + # A list of candidate paths for each package we need + guesses = [ + ["~/git/teuthology", "~/scm/teuthology", "~/teuthology"], + ["lib/cython_modules/lib.3"], + ["../src/pybind"], + ] + + python_paths = [] + + # Up one level so that "tasks.foo.bar" imports work + python_paths.append(os.path.abspath( + os.path.join(os.path.dirname(os.path.realpath(__file__)), "..") + )) + + for package_guesses in guesses: + for g in package_guesses: + g_exp = os.path.abspath(os.path.expanduser(g)) + if os.path.exists(g_exp): + python_paths.append(g_exp) + + ld_path = os.path.join(os.getcwd(), "lib/") + print("Using guessed paths {0} {1}".format(ld_path, python_paths)) + respawn_in_path(ld_path, python_paths) + + +try: + from tasks.ceph_manager import CephManager + from tasks.cephfs.fuse_mount 
import FuseMount + from tasks.cephfs.kernel_mount import KernelMount + from tasks.cephfs.filesystem import Filesystem, MDSCluster, CephCluster + from tasks.cephfs.mount import CephFSMount + from tasks.mgr.mgr_test_case import MgrCluster + from teuthology.task import interactive +except ImportError: + sys.stderr.write("***\nError importing packages, have you activated your teuthology virtualenv " + "and set PYTHONPATH to point to teuthology and ceph-qa-suite?\n***\n\n") + raise + +# Must import after teuthology because of gevent monkey patching +import subprocess + +if os.path.exists("./CMakeCache.txt"): + # Running in build dir of a cmake build + BIN_PREFIX = "./bin/" + SRC_PREFIX = "../src" +else: + # Running in src/ of an autotools build + BIN_PREFIX = "./" + SRC_PREFIX = "./" + +CEPH_CMD = os.path.join(BIN_PREFIX, 'ceph') +RADOS_CMD = os.path.join(BIN_PREFIX, 'rados') + + +def rm_nonascii_chars(var): + var = var.replace(b'\xe2\x80\x98', b'\'') + var = var.replace(b'\xe2\x80\x99', b'\'') + return var + +class LocalRemoteProcess(object): + def __init__(self, args, subproc, check_status, stdout, stderr, usr_args): + self.args = args + self.subproc = subproc + self.stdout = stdout + self.stderr = stderr + self.usr_args = usr_args + # this variable is meant for instance of this class named fuse_daemon. + # child process of the command launched with sudo must be killed, + # since killing parent process alone has no impact on the child + # process. + self.fuse_pid = -1 + + self.check_status = check_status + self.exitstatus = self.returncode = None + + def _write_stdout(self, out): + if isinstance(self.stdout, StringIO): + self.stdout.write(out.decode(errors='ignore')) + elif self.stdout is None: + pass + else: + self.stdout.write(out) + + def _write_stderr(self, err): + if isinstance(self.stderr, StringIO): + self.stderr.write(err.decode(errors='ignore')) + elif self.stderr is None: + pass + else: + self.stderr.write(err) + + def wait(self): + if self.finished: + # Avoid calling communicate() on a dead process because it'll + # give you stick about std* already being closed + if self.check_status and self.exitstatus != 0: + # TODO: print self.args or self.usr_args in exception msg? + raise CommandFailedError(self.args, self.exitstatus) + else: + return + + out, err = self.subproc.communicate() + out, err = rm_nonascii_chars(out), rm_nonascii_chars(err) + self._write_stdout(out) + self._write_stderr(err) + + self.exitstatus = self.returncode = self.subproc.returncode + + if self.exitstatus != 0: + sys.stderr.write(out.decode()) + sys.stderr.write(err.decode()) + + if self.check_status and self.exitstatus != 0: + # TODO: print self.args or self.usr_args in exception msg? 
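+            # (self.args is the command actually executed, i.e. after the
+            # sudo/helper-tool rewriting done by LocalRemote, while
+            # self.usr_args is the command as originally passed by the caller.)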
+ raise CommandFailedError(self.args, self.exitstatus) + + @property + def finished(self): + if self.exitstatus is not None: + return True + + if self.subproc.poll() is not None: + out, err = self.subproc.communicate() + self._write_stdout(out) + self._write_stderr(err) + + self.exitstatus = self.returncode = self.subproc.returncode + + return True + else: + return False + + def kill(self): + log.debug("kill ") + if self.subproc.pid and not self.finished: + log.debug(f"kill: killing pid {self.subproc.pid} " + f"({self.usr_args})") + if self.fuse_pid != -1: + safe_kill(self.fuse_pid) + else: + safe_kill(self.subproc.pid) + else: + log.debug(f"kill: already terminated ({self.usr_args})") + + @property + def stdin(self): + class FakeStdIn(object): + def __init__(self, mount_daemon): + self.mount_daemon = mount_daemon + + def close(self): + self.mount_daemon.kill() + + return FakeStdIn(self) + + +class LocalRemote(RemoteShell): + """ + Amusingly named class to present the teuthology RemoteProcess interface when we are really + running things locally for vstart + + Run this inside your src/ dir! + """ + + def __init__(self): + super().__init__() + self.name = "local" + self._hostname = "localhost" + self.user = getpass.getuser() + + @property + def hostname(self): + if not hasattr(self, '_hostname'): + self._hostname = 'localhost' + return self._hostname + + def get_file(self, path, sudo, dest_dir): + tmpfile = tempfile.NamedTemporaryFile(delete=False).name + shutil.copy(path, tmpfile) + return tmpfile + + # XXX: This method ignores the error raised when src and dst are + # holding same path. For teuthology, same path still represents + # different locations as they lie on different machines. + def put_file(self, src, dst, sudo=False): + try: + shutil.copy(src, dst) + except shutil.SameFileError: + pass + + + def _omit_cmd_args(self, args, omit_sudo): + """ + Helper tools are omitted since those are not meant for tests executed + using vstart_runner.py. And sudo's omission depends on the value of + the variable omit_sudo. + """ + helper_tools = ('adjust-ulimits', 'ceph-coverage', + 'None/archive/coverage') + for i in helper_tools: + if i in args: + helper_tools_found = True + break + else: + helper_tools_found = False + + if not helper_tools_found and 'sudo' not in args: + return args, args + + prefix = '' + + if helper_tools_found: + args = args.replace('None/archive/coverage', '') + prefix += """ +adjust-ulimits() { + "$@" +} +ceph-coverage() { + "$@" +} +""" + log.debug('Helper tools like adjust-ulimits and ceph-coverage ' + 'were omitted from the following cmd args before ' + 'logging and execution; check vstart_runner.py for ' + 'more details.') + + first_arg = args[ : args.find(' ')] + # We'll let sudo be a part of command even omit flag says otherwise in + # cases of commands which can normally be ran only by root. + last_arg = args[args.rfind(' ') + 1 : ] + # XXX: should sudo be omitted/allowed by default in cases similar to + # that of "exec sudo" as well? 
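+        # For example, "sudo chown bob somefile" must keep the real sudo
+        # (changing ownership genuinely needs root), whereas ordinary test
+        # commands fall through to the no-op sudo() shell function injected
+        # below when omit_sudo is True.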
+ if 'sudo' in args: + for x in ('passwd', 'chown'): + if x == first_arg or x == last_arg or f' {x} ' in args: + omit_sudo = False + + if omit_sudo: + prefix += """ +sudo() { + "$@" +} +""" + log.debug('"sudo" was omitted from the following cmd args ' + 'before execution and logging using function ' + 'overriding; check vstart_runner.py for more details.') + + # usr_args = args passed by the user/caller of this method + usr_args, args = args, prefix + args + + return usr_args, args + + def _perform_checks_and_adjustments(self, args, omit_sudo): + if isinstance(args, list): + args = quote(args) + + assert isinstance(args, str) + + first_arg = args[ : args.find(' ')] + if '/' not in first_arg: + local_bin = os.path.join(BIN_PREFIX, first_arg) + if os.path.exists(local_bin): + args = args.replace(first_arg, local_bin, 1) + + usr_args, args = self._omit_cmd_args(args, omit_sudo) + + log.debug('> ' + usr_args) + + return args, usr_args + + # Wrapper to keep the interface exactly same as that of + # teuthology.remote.run. + def run(self, **kwargs): + return self._do_run(**kwargs) + + # XXX: omit_sudo is set to True since using sudo can change the ownership + # of files which becomes problematic for following executions of + # vstart_runner.py. + # XXX: omit_sudo is re-set to False even in cases of commands like passwd + # and chown. + # XXX: "adjust-ulimits", "ceph-coverage" and "sudo" in command arguments + # are overridden. Former two usually have no applicability for test runs + # on developer's machines and see note point on "omit_sudo" to know more + # about overriding of "sudo". + # XXX: the presence of binary file named after the first argument is + # checked in build/bin/, if present the first argument is replaced with + # the path to binary file. 
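+    # A minimal usage sketch (assuming it is run from the build directory):
+    # both of the following resolve "ceph" to ./bin/ceph before execution.
+    #
+    #   LocalRemote().run(args=['ceph', 'status'], stdout=StringIO())
+    #   LocalRemote().run(args='ceph status', stdout=StringIO())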
+ def _do_run(self, args, check_status=True, wait=True, stdout=None, + stderr=None, cwd=None, stdin=None, logger=None, label=None, + env=None, timeout=None, omit_sudo=True, shell=True, quiet=False): + args, usr_args = self._perform_checks_and_adjustments(args, omit_sudo) + + subproc = launch_subprocess(args, cwd, env, shell) + + if stdin: + # Hack: writing to stdin is not deadlock-safe, but it "always" works + # as long as the input buffer is "small" + if isinstance(stdin, str): + subproc.stdin.write(stdin.encode()) + elif stdin == subprocess.PIPE or stdin == PIPE: + pass + elif isinstance(stdin, StringIO): + subproc.stdin.write(bytes(stdin.getvalue(),encoding='utf8')) + else: + subproc.stdin.write(stdin.getvalue()) + + proc = LocalRemoteProcess( + args, subproc, check_status, + stdout, stderr, usr_args + ) + + if wait: + proc.wait() + + return proc + +class LocalDaemon(object): + def __init__(self, daemon_type, daemon_id): + self.daemon_type = daemon_type + self.daemon_id = daemon_id + self.controller = LocalRemote() + self.proc = None + + @property + def remote(self): + return LocalRemote() + + def running(self): + return self._get_pid() is not None + + def check_status(self): + if self.proc: + return self.proc.poll() + + def _get_pid(self): + """ + Return PID as an integer or None if not found + """ + ps_txt = self.controller.run(args=["ps", "ww", "-u"+str(os.getuid())], + stdout=StringIO()).\ + stdout.getvalue().strip() + lines = ps_txt.split("\n")[1:] + + for line in lines: + if line.find("ceph-{0} -i {1}".format(self.daemon_type, self.daemon_id)) != -1: + log.debug("Found ps line for daemon: {0}".format(line)) + return int(line.split()[0]) + if not opt_log_ps_output: + ps_txt = '(omitted)' + log.debug("No match for {0} {1}: {2}".format( + self.daemon_type, self.daemon_id, ps_txt)) + return None + + def wait(self, timeout): + waited = 0 + while self._get_pid() is not None: + if waited > timeout: + raise MaxWhileTries("Timed out waiting for daemon {0}.{1}".format(self.daemon_type, self.daemon_id)) + time.sleep(1) + waited += 1 + + def stop(self, timeout=300): + if not self.running(): + log.error('tried to stop a non-running daemon') + return + + pid = self._get_pid() + if pid is None: + return + log.debug("Killing PID {0} for {1}.{2}".format(pid, self.daemon_type, self.daemon_id)) + os.kill(pid, signal.SIGTERM) + + waited = 0 + while pid is not None: + new_pid = self._get_pid() + if new_pid is not None and new_pid != pid: + log.debug("Killing new PID {0}".format(new_pid)) + pid = new_pid + os.kill(pid, signal.SIGTERM) + + if new_pid is None: + break + else: + if waited > timeout: + raise MaxWhileTries( + "Timed out waiting for daemon {0}.{1}".format( + self.daemon_type, self.daemon_id)) + time.sleep(1) + waited += 1 + + self.wait(timeout=timeout) + + def restart(self): + if self._get_pid() is not None: + self.stop() + + self.proc = self.controller.run(args=[ + os.path.join(BIN_PREFIX, "ceph-{0}".format(self.daemon_type)), + "-i", self.daemon_id]) + + def signal(self, sig, silent=False): + if not self.running(): + raise RuntimeError("Can't send signal to non-running daemon") + + os.kill(self._get_pid(), sig) + if not silent: + log.debug("Sent signal {0} to {1}.{2}".format(sig, self.daemon_type, self.daemon_id)) + + +def safe_kill(pid): + """ + os.kill annoyingly raises exception if process already dead. Ignore it. 
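+
+    The kill is issued as "sudo kill -9" through the module-level remote so
+    that daemons launched with sudo (e.g. a root-owned ceph-fuse) can be
+    terminated as well.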
+ """ + try: + return remote.run(args=f'sudo kill -{signal.SIGKILL.value} {pid}', + omit_sudo=False) + except OSError as e: + if e.errno == errno.ESRCH: + # Raced with process termination + pass + else: + raise + +def mon_in_localhost(config_path="./ceph.conf"): + """ + If the ceph cluster is using the localhost IP as mon host, will must disable ns unsharing + """ + with open(config_path) as f: + for line in f: + local = re.match(r'^\s*mon host\s*=\s*\[((v1|v2):127\.0\.0\.1:\d+,?)+\]', line) + if local: + return True + return False + +class LocalCephFSMount(): + @property + def config_path(self): + return "./ceph.conf" + + def get_keyring_path(self): + # This is going to end up in a config file, so use an absolute path + # to avoid assumptions about daemons' pwd + keyring_path = "./client.{0}.keyring".format(self.client_id) + try: + os.stat(keyring_path) + except OSError: + return os.path.join(os.getcwd(), 'keyring') + else: + return keyring_path + + @property + def _prefix(self): + return BIN_PREFIX + + def _asok_path(self): + # In teuthology, the asok is named after the PID of the ceph-fuse + # process, because it's run foreground. When running it daemonized + # however, the asok is named after the PID of the launching process, + # not the long running ceph-fuse process. Therefore we need to give + # an exact path here as the logic for checking /proc/ for which asok + # is alive does not work. + + # Load the asok path from ceph.conf as vstart.sh now puts admin sockets + # in a tmpdir. All of the paths are the same, so no need to select + # based off of the service type. + d = "./asok" + with open(self.config_path) as f: + for line in f: + asok_conf = re.search("^\s*admin\s+socket\s*=\s*(.*?)[^/]+$", line) + if asok_conf: + d = asok_conf.groups(1)[0] + break + path = "{0}/client.{1}.*.asok".format(d, self.client_id) + return path + + def _run_python(self, pyscript, py_version='python', sudo=False): + """ + Override this to remove the daemon-helper prefix that is used otherwise + to make the process killable. + """ + args = [] + if sudo: + args.append('sudo') + args += [py_version, '-c', pyscript] + return self.client_remote.run(args=args, wait=False, + stdout=StringIO(), omit_sudo=(not sudo)) + + def setup_netns(self): + if opt_use_ns: + super(type(self), self).setup_netns() + + @property + def _nsenter_args(self): + if opt_use_ns: + return super(type(self), self)._nsenter_args + else: + return [] + + def setupfs(self, name=None): + if name is None and self.fs is not None: + # Previous mount existed, reuse the old name + name = self.fs.name + self.fs = LocalFilesystem(self.ctx, name=name) + log.info('Wait for MDS to reach steady state...') + self.fs.wait_for_daemons() + log.info('Ready to start {}...'.format(type(self).__name__)) + + def is_blocked(self): + self.fs = LocalFilesystem(self.ctx, name=self.cephfs_name) + + output = self.fs.mon_manager.raw_cluster_cmd(args='osd blocklist ls') + return self.addr in output + + +class LocalKernelMount(LocalCephFSMount, KernelMount): + def __init__(self, ctx, test_dir, client_id=None, + client_keyring_path=None, client_remote=None, + hostfs_mntpt=None, cephfs_name=None, cephfs_mntpt=None, + brxnet=None): + super(LocalKernelMount, self).__init__(ctx=ctx, test_dir=test_dir, + client_id=client_id, client_keyring_path=client_keyring_path, + client_remote=LocalRemote(), hostfs_mntpt=hostfs_mntpt, + cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet) + + # Make vstart_runner compatible with teuth and qa/tasks/cephfs. 
+ self._mount_bin = [os.path.join(BIN_PREFIX , 'mount.ceph')] + + def get_global_addr(self): + self.get_global_inst() + self.addr = self.inst[self.inst.find(' ') + 1 : ] + return self.addr + + def get_global_inst(self): + clients = self.client_remote.run( + args=f'{CEPH_CMD} tell mds.* session ls', + stdout=StringIO()).stdout.getvalue() + clients = loads(clients) + for c in clients: + if c['id'] == self.id: + self.inst = c['inst'] + return self.inst + + +class LocalFuseMount(LocalCephFSMount, FuseMount): + def __init__(self, ctx, test_dir, client_id, client_keyring_path=None, + client_remote=None, hostfs_mntpt=None, cephfs_name=None, + cephfs_mntpt=None, brxnet=None): + super(LocalFuseMount, self).__init__(ctx=ctx, test_dir=test_dir, + client_id=client_id, client_keyring_path=client_keyring_path, + client_remote=LocalRemote(), hostfs_mntpt=hostfs_mntpt, + cephfs_name=cephfs_name, cephfs_mntpt=cephfs_mntpt, brxnet=brxnet) + + # Following block makes tests meant for teuthology compatible with + # vstart_runner. + self._mount_bin = [os.path.join(BIN_PREFIX, 'ceph-fuse')] + self._mount_cmd_cwd, self._mount_cmd_logger, \ + self._mount_cmd_stdin = None, None, None + + # XXX: CephFSMount._create_mntpt() sets mountpoint's permission mode to + # 0000 which doesn't work for vstart_runner since superuser privileges are + # not used for mounting Ceph FS with FUSE. + def _create_mntpt(self): + self.client_remote.run(args=f'mkdir -p -v {self.hostfs_mntpt}') + + def _run_mount_cmd(self, mntopts, mntargs, check_status): + retval = super(type(self), self)._run_mount_cmd(mntopts, mntargs, + check_status) + if retval is None: # None represents success + self._set_fuse_daemon_pid(check_status) + return retval + + def _get_mount_cmd(self, mntopts, mntargs): + mount_cmd = super(type(self), self)._get_mount_cmd(mntopts, mntargs) + + if os.getuid() != 0: + mount_cmd += ['--client_die_on_failed_dentry_invalidate=false'] + return mount_cmd + + @property + def _fuse_conn_check_timeout(self): + return 30 + + def _add_valgrind_args(self, mount_cmd): + return [] + + def _set_fuse_daemon_pid(self, check_status): + # NOTE: When a command <args> is launched with sudo, two processes are + # launched, one with sudo in <args> and other without. Make sure we + # get the PID of latter one. + try: + with safe_while(sleep=1, tries=15) as proceed: + while proceed(): + try: + sock = self.find_admin_socket() + except (RuntimeError, CommandFailedError): + continue + + self.fuse_daemon.fuse_pid = int(re.match(".*\.(\d+)\.asok$", + sock).group(1)) + break + except MaxWhileTries: + if check_status: + raise + else: + pass + +# XXX: this class has nothing to do with the Ceph daemon (ceph-mgr) of +# the same name. +class LocalCephManager(CephManager): + def __init__(self, ctx=None): + self.ctx = ctx + if self.ctx: + self.cluster = self.ctx.config['cluster'] + + # Deliberately skip parent init, only inheriting from it to get + # util methods like osd_dump that sit on top of raw_cluster_cmd + self.controller = LocalRemote() + + # A minority of CephManager fns actually bother locking for when + # certain teuthology tests want to run tasks in parallel + self.lock = threading.RLock() + + self.log = lambda x: log.debug(x) + + # Don't bother constructing a map of pools: it should be empty + # at test cluster start, and in any case it would be out of date + # in no time. The attribute needs to exist for some of the CephManager + # methods to work though. 
+ self.pools = {} + + # NOTE: These variables are being overriden here so that parent class + # can pick it up. + self.cephadm = False + self.rook = False + self.testdir = None + self.run_ceph_w_prefix = self.run_cluster_cmd_prefix = [CEPH_CMD] + self.CEPH_CMD = [CEPH_CMD] + self.RADOS_CMD = [RADOS_CMD] + + def find_remote(self, daemon_type, daemon_id): + """ + daemon_type like 'mds', 'osd' + daemon_id like 'a', '0' + """ + return LocalRemote() + + def admin_socket(self, daemon_type, daemon_id, command, check_status=True, + timeout=None, stdout=None): + if stdout is None: + stdout = StringIO() + + args=[CEPH_CMD, "daemon", f"{daemon_type}.{daemon_id}"] + command + return self.controller.run(args=args, check_status=check_status, + timeout=timeout, stdout=stdout) + + +class LocalCephCluster(CephCluster): + def __init__(self, ctx): + # Deliberately skip calling CephCluster constructor + self._ctx = ctx + self.mon_manager = LocalCephManager(ctx=self._ctx) + self._conf = defaultdict(dict) + + @property + def admin_remote(self): + return LocalRemote() + + def get_config(self, key, service_type=None): + if service_type is None: + service_type = 'mon' + + # FIXME hardcoded vstart service IDs + service_id = { + 'mon': 'a', + 'mds': 'a', + 'osd': '0' + }[service_type] + + return self.json_asok(['config', 'get', key], service_type, service_id)[key] + + def _write_conf(self): + # In teuthology, we have the honour of writing the entire ceph.conf, but + # in vstart land it has mostly already been written and we need to carefully + # append to it. + conf_path = "./ceph.conf" + banner = "\n#LOCAL_TEST\n" + existing_str = open(conf_path).read() + + if banner in existing_str: + existing_str = existing_str[0:existing_str.find(banner)] + + existing_str += banner + + for subsys, kvs in self._conf.items(): + existing_str += "\n[{0}]\n".format(subsys) + for key, val in kvs.items(): + # Comment out existing instance if it exists + log.debug("Searching for existing instance {0}/{1}".format( + key, subsys + )) + existing_section = re.search("^\[{0}\]$([\n]|[^\[])+".format( + subsys + ), existing_str, re.MULTILINE) + + if existing_section: + section_str = existing_str[existing_section.start():existing_section.end()] + existing_val = re.search("^\s*[^#]({0}) =".format(key), section_str, re.MULTILINE) + if existing_val: + start = existing_section.start() + existing_val.start(1) + log.debug("Found string to replace at {0}".format( + start + )) + existing_str = existing_str[0:start] + "#" + existing_str[start:] + + existing_str += "{0} = {1}\n".format(key, val) + + open(conf_path, "w").write(existing_str) + + def set_ceph_conf(self, subsys, key, value): + self._conf[subsys][key] = value + self._write_conf() + + def clear_ceph_conf(self, subsys, key): + del self._conf[subsys][key] + self._write_conf() + + +class LocalMDSCluster(LocalCephCluster, MDSCluster): + def __init__(self, ctx): + LocalCephCluster.__init__(self, ctx) + # Deliberately skip calling MDSCluster constructor + self._mds_ids = ctx.daemons.daemons['ceph.mds'].keys() + log.debug("Discovered MDS IDs: {0}".format(self._mds_ids)) + self._mds_daemons = dict([(id_, LocalDaemon("mds", id_)) for id_ in self.mds_ids]) + + @property + def mds_ids(self): + return self._mds_ids + + @property + def mds_daemons(self): + return self._mds_daemons + + def clear_firewall(self): + # FIXME: unimplemented + pass + + def newfs(self, name='cephfs', create=True): + return LocalFilesystem(self._ctx, name=name, create=create) + + def delete_all_filesystems(self): + """ + Remove all 
filesystems that exist, and any pools in use by them. + """ + for fs in self.status().get_filesystems(): + LocalFilesystem(ctx=self._ctx, fscid=fs['id']).destroy() + + +class LocalMgrCluster(LocalCephCluster, MgrCluster): + def __init__(self, ctx): + super(LocalMgrCluster, self).__init__(ctx) + + self.mgr_ids = ctx.daemons.daemons['ceph.mgr'].keys() + self.mgr_daemons = dict([(id_, LocalDaemon("mgr", id_)) for id_ in self.mgr_ids]) + + +class LocalFilesystem(LocalMDSCluster, Filesystem): + def __init__(self, ctx, fs_config={}, fscid=None, name=None, create=False): + # Deliberately skip calling Filesystem constructor + LocalMDSCluster.__init__(self, ctx) + + self.id = None + self.name = name + self.metadata_pool_name = None + self.metadata_overlay = False + self.data_pool_name = None + self.data_pools = None + self.fs_config = fs_config + self.ec_profile = fs_config.get('ec_profile') + + self.mon_manager = LocalCephManager(ctx=self._ctx) + + self.client_remote = LocalRemote() + + self._conf = defaultdict(dict) + + if name is not None: + if fscid is not None: + raise RuntimeError("cannot specify fscid when creating fs") + if create and not self.legacy_configured(): + self.create() + else: + if fscid is not None: + self.id = fscid + self.getinfo(refresh=True) + + # Stash a reference to the first created filesystem on ctx, so + # that if someone drops to the interactive shell they can easily + # poke our methods. + if not hasattr(self._ctx, "filesystem"): + self._ctx.filesystem = self + + @property + def _prefix(self): + return BIN_PREFIX + + def set_clients_block(self, blocked, mds_id=None): + raise NotImplementedError() + + +class LocalCluster(object): + def __init__(self, rolename="placeholder"): + self.remotes = { + LocalRemote(): [rolename] + } + + def only(self, requested): + return self.__class__(rolename=requested) + + def run(self, *args, **kwargs): + r = [] + for remote in self.remotes.keys(): + r.append(remote.run(*args, **kwargs)) + return r + + +class LocalContext(object): + def __init__(self): + FSID = remote.run(args=[os.path.join(BIN_PREFIX, 'ceph'), 'fsid'], + stdout=StringIO()).stdout.getvalue() + + cluster_name = 'ceph' + self.config = {'cluster': cluster_name} + self.ceph = {cluster_name: Namespace()} + self.ceph[cluster_name].fsid = FSID + self.teuthology_config = teuth_config + self.cluster = LocalCluster() + self.daemons = DaemonGroup() + if not hasattr(self, 'managers'): + self.managers = {} + self.managers[self.config['cluster']] = LocalCephManager(ctx=self) + + # Shove some LocalDaemons into the ctx.daemons DaemonGroup instance so that any + # tests that want to look these up via ctx can do so. + # Inspect ceph.conf to see what roles exist + for conf_line in open("ceph.conf").readlines(): + for svc_type in ["mon", "osd", "mds", "mgr"]: + prefixed_type = "ceph." + svc_type + if prefixed_type not in self.daemons.daemons: + self.daemons.daemons[prefixed_type] = {} + match = re.match("^\[{0}\.(.+)\]$".format(svc_type), conf_line) + if match: + svc_id = match.group(1) + self.daemons.daemons[prefixed_type][svc_id] = LocalDaemon(svc_type, svc_id) + + def __del__(self): + test_path = self.teuthology_config['test_path'] + # opt_create_cluster_only does not create the test path + if test_path: + shutil.rmtree(test_path) + + +######################################### +# +# stuff necessary for launching tests... 
+# +######################################### + + +def enumerate_methods(s): + log.debug("e: {0}".format(s)) + for t in s._tests: + if isinstance(t, suite.BaseTestSuite): + for sub in enumerate_methods(t): + yield sub + else: + yield s, t + + +def load_tests(modules, loader): + if modules: + log.debug("Executing modules: {0}".format(modules)) + module_suites = [] + for mod_name in modules: + # Test names like cephfs.test_auto_repair + module_suites.append(loader.loadTestsFromName(mod_name)) + log.debug("Loaded: {0}".format(list(module_suites))) + return suite.TestSuite(module_suites) + else: + log.debug("Executing all cephfs tests") + return loader.discover( + os.path.join(os.path.dirname(os.path.abspath(__file__)), "cephfs") + ) + + +def scan_tests(modules): + overall_suite = load_tests(modules, loader.TestLoader()) + max_required_mds = 0 + max_required_clients = 0 + max_required_mgr = 0 + require_memstore = False + + for suite_, case in enumerate_methods(overall_suite): + max_required_mds = max(max_required_mds, + getattr(case, "MDSS_REQUIRED", 0)) + max_required_clients = max(max_required_clients, + getattr(case, "CLIENTS_REQUIRED", 0)) + max_required_mgr = max(max_required_mgr, + getattr(case, "MGRS_REQUIRED", 0)) + require_memstore = getattr(case, "REQUIRE_MEMSTORE", False) \ + or require_memstore + + return max_required_mds, max_required_clients, \ + max_required_mgr, require_memstore + + +class LogRotate(): + def __init__(self): + self.conf_file_path = os.path.join(os.getcwd(), 'logrotate.conf') + self.state_file_path = os.path.join(os.getcwd(), 'logrotate.state') + + def run_logrotate(self): + remote.run(args=['logrotate', '-f', self.conf_file_path, '-s', + self.state_file_path, '--verbose']) + + +def teardown_cluster(): + log.info('\ntearing down the cluster...') + try: + remote.run(args=[os.path.join(SRC_PREFIX, "stop.sh")], timeout=60) + except CommandFailedError as e: + log.error('stop.sh failed: %s', e) + log.info('\nceph cluster torn down') + remote.run(args=['rm', '-rf', './dev', './out']) + + +def clear_old_log(): + try: + os.stat(logpath) + except FileNotFoundError: + return + else: + os.remove(logpath) + with open(logpath, 'w') as logfile: + logfile.write('') + init_log(log.level) + log.debug('logging in a fresh file now...') + + +class LogStream(object): + def __init__(self): + self.buffer = "" + self.omit_result_lines = False + + def _del_result_lines(self): + """ + Don't let unittest.TextTestRunner print "Ran X tests in Ys", + vstart_runner.py will do it for itself since it runs tests in a + testsuite one by one. + """ + if self.omit_result_lines: + self.buffer = re.sub('-'*70+'\nran [0-9]* test in [0-9.]*s\n*', + '', self.buffer, flags=re.I) + self.buffer = re.sub('failed \(failures=[0-9]*\)\n', '', self.buffer, + flags=re.I) + self.buffer = self.buffer.replace('OK\n', '') + + def write(self, data): + self.buffer += data + if self.buffer.count("\n") > 5: + self._write() + + def _write(self): + if opt_rotate_logs: + self._del_result_lines() + if self.buffer == '': + return + + lines = self.buffer.split("\n") + for line in lines: + # sys.stderr.write(line + "\n") + log.info(line) + self.buffer = '' + + def flush(self): + pass + + def __del__(self): + self._write() + + +class InteractiveFailureResult(unittest.TextTestResult): + """ + Specialization that implements interactive-on-error style + behavior. 
+ """ + def addFailure(self, test, err): + super(InteractiveFailureResult, self).addFailure(test, err) + log.error(self._exc_info_to_string(err, test)) + log.error("Failure in test '{0}', going interactive".format( + self.getDescription(test) + )) + interactive.task(ctx=None, config=None) + + def addError(self, test, err): + super(InteractiveFailureResult, self).addError(test, err) + log.error(self._exc_info_to_string(err, test)) + log.error("Error in test '{0}', going interactive".format( + self.getDescription(test) + )) + interactive.task(ctx=None, config=None) + + +# XXX: class we require would be inherited from this one and one of +# InteractiveFailureResult and unittestunittest.TextTestResult. +class LoggingResultTemplate(object): + fail_on_skip = False + + def startTest(self, test): + log.info("Starting test: {0}".format(self.getDescription(test))) + test.started_at = datetime.datetime.utcnow() + return super(LoggingResultTemplate, self).startTest(test) + + def stopTest(self, test): + log.info("Stopped test: {0} in {1}s".format( + self.getDescription(test), + (datetime.datetime.utcnow() - test.started_at).total_seconds() + )) + + def addSkip(self, test, reason): + if LoggingResultTemplate.fail_on_skip: + # Don't just call addFailure because that requires a traceback + self.failures.append((test, reason)) + else: + super(LoggingResultTemplate, self).addSkip(test, reason) + + +def launch_tests(overall_suite): + if opt_rotate_logs or not opt_exit_on_test_failure: + return launch_individually(overall_suite) + else: + return launch_entire_suite(overall_suite) + + +def get_logging_result_class(): + result_class = InteractiveFailureResult if opt_interactive_on_error else \ + unittest.TextTestResult + return type('', (LoggingResultTemplate, result_class), {}) + + +def launch_individually(overall_suite): + no_of_tests_execed = 0 + no_of_tests_failed, no_of_tests_execed = 0, 0 + LoggingResult = get_logging_result_class() + stream = LogStream() + stream.omit_result_lines = True + if opt_rotate_logs: + logrotate = LogRotate() + + started_at = datetime.datetime.utcnow() + for suite_, case in enumerate_methods(overall_suite): + # don't run logrotate beforehand since some ceph daemons might be + # down and pre/post-rotate scripts in logrotate.conf might fail. 
+ if opt_rotate_logs: + logrotate.run_logrotate() + + result = unittest.TextTestRunner(stream=stream, + resultclass=LoggingResult, + verbosity=2, failfast=True).run(case) + + if not result.wasSuccessful(): + if opt_exit_on_test_failure: + break + else: + no_of_tests_failed += 1 + + no_of_tests_execed += 1 + time_elapsed = (datetime.datetime.utcnow() - started_at).total_seconds() + + if result.wasSuccessful(): + log.info('') + log.info('-'*70) + log.info(f'Ran {no_of_tests_execed} tests successfully in ' + f'{time_elapsed}s') + if no_of_tests_failed > 0: + log.info(f'{no_of_tests_failed} tests failed') + log.info('') + log.info('OK') + + return result + + +def launch_entire_suite(overall_suite): + LoggingResult = get_logging_result_class() + + testrunner = unittest.TextTestRunner(stream=LogStream(), + resultclass=LoggingResult, + verbosity=2, failfast=True) + return testrunner.run(overall_suite) + + +def exec_test(): + # Parse arguments + global opt_interactive_on_error + opt_interactive_on_error = False + opt_create_cluster = False + opt_create_cluster_only = False + opt_ignore_missing_binaries = False + opt_teardown_cluster = False + global opt_log_ps_output + opt_log_ps_output = False + use_kernel_client = False + global opt_use_ns + opt_use_ns = False + opt_brxnet= None + opt_verbose = True + global opt_rotate_logs + opt_rotate_logs = False + global opt_exit_on_test_failure + opt_exit_on_test_failure = True + + args = sys.argv[1:] + flags = [a for a in args if a.startswith("-")] + modules = [a for a in args if not a.startswith("-")] + for f in flags: + if f == "--interactive": + opt_interactive_on_error = True + elif f == "--create": + opt_create_cluster = True + elif f == "--create-cluster-only": + opt_create_cluster_only = True + elif f == "--ignore-missing-binaries": + opt_ignore_missing_binaries = True + elif f == '--teardown': + opt_teardown_cluster = True + elif f == '--log-ps-output': + opt_log_ps_output = True + elif f == '--clear-old-log': + clear_old_log() + elif f == "--kclient": + use_kernel_client = True + elif f == '--usens': + opt_use_ns = True + elif '--brxnet' in f: + if re.search(r'=[0-9./]+', f) is None: + log.error("--brxnet=<ip/mask> option needs one argument: '{0}'".format(f)) + sys.exit(-1) + opt_brxnet=f.split('=')[1] + try: + IP(opt_brxnet) + if IP(opt_brxnet).iptype() == 'PUBLIC': + raise RuntimeError('is public') + except Exception as e: + log.error("Invalid ip '{0}' {1}".format(opt_brxnet, e)) + sys.exit(-1) + elif '--no-verbose' == f: + opt_verbose = False + elif f == '--rotate-logs': + opt_rotate_logs = True + elif f == '--run-all-tests': + opt_exit_on_test_failure = False + elif f == '--debug': + log.setLevel(logging.DEBUG) + else: + log.error("Unknown option '{0}'".format(f)) + sys.exit(-1) + + # Help developers by stopping up-front if their tree isn't built enough for all the + # tools that the tests might want to use (add more here if needed) + require_binaries = ["ceph-dencoder", "cephfs-journal-tool", "cephfs-data-scan", + "cephfs-table-tool", "ceph-fuse", "rados", "cephfs-meta-injection"] + # What binaries may be required is task specific + require_binaries = ["ceph-dencoder", "rados"] + missing_binaries = [b for b in require_binaries if not os.path.exists(os.path.join(BIN_PREFIX, b))] + if missing_binaries and not opt_ignore_missing_binaries: + log.error("Some ceph binaries missing, please build them: {0}".format(" ".join(missing_binaries))) + sys.exit(-1) + + max_required_mds, max_required_clients, \ + max_required_mgr, require_memstore = 
scan_tests(modules) + + global remote + remote = LocalRemote() + + CephFSMount.cleanup_stale_netnses_and_bridge(remote) + + # Tolerate no MDSs or clients running at start + ps_txt = remote.run(args=["ps", "-u"+str(os.getuid())], + stdout=StringIO()).stdout.getvalue().strip() + lines = ps_txt.split("\n")[1:] + for line in lines: + if 'ceph-fuse' in line or 'ceph-mds' in line: + pid = int(line.split()[0]) + log.warning("Killing stray process {0}".format(line)) + remote.run(args=f'sudo kill -{signal.SIGKILL.value} {pid}', + omit_sudo=False) + + # Fire up the Ceph cluster if the user requested it + if opt_create_cluster or opt_create_cluster_only: + log.info("Creating cluster with {0} MDS daemons".format( + max_required_mds)) + teardown_cluster() + vstart_env = os.environ.copy() + vstart_env["FS"] = "0" + vstart_env["MDS"] = max_required_mds.__str__() + vstart_env["OSD"] = "4" + vstart_env["MGR"] = max(max_required_mgr, 1).__str__() + + args = [ + os.path.join(SRC_PREFIX, "vstart.sh"), + "-n", + "--nolockdep", + ] + if require_memstore: + args.append("--memstore") + + if opt_verbose: + args.append("-d") + + log.info('\nrunning vstart.sh now...') + # usually, i get vstart.sh running completely in less than 100 + # seconds. + remote.run(args=args, env=vstart_env, timeout=(3 * 60)) + log.info('\nvstart.sh finished running') + + # Wait for OSD to come up so that subsequent injectargs etc will + # definitely succeed + LocalCephCluster(LocalContext()).mon_manager.wait_for_all_osds_up(timeout=30) + + if opt_create_cluster_only: + return + + if opt_use_ns and mon_in_localhost() and not opt_create_cluster: + raise RuntimeError("cluster is on localhost; '--usens' option is incompatible. Or you can pass an extra '--create' option to create a new cluster without localhost!") + + # List of client mounts, sufficient to run the selected tests + clients = [i.__str__() for i in range(0, max_required_clients)] + + test_dir = tempfile.mkdtemp() + teuth_config['test_path'] = test_dir + + ctx = LocalContext() + ceph_cluster = LocalCephCluster(ctx) + mds_cluster = LocalMDSCluster(ctx) + mgr_cluster = LocalMgrCluster(ctx) + + # Construct Mount classes + mounts = [] + for client_id in clients: + # Populate client keyring (it sucks to use client.admin for test clients + # because it's awkward to find the logs later) + client_name = "client.{0}".format(client_id) + + if client_name not in open("./keyring").read(): + p = remote.run(args=[CEPH_CMD, "auth", "get-or-create", client_name, + "osd", "allow rw", + "mds", "allow", + "mon", "allow r"], stdout=StringIO()) + + open("./keyring", "at").write(p.stdout.getvalue()) + + if use_kernel_client: + mount = LocalKernelMount(ctx=ctx, test_dir=test_dir, + client_id=client_id, brxnet=opt_brxnet) + else: + mount = LocalFuseMount(ctx=ctx, test_dir=test_dir, + client_id=client_id, brxnet=opt_brxnet) + + mounts.append(mount) + if os.path.exists(mount.hostfs_mntpt): + if mount.is_mounted(): + log.warning("unmounting {0}".format(mount.hostfs_mntpt)) + mount.umount_wait() + else: + os.rmdir(mount.hostfs_mntpt) + + from tasks.cephfs_test_runner import DecoratingLoader + + decorating_loader = DecoratingLoader({ + "ctx": ctx, + "mounts": mounts, + "ceph_cluster": ceph_cluster, + "mds_cluster": mds_cluster, + "mgr_cluster": mgr_cluster, + }) + + # For the benefit of polling tests like test_full -- in teuthology land we set this + # in a .yaml, here it's just a hardcoded thing for the developer's pleasure. 
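+    # (osd_mon_report_interval is set to 5 seconds both at runtime via
+    # injectargs and persistently in ceph.conf, so OSD stat reports reach the
+    # monitors quickly enough for the polling tests.)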
+ remote.run(args=[CEPH_CMD, "tell", "osd.*", "injectargs", "--osd-mon-report-interval", "5"]) + ceph_cluster.set_ceph_conf("osd", "osd_mon_report_interval", "5") + + # Enable override of recovery options if mClock scheduler is active. This is to allow + # current and future tests to modify recovery related limits. This is because by default, + # with mclock enabled, a subset of recovery options are not allowed to be modified. + remote.run(args=[CEPH_CMD, "tell", "osd.*", "injectargs", "--osd-mclock-override-recovery-settings", "true"]) + ceph_cluster.set_ceph_conf("osd", "osd_mclock_override_recovery_settings", "true") + + # Vstart defaults to two segments, which very easily gets a "behind on trimming" health warning + # from normal IO latency. Increase it for running teests. + ceph_cluster.set_ceph_conf("mds", "mds log max segments", "10") + + # Make sure the filesystem created in tests has uid/gid that will let us talk to + # it after mounting it (without having to go root). Set in 'global' not just 'mds' + # so that cephfs-data-scan will pick it up too. + ceph_cluster.set_ceph_conf("global", "mds root ino uid", "%s" % os.getuid()) + ceph_cluster.set_ceph_conf("global", "mds root ino gid", "%s" % os.getgid()) + + # Monkeypatch get_package_version to avoid having to work out what kind of distro we're on + def _get_package_version(remote, pkg_name): + # Used in cephfs tests to find fuse version. Your development workstation *does* have >=2.9, right? + return "2.9" + + import teuthology.packaging + teuthology.packaging.get_package_version = _get_package_version + + overall_suite = load_tests(modules, decorating_loader) + + # Filter out tests that don't lend themselves to interactive running, + victims = [] + for case, method in enumerate_methods(overall_suite): + fn = getattr(method, method._testMethodName) + + drop_test = False + + if hasattr(fn, 'is_for_teuthology') and getattr(fn, 'is_for_teuthology') is True: + drop_test = True + log.warning("Dropping test because long running: {method_id}".format(method_id=method.id())) + + if getattr(fn, "needs_trimming", False) is True: + drop_test = (os.getuid() != 0) + log.warning("Dropping test because client trim unavailable: {method_id}".format(method_id=method.id())) + + if drop_test: + # Don't drop the test if it was explicitly requested in arguments + is_named = False + for named in modules: + if named.endswith(method.id()): + is_named = True + break + + if not is_named: + victims.append((case, method)) + + log.debug("Disabling {0} tests because of is_for_teuthology or needs_trimming".format(len(victims))) + for s, method in victims: + s._tests.remove(method) + + overall_suite = load_tests(modules, loader.TestLoader()) + result = launch_tests(overall_suite) + + CephFSMount.cleanup_stale_netnses_and_bridge(remote) + if opt_teardown_cluster: + teardown_cluster() + + if not result.wasSuccessful(): + # no point in duplicating if we can have multiple failures in same + # run. 
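+        # With --run-all-tests the failures were already logged as they
+        # happened, so the errors are only re-printed when the run stopped at
+        # the first failure.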
+ if opt_exit_on_test_failure: + result.printErrors() # duplicate output at end for convenience + + bad_tests = [] + for test, error in result.errors: + bad_tests.append(str(test)) + for test, failure in result.failures: + bad_tests.append(str(test)) + + sys.exit(-1) + else: + sys.exit(0) + + +if __name__ == "__main__": + exec_test() diff --git a/qa/tasks/watch_notify_same_primary.py b/qa/tasks/watch_notify_same_primary.py new file mode 100644 index 000000000..448fee193 --- /dev/null +++ b/qa/tasks/watch_notify_same_primary.py @@ -0,0 +1,129 @@ + +""" +watch_notify_same_primary task +""" +from io import StringIO +import contextlib +import logging + + +from teuthology.orchestra import run +from teuthology.contextutil import safe_while + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + Run watch_notify_same_primary + + The config should be as follows: + + watch_notify_same_primary: + clients: [client list] + + The client list should contain 1 client + + The test requires 3 osds. + + example: + + tasks: + - ceph: + - watch_notify_same_primary: + clients: [client.0] + - interactive: + """ + log.info('Beginning watch_notify_same_primary...') + assert isinstance(config, dict), \ + "please list clients to run on" + + clients = config.get('clients', ['client.0']) + assert len(clients) == 1 + role = clients[0] + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + (remote,) = ctx.cluster.only(role).remotes.keys() + manager = ctx.managers['ceph'] + manager.raw_cluster_cmd('osd', 'set', 'noout') + + pool = manager.create_pool_with_unique_name() + def obj(n): return "foo-{num}".format(num=n) + def start_watch(n): + remote.run( + args = [ + "rados", + "-p", pool, + "put", + obj(n), + "/etc/resolv.conf"], + logger=log.getChild('watch.{id}'.format(id=n))) + proc = remote.run( + args = [ + "rados", + "-p", pool, + "watch", + obj(n)], + stdin=run.PIPE, + stdout=StringIO(), + stderr=StringIO(), + wait=False) + return proc + + num = 20 + + watches = [start_watch(i) for i in range(num)] + + # wait for them all to register + for i in range(num): + with safe_while() as proceed: + while proceed(): + lines = remote.sh( + ["rados", "-p", pool, "listwatchers", obj(i)]) + num_watchers = lines.count('watcher=') + log.info('i see %d watchers for %s', num_watchers, obj(i)) + if num_watchers >= 1: + break + + def notify(n, msg): + remote.run( + args = [ + "rados", + "-p", pool, + "notify", + obj(n), + msg], + logger=log.getChild('notify.{id}'.format(id=n))) + + [notify(n, 'notify1') for n in range(len(watches))] + + manager.kill_osd(0) + manager.mark_down_osd(0) + + [notify(n, 'notify2') for n in range(len(watches))] + + try: + yield + finally: + log.info('joining watch_notify_stress') + for watch in watches: + watch.stdin.write("\n") + + run.wait(watches) + + for watch in watches: + lines = watch.stdout.getvalue().split("\n") + got1 = False + got2 = False + for l in lines: + if 'notify1' in l: + got1 = True + if 'notify2' in l: + got2 = True + log.info(lines) + assert got1 and got2 + + manager.revive_osd(0) + manager.remove_pool(pool) diff --git a/qa/tasks/watch_notify_stress.py b/qa/tasks/watch_notify_stress.py new file mode 100644 index 000000000..47747b1ca --- /dev/null +++ b/qa/tasks/watch_notify_stress.py @@ -0,0 +1,69 @@ +""" +test_stress_watch task +""" +import contextlib +import logging + +from teuthology.orchestra import run +from teuthology.task import proc_thrasher + +log = logging.getLogger(__name__) + + 
+@contextlib.contextmanager +def task(ctx, config): + """ + Run test_stress_watch + + The config should be as follows: + + test_stress_watch: + clients: [client list] + + example: + + tasks: + - ceph: + - test_stress_watch: + clients: [client.0] + - interactive: + """ + log.info('Beginning test_stress_watch...') + assert isinstance(config, dict), \ + "please list clients to run on" + testwatch = {} + + remotes = [] + + for role in config.get('clients', ['client.0']): + assert isinstance(role, str) + PREFIX = 'client.' + assert role.startswith(PREFIX) + id_ = role[len(PREFIX):] + (remote,) = ctx.cluster.only(role).remotes.keys() + remotes.append(remote) + + args =['CEPH_CLIENT_ID={id_}'.format(id_=id_), + 'CEPH_ARGS="{flags}"'.format(flags=config.get('flags', '')), + 'daemon-helper', + 'kill', + 'multi_stress_watch foo foo' + ] + + log.info("args are %s" % (args,)) + + proc = proc_thrasher.ProcThrasher({}, remote, + args=[run.Raw(i) for i in args], + logger=log.getChild('testwatch.{id}'.format(id=id_)), + stdin=run.PIPE, + wait=False + ) + proc.start() + testwatch[id_] = proc + + try: + yield + finally: + log.info('joining watch_notify_stress') + for i in testwatch.values(): + i.join() diff --git a/qa/tasks/workunit.py b/qa/tasks/workunit.py new file mode 100644 index 000000000..92c5780f9 --- /dev/null +++ b/qa/tasks/workunit.py @@ -0,0 +1,439 @@ +""" +Workunit task -- Run ceph on sets of specific clients +""" +import logging +import pipes +import os +import re +import shlex + +from tasks.util import get_remote_for_role +from tasks.util.workunit import get_refspec_after_overrides + +from teuthology import misc +from teuthology.config import config as teuth_config +from teuthology.exceptions import CommandFailedError +from teuthology.parallel import parallel +from teuthology.orchestra import run + +log = logging.getLogger(__name__) + +def task(ctx, config): + """ + Run ceph on all workunits found under the specified path. + + For example:: + + tasks: + - ceph: + - ceph-fuse: [client.0] + - workunit: + clients: + client.0: [direct_io, xattrs.sh] + client.1: [snaps] + branch: foo + + You can also run a list of workunits on all clients: + tasks: + - ceph: + - ceph-fuse: + - workunit: + tag: v0.47 + clients: + all: [direct_io, xattrs.sh, snaps] + + If you have an "all" section it will run all the workunits + on each client simultaneously, AFTER running any workunits specified + for individual clients. (This prevents unintended simultaneous runs.) + + To customize tests, you can specify environment variables as a dict. 
You + can also specify a time limit for each work unit (defaults to 3h): + + tasks: + - ceph: + - ceph-fuse: + - workunit: + sha1: 9b28948635b17165d17c1cf83d4a870bd138ddf6 + clients: + all: [snaps] + env: + FOO: bar + BAZ: quux + timeout: 3h + + You can also pass optional arguments to the found workunits: + + tasks: + - workunit: + clients: + all: + - test-ceph-helpers.sh test_get_config + + This task supports roles that include a ceph cluster, e.g.:: + + tasks: + - ceph: + - workunit: + clients: + backup.client.0: [foo] + client.1: [bar] # cluster is implicitly 'ceph' + + You can also specify an alternative top-level dir to 'qa/workunits', like + 'qa/standalone', with:: + + tasks: + - install: + - workunit: + basedir: qa/standalone + clients: + client.0: + - test-ceph-helpers.sh + + :param ctx: Context + :param config: Configuration + """ + assert isinstance(config, dict) + assert isinstance(config.get('clients'), dict), \ + 'configuration must contain a dictionary of clients' + + overrides = ctx.config.get('overrides', {}) + refspec = get_refspec_after_overrides(config, overrides) + timeout = config.get('timeout', '3h') + cleanup = config.get('cleanup', True) + + log.info('Pulling workunits from ref %s', refspec) + + created_mountpoint = {} + + if config.get('env') is not None: + assert isinstance(config['env'], dict), 'env must be a dictionary' + clients = config['clients'] + + # Create scratch dirs for any non-all workunits + log.info('Making a separate scratch dir for every client...') + for role in clients.keys(): + assert isinstance(role, str) + if role == "all": + continue + + assert 'client' in role + created_mnt_dir = _make_scratch_dir(ctx, role, config.get('subdir')) + created_mountpoint[role] = created_mnt_dir + + # Execute any non-all workunits + log.info("timeout={}".format(timeout)) + log.info("cleanup={}".format(cleanup)) + with parallel() as p: + for role, tests in clients.items(): + if role != "all": + p.spawn(_run_tests, ctx, refspec, role, tests, + config.get('env'), + basedir=config.get('basedir','qa/workunits'), + subdir=config.get('subdir'), + timeout=timeout, + cleanup=cleanup, + coverage_and_limits=not config.get('no_coverage_and_limits', None)) + + if cleanup: + # Clean up dirs from any non-all workunits + for role, created in created_mountpoint.items(): + _delete_dir(ctx, role, created) + + # Execute any 'all' workunits + if 'all' in clients: + all_tasks = clients["all"] + _spawn_on_all_clients(ctx, refspec, all_tasks, config.get('env'), + config.get('basedir', 'qa/workunits'), + config.get('subdir'), timeout=timeout, + cleanup=cleanup) + + +def _client_mountpoint(ctx, cluster, id_): + """ + Returns the path to the expected mountpoint for workunits running + on some kind of filesystem. + """ + # for compatibility with tasks like ceph-fuse that aren't cluster-aware yet, + # only include the cluster name in the dir if the cluster is not 'ceph' + if cluster == 'ceph': + dir_ = 'mnt.{0}'.format(id_) + else: + dir_ = 'mnt.{0}.{1}'.format(cluster, id_) + return os.path.join(misc.get_testdir(ctx), dir_) + + +def _delete_dir(ctx, role, created_mountpoint): + """ + Delete file used by this role, and delete the directory that this + role appeared in. + + :param ctx: Context + :param role: "role.#" where # is used for the role id. 
+ """ + cluster, _, id_ = misc.split_role(role) + remote = get_remote_for_role(ctx, role) + mnt = _client_mountpoint(ctx, cluster, id_) + client = os.path.join(mnt, 'client.{id}'.format(id=id_)) + + # Remove the directory inside the mount where the workunit ran + remote.run( + args=[ + 'sudo', + 'rm', + '-rf', + '--', + client, + ], + ) + log.info("Deleted dir {dir}".format(dir=client)) + + # If the mount was an artificially created dir, delete that too + if created_mountpoint: + remote.run( + args=[ + 'rmdir', + '--', + mnt, + ], + ) + log.info("Deleted artificial mount point {dir}".format(dir=client)) + + +def _make_scratch_dir(ctx, role, subdir): + """ + Make scratch directories for this role. This also makes the mount + point if that directory does not exist. + + :param ctx: Context + :param role: "role.#" where # is used for the role id. + :param subdir: use this subdir (False if not used) + """ + created_mountpoint = False + cluster, _, id_ = misc.split_role(role) + remote = get_remote_for_role(ctx, role) + dir_owner = remote.user + mnt = _client_mountpoint(ctx, cluster, id_) + # if neither kclient nor ceph-fuse are required for a workunit, + # mnt may not exist. Stat and create the directory if it doesn't. + try: + remote.run( + args=[ + 'stat', + '--', + mnt, + ], + ) + log.info('Did not need to create dir {dir}'.format(dir=mnt)) + except CommandFailedError: + remote.run( + args=[ + 'mkdir', + '--', + mnt, + ], + ) + log.info('Created dir {dir}'.format(dir=mnt)) + created_mountpoint = True + + if not subdir: + subdir = 'client.{id}'.format(id=id_) + + if created_mountpoint: + remote.run( + args=[ + 'cd', + '--', + mnt, + run.Raw('&&'), + 'mkdir', + '--', + subdir, + ], + ) + else: + remote.run( + args=[ + # cd first so this will fail if the mount point does + # not exist; pure install -d will silently do the + # wrong thing + 'cd', + '--', + mnt, + run.Raw('&&'), + 'sudo', + 'install', + '-d', + '-m', '0755', + '--owner={user}'.format(user=dir_owner), + '--', + subdir, + ], + ) + + return created_mountpoint + + +def _spawn_on_all_clients(ctx, refspec, tests, env, basedir, subdir, timeout=None, cleanup=True): + """ + Make a scratch directory for each client in the cluster, and then for each + test spawn _run_tests() for each role. + + See run_tests() for parameter documentation. + """ + is_client = misc.is_type('client') + client_remotes = {} + created_mountpoint = {} + for remote, roles_for_host in ctx.cluster.remotes.items(): + for role in roles_for_host: + if is_client(role): + client_remotes[role] = remote + created_mountpoint[role] = _make_scratch_dir(ctx, role, subdir) + + for unit in tests: + with parallel() as p: + for role, remote in client_remotes.items(): + p.spawn(_run_tests, ctx, refspec, role, [unit], env, + basedir, + subdir, + timeout=timeout) + + # cleanup the generated client directories + if cleanup: + for role, _ in client_remotes.items(): + _delete_dir(ctx, role, created_mountpoint[role]) + + +def _run_tests(ctx, refspec, role, tests, env, basedir, + subdir=None, timeout=None, cleanup=True, + coverage_and_limits=True): + """ + Run the individual test. Create a scratch directory and then extract the + workunits from git. Make the executables, and then run the tests. + Clean up (remove files created) after the tests are finished. + + :param ctx: Context + :param refspec: branch, sha1, or version tag used to identify this + build + :param tests: specific tests specified. + :param env: environment set in yaml file. Could be None. 
+ :param subdir: subdirectory set in yaml file. Could be None + :param timeout: If present, use the 'timeout' command on the remote host + to limit execution time. Must be specified by a number + followed by 's' for seconds, 'm' for minutes, 'h' for + hours, or 'd' for days. If '0' or anything that evaluates + to False is passed, the 'timeout' command is not used. + """ + testdir = misc.get_testdir(ctx) + assert isinstance(role, str) + cluster, type_, id_ = misc.split_role(role) + assert type_ == 'client' + remote = get_remote_for_role(ctx, role) + mnt = _client_mountpoint(ctx, cluster, id_) + # subdir so we can remove and recreate this a lot without sudo + if subdir is None: + scratch_tmp = os.path.join(mnt, 'client.{id}'.format(id=id_), 'tmp') + else: + scratch_tmp = os.path.join(mnt, subdir) + clonedir = '{tdir}/clone.{role}'.format(tdir=testdir, role=role) + srcdir = '{cdir}/{basedir}'.format(cdir=clonedir, + basedir=basedir) + + git_url = teuth_config.get_ceph_qa_suite_git_url() + # if we are running an upgrade test, and ceph-ci does not have branches like + # `jewel`, so should use ceph.git as an alternative. + try: + remote.run(logger=log.getChild(role), + args=refspec.clone(git_url, clonedir)) + except CommandFailedError: + if git_url.endswith('/ceph-ci.git'): + alt_git_url = git_url.replace('/ceph-ci.git', '/ceph.git') + elif git_url.endswith('/ceph-ci'): + alt_git_url = re.sub(r'/ceph-ci$', '/ceph.git', git_url) + else: + raise + log.info( + "failed to check out '%s' from %s; will also try in %s", + refspec, + git_url, + alt_git_url, + ) + remote.run(logger=log.getChild(role), + args=refspec.clone(alt_git_url, clonedir)) + remote.run( + logger=log.getChild(role), + args=[ + 'cd', '--', srcdir, + run.Raw('&&'), + 'if', 'test', '-e', 'Makefile', run.Raw(';'), 'then', 'make', run.Raw(';'), 'fi', + run.Raw('&&'), + 'find', '-executable', '-type', 'f', '-printf', r'%P\0', + run.Raw('>{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role)), + ], + ) + + workunits_file = '{tdir}/workunits.list.{role}'.format(tdir=testdir, role=role) + workunits = sorted(remote.read_file(workunits_file).decode().split('\0')) + assert workunits + + try: + assert isinstance(tests, list) + for spec in tests: + dir_or_fname, *optional_args = shlex.split(spec) + log.info('Running workunits matching %s on %s...', dir_or_fname, role) + # match executables named "foo" or "foo/*" with workunit named + # "foo" + to_run = [w for w in workunits + if os.path.commonpath([w, dir_or_fname]) == dir_or_fname] + if not to_run: + raise RuntimeError('Spec did not match any workunits: {spec!r}'.format(spec=spec)) + for workunit in to_run: + log.info('Running workunit %s...', workunit) + args = [ + 'mkdir', '-p', '--', scratch_tmp, + run.Raw('&&'), + 'cd', '--', scratch_tmp, + run.Raw('&&'), + run.Raw('CEPH_CLI_TEST_DUP_COMMAND=1'), + run.Raw('CEPH_REF={ref}'.format(ref=refspec)), + run.Raw('TESTDIR="{tdir}"'.format(tdir=testdir)), + run.Raw('CEPH_ARGS="--cluster {0}"'.format(cluster)), + run.Raw('CEPH_ID="{id}"'.format(id=id_)), + run.Raw('PATH=$PATH:/usr/sbin'), + run.Raw('CEPH_BASE={dir}'.format(dir=clonedir)), + run.Raw('CEPH_ROOT={dir}'.format(dir=clonedir)), + run.Raw('CEPH_MNT={dir}'.format(dir=mnt)), + ] + if env is not None: + for var, val in env.items(): + quoted_val = pipes.quote(val) + env_arg = '{var}={val}'.format(var=var, val=quoted_val) + args.append(run.Raw(env_arg)) + if coverage_and_limits: + args.extend([ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir)]) + if 
timeout and timeout != '0': + args.extend(['timeout', timeout]) + args.extend([ + '{srcdir}/{workunit}'.format( + srcdir=srcdir, + workunit=workunit, + ), + ]) + remote.run( + logger=log.getChild(role), + args=args + optional_args, + label="workunit test {workunit}".format(workunit=workunit) + ) + if cleanup: + args=['sudo', 'rm', '-rf', '--', scratch_tmp] + remote.run(logger=log.getChild(role), args=args, timeout=(60*60)) + finally: + log.info('Stopping %s on %s...', tests, role) + args=['sudo', 'rm', '-rf', '--', workunits_file, clonedir] + # N.B. don't cleanup scratch_tmp! If the mount is broken then rm will hang. + remote.run( + logger=log.getChild(role), + args=args, + ) diff --git a/qa/test_import.py b/qa/test_import.py new file mode 100644 index 000000000..4ee59b565 --- /dev/null +++ b/qa/test_import.py @@ -0,0 +1,38 @@ +# try to import all .py files from a given directory + +import glob +import os +import importlib +import importlib.util +import pytest + +def _module_name(path): + task = os.path.splitext(path)[0] + parts = task.split(os.path.sep) + package = parts[0] + name = ''.join('.' + c for c in parts[1:]) + return package, name + +def _import_file(path): + package, mod_name = _module_name(path) + line = f'Importing {package}{mod_name} from {path}' + print(f'{line:<80}', end='') + mod_spec = importlib.util.find_spec(mod_name, package) + mod = mod_spec.loader.load_module(f'{package}{mod_name}') + if mod is None: + result = 'FAIL' + else: + result = 'DONE' + print(f'{result:>6}') + mod_spec.loader.exec_module(mod) + return result + +def get_paths(): + for g in ['tasks/**/*.py']: + for p in glob.glob(g, recursive=True): + yield p + +@pytest.mark.parametrize("path", list(sorted(get_paths()))) +def test_import(path): + assert _import_file(path) == 'DONE' + diff --git a/qa/timezone/eastern.yaml b/qa/timezone/eastern.yaml new file mode 100644 index 000000000..019c761e1 --- /dev/null +++ b/qa/timezone/eastern.yaml @@ -0,0 +1,4 @@ +tasks: +- exec: + all: + - echo America/New_York | sudo tee /etc/timezone diff --git a/qa/timezone/pacific.yaml b/qa/timezone/pacific.yaml new file mode 100644 index 000000000..6944aa6d8 --- /dev/null +++ b/qa/timezone/pacific.yaml @@ -0,0 +1,4 @@ +tasks: +- exec: + all: + - echo America/Los_Angeles | sudo tee /etc/timezone diff --git a/qa/timezone/random.yaml b/qa/timezone/random.yaml new file mode 100644 index 000000000..1d48ce918 --- /dev/null +++ b/qa/timezone/random.yaml @@ -0,0 +1,5 @@ +tasks: +- exec: + all: + - echo America/Los_Angeles | sudo tee /etc/timezone + - [ $RANDOM -gt 32000 ] && echo America/New_York | sudo tee /etc/timezone diff --git a/qa/tox.ini b/qa/tox.ini new file mode 100644 index 000000000..5b2149c49 --- /dev/null +++ b/qa/tox.ini @@ -0,0 +1,46 @@ +[tox] +envlist = flake8, mypy, pytest, deadsymlinks +skipsdist = True + +[testenv] +setenv = + LC_ALL = C.UTF-8 + LANG = C + +[testenv:flake8] +basepython = python3 +deps= + flake8 +commands=flake8 --select=F,E9 --exclude=venv,.tox + +[testenv:mypy] +basepython = python3 +deps = + mypy + types-boto + types-requests + types-jwt + types-paramiko + types-PyYAML + types-cryptography + types-python-dateutil + -c{toxinidir}/../src/mypy-constrains.txt +commands = mypy {posargs:.} + +[testenv:py3] +basepython = python3 +deps = + {env:TEUTHOLOGY_GIT:git+https://github.com/ceph/teuthology.git@main}#egg=teuthology[coverage,orchestra,test] + httplib2 + pytest +commands = + pytest --assert=plain test_import.py + pytest tasks/tests --suite-dir {toxinidir}/suites {posargs} + +[testenv:deadsymlinks] 
+basepython = python3 +toxworkdir = {toxinidir} +allowlist_externals = + bash +commands = + bash -c '! (find . -xtype l | grep ^)' diff --git a/qa/valgrind.supp b/qa/valgrind.supp new file mode 100644 index 000000000..6a5a08f14 --- /dev/null +++ b/qa/valgrind.supp @@ -0,0 +1,703 @@ + +{ + <allthefrees, so we can behave with tcmalloc> + Memcheck:Free + fun:free + ... +} +{ + operator delete[] in Rados::shutdown + Memcheck:Free + fun:_ZdaPvm + ... + fun:_ZN8librados7v14_2_05Rados8shutdownEv +} +{ + older boost mersenne twister uses uninitialized memory for randomness + Memcheck:Cond + ... + fun:*Monitor::prepare_new_fingerprint* + ... +} +{ + older boost mersenne twister uses uninitialized memory for randomness + Memcheck:Value8 + ... + fun:*Monitor::prepare_new_fingerprint* + ... +} +{ + apparent TLS leak in eglibc + Memcheck:Leak + fun:calloc + ... + fun:_dl_allocate_tls + fun:pthread_create* + ... +} +{ + osd: ignore ec plugin loading (FIXME SOMEDAY) + Memcheck:Leak + ... + fun:*ErasureCodePluginRegistry*load* + ... +} +{ + osd: ignore ec plugin factory (FIXME SOMEDAY) + Memcheck:Leak + ... + fun:*ErasureCodePluginRegistry*factory* + ... +} +{ + tcmalloc: libboost_thread-mt.so.1.53 is linked with tcmalloc + Memcheck:Param + msync(start) + obj:/usr/lib64/libpthread-2.17.so + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + ... + fun:*tcmalloc*ThreadCache* + ... + obj:/usr/lib64/libboost_thread-mt.so.1.53.0 +} +{ + tcmalloc: msync heap allocation points to uninit bytes (centos 6.5) + Memcheck:Param + msync(start) + obj:/lib64/libpthread-2.12.so + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to unaddressible bytes (centos 6.5 #2) + Memcheck:Param + msync(start) + obj:/lib64/libpthread-2.12.so + obj:/usr/lib64/libunwind.so.7.0.0 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to uninit bytes (rhel7) + Memcheck:Param + msync(start) + obj:/usr/lib64/libpthread-2.17.so + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to uninit bytes (rhel7 #2) + Memcheck:Param + msync(start) + obj:/usr/lib64/libpthread-2.17.so + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + obj:/usr/lib64/libunwind.so.8.0.1 + fun:_ULx86_64_step + obj:/usr/lib64/libtcmalloc.so.4.2.6 + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to uninit bytes (wheezy) + Memcheck:Param + msync(start) + obj:/lib/x86_64-linux-gnu/libpthread-2.13.so + obj:/usr/lib/libunwind.so.7.0.0 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to uninit bytes (precise) + Memcheck:Param + msync(start) + obj:/lib/x86_64-linux-gnu/libpthread-2.15.so 
+ obj:/usr/lib/libunwind.so.7.0.0 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm + obj:/usr/lib/libtcmalloc.so.0.1.0 +} +{ + tcmalloc: msync heap allocation points to uninit bytes (trusty) + Memcheck:Param + msync(start) + obj:/lib/x86_64-linux-gnu/libpthread-2.19.so + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm +} +{ + tcmalloc: msync heap allocation points to uninit bytes 2 (trusty) + Memcheck:Param + msync(start) + fun:__msync_nocancel + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + fun:_ULx86_64_step + fun:_Z13GetStackTracePPvii + fun:_ZN8tcmalloc8PageHeap8GrowHeapEm + fun:_ZN8tcmalloc8PageHeap3NewEm + fun:_ZN8tcmalloc15CentralFreeList8PopulateEv + fun:_ZN8tcmalloc15CentralFreeList18FetchFromSpansSafeEv + fun:_ZN8tcmalloc15CentralFreeList11RemoveRangeEPPvS2_i +} +{ + tcmalloc: msync (xenial) + Memcheck:Param + msync(start) + fun:__msync_nocancel + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:/usr/lib/x86_64-linux-gnu/libunwind.so.8.0.1 + obj:*tcmalloc* + fun:*GetStackTrace* +} +{ + tcmalloc: param points to uninit bytes under call_init (jammy) + Memcheck:Param + write(buf) + fun:syscall + obj:*libunwind* + obj:*libunwind* + obj:*libunwind* + obj:*libunwind* + fun:_ULx86_64_step + obj:*tcmalloc* + obj:*tcmalloc* + obj:*tcmalloc* + obj:*tcmalloc* + fun:call_init.part.0 +} +{ + tcmalloc: string + Memcheck:Leak + ... + obj:*tcmalloc* + fun:call_init* + ... +} +{ + ceph global: deliberate onexit leak + Memcheck:Leak + ... + fun:*set_flush_on_exit* + ... +} +{ + libleveldb: ignore all static leveldb leaks + Memcheck:Leak + ... + fun:*leveldb* + ... +} +{ + libleveldb: ignore all dynamic libleveldb leaks + Memcheck:Leak + ... + obj:*libleveldb.so* + ... +} +{ + libcurl: ignore libcurl leaks + Memcheck:Leak + ... + fun:*curl_global_init +} +{ + ignore gnutls leaks + Memcheck:Leak + ... + fun:gnutls_global_init +} +{ + ignore libfcgi leak; OS_LibShutdown has no callers! + Memcheck:Leak + ... + fun:OS_LibInit + fun:FCGX_Init +} +{ + ignore libnss3 leaks + Memcheck:Leak + ... + obj:*libnss3* + ... +} +{ + strptime suckage + Memcheck:Cond + fun:__GI___strncasecmp_l + fun:__strptime_internal + ... +} +{ + strptime suckage 2 + Memcheck:Value8 + fun:__GI___strncasecmp_l + fun:__strptime_internal + ... +} +{ + strptime suckage 3 + Memcheck:Addr8 + fun:__GI___strncasecmp_l + fun:__strptime_internal + ... +} +{ + inet_ntop does something lame on local stack + Memcheck:Value8 + ... + fun:inet_ntop + ... +} +{ + inet_ntop does something lame on local stack + Memcheck:Addr8 + ... + fun:inet_ntop + ... +} +{ + dl-lookup.c thing .. Invalid write of size 8 + Memcheck:Value8 + fun:do_lookup_x + ... + fun:_dl_lookup_symbol_x + ... +} +{ + dl-lookup.c thing .. Invalid write of size 8 + Memcheck:Addr8 + fun:do_lookup_x + ... + fun:_dl_lookup_symbol_x + ... +} +{ + dl-init.c possible lost init + Memcheck:Leak + ... + fun:__trans_list_add + fun:call_init.part.0 + fun:call_init + ... 
+} +{ + weird thing from libc + Memcheck:Leak + ... + fun:*sub_I_comparator* + fun:__libc_csu_init + ... +} +{ + libfuse leak + Memcheck:Leak + ... + fun:fuse_parse_cmdline + ... +} +{ + boost thread leaks on exit + Memcheck:Leak + ... + fun:*boost*detail* + ... + fun:exit +} +{ + lttng appears to not clean up state + Memcheck:Leak + ... + fun:lttng_ust_baddr_statedump_init + fun:lttng_ust_init + fun:call_init.part.0 + ... +} +{ + fun:PK11_CreateContextBySymKey race + Helgrind:Race + obj:/usr/*lib*/libfreebl*3.so + ... + obj:/usr/*lib*/libsoftokn3.so + ... + obj:/usr/*lib*/libnss3.so + fun:PK11_CreateContextBySymKey + ... +} +{ + thread init race + Helgrind:Race + fun:mempcpy + fun:_dl_allocate_tls_init + ... + fun:pthread_create@* + ... +} +{ + thread_local memory is falsely detected (https://svn.boost.org/trac/boost/ticket/3296) + Memcheck:Leak + ... + fun:*boost*detail*get_once_per_thread_epoch* + fun:*boost*call_once* + fun:*boost*detail*get_current_thread_data* + ... +} +{ + rocksdb thread local singletons + Memcheck:Leak + ... + fun:rocksdb::Env::Default() + ... +} +{ + rocksdb column thread local leaks + Memcheck:Leak + ... + fun:rocksdb::ThreadLocalPtr::StaticMeta::SetHandler* + fun:rocksdb::ColumnFamilyData::ColumnFamilyData* + ... +} +{ + rocksdb thread crap + Memcheck:Leak + ... + fun:*ThreadLocalPtr* + ... +} +{ + rocksdb singleton Env leak, blech + Memcheck:Leak + ... + fun:CreateThreadStatusUpdater + fun:PosixEnv + ... +} +{ + rocksdb::Env::Default() + Memcheck:Leak + ... + fun:*rocksdb*Env*Default* + ... +} +{ + rocksdb config parsing + Memcheck:Leak + ... + fun:*rocksdb*Configurable*ParseOption* + ... +} +{ + rocksdb config parsing + Memcheck:Leak + ... + fun:*RocksDBStore*ParseOptionsFromString* + ... +} +{ + rocksdb config parsing + Memcheck:Leak + ... + fun:*RocksDBStore*do_open* + ... +} +{ + rocksdb column family init + Memcheck:Leak + ... + fun:*rocksdb*ColumnFamilyOptions* + ... + fun:*RocksDBStore*init* + ... +} +{ + rocksdb cache get-or-create + Memcheck:Leak + ... + fun:*rocksdb*RegisterCacheDeleterRole* + ... + fun:*rocksdb*GetCacheItemHelperForRole* + ... +} +{ + rocksdb BGThreadWrapper + Memcheck:Leak + ... + fun:*BGThreadWrapper* + ... +} +{ + rocksdb VersionStorageInfo + Memcheck:Leak + ... + fun:*VersionStorageInfo + ... +} +{ + rocksdb version builder + Memcheck:Leak + ... + fun:*rocksdb*VersionBuilder*Rep*LoadTableHandlers* + ... +} +{ + libstdc++ leak on xenial + Memcheck:Leak + fun:malloc + ... + fun:call_init.part.0 + fun:call_init + fun:_dl_init + ... +} +{ + strange leak of std::string memory from md_config_t seen in radosgw + Memcheck:Leak + ... + fun:_ZNSs4_Rep9_S_createEmmRKSaIcE + fun:_ZNSs12_S_constructIPKcEEPcT_S3_RKSaIcESt20forward_iterator_tag + ... + fun:_ZN11md_config_tC1Ev + fun:_ZN11CephContextC1Eji + ... +} +{ + python does not reset the member field when dealloc an object + Memcheck:Leak + match-leak-kinds: all + ... + fun:Py_InitializeEx + ... +} +{ + statically allocated python types don't get members freed + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyType_Ready + ... +} +{ + manually constructed python module members don't get freed + Memcheck:Leak + match-leak-kinds: all + ... + fun:Py_InitModule4_64 + ... +} +{ + manually constructed python module members don't get freed + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyModule_AddObject + ... +} +{ + python subinterpreters may not clean up properly + Memcheck:Leak + match-leak-kinds: all + ... + fun:Py_NewInterpreter + ... 
+} +{ + python should be able to take care of itself + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyEval_EvalCode +} +{ + python should be able to take care of itself + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyImport_ImportModuleLevel +} +{ + python-owned threads may not full clean up after themselves + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyEval_CallObjectWithKeywords +} +{ + python should be able to take care of itself + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyEval_EvalFrameEx + ... + obj:/usr/lib64/libpython2.7.so.1.0 +} +{ + python should be able to take care of itself + Memcheck:Leak + match-leak-kinds: all + ... + fun:PyObject_Call +} + +{ + rados cython constants + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:PyObject_Malloc + fun:PyCode_New + fun:__Pyx_InitCachedConstants + fun:initrados + fun:_PyImport_LoadDynamicModule + ... + fun:PyImport_ImportModuleLevel + ... + fun:PyObject_Call + fun:PyEval_CallObjectWithKeywords + fun:PyEval_EvalFrameEx +} + +{ + rbd cython constants + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:PyObject_Malloc + fun:PyCode_New + fun:__Pyx_InitCachedConstants + fun:initrbd + fun:_PyImport_LoadDynamicModule + ... + fun:PyImport_ImportModuleLevel + ... + fun:PyObject_Call + fun:PyEval_CallObjectWithKeywords + fun:PyEval_EvalFrameEx +} + +{ + dlopen() with -lceph-common https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=700899 + Memcheck:Leak + match-leak-kinds: reachable + fun:*alloc + ... + fun:_dlerror_run + fun:dlopen@@GLIBC_2.2.5 +} + +{ + ethdev_init_log thing + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:ethdev_init_log + ... +} + +{ + rte_log_init() in DPDK fails to reset strdup()'ed string at exit + Memcheck:Leak + match-leak-kinds: reachable + fun:*alloc + ... + fun:rte_log_init + fun:__libc_csu_init +} + +{ + libc_csu_init (strdup, rte_log_register, etc.) + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:__libc_csu_init + ... +} + +{ + Boost.Thread fails to call tls_destructor() when the thread exists + Memcheck:Leak + match-leak-kinds: reachable + ... + fun:_Znwm + ... + fun:*boost*detail*set_tss_data* + ... +} + +{ + ignore *all* ceph-mgr python crap. this is overkill, but better than nothing + Memcheck:Leak + match-leak-kinds: all + ... + fun:Py* + ... +} + +{ + something in glibc + Memcheck:Leak + match-leak-kinds: all + ... + fun:strdup + fun:__trans_list_add + ... + fun:_dl_init + ... +} + +# "Conditional jump or move depends on uninitialised value(s)" in OpenSSL +# https://github.com/openssl/openssl/issues/19719 +{ + uninitialized value in gcm_cipher_internal + Memcheck:Cond + ... + fun:gcm_cipher_internal + ... + fun:ossl_gcm_stream_final + fun:EVP_DecryptFinal_ex + ... 
+} +{ + tracker #62141 : valgrind: UninitCondition under __run_exit_handlers + Memcheck:Cond + fun:free + fun:free_res + fun:__libc_freeres + fun:_vgnU_freeres + fun:__run_exit_handlers + fun:exit + fun:(below main) +} + diff --git a/qa/workunits/Makefile b/qa/workunits/Makefile new file mode 100644 index 000000000..f75f5dfd4 --- /dev/null +++ b/qa/workunits/Makefile @@ -0,0 +1,4 @@ +DIRS = direct_io fs + +all: + for d in $(DIRS) ; do ( cd $$d ; $(MAKE) all ) ; done diff --git a/qa/workunits/caps/mon_commands.sh b/qa/workunits/caps/mon_commands.sh new file mode 100755 index 000000000..5b5bce62e --- /dev/null +++ b/qa/workunits/caps/mon_commands.sh @@ -0,0 +1,25 @@ +#!/bin/sh -ex + +ceph-authtool --create-keyring k --gen-key -p --name client.xx +ceph auth add -i k client.xx mon "allow command foo; allow command bar *; allow command baz ...; allow command foo add * mon allow\\ rwx osd allow\\ *" + +( ceph -k k -n client.xx foo || true ) | grep 'unrecog' +( ceph -k k -n client.xx foo ooo || true ) | grep 'Access denied' +( ceph -k k -n client.xx fo || true ) | grep 'Access denied' +( ceph -k k -n client.xx fooo || true ) | grep 'Access denied' + +( ceph -k k -n client.xx bar || true ) | grep 'Access denied' +( ceph -k k -n client.xx bar a || true ) | grep 'unrecog' +( ceph -k k -n client.xx bar a b c || true ) | grep 'Access denied' +( ceph -k k -n client.xx ba || true ) | grep 'Access denied' +( ceph -k k -n client.xx barr || true ) | grep 'Access denied' + +( ceph -k k -n client.xx baz || true ) | grep -v 'Access denied' +( ceph -k k -n client.xx baz a || true ) | grep -v 'Access denied' +( ceph -k k -n client.xx baz a b || true ) | grep -v 'Access denied' + +( ceph -k k -n client.xx foo add osd.1 -i k mon 'allow rwx' osd 'allow *' || true ) | grep 'unrecog' +( ceph -k k -n client.xx foo add osd a b c -i k mon 'allow rwx' osd 'allow *' || true ) | grep 'Access denied' +( ceph -k k -n client.xx foo add osd a b c -i k mon 'allow *' || true ) | grep 'Access denied' + +echo OK
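A note on the cap expectations above (inferred purely from the grep assertions in the test itself, not from separate documentation): 'Access denied' means the monitor rejected the command at the cap check, while 'unrecog' means the cap check passed but the command does not actually exist. Read that way, the test appears to encode roughly the following matching rules:

    allow command foo        # 'foo' only, with no further arguments
    allow command bar *      # 'bar' with exactly one argument, of any value
    allow command baz ...    # 'baz' with any number of arguments

The longer "foo add * mon allow\ rwx osd allow\ *" cap additionally appears to pin some positional arguments to literal values, which is why changing those values in the last block flips the result to 'Access denied'.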
\ No newline at end of file diff --git a/qa/workunits/ceph-helpers-root.sh b/qa/workunits/ceph-helpers-root.sh new file mode 100755 index 000000000..5b5d2b409 --- /dev/null +++ b/qa/workunits/ceph-helpers-root.sh @@ -0,0 +1,129 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +####################################################################### + +function distro_id() { + source /etc/os-release + echo $ID +} + +function distro_version() { + source /etc/os-release + echo $VERSION +} + +function install() { + if [ $(distro_id) = "ubuntu" ]; then + sudo apt-get purge -y gcc + sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test + fi + for package in "$@" ; do + install_one $package + done + if [ $(distro_id) = "ubuntu" ]; then + sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 11 + sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 11 + sudo update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 11 + sudo update-alternatives --set cc /usr/bin/gcc + sudo update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 11 + sudo update-alternatives --set c++ /usr/bin/g++ + fi +} + +function install_one() { + case $(distro_id) in + ubuntu|debian|devuan|softiron) + sudo env DEBIAN_FRONTEND=noninteractive apt-get install -y "$@" + ;; + centos|fedora|rhel) + sudo yum install -y "$@" + ;; + opensuse*|suse|sles) + sudo zypper --non-interactive install "$@" + ;; + *) + echo "$(distro_id) is unknown, $@ will have to be installed manually." + ;; + esac +} + +function install_pkg_on_ubuntu { + local project=$1 + shift + local sha1=$1 + shift + local codename=$1 + shift + local force=$1 + shift + local pkgs=$@ + local missing_pkgs + if [ $force = "force" ]; then + missing_pkgs="$@" + else + for pkg in $pkgs; do + if ! 
dpkg -s $pkg &> /dev/null; then + missing_pkgs+=" $pkg" + fi + done + fi + if test -n "$missing_pkgs"; then + local shaman_url="https://shaman.ceph.com/api/repos/${project}/master/${sha1}/ubuntu/${codename}/repo" + sudo curl --silent --location $shaman_url --output /etc/apt/sources.list.d/$project.list + sudo env DEBIAN_FRONTEND=noninteractive apt-get update -y -o Acquire::Languages=none -o Acquire::Translation=none || true + sudo env DEBIAN_FRONTEND=noninteractive apt-get install --allow-unauthenticated -y $missing_pkgs + fi +} + +####################################################################### + +function control_osd() { + local action=$1 + local id=$2 + + sudo systemctl $action ceph-osd@$id + + return 0 +} + +####################################################################### + +function pool_read_write() { + local size=${1:-1} + local dir=/tmp + local timeout=360 + local test_pool=test_pool + + ceph osd pool delete $test_pool $test_pool --yes-i-really-really-mean-it || return 1 + ceph osd pool create $test_pool 4 || return 1 + ceph osd pool set $test_pool size $size --yes-i-really-mean-it || return 1 + ceph osd pool set $test_pool min_size $size || return 1 + ceph osd pool application enable $test_pool rados + + echo FOO > $dir/BAR + timeout $timeout rados --pool $test_pool put BAR $dir/BAR || return 1 + timeout $timeout rados --pool $test_pool get BAR $dir/BAR.copy || return 1 + diff $dir/BAR $dir/BAR.copy || return 1 + ceph osd pool delete $test_pool $test_pool --yes-i-really-really-mean-it || return 1 +} + +####################################################################### + +set -x + +"$@" diff --git a/qa/workunits/ceph-tests/ceph-admin-commands.sh b/qa/workunits/ceph-tests/ceph-admin-commands.sh new file mode 100755 index 000000000..4a9f0a66f --- /dev/null +++ b/qa/workunits/ceph-tests/ceph-admin-commands.sh @@ -0,0 +1,10 @@ +#!/bin/sh -ex + +ceph -s +rados lspools +rbd ls +# check that the monitors work +ceph osd set nodown +ceph osd unset nodown + +exit 0 diff --git a/qa/workunits/cephadm/create_iscsi_disks.sh b/qa/workunits/cephadm/create_iscsi_disks.sh new file mode 100755 index 000000000..45319e3a1 --- /dev/null +++ b/qa/workunits/cephadm/create_iscsi_disks.sh @@ -0,0 +1,36 @@ +#!/bin/bash -ex +# Create some file-backed iSCSI targets and attach them locally. + +# Exit if it's not CentOS +if ! grep -q rhel /etc/*-release; then + echo "The script only supports CentOS." 
+ exit 1 +fi + +[ -z "$SUDO" ] && SUDO=sudo + +# 15 GB +DISK_FILE_SIZE="16106127360" + +$SUDO yum install -y targetcli iscsi-initiator-utils + +TARGET_NAME="iqn.2003-01.org.linux-iscsi.$(hostname).x8664:sn.foobar" +$SUDO targetcli /iscsi create ${TARGET_NAME} +$SUDO targetcli /iscsi/${TARGET_NAME}/tpg1/portals delete 0.0.0.0 3260 +$SUDO targetcli /iscsi/${TARGET_NAME}/tpg1/portals create 127.0.0.1 3260 +$SUDO targetcli /iscsi/${TARGET_NAME}/tpg1 set attribute generate_node_acls=1 +$SUDO targetcli /iscsi/${TARGET_NAME}/tpg1 set attribute demo_mode_write_protect=0 + +for i in $(seq 3); do + # Create truncated files, and add them as luns + DISK_FILE="/tmp/disk${i}" + $SUDO truncate --size ${DISK_FILE_SIZE} ${DISK_FILE} + + $SUDO targetcli /backstores/fileio create "lun${i}" ${DISK_FILE} + # Workaround for https://tracker.ceph.com/issues/47758 + $SUDO targetcli "/backstores/fileio/lun${i}" set attribute optimal_sectors=0 + $SUDO targetcli /iscsi/${TARGET_NAME}/tpg1/luns create "/backstores/fileio/lun${i}" +done + +$SUDO iscsiadm -m discovery -t sendtargets -p 127.0.0.1 +$SUDO iscsiadm -m node -p 127.0.0.1 -T ${TARGET_NAME} -l diff --git a/qa/workunits/cephadm/test_adoption.sh b/qa/workunits/cephadm/test_adoption.sh new file mode 100755 index 000000000..68580eb62 --- /dev/null +++ b/qa/workunits/cephadm/test_adoption.sh @@ -0,0 +1,60 @@ +#!/bin/bash -ex + +SCRIPT_NAME=$(basename ${BASH_SOURCE[0]}) +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +CEPHADM_SRC_DIR=${SCRIPT_DIR}/../../../src/cephadm +CORPUS_COMMIT=9cd9ad020d93b0b420924fec55da307aff8bd422 + +[ -z "$SUDO" ] && SUDO=sudo + +[ -d "$TMPDIR" ] || TMPDIR=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX) +trap "$SUDO rm -rf $TMPDIR" EXIT + +if [ -z "$CEPHADM" ]; then + CEPHADM=`mktemp -p $TMPDIR tmp.cephadm.XXXXXX` + ${CEPHADM_SRC_DIR}/build.sh "$CEPHADM" +fi + +# at this point, we need $CEPHADM set +if ! [ -x "$CEPHADM" ]; then + echo "cephadm not found. 
Please set \$CEPHADM" + exit 1 +fi + +# combine into a single var +CEPHADM_BIN="$CEPHADM" +CEPHADM="$SUDO $CEPHADM_BIN" + +## adopt +CORPUS_GIT_SUBMOD="cephadm-adoption-corpus" +GIT_CLONE_DIR=${TMPDIR}/${CORPUS_GIT_SUBMOD} +git clone https://github.com/ceph/$CORPUS_GIT_SUBMOD $GIT_CLONE_DIR + +git -C $GIT_CLONE_DIR checkout $CORPUS_COMMIT +CORPUS_DIR=${GIT_CLONE_DIR}/archive + +for subdir in `ls ${CORPUS_DIR}`; do + for tarfile in `ls ${CORPUS_DIR}/${subdir} | grep .tgz`; do + tarball=${CORPUS_DIR}/${subdir}/${tarfile} + FSID_LEGACY=`echo "$tarfile" | cut -c 1-36` + TMP_TAR_DIR=`mktemp -d -p $TMPDIR` + $SUDO tar xzvf $tarball -C $TMP_TAR_DIR + NAMES=$($CEPHADM ls --legacy-dir $TMP_TAR_DIR | jq -r '.[].name') + for name in $NAMES; do + $CEPHADM adopt \ + --style legacy \ + --legacy-dir $TMP_TAR_DIR \ + --name $name + # validate after adopt + out=$($CEPHADM ls | jq '.[]' \ + | jq 'select(.name == "'$name'")') + echo $out | jq -r '.style' | grep 'cephadm' + echo $out | jq -r '.fsid' | grep $FSID_LEGACY + done + # clean-up before next iter + $CEPHADM rm-cluster --fsid $FSID_LEGACY --force + $SUDO rm -rf $TMP_TAR_DIR + done +done + +echo "OK" diff --git a/qa/workunits/cephadm/test_cephadm.sh b/qa/workunits/cephadm/test_cephadm.sh new file mode 100755 index 000000000..7d06a3326 --- /dev/null +++ b/qa/workunits/cephadm/test_cephadm.sh @@ -0,0 +1,474 @@ +#!/bin/bash -ex + +SCRIPT_NAME=$(basename ${BASH_SOURCE[0]}) +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# cleanup during exit +[ -z "$CLEANUP" ] && CLEANUP=true + +FSID='00000000-0000-0000-0000-0000deadbeef' + +# images that are used +IMAGE_MAIN=${IMAGE_MAIN:-'quay.ceph.io/ceph-ci/ceph:main'} +IMAGE_PACIFIC=${IMAGE_PACIFIC:-'quay.ceph.io/ceph-ci/ceph:pacific'} +#IMAGE_OCTOPUS=${IMAGE_OCTOPUS:-'quay.ceph.io/ceph-ci/ceph:octopus'} +IMAGE_DEFAULT=${IMAGE_MAIN} + +OSD_IMAGE_NAME="${SCRIPT_NAME%.*}_osd.img" +OSD_IMAGE_SIZE='6G' +OSD_TO_CREATE=2 +OSD_VG_NAME=${SCRIPT_NAME%.*} +OSD_LV_NAME=${SCRIPT_NAME%.*} + +# TMPDIR for test data +[ -d "$TMPDIR" ] || TMPDIR=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX) +[ -d "$TMPDIR_TEST_MULTIPLE_MOUNTS" ] || TMPDIR_TEST_MULTIPLE_MOUNTS=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX) + +CEPHADM_SRC_DIR=${SCRIPT_DIR}/../../../src/cephadm +CEPHADM_SAMPLES_DIR=${CEPHADM_SRC_DIR}/samples + +[ -z "$SUDO" ] && SUDO=sudo + +# If cephadm is already installed on the system, use that one, avoid building +# # one if we can. +if [ -z "$CEPHADM" ] && command -v cephadm >/dev/null ; then + CEPHADM="$(command -v cephadm)" +fi + +if [ -z "$CEPHADM" ]; then + CEPHADM=`mktemp -p $TMPDIR tmp.cephadm.XXXXXX` + ${CEPHADM_SRC_DIR}/build.sh "$CEPHADM" + NO_BUILD_INFO=1 +fi + +# at this point, we need $CEPHADM set +if ! [ -x "$CEPHADM" ]; then + echo "cephadm not found. Please set \$CEPHADM" + exit 1 +fi + +# add image to args +CEPHADM_ARGS="$CEPHADM_ARGS --image $IMAGE_DEFAULT" + +# combine into a single var +CEPHADM_BIN="$CEPHADM" +CEPHADM="$SUDO $CEPHADM_BIN $CEPHADM_ARGS" + +# clean up previous run(s)? +$CEPHADM rm-cluster --fsid $FSID --force +$SUDO vgchange -an $OSD_VG_NAME || true +loopdev=$($SUDO losetup -a | grep $(basename $OSD_IMAGE_NAME) | awk -F : '{print $1}') +if ! [ "$loopdev" = "" ]; then + $SUDO losetup -d $loopdev +fi + +function cleanup() +{ + if [ $CLEANUP = false ]; then + # preserve the TMPDIR state + echo "========================" + echo "!!! CLEANUP=$CLEANUP !!!" 
+ echo + echo "TMPDIR=$TMPDIR" + echo "========================" + return + fi + + dump_all_logs $FSID + rm -rf $TMPDIR +} +trap cleanup EXIT + +function expect_false() +{ + set -x + if eval "$@"; then return 1; else return 0; fi +} + +# expect_return_code $expected_code $command ... +function expect_return_code() +{ + set -x + local expected_code="$1" + shift + local command="$@" + + set +e + eval "$command" + local return_code="$?" + set -e + + if [ ! "$return_code" -eq "$expected_code" ]; then return 1; else return 0; fi +} + +function is_available() +{ + local name="$1" + local condition="$2" + local tries="$3" + + local num=0 + while ! eval "$condition"; do + num=$(($num + 1)) + if [ "$num" -ge $tries ]; then + echo "$name is not available" + false + fi + sleep 5 + done + + echo "$name is available" + true +} + +function dump_log() +{ + local fsid="$1" + local name="$2" + local num_lines="$3" + + if [ -z $num_lines ]; then + num_lines=100 + fi + + echo '-------------------------' + echo 'dump daemon log:' $name + echo '-------------------------' + + $CEPHADM logs --fsid $fsid --name $name -- --no-pager -n $num_lines +} + +function dump_all_logs() +{ + local fsid="$1" + local names=$($CEPHADM ls | jq -r '.[] | select(.fsid == "'$fsid'").name') + + echo 'dumping logs for daemons: ' $names + for name in $names; do + dump_log $fsid $name + done +} + +function nfs_stop() +{ + # stop the running nfs server + local units="nfs-server nfs-kernel-server" + for unit in $units; do + if systemctl --no-pager status $unit > /dev/null; then + $SUDO systemctl stop $unit + fi + done + + # ensure the NFS port is no longer in use + expect_false "$SUDO ss -tlnp '( sport = :nfs )' | grep LISTEN" +} + +## prepare + check host +$SUDO $CEPHADM check-host + +## run a gather-facts (output to stdout) +$SUDO $CEPHADM gather-facts + +## NOTE: cephadm version is, as of around May 2023, no longer basing the +## output for `cephadm version` on the version of the containers. The version +## reported is that of the "binary" and is determined during the ceph build. +## `cephadm version` should NOT require sudo/root. +$CEPHADM_BIN version +$CEPHADM_BIN version | grep 'cephadm version' +# Typically cmake should be running the cephadm build script with CLI arguments +# that embed version info into the "binary". If not using a cephadm build via +# cmake you can set `NO_BUILD_INFO` to skip this check. +if [ -z "$NO_BUILD_INFO" ]; then + $CEPHADM_BIN version | grep -v 'UNSET' + $CEPHADM_BIN version | grep -v 'UNKNOWN' +fi + + +## test shell before bootstrap, when crash dir isn't (yet) present on this host +$CEPHADM shell --fsid $FSID -- ceph -v | grep 'ceph version' +$CEPHADM shell --fsid $FSID -e FOO=BAR -- printenv | grep FOO=BAR + +# test stdin +echo foo | $CEPHADM shell -- cat | grep -q foo + +# the shell commands a bit above this seems to cause the +# /var/lib/ceph/<fsid> directory to be made. Since we now +# check in bootstrap that there are no clusters with the same +# fsid based on the directory existing, we need to make sure +# this directory is gone before bootstrapping. 
We can +# accomplish this with another rm-cluster +$CEPHADM rm-cluster --fsid $FSID --force + +## bootstrap +ORIG_CONFIG=`mktemp -p $TMPDIR` +CONFIG=`mktemp -p $TMPDIR` +MONCONFIG=`mktemp -p $TMPDIR` +KEYRING=`mktemp -p $TMPDIR` +IP=127.0.0.1 +cat <<EOF > $ORIG_CONFIG +[global] + log to file = true + osd crush chooseleaf type = 0 +EOF +$CEPHADM bootstrap \ + --mon-id a \ + --mgr-id x \ + --mon-ip $IP \ + --fsid $FSID \ + --config $ORIG_CONFIG \ + --output-config $CONFIG \ + --output-keyring $KEYRING \ + --output-pub-ssh-key $TMPDIR/ceph.pub \ + --allow-overwrite \ + --skip-mon-network \ + --skip-monitoring-stack +test -e $CONFIG +test -e $KEYRING +rm -f $ORIG_CONFIG + +$SUDO test -e /var/log/ceph/$FSID/ceph-mon.a.log +$SUDO test -e /var/log/ceph/$FSID/ceph-mgr.x.log + +for u in ceph.target \ + ceph-$FSID.target \ + ceph-$FSID@mon.a \ + ceph-$FSID@mgr.x; do + systemctl is-enabled $u + systemctl is-active $u +done +systemctl | grep system-ceph | grep -q .slice # naming is escaped and annoying + +# check ceph -s works (via shell w/ passed config/keyring) +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph -s | grep $FSID + +for t in mon mgr node-exporter prometheus grafana; do + $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph orch apply $t --unmanaged +done + +## ls +$CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").fsid' \ + | grep $FSID +$CEPHADM ls | jq '.[]' | jq 'select(.name == "mgr.x").fsid' \ + | grep $FSID + +# make sure the version is returned correctly +$CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").version' | grep -q \\. + +## deploy +# add mon.b +cp $CONFIG $MONCONFIG +echo "public addrv = [v2:$IP:3301,v1:$IP:6790]" >> $MONCONFIG +jq --null-input \ + --arg fsid $FSID \ + --arg name mon.b \ + --arg keyring /var/lib/ceph/$FSID/mon.a/keyring \ + --arg config "$MONCONFIG" \ + '{"fsid": $fsid, "name": $name, "params":{"keyring": $keyring, "config": $config}}' | \ + $CEPHADM _orch deploy +for u in ceph-$FSID@mon.b; do + systemctl is-enabled $u + systemctl is-active $u +done +cond="$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph mon stat | grep '2 mons'" +is_available "mon.b" "$cond" 30 + +# add mgr.y +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph auth get-or-create mgr.y \ + mon 'allow profile mgr' \ + osd 'allow *' \ + mds 'allow *' > $TMPDIR/keyring.mgr.y +jq --null-input \ + --arg fsid $FSID \ + --arg name mgr.y \ + --arg keyring $TMPDIR/keyring.mgr.y \ + --arg config "$CONFIG" \ + '{"fsid": $fsid, "name": $name, "params":{"keyring": $keyring, "config": $config}}' | \ + $CEPHADM _orch deploy +for u in ceph-$FSID@mgr.y; do + systemctl is-enabled $u + systemctl is-active $u +done + +for f in `seq 1 30`; do + if $CEPHADM shell --fsid $FSID \ + --config $CONFIG --keyring $KEYRING -- \ + ceph -s -f json-pretty \ + | jq '.mgrmap.num_standbys' | grep -q 1 ; then break; fi + sleep 1 +done +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph -s -f json-pretty \ + | jq '.mgrmap.num_standbys' | grep -q 1 + +# add osd.{1,2,..} +dd if=/dev/zero of=$TMPDIR/$OSD_IMAGE_NAME bs=1 count=0 seek=$OSD_IMAGE_SIZE +loop_dev=$($SUDO losetup -f) +$SUDO vgremove -f $OSD_VG_NAME || true +$SUDO losetup $loop_dev $TMPDIR/$OSD_IMAGE_NAME +$SUDO pvcreate $loop_dev && $SUDO vgcreate $OSD_VG_NAME $loop_dev + +# osd bootstrap keyring +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph auth get client.bootstrap-osd > $TMPDIR/keyring.bootstrap.osd + 
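+# [editor's note, not part of the original script] The next two loops turn
+# each LV into a running OSD: ceph-volume prepares the LV with --no-systemd
+# (presumably so that cephadm, rather than ceph-volume, manages the unit),
+# the osd id and osd fsid assigned by prepare are recovered by parsing
+# `ceph-volume lvm list --format json` with jq, and both are then fed to
+# `cephadm _orch deploy` as JSON on stdin, the same pattern used for mon.b
+# and mgr.y above.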
+# create lvs first so ceph-volume doesn't overlap with lv creation +for id in `seq 0 $((--OSD_TO_CREATE))`; do + $SUDO lvcreate -l $((100/$OSD_TO_CREATE))%VG -n $OSD_LV_NAME.$id $OSD_VG_NAME +done + +for id in `seq 0 $((--OSD_TO_CREATE))`; do + device_name=/dev/$OSD_VG_NAME/$OSD_LV_NAME.$id + CEPH_VOLUME="$CEPHADM ceph-volume \ + --fsid $FSID \ + --config $CONFIG \ + --keyring $TMPDIR/keyring.bootstrap.osd --" + + # prepare the osd + $CEPH_VOLUME lvm prepare --bluestore --data $device_name --no-systemd + $CEPH_VOLUME lvm batch --no-auto $device_name --yes --no-systemd + + # osd id and osd fsid + $CEPH_VOLUME lvm list --format json $device_name > $TMPDIR/osd.map + osd_id=$($SUDO cat $TMPDIR/osd.map | jq -cr '.. | ."ceph.osd_id"? | select(.)') + osd_fsid=$($SUDO cat $TMPDIR/osd.map | jq -cr '.. | ."ceph.osd_fsid"? | select(.)') + + # deploy the osd + jq --null-input \ + --arg fsid $FSID \ + --arg name osd.$osd_id \ + --arg keyring $TMPDIR/keyring.bootstrap.osd \ + --arg config "$CONFIG" \ + --arg osd_fsid $osd_fsid \ + '{"fsid": $fsid, "name": $name, "params":{"keyring": $keyring, "config": $config, "osd_fsid": $osd_fsid}}' | \ + $CEPHADM _orch deploy +done + +# add node-exporter +jq --null-input \ + --arg fsid $FSID \ + --arg name node-exporter.a \ + '{"fsid": $fsid, "name": $name}' | \ + ${CEPHADM//--image $IMAGE_DEFAULT/} _orch deploy +cond="curl 'http://localhost:9100' | grep -q 'Node Exporter'" +is_available "node-exporter" "$cond" 10 + +# add prometheus +jq --null-input \ + --arg fsid $FSID \ + --arg name prometheus.a \ + --argjson config_blobs "$(cat ${CEPHADM_SAMPLES_DIR}/prometheus.json)" \ + '{"fsid": $fsid, "name": $name, "config_blobs": $config_blobs}' | \ + ${CEPHADM//--image $IMAGE_DEFAULT/} _orch deploy +cond="curl 'localhost:9095/api/v1/query?query=up'" +is_available "prometheus" "$cond" 10 + +# add grafana +jq --null-input \ + --arg fsid $FSID \ + --arg name grafana.a \ + --argjson config_blobs "$(cat ${CEPHADM_SAMPLES_DIR}/grafana.json)" \ + '{"fsid": $fsid, "name": $name, "config_blobs": $config_blobs}' | \ + ${CEPHADM//--image $IMAGE_DEFAULT/} _orch deploy +cond="curl --insecure 'https://localhost:3000' | grep -q 'grafana'" +is_available "grafana" "$cond" 50 + +# add nfs-ganesha +nfs_stop +nfs_rados_pool=$(cat ${CEPHADM_SAMPLES_DIR}/nfs.json | jq -r '.["pool"]') +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph osd pool create $nfs_rados_pool 64 +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + rados --pool nfs-ganesha --namespace nfs-ns create conf-nfs.a +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph orch pause +jq --null-input \ + --arg fsid $FSID \ + --arg name nfs.a \ + --arg keyring "$KEYRING" \ + --arg config "$CONFIG" \ + --argjson config_blobs "$(cat ${CEPHADM_SAMPLES_DIR}/nfs.json)" \ + '{"fsid": $fsid, "name": $name, "params": {"keyring": $keyring, "config": $config}, "config_blobs": $config_blobs}' | \ + ${CEPHADM} _orch deploy +cond="$SUDO ss -tlnp '( sport = :nfs )' | grep 'ganesha.nfsd'" +is_available "nfs" "$cond" 10 +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph orch resume + +# add alertmanager via custom container +alertmanager_image=$(cat ${CEPHADM_SAMPLES_DIR}/custom_container.json | jq -r '.image') +tcp_ports=$(jq .ports ${CEPHADM_SAMPLES_DIR}/custom_container.json) +jq --null-input \ + --arg fsid $FSID \ + --arg name container.alertmanager.a \ + --arg keyring $TMPDIR/keyring.bootstrap.osd \ + --arg config "$CONFIG" \ + --arg image 
"$alertmanager_image" \ + --argjson tcp_ports "${tcp_ports}" \ + --argjson config_blobs "$(cat ${CEPHADM_SAMPLES_DIR}/custom_container.json)" \ + '{"fsid": $fsid, "name": $name, "image": $image, "params": {"keyring": $keyring, "config": $config, "tcp_ports": $tcp_ports}, "config_blobs": $config_blobs}' | \ + ${CEPHADM//--image $IMAGE_DEFAULT/} _orch deploy +cond="$CEPHADM enter --fsid $FSID --name container.alertmanager.a -- test -f \ + /etc/alertmanager/alertmanager.yml" +is_available "alertmanager.yml" "$cond" 10 +cond="curl 'http://localhost:9093' | grep -q 'Alertmanager'" +is_available "alertmanager" "$cond" 10 + +## run +# WRITE ME + +## unit +$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled +$CEPHADM unit --fsid $FSID --name mon.a -- is-active +expect_false $CEPHADM unit --fsid $FSID --name mon.xyz -- is-active +$CEPHADM unit --fsid $FSID --name mon.a -- disable +expect_false $CEPHADM unit --fsid $FSID --name mon.a -- is-enabled +$CEPHADM unit --fsid $FSID --name mon.a -- enable +$CEPHADM unit --fsid $FSID --name mon.a -- is-enabled +$CEPHADM unit --fsid $FSID --name mon.a -- status +$CEPHADM unit --fsid $FSID --name mon.a -- stop +expect_return_code 3 $CEPHADM unit --fsid $FSID --name mon.a -- status +$CEPHADM unit --fsid $FSID --name mon.a -- start + +## shell +$CEPHADM shell --fsid $FSID -- true +$CEPHADM shell --fsid $FSID -- test -d /var/log/ceph +expect_false $CEPHADM --timeout 10 shell --fsid $FSID -- sleep 60 +$CEPHADM --timeout 60 shell --fsid $FSID -- sleep 10 +$CEPHADM shell --fsid $FSID --mount $TMPDIR $TMPDIR_TEST_MULTIPLE_MOUNTS -- stat /mnt/$(basename $TMPDIR) + +## enter +expect_false $CEPHADM enter +$CEPHADM enter --fsid $FSID --name mon.a -- test -d /var/lib/ceph/mon/ceph-a +$CEPHADM enter --fsid $FSID --name mgr.x -- test -d /var/lib/ceph/mgr/ceph-x +$CEPHADM enter --fsid $FSID --name mon.a -- pidof ceph-mon +expect_false $CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mon +$CEPHADM enter --fsid $FSID --name mgr.x -- pidof ceph-mgr +# this triggers a bug in older versions of podman, including 18.04's 1.6.2 +#expect_false $CEPHADM --timeout 5 enter --fsid $FSID --name mon.a -- sleep 30 +$CEPHADM --timeout 60 enter --fsid $FSID --name mon.a -- sleep 10 + +## ceph-volume +$CEPHADM ceph-volume --fsid $FSID -- inventory --format=json \ + | jq '.[]' + +## preserve test state +[ $CLEANUP = false ] && exit 0 + +## rm-daemon +# mon and osd require --force +expect_false $CEPHADM rm-daemon --fsid $FSID --name mon.a +# mgr does not +$CEPHADM rm-daemon --fsid $FSID --name mgr.x + +expect_false $CEPHADM zap-osds --fsid $FSID +$CEPHADM zap-osds --fsid $FSID --force + +## rm-cluster +expect_false $CEPHADM rm-cluster --fsid $FSID --zap-osds +$CEPHADM rm-cluster --fsid $FSID --force --zap-osds + +echo PASS diff --git a/qa/workunits/cephadm/test_dashboard_e2e.sh b/qa/workunits/cephadm/test_dashboard_e2e.sh new file mode 100755 index 000000000..32e0bcc77 --- /dev/null +++ b/qa/workunits/cephadm/test_dashboard_e2e.sh @@ -0,0 +1,107 @@ +#!/bin/bash -ex + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +DASHBOARD_FRONTEND_DIR=${SCRIPT_DIR}/../../../src/pybind/mgr/dashboard/frontend + +[ -z "$SUDO" ] && SUDO=sudo + +install_common () { + NODEJS_VERSION="16" + if grep -q debian /etc/*-release; then + $SUDO apt-get update + # https://github.com/nodesource/distributions#manual-installation + $SUDO apt-get install curl gpg + KEYRING=/usr/share/keyrings/nodesource.gpg + curl -fsSL https://deb.nodesource.com/gpgkey/nodesource.gpg.key | gpg --dearmor | $SUDO tee 
"$KEYRING" >/dev/null + DISTRO="$(source /etc/lsb-release; echo $DISTRIB_CODENAME)" + VERSION="node_$NODEJS_VERSION.x" + echo "deb [signed-by=$KEYRING] https://deb.nodesource.com/$VERSION $DISTRO main" | $SUDO tee /etc/apt/sources.list.d/nodesource.list + echo "deb-src [signed-by=$KEYRING] https://deb.nodesource.com/$VERSION $DISTRO main" | $SUDO tee -a /etc/apt/sources.list.d/nodesource.list + $SUDO apt-get update + $SUDO apt-get install nodejs + elif grep -q rhel /etc/*-release; then + $SUDO yum module -y enable nodejs:$NODEJS_VERSION + $SUDO yum install -y jq npm + else + echo "Unsupported distribution." + exit 1 + fi +} + +install_chrome () { + if grep -q debian /etc/*-release; then + $SUDO bash -c 'echo "deb [arch=amd64] https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list' + curl -fsSL https://dl.google.com/linux/linux_signing_key.pub | $SUDO apt-key add - + $SUDO apt-get update + $SUDO apt-get install -y google-chrome-stable + $SUDO apt-get install -y xvfb + $SUDO rm /etc/apt/sources.list.d/google-chrome.list + elif grep -q rhel /etc/*-release; then + $SUDO dd of=/etc/yum.repos.d/google-chrome.repo status=none <<EOF +[google-chrome] +name=google-chrome +baseurl=https://dl.google.com/linux/chrome/rpm/stable/\$basearch +enabled=1 +gpgcheck=1 +gpgkey=https://dl-ssl.google.com/linux/linux_signing_key.pub +EOF + $SUDO yum install -y google-chrome-stable + $SUDO rm /etc/yum.repos.d/google-chrome.repo + # Cypress dependencies + $SUDO yum install -y xorg-x11-server-Xvfb gtk2-devel gtk3-devel libnotify-devel GConf2 nss.x86_64 libXScrnSaver alsa-lib + else + echo "Unsupported distribution." + exit 1 + fi +} + +cypress_run () { + local specs="$1" + local timeout="$2" + local override_config="excludeSpecPattern=*.po.ts,retries=0,specPattern=${specs}" + + if [ x"$timeout" != "x" ]; then + override_config="${override_config},defaultCommandTimeout=${timeout}" + fi + npx cypress run --browser chrome --headless --config "$override_config" +} + +install_common +install_chrome + +CYPRESS_BASE_URL=$(ceph mgr services | jq -r .dashboard) +export CYPRESS_BASE_URL + +cd $DASHBOARD_FRONTEND_DIR + +# This is required for Cypress to understand typescript +npm ci --unsafe-perm +npx cypress verify +npx cypress info + +# Take `orch device ls` and `orch ps` as ground truth. +ceph orch device ls --refresh +ceph orch ps --refresh +sleep 10 # the previous call is asynchronous +ceph orch device ls --format=json | tee cypress/fixtures/orchestrator/inventory.json +ceph orch ps --format=json | tee cypress/fixtures/orchestrator/services.json + +DASHBOARD_ADMIN_SECRET_FILE="/tmp/dashboard-admin-secret.txt" +printf 'admin' > "${DASHBOARD_ADMIN_SECRET_FILE}" +ceph dashboard ac-user-set-password admin -i "${DASHBOARD_ADMIN_SECRET_FILE}" --force-password + +# Run Dashboard e2e tests. +# These tests are designed with execution order in mind, since orchestrator operations +# are likely to change cluster state, we can't just run tests in arbitrarily order. +# See /ceph/src/pybind/mgr/dashboard/frontend/cypress/integration/orchestrator/ folder. +find cypress # List all specs + +cypress_run "cypress/e2e/orchestrator/01-hosts.e2e-spec.ts" + +# Hosts are removed and added in the previous step. Do a refresh again. 
+ceph orch device ls --refresh +sleep 10 +ceph orch device ls --format=json | tee cypress/fixtures/orchestrator/inventory.json + +cypress_run "cypress/e2e/orchestrator/03-inventory.e2e-spec.ts" +cypress_run "cypress/e2e/orchestrator/04-osds.e2e-spec.ts" 300000 diff --git a/qa/workunits/cephadm/test_iscsi_etc_hosts.sh b/qa/workunits/cephadm/test_iscsi_etc_hosts.sh new file mode 100755 index 000000000..adbc34a92 --- /dev/null +++ b/qa/workunits/cephadm/test_iscsi_etc_hosts.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# checks if the container and host's /etc/hosts files match +# Necessary to avoid potential bugs caused by podman making +# edits to /etc/hosts file in the container +# exits with code 1 if host and iscsi container /etc/hosts do no match + +set -ex + +ISCSI_DAEMON=$(sudo /home/ubuntu/cephtest/cephadm ls | jq -r '.[] | select(.service_name == "iscsi.foo") | .name') +sudo /home/ubuntu/cephtest/cephadm enter --name $ISCSI_DAEMON -- cat /etc/hosts > iscsi_daemon_etc_hosts.txt +if cmp --silent /etc/hosts iscsi_daemon_etc_hosts.txt; then + echo "Daemon and host /etc/hosts files successfully matched" +else + echo "ERROR: /etc/hosts on host did not match /etc/hosts in the iscsi container!" + echo "Host /etc/hosts:" + cat /etc/hosts + echo "Iscsi container /etc/hosts:" + cat iscsi_daemon_etc_hosts.txt + exit 1 +fi diff --git a/qa/workunits/cephadm/test_iscsi_pids_limit.sh b/qa/workunits/cephadm/test_iscsi_pids_limit.sh new file mode 100755 index 000000000..bed4cc9e2 --- /dev/null +++ b/qa/workunits/cephadm/test_iscsi_pids_limit.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# checks if the containers default pids-limit (4096) is removed and Iscsi +# containers continue to run +# exits 1 if fails + +set -ex + +ISCSI_CONT_IDS=$(sudo podman ps -qa --filter='name=iscsi') +CONT_COUNT=$(echo ${ISCSI_CONT_IDS} | wc -w) +test ${CONT_COUNT} -eq 2 + +for i in ${ISCSI_CONT_IDS} +do + test $(sudo podman exec ${i} cat /sys/fs/cgroup/pids/pids.max) == max +done + +for i in ${ISCSI_CONT_IDS} +do + sudo podman exec ${i} /bin/sh -c 'for j in {0..20000}; do sleep 300 & done' +done + +for i in ${ISCSI_CONT_IDS} +do + SLEEP_COUNT=$(sudo podman exec ${i} /bin/sh -c 'ps -ef | grep -c sleep') + test ${SLEEP_COUNT} -gt 20000 +done + +echo OK diff --git a/qa/workunits/cephadm/test_repos.sh b/qa/workunits/cephadm/test_repos.sh new file mode 100755 index 000000000..221585fd0 --- /dev/null +++ b/qa/workunits/cephadm/test_repos.sh @@ -0,0 +1,45 @@ +#!/bin/bash -ex + +SCRIPT_NAME=$(basename ${BASH_SOURCE[0]}) +SCRIPT_DIR=$(dirname ${BASH_SOURCE[0]}) +CEPHADM_SRC_DIR=${SCRIPT_DIR}/../../../src/cephadm + +[ -d "$TMPDIR" ] || TMPDIR=$(mktemp -d tmp.$SCRIPT_NAME.XXXXXX) +trap "$SUDO rm -rf $TMPDIR" EXIT + +if [ -z "$CEPHADM" ]; then + CEPHADM=`mktemp -p $TMPDIR tmp.cephadm.XXXXXX` + ${CEPHADM_SRC_DIR}/build.sh "$CEPHADM" +fi + +# this is a pretty weak test, unfortunately, since the +# package may also be in the base OS. 
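+# [editor's note, not part of the original script] The function below chains
+# the four supported package managers with ||: it tries apt, then yum, then
+# dnf, then zypper, and the chain succeeds as soon as one of them can install
+# the cephadm package, run `cephadm install`, and remove the package again.
+# A manager that is absent on the host simply fails its branch and falls
+# through to the next one.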
+function test_install_uninstall() { + ( sudo apt update && \ + sudo apt -y install cephadm && \ + sudo $CEPHADM install && \ + sudo apt -y remove cephadm ) || \ + ( sudo yum -y install cephadm && \ + sudo $CEPHADM install && \ + sudo yum -y remove cephadm ) || \ + ( sudo dnf -y install cephadm && \ + sudo $CEPHADM install && \ + sudo dnf -y remove cephadm ) || \ + ( sudo zypper -n install cephadm && \ + sudo $CEPHADM install && \ + sudo zypper -n remove cephadm ) +} + +sudo $CEPHADM -v add-repo --release octopus +test_install_uninstall +sudo $CEPHADM -v rm-repo + +sudo $CEPHADM -v add-repo --dev main +test_install_uninstall +sudo $CEPHADM -v rm-repo + +sudo $CEPHADM -v add-repo --release 15.2.7 +test_install_uninstall +sudo $CEPHADM -v rm-repo + +echo OK. diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh new file mode 100755 index 000000000..aecfd56a9 --- /dev/null +++ b/qa/workunits/cephtool/test.sh @@ -0,0 +1,2991 @@ +#!/usr/bin/env bash +# -*- mode:shell-script; tab-width:8; sh-basic-offset:2; indent-tabs-mode:t -*- +# vim: ts=8 sw=8 ft=bash smarttab +set -x + +source $(dirname $0)/../../standalone/ceph-helpers.sh + +set -e +set -o functrace +PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: ' +SUDO=${SUDO:-sudo} +export CEPH_DEV=1 + +function check_no_osd_down() +{ + ! ceph osd dump | grep ' down ' +} + +function wait_no_osd_down() +{ + max_run=300 + for i in $(seq 1 $max_run) ; do + if ! check_no_osd_down ; then + echo "waiting for osd(s) to come back up ($i/$max_run)" + sleep 1 + else + break + fi + done + check_no_osd_down +} + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +function expect_true() +{ + set -x + if ! "$@"; then return 1; else return 0; fi +} + +TEMP_DIR=$(mktemp -d ${TMPDIR-/tmp}/cephtool.XXX) +trap "rm -fr $TEMP_DIR" 0 + +TMPFILE=$(mktemp $TEMP_DIR/test_invalid.XXX) + +# +# retry_eagain max cmd args ... +# +# retry cmd args ... if it exits on error and its output contains the +# string EAGAIN, at most $max times +# +function retry_eagain() +{ + local max=$1 + shift + local status + local tmpfile=$TEMP_DIR/retry_eagain.$$ + local count + for count in $(seq 1 $max) ; do + status=0 + "$@" > $tmpfile 2>&1 || status=$? + if test $status = 0 || + ! grep --quiet EAGAIN $tmpfile ; then + break + fi + sleep 1 + done + if test $count = $max ; then + echo retried with non zero exit status, $max times: "$@" >&2 + fi + cat $tmpfile + rm $tmpfile + return $status +} + +# +# map_enxio_to_eagain cmd arg ... +# +# add EAGAIN to the output of cmd arg ... if the output contains +# ENXIO. +# +function map_enxio_to_eagain() +{ + local status=0 + local tmpfile=$TEMP_DIR/map_enxio_to_eagain.$$ + + "$@" > $tmpfile 2>&1 || status=$? + if test $status != 0 && + grep --quiet ENXIO $tmpfile ; then + echo "EAGAIN added by $0::map_enxio_to_eagain" >> $tmpfile + fi + cat $tmpfile + rm $tmpfile + return $status +} + +function check_response() +{ + expected_string=$1 + retcode=$2 + expected_retcode=$3 + if [ "$expected_retcode" -a $retcode != $expected_retcode ] ; then + echo "return code invalid: got $retcode, expected $expected_retcode" >&2 + exit 1 + fi + + if ! grep --quiet -- "$expected_string" $TMPFILE ; then + echo "Didn't find $expected_string in output" >&2 + cat $TMPFILE >&2 + exit 1 + fi +} + +function get_config_value_or_die() +{ + local target config_opt raw val + + target=$1 + config_opt=$2 + + raw="`$SUDO ceph daemon $target config get $config_opt 2>/dev/null`" + if [[ $? 
-ne 0 ]]; then + echo "error obtaining config opt '$config_opt' from '$target': $raw" + exit 1 + fi + + raw=`echo $raw | sed -e 's/[{} "]//g'` + val=`echo $raw | cut -f2 -d:` + + echo "$val" + return 0 +} + +function expect_config_value() +{ + local target config_opt expected_val val + target=$1 + config_opt=$2 + expected_val=$3 + + val=$(get_config_value_or_die $target $config_opt) + + if [[ "$val" != "$expected_val" ]]; then + echo "expected '$expected_val', got '$val'" + exit 1 + fi +} + +function ceph_watch_start() +{ + local whatch_opt=--watch + + if [ -n "$1" ]; then + whatch_opt=--watch-$1 + if [ -n "$2" ]; then + whatch_opt+=" --watch-channel $2" + fi + fi + + CEPH_WATCH_FILE=${TEMP_DIR}/CEPH_WATCH_$$ + ceph $whatch_opt > $CEPH_WATCH_FILE & + CEPH_WATCH_PID=$! + + # wait until the "ceph" client is connected and receiving + # log messages from monitor + for i in `seq 3`; do + grep -q "cluster" $CEPH_WATCH_FILE && break + sleep 1 + done +} + +function ceph_watch_wait() +{ + local regexp=$1 + local timeout=30 + + if [ -n "$2" ]; then + timeout=$2 + fi + + for i in `seq ${timeout}`; do + grep -q "$regexp" $CEPH_WATCH_FILE && break + sleep 1 + done + + kill $CEPH_WATCH_PID + + if ! grep "$regexp" $CEPH_WATCH_FILE; then + echo "pattern ${regexp} not found in watch file. Full watch file content:" >&2 + cat $CEPH_WATCH_FILE >&2 + return 1 + fi +} + +function test_mon_injectargs() +{ + ceph tell osd.0 injectargs --no-osd_enable_op_tracker + ceph tell osd.0 config get osd_enable_op_tracker | grep false + ceph tell osd.0 injectargs '--osd_enable_op_tracker --osd_op_history_duration 500' + ceph tell osd.0 config get osd_enable_op_tracker | grep true + ceph tell osd.0 config get osd_op_history_duration | grep 500 + ceph tell osd.0 injectargs --no-osd_enable_op_tracker + ceph tell osd.0 config get osd_enable_op_tracker | grep false + ceph tell osd.0 injectargs -- --osd_enable_op_tracker + ceph tell osd.0 config get osd_enable_op_tracker | grep true + ceph tell osd.0 injectargs -- '--osd_enable_op_tracker --osd_op_history_duration 600' + ceph tell osd.0 config get osd_enable_op_tracker | grep true + ceph tell osd.0 config get osd_op_history_duration | grep 600 + + ceph tell osd.0 injectargs -- '--osd_deep_scrub_interval 2419200' + ceph tell osd.0 config get osd_deep_scrub_interval | grep 2419200 + + ceph tell osd.0 injectargs -- '--mon_probe_timeout 2' + ceph tell osd.0 config get mon_probe_timeout | grep 2 + + ceph tell osd.0 injectargs -- '--mon-lease 6' + ceph tell osd.0 config get mon_lease | grep 6 + + # osd-scrub-auto-repair-num-errors is an OPT_U32, so -1 is not a valid setting + expect_false ceph tell osd.0 injectargs --osd-scrub-auto-repair-num-errors -1 2> $TMPFILE || return 1 + check_response "Error EINVAL: Parse error setting osd_scrub_auto_repair_num_errors to '-1' using injectargs" + + expect_failure $TEMP_DIR "Option --osd_op_history_duration requires an argument" \ + ceph tell osd.0 injectargs -- '--osd_op_history_duration' + +} + +function test_mon_injectargs_SI() +{ + # Test SI units during injectargs and 'config set' + # We only aim at testing the units are parsed accordingly + # and don't intend to test whether the options being set + # actually expect SI units to be passed. + # Keep in mind that all integer based options that are not based on bytes + # (i.e., INT, LONG, U32, U64) will accept SI unit modifiers and be parsed to + # base 10. 
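+  # [editor's note, not part of the original script] Worked example of the
+  # SI parsing asserted below: for a plain integer option, 10K is parsed as
+  # 10 * 10^3 = 10000 and 1G as 10^9 = 1000000000, while "10F" is rejected
+  # with EINVAL (22) because F is not a recognized unit suffix.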
+ initial_value=$(get_config_value_or_die "mon.a" "mon_pg_warn_min_objects") + $SUDO ceph daemon mon.a config set mon_pg_warn_min_objects 10 + expect_config_value "mon.a" "mon_pg_warn_min_objects" 10 + $SUDO ceph daemon mon.a config set mon_pg_warn_min_objects 10K + expect_config_value "mon.a" "mon_pg_warn_min_objects" 10000 + $SUDO ceph daemon mon.a config set mon_pg_warn_min_objects 1G + expect_config_value "mon.a" "mon_pg_warn_min_objects" 1000000000 + $SUDO ceph daemon mon.a config set mon_pg_warn_min_objects 10F > $TMPFILE || true + check_response "(22) Invalid argument" + # now test with injectargs + ceph tell mon.a injectargs '--mon_pg_warn_min_objects 10' + expect_config_value "mon.a" "mon_pg_warn_min_objects" 10 + ceph tell mon.a injectargs '--mon_pg_warn_min_objects 10K' + expect_config_value "mon.a" "mon_pg_warn_min_objects" 10000 + ceph tell mon.a injectargs '--mon_pg_warn_min_objects 1G' + expect_config_value "mon.a" "mon_pg_warn_min_objects" 1000000000 + expect_false ceph tell mon.a injectargs '--mon_pg_warn_min_objects 10F' + expect_false ceph tell mon.a injectargs '--mon_globalid_prealloc -1' + $SUDO ceph daemon mon.a config set mon_pg_warn_min_objects $initial_value +} + +function test_mon_injectargs_IEC() +{ + # Test IEC units during injectargs and 'config set' + # We only aim at testing the units are parsed accordingly + # and don't intend to test whether the options being set + # actually expect IEC units to be passed. + # Keep in mind that all integer based options that are based on bytes + # (i.e., INT, LONG, U32, U64) will accept IEC unit modifiers, as well as SI + # unit modifiers (for backwards compatibility and convenience) and be parsed + # to base 2. + initial_value=$(get_config_value_or_die "mon.a" "mon_data_size_warn") + $SUDO ceph daemon mon.a config set mon_data_size_warn 15000000000 + expect_config_value "mon.a" "mon_data_size_warn" 15000000000 + $SUDO ceph daemon mon.a config set mon_data_size_warn 15G + expect_config_value "mon.a" "mon_data_size_warn" 16106127360 + $SUDO ceph daemon mon.a config set mon_data_size_warn 16Gi + expect_config_value "mon.a" "mon_data_size_warn" 17179869184 + $SUDO ceph daemon mon.a config set mon_data_size_warn 10F > $TMPFILE || true + check_response "(22) Invalid argument" + # now test with injectargs + ceph tell mon.a injectargs '--mon_data_size_warn 15000000000' + expect_config_value "mon.a" "mon_data_size_warn" 15000000000 + ceph tell mon.a injectargs '--mon_data_size_warn 15G' + expect_config_value "mon.a" "mon_data_size_warn" 16106127360 + ceph tell mon.a injectargs '--mon_data_size_warn 16Gi' + expect_config_value "mon.a" "mon_data_size_warn" 17179869184 + expect_false ceph tell mon.a injectargs '--mon_data_size_warn 10F' + $SUDO ceph daemon mon.a config set mon_data_size_warn $initial_value +} + +function test_tiering_agent() +{ + local slow=slow_eviction + local fast=fast_eviction + ceph osd pool create $slow 1 1 + ceph osd pool application enable $slow rados + ceph osd pool create $fast 1 1 + ceph osd tier add $slow $fast + ceph osd tier cache-mode $fast writeback + ceph osd tier set-overlay $slow $fast + ceph osd pool set $fast hit_set_type bloom + rados -p $slow put obj1 /etc/group + ceph osd pool set $fast target_max_objects 1 + ceph osd pool set $fast hit_set_count 1 + ceph osd pool set $fast hit_set_period 5 + # wait for the object to be evicted from the cache + local evicted + evicted=false + for i in `seq 1 300` ; do + if ! 
rados -p $fast ls | grep obj1 ; then + evicted=true + break + fi + sleep 1 + done + $evicted # assert + # the object is proxy read and promoted to the cache + rados -p $slow get obj1 - >/dev/null + # wait for the promoted object to be evicted again + evicted=false + for i in `seq 1 300` ; do + if ! rados -p $fast ls | grep obj1 ; then + evicted=true + break + fi + sleep 1 + done + $evicted # assert + ceph osd tier remove-overlay $slow + ceph osd tier remove $slow $fast + ceph osd pool delete $fast $fast --yes-i-really-really-mean-it + ceph osd pool delete $slow $slow --yes-i-really-really-mean-it +} + +function test_tiering_1() +{ + # tiering + ceph osd pool create slow 2 + ceph osd pool application enable slow rados + ceph osd pool create slow2 2 + ceph osd pool application enable slow2 rados + ceph osd pool create cache 2 + ceph osd pool create cache2 2 + ceph osd tier add slow cache + ceph osd tier add slow cache2 + expect_false ceph osd tier add slow2 cache + # application metadata should propagate to the tiers + ceph osd pool ls detail -f json | jq '.[] | select(.pool_name == "slow") | .application_metadata["rados"]' | grep '{}' + ceph osd pool ls detail -f json | jq '.[] | select(.pool_name == "slow2") | .application_metadata["rados"]' | grep '{}' + ceph osd pool ls detail -f json | jq '.[] | select(.pool_name == "cache") | .application_metadata["rados"]' | grep '{}' + ceph osd pool ls detail -f json | jq '.[] | select(.pool_name == "cache2") | .application_metadata["rados"]' | grep '{}' + # forward is removed/deprecated + expect_false ceph osd tier cache-mode cache forward + expect_false ceph osd tier cache-mode cache forward --yes-i-really-mean-it + # test some state transitions + ceph osd tier cache-mode cache writeback + expect_false ceph osd tier cache-mode cache readonly + expect_false ceph osd tier cache-mode cache readonly --yes-i-really-mean-it + ceph osd tier cache-mode cache proxy + ceph osd tier cache-mode cache readproxy + ceph osd tier cache-mode cache none + ceph osd tier cache-mode cache readonly --yes-i-really-mean-it + ceph osd tier cache-mode cache none + ceph osd tier cache-mode cache writeback + ceph osd tier cache-mode cache proxy + ceph osd tier cache-mode cache writeback + expect_false ceph osd tier cache-mode cache none + expect_false ceph osd tier cache-mode cache readonly --yes-i-really-mean-it + # test with dirty objects in the tier pool + # tier pool currently set to 'writeback' + rados -p cache put /etc/passwd /etc/passwd + flush_pg_stats + # 1 dirty object in pool 'cache' + ceph osd tier cache-mode cache proxy + expect_false ceph osd tier cache-mode cache none + expect_false ceph osd tier cache-mode cache readonly --yes-i-really-mean-it + ceph osd tier cache-mode cache writeback + # remove object from tier pool + rados -p cache rm /etc/passwd + rados -p cache cache-flush-evict-all + flush_pg_stats + # no dirty objects in pool 'cache' + ceph osd tier cache-mode cache proxy + ceph osd tier cache-mode cache none + ceph osd tier cache-mode cache readonly --yes-i-really-mean-it + TRIES=0 + while ! 
ceph osd pool set cache pg_num 3 --yes-i-really-mean-it 2>$TMPFILE + do + grep 'currently creating pgs' $TMPFILE + TRIES=$(( $TRIES + 1 )) + test $TRIES -ne 60 + sleep 3 + done + expect_false ceph osd pool set cache pg_num 4 + ceph osd tier cache-mode cache none + ceph osd tier set-overlay slow cache + expect_false ceph osd tier set-overlay slow cache2 + expect_false ceph osd tier remove slow cache + ceph osd tier remove-overlay slow + ceph osd tier set-overlay slow cache2 + ceph osd tier remove-overlay slow + ceph osd tier remove slow cache + ceph osd tier add slow2 cache + expect_false ceph osd tier set-overlay slow cache + ceph osd tier set-overlay slow2 cache + ceph osd tier remove-overlay slow2 + ceph osd tier remove slow2 cache + ceph osd tier remove slow cache2 + + # make sure a non-empty pool fails + rados -p cache2 put /etc/passwd /etc/passwd + while ! ceph df | grep cache2 | grep ' 1 ' ; do + echo waiting for pg stats to flush + sleep 2 + done + expect_false ceph osd tier add slow cache2 + ceph osd tier add slow cache2 --force-nonempty + ceph osd tier remove slow cache2 + + ceph osd pool ls | grep cache2 + ceph osd pool ls -f json-pretty | grep cache2 + ceph osd pool ls detail | grep cache2 + ceph osd pool ls detail -f json-pretty | grep cache2 + + ceph osd pool delete slow slow --yes-i-really-really-mean-it + ceph osd pool delete slow2 slow2 --yes-i-really-really-mean-it + ceph osd pool delete cache cache --yes-i-really-really-mean-it + ceph osd pool delete cache2 cache2 --yes-i-really-really-mean-it +} + +function test_tiering_2() +{ + # make sure we can't clobber snapshot state + ceph osd pool create snap_base 2 + ceph osd pool application enable snap_base rados + ceph osd pool create snap_cache 2 + ceph osd pool mksnap snap_cache snapname + expect_false ceph osd tier add snap_base snap_cache + ceph osd pool delete snap_base snap_base --yes-i-really-really-mean-it + ceph osd pool delete snap_cache snap_cache --yes-i-really-really-mean-it +} + +function test_tiering_3() +{ + # make sure we can't create snapshot on tier + ceph osd pool create basex 2 + ceph osd pool application enable basex rados + ceph osd pool create cachex 2 + ceph osd tier add basex cachex + expect_false ceph osd pool mksnap cache snapname + ceph osd tier remove basex cachex + ceph osd pool delete basex basex --yes-i-really-really-mean-it + ceph osd pool delete cachex cachex --yes-i-really-really-mean-it +} + +function test_tiering_4() +{ + # make sure we can't create an ec pool tier + ceph osd pool create eccache 2 2 erasure + expect_false ceph osd set-require-min-compat-client bobtail + ceph osd pool create repbase 2 + ceph osd pool application enable repbase rados + expect_false ceph osd tier add repbase eccache + ceph osd pool delete repbase repbase --yes-i-really-really-mean-it + ceph osd pool delete eccache eccache --yes-i-really-really-mean-it +} + +function test_tiering_5() +{ + # convenient add-cache command + ceph osd pool create slow 2 + ceph osd pool application enable slow rados + ceph osd pool create cache3 2 + ceph osd tier add-cache slow cache3 1024000 + ceph osd dump | grep cache3 | grep bloom | grep 'false_positive_probability: 0.05' | grep 'target_bytes 1024000' | grep '1200s x4' + ceph osd tier remove slow cache3 2> $TMPFILE || true + check_response "EBUSY: tier pool 'cache3' is the overlay for 'slow'; please remove-overlay first" + ceph osd tier remove-overlay slow + ceph osd tier remove slow cache3 + ceph osd pool ls | grep cache3 + ceph osd pool delete cache3 cache3 
--yes-i-really-really-mean-it + ! ceph osd pool ls | grep cache3 || exit 1 + ceph osd pool delete slow slow --yes-i-really-really-mean-it +} + +function test_tiering_6() +{ + # check add-cache whether work + ceph osd pool create datapool 2 + ceph osd pool application enable datapool rados + ceph osd pool create cachepool 2 + ceph osd tier add-cache datapool cachepool 1024000 + ceph osd tier cache-mode cachepool writeback + rados -p datapool put object /etc/passwd + rados -p cachepool stat object + rados -p cachepool cache-flush object + rados -p datapool stat object + ceph osd tier remove-overlay datapool + ceph osd tier remove datapool cachepool + ceph osd pool delete cachepool cachepool --yes-i-really-really-mean-it + ceph osd pool delete datapool datapool --yes-i-really-really-mean-it +} + +function test_tiering_7() +{ + # protection against pool removal when used as tiers + ceph osd pool create datapool 2 + ceph osd pool application enable datapool rados + ceph osd pool create cachepool 2 + ceph osd tier add-cache datapool cachepool 1024000 + ceph osd pool delete cachepool cachepool --yes-i-really-really-mean-it 2> $TMPFILE || true + check_response "EBUSY: pool 'cachepool' is a tier of 'datapool'" + ceph osd pool delete datapool datapool --yes-i-really-really-mean-it 2> $TMPFILE || true + check_response "EBUSY: pool 'datapool' has tiers cachepool" + ceph osd tier remove-overlay datapool + ceph osd tier remove datapool cachepool + ceph osd pool delete cachepool cachepool --yes-i-really-really-mean-it + ceph osd pool delete datapool datapool --yes-i-really-really-mean-it +} + +function test_tiering_8() +{ + ## check health check + ceph osd set notieragent + ceph osd pool create datapool 2 + ceph osd pool application enable datapool rados + ceph osd pool create cache4 2 + ceph osd tier add-cache datapool cache4 1024000 + ceph osd tier cache-mode cache4 writeback + tmpfile=$(mktemp|grep tmp) + dd if=/dev/zero of=$tmpfile bs=4K count=1 + ceph osd pool set cache4 target_max_objects 200 + ceph osd pool set cache4 target_max_bytes 1000000 + rados -p cache4 put foo1 $tmpfile + rados -p cache4 put foo2 $tmpfile + rm -f $tmpfile + flush_pg_stats + ceph df | grep datapool | grep ' 2 ' + ceph osd tier remove-overlay datapool + ceph osd tier remove datapool cache4 + ceph osd pool delete cache4 cache4 --yes-i-really-really-mean-it + ceph osd pool delete datapool datapool --yes-i-really-really-mean-it + ceph osd unset notieragent +} + +function test_tiering_9() +{ + # make sure 'tier remove' behaves as we expect + # i.e., removing a tier from a pool that's not its base pool only + # results in a 'pool foo is now (or already was) not a tier of bar' + # + ceph osd pool create basepoolA 2 + ceph osd pool application enable basepoolA rados + ceph osd pool create basepoolB 2 + ceph osd pool application enable basepoolB rados + poolA_id=$(ceph osd dump | grep 'pool.*basepoolA' | awk '{print $2;}') + poolB_id=$(ceph osd dump | grep 'pool.*basepoolB' | awk '{print $2;}') + + ceph osd pool create cache5 2 + ceph osd pool create cache6 2 + ceph osd tier add basepoolA cache5 + ceph osd tier add basepoolB cache6 + ceph osd tier remove basepoolB cache5 2>&1 | grep 'not a tier of' + ceph osd dump | grep "pool.*'cache5'" 2>&1 | grep "tier_of[ \t]\+$poolA_id" + ceph osd tier remove basepoolA cache6 2>&1 | grep 'not a tier of' + ceph osd dump | grep "pool.*'cache6'" 2>&1 | grep "tier_of[ \t]\+$poolB_id" + + ceph osd tier remove basepoolA cache5 2>&1 | grep 'not a tier of' + ! 
ceph osd dump | grep "pool.*'cache5'" 2>&1 | grep "tier_of" || exit 1 + ceph osd tier remove basepoolB cache6 2>&1 | grep 'not a tier of' + ! ceph osd dump | grep "pool.*'cache6'" 2>&1 | grep "tier_of" || exit 1 + + ! ceph osd dump | grep "pool.*'basepoolA'" 2>&1 | grep "tiers" || exit 1 + ! ceph osd dump | grep "pool.*'basepoolB'" 2>&1 | grep "tiers" || exit 1 + + ceph osd pool delete cache6 cache6 --yes-i-really-really-mean-it + ceph osd pool delete cache5 cache5 --yes-i-really-really-mean-it + ceph osd pool delete basepoolB basepoolB --yes-i-really-really-mean-it + ceph osd pool delete basepoolA basepoolA --yes-i-really-really-mean-it +} + +function test_auth() +{ + expect_false ceph auth add client.xx mon 'invalid' osd "allow *" + expect_false ceph auth add client.xx mon 'allow *' osd "allow *" invalid "allow *" + ceph auth add client.xx mon 'allow *' osd "allow *" + ceph auth export client.xx >client.xx.keyring + ceph auth add client.xx -i client.xx.keyring + rm -f client.xx.keyring + ceph auth list | grep client.xx + ceph auth ls | grep client.xx + ceph auth get client.xx | grep caps | grep mon + ceph auth get client.xx | grep caps | grep osd + ceph auth get-key client.xx + ceph auth print-key client.xx + ceph auth print_key client.xx + ceph auth caps client.xx osd "allow rw" + expect_false sh <<< "ceph auth get client.xx | grep caps | grep mon" + ceph auth get client.xx | grep osd | grep "allow rw" + ceph auth caps client.xx mon 'allow command "osd tree"' + ceph auth export | grep client.xx + ceph auth export -o authfile + ceph auth import -i authfile + + ceph auth export -o authfile2 + diff authfile authfile2 + rm authfile authfile2 + ceph auth del client.xx + expect_false ceph auth get client.xx + + # (almost) interactive mode + echo -e 'auth add client.xx mon "allow *" osd "allow *"\n' | ceph + ceph auth get client.xx + # script mode + echo 'auth del client.xx' | ceph + expect_false ceph auth get client.xx +} + +function test_auth_profiles() +{ + ceph auth add client.xx-profile-ro mon 'allow profile read-only' \ + mgr 'allow profile read-only' + ceph auth add client.xx-profile-rw mon 'allow profile read-write' \ + mgr 'allow profile read-write' + ceph auth add client.xx-profile-rd mon 'allow profile role-definer' + + ceph auth export > client.xx.keyring + + # read-only is allowed all read-only commands (auth excluded) + ceph -n client.xx-profile-ro -k client.xx.keyring status + ceph -n client.xx-profile-ro -k client.xx.keyring osd dump + ceph -n client.xx-profile-ro -k client.xx.keyring pg dump + ceph -n client.xx-profile-ro -k client.xx.keyring mon dump + # read-only gets access denied for rw commands or auth commands + ceph -n client.xx-profile-ro -k client.xx.keyring log foo >& $TMPFILE || true + check_response "EACCES: access denied" + ceph -n client.xx-profile-ro -k client.xx.keyring osd set noout >& $TMPFILE || true + check_response "EACCES: access denied" + ceph -n client.xx-profile-ro -k client.xx.keyring auth ls >& $TMPFILE || true + check_response "EACCES: access denied" + + # read-write is allowed for all read-write commands (except auth) + ceph -n client.xx-profile-rw -k client.xx.keyring status + ceph -n client.xx-profile-rw -k client.xx.keyring osd dump + ceph -n client.xx-profile-rw -k client.xx.keyring pg dump + ceph -n client.xx-profile-rw -k client.xx.keyring mon dump + ceph -n client.xx-profile-rw -k client.xx.keyring fs dump + ceph -n client.xx-profile-rw -k client.xx.keyring log foo + ceph -n client.xx-profile-rw -k client.xx.keyring osd set noout + ceph -n 
client.xx-profile-rw -k client.xx.keyring osd unset noout + # read-write gets access denied for auth commands + ceph -n client.xx-profile-rw -k client.xx.keyring auth ls >& $TMPFILE || true + check_response "EACCES: access denied" + + # role-definer is allowed RWX 'auth' commands and read-only 'mon' commands + ceph -n client.xx-profile-rd -k client.xx.keyring auth ls + ceph -n client.xx-profile-rd -k client.xx.keyring auth export + ceph -n client.xx-profile-rd -k client.xx.keyring auth add client.xx-profile-foo + ceph -n client.xx-profile-rd -k client.xx.keyring status + ceph -n client.xx-profile-rd -k client.xx.keyring osd dump >& $TMPFILE || true + check_response "EACCES: access denied" + ceph -n client.xx-profile-rd -k client.xx.keyring pg dump >& $TMPFILE || true + check_response "EACCES: access denied" + # read-only 'mon' subsystem commands are allowed + ceph -n client.xx-profile-rd -k client.xx.keyring mon dump + # but read-write 'mon' commands are not + ceph -n client.xx-profile-rd -k client.xx.keyring mon add foo 1.1.1.1 >& $TMPFILE || true + check_response "EACCES: access denied" + ceph -n client.xx-profile-rd -k client.xx.keyring fs dump >& $TMPFILE || true + check_response "EACCES: access denied" + ceph -n client.xx-profile-rd -k client.xx.keyring log foo >& $TMPFILE || true + check_response "EACCES: access denied" + ceph -n client.xx-profile-rd -k client.xx.keyring osd set noout >& $TMPFILE || true + check_response "EACCES: access denied" + + ceph -n client.xx-profile-rd -k client.xx.keyring auth del client.xx-profile-ro + ceph -n client.xx-profile-rd -k client.xx.keyring auth del client.xx-profile-rw + + # add a new role-definer with the existing role-definer + ceph -n client.xx-profile-rd -k client.xx.keyring \ + auth add client.xx-profile-rd2 mon 'allow profile role-definer' + ceph -n client.xx-profile-rd -k client.xx.keyring \ + auth export > client.xx.keyring.2 + # remove old role-definer using the new role-definer + ceph -n client.xx-profile-rd2 -k client.xx.keyring.2 \ + auth del client.xx-profile-rd + # remove the remaining role-definer with admin + ceph auth del client.xx-profile-rd2 + rm -f client.xx.keyring client.xx.keyring.2 +} + +function test_mon_caps() +{ + ceph-authtool --create-keyring $TEMP_DIR/ceph.client.bug.keyring + chmod +r $TEMP_DIR/ceph.client.bug.keyring + ceph-authtool $TEMP_DIR/ceph.client.bug.keyring -n client.bug --gen-key + ceph auth add client.bug -i $TEMP_DIR/ceph.client.bug.keyring + + # pass --no-mon-config since we are looking for the permission denied error + rados lspools --no-mon-config --keyring $TEMP_DIR/ceph.client.bug.keyring -n client.bug >& $TMPFILE || true + cat $TMPFILE + check_response "Permission denied" + + rm -rf $TEMP_DIR/ceph.client.bug.keyring + ceph auth del client.bug + ceph-authtool --create-keyring $TEMP_DIR/ceph.client.bug.keyring + chmod +r $TEMP_DIR/ceph.client.bug.keyring + ceph-authtool $TEMP_DIR/ceph.client.bug.keyring -n client.bug --gen-key + ceph-authtool -n client.bug --cap mon '' $TEMP_DIR/ceph.client.bug.keyring + ceph auth add client.bug -i $TEMP_DIR/ceph.client.bug.keyring + rados lspools --no-mon-config --keyring $TEMP_DIR/ceph.client.bug.keyring -n client.bug >& $TMPFILE || true + check_response "Permission denied" +} + +function test_mon_misc() +{ + # with and without verbosity + ceph osd dump | grep '^epoch' + ceph --concise osd dump | grep '^epoch' + + ceph osd df | grep 'MIN/MAX VAR' + + # df + ceph df > $TMPFILE + grep RAW $TMPFILE + grep -v DIRTY $TMPFILE + ceph df detail > $TMPFILE + grep DIRTY 
$TMPFILE
+ ceph df --format json > $TMPFILE
+ grep 'total_bytes' $TMPFILE
+ grep -v 'dirty' $TMPFILE
+ ceph df detail --format json > $TMPFILE
+ grep 'rd_bytes' $TMPFILE
+ grep 'dirty' $TMPFILE
+ ceph df --format xml | grep '<total_bytes>'
+ ceph df detail --format xml | grep '<rd_bytes>'
+
+ ceph fsid
+ ceph health
+ ceph health detail
+ ceph health --format json-pretty
+ ceph health detail --format xml-pretty
+
+ ceph time-sync-status
+
+ ceph node ls
+ for t in mon osd mds mgr ; do
+ ceph node ls $t
+ done
+
+ ceph_watch_start
+ mymsg="this is a test log message $$.$(date)"
+ ceph log "$mymsg"
+ ceph log last | grep "$mymsg"
+ ceph log last 100 | grep "$mymsg"
+ ceph_watch_wait "$mymsg"
+
+ ceph mgr stat
+ ceph mgr dump
+ ceph mgr dump | jq -e '.active_clients[0].name'
+ ceph mgr module ls
+ ceph mgr module enable restful
+ expect_false ceph mgr module enable foodne
+ ceph mgr module enable foodne --force
+ ceph mgr module disable foodne
+ ceph mgr module disable foodnebizbangbash
+
+ ceph mon metadata a
+ ceph mon metadata
+ ceph mon count-metadata ceph_version
+ ceph mon versions
+
+ ceph mgr metadata
+ ceph mgr versions
+ ceph mgr count-metadata ceph_version
+
+ ceph versions
+
+ ceph node ls
+}
+
+function check_mds_active()
+{
+ fs_name=$1
+ ceph fs get $fs_name | grep active
+}
+
+function wait_mds_active()
+{
+ fs_name=$1
+ max_run=300
+ for i in $(seq 1 $max_run) ; do
+ if ! check_mds_active $fs_name ; then
+ echo "waiting for an active MDS daemon ($i/$max_run)"
+ sleep 5
+ else
+ break
+ fi
+ done
+ check_mds_active $fs_name
+}
+
+function get_mds_gids()
+{
+ fs_name=$1
+ ceph fs get $fs_name --format=json | python3 -c "import json; import sys; print(' '.join([m['gid'].__str__() for m in json.load(sys.stdin)['mdsmap']['info'].values()]))"
+}
+
+function fail_all_mds()
+{
+ fs_name=$1
+ ceph fs set $fs_name cluster_down true
+ mds_gids=$(get_mds_gids $fs_name)
+ for mds_gid in $mds_gids ; do
+ ceph mds fail $mds_gid
+ done
+ if check_mds_active $fs_name ; then
+ echo "An active MDS remains, something went wrong"
+ ceph fs get $fs_name
+ exit -1
+ fi
+
+}
+
+function remove_all_fs()
+{
+ existing_fs=$(ceph fs ls --format=json | python3 -c "import json; import sys; print(' '.join([fs['name'] for fs in json.load(sys.stdin)]))")
+ for fs_name in $existing_fs ; do
+ echo "Removing fs ${fs_name}..."
+ fail_all_mds $fs_name
+ echo "Removing existing filesystem '${fs_name}'..."
+ ceph fs rm $fs_name --yes-i-really-mean-it
+ echo "Removed '${fs_name}'."
+ done
+}
+
+# So that tests requiring MDS can skip if one is not configured
+# in the cluster at all
+function mds_exists()
+{
+ ceph auth ls | grep "^mds"
+}
+
+# Some of the commands are not idempotent; run them with
+# CEPH_CLI_TEST_DUP_COMMAND temporarily unset.
+function without_test_dup_command()
+{
+ if [ -z ${CEPH_CLI_TEST_DUP_COMMAND+x} ]; then
+ $@
+ else
+ local saved=${CEPH_CLI_TEST_DUP_COMMAND}
+ unset CEPH_CLI_TEST_DUP_COMMAND
+ $@
+ CEPH_CLI_TEST_DUP_COMMAND=$saved
+ fi
+}
+
+function test_mds_tell()
+{
+ local FS_NAME=cephfs
+ if !
mds_exists ; then
+ echo "Skipping test, no MDS found"
+ return
+ fi
+
+ remove_all_fs
+ ceph osd pool create fs_data 16
+ ceph osd pool create fs_metadata 16
+ ceph fs new $FS_NAME fs_metadata fs_data
+ wait_mds_active $FS_NAME
+
+ # Test injectargs by GID
+ old_mds_gids=$(get_mds_gids $FS_NAME)
+ echo Old GIDs: $old_mds_gids
+
+ for mds_gid in $old_mds_gids ; do
+ ceph tell mds.$mds_gid injectargs "--debug-mds 20"
+ done
+ expect_false ceph tell mds.a injectargs mds_max_file_recover -1
+
+ # Test respawn by rank
+ without_test_dup_command ceph tell mds.0 respawn
+ new_mds_gids=$old_mds_gids
+ while [ $new_mds_gids -eq $old_mds_gids ] ; do
+ sleep 5
+ new_mds_gids=$(get_mds_gids $FS_NAME)
+ done
+ echo New GIDs: $new_mds_gids
+
+ # Test respawn by ID
+ without_test_dup_command ceph tell mds.a respawn
+ new_mds_gids=$old_mds_gids
+ while [ $new_mds_gids -eq $old_mds_gids ] ; do
+ sleep 5
+ new_mds_gids=$(get_mds_gids $FS_NAME)
+ done
+ echo New GIDs: $new_mds_gids
+
+ remove_all_fs
+ ceph osd pool delete fs_data fs_data --yes-i-really-really-mean-it
+ ceph osd pool delete fs_metadata fs_metadata --yes-i-really-really-mean-it
+}
+
+function test_mon_mds()
+{
+ local FS_NAME=cephfs
+ remove_all_fs
+
+ ceph osd pool create fs_data 16
+ ceph osd pool create fs_metadata 16
+ ceph fs new $FS_NAME fs_metadata fs_data
+
+ ceph fs set $FS_NAME cluster_down true
+ ceph fs set $FS_NAME cluster_down false
+
+ ceph mds compat rm_incompat 4
+ ceph mds compat rm_incompat 4
+
+ # We don't want any MDSs to be up; their activity can interfere with
+ # the "current_epoch + 1" checking below if they're generating updates
+ fail_all_mds $FS_NAME
+
+ ceph mds compat show
+ ceph fs dump
+ ceph fs get $FS_NAME
+ for mds_gid in $(get_mds_gids $FS_NAME) ; do
+ ceph mds metadata $mds_gid
+ done
+ ceph mds metadata
+ ceph mds versions
+ ceph mds count-metadata os
+
+ # XXX mds fail, but how do you undo it?
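+ # (Hedged note: there seems to be no explicit "un-fail" command; once a rank
+ # is marked failed, a standby, or the respawned daemon, is expected to take
+ # the rank over again when the filesystem is allowed to come back up.)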
+ mdsmapfile=$TEMP_DIR/mdsmap.$$ + current_epoch=$(ceph fs dump -o $mdsmapfile --no-log-to-stderr 2>&1 | grep epoch | sed 's/.*epoch //') + [ -s $mdsmapfile ] + rm $mdsmapfile + + ceph osd pool create data2 16 + ceph osd pool create data3 16 + data2_pool=$(ceph osd dump | grep "pool.*'data2'" | awk '{print $2;}') + data3_pool=$(ceph osd dump | grep "pool.*'data3'" | awk '{print $2;}') + ceph fs add_data_pool cephfs $data2_pool + ceph fs add_data_pool cephfs $data3_pool + ceph fs add_data_pool cephfs 100 >& $TMPFILE || true + check_response "Error ENOENT" + ceph fs add_data_pool cephfs foobarbaz >& $TMPFILE || true + check_response "Error ENOENT" + ceph fs rm_data_pool cephfs $data2_pool + ceph fs rm_data_pool cephfs $data3_pool + ceph osd pool delete data2 data2 --yes-i-really-really-mean-it + ceph osd pool delete data3 data3 --yes-i-really-really-mean-it + ceph fs set cephfs max_mds 4 + ceph fs set cephfs max_mds 3 + ceph fs set cephfs max_mds 256 + expect_false ceph fs set cephfs max_mds 257 + ceph fs set cephfs max_mds 4 + ceph fs set cephfs max_mds 256 + expect_false ceph fs set cephfs max_mds 257 + expect_false ceph fs set cephfs max_mds asdf + expect_false ceph fs set cephfs inline_data true + ceph fs set cephfs inline_data true --yes-i-really-really-mean-it + ceph fs set cephfs inline_data yes --yes-i-really-really-mean-it + ceph fs set cephfs inline_data 1 --yes-i-really-really-mean-it + expect_false ceph fs set cephfs inline_data --yes-i-really-really-mean-it + ceph fs set cephfs inline_data false + ceph fs set cephfs inline_data no + ceph fs set cephfs inline_data 0 + expect_false ceph fs set cephfs inline_data asdf + ceph fs set cephfs max_file_size 1048576 + expect_false ceph fs set cephfs max_file_size 123asdf + + expect_false ceph fs set cephfs allow_new_snaps + ceph fs set cephfs allow_new_snaps true + ceph fs set cephfs allow_new_snaps 0 + ceph fs set cephfs allow_new_snaps false + ceph fs set cephfs allow_new_snaps no + expect_false ceph fs set cephfs allow_new_snaps taco + + # we should never be able to add EC pools as data or metadata pools + # create an ec-pool... + ceph osd pool create mds-ec-pool 16 16 erasure + set +e + ceph fs add_data_pool cephfs mds-ec-pool 2>$TMPFILE + check_response 'erasure-code' $? 
22 + set -e + ec_poolnum=$(ceph osd dump | grep "pool.* 'mds-ec-pool" | awk '{print $2;}') + data_poolnum=$(ceph osd dump | grep "pool.* 'fs_data" | awk '{print $2;}') + metadata_poolnum=$(ceph osd dump | grep "pool.* 'fs_metadata" | awk '{print $2;}') + + fail_all_mds $FS_NAME + + set +e + # Check that rmfailed requires confirmation + expect_false ceph mds rmfailed 0 + ceph mds rmfailed 0 --yes-i-really-mean-it + set -e + + # Check that `fs new` is no longer permitted + expect_false ceph fs new cephfs $metadata_poolnum $data_poolnum --yes-i-really-mean-it 2>$TMPFILE + + # Check that 'fs reset' runs + ceph fs reset $FS_NAME --yes-i-really-mean-it + + # Check that creating a second FS fails by default + ceph osd pool create fs_metadata2 16 + ceph osd pool create fs_data2 16 + set +e + expect_false ceph fs new cephfs2 fs_metadata2 fs_data2 + set -e + + # Check that setting enable_multiple enables creation of second fs + ceph fs flag set enable_multiple true --yes-i-really-mean-it + ceph fs new cephfs2 fs_metadata2 fs_data2 + + # Clean up multi-fs stuff + fail_all_mds cephfs2 + ceph fs rm cephfs2 --yes-i-really-mean-it + ceph osd pool delete fs_metadata2 fs_metadata2 --yes-i-really-really-mean-it + ceph osd pool delete fs_data2 fs_data2 --yes-i-really-really-mean-it + + fail_all_mds $FS_NAME + + # Clean up to enable subsequent fs new tests + ceph fs rm $FS_NAME --yes-i-really-mean-it + + set +e + ceph fs new $FS_NAME fs_metadata mds-ec-pool --force 2>$TMPFILE + check_response 'erasure-code' $? 22 + ceph fs new $FS_NAME mds-ec-pool fs_data 2>$TMPFILE + check_response 'already used by filesystem' $? 22 + ceph fs new $FS_NAME mds-ec-pool fs_data --force 2>$TMPFILE + check_response 'erasure-code' $? 22 + ceph fs new $FS_NAME mds-ec-pool mds-ec-pool 2>$TMPFILE + check_response 'erasure-code' $? 22 + set -e + + # ... new create a cache tier in front of the EC pool... + ceph osd pool create mds-tier 2 + ceph osd tier add mds-ec-pool mds-tier + ceph osd tier set-overlay mds-ec-pool mds-tier + tier_poolnum=$(ceph osd dump | grep "pool.* 'mds-tier" | awk '{print $2;}') + + # Use of a readonly tier should be forbidden + ceph osd tier cache-mode mds-tier readonly --yes-i-really-mean-it + set +e + ceph fs new $FS_NAME fs_metadata mds-ec-pool --force 2>$TMPFILE + check_response 'has a write tier (mds-tier) that is configured to forward' $? 22 + set -e + + # Use of a writeback tier should enable FS creation + ceph osd tier cache-mode mds-tier writeback + ceph fs new $FS_NAME fs_metadata mds-ec-pool --force + + # While a FS exists using the tiered pools, I should not be allowed + # to remove the tier + set +e + ceph osd tier remove-overlay mds-ec-pool 2>$TMPFILE + check_response 'in use by CephFS' $? 16 + ceph osd tier remove mds-ec-pool mds-tier 2>$TMPFILE + check_response 'in use by CephFS' $? 16 + set -e + + fail_all_mds $FS_NAME + ceph fs rm $FS_NAME --yes-i-really-mean-it + + # ... but we should be forbidden from using the cache pool in the FS directly. + set +e + ceph fs new $FS_NAME fs_metadata mds-tier --force 2>$TMPFILE + check_response 'in use as a cache tier' $? 22 + ceph fs new $FS_NAME mds-tier fs_data 2>$TMPFILE + check_response 'already used by filesystem' $? 22 + ceph fs new $FS_NAME mds-tier fs_data --force 2>$TMPFILE + check_response 'in use as a cache tier' $? 22 + ceph fs new $FS_NAME mds-tier mds-tier 2>$TMPFILE + check_response 'already used by filesystem' $? 22 + ceph fs new $FS_NAME mds-tier mds-tier --force 2>$TMPFILE + check_response 'in use as a cache tier' $? 
22 + set -e + + # Clean up tier + EC pools + ceph osd tier remove-overlay mds-ec-pool + ceph osd tier remove mds-ec-pool mds-tier + + # Create a FS using the 'cache' pool now that it's no longer a tier + ceph fs new $FS_NAME fs_metadata mds-tier --force + + # We should be forbidden from using this pool as a tier now that + # it's in use for CephFS + set +e + ceph osd tier add mds-ec-pool mds-tier 2>$TMPFILE + check_response 'in use by CephFS' $? 16 + set -e + + fail_all_mds $FS_NAME + ceph fs rm $FS_NAME --yes-i-really-mean-it + + # We should be permitted to use an EC pool with overwrites enabled + # as the data pool... + ceph osd pool set mds-ec-pool allow_ec_overwrites true + ceph fs new $FS_NAME fs_metadata mds-ec-pool --force 2>$TMPFILE + fail_all_mds $FS_NAME + ceph fs rm $FS_NAME --yes-i-really-mean-it + + # ...but not as the metadata pool + set +e + ceph fs new $FS_NAME mds-ec-pool fs_data 2>$TMPFILE + check_response 'already used by filesystem' $? 22 + ceph fs new $FS_NAME mds-ec-pool fs_data --force 2>$TMPFILE + check_response 'erasure-code' $? 22 + set -e + + ceph osd pool delete mds-ec-pool mds-ec-pool --yes-i-really-really-mean-it + + # Create a FS and check that we can subsequently add a cache tier to it + ceph fs new $FS_NAME fs_metadata fs_data --force + + # Adding overlay to FS pool should be permitted, RADOS clients handle this. + ceph osd tier add fs_metadata mds-tier + ceph osd tier cache-mode mds-tier writeback + ceph osd tier set-overlay fs_metadata mds-tier + + # Removing tier should be permitted because the underlying pool is + # replicated (#11504 case) + ceph osd tier cache-mode mds-tier proxy + ceph osd tier remove-overlay fs_metadata + ceph osd tier remove fs_metadata mds-tier + ceph osd pool delete mds-tier mds-tier --yes-i-really-really-mean-it + + # Clean up FS + fail_all_mds $FS_NAME + ceph fs rm $FS_NAME --yes-i-really-mean-it + + + + ceph mds stat + # ceph mds tell mds.a getmap + # ceph mds rm + # ceph mds rmfailed + # ceph mds set_state + + ceph osd pool delete fs_data fs_data --yes-i-really-really-mean-it + ceph osd pool delete fs_metadata fs_metadata --yes-i-really-really-mean-it +} + +function test_mon_mds_metadata() +{ + local nmons=$(ceph tell 'mon.*' version | grep -c 'version') + test "$nmons" -gt 0 + + ceph fs dump | + sed -nEe "s/^([0-9]+):.*'([a-z])' mds\\.([0-9]+)\\..*/\\1 \\2 \\3/p" | + while read gid id rank; do + ceph mds metadata ${gid} | grep '"hostname":' + ceph mds metadata ${id} | grep '"hostname":' + ceph mds metadata ${rank} | grep '"hostname":' + + local n=$(ceph tell 'mon.*' mds metadata ${id} | grep -c '"hostname":') + test "$n" -eq "$nmons" + done + + expect_false ceph mds metadata UNKNOWN +} + +function test_mon_mon() +{ + # print help message + ceph --help mon + # -h works even when some arguments are passed + ceph osd dump -h | grep 'osd dump' + ceph osd dump 123 -h | grep 'osd dump' + # no mon add/remove + ceph mon dump + ceph mon getmap -o $TEMP_DIR/monmap.$$ + [ -s $TEMP_DIR/monmap.$$ ] + + # ceph mon tell + first=$(ceph mon dump -f json | jq -r '.mons[0].name') + ceph tell mon.$first mon_status + + # test mon features + ceph mon feature ls + ceph mon feature set kraken --yes-i-really-mean-it + expect_false ceph mon feature set abcd + expect_false ceph mon feature set abcd --yes-i-really-mean-it + + # test elector + expect_failure $TEMP_DIR ceph mon add disallowed_leader $first + ceph mon set election_strategy disallow + ceph mon add disallowed_leader $first + ceph mon set election_strategy connectivity + ceph mon rm 
disallowed_leader $first + ceph mon set election_strategy classic + expect_failure $TEMP_DIR ceph mon rm disallowed_leader $first + + # test mon stat + # don't check output, just ensure it does not fail. + ceph mon stat + ceph mon stat -f json | jq '.' +} + +function test_mon_priority_and_weight() +{ + for i in 0 1 65535; do + ceph mon set-weight a $i + w=$(ceph mon dump --format=json-pretty 2>/dev/null | jq '.mons[0].weight') + [[ "$w" == "$i" ]] + done + + for i in -1 65536; do + expect_false ceph mon set-weight a $i + done +} + +function gen_secrets_file() +{ + # lets assume we can have the following types + # all - generates both cephx and lockbox, with mock dm-crypt key + # cephx - only cephx + # no_cephx - lockbox and dm-crypt, no cephx + # no_lockbox - dm-crypt and cephx, no lockbox + # empty - empty file + # empty_json - correct json, empty map + # bad_json - bad json :) + # + local t=$1 + if [[ -z "$t" ]]; then + t="all" + fi + + fn=$(mktemp $TEMP_DIR/secret.XXXXXX) + echo $fn + if [[ "$t" == "empty" ]]; then + return 0 + fi + + echo "{" > $fn + if [[ "$t" == "bad_json" ]]; then + echo "asd: ; }" >> $fn + return 0 + elif [[ "$t" == "empty_json" ]]; then + echo "}" >> $fn + return 0 + fi + + cephx_secret="\"cephx_secret\": \"$(ceph-authtool --gen-print-key)\"" + lb_secret="\"cephx_lockbox_secret\": \"$(ceph-authtool --gen-print-key)\"" + dmcrypt_key="\"dmcrypt_key\": \"$(ceph-authtool --gen-print-key)\"" + + if [[ "$t" == "all" ]]; then + echo "$cephx_secret,$lb_secret,$dmcrypt_key" >> $fn + elif [[ "$t" == "cephx" ]]; then + echo "$cephx_secret" >> $fn + elif [[ "$t" == "no_cephx" ]]; then + echo "$lb_secret,$dmcrypt_key" >> $fn + elif [[ "$t" == "no_lockbox" ]]; then + echo "$cephx_secret,$dmcrypt_key" >> $fn + else + echo "unknown gen_secrets_file() type \'$fn\'" + return 1 + fi + echo "}" >> $fn + return 0 +} + +function test_mon_osd_create_destroy() +{ + ceph osd new 2>&1 | grep 'EINVAL' + ceph osd new '' -1 2>&1 | grep 'EINVAL' + ceph osd new '' 10 2>&1 | grep 'EINVAL' + + old_maxosd=$(ceph osd getmaxosd | sed -e 's/max_osd = //' -e 's/ in epoch.*//') + + old_osds=$(ceph osd ls) + num_osds=$(ceph osd ls | wc -l) + + uuid=$(uuidgen) + id=$(ceph osd new $uuid 2>/dev/null) + + for i in $old_osds; do + [[ "$i" != "$id" ]] + done + + ceph osd find $id + + id2=`ceph osd new $uuid 2>/dev/null` + + [[ $id2 == $id ]] + + ceph osd new $uuid $id + + id3=$(ceph osd getmaxosd | sed -e 's/max_osd = //' -e 's/ in epoch.*//') + ceph osd new $uuid $((id3+1)) 2>&1 | grep EEXIST + + uuid2=$(uuidgen) + id2=$(ceph osd new $uuid2) + ceph osd find $id2 + [[ "$id2" != "$id" ]] + + ceph osd new $uuid $id2 2>&1 | grep EEXIST + ceph osd new $uuid2 $id2 + + # test with secrets + empty_secrets=$(gen_secrets_file "empty") + empty_json=$(gen_secrets_file "empty_json") + all_secrets=$(gen_secrets_file "all") + cephx_only=$(gen_secrets_file "cephx") + no_cephx=$(gen_secrets_file "no_cephx") + no_lockbox=$(gen_secrets_file "no_lockbox") + bad_json=$(gen_secrets_file "bad_json") + + # empty secrets should be idempotent + new_id=$(ceph osd new $uuid $id -i $empty_secrets) + [[ "$new_id" == "$id" ]] + + # empty json, thus empty secrets + new_id=$(ceph osd new $uuid $id -i $empty_json) + [[ "$new_id" == "$id" ]] + + ceph osd new $uuid $id -i $all_secrets 2>&1 | grep 'EEXIST' + + ceph osd rm $id + ceph osd rm $id2 + ceph osd setmaxosd $old_maxosd + + ceph osd new $uuid -i $no_cephx 2>&1 | grep 'EINVAL' + ceph osd new $uuid -i $no_lockbox 2>&1 | grep 'EINVAL' + + osds=$(ceph osd ls) + id=$(ceph osd new $uuid -i 
$all_secrets) + for i in $osds; do + [[ "$i" != "$id" ]] + done + + ceph osd find $id + + # validate secrets and dm-crypt are set + k=$(ceph auth get-key osd.$id --format=json-pretty 2>/dev/null | jq '.key') + s=$(cat $all_secrets | jq '.cephx_secret') + [[ $k == $s ]] + k=$(ceph auth get-key client.osd-lockbox.$uuid --format=json-pretty 2>/dev/null | \ + jq '.key') + s=$(cat $all_secrets | jq '.cephx_lockbox_secret') + [[ $k == $s ]] + ceph config-key exists dm-crypt/osd/$uuid/luks + + osds=$(ceph osd ls) + id2=$(ceph osd new $uuid2 -i $cephx_only) + for i in $osds; do + [[ "$i" != "$id2" ]] + done + + ceph osd find $id2 + k=$(ceph auth get-key osd.$id --format=json-pretty 2>/dev/null | jq '.key') + s=$(cat $all_secrets | jq '.cephx_secret') + [[ $k == $s ]] + expect_false ceph auth get-key client.osd-lockbox.$uuid2 + expect_false ceph config-key exists dm-crypt/osd/$uuid2/luks + + ceph osd destroy osd.$id2 --yes-i-really-mean-it + ceph osd destroy $id2 --yes-i-really-mean-it + ceph osd find $id2 + expect_false ceph auth get-key osd.$id2 + ceph osd dump | grep osd.$id2 | grep destroyed + + id3=$id2 + uuid3=$(uuidgen) + ceph osd new $uuid3 $id3 -i $all_secrets + ceph osd dump | grep osd.$id3 | expect_false grep destroyed + ceph auth get-key client.osd-lockbox.$uuid3 + ceph auth get-key osd.$id3 + ceph config-key exists dm-crypt/osd/$uuid3/luks + + ceph osd purge-new osd.$id3 --yes-i-really-mean-it + expect_false ceph osd find $id2 + expect_false ceph auth get-key osd.$id2 + expect_false ceph auth get-key client.osd-lockbox.$uuid3 + expect_false ceph config-key exists dm-crypt/osd/$uuid3/luks + ceph osd purge osd.$id3 --yes-i-really-mean-it + ceph osd purge-new osd.$id3 --yes-i-really-mean-it # idempotent + + ceph osd purge osd.$id --yes-i-really-mean-it + ceph osd purge 123456 --yes-i-really-mean-it + expect_false ceph osd find $id + expect_false ceph auth get-key osd.$id + expect_false ceph auth get-key client.osd-lockbox.$uuid + expect_false ceph config-key exists dm-crypt/osd/$uuid/luks + + rm $empty_secrets $empty_json $all_secrets $cephx_only \ + $no_cephx $no_lockbox $bad_json + + for i in $(ceph osd ls); do + [[ "$i" != "$id" ]] + [[ "$i" != "$id2" ]] + [[ "$i" != "$id3" ]] + done + + [[ "$(ceph osd ls | wc -l)" == "$num_osds" ]] + ceph osd setmaxosd $old_maxosd + +} + +function test_mon_config_key() +{ + key=asdfasdfqwerqwreasdfuniquesa123df + ceph config-key list | grep -c $key | grep 0 + ceph config-key get $key | grep -c bar | grep 0 + ceph config-key set $key bar + ceph config-key get $key | grep bar + ceph config-key list | grep -c $key | grep 1 + ceph config-key dump | grep $key | grep bar + ceph config-key rm $key + expect_false ceph config-key get $key + ceph config-key list | grep -c $key | grep 0 + ceph config-key dump | grep -c $key | grep 0 +} + +function test_mon_osd() +{ + # + # osd blocklist + # + bl=192.168.0.1:0/1000 + ceph osd blocklist add $bl + ceph osd blocklist ls | grep $bl + ceph osd blocklist ls --format=json-pretty | sed 's/\\\//\//' | grep $bl + ceph osd dump --format=json-pretty | grep $bl + ceph osd dump | grep $bl + ceph osd blocklist rm $bl + ceph osd blocklist ls | expect_false grep $bl + + bl=192.168.0.1 + # test without nonce, invalid nonce + ceph osd blocklist add $bl + ceph osd blocklist ls | grep $bl + ceph osd blocklist rm $bl + ceph osd blocklist ls | expect_false grep $bl + expect_false "ceph osd blocklist add $bl/-1" + expect_false "ceph osd blocklist add $bl/foo" + + # test with invalid address + expect_false "ceph osd blocklist add 
1234.56.78.90/100" + + # test range blocklisting + bl=192.168.0.1:0/24 + ceph osd blocklist range add $bl + ceph osd blocklist ls | grep $bl + ceph osd blocklist range rm $bl + ceph osd blocklist ls | expect_false grep $bl + bad_bl=192.168.0.1/33 + expect_false ceph osd blocklist range add $bad_bl + + # Test `clear` + ceph osd blocklist add $bl + ceph osd blocklist ls | grep $bl + ceph osd blocklist clear + ceph osd blocklist ls | expect_false grep $bl + + # deprecated syntax? + ceph osd blacklist ls + + # + # osd crush + # + ceph osd crush reweight-all + ceph osd crush tunables legacy + ceph osd crush show-tunables | grep argonaut + ceph osd crush tunables bobtail + ceph osd crush show-tunables | grep bobtail + ceph osd crush tunables firefly + ceph osd crush show-tunables | grep firefly + + ceph osd crush set-tunable straw_calc_version 0 + ceph osd crush get-tunable straw_calc_version | grep 0 + ceph osd crush set-tunable straw_calc_version 1 + ceph osd crush get-tunable straw_calc_version | grep 1 + + # + # require-min-compat-client + expect_false ceph osd set-require-min-compat-client dumpling # firefly tunables + ceph osd get-require-min-compat-client | grep luminous + ceph osd dump | grep 'require_min_compat_client luminous' + + # + # osd scrub + # + + # blocking + ceph osd scrub 0 --block + ceph osd deep-scrub 0 --block + + # how do I tell when these are done? + ceph osd scrub 0 + ceph osd deep-scrub 0 + ceph osd repair 0 + + # pool scrub, force-recovery/backfill + pool_names=`rados lspools` + for pool_name in $pool_names + do + ceph osd pool scrub $pool_name + ceph osd pool deep-scrub $pool_name + ceph osd pool repair $pool_name + ceph osd pool force-recovery $pool_name + ceph osd pool cancel-force-recovery $pool_name + ceph osd pool force-backfill $pool_name + ceph osd pool cancel-force-backfill $pool_name + done + + for f in noup nodown noin noout noscrub nodeep-scrub nobackfill \ + norebalance norecover notieragent noautoscale + do + ceph osd set $f + ceph osd unset $f + done + expect_false ceph osd set bogus + expect_false ceph osd unset bogus + for f in sortbitwise recover_deletes require_jewel_osds \ + require_kraken_osds + do + expect_false ceph osd set $f + expect_false ceph osd unset $f + done + ceph osd require-osd-release reef + # can't lower + expect_false ceph osd require-osd-release quincy + expect_false ceph osd require-osd-release pacific + # these are no-ops but should succeed. + + ceph osd set noup + ceph osd down 0 + ceph osd dump | grep 'osd.0 down' + ceph osd unset noup + max_run=1000 + for ((i=0; i < $max_run; i++)); do + if ! ceph osd dump | grep 'osd.0 up'; then + echo "waiting for osd.0 to come back up ($i/$max_run)" + sleep 1 + else + break + fi + done + ceph osd dump | grep 'osd.0 up' + + ceph osd dump | grep 'osd.0 up' + # ceph osd find expects the OsdName, so both ints and osd.n should work. 
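+  # For example, both the bare id "1" and the name "osd.1" are accepted by the
+  # checks below, while malformed names such as "osd.xyz", "xyz" and "0.1" are
+  # rejected via expect_false.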
+ ceph osd find 1 + ceph osd find osd.1 + expect_false ceph osd find osd.xyz + expect_false ceph osd find xyz + expect_false ceph osd find 0.1 + ceph --format plain osd find 1 # falls back to json-pretty + if [ `uname` == Linux ]; then + ceph osd metadata 1 | grep 'distro' + ceph --format plain osd metadata 1 | grep 'distro' # falls back to json-pretty + fi + ceph osd out 0 + ceph osd dump | grep 'osd.0.*out' + ceph osd in 0 + ceph osd dump | grep 'osd.0.*in' + ceph osd find 0 + + ceph osd info 0 + ceph osd info osd.0 + expect_false ceph osd info osd.xyz + expect_false ceph osd info xyz + expect_false ceph osd info 42 + expect_false ceph osd info osd.42 + + ceph osd info + info_json=$(ceph osd info --format=json | jq -cM '.') + dump_json=$(ceph osd dump --format=json | jq -cM '.osds') + if [[ "${info_json}" != "${dump_json}" ]]; then + echo "waiting for OSDs to settle" + sleep 10 + info_json=$(ceph osd info --format=json | jq -cM '.') + dump_json=$(ceph osd dump --format=json | jq -cM '.osds') + [[ "${info_json}" == "${dump_json}" ]] + fi + + info_json=$(ceph osd info 0 --format=json | jq -cM '.') + dump_json=$(ceph osd dump --format=json | \ + jq -cM '.osds[] | select(.osd == 0)') + [[ "${info_json}" == "${dump_json}" ]] + + info_plain="$(ceph osd info)" + dump_plain="$(ceph osd dump | grep '^osd')" + [[ "${info_plain}" == "${dump_plain}" ]] + + info_plain="$(ceph osd info 0)" + dump_plain="$(ceph osd dump | grep '^osd.0')" + [[ "${info_plain}" == "${dump_plain}" ]] + + ceph osd add-nodown 0 1 + ceph health detail | grep 'NODOWN' + ceph osd rm-nodown 0 1 + ! ceph health detail | grep 'NODOWN' + + ceph osd out 0 # so we can mark it as noin later + ceph osd add-noin 0 + ceph health detail | grep 'NOIN' + ceph osd rm-noin 0 + ! ceph health detail | grep 'NOIN' + ceph osd in 0 + + ceph osd add-noout 0 + ceph health detail | grep 'NOOUT' + ceph osd rm-noout 0 + ! ceph health detail | grep 'NOOUT' + + # test osd id parse + expect_false ceph osd add-noup 797er + expect_false ceph osd add-nodown u9uwer + expect_false ceph osd add-noin 78~15 + + expect_false ceph osd rm-noup 1234567 + expect_false ceph osd rm-nodown fsadf7 + expect_false ceph osd rm-noout 790-fd + + ids=`ceph osd ls-tree default` + for osd in $ids + do + ceph osd add-nodown $osd + ceph osd add-noout $osd + done + ceph -s | grep 'NODOWN' + ceph -s | grep 'NOOUT' + ceph osd rm-nodown any + ceph osd rm-noout all + ! ceph -s | grep 'NODOWN' + ! 
ceph -s | grep 'NOOUT' + + # test crush node flags + ceph osd add-noup osd.0 + ceph osd add-nodown osd.0 + ceph osd add-noin osd.0 + ceph osd add-noout osd.0 + ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep "osd.0" + ceph osd rm-noup osd.0 + ceph osd rm-nodown osd.0 + ceph osd rm-noin osd.0 + ceph osd rm-noout osd.0 + ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep "osd.0" + + ceph osd crush add-bucket foo host root=default + ceph osd add-noup foo + ceph osd add-nodown foo + ceph osd add-noin foo + ceph osd add-noout foo + ceph osd dump -f json-pretty | jq ".crush_node_flags" | grep foo + ceph osd rm-noup foo + ceph osd rm-nodown foo + ceph osd rm-noin foo + ceph osd rm-noout foo + ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep foo + ceph osd add-noup foo + ceph osd dump -f json-pretty | jq ".crush_node_flags" | grep foo + ceph osd crush rm foo + ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep foo + + ceph osd set-group noup osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noup' + ceph osd set-group noup,nodown osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noup' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'nodown' + ceph osd set-group noup,nodown,noin osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noup' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noin' + ceph osd set-group noup,nodown,noin,noout osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noup' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noin' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noout' + ceph osd unset-group noup osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | expect_false grep 'noup' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noin' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noout' + ceph osd unset-group noup,nodown osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | expect_false grep 'noup\|nodown' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noin' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noout' + ceph osd unset-group noup,nodown,noin osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | expect_false grep 'noup\|nodown\|noin' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noout' + ceph osd unset-group noup,nodown,noin,noout osd.0 + ceph osd dump -f json-pretty | jq ".osds[0].state" | expect_false grep 'noup\|nodown\|noin\|noout' + + ceph osd set-group noup,nodown,noin,noout osd.0 osd.1 + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noup' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noin' + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noout' + ceph osd dump -f json-pretty | jq ".osds[1].state" | grep 'noup' + ceph osd dump -f json-pretty | jq ".osds[1].state" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".osds[1].state" | grep 'noin' + ceph osd dump -f json-pretty | jq ".osds[1].state" | grep 'noout' + ceph osd unset-group noup,nodown,noin,noout osd.0 osd.1 + ceph osd dump -f json-pretty | jq ".osds[0].state" | expect_false grep 
'noup\|nodown\|noin\|noout' + ceph osd dump -f json-pretty | jq ".osds[1].state" | expect_false grep 'noup\|nodown\|noin\|noout' + + ceph osd set-group noup all + ceph osd dump -f json-pretty | jq ".osds[0].state" | grep 'noup' + ceph osd unset-group noup all + ceph osd dump -f json-pretty | jq ".osds[0].state" | expect_false grep 'noup' + + # crush node flags + ceph osd crush add-bucket foo host root=default + ceph osd set-group noup foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noup' + ceph osd set-group noup,nodown foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noup' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'nodown' + ceph osd set-group noup,nodown,noin foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noup' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noin' + ceph osd set-group noup,nodown,noin,noout foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noup' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noin' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noout' + + ceph osd unset-group noup foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | expect_false grep 'noup' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noin' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noout' + ceph osd unset-group noup,nodown foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | expect_false grep 'noup\|nodown' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noin' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noout' + ceph osd unset-group noup,nodown,noin foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | expect_false grep 'noup\|nodown\|noin' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noout' + ceph osd unset-group noup,nodown,noin,noout foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | expect_false grep 'noup\|nodown\|noin\|noout' + + ceph osd set-group noin,noout foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noin' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noout' + ceph osd unset-group noin,noout foo + ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep 'foo' + + ceph osd set-group noup,nodown,noin,noout foo + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noup' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noin' + ceph osd dump -f json-pretty | jq ".crush_node_flags.foo" | grep 'noout' + ceph osd crush rm foo + ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep 'foo' + + # test device class flags + osd_0_device_class=$(ceph osd crush get-device-class osd.0) + ceph osd set-group noup $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noup' + ceph osd set-group noup,nodown $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noup' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'nodown' 
+ ceph osd set-group noup,nodown,noin $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noup' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noin' + ceph osd set-group noup,nodown,noin,noout $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noup' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noin' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noout' + + ceph osd unset-group noup $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | expect_false grep 'noup' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'nodown' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noin' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noout' + ceph osd unset-group noup,nodown $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | expect_false grep 'noup\|nodown' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noin' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noout' + ceph osd unset-group noup,nodown,noin $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | expect_false grep 'noup\|nodown\|noin' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noout' + ceph osd unset-group noup,nodown,noin,noout $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | expect_false grep 'noup\|nodown\|noin\|noout' + + ceph osd set-group noin,noout $osd_0_device_class + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noin' + ceph osd dump -f json-pretty | jq ".device_class_flags.$osd_0_device_class" | grep 'noout' + ceph osd unset-group noin,noout $osd_0_device_class + ceph osd dump -f json-pretty | jq ".crush_node_flags" | expect_false grep $osd_0_device_class + + # make sure mark out preserves weight + ceph osd reweight osd.0 .5 + ceph osd dump | grep ^osd.0 | grep 'weight 0.5' + ceph osd out 0 + ceph osd in 0 + ceph osd dump | grep ^osd.0 | grep 'weight 0.5' + + ceph osd getmap -o $f + [ -s $f ] + rm $f + save=$(ceph osd getmaxosd | sed -e 's/max_osd = //' -e 's/ in epoch.*//') + [ "$save" -gt 0 ] + ceph osd setmaxosd $((save - 1)) 2>&1 | grep 'EBUSY' + ceph osd setmaxosd 10 + ceph osd getmaxosd | grep 'max_osd = 10' + ceph osd setmaxosd $save + ceph osd getmaxosd | grep "max_osd = $save" + + for id in `ceph osd ls` ; do + retry_eagain 5 map_enxio_to_eagain ceph tell osd.$id version + done + + ceph osd rm 0 2>&1 | grep 'EBUSY' + + local old_osds=$(echo $(ceph osd ls)) + id=`ceph osd create` + ceph osd find $id + ceph osd lost $id --yes-i-really-mean-it + expect_false ceph osd setmaxosd $id + local new_osds=$(echo $(ceph osd ls)) + for id in $(echo $new_osds | sed -e "s/$old_osds//") ; do + ceph osd rm $id + done + + uuid=`uuidgen` + id=`ceph osd create $uuid` + id2=`ceph osd create $uuid` + [ "$id" = "$id2" ] + ceph osd rm $id + + ceph --help osd + + # reset max_osd. 
+ ceph osd setmaxosd $id + ceph osd getmaxosd | grep "max_osd = $save" + local max_osd=$save + + ceph osd create $uuid 0 2>&1 | grep 'EINVAL' + ceph osd create $uuid $((max_osd - 1)) 2>&1 | grep 'EINVAL' + + id=`ceph osd create $uuid $max_osd` + [ "$id" = "$max_osd" ] + ceph osd find $id + max_osd=$((max_osd + 1)) + ceph osd getmaxosd | grep "max_osd = $max_osd" + + ceph osd create $uuid $((id - 1)) 2>&1 | grep 'EEXIST' + ceph osd create $uuid $((id + 1)) 2>&1 | grep 'EEXIST' + id2=`ceph osd create $uuid` + [ "$id" = "$id2" ] + id2=`ceph osd create $uuid $id` + [ "$id" = "$id2" ] + + uuid=`uuidgen` + local gap_start=$max_osd + id=`ceph osd create $uuid $((gap_start + 100))` + [ "$id" = "$((gap_start + 100))" ] + max_osd=$((id + 1)) + ceph osd getmaxosd | grep "max_osd = $max_osd" + + ceph osd create $uuid $gap_start 2>&1 | grep 'EEXIST' + + # + # When CEPH_CLI_TEST_DUP_COMMAND is set, osd create + # is repeated and consumes two osd id, not just one. + # + local next_osd=$gap_start + id=`ceph osd create $(uuidgen)` + [ "$id" = "$next_osd" ] + + next_osd=$((id + 1)) + id=`ceph osd create $(uuidgen) $next_osd` + [ "$id" = "$next_osd" ] + + local new_osds=$(echo $(ceph osd ls)) + for id in $(echo $new_osds | sed -e "s/$old_osds//") ; do + [ $id -ge $save ] + ceph osd rm $id + done + ceph osd setmaxosd $save + + ceph osd ls + ceph osd pool create data 16 + ceph osd pool application enable data rados + ceph osd lspools | grep data + ceph osd map data foo | grep 'pool.*data.*object.*foo.*pg.*up.*acting' + ceph osd map data foo namespace| grep 'pool.*data.*object.*namespace/foo.*pg.*up.*acting' + ceph osd pool delete data data --yes-i-really-really-mean-it + + ceph osd pause + ceph osd dump | grep 'flags.*pauserd,pausewr' + ceph osd unpause + + ceph osd tree + ceph osd tree up + ceph osd tree down + ceph osd tree in + ceph osd tree out + ceph osd tree destroyed + ceph osd tree up in + ceph osd tree up out + ceph osd tree down in + ceph osd tree down out + ceph osd tree out down + expect_false ceph osd tree up down + expect_false ceph osd tree up destroyed + expect_false ceph osd tree down destroyed + expect_false ceph osd tree up down destroyed + expect_false ceph osd tree in out + expect_false ceph osd tree up foo + + ceph osd metadata + ceph osd count-metadata os + ceph osd versions + + ceph osd perf + ceph osd blocked-by + + ceph osd stat | grep up +} + +function test_mon_crush() +{ + f=$TEMP_DIR/map.$$ + epoch=$(ceph osd getcrushmap -o $f 2>&1 | tail -n1) + [ -s $f ] + [ "$epoch" -gt 1 ] + nextepoch=$(( $epoch + 1 )) + echo epoch $epoch nextepoch $nextepoch + rm -f $f.epoch + expect_false ceph osd setcrushmap $nextepoch -i $f + gotepoch=$(ceph osd setcrushmap $epoch -i $f 2>&1 | tail -n1) + echo gotepoch $gotepoch + [ "$gotepoch" -eq "$nextepoch" ] + # should be idempotent + gotepoch=$(ceph osd setcrushmap $epoch -i $f 2>&1 | tail -n1) + echo epoch $gotepoch + [ "$gotepoch" -eq "$nextepoch" ] + rm $f +} + +function test_mon_osd_pool() +{ + # + # osd pool + # + ceph osd pool create data 16 + ceph osd pool application enable data rados + ceph osd pool mksnap data datasnap + rados -p data lssnap | grep datasnap + ceph osd pool rmsnap data datasnap + expect_false ceph osd pool rmsnap pool_fake snapshot + ceph osd pool delete data data --yes-i-really-really-mean-it + + ceph osd pool create data2 16 + ceph osd pool application enable data2 rados + ceph osd pool rename data2 data3 + ceph osd lspools | grep data3 + ceph osd pool delete data3 data3 --yes-i-really-really-mean-it + + ceph osd pool create 
replicated 16 16 replicated + ceph osd pool create replicated 1 16 replicated + ceph osd pool create replicated 16 16 # default is replicated + ceph osd pool create replicated 16 # default is replicated, pgp_num = pg_num + ceph osd pool application enable replicated rados + # should fail because the type is not the same + expect_false ceph osd pool create replicated 16 16 erasure + ceph osd lspools | grep replicated + ceph osd pool create ec_test 1 1 erasure + ceph osd pool application enable ec_test rados + set +e + ceph osd count-metadata osd_objectstore | grep 'bluestore' + if [ $? -eq 1 ]; then # enable ec_overwrites on non-bluestore pools should fail + ceph osd pool set ec_test allow_ec_overwrites true >& $TMPFILE + check_response "pool must only be stored on bluestore for scrubbing to work" $? 22 + else + ceph osd pool set ec_test allow_ec_overwrites true || return 1 + expect_false ceph osd pool set ec_test allow_ec_overwrites false + fi + set -e + ceph osd pool delete replicated replicated --yes-i-really-really-mean-it + ceph osd pool delete ec_test ec_test --yes-i-really-really-mean-it + + # test create pool with rule + ceph osd erasure-code-profile set foo foo + ceph osd erasure-code-profile ls | grep foo + ceph osd crush rule create-erasure foo foo + ceph osd pool create erasure 16 16 erasure foo + expect_false ceph osd erasure-code-profile rm foo + ceph osd pool delete erasure erasure --yes-i-really-really-mean-it + ceph osd crush rule rm foo + ceph osd erasure-code-profile rm foo + + # autoscale mode + ceph osd pool create modeon --autoscale-mode=on + ceph osd dump | grep modeon | grep 'autoscale_mode on' + ceph osd pool create modewarn --autoscale-mode=warn + ceph osd dump | grep modewarn | grep 'autoscale_mode warn' + ceph osd pool create modeoff --autoscale-mode=off + ceph osd dump | grep modeoff | grep 'autoscale_mode off' + ceph osd pool delete modeon modeon --yes-i-really-really-mean-it + ceph osd pool delete modewarn modewarn --yes-i-really-really-mean-it + ceph osd pool delete modeoff modeoff --yes-i-really-really-mean-it +} + +function test_mon_osd_pool_quota() +{ + # + # test osd pool set/get quota + # + + # create tmp pool + ceph osd pool create tmp-quota-pool 32 + ceph osd pool application enable tmp-quota-pool rados + # + # set erroneous quotas + # + expect_false ceph osd pool set-quota tmp-quota-pool max_fooness 10 + expect_false ceph osd pool set-quota tmp-quota-pool max_bytes -1 + expect_false ceph osd pool set-quota tmp-quota-pool max_objects aaa + # + # set valid quotas + # + ceph osd pool set-quota tmp-quota-pool max_bytes 10 + ceph osd pool set-quota tmp-quota-pool max_objects 10M + # + # get quotas in json-pretty format + # + ceph osd pool get-quota tmp-quota-pool --format=json-pretty | \ + grep '"quota_max_objects":.*10000000' + ceph osd pool get-quota tmp-quota-pool --format=json-pretty | \ + grep '"quota_max_bytes":.*10' + # + # get quotas + # + ceph osd pool get-quota tmp-quota-pool | grep 'max bytes.*10 B' + ceph osd pool get-quota tmp-quota-pool | grep 'max objects.*10.*M objects' + # + # set valid quotas with unit prefix + # + ceph osd pool set-quota tmp-quota-pool max_bytes 10K + # + # get quotas + # + ceph osd pool get-quota tmp-quota-pool | grep 'max bytes.*10 Ki' + # + # set valid quotas with unit prefix + # + ceph osd pool set-quota tmp-quota-pool max_bytes 10Ki + # + # get quotas + # + ceph osd pool get-quota tmp-quota-pool | grep 'max bytes.*10 Ki' + # + # + # reset pool quotas + # + ceph osd pool set-quota tmp-quota-pool max_bytes 0 + ceph 
osd pool set-quota tmp-quota-pool max_objects 0 + # + # test N/A quotas + # + ceph osd pool get-quota tmp-quota-pool | grep 'max bytes.*N/A' + ceph osd pool get-quota tmp-quota-pool | grep 'max objects.*N/A' + # + # cleanup tmp pool + ceph osd pool delete tmp-quota-pool tmp-quota-pool --yes-i-really-really-mean-it +} + +function test_mon_pg() +{ + # Make sure we start healthy. + wait_for_health_ok + + ceph pg debug unfound_objects_exist + ceph pg debug degraded_pgs_exist + ceph pg deep-scrub 1.0 + ceph pg dump + ceph pg dump pgs_brief --format=json + ceph pg dump pgs --format=json + ceph pg dump pools --format=json + ceph pg dump osds --format=json + ceph pg dump sum --format=json + ceph pg dump all --format=json + ceph pg dump pgs_brief osds --format=json + ceph pg dump pools osds pgs_brief --format=json + ceph pg dump_json + ceph pg dump_pools_json + ceph pg dump_stuck inactive + ceph pg dump_stuck unclean + ceph pg dump_stuck stale + ceph pg dump_stuck undersized + ceph pg dump_stuck degraded + ceph pg ls + ceph pg ls 1 + ceph pg ls stale + expect_false ceph pg ls scrubq + ceph pg ls active stale repair recovering + ceph pg ls 1 active + ceph pg ls 1 active stale + ceph pg ls-by-primary osd.0 + ceph pg ls-by-primary osd.0 1 + ceph pg ls-by-primary osd.0 active + ceph pg ls-by-primary osd.0 active stale + ceph pg ls-by-primary osd.0 1 active stale + ceph pg ls-by-osd osd.0 + ceph pg ls-by-osd osd.0 1 + ceph pg ls-by-osd osd.0 active + ceph pg ls-by-osd osd.0 active stale + ceph pg ls-by-osd osd.0 1 active stale + ceph pg ls-by-pool rbd + ceph pg ls-by-pool rbd active stale + # can't test this... + # ceph pg force_create_pg + ceph pg getmap -o $TEMP_DIR/map.$$ + [ -s $TEMP_DIR/map.$$ ] + ceph pg map 1.0 | grep acting + ceph pg repair 1.0 + ceph pg scrub 1.0 + + ceph osd set-full-ratio .962 + ceph osd dump | grep '^full_ratio 0.962' + ceph osd set-backfillfull-ratio .912 + ceph osd dump | grep '^backfillfull_ratio 0.912' + ceph osd set-nearfull-ratio .892 + ceph osd dump | grep '^nearfull_ratio 0.892' + + # Check health status + ceph osd set-nearfull-ratio .913 + ceph health -f json | grep OSD_OUT_OF_ORDER_FULL + ceph health detail | grep OSD_OUT_OF_ORDER_FULL + ceph osd set-nearfull-ratio .892 + ceph osd set-backfillfull-ratio .963 + ceph health -f json | grep OSD_OUT_OF_ORDER_FULL + ceph health detail | grep OSD_OUT_OF_ORDER_FULL + ceph osd set-backfillfull-ratio .912 + + # Check injected full results + $SUDO ceph tell osd.0 injectfull nearfull + wait_for_health "OSD_NEARFULL" + ceph health detail | grep "osd.0 is near full" + $SUDO ceph tell osd.0 injectfull none + wait_for_health_ok + + $SUDO ceph tell osd.1 injectfull backfillfull + wait_for_health "OSD_BACKFILLFULL" + ceph health detail | grep "osd.1 is backfill full" + $SUDO ceph tell osd.1 injectfull none + wait_for_health_ok + + $SUDO ceph tell osd.2 injectfull failsafe + # failsafe and full are the same as far as the monitor is concerned + wait_for_health "OSD_FULL" + ceph health detail | grep "osd.2 is full" + $SUDO ceph tell osd.2 injectfull none + wait_for_health_ok + + $SUDO ceph tell osd.0 injectfull full + wait_for_health "OSD_FULL" + ceph health detail | grep "osd.0 is full" + $SUDO ceph tell osd.0 injectfull none + wait_for_health_ok + + ceph pg stat | grep 'pgs:' + ceph pg 1.0 query + ceph tell 1.0 query + first=$(ceph mon dump -f json | jq -r '.mons[0].name') + ceph tell mon.$first quorum enter + ceph quorum_status + ceph report | grep osd_stats + ceph status + ceph -s + + # + # tell osd version + # + ceph tell osd.0 
version + expect_false ceph tell osd.9999 version + expect_false ceph tell osd.foo version + + # back to pg stuff + + ceph tell osd.0 dump_pg_recovery_stats | grep Started + + ceph osd reweight 0 0.9 + expect_false ceph osd reweight 0 -1 + ceph osd reweight osd.0 1 + + ceph osd primary-affinity osd.0 .9 + expect_false ceph osd primary-affinity osd.0 -2 + expect_false ceph osd primary-affinity osd.9999 .5 + ceph osd primary-affinity osd.0 1 + + ceph osd pool set rbd size 2 + ceph osd pg-temp 1.0 0 1 + ceph osd pg-temp 1.0 osd.1 osd.0 + expect_false ceph osd pg-temp 1.0 0 1 2 + expect_false ceph osd pg-temp asdf qwer + expect_false ceph osd pg-temp 1.0 asdf + ceph osd pg-temp 1.0 # cleanup pg-temp + + ceph pg repeer 1.0 + expect_false ceph pg repeer 0.0 # pool 0 shouldn't exist anymore + + # don't test ceph osd primary-temp for now +} + +function test_mon_osd_pool_set() +{ + TEST_POOL_GETSET=pool_getset + expect_false ceph osd pool create $TEST_POOL_GETSET 1 --target_size_ratio -0.3 + expect_true ceph osd pool create $TEST_POOL_GETSET 1 --target_size_ratio 1 + ceph osd pool application enable $TEST_POOL_GETSET rados + ceph osd pool set $TEST_POOL_GETSET pg_autoscale_mode off + wait_for_clean + ceph osd pool get $TEST_POOL_GETSET all + + for s in pg_num pgp_num size min_size crush_rule target_size_ratio; do + ceph osd pool get $TEST_POOL_GETSET $s + done + + old_size=$(ceph osd pool get $TEST_POOL_GETSET size | sed -e 's/size: //') + (( new_size = old_size + 1 )) + ceph osd pool set $TEST_POOL_GETSET size $new_size --yes-i-really-mean-it + ceph osd pool get $TEST_POOL_GETSET size | grep "size: $new_size" + ceph osd pool set $TEST_POOL_GETSET size $old_size --yes-i-really-mean-it + + ceph osd pool create pool_erasure 1 1 erasure + ceph osd pool application enable pool_erasure rados + wait_for_clean + set +e + ceph osd pool set pool_erasure size 4444 2>$TMPFILE + check_response 'not change the size' + set -e + ceph osd pool get pool_erasure erasure_code_profile + ceph osd pool rm pool_erasure pool_erasure --yes-i-really-really-mean-it + + for flag in nodelete nopgchange nosizechange write_fadvise_dontneed noscrub nodeep-scrub bulk; do + ceph osd pool set $TEST_POOL_GETSET $flag false + ceph osd pool get $TEST_POOL_GETSET $flag | grep "$flag: false" + ceph osd pool set $TEST_POOL_GETSET $flag true + ceph osd pool get $TEST_POOL_GETSET $flag | grep "$flag: true" + ceph osd pool set $TEST_POOL_GETSET $flag 1 + ceph osd pool get $TEST_POOL_GETSET $flag | grep "$flag: true" + ceph osd pool set $TEST_POOL_GETSET $flag 0 + ceph osd pool get $TEST_POOL_GETSET $flag | grep "$flag: false" + expect_false ceph osd pool set $TEST_POOL_GETSET $flag asdf + expect_false ceph osd pool set $TEST_POOL_GETSET $flag 2 + done + + ceph osd pool get $TEST_POOL_GETSET scrub_min_interval | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET scrub_min_interval 123456 + ceph osd pool get $TEST_POOL_GETSET scrub_min_interval | grep 'scrub_min_interval: 123456' + ceph osd pool set $TEST_POOL_GETSET scrub_min_interval 0 + ceph osd pool get $TEST_POOL_GETSET scrub_min_interval | expect_false grep '.' + + ceph osd pool get $TEST_POOL_GETSET scrub_max_interval | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET scrub_max_interval 123456 + ceph osd pool get $TEST_POOL_GETSET scrub_max_interval | grep 'scrub_max_interval: 123456' + ceph osd pool set $TEST_POOL_GETSET scrub_max_interval 0 + ceph osd pool get $TEST_POOL_GETSET scrub_max_interval | expect_false grep '.' 
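+
+    # Editorial sketch (not part of the original test): the interval checks
+    # here all follow the same round-trip -- the per-pool value is absent by
+    # default (so "get" prints nothing), a non-zero "set" makes it show up,
+    # and setting it back to 0 clears it again. A minimal helper capturing
+    # that pattern; the name check_pool_interval_roundtrip is hypothetical
+    # and the function is only defined here, never called:
+    check_pool_interval_roundtrip() {
+        local pool=$1 opt=$2 val=$3
+        # value starts cleared: "get" prints nothing
+        ceph osd pool get $pool $opt | expect_false grep '.'
+        # set a value and verify it is reported back
+        ceph osd pool set $pool $opt $val
+        ceph osd pool get $pool $opt | grep "$opt: $val"
+        # setting 0 clears the per-pool value again
+        ceph osd pool set $pool $opt 0
+        ceph osd pool get $pool $opt | expect_false grep '.'
+    }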
+ + ceph osd pool get $TEST_POOL_GETSET deep_scrub_interval | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET deep_scrub_interval 123456 + ceph osd pool get $TEST_POOL_GETSET deep_scrub_interval | grep 'deep_scrub_interval: 123456' + ceph osd pool set $TEST_POOL_GETSET deep_scrub_interval 0 + ceph osd pool get $TEST_POOL_GETSET deep_scrub_interval | expect_false grep '.' + + ceph osd pool get $TEST_POOL_GETSET recovery_priority | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET recovery_priority 5 + ceph osd pool get $TEST_POOL_GETSET recovery_priority | grep 'recovery_priority: 5' + ceph osd pool set $TEST_POOL_GETSET recovery_priority -5 + ceph osd pool get $TEST_POOL_GETSET recovery_priority | grep 'recovery_priority: -5' + ceph osd pool set $TEST_POOL_GETSET recovery_priority 0 + ceph osd pool get $TEST_POOL_GETSET recovery_priority | expect_false grep '.' + expect_false ceph osd pool set $TEST_POOL_GETSET recovery_priority -11 + expect_false ceph osd pool set $TEST_POOL_GETSET recovery_priority 11 + + ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET recovery_op_priority 5 + ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | grep 'recovery_op_priority: 5' + ceph osd pool set $TEST_POOL_GETSET recovery_op_priority 0 + ceph osd pool get $TEST_POOL_GETSET recovery_op_priority | expect_false grep '.' + + ceph osd pool get $TEST_POOL_GETSET scrub_priority | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET scrub_priority 5 + ceph osd pool get $TEST_POOL_GETSET scrub_priority | grep 'scrub_priority: 5' + ceph osd pool set $TEST_POOL_GETSET scrub_priority 0 + ceph osd pool get $TEST_POOL_GETSET scrub_priority | expect_false grep '.' + + expect_false ceph osd pool set $TEST_POOL_GETSET target_size_ratio -3 + expect_false ceph osd pool set $TEST_POOL_GETSET target_size_ratio abc + expect_true ceph osd pool set $TEST_POOL_GETSET target_size_ratio 0.1 + expect_true ceph osd pool set $TEST_POOL_GETSET target_size_ratio 1 + ceph osd pool get $TEST_POOL_GETSET target_size_ratio | grep 'target_size_ratio: 1' + + ceph osd pool set $TEST_POOL_GETSET nopgchange 1 + expect_false ceph osd pool set $TEST_POOL_GETSET pg_num 10 + expect_false ceph osd pool set $TEST_POOL_GETSET pgp_num 10 + ceph osd pool set $TEST_POOL_GETSET nopgchange 0 + ceph osd pool set $TEST_POOL_GETSET pg_num 10 + wait_for_clean + ceph osd pool set $TEST_POOL_GETSET pgp_num 10 + expect_false ceph osd pool set $TEST_POOL_GETSET pg_num 0 + expect_false ceph osd pool set $TEST_POOL_GETSET pgp_num 0 + + old_pgs=$(ceph osd pool get $TEST_POOL_GETSET pg_num | sed -e 's/pg_num: //') + new_pgs=$(($old_pgs + $(ceph osd stat --format json | jq '.num_osds') * 32)) + ceph osd pool set $TEST_POOL_GETSET pg_num $new_pgs + ceph osd pool set $TEST_POOL_GETSET pgp_num $new_pgs + wait_for_clean + + ceph osd pool set $TEST_POOL_GETSET nosizechange 1 + expect_false ceph osd pool set $TEST_POOL_GETSET size 2 + expect_false ceph osd pool set $TEST_POOL_GETSET min_size 2 + ceph osd pool set $TEST_POOL_GETSET nosizechange 0 + ceph osd pool set $TEST_POOL_GETSET size 2 + wait_for_clean + ceph osd pool set $TEST_POOL_GETSET min_size 2 + + expect_false ceph osd pool set $TEST_POOL_GETSET hashpspool 0 + ceph osd pool set $TEST_POOL_GETSET hashpspool 0 --yes-i-really-mean-it + + expect_false ceph osd pool set $TEST_POOL_GETSET hashpspool 1 + ceph osd pool set $TEST_POOL_GETSET hashpspool 1 --yes-i-really-mean-it + + ceph osd pool get rbd crush_rule | 
grep 'crush_rule: ' + + ceph osd pool get $TEST_POOL_GETSET compression_mode | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET compression_mode aggressive + ceph osd pool get $TEST_POOL_GETSET compression_mode | grep 'aggressive' + ceph osd pool set $TEST_POOL_GETSET compression_mode unset + ceph osd pool get $TEST_POOL_GETSET compression_mode | expect_false grep '.' + + ceph osd pool get $TEST_POOL_GETSET compression_algorithm | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET compression_algorithm zlib + ceph osd pool get $TEST_POOL_GETSET compression_algorithm | grep 'zlib' + ceph osd pool set $TEST_POOL_GETSET compression_algorithm unset + ceph osd pool get $TEST_POOL_GETSET compression_algorithm | expect_false grep '.' + + ceph osd pool get $TEST_POOL_GETSET compression_required_ratio | expect_false grep '.' + expect_false ceph osd pool set $TEST_POOL_GETSET compression_required_ratio 1.1 + expect_false ceph osd pool set $TEST_POOL_GETSET compression_required_ratio -.2 + ceph osd pool set $TEST_POOL_GETSET compression_required_ratio .2 + ceph osd pool get $TEST_POOL_GETSET compression_required_ratio | grep '.2' + ceph osd pool set $TEST_POOL_GETSET compression_required_ratio 0 + ceph osd pool get $TEST_POOL_GETSET compression_required_ratio | expect_false grep '.' + + ceph osd pool get $TEST_POOL_GETSET csum_type | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET csum_type crc32c + ceph osd pool get $TEST_POOL_GETSET csum_type | grep 'crc32c' + ceph osd pool set $TEST_POOL_GETSET csum_type unset + ceph osd pool get $TEST_POOL_GETSET csum_type | expect_false grep '.' + + for size in compression_max_blob_size compression_min_blob_size csum_max_block csum_min_block; do + ceph osd pool get $TEST_POOL_GETSET $size | expect_false grep '.' + ceph osd pool set $TEST_POOL_GETSET $size 100 + ceph osd pool get $TEST_POOL_GETSET $size | grep '100' + ceph osd pool set $TEST_POOL_GETSET $size 0 + ceph osd pool get $TEST_POOL_GETSET $size | expect_false grep '.' 
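+        # note: for each of these byte-size options, setting 0 clears the
+        # value again, which is why the final "get" is expected to print
+        # nothing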
+ done + + ceph osd pool set $TEST_POOL_GETSET nodelete 1 + expect_false ceph osd pool delete $TEST_POOL_GETSET $TEST_POOL_GETSET --yes-i-really-really-mean-it + ceph osd pool set $TEST_POOL_GETSET nodelete 0 + ceph osd pool delete $TEST_POOL_GETSET $TEST_POOL_GETSET --yes-i-really-really-mean-it + +} + +function test_mon_osd_tiered_pool_set() +{ + # this is really a tier pool + ceph osd pool create real-tier 2 + ceph osd tier add rbd real-tier + + # expect us to be unable to set negative values for hit_set_* + for o in hit_set_period hit_set_count hit_set_fpp; do + expect_false ceph osd pool set real_tier $o -1 + done + + # and hit_set_fpp should be in range 0..1 + expect_false ceph osd pool set real_tier hit_set_fpp 2 + + ceph osd pool set real-tier hit_set_type explicit_hash + ceph osd pool get real-tier hit_set_type | grep "hit_set_type: explicit_hash" + ceph osd pool set real-tier hit_set_type explicit_object + ceph osd pool get real-tier hit_set_type | grep "hit_set_type: explicit_object" + ceph osd pool set real-tier hit_set_type bloom + ceph osd pool get real-tier hit_set_type | grep "hit_set_type: bloom" + expect_false ceph osd pool set real-tier hit_set_type i_dont_exist + ceph osd pool set real-tier hit_set_period 123 + ceph osd pool get real-tier hit_set_period | grep "hit_set_period: 123" + ceph osd pool set real-tier hit_set_count 12 + ceph osd pool get real-tier hit_set_count | grep "hit_set_count: 12" + ceph osd pool set real-tier hit_set_fpp .01 + ceph osd pool get real-tier hit_set_fpp | grep "hit_set_fpp: 0.01" + + ceph osd pool set real-tier target_max_objects 123 + ceph osd pool get real-tier target_max_objects | \ + grep 'target_max_objects:[ \t]\+123' + ceph osd pool set real-tier target_max_bytes 123456 + ceph osd pool get real-tier target_max_bytes | \ + grep 'target_max_bytes:[ \t]\+123456' + ceph osd pool set real-tier cache_target_dirty_ratio .123 + ceph osd pool get real-tier cache_target_dirty_ratio | \ + grep 'cache_target_dirty_ratio:[ \t]\+0.123' + expect_false ceph osd pool set real-tier cache_target_dirty_ratio -.2 + expect_false ceph osd pool set real-tier cache_target_dirty_ratio 1.1 + ceph osd pool set real-tier cache_target_dirty_high_ratio .123 + ceph osd pool get real-tier cache_target_dirty_high_ratio | \ + grep 'cache_target_dirty_high_ratio:[ \t]\+0.123' + expect_false ceph osd pool set real-tier cache_target_dirty_high_ratio -.2 + expect_false ceph osd pool set real-tier cache_target_dirty_high_ratio 1.1 + ceph osd pool set real-tier cache_target_full_ratio .123 + ceph osd pool get real-tier cache_target_full_ratio | \ + grep 'cache_target_full_ratio:[ \t]\+0.123' + ceph osd dump -f json-pretty | grep '"cache_target_full_ratio_micro": 123000' + ceph osd pool set real-tier cache_target_full_ratio 1.0 + ceph osd pool set real-tier cache_target_full_ratio 0 + expect_false ceph osd pool set real-tier cache_target_full_ratio 1.1 + ceph osd pool set real-tier cache_min_flush_age 123 + ceph osd pool get real-tier cache_min_flush_age | \ + grep 'cache_min_flush_age:[ \t]\+123' + ceph osd pool set real-tier cache_min_evict_age 234 + ceph osd pool get real-tier cache_min_evict_age | \ + grep 'cache_min_evict_age:[ \t]\+234' + + # iec vs si units + ceph osd pool set real-tier target_max_objects 1K + ceph osd pool get real-tier target_max_objects | grep 1000 + for o in target_max_bytes target_size_bytes compression_max_blob_size compression_min_blob_size csum_max_block csum_min_block; do + ceph osd pool set real-tier $o 1Ki # no i suffix + val=$(ceph osd 
pool get real-tier $o --format=json | jq -c ".$o") + [[ $val == 1024 ]] + ceph osd pool set real-tier $o 1M # with i suffix + val=$(ceph osd pool get real-tier $o --format=json | jq -c ".$o") + [[ $val == 1048576 ]] + done + + # this is not a tier pool + ceph osd pool create fake-tier 2 + ceph osd pool application enable fake-tier rados + wait_for_clean + + expect_false ceph osd pool set fake-tier hit_set_type explicit_hash + expect_false ceph osd pool get fake-tier hit_set_type + expect_false ceph osd pool set fake-tier hit_set_type explicit_object + expect_false ceph osd pool get fake-tier hit_set_type + expect_false ceph osd pool set fake-tier hit_set_type bloom + expect_false ceph osd pool get fake-tier hit_set_type + expect_false ceph osd pool set fake-tier hit_set_type i_dont_exist + expect_false ceph osd pool set fake-tier hit_set_period 123 + expect_false ceph osd pool get fake-tier hit_set_period + expect_false ceph osd pool set fake-tier hit_set_count 12 + expect_false ceph osd pool get fake-tier hit_set_count + expect_false ceph osd pool set fake-tier hit_set_fpp .01 + expect_false ceph osd pool get fake-tier hit_set_fpp + + expect_false ceph osd pool set fake-tier target_max_objects 123 + expect_false ceph osd pool get fake-tier target_max_objects + expect_false ceph osd pool set fake-tier target_max_bytes 123456 + expect_false ceph osd pool get fake-tier target_max_bytes + expect_false ceph osd pool set fake-tier cache_target_dirty_ratio .123 + expect_false ceph osd pool get fake-tier cache_target_dirty_ratio + expect_false ceph osd pool set fake-tier cache_target_dirty_ratio -.2 + expect_false ceph osd pool set fake-tier cache_target_dirty_ratio 1.1 + expect_false ceph osd pool set fake-tier cache_target_dirty_high_ratio .123 + expect_false ceph osd pool get fake-tier cache_target_dirty_high_ratio + expect_false ceph osd pool set fake-tier cache_target_dirty_high_ratio -.2 + expect_false ceph osd pool set fake-tier cache_target_dirty_high_ratio 1.1 + expect_false ceph osd pool set fake-tier cache_target_full_ratio .123 + expect_false ceph osd pool get fake-tier cache_target_full_ratio + expect_false ceph osd pool set fake-tier cache_target_full_ratio 1.0 + expect_false ceph osd pool set fake-tier cache_target_full_ratio 0 + expect_false ceph osd pool set fake-tier cache_target_full_ratio 1.1 + expect_false ceph osd pool set fake-tier cache_min_flush_age 123 + expect_false ceph osd pool get fake-tier cache_min_flush_age + expect_false ceph osd pool set fake-tier cache_min_evict_age 234 + expect_false ceph osd pool get fake-tier cache_min_evict_age + + ceph osd tier remove rbd real-tier + ceph osd pool delete real-tier real-tier --yes-i-really-really-mean-it + ceph osd pool delete fake-tier fake-tier --yes-i-really-really-mean-it +} + +function test_mon_osd_erasure_code() +{ + + ceph osd erasure-code-profile set fooprofile a=b c=d + ceph osd erasure-code-profile set fooprofile a=b c=d + expect_false ceph osd erasure-code-profile set fooprofile a=b c=d e=f + ceph osd erasure-code-profile set fooprofile a=b c=d e=f --force + ceph osd erasure-code-profile set fooprofile a=b c=d e=f + expect_false ceph osd erasure-code-profile set fooprofile a=b c=d e=f g=h + # make sure rule-foo doesn't work anymore + expect_false ceph osd erasure-code-profile set barprofile ruleset-failure-domain=host + ceph osd erasure-code-profile set barprofile crush-failure-domain=host + # clean up + ceph osd erasure-code-profile rm fooprofile + ceph osd erasure-code-profile rm barprofile + + # try weird k 
and m values + expect_false ceph osd erasure-code-profile set badk k=1 m=1 + expect_false ceph osd erasure-code-profile set badk k=1 m=2 + expect_false ceph osd erasure-code-profile set badk k=0 m=2 + expect_false ceph osd erasure-code-profile set badk k=-1 m=2 + expect_false ceph osd erasure-code-profile set badm k=2 m=0 + expect_false ceph osd erasure-code-profile set badm k=2 m=-1 + ceph osd erasure-code-profile set good k=2 m=1 + ceph osd erasure-code-profile rm good +} + +function test_mon_osd_misc() +{ + set +e + + # expect error about missing 'pool' argument + ceph osd map 2>$TMPFILE; check_response 'pool' $? 22 + + # expect error about unused argument foo + ceph osd ls foo 2>$TMPFILE; check_response 'unused' $? 22 + + # expect "not in range" for invalid overload percentage + ceph osd reweight-by-utilization 80 2>$TMPFILE; check_response 'higher than 100' $? 22 + + set -e + + local old_bytes_per_osd=$(ceph config get mgr mon_reweight_min_bytes_per_osd) + local old_pgs_per_osd=$(ceph config get mgr mon_reweight_min_pgs_per_osd) + # otherwise ceph-mgr complains like: + # Error EDOM: Refusing to reweight: we only have 5372 kb used across all osds! + # Error EDOM: Refusing to reweight: we only have 20 PGs across 3 osds! + ceph config set mgr mon_reweight_min_bytes_per_osd 0 + ceph config set mgr mon_reweight_min_pgs_per_osd 0 + ceph osd reweight-by-utilization 110 + ceph osd reweight-by-utilization 110 .5 + expect_false ceph osd reweight-by-utilization 110 0 + expect_false ceph osd reweight-by-utilization 110 -0.1 + ceph osd test-reweight-by-utilization 110 .5 --no-increasing + ceph osd test-reweight-by-utilization 110 .5 4 --no-increasing + expect_false ceph osd test-reweight-by-utilization 110 .5 0 --no-increasing + expect_false ceph osd test-reweight-by-utilization 110 .5 -10 --no-increasing + ceph osd reweight-by-pg 110 + ceph osd test-reweight-by-pg 110 .5 + ceph osd reweight-by-pg 110 rbd + ceph osd reweight-by-pg 110 .5 rbd + expect_false ceph osd reweight-by-pg 110 boguspoolasdfasdfasdf + # restore the setting + ceph config set mgr mon_reweight_min_bytes_per_osd $old_bytes_per_osd + ceph config set mgr mon_reweight_min_pgs_per_osd $old_pgs_per_osd +} + +function test_admin_heap_profiler() +{ + do_test=1 + set +e + # expect 'heap' commands to be correctly parsed + ceph tell osd.0 heap stats 2>$TMPFILE + if [[ $? 
-eq 22 && `grep 'tcmalloc not enabled' $TMPFILE` ]]; then + echo "tcmalloc not enabled; skip heap profiler test" + do_test=0 + fi + set -e + + [[ $do_test -eq 0 ]] && return 0 + + $SUDO ceph tell osd.0 heap start_profiler + $SUDO ceph tell osd.0 heap dump + $SUDO ceph tell osd.0 heap stop_profiler + $SUDO ceph tell osd.0 heap release +} + +function test_osd_bench() +{ + # test osd bench limits + # As we should not rely on defaults (as they may change over time), + # lets inject some values and perform some simple tests + # max iops: 10 # 100 IOPS + # max throughput: 10485760 # 10MB/s + # max block size: 2097152 # 2MB + # duration: 10 # 10 seconds + + local args="\ + --osd-bench-duration 10 \ + --osd-bench-max-block-size 2097152 \ + --osd-bench-large-size-max-throughput 10485760 \ + --osd-bench-small-size-max-iops 10" + ceph tell osd.0 injectargs ${args## } + + # anything with a bs larger than 2097152 must fail + expect_false ceph tell osd.0 bench 1 2097153 + # but using 'osd_bench_max_bs' must succeed + ceph tell osd.0 bench 1 2097152 + + # we assume 1MB as a large bs; anything lower is a small bs + # for a 4096 bytes bs, for 10 seconds, we are limited by IOPS + # max count: 409600 (bytes) + + # more than max count must not be allowed + expect_false ceph tell osd.0 bench 409601 4096 + # but 409600 must be succeed + ceph tell osd.0 bench 409600 4096 + + # for a large bs, we are limited by throughput. + # for a 2MB block size for 10 seconds, assuming 10MB/s throughput, + # the max count will be (10MB * 10s) = 100MB + # max count: 104857600 (bytes) + + # more than max count must not be allowed + expect_false ceph tell osd.0 bench 104857601 2097152 + # up to max count must be allowed + ceph tell osd.0 bench 104857600 2097152 +} + +function test_osd_negative_filestore_merge_threshold() +{ + $SUDO ceph daemon osd.0 config set filestore_merge_threshold -1 + expect_config_value "osd.0" "filestore_merge_threshold" -1 +} + +function test_mon_tell() +{ + for m in mon.a mon.b; do + ceph tell $m sessions + ceph_watch_start debug audit + ceph tell mon.a sessions + ceph_watch_wait "${m} \[DBG\] from.*cmd='sessions' args=\[\]: dispatch" + done + expect_false ceph tell mon.foo version +} + +function test_mon_ping() +{ + ceph ping mon.a + ceph ping mon.b + expect_false ceph ping mon.foo + + ceph ping mon.\* +} + +function test_mon_deprecated_commands() +{ + # current DEPRECATED commands are marked with FLAG(DEPRECATED) + # + # Testing should be accomplished by setting + # 'mon_debug_deprecated_as_obsolete = true' and expecting ENOTSUP for + # each one of these commands. + + ceph tell mon.* injectargs '--mon-debug-deprecated-as-obsolete' + expect_false ceph config-key list 2> $TMPFILE + check_response "\(EOPNOTSUPP\|ENOTSUP\): command is obsolete" + + ceph tell mon.* injectargs '--no-mon-debug-deprecated-as-obsolete' +} + +function test_mon_cephdf_commands() +{ + # ceph df detail: + # pool section: + # RAW USED The near raw used per pool in raw total + + ceph osd pool create cephdf_for_test 1 1 replicated + ceph osd pool application enable cephdf_for_test rados + ceph osd pool set cephdf_for_test size 2 + + dd if=/dev/zero of=./cephdf_for_test bs=4k count=1 + rados put cephdf_for_test cephdf_for_test -p cephdf_for_test + + #wait for update + for i in `seq 1 10`; do + rados -p cephdf_for_test ls - | grep -q cephdf_for_test && break + sleep 1 + done + # "rados ls" goes straight to osd, but "ceph df" is served by mon. 
so we need + # to sync mon with osd + flush_pg_stats + local jq_filter='.pools | .[] | select(.name == "cephdf_for_test") | .stats' + stored=`ceph df detail --format=json | jq "$jq_filter.stored * 2"` + stored_raw=`ceph df detail --format=json | jq "$jq_filter.stored_raw"` + + ceph osd pool delete cephdf_for_test cephdf_for_test --yes-i-really-really-mean-it + rm ./cephdf_for_test + + expect_false test $stored != $stored_raw +} + +function test_mon_pool_application() +{ + ceph osd pool create app_for_test 16 + + ceph osd pool application enable app_for_test rbd + expect_false ceph osd pool application enable app_for_test rgw + ceph osd pool application enable app_for_test rgw --yes-i-really-mean-it + ceph osd pool ls detail | grep "application rbd,rgw" + ceph osd pool ls detail --format=json | grep '"application_metadata":{"rbd":{},"rgw":{}}' + + expect_false ceph osd pool application set app_for_test cephfs key value + ceph osd pool application set app_for_test rbd key1 value1 + ceph osd pool application set app_for_test rbd key2 value2 + ceph osd pool application set app_for_test rgw key1 value1 + ceph osd pool application get app_for_test rbd key1 | grep 'value1' + ceph osd pool application get app_for_test rbd key2 | grep 'value2' + ceph osd pool application get app_for_test rgw key1 | grep 'value1' + + ceph osd pool ls detail --format=json | grep '"application_metadata":{"rbd":{"key1":"value1","key2":"value2"},"rgw":{"key1":"value1"}}' + + ceph osd pool application rm app_for_test rgw key1 + ceph osd pool ls detail --format=json | grep '"application_metadata":{"rbd":{"key1":"value1","key2":"value2"},"rgw":{}}' + ceph osd pool application rm app_for_test rbd key2 + ceph osd pool ls detail --format=json | grep '"application_metadata":{"rbd":{"key1":"value1"},"rgw":{}}' + ceph osd pool application rm app_for_test rbd key1 + ceph osd pool ls detail --format=json | grep '"application_metadata":{"rbd":{},"rgw":{}}' + ceph osd pool application rm app_for_test rbd key1 # should be idempotent + + expect_false ceph osd pool application disable app_for_test rgw + ceph osd pool application disable app_for_test rgw --yes-i-really-mean-it + ceph osd pool application disable app_for_test rgw --yes-i-really-mean-it # should be idempotent + ceph osd pool ls detail | grep "application rbd" + ceph osd pool ls detail --format=json | grep '"application_metadata":{"rbd":{}}' + + ceph osd pool application disable app_for_test rgw --yes-i-really-mean-it + ceph osd pool ls detail | grep -v "application " + ceph osd pool ls detail --format=json | grep '"application_metadata":{}' + + ceph osd pool rm app_for_test app_for_test --yes-i-really-really-mean-it +} + +function test_mon_tell_help_command() +{ + ceph tell mon.a help | grep sync_force + ceph tell mon.a -h | grep sync_force + ceph tell mon.a config -h | grep 'config diff get' + + # wrong target + expect_false ceph tell mon.zzz help +} + +function test_mon_stdin_stdout() +{ + echo foo | ceph config-key set test_key -i - + ceph config-key get test_key -o - | grep -c foo | grep -q 1 +} + +function test_osd_tell_help_command() +{ + ceph tell osd.1 help + expect_false ceph tell osd.100 help +} + +function test_osd_compact() +{ + ceph tell osd.1 compact + $SUDO ceph daemon osd.1 compact +} + +function test_mds_tell_help_command() +{ + local FS_NAME=cephfs + if ! 
mds_exists ; then + echo "Skipping test, no MDS found" + return + fi + + remove_all_fs + ceph osd pool create fs_data 16 + ceph osd pool create fs_metadata 16 + ceph fs new $FS_NAME fs_metadata fs_data + wait_mds_active $FS_NAME + + + ceph tell mds.a help + expect_false ceph tell mds.z help + + remove_all_fs + ceph osd pool delete fs_data fs_data --yes-i-really-really-mean-it + ceph osd pool delete fs_metadata fs_metadata --yes-i-really-really-mean-it +} + +function test_mgr_tell() +{ + ceph tell mgr version +} + +function test_mgr_devices() +{ + ceph device ls + expect_false ceph device info doesnotexist + expect_false ceph device get-health-metrics doesnotexist +} + +function test_per_pool_scrub_status() +{ + ceph osd pool create noscrub_pool 16 + ceph osd pool create noscrub_pool2 16 + ceph -s | expect_false grep -q "Some pool(s) have the.*scrub.* flag(s) set" + ceph -s --format json | \ + jq .health.checks.POOL_SCRUB_FLAGS.summary.message | \ + expect_false grep -q "Some pool(s) have the.*scrub.* flag(s) set" + ceph report | jq .health.checks.POOL_SCRUB_FLAGS.detail | + expect_false grep -q "Pool .* has .*scrub.* flag" + ceph health detail | jq .health.checks.POOL_SCRUB_FLAGS.detail | \ + expect_false grep -q "Pool .* has .*scrub.* flag" + + ceph osd pool set noscrub_pool noscrub 1 + ceph -s | expect_true grep -q "Some pool(s) have the noscrub flag(s) set" + ceph -s --format json | \ + jq .health.checks.POOL_SCRUB_FLAGS.summary.message | \ + expect_true grep -q "Some pool(s) have the noscrub flag(s) set" + ceph report | jq .health.checks.POOL_SCRUB_FLAGS.detail | \ + expect_true grep -q "Pool noscrub_pool has noscrub flag" + ceph health detail | expect_true grep -q "Pool noscrub_pool has noscrub flag" + + ceph osd pool set noscrub_pool nodeep-scrub 1 + ceph osd pool set noscrub_pool2 nodeep-scrub 1 + ceph -s | expect_true grep -q "Some pool(s) have the noscrub, nodeep-scrub flag(s) set" + ceph -s --format json | \ + jq .health.checks.POOL_SCRUB_FLAGS.summary.message | \ + expect_true grep -q "Some pool(s) have the noscrub, nodeep-scrub flag(s) set" + ceph report | jq .health.checks.POOL_SCRUB_FLAGS.detail | \ + expect_true grep -q "Pool noscrub_pool has noscrub flag" + ceph report | jq .health.checks.POOL_SCRUB_FLAGS.detail | \ + expect_true grep -q "Pool noscrub_pool has nodeep-scrub flag" + ceph report | jq .health.checks.POOL_SCRUB_FLAGS.detail | \ + expect_true grep -q "Pool noscrub_pool2 has nodeep-scrub flag" + ceph health detail | expect_true grep -q "Pool noscrub_pool has noscrub flag" + ceph health detail | expect_true grep -q "Pool noscrub_pool has nodeep-scrub flag" + ceph health detail | expect_true grep -q "Pool noscrub_pool2 has nodeep-scrub flag" + + ceph osd pool rm noscrub_pool noscrub_pool --yes-i-really-really-mean-it + ceph osd pool rm noscrub_pool2 noscrub_pool2 --yes-i-really-really-mean-it +} + +# +# New tests should be added to the TESTS array below +# +# Individual tests may be run using the '-t <testname>' argument +# The user can specify '-t <testname>' as many times as she wants +# +# Tests will be run in order presented in the TESTS array, or in +# the order specified by the '-t <testname>' options. +# +# '-l' will list all the available test names +# '-h' will show usage +# +# The test maintains backward compatibility: not specifying arguments +# will run all tests following the order they appear in the TESTS array. 
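+#
+# Illustrative invocations (editorial examples; the flags are the ones parsed
+# below and the test names are entries of the TESTS array):
+#
+#   $0 -l                            # list the available tests
+#   $0 -t mon_osd_pool -t mon_pg     # run only these two tests, in this order
+#   $0 --test-osd                    # run the whole OSD test group
+#   $0 --no-sanity-check -t mon_pg   # skip the osd-down sanity checks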
+# + +set +x +MON_TESTS+=" mon_injectargs" +MON_TESTS+=" mon_injectargs_SI" +for i in `seq 9`; do + MON_TESTS+=" tiering_$i"; +done +MON_TESTS+=" auth" +MON_TESTS+=" auth_profiles" +MON_TESTS+=" mon_misc" +MON_TESTS+=" mon_mon" +MON_TESTS+=" mon_osd" +MON_TESTS+=" mon_config_key" +MON_TESTS+=" mon_crush" +MON_TESTS+=" mon_osd_create_destroy" +MON_TESTS+=" mon_osd_pool" +MON_TESTS+=" mon_osd_pool_quota" +MON_TESTS+=" mon_pg" +MON_TESTS+=" mon_osd_pool_set" +MON_TESTS+=" mon_osd_tiered_pool_set" +MON_TESTS+=" mon_osd_erasure_code" +MON_TESTS+=" mon_osd_misc" +MON_TESTS+=" mon_tell" +MON_TESTS+=" mon_ping" +MON_TESTS+=" mon_deprecated_commands" +MON_TESTS+=" mon_caps" +MON_TESTS+=" mon_cephdf_commands" +MON_TESTS+=" mon_tell_help_command" +MON_TESTS+=" mon_stdin_stdout" + +OSD_TESTS+=" osd_bench" +OSD_TESTS+=" osd_negative_filestore_merge_threshold" +OSD_TESTS+=" tiering_agent" +OSD_TESTS+=" admin_heap_profiler" +OSD_TESTS+=" osd_tell_help_command" +OSD_TESTS+=" osd_compact" +OSD_TESTS+=" per_pool_scrub_status" + +MDS_TESTS+=" mds_tell" +MDS_TESTS+=" mon_mds" +MDS_TESTS+=" mon_mds_metadata" +MDS_TESTS+=" mds_tell_help_command" + +MGR_TESTS+=" mgr_tell" +MGR_TESTS+=" mgr_devices" + +TESTS+=$MON_TESTS +TESTS+=$OSD_TESTS +TESTS+=$MDS_TESTS +TESTS+=$MGR_TESTS + +# +# "main" follows +# + +function list_tests() +{ + echo "AVAILABLE TESTS" + for i in $TESTS; do + echo " $i" + done +} + +function usage() +{ + echo "usage: $0 [-h|-l|-t <testname> [-t <testname>...]]" +} + +tests_to_run=() + +sanity_check=true + +while [[ $# -gt 0 ]]; do + opt=$1 + + case "$opt" in + "-l" ) + do_list=1 + ;; + "--asok-does-not-need-root" ) + SUDO="" + ;; + "--no-sanity-check" ) + sanity_check=false + ;; + "--test-mon" ) + tests_to_run+="$MON_TESTS" + ;; + "--test-osd" ) + tests_to_run+="$OSD_TESTS" + ;; + "--test-mds" ) + tests_to_run+="$MDS_TESTS" + ;; + "--test-mgr" ) + tests_to_run+="$MGR_TESTS" + ;; + "-t" ) + shift + if [[ -z "$1" ]]; then + echo "missing argument to '-t'" + usage ; + exit 1 + fi + tests_to_run+=" $1" + ;; + "-h" ) + usage ; + exit 0 + ;; + esac + shift +done + +if [[ $do_list -eq 1 ]]; then + list_tests ; + exit 0 +fi + +ceph osd pool create rbd 16 + +if test -z "$tests_to_run" ; then + tests_to_run="$TESTS" +fi + +if $sanity_check ; then + wait_no_osd_down +fi +for i in $tests_to_run; do + if $sanity_check ; then + check_no_osd_down + fi + set -x + test_${i} + set +x +done +if $sanity_check ; then + check_no_osd_down +fi + +set -x + +echo OK diff --git a/qa/workunits/cephtool/test_daemon.sh b/qa/workunits/cephtool/test_daemon.sh new file mode 100755 index 000000000..08ae937cc --- /dev/null +++ b/qa/workunits/cephtool/test_daemon.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +set -ex + +expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +echo note: assuming mon.a is on the current host + +# can set to 'sudo ./ceph' to execute tests from current dir for development +CEPH=${CEPH:-'sudo ceph'} + +${CEPH} daemon mon.a version | grep version + +# get debug_ms setting and strip it, painfully for reuse +old_ms=$(${CEPH} daemon mon.a config get debug_ms | \ + grep debug_ms | sed -e 's/.*: //' -e 's/["\}\\]//g') +${CEPH} daemon mon.a config set debug_ms 13 +new_ms=$(${CEPH} daemon mon.a config get debug_ms | \ + grep debug_ms | sed -e 's/.*: //' -e 's/["\}\\]//g') +[ "$new_ms" = "13/13" ] +${CEPH} daemon mon.a config set debug_ms $old_ms +new_ms=$(${CEPH} daemon mon.a config get debug_ms | \ + grep debug_ms | sed -e 's/.*: //' -e 's/["\}\\]//g') +[ "$new_ms" = "$old_ms" ] + +# 
unregistered/non-existent command +expect_false ${CEPH} daemon mon.a bogus_command_blah foo + +set +e +OUTPUT=$(${CEPH} -c /not/a/ceph.conf daemon mon.a help 2>&1) +# look for EINVAL +if [ $? != 22 ] ; then exit 1; fi +if ! echo "$OUTPUT" | grep -q '.*open.*/not/a/ceph.conf'; then + echo "didn't find expected error in bad conf search" + exit 1 +fi +set -e + +echo OK diff --git a/qa/workunits/cephtool/test_kvstore_tool.sh b/qa/workunits/cephtool/test_kvstore_tool.sh new file mode 100755 index 000000000..b7953dd21 --- /dev/null +++ b/qa/workunits/cephtool/test_kvstore_tool.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash + +set -x + +source $(dirname $0)/../../standalone/ceph-helpers.sh + +set -e +set -o functrace +PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: ' +SUDO=${SUDO:-sudo} +export CEPH_DEV=1 + +echo note: test ceph_kvstore_tool with bluestore + +expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +TEMP_DIR=$(mktemp -d ./cephtool.XXX) +trap "rm -fr $TEMP_DIR" 0 + +TEMP_FILE=$(mktemp $TEMP_DIR/test_invalid.XXX) + +function test_ceph_kvstore_tool() +{ + # create a data directory + ceph-objectstore-tool --data-path ${TEMP_DIR} --op mkfs --no-mon-config + + # list + origin_kv_nums=`ceph-kvstore-tool bluestore-kv ${TEMP_DIR} list 2>/dev/null | wc -l` + + # exists + prefix=`ceph-kvstore-tool bluestore-kv ${TEMP_DIR} list 2>/dev/null | head -n 1 | awk '{print $1}'` + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} exists ${prefix} + expect_false ceph-kvstore-tool bluestore-kv ${TEMP_DIR} exists ${prefix}notexist + + # list-crc + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} list-crc + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} list-crc ${prefix} + + # list with prefix + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} list ${prefix} + + # set + echo "helloworld" >> ${TEMP_FILE} + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} set TESTPREFIX TESTKEY in ${TEMP_FILE} + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} exists TESTPREFIX TESTKEY + + # get + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} get TESTPREFIX TESTKEY out ${TEMP_FILE}.bak + diff ${TEMP_FILE} ${TEMP_FILE}.bak + + # rm + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} rm TESTPREFIX TESTKEY + expect_false ceph-kvstore-tool bluestore-kv ${TEMP_DIR} exists TESTPREFIX TESTKEY + + # compact + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} compact + + # destructive-repair + ceph-kvstore-tool bluestore-kv ${TEMP_DIR} destructive-repair + + current_kv_nums=`ceph-kvstore-tool bluestore-kv ${TEMP_DIR} list 2>/dev/null | wc -l` + test ${origin_kv_nums} -eq ${current_kv_nums} +} + +test_ceph_kvstore_tool + +echo OK diff --git a/qa/workunits/client/test.sh b/qa/workunits/client/test.sh new file mode 100755 index 000000000..12abd3a5d --- /dev/null +++ b/qa/workunits/client/test.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +set -ex + +ceph_test_client diff --git a/qa/workunits/cls/test_cls_2pc_queue.sh b/qa/workunits/cls/test_cls_2pc_queue.sh new file mode 100755 index 000000000..b4f68800f --- /dev/null +++ b/qa/workunits/cls/test_cls_2pc_queue.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_2pc_queue + +exit 0 diff --git a/qa/workunits/cls/test_cls_cas.sh b/qa/workunits/cls/test_cls_cas.sh new file mode 100755 index 000000000..765913482 --- /dev/null +++ b/qa/workunits/cls/test_cls_cas.sh @@ -0,0 +1,6 @@ +#!/bin/sh -e + +GTEST_FILTER=${CLS_CAS_GTEST_FILTER:-*} +ceph_test_cls_cas --gtest_filter=${GTEST_FILTER} + +exit 0 diff --git a/qa/workunits/cls/test_cls_cmpomap.sh b/qa/workunits/cls/test_cls_cmpomap.sh new file mode 100755 index 000000000..af079f6e6 --- /dev/null 
+++ b/qa/workunits/cls/test_cls_cmpomap.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_cmpomap + +exit 0 diff --git a/qa/workunits/cls/test_cls_hello.sh b/qa/workunits/cls/test_cls_hello.sh new file mode 100755 index 000000000..0a2e09620 --- /dev/null +++ b/qa/workunits/cls/test_cls_hello.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_hello + +exit 0 diff --git a/qa/workunits/cls/test_cls_journal.sh b/qa/workunits/cls/test_cls_journal.sh new file mode 100755 index 000000000..9aa7450a9 --- /dev/null +++ b/qa/workunits/cls/test_cls_journal.sh @@ -0,0 +1,6 @@ +#!/bin/sh -e + +GTEST_FILTER=${CLS_JOURNAL_GTEST_FILTER:-*} +ceph_test_cls_journal --gtest_filter=${GTEST_FILTER} + +exit 0 diff --git a/qa/workunits/cls/test_cls_lock.sh b/qa/workunits/cls/test_cls_lock.sh new file mode 100755 index 000000000..c14527053 --- /dev/null +++ b/qa/workunits/cls/test_cls_lock.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_lock + +exit 0 diff --git a/qa/workunits/cls/test_cls_log.sh b/qa/workunits/cls/test_cls_log.sh new file mode 100755 index 000000000..523f985e7 --- /dev/null +++ b/qa/workunits/cls/test_cls_log.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_log + +exit 0 diff --git a/qa/workunits/cls/test_cls_numops.sh b/qa/workunits/cls/test_cls_numops.sh new file mode 100755 index 000000000..dcbafcab2 --- /dev/null +++ b/qa/workunits/cls/test_cls_numops.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_numops + +exit 0 diff --git a/qa/workunits/cls/test_cls_rbd.sh b/qa/workunits/cls/test_cls_rbd.sh new file mode 100755 index 000000000..fd4bec0f8 --- /dev/null +++ b/qa/workunits/cls/test_cls_rbd.sh @@ -0,0 +1,6 @@ +#!/bin/sh -e + +GTEST_FILTER=${CLS_RBD_GTEST_FILTER:-*} +ceph_test_cls_rbd --gtest_filter=${GTEST_FILTER} + +exit 0 diff --git a/qa/workunits/cls/test_cls_refcount.sh b/qa/workunits/cls/test_cls_refcount.sh new file mode 100755 index 000000000..d722f5ad9 --- /dev/null +++ b/qa/workunits/cls/test_cls_refcount.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_refcount + +exit 0 diff --git a/qa/workunits/cls/test_cls_rgw.sh b/qa/workunits/cls/test_cls_rgw.sh new file mode 100755 index 000000000..257338a05 --- /dev/null +++ b/qa/workunits/cls/test_cls_rgw.sh @@ -0,0 +1,8 @@ +#!/bin/sh -e + +ceph_test_cls_rgw +#ceph_test_cls_rgw_meta +#ceph_test_cls_rgw_log +#ceph_test_cls_rgw_opstate + +exit 0 diff --git a/qa/workunits/cls/test_cls_rgw_gc.sh b/qa/workunits/cls/test_cls_rgw_gc.sh new file mode 100755 index 000000000..0266438f8 --- /dev/null +++ b/qa/workunits/cls/test_cls_rgw_gc.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_rgw_gc + +exit 0 diff --git a/qa/workunits/cls/test_cls_rgw_stats.sh b/qa/workunits/cls/test_cls_rgw_stats.sh new file mode 100755 index 000000000..e1b5bd6b9 --- /dev/null +++ b/qa/workunits/cls/test_cls_rgw_stats.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_rgw_stats + +exit 0 diff --git a/qa/workunits/cls/test_cls_sdk.sh b/qa/workunits/cls/test_cls_sdk.sh new file mode 100755 index 000000000..f1ccdc3b4 --- /dev/null +++ b/qa/workunits/cls/test_cls_sdk.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_cls_sdk + +exit 0 diff --git a/qa/workunits/direct_io/.gitignore b/qa/workunits/direct_io/.gitignore new file mode 100644 index 000000000..80f1fd1aa --- /dev/null +++ b/qa/workunits/direct_io/.gitignore @@ -0,0 +1,3 @@ +/direct_io_test +/test_sync_io +/test_short_dio_read diff --git a/qa/workunits/direct_io/Makefile b/qa/workunits/direct_io/Makefile new file mode 100644 index 000000000..20fec0be5 --- /dev/null +++ b/qa/workunits/direct_io/Makefile @@ -0,0 +1,11 @@ 
+CFLAGS = -Wall -Wextra -D_GNU_SOURCE + +TARGETS = direct_io_test test_sync_io test_short_dio_read + +.c: + $(CC) $(CFLAGS) $@.c -o $@ + +all: $(TARGETS) + +clean: + rm $(TARGETS) diff --git a/qa/workunits/direct_io/big.sh b/qa/workunits/direct_io/big.sh new file mode 100755 index 000000000..43bd6d72b --- /dev/null +++ b/qa/workunits/direct_io/big.sh @@ -0,0 +1,6 @@ +#!/bin/sh -ex + +echo "test large (16MB) dio write" +dd if=/dev/zero of=foo.big bs=16M count=1 oflag=direct + +echo OK diff --git a/qa/workunits/direct_io/direct_io_test.c b/qa/workunits/direct_io/direct_io_test.c new file mode 100644 index 000000000..ccfbbb860 --- /dev/null +++ b/qa/workunits/direct_io/direct_io_test.c @@ -0,0 +1,312 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2011 New Dream Network + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <errno.h> +#include <inttypes.h> +#include <fcntl.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +/* + * direct_io_test + * + * This test does some I/O using O_DIRECT. + * + * Semantics of O_DIRECT can be found at http://lwn.net/Articles/348739/ + * + */ + +static int g_num_pages = 100; + +static int g_duration = 10; + +struct chunk { + uint64_t offset; + uint64_t pad0; + uint64_t pad1; + uint64_t pad2; + uint64_t pad3; + uint64_t pad4; + uint64_t pad5; + uint64_t not_offset; +} __attribute__((packed)); + +static int page_size; + +static char temp_file[] = "direct_io_temp_file_XXXXXX"; + +static int safe_write(int fd, const void *buf, signed int len) +{ + const char *b = (const char*)buf; + /* Handle EINTR and short writes */ + while (1) { + int res = write(fd, b, len); + if (res < 0) { + int err = errno; + if (err != EINTR) { + return err; + } + } + len -= res; + b += res; + if (len <= 0) + return 0; + } +} + +static int do_read(int fd, char *buf, int buf_sz) +{ + /* We assume no short reads or EINTR. It's not really clear how + * those things interact with O_DIRECT. 
*/ + int ret = read(fd, buf, buf_sz); + if (ret < 0) { + int err = errno; + printf("do_read: error: %d (%s)\n", err, strerror(err)); + return err; + } + if (ret != buf_sz) { + printf("do_read: short read\n"); + return -EIO; + } + return 0; +} + +static int setup_temp_file(void) +{ + int fd; + int64_t num_chunks, i; + + if (page_size % sizeof(struct chunk)) { + printf("setup_big_file: page_size doesn't divide evenly " + "into data blocks.\n"); + return -EINVAL; + } + + fd = mkstemp(temp_file); + if (fd < 0) { + int err = errno; + printf("setup_big_file: mkostemps failed with error %d\n", err); + return err; + } + + num_chunks = g_num_pages * (page_size / sizeof(struct chunk)); + for (i = 0; i < num_chunks; ++i) { + int ret; + struct chunk c; + memset(&c, 0, sizeof(c)); + c.offset = i * sizeof(struct chunk); + c.pad0 = 0; + c.pad1 = 1; + c.pad2 = 2; + c.pad3 = 3; + c.pad4 = 4; + c.pad5 = 5; + c.not_offset = ~c.offset; + ret = safe_write(fd, &c, sizeof(struct chunk)); + if (ret) { + printf("setup_big_file: safe_write failed with " + "error: %d\n", ret); + TEMP_FAILURE_RETRY(close(fd)); + unlink(temp_file); + return ret; + } + } + TEMP_FAILURE_RETRY(close(fd)); + return 0; +} + +static int verify_chunk(const struct chunk *c, uint64_t offset) +{ + if (c->offset != offset) { + printf("verify_chunk(%" PRId64 "): bad offset value (got: %" + PRId64 ", expected: %" PRId64 "\n", offset, c->offset, offset); + return EIO; + } + if (c->pad0 != 0) { + printf("verify_chunk(%" PRId64 "): bad pad0 value\n", offset); + return EIO; + } + if (c->pad1 != 1) { + printf("verify_chunk(%" PRId64 "): bad pad1 value\n", offset); + return EIO; + } + if (c->pad2 != 2) { + printf("verify_chunk(%" PRId64 "): bad pad2 value\n", offset); + return EIO; + } + if (c->pad3 != 3) { + printf("verify_chunk(%" PRId64 "): bad pad3 value\n", offset); + return EIO; + } + if (c->pad4 != 4) { + printf("verify_chunk(%" PRId64 "): bad pad4 value\n", offset); + return EIO; + } + if (c->pad5 != 5) { + printf("verify_chunk(%" PRId64 "): bad pad5 value\n", offset); + return EIO; + } + if (c->not_offset != ~offset) { + printf("verify_chunk(%" PRId64 "): bad not_offset value\n", + offset); + return EIO; + } + return 0; +} + +static int do_o_direct_reads(void) +{ + int fd, ret; + unsigned int i; + void *buf = 0; + time_t cur_time, end_time; + ret = posix_memalign(&buf, page_size, page_size); + if (ret) { + printf("do_o_direct_reads: posix_memalign returned %d\n", ret); + goto done; + } + + fd = open(temp_file, O_RDONLY | O_DIRECT); + if (fd < 0) { + ret = errno; + printf("do_o_direct_reads: error opening fd: %d\n", ret); + goto free_buf; + } + + // read the first chunk and see if it looks OK + ret = do_read(fd, buf, page_size); + if (ret) + goto close_fd; + ret = verify_chunk((struct chunk*)buf, 0); + if (ret) + goto close_fd; + + // read some random chunks and see how they look + cur_time = time(NULL); + end_time = cur_time + g_duration; + i = 0; + do { + time_t next_time; + uint64_t offset; + int page; + unsigned int seed; + + seed = i++; + page = rand_r(&seed) % g_num_pages; + offset = page; + offset *= page_size; + if (lseek64(fd, offset, SEEK_SET) == -1) { + int err = errno; + printf("lseek64(%" PRId64 ") failed: error %d (%s)\n", + offset, err, strerror(err)); + goto close_fd; + } + ret = do_read(fd, buf, page_size); + if (ret) + goto close_fd; + ret = verify_chunk((struct chunk*)buf, offset); + if (ret) + goto close_fd; + next_time = time(NULL); + if (next_time > cur_time) { + printf("."); + } + cur_time = next_time; + } while 
(time(NULL) < end_time); + + printf("\ndo_o_direct_reads: SUCCESS\n"); +close_fd: + TEMP_FAILURE_RETRY(close(fd)); +free_buf: + free(buf); +done: + return ret; +} + +static void usage(char *argv0) +{ + printf("%s: tests direct I/O\n", argv0); + printf("-d <seconds>: sets duration to <seconds>\n"); + printf("-h: this help\n"); + printf("-p <pages>: sets number of pages to allocate\n"); +} + +static void parse_args(int argc, char *argv[]) +{ + int c; + while ((c = getopt (argc, argv, "d:hp:")) != -1) { + switch (c) { + case 'd': + g_duration = atoi(optarg); + if (g_duration <= 0) { + printf("tried to set invalid value of " + "g_duration: %d\n", g_num_pages); + exit(1); + } + break; + case 'h': + usage(argv[0]); + exit(0); + break; + case 'p': + g_num_pages = atoi(optarg); + if (g_num_pages <= 0) { + printf("tried to set invalid value of " + "g_num_pages: %d\n", g_num_pages); + exit(1); + } + break; + case '?': + usage(argv[0]); + exit(1); + break; + default: + usage(argv[0]); + exit(1); + break; + } + } +} + +int main(int argc, char *argv[]) +{ + int ret; + + parse_args(argc, argv); + + setvbuf(stdout, NULL, _IONBF, 0); + + page_size = getpagesize(); + + ret = setup_temp_file(); + if (ret) { + printf("setup_temp_file failed with error %d\n", ret); + goto done; + } + + ret = do_o_direct_reads(); + if (ret) { + printf("do_o_direct_reads failed with error %d\n", ret); + goto unlink_temp_file; + } + +unlink_temp_file: + unlink(temp_file); +done: + return ret; +} diff --git a/qa/workunits/direct_io/misc.sh b/qa/workunits/direct_io/misc.sh new file mode 100755 index 000000000..6de080d2d --- /dev/null +++ b/qa/workunits/direct_io/misc.sh @@ -0,0 +1,16 @@ +#!/bin/sh -ex + +# a few test cases from henry +echo "test read from hole" +dd if=/dev/zero of=dd3 bs=1 seek=1048576 count=0 +dd if=dd3 of=/tmp/ddout1 skip=8 bs=512 count=2 iflag=direct +dd if=/dev/zero of=/tmp/dd3 bs=512 count=2 +cmp /tmp/dd3 /tmp/ddout1 + +echo "other thing" +dd if=/dev/urandom of=/tmp/dd10 bs=500 count=1 +dd if=/tmp/dd10 of=dd10 bs=512 seek=8388 count=1 +dd if=dd10 of=/tmp/dd10out bs=512 skip=8388 count=1 iflag=direct +cmp /tmp/dd10 /tmp/dd10out + +echo OK diff --git a/qa/workunits/direct_io/test_short_dio_read.c b/qa/workunits/direct_io/test_short_dio_read.c new file mode 100644 index 000000000..502485557 --- /dev/null +++ b/qa/workunits/direct_io/test_short_dio_read.c @@ -0,0 +1,57 @@ +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +int main() +{ + char buf[409600]; + ssize_t r; + int err; + int fd = open("shortfile", O_WRONLY|O_CREAT, 0644); + + if (fd < 0) { + err = errno; + printf("error: open() failed with: %d (%s)\n", err, strerror(err)); + exit(err); + } + + printf("writing first 3 bytes of 10k file\n"); + r = write(fd, "foo", 3); + if (r == -1) { + err = errno; + printf("error: write() failed with: %d (%s)\n", err, strerror(err)); + close(fd); + exit(err); + } + r = ftruncate(fd, 10000); + if (r == -1) { + err = errno; + printf("error: ftruncate() failed with: %d (%s)\n", err, strerror(err)); + close(fd); + exit(err); + } + + fsync(fd); + close(fd); + + printf("reading O_DIRECT\n"); + fd = open("shortfile", O_RDONLY|O_DIRECT); + if (fd < 0) { + err = errno; + printf("error: open() failed with: %d (%s)\n", err, strerror(err)); + exit(err); + } + + r = read(fd, buf, sizeof(buf)); + close(fd); + + printf("got %d\n", (int)r); + if (r != 10000) + return 1; + return 0; +} diff --git 
a/qa/workunits/direct_io/test_sync_io.c b/qa/workunits/direct_io/test_sync_io.c new file mode 100644 index 000000000..f393fa6e8 --- /dev/null +++ b/qa/workunits/direct_io/test_sync_io.c @@ -0,0 +1,250 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <inttypes.h> +#include <linux/types.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <errno.h> + +//#include "../client/ioctl.h" + +#include <linux/ioctl.h> +#define CEPH_IOCTL_MAGIC 0x97 +#define CEPH_IOC_SYNCIO _IO(CEPH_IOCTL_MAGIC, 5) + +void write_pattern() +{ + printf("writing pattern\n"); + + uint64_t i; + int r; + + int fd = open("foo", O_CREAT|O_WRONLY, 0644); + if (fd < 0) { + r = errno; + printf("write_pattern: error: open() failed with: %d (%s)\n", r, strerror(r)); + exit(r); + } + for (i=0; i<1048576 * sizeof(i); i += sizeof(i)) { + r = write(fd, &i, sizeof(i)); + if (r == -1) { + r = errno; + printf("write_pattern: error: write() failed with: %d (%s)\n", r, strerror(r)); + break; + } + } + + close(fd); +} + +int verify_pattern(char *buf, size_t len, uint64_t off) +{ + size_t i; + + for (i = 0; i < len; i += sizeof(uint64_t)) { + uint64_t expected = i + off; + uint64_t actual = *(uint64_t*)(buf + i); + if (expected != actual) { + printf("error: offset %llu had %llu\n", (unsigned long long)expected, + (unsigned long long)actual); + exit(1); + } + } + return 0; +} + +void generate_pattern(void *buf, size_t len, uint64_t offset) +{ + uint64_t *v = buf; + size_t i; + + for (i=0; i<len / sizeof(v); i++) + v[i] = i * sizeof(v) + offset; + verify_pattern(buf, len, offset); +} + +int read_file(int buf_align, uint64_t offset, int len, int direct) { + + printf("read_file buf_align %d offset %llu len %d\n", buf_align, + (unsigned long long)offset, len); + void *rawbuf; + int r; + int flags; + int err = 0; + + if(direct) + flags = O_RDONLY|O_DIRECT; + else + flags = O_RDONLY; + + int fd = open("foo", flags); + if (fd < 0) { + err = errno; + printf("read_file: error: open() failed with: %d (%s)\n", err, strerror(err)); + exit(err); + } + + if (!direct) + ioctl(fd, CEPH_IOC_SYNCIO); + + if ((r = posix_memalign(&rawbuf, 4096, len + buf_align)) != 0) { + printf("read_file: error: posix_memalign failed with %d", r); + close(fd); + exit (r); + } + + void *buf = (char *)rawbuf + buf_align; + memset(buf, 0, len); + r = pread(fd, buf, len, offset); + if (r == -1) { + err = errno; + printf("read_file: error: pread() failed with: %d (%s)\n", err, strerror(err)); + goto out; + } + r = verify_pattern(buf, len, offset); + +out: + close(fd); + free(rawbuf); + return r; +} + +int read_direct(int buf_align, uint64_t offset, int len) +{ + printf("read_direct buf_align %d offset %llu len %d\n", buf_align, + (unsigned long long)offset, len); + return read_file(buf_align, offset, len, 1); +} + +int read_sync(int buf_align, uint64_t offset, int len) +{ + printf("read_sync buf_align %d offset %llu len %d\n", buf_align, + (unsigned long long)offset, len); + return read_file(buf_align, offset, len, 0); +} + +int write_file(int buf_align, uint64_t offset, int len, int direct) +{ + printf("write_file buf_align %d offset %llu len %d\n", buf_align, + (unsigned long long)offset, len); + void *rawbuf; + int r; + int err = 0; + int flags; + if (direct) + flags = O_WRONLY|O_DIRECT|O_CREAT; + else + flags = O_WRONLY|O_CREAT; + + int fd = open("foo", flags, 0644); + if (fd < 0) { + int err = errno; + printf("write_file: error: open() failed with: %d (%s)\n", err, strerror(err)); 
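+		/* nothing has been allocated on this path yet, so it is fine
+		 * to exit directly instead of jumping to the out_* cleanup
+		 * labels used further down in this function */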
+ exit(err); + } + + if ((r = posix_memalign(&rawbuf, 4096, len + buf_align)) != 0) { + printf("write_file: error: posix_memalign failed with %d", r); + err = r; + goto out_close; + } + + if (!direct) + ioctl(fd, CEPH_IOC_SYNCIO); + + void *buf = (char *)rawbuf + buf_align; + + generate_pattern(buf, len, offset); + + r = pwrite(fd, buf, len, offset); + close(fd); + + fd = open("foo", O_RDONLY); + if (fd < 0) { + err = errno; + printf("write_file: error: open() failed with: %d (%s)\n", err, strerror(err)); + free(rawbuf); + goto out_unlink; + } + void *buf2 = malloc(len); + if (!buf2) { + err = -ENOMEM; + printf("write_file: error: malloc failed\n"); + goto out_free; + } + + memset(buf2, 0, len); + r = pread(fd, buf2, len, offset); + if (r == -1) { + err = errno; + printf("write_file: error: pread() failed with: %d (%s)\n", err, strerror(err)); + goto out_free_buf; + } + r = verify_pattern(buf2, len, offset); + +out_free_buf: + free(buf2); +out_free: + free(rawbuf); +out_close: + close(fd); +out_unlink: + unlink("foo"); + if (err) + exit(err); + return r; +} + +int write_direct(int buf_align, uint64_t offset, int len) +{ + printf("write_direct buf_align %d offset %llu len %d\n", buf_align, + (unsigned long long)offset, len); + return write_file (buf_align, offset, len, 1); +} + +int write_sync(int buf_align, uint64_t offset, int len) +{ + printf("write_sync buf_align %d offset %llu len %d\n", buf_align, + (unsigned long long)offset, len); + return write_file (buf_align, offset, len, 0); +} + +int main(int argc, char **argv) +{ + uint64_t i, j, k; + int read = 1; + int write = 1; + + if (argc >= 2 && strcmp(argv[1], "read") == 0) + write = 0; + if (argc >= 2 && strcmp(argv[1], "write") == 0) + read = 0; + + if (read) { + write_pattern(); + + for (i = 0; i < 4096; i += 512) + for (j = 4*1024*1024 - 4096; j < 4*1024*1024 + 4096; j += 512) + for (k = 1024; k <= 16384; k *= 2) { + read_direct(i, j, k); + read_sync(i, j, k); + } + + } + unlink("foo"); + if (write) { + for (i = 0; i < 4096; i += 512) + for (j = 4*1024*1024 - 4096 + 512; j < 4*1024*1024 + 4096; j += 512) + for (k = 1024; k <= 16384; k *= 2) { + write_direct(i, j, k); + write_sync(i, j, k); + } + } + + + return 0; +} diff --git a/qa/workunits/erasure-code/.gitignore b/qa/workunits/erasure-code/.gitignore new file mode 100644 index 000000000..7e563b8b3 --- /dev/null +++ b/qa/workunits/erasure-code/.gitignore @@ -0,0 +1,2 @@ +*.log +*.trs diff --git a/qa/workunits/erasure-code/bench.html b/qa/workunits/erasure-code/bench.html new file mode 100644 index 000000000..3b4b6c74c --- /dev/null +++ b/qa/workunits/erasure-code/bench.html @@ -0,0 +1,34 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd" > +<html> + <head> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> + <title>Erasure Code Plugins Benchmarks</title> + <link href="examples.css" rel="stylesheet" type="text/css"> + <script language="javascript" type="text/javascript" src="jquery.js"></script> + <script language="javascript" type="text/javascript" src="jquery.flot.js"></script> + <script language="javascript" type="text/javascript" src="jquery.flot.categories.js"></script> + <script language="javascript" type="text/javascript" src="bench.js"></script> + <script language="javascript" type="text/javascript" src="plot.js"></script> + </head> + <body> + + <div id="header"> + <h2>Erasure Code Plugins Benchmarks</h2> + </div> + + <div id="content"> + + <div class="demo-container"> + <div id="encode" 
class="demo-placeholder"></div> + </div> + <p>encode: Y = GB/s, X = K/M</p> + + <div class="demo-container"> + <div id="decode" class="demo-placeholder"></div> + </div> + <p>decode: Y = GB/s, X = K/M/erasures</p> + + </div> + + </body> +</html> diff --git a/qa/workunits/erasure-code/bench.sh b/qa/workunits/erasure-code/bench.sh new file mode 100755 index 000000000..8e288f053 --- /dev/null +++ b/qa/workunits/erasure-code/bench.sh @@ -0,0 +1,192 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2015 Red Hat <contact@redhat.com> +# Copyright (C) 2013,2014 Cloudwatt <libre.licensing@cloudwatt.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +# Test that it works from sources with: +# +# CEPH_ERASURE_CODE_BENCHMARK=src/ceph_erasure_code_benchmark \ +# PLUGIN_DIRECTORY=build/lib \ +# qa/workunits/erasure-code/bench.sh fplot jerasure | +# tee qa/workunits/erasure-code/bench.js +# +# This should start immediately and display: +# +# ... +# [ '2/1', .48035538612887358583 ], +# [ '3/2', .21648470405675016626 ], +# etc. +# +# and complete within a few seconds. The result can then be displayed with: +# +# firefox qa/workunits/erasure-code/bench.html +# +# Once it is confirmed to work, it can be run with a more significant +# volume of data so that the measures are more reliable: +# +# TOTAL_SIZE=$((4 * 1024 * 1024 * 1024)) \ +# CEPH_ERASURE_CODE_BENCHMARK=src/ceph_erasure_code_benchmark \ +# PLUGIN_DIRECTORY=build/lib \ +# qa/workunits/erasure-code/bench.sh fplot jerasure | +# tee qa/workunits/erasure-code/bench.js +# +set -e + +export PATH=/sbin:$PATH + +: ${VERBOSE:=false} +: ${CEPH_ERASURE_CODE_BENCHMARK:=ceph_erasure_code_benchmark} +: ${PLUGIN_DIRECTORY:=/usr/lib/ceph/erasure-code} +: ${PLUGINS:=isa jerasure} +: ${TECHNIQUES:=vandermonde cauchy} +: ${TOTAL_SIZE:=$((1024 * 1024))} +: ${SIZE:=4096} +: ${PARAMETERS:=--parameter jerasure-per-chunk-alignment=true} + +function bench_header() { + echo -e "seconds\tKB\tplugin\tk\tm\twork.\titer.\tsize\teras.\tcommand." 
+} + +function bench() { + local plugin=$1 + shift + local k=$1 + shift + local m=$1 + shift + local workload=$1 + shift + local iterations=$1 + shift + local size=$1 + shift + local erasures=$1 + shift + command=$(echo $CEPH_ERASURE_CODE_BENCHMARK \ + --plugin $plugin \ + --workload $workload \ + --iterations $iterations \ + --size $size \ + --erasures $erasures \ + --parameter k=$k \ + --parameter m=$m \ + --erasure-code-dir $PLUGIN_DIRECTORY) + result=$($command "$@") + echo -e "$result\t$plugin\t$k\t$m\t$workload\t$iterations\t$size\t$erasures\t$command ""$@" +} + +function packetsize() { + local k=$1 + local w=$2 + local vector_wordsize=$3 + local size=$4 + + local p=$(( ($size / $k / $w / $vector_wordsize ) * $vector_wordsize)) + if [ $p -gt 3100 ] ; then + p=3100 + fi + echo $p +} + +function bench_run() { + local plugin=jerasure + local w=8 + local VECTOR_WORDSIZE=16 + local ks="2 3 4 6 10" + declare -A k2ms + k2ms[2]="1" + k2ms[3]="2" + k2ms[4]="2 3" + k2ms[6]="2 3 4" + k2ms[10]="3 4" + local isa2technique_vandermonde='reed_sol_van' + local isa2technique_cauchy='cauchy' + local jerasure2technique_vandermonde='reed_sol_van' + local jerasure2technique_cauchy='cauchy_good' + for technique in ${TECHNIQUES} ; do + for plugin in ${PLUGINS} ; do + eval technique_parameter=\$${plugin}2technique_${technique} + echo "serie encode_${technique}_${plugin}" + for k in $ks ; do + for m in ${k2ms[$k]} ; do + bench $plugin $k $m encode $(($TOTAL_SIZE / $SIZE)) $SIZE 0 \ + --parameter packetsize=$(packetsize $k $w $VECTOR_WORDSIZE $SIZE) \ + ${PARAMETERS} \ + --parameter technique=$technique_parameter + + done + done + done + done + for technique in ${TECHNIQUES} ; do + for plugin in ${PLUGINS} ; do + eval technique_parameter=\$${plugin}2technique_${technique} + echo "serie decode_${technique}_${plugin}" + for k in $ks ; do + for m in ${k2ms[$k]} ; do + echo + for erasures in $(seq 1 $m) ; do + bench $plugin $k $m decode $(($TOTAL_SIZE / $SIZE)) $SIZE $erasures \ + --parameter packetsize=$(packetsize $k $w $VECTOR_WORDSIZE $SIZE) \ + ${PARAMETERS} \ + --parameter technique=$technique_parameter + done + done + done + done + done +} + +function fplot() { + local serie + bench_run | while read seconds total plugin k m workload iteration size erasures rest ; do + if [ -z $seconds ] ; then + echo null, + elif [ $seconds = serie ] ; then + if [ "$serie" ] ; then + echo '];' + fi + local serie=`echo $total | sed 's/cauchy_\([0-9]\)/cauchy_good_\1/g'` + echo "var $serie = [" + else + local x + if [ $workload = encode ] ; then + x=$k/$m + else + x=$k/$m/$erasures + fi + echo "[ '$x', " $(echo "( $total / 1024 / 1024 ) / $seconds" | bc -ql) " ], " + fi + done + echo '];' +} + +function main() { + bench_header + bench_run +} + +if [ "$1" = fplot ] ; then + "$@" +else + main +fi +# Local Variables: +# compile-command: "\ +# CEPH_ERASURE_CODE_BENCHMARK=../../../src/ceph_erasure_code_benchmark \ +# PLUGIN_DIRECTORY=../../../build/lib \ +# ./bench.sh +# " +# End: diff --git a/qa/workunits/erasure-code/encode-decode-non-regression.sh b/qa/workunits/erasure-code/encode-decode-non-regression.sh new file mode 100755 index 000000000..7f36c91c7 --- /dev/null +++ b/qa/workunits/erasure-code/encode-decode-non-regression.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2014 Red Hat <contact@redhat.com> +# +# Author: Loic Dachary <loic@dachary.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free 
Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# +set -ex + +: ${CORPUS:=https://github.com/ceph/ceph-erasure-code-corpus.git} +: ${DIRECTORY:=$CEPH_ROOT/ceph-erasure-code-corpus} + +# when running from sources, the current directory must have precedence +export PATH=:$PATH + +if ! test -d $DIRECTORY ; then + git clone $CORPUS $DIRECTORY +fi + +my_version=v$(ceph --version | cut -f3 -d ' ') + +all_versions=$((ls -d $DIRECTORY/v* ; echo $DIRECTORY/$my_version ) | sort) + +for version in $all_versions ; do + if test -d $version ; then + $version/non-regression.sh + fi + if test $version = $DIRECTORY/$my_version ; then + break + fi +done diff --git a/qa/workunits/erasure-code/examples.css b/qa/workunits/erasure-code/examples.css new file mode 100644 index 000000000..ee4724778 --- /dev/null +++ b/qa/workunits/erasure-code/examples.css @@ -0,0 +1,97 @@ +* { padding: 0; margin: 0; vertical-align: top; } + +body { + background: url(background.png) repeat-x; + font: 18px/1.5em "proxima-nova", Helvetica, Arial, sans-serif; +} + +a { color: #069; } +a:hover { color: #28b; } + +h2 { + margin-top: 15px; + font: normal 32px "omnes-pro", Helvetica, Arial, sans-serif; +} + +h3 { + margin-left: 30px; + font: normal 26px "omnes-pro", Helvetica, Arial, sans-serif; + color: #666; +} + +p { + margin-top: 10px; +} + +button { + font-size: 18px; + padding: 1px 7px; +} + +input { + font-size: 18px; +} + +input[type=checkbox] { + margin: 7px; +} + +#header { + position: relative; + width: 900px; + margin: auto; +} + +#header h2 { + margin-left: 10px; + vertical-align: middle; + font-size: 42px; + font-weight: bold; + text-decoration: none; + color: #000; +} + +#content { + width: 880px; + margin: 0 auto; + padding: 10px; +} + +#footer { + margin-top: 25px; + margin-bottom: 10px; + text-align: center; + font-size: 12px; + color: #999; +} + +.demo-container { + box-sizing: border-box; + width: 850px; + height: 450px; + padding: 20px 15px 15px 15px; + margin: 15px auto 30px auto; + border: 1px solid #ddd; + background: #fff; + background: linear-gradient(#f6f6f6 0, #fff 50px); + background: -o-linear-gradient(#f6f6f6 0, #fff 50px); + background: -ms-linear-gradient(#f6f6f6 0, #fff 50px); + background: -moz-linear-gradient(#f6f6f6 0, #fff 50px); + background: -webkit-linear-gradient(#f6f6f6 0, #fff 50px); + box-shadow: 0 3px 10px rgba(0,0,0,0.15); + -o-box-shadow: 0 3px 10px rgba(0,0,0,0.1); + -ms-box-shadow: 0 3px 10px rgba(0,0,0,0.1); + -moz-box-shadow: 0 3px 10px rgba(0,0,0,0.1); + -webkit-box-shadow: 0 3px 10px rgba(0,0,0,0.1); +} + +.demo-placeholder { + width: 100%; + height: 100%; + font-size: 14px; + line-height: 1.2em; +} + +.legend table { + border-spacing: 5px; +}
\ No newline at end of file diff --git a/qa/workunits/erasure-code/jquery.flot.categories.js b/qa/workunits/erasure-code/jquery.flot.categories.js new file mode 100644 index 000000000..2f9b25797 --- /dev/null +++ b/qa/workunits/erasure-code/jquery.flot.categories.js @@ -0,0 +1,190 @@ +/* Flot plugin for plotting textual data or categories. + +Copyright (c) 2007-2014 IOLA and Ole Laursen. +Licensed under the MIT license. + +Consider a dataset like [["February", 34], ["March", 20], ...]. This plugin +allows you to plot such a dataset directly. + +To enable it, you must specify mode: "categories" on the axis with the textual +labels, e.g. + + $.plot("#placeholder", data, { xaxis: { mode: "categories" } }); + +By default, the labels are ordered as they are met in the data series. If you +need a different ordering, you can specify "categories" on the axis options +and list the categories there: + + xaxis: { + mode: "categories", + categories: ["February", "March", "April"] + } + +If you need to customize the distances between the categories, you can specify +"categories" as an object mapping labels to values + + xaxis: { + mode: "categories", + categories: { "February": 1, "March": 3, "April": 4 } + } + +If you don't specify all categories, the remaining categories will be numbered +from the max value plus 1 (with a spacing of 1 between each). + +Internally, the plugin works by transforming the input data through an auto- +generated mapping where the first category becomes 0, the second 1, etc. +Hence, a point like ["February", 34] becomes [0, 34] internally in Flot (this +is visible in hover and click events that return numbers rather than the +category labels). The plugin also overrides the tick generator to spit out the +categories as ticks instead of the values. + +If you need to map a value back to its label, the mapping is always accessible +as "categories" on the axis object, e.g. plot.getAxes().xaxis.categories. 
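+
+As an illustrative sketch (an editorial addition, not part of the upstream
+plugin documentation), that mapping can be inverted in user code, for
+example to turn the numeric x value reported by a "plothover" event back
+into its category label:
+
+    function labelForValue(plot, value) {
+        var categories = plot.getAxes().xaxis.categories;
+        for (var label in categories)
+            if (categories[label] == value)
+                return label;
+        return null;
+    }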
+ +*/ + +(function ($) { + var options = { + xaxis: { + categories: null + }, + yaxis: { + categories: null + } + }; + + function processRawData(plot, series, data, datapoints) { + // if categories are enabled, we need to disable + // auto-transformation to numbers so the strings are intact + // for later processing + + var xCategories = series.xaxis.options.mode == "categories", + yCategories = series.yaxis.options.mode == "categories"; + + if (!(xCategories || yCategories)) + return; + + var format = datapoints.format; + + if (!format) { + // FIXME: auto-detection should really not be defined here + var s = series; + format = []; + format.push({ x: true, number: true, required: true }); + format.push({ y: true, number: true, required: true }); + + if (s.bars.show || (s.lines.show && s.lines.fill)) { + var autoscale = !!((s.bars.show && s.bars.zero) || (s.lines.show && s.lines.zero)); + format.push({ y: true, number: true, required: false, defaultValue: 0, autoscale: autoscale }); + if (s.bars.horizontal) { + delete format[format.length - 1].y; + format[format.length - 1].x = true; + } + } + + datapoints.format = format; + } + + for (var m = 0; m < format.length; ++m) { + if (format[m].x && xCategories) + format[m].number = false; + + if (format[m].y && yCategories) + format[m].number = false; + } + } + + function getNextIndex(categories) { + var index = -1; + + for (var v in categories) + if (categories[v] > index) + index = categories[v]; + + return index + 1; + } + + function categoriesTickGenerator(axis) { + var res = []; + for (var label in axis.categories) { + var v = axis.categories[label]; + if (v >= axis.min && v <= axis.max) + res.push([v, label]); + } + + res.sort(function (a, b) { return a[0] - b[0]; }); + + return res; + } + + function setupCategoriesForAxis(series, axis, datapoints) { + if (series[axis].options.mode != "categories") + return; + + if (!series[axis].categories) { + // parse options + var c = {}, o = series[axis].options.categories || {}; + if ($.isArray(o)) { + for (var i = 0; i < o.length; ++i) + c[o[i]] = i; + } + else { + for (var v in o) + c[v] = o[v]; + } + + series[axis].categories = c; + } + + // fix ticks + if (!series[axis].options.ticks) + series[axis].options.ticks = categoriesTickGenerator; + + transformPointsOnAxis(datapoints, axis, series[axis].categories); + } + + function transformPointsOnAxis(datapoints, axis, categories) { + // go through the points, transforming them + var points = datapoints.points, + ps = datapoints.pointsize, + format = datapoints.format, + formatColumn = axis.charAt(0), + index = getNextIndex(categories); + + for (var i = 0; i < points.length; i += ps) { + if (points[i] == null) + continue; + + for (var m = 0; m < ps; ++m) { + var val = points[i + m]; + + if (val == null || !format[m][formatColumn]) + continue; + + if (!(val in categories)) { + categories[val] = index; + ++index; + } + + points[i + m] = categories[val]; + } + } + } + + function processDatapoints(plot, series, datapoints) { + setupCategoriesForAxis(series, "xaxis", datapoints); + setupCategoriesForAxis(series, "yaxis", datapoints); + } + + function init(plot) { + plot.hooks.processRawData.push(processRawData); + plot.hooks.processDatapoints.push(processDatapoints); + } + + $.plot.plugins.push({ + init: init, + options: options, + name: 'categories', + version: '1.0' + }); +})(jQuery); diff --git a/qa/workunits/erasure-code/jquery.flot.js b/qa/workunits/erasure-code/jquery.flot.js new file mode 100644 index 000000000..39f3e4cf3 --- /dev/null +++ 
b/qa/workunits/erasure-code/jquery.flot.js @@ -0,0 +1,3168 @@ +/* Javascript plotting library for jQuery, version 0.8.3. + +Copyright (c) 2007-2014 IOLA and Ole Laursen. +Licensed under the MIT license. + +*/ + +// first an inline dependency, jquery.colorhelpers.js, we inline it here +// for convenience + +/* Plugin for jQuery for working with colors. + * + * Version 1.1. + * + * Inspiration from jQuery color animation plugin by John Resig. + * + * Released under the MIT license by Ole Laursen, October 2009. + * + * Examples: + * + * $.color.parse("#fff").scale('rgb', 0.25).add('a', -0.5).toString() + * var c = $.color.extract($("#mydiv"), 'background-color'); + * console.log(c.r, c.g, c.b, c.a); + * $.color.make(100, 50, 25, 0.4).toString() // returns "rgba(100,50,25,0.4)" + * + * Note that .scale() and .add() return the same modified object + * instead of making a new one. + * + * V. 1.1: Fix error handling so e.g. parsing an empty string does + * produce a color rather than just crashing. + */ +(function($){$.color={};$.color.make=function(r,g,b,a){var o={};o.r=r||0;o.g=g||0;o.b=b||0;o.a=a!=null?a:1;o.add=function(c,d){for(var i=0;i<c.length;++i)o[c.charAt(i)]+=d;return o.normalize()};o.scale=function(c,f){for(var i=0;i<c.length;++i)o[c.charAt(i)]*=f;return o.normalize()};o.toString=function(){if(o.a>=1){return"rgb("+[o.r,o.g,o.b].join(",")+")"}else{return"rgba("+[o.r,o.g,o.b,o.a].join(",")+")"}};o.normalize=function(){function clamp(min,value,max){return value<min?min:value>max?max:value}o.r=clamp(0,parseInt(o.r),255);o.g=clamp(0,parseInt(o.g),255);o.b=clamp(0,parseInt(o.b),255);o.a=clamp(0,o.a,1);return o};o.clone=function(){return $.color.make(o.r,o.b,o.g,o.a)};return o.normalize()};$.color.extract=function(elem,css){var c;do{c=elem.css(css).toLowerCase();if(c!=""&&c!="transparent")break;elem=elem.parent()}while(elem.length&&!$.nodeName(elem.get(0),"body"));if(c=="rgba(0, 0, 0, 0)")c="transparent";return $.color.parse(c)};$.color.parse=function(str){var res,m=$.color.make;if(res=/rgb\(\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*\)/.exec(str))return m(parseInt(res[1],10),parseInt(res[2],10),parseInt(res[3],10));if(res=/rgba\(\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*,\s*([0-9]+(?:\.[0-9]+)?)\s*\)/.exec(str))return m(parseInt(res[1],10),parseInt(res[2],10),parseInt(res[3],10),parseFloat(res[4]));if(res=/rgb\(\s*([0-9]+(?:\.[0-9]+)?)\%\s*,\s*([0-9]+(?:\.[0-9]+)?)\%\s*,\s*([0-9]+(?:\.[0-9]+)?)\%\s*\)/.exec(str))return m(parseFloat(res[1])*2.55,parseFloat(res[2])*2.55,parseFloat(res[3])*2.55);if(res=/rgba\(\s*([0-9]+(?:\.[0-9]+)?)\%\s*,\s*([0-9]+(?:\.[0-9]+)?)\%\s*,\s*([0-9]+(?:\.[0-9]+)?)\%\s*,\s*([0-9]+(?:\.[0-9]+)?)\s*\)/.exec(str))return m(parseFloat(res[1])*2.55,parseFloat(res[2])*2.55,parseFloat(res[3])*2.55,parseFloat(res[4]));if(res=/#([a-fA-F0-9]{2})([a-fA-F0-9]{2})([a-fA-F0-9]{2})/.exec(str))return m(parseInt(res[1],16),parseInt(res[2],16),parseInt(res[3],16));if(res=/#([a-fA-F0-9])([a-fA-F0-9])([a-fA-F0-9])/.exec(str))return m(parseInt(res[1]+res[1],16),parseInt(res[2]+res[2],16),parseInt(res[3]+res[3],16));var name=$.trim(str).toLowerCase();if(name=="transparent")return m(255,255,255,0);else{res=lookupColors[name]||[0,0,0];return m(res[0],res[1],res[2])}};var 
lookupColors={aqua:[0,255,255],azure:[240,255,255],beige:[245,245,220],black:[0,0,0],blue:[0,0,255],brown:[165,42,42],cyan:[0,255,255],darkblue:[0,0,139],darkcyan:[0,139,139],darkgrey:[169,169,169],darkgreen:[0,100,0],darkkhaki:[189,183,107],darkmagenta:[139,0,139],darkolivegreen:[85,107,47],darkorange:[255,140,0],darkorchid:[153,50,204],darkred:[139,0,0],darksalmon:[233,150,122],darkviolet:[148,0,211],fuchsia:[255,0,255],gold:[255,215,0],green:[0,128,0],indigo:[75,0,130],khaki:[240,230,140],lightblue:[173,216,230],lightcyan:[224,255,255],lightgreen:[144,238,144],lightgrey:[211,211,211],lightpink:[255,182,193],lightyellow:[255,255,224],lime:[0,255,0],magenta:[255,0,255],maroon:[128,0,0],navy:[0,0,128],olive:[128,128,0],orange:[255,165,0],pink:[255,192,203],purple:[128,0,128],violet:[128,0,128],red:[255,0,0],silver:[192,192,192],white:[255,255,255],yellow:[255,255,0]}})(jQuery); + +// the actual Flot code +(function($) { + + // Cache the prototype hasOwnProperty for faster access + + var hasOwnProperty = Object.prototype.hasOwnProperty; + + // A shim to provide 'detach' to jQuery versions prior to 1.4. Using a DOM + // operation produces the same effect as detach, i.e. removing the element + // without touching its jQuery data. + + // Do not merge this into Flot 0.9, since it requires jQuery 1.4.4+. + + if (!$.fn.detach) { + $.fn.detach = function() { + return this.each(function() { + if (this.parentNode) { + this.parentNode.removeChild( this ); + } + }); + }; + } + + /////////////////////////////////////////////////////////////////////////// + // The Canvas object is a wrapper around an HTML5 <canvas> tag. + // + // @constructor + // @param {string} cls List of classes to apply to the canvas. + // @param {element} container Element onto which to append the canvas. + // + // Requiring a container is a little iffy, but unfortunately canvas + // operations don't work unless the canvas is attached to the DOM. + + function Canvas(cls, container) { + + var element = container.children("." + cls)[0]; + + if (element == null) { + + element = document.createElement("canvas"); + element.className = cls; + + $(element).css({ direction: "ltr", position: "absolute", left: 0, top: 0 }) + .appendTo(container); + + // If HTML5 Canvas isn't available, fall back to [Ex|Flash]canvas + + if (!element.getContext) { + if (window.G_vmlCanvasManager) { + element = window.G_vmlCanvasManager.initElement(element); + } else { + throw new Error("Canvas is not available. If you're using IE with a fall-back such as Excanvas, then there's either a mistake in your conditional include, or the page has no DOCTYPE and is rendering in Quirks Mode."); + } + } + } + + this.element = element; + + var context = this.context = element.getContext("2d"); + + // Determine the screen's ratio of physical to device-independent + // pixels. This is the ratio between the canvas width that the browser + // advertises and the number of pixels actually present in that space. + + // The iPhone 4, for example, has a device-independent width of 320px, + // but its screen is actually 640px wide. It therefore has a pixel + // ratio of 2, while most normal devices have a ratio of 1. 
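+        // (Editorial note: concretely, a devicePixelRatio of 2 over a
+        // backingStoreRatio of 1 gives pixelRatio 2 below, so resize()
+        // backs a 300x200 CSS-pixel canvas with a 600x400 pixel buffer
+        // while its CSS size stays 300x200.)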
+ + var devicePixelRatio = window.devicePixelRatio || 1, + backingStoreRatio = + context.webkitBackingStorePixelRatio || + context.mozBackingStorePixelRatio || + context.msBackingStorePixelRatio || + context.oBackingStorePixelRatio || + context.backingStorePixelRatio || 1; + + this.pixelRatio = devicePixelRatio / backingStoreRatio; + + // Size the canvas to match the internal dimensions of its container + + this.resize(container.width(), container.height()); + + // Collection of HTML div layers for text overlaid onto the canvas + + this.textContainer = null; + this.text = {}; + + // Cache of text fragments and metrics, so we can avoid expensively + // re-calculating them when the plot is re-rendered in a loop. + + this._textCache = {}; + } + + // Resizes the canvas to the given dimensions. + // + // @param {number} width New width of the canvas, in pixels. + // @param {number} width New height of the canvas, in pixels. + + Canvas.prototype.resize = function(width, height) { + + if (width <= 0 || height <= 0) { + throw new Error("Invalid dimensions for plot, width = " + width + ", height = " + height); + } + + var element = this.element, + context = this.context, + pixelRatio = this.pixelRatio; + + // Resize the canvas, increasing its density based on the display's + // pixel ratio; basically giving it more pixels without increasing the + // size of its element, to take advantage of the fact that retina + // displays have that many more pixels in the same advertised space. + + // Resizing should reset the state (excanvas seems to be buggy though) + + if (this.width != width) { + element.width = width * pixelRatio; + element.style.width = width + "px"; + this.width = width; + } + + if (this.height != height) { + element.height = height * pixelRatio; + element.style.height = height + "px"; + this.height = height; + } + + // Save the context, so we can reset in case we get replotted. The + // restore ensure that we're really back at the initial state, and + // should be safe even if we haven't saved the initial state yet. + + context.restore(); + context.save(); + + // Scale the coordinate space to match the display density; so even though we + // may have twice as many pixels, we still want lines and other drawing to + // appear at the same size; the extra pixels will just make them crisper. + + context.scale(pixelRatio, pixelRatio); + }; + + // Clears the entire canvas area, not including any overlaid HTML text + + Canvas.prototype.clear = function() { + this.context.clearRect(0, 0, this.width, this.height); + }; + + // Finishes rendering the canvas, including managing the text overlay. + + Canvas.prototype.render = function() { + + var cache = this._textCache; + + // For each text layer, add elements marked as active that haven't + // already been rendered, and remove those that are no longer active. 
+ + for (var layerKey in cache) { + if (hasOwnProperty.call(cache, layerKey)) { + + var layer = this.getTextLayer(layerKey), + layerCache = cache[layerKey]; + + layer.hide(); + + for (var styleKey in layerCache) { + if (hasOwnProperty.call(layerCache, styleKey)) { + var styleCache = layerCache[styleKey]; + for (var key in styleCache) { + if (hasOwnProperty.call(styleCache, key)) { + + var positions = styleCache[key].positions; + + for (var i = 0, position; position = positions[i]; i++) { + if (position.active) { + if (!position.rendered) { + layer.append(position.element); + position.rendered = true; + } + } else { + positions.splice(i--, 1); + if (position.rendered) { + position.element.detach(); + } + } + } + + if (positions.length == 0) { + delete styleCache[key]; + } + } + } + } + } + + layer.show(); + } + } + }; + + // Creates (if necessary) and returns the text overlay container. + // + // @param {string} classes String of space-separated CSS classes used to + // uniquely identify the text layer. + // @return {object} The jQuery-wrapped text-layer div. + + Canvas.prototype.getTextLayer = function(classes) { + + var layer = this.text[classes]; + + // Create the text layer if it doesn't exist + + if (layer == null) { + + // Create the text layer container, if it doesn't exist + + if (this.textContainer == null) { + this.textContainer = $("<div class='flot-text'></div>") + .css({ + position: "absolute", + top: 0, + left: 0, + bottom: 0, + right: 0, + 'font-size': "smaller", + color: "#545454" + }) + .insertAfter(this.element); + } + + layer = this.text[classes] = $("<div></div>") + .addClass(classes) + .css({ + position: "absolute", + top: 0, + left: 0, + bottom: 0, + right: 0 + }) + .appendTo(this.textContainer); + } + + return layer; + }; + + // Creates (if necessary) and returns a text info object. + // + // The object looks like this: + // + // { + // width: Width of the text's wrapper div. + // height: Height of the text's wrapper div. + // element: The jQuery-wrapped HTML div containing the text. + // positions: Array of positions at which this text is drawn. + // } + // + // The positions array contains objects that look like this: + // + // { + // active: Flag indicating whether the text should be visible. + // rendered: Flag indicating whether the text is currently visible. + // element: The jQuery-wrapped HTML div containing the text. + // x: X coordinate at which to draw the text. + // y: Y coordinate at which to draw the text. + // } + // + // Each position after the first receives a clone of the original element. + // + // The idea is that that the width, height, and general 'identity' of the + // text is constant no matter where it is placed; the placements are a + // secondary property. + // + // Canvas maintains a cache of recently-used text info objects; getTextInfo + // either returns the cached element or creates a new entry. + // + // @param {string} layer A string of space-separated CSS classes uniquely + // identifying the layer containing this text. + // @param {string} text Text string to retrieve info for. + // @param {(string|object)=} font Either a string of space-separated CSS + // classes or a font-spec object, defining the text's font and style. + // @param {number=} angle Angle at which to rotate the text, in degrees. + // Angle is currently unused, it will be implemented in the future. + // @param {number=} width Maximum width of the text before it wraps. + // @return {object} a text info object. 
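+    // (Editorial note: the cache consulted below is nested as
+    // this._textCache[layer][style][text], where "style" is either the
+    // font-class string or the CSS font definition generated from a
+    // font-spec object.)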
+ + Canvas.prototype.getTextInfo = function(layer, text, font, angle, width) { + + var textStyle, layerCache, styleCache, info; + + // Cast the value to a string, in case we were given a number or such + + text = "" + text; + + // If the font is a font-spec object, generate a CSS font definition + + if (typeof font === "object") { + textStyle = font.style + " " + font.variant + " " + font.weight + " " + font.size + "px/" + font.lineHeight + "px " + font.family; + } else { + textStyle = font; + } + + // Retrieve (or create) the cache for the text's layer and styles + + layerCache = this._textCache[layer]; + + if (layerCache == null) { + layerCache = this._textCache[layer] = {}; + } + + styleCache = layerCache[textStyle]; + + if (styleCache == null) { + styleCache = layerCache[textStyle] = {}; + } + + info = styleCache[text]; + + // If we can't find a matching element in our cache, create a new one + + if (info == null) { + + var element = $("<div></div>").html(text) + .css({ + position: "absolute", + 'max-width': width, + top: -9999 + }) + .appendTo(this.getTextLayer(layer)); + + if (typeof font === "object") { + element.css({ + font: textStyle, + color: font.color + }); + } else if (typeof font === "string") { + element.addClass(font); + } + + info = styleCache[text] = { + width: element.outerWidth(true), + height: element.outerHeight(true), + element: element, + positions: [] + }; + + element.detach(); + } + + return info; + }; + + // Adds a text string to the canvas text overlay. + // + // The text isn't drawn immediately; it is marked as rendering, which will + // result in its addition to the canvas on the next render pass. + // + // @param {string} layer A string of space-separated CSS classes uniquely + // identifying the layer containing this text. + // @param {number} x X coordinate at which to draw the text. + // @param {number} y Y coordinate at which to draw the text. + // @param {string} text Text string to draw. + // @param {(string|object)=} font Either a string of space-separated CSS + // classes or a font-spec object, defining the text's font and style. + // @param {number=} angle Angle at which to rotate the text, in degrees. + // Angle is currently unused, it will be implemented in the future. + // @param {number=} width Maximum width of the text before it wraps. + // @param {string=} halign Horizontal alignment of the text; either "left", + // "center" or "right". + // @param {string=} valign Vertical alignment of the text; either "top", + // "middle" or "bottom". + + Canvas.prototype.addText = function(layer, x, y, text, font, angle, width, halign, valign) { + + var info = this.getTextInfo(layer, text, font, angle, width), + positions = info.positions; + + // Tweak the div's position to match the text's alignment + + if (halign == "center") { + x -= info.width / 2; + } else if (halign == "right") { + x -= info.width; + } + + if (valign == "middle") { + y -= info.height / 2; + } else if (valign == "bottom") { + y -= info.height; + } + + // Determine whether this text already exists at this position. + // If so, mark it for inclusion in the next render pass. + + for (var i = 0, position; position = positions[i]; i++) { + if (position.x == x && position.y == y) { + position.active = true; + return; + } + } + + // If the text doesn't exist at this position, create a new entry + + // For the very first position we'll re-use the original element, + // while for subsequent ones we'll clone it. + + position = { + active: true, + rendered: false, + element: positions.length ? 
info.element.clone() : info.element, + x: x, + y: y + }; + + positions.push(position); + + // Move the element to its final position within the container + + position.element.css({ + top: Math.round(y), + left: Math.round(x), + 'text-align': halign // In case the text wraps + }); + }; + + // Removes one or more text strings from the canvas text overlay. + // + // If no parameters are given, all text within the layer is removed. + // + // Note that the text is not immediately removed; it is simply marked as + // inactive, which will result in its removal on the next render pass. + // This avoids the performance penalty for 'clear and redraw' behavior, + // where we potentially get rid of all text on a layer, but will likely + // add back most or all of it later, as when redrawing axes, for example. + // + // @param {string} layer A string of space-separated CSS classes uniquely + // identifying the layer containing this text. + // @param {number=} x X coordinate of the text. + // @param {number=} y Y coordinate of the text. + // @param {string=} text Text string to remove. + // @param {(string|object)=} font Either a string of space-separated CSS + // classes or a font-spec object, defining the text's font and style. + // @param {number=} angle Angle at which the text is rotated, in degrees. + // Angle is currently unused, it will be implemented in the future. + + Canvas.prototype.removeText = function(layer, x, y, text, font, angle) { + if (text == null) { + var layerCache = this._textCache[layer]; + if (layerCache != null) { + for (var styleKey in layerCache) { + if (hasOwnProperty.call(layerCache, styleKey)) { + var styleCache = layerCache[styleKey]; + for (var key in styleCache) { + if (hasOwnProperty.call(styleCache, key)) { + var positions = styleCache[key].positions; + for (var i = 0, position; position = positions[i]; i++) { + position.active = false; + } + } + } + } + } + } + } else { + var positions = this.getTextInfo(layer, text, font, angle).positions; + for (var i = 0, position; position = positions[i]; i++) { + if (position.x == x && position.y == y) { + position.active = false; + } + } + } + }; + + /////////////////////////////////////////////////////////////////////////// + // The top-level container for the entire plot. + + function Plot(placeholder, data_, options_, plugins) { + // data is on the form: + // [ series1, series2 ... ] + // where series is either just the data as [ [x1, y1], [x2, y2], ... ] + // or { data: [ [x1, y1], [x2, y2], ... ], label: "some label", ... 
} + + var series = [], + options = { + // the color theme used for graphs + colors: ["#edc240", "#afd8f8", "#cb4b4b", "#4da74d", "#9440ed"], + legend: { + show: true, + noColumns: 1, // number of colums in legend table + labelFormatter: null, // fn: string -> string + labelBoxBorderColor: "#ccc", // border color for the little label boxes + container: null, // container (as jQuery object) to put legend in, null means default on top of graph + position: "ne", // position of default legend container within plot + margin: 5, // distance from grid edge to default legend container within plot + backgroundColor: null, // null means auto-detect + backgroundOpacity: 0.85, // set to 0 to avoid background + sorted: null // default to no legend sorting + }, + xaxis: { + show: null, // null = auto-detect, true = always, false = never + position: "bottom", // or "top" + mode: null, // null or "time" + font: null, // null (derived from CSS in placeholder) or object like { size: 11, lineHeight: 13, style: "italic", weight: "bold", family: "sans-serif", variant: "small-caps" } + color: null, // base color, labels, ticks + tickColor: null, // possibly different color of ticks, e.g. "rgba(0,0,0,0.15)" + transform: null, // null or f: number -> number to transform axis + inverseTransform: null, // if transform is set, this should be the inverse function + min: null, // min. value to show, null means set automatically + max: null, // max. value to show, null means set automatically + autoscaleMargin: null, // margin in % to add if auto-setting min/max + ticks: null, // either [1, 3] or [[1, "a"], 3] or (fn: axis info -> ticks) or app. number of ticks for auto-ticks + tickFormatter: null, // fn: number -> string + labelWidth: null, // size of tick labels in pixels + labelHeight: null, + reserveSpace: null, // whether to reserve space even if axis isn't shown + tickLength: null, // size in pixels of ticks, or "full" for whole line + alignTicksWithAxis: null, // axis number or null for no sync + tickDecimals: null, // no. of decimals, null means auto + tickSize: null, // number or [number, "unit"] + minTickSize: null // number or [number, "unit"] + }, + yaxis: { + autoscaleMargin: 0.02, + position: "left" // or "right" + }, + xaxes: [], + yaxes: [], + series: { + points: { + show: false, + radius: 3, + lineWidth: 2, // in pixels + fill: true, + fillColor: "#ffffff", + symbol: "circle" // or callback + }, + lines: { + // we don't put in show: false so we can see + // whether lines were actively disabled + lineWidth: 2, // in pixels + fill: false, + fillColor: null, + steps: false + // Omit 'zero', so we can later default its value to + // match that of the 'fill' option. + }, + bars: { + show: false, + lineWidth: 2, // in pixels + barWidth: 1, // in units of the x axis + fill: true, + fillColor: null, + align: "left", // "left", "right", or "center" + horizontal: false, + zero: true + }, + shadowSize: 3, + highlightColor: null + }, + grid: { + show: true, + aboveData: false, + color: "#545454", // primary color used for outline and labels + backgroundColor: null, // null for transparent, else color + borderColor: null, // set if different from the grid color + tickColor: null, // color for the ticks, e.g. 
"rgba(0,0,0,0.15)" + margin: 0, // distance from the canvas edge to the grid + labelMargin: 5, // in pixels + axisMargin: 8, // in pixels + borderWidth: 2, // in pixels + minBorderMargin: null, // in pixels, null means taken from points radius + markings: null, // array of ranges or fn: axes -> array of ranges + markingsColor: "#f4f4f4", + markingsLineWidth: 2, + // interactive stuff + clickable: false, + hoverable: false, + autoHighlight: true, // highlight in case mouse is near + mouseActiveRadius: 10 // how far the mouse can be away to activate an item + }, + interaction: { + redrawOverlayInterval: 1000/60 // time between updates, -1 means in same flow + }, + hooks: {} + }, + surface = null, // the canvas for the plot itself + overlay = null, // canvas for interactive stuff on top of plot + eventHolder = null, // jQuery object that events should be bound to + ctx = null, octx = null, + xaxes = [], yaxes = [], + plotOffset = { left: 0, right: 0, top: 0, bottom: 0}, + plotWidth = 0, plotHeight = 0, + hooks = { + processOptions: [], + processRawData: [], + processDatapoints: [], + processOffset: [], + drawBackground: [], + drawSeries: [], + draw: [], + bindEvents: [], + drawOverlay: [], + shutdown: [] + }, + plot = this; + + // public functions + plot.setData = setData; + plot.setupGrid = setupGrid; + plot.draw = draw; + plot.getPlaceholder = function() { return placeholder; }; + plot.getCanvas = function() { return surface.element; }; + plot.getPlotOffset = function() { return plotOffset; }; + plot.width = function () { return plotWidth; }; + plot.height = function () { return plotHeight; }; + plot.offset = function () { + var o = eventHolder.offset(); + o.left += plotOffset.left; + o.top += plotOffset.top; + return o; + }; + plot.getData = function () { return series; }; + plot.getAxes = function () { + var res = {}, i; + $.each(xaxes.concat(yaxes), function (_, axis) { + if (axis) + res[axis.direction + (axis.n != 1 ? 
axis.n : "") + "axis"] = axis; + }); + return res; + }; + plot.getXAxes = function () { return xaxes; }; + plot.getYAxes = function () { return yaxes; }; + plot.c2p = canvasToAxisCoords; + plot.p2c = axisToCanvasCoords; + plot.getOptions = function () { return options; }; + plot.highlight = highlight; + plot.unhighlight = unhighlight; + plot.triggerRedrawOverlay = triggerRedrawOverlay; + plot.pointOffset = function(point) { + return { + left: parseInt(xaxes[axisNumber(point, "x") - 1].p2c(+point.x) + plotOffset.left, 10), + top: parseInt(yaxes[axisNumber(point, "y") - 1].p2c(+point.y) + plotOffset.top, 10) + }; + }; + plot.shutdown = shutdown; + plot.destroy = function () { + shutdown(); + placeholder.removeData("plot").empty(); + + series = []; + options = null; + surface = null; + overlay = null; + eventHolder = null; + ctx = null; + octx = null; + xaxes = []; + yaxes = []; + hooks = null; + highlights = []; + plot = null; + }; + plot.resize = function () { + var width = placeholder.width(), + height = placeholder.height(); + surface.resize(width, height); + overlay.resize(width, height); + }; + + // public attributes + plot.hooks = hooks; + + // initialize + initPlugins(plot); + parseOptions(options_); + setupCanvases(); + setData(data_); + setupGrid(); + draw(); + bindEvents(); + + + function executeHooks(hook, args) { + args = [plot].concat(args); + for (var i = 0; i < hook.length; ++i) + hook[i].apply(this, args); + } + + function initPlugins() { + + // References to key classes, allowing plugins to modify them + + var classes = { + Canvas: Canvas + }; + + for (var i = 0; i < plugins.length; ++i) { + var p = plugins[i]; + p.init(plot, classes); + if (p.options) + $.extend(true, options, p.options); + } + } + + function parseOptions(opts) { + + $.extend(true, options, opts); + + // $.extend merges arrays, rather than replacing them. When less + // colors are provided than the size of the default palette, we + // end up with those colors plus the remaining defaults, which is + // not expected behavior; avoid it by replacing them here. + + if (opts && opts.colors) { + options.colors = opts.colors; + } + + if (options.xaxis.color == null) + options.xaxis.color = $.color.parse(options.grid.color).scale('a', 0.22).toString(); + if (options.yaxis.color == null) + options.yaxis.color = $.color.parse(options.grid.color).scale('a', 0.22).toString(); + + if (options.xaxis.tickColor == null) // grid.tickColor for back-compatibility + options.xaxis.tickColor = options.grid.tickColor || options.xaxis.color; + if (options.yaxis.tickColor == null) // grid.tickColor for back-compatibility + options.yaxis.tickColor = options.grid.tickColor || options.yaxis.color; + + if (options.grid.borderColor == null) + options.grid.borderColor = options.grid.color; + if (options.grid.tickColor == null) + options.grid.tickColor = $.color.parse(options.grid.color).scale('a', 0.22).toString(); + + // Fill in defaults for axis options, including any unspecified + // font-spec fields, if a font-spec was provided. + + // If no x/y axis options were provided, create one of each anyway, + // since the rest of the code assumes that they exist. + + var i, axisOptions, axisCount, + fontSize = placeholder.css("font-size"), + fontSizeDefault = fontSize ? 
+fontSize.replace("px", "") : 13, + fontDefaults = { + style: placeholder.css("font-style"), + size: Math.round(0.8 * fontSizeDefault), + variant: placeholder.css("font-variant"), + weight: placeholder.css("font-weight"), + family: placeholder.css("font-family") + }; + + axisCount = options.xaxes.length || 1; + for (i = 0; i < axisCount; ++i) { + + axisOptions = options.xaxes[i]; + if (axisOptions && !axisOptions.tickColor) { + axisOptions.tickColor = axisOptions.color; + } + + axisOptions = $.extend(true, {}, options.xaxis, axisOptions); + options.xaxes[i] = axisOptions; + + if (axisOptions.font) { + axisOptions.font = $.extend({}, fontDefaults, axisOptions.font); + if (!axisOptions.font.color) { + axisOptions.font.color = axisOptions.color; + } + if (!axisOptions.font.lineHeight) { + axisOptions.font.lineHeight = Math.round(axisOptions.font.size * 1.15); + } + } + } + + axisCount = options.yaxes.length || 1; + for (i = 0; i < axisCount; ++i) { + + axisOptions = options.yaxes[i]; + if (axisOptions && !axisOptions.tickColor) { + axisOptions.tickColor = axisOptions.color; + } + + axisOptions = $.extend(true, {}, options.yaxis, axisOptions); + options.yaxes[i] = axisOptions; + + if (axisOptions.font) { + axisOptions.font = $.extend({}, fontDefaults, axisOptions.font); + if (!axisOptions.font.color) { + axisOptions.font.color = axisOptions.color; + } + if (!axisOptions.font.lineHeight) { + axisOptions.font.lineHeight = Math.round(axisOptions.font.size * 1.15); + } + } + } + + // backwards compatibility, to be removed in future + if (options.xaxis.noTicks && options.xaxis.ticks == null) + options.xaxis.ticks = options.xaxis.noTicks; + if (options.yaxis.noTicks && options.yaxis.ticks == null) + options.yaxis.ticks = options.yaxis.noTicks; + if (options.x2axis) { + options.xaxes[1] = $.extend(true, {}, options.xaxis, options.x2axis); + options.xaxes[1].position = "top"; + // Override the inherit to allow the axis to auto-scale + if (options.x2axis.min == null) { + options.xaxes[1].min = null; + } + if (options.x2axis.max == null) { + options.xaxes[1].max = null; + } + } + if (options.y2axis) { + options.yaxes[1] = $.extend(true, {}, options.yaxis, options.y2axis); + options.yaxes[1].position = "right"; + // Override the inherit to allow the axis to auto-scale + if (options.y2axis.min == null) { + options.yaxes[1].min = null; + } + if (options.y2axis.max == null) { + options.yaxes[1].max = null; + } + } + if (options.grid.coloredAreas) + options.grid.markings = options.grid.coloredAreas; + if (options.grid.coloredAreasColor) + options.grid.markingsColor = options.grid.coloredAreasColor; + if (options.lines) + $.extend(true, options.series.lines, options.lines); + if (options.points) + $.extend(true, options.series.points, options.points); + if (options.bars) + $.extend(true, options.series.bars, options.bars); + if (options.shadowSize != null) + options.series.shadowSize = options.shadowSize; + if (options.highlightColor != null) + options.series.highlightColor = options.highlightColor; + + // save options on axes for future reference + for (i = 0; i < options.xaxes.length; ++i) + getOrCreateAxis(xaxes, i + 1).options = options.xaxes[i]; + for (i = 0; i < options.yaxes.length; ++i) + getOrCreateAxis(yaxes, i + 1).options = options.yaxes[i]; + + // add hooks from options + for (var n in hooks) + if (options.hooks[n] && options.hooks[n].length) + hooks[n] = hooks[n].concat(options.hooks[n]); + + executeHooks(hooks.processOptions, [options]); + } + + function setData(d) { + series = parseData(d); + 
fillInSeriesOptions(); + processData(); + } + + function parseData(d) { + var res = []; + for (var i = 0; i < d.length; ++i) { + var s = $.extend(true, {}, options.series); + + if (d[i].data != null) { + s.data = d[i].data; // move the data instead of deep-copy + delete d[i].data; + + $.extend(true, s, d[i]); + + d[i].data = s.data; + } + else + s.data = d[i]; + res.push(s); + } + + return res; + } + + function axisNumber(obj, coord) { + var a = obj[coord + "axis"]; + if (typeof a == "object") // if we got a real axis, extract number + a = a.n; + if (typeof a != "number") + a = 1; // default to first axis + return a; + } + + function allAxes() { + // return flat array without annoying null entries + return $.grep(xaxes.concat(yaxes), function (a) { return a; }); + } + + function canvasToAxisCoords(pos) { + // return an object with x/y corresponding to all used axes + var res = {}, i, axis; + for (i = 0; i < xaxes.length; ++i) { + axis = xaxes[i]; + if (axis && axis.used) + res["x" + axis.n] = axis.c2p(pos.left); + } + + for (i = 0; i < yaxes.length; ++i) { + axis = yaxes[i]; + if (axis && axis.used) + res["y" + axis.n] = axis.c2p(pos.top); + } + + if (res.x1 !== undefined) + res.x = res.x1; + if (res.y1 !== undefined) + res.y = res.y1; + + return res; + } + + function axisToCanvasCoords(pos) { + // get canvas coords from the first pair of x/y found in pos + var res = {}, i, axis, key; + + for (i = 0; i < xaxes.length; ++i) { + axis = xaxes[i]; + if (axis && axis.used) { + key = "x" + axis.n; + if (pos[key] == null && axis.n == 1) + key = "x"; + + if (pos[key] != null) { + res.left = axis.p2c(pos[key]); + break; + } + } + } + + for (i = 0; i < yaxes.length; ++i) { + axis = yaxes[i]; + if (axis && axis.used) { + key = "y" + axis.n; + if (pos[key] == null && axis.n == 1) + key = "y"; + + if (pos[key] != null) { + res.top = axis.p2c(pos[key]); + break; + } + } + } + + return res; + } + + function getOrCreateAxis(axes, number) { + if (!axes[number - 1]) + axes[number - 1] = { + n: number, // save the number for future reference + direction: axes == xaxes ? "x" : "y", + options: $.extend(true, {}, axes == xaxes ? options.xaxis : options.yaxis) + }; + + return axes[number - 1]; + } + + function fillInSeriesOptions() { + + var neededColors = series.length, maxIndex = -1, i; + + // Subtract the number of series that already have fixed colors or + // color indexes from the number that we still need to generate. + + for (i = 0; i < series.length; ++i) { + var sc = series[i].color; + if (sc != null) { + neededColors--; + if (typeof sc == "number" && sc > maxIndex) { + maxIndex = sc; + } + } + } + + // If any of the series have fixed color indexes, then we need to + // generate at least as many colors as the highest index. + + if (neededColors <= maxIndex) { + neededColors = maxIndex + 1; + } + + // Generate all the colors, using first the option colors and then + // variations on those colors once they're exhausted. + + var c, colors = [], colorPool = options.colors, + colorPoolSize = colorPool.length, variation = 0; + + for (i = 0; i < neededColors; i++) { + + c = $.color.parse(colorPool[i % colorPoolSize] || "#666"); + + // Each time we exhaust the colors in the pool we adjust + // a scaling factor used to produce more variations on + // those colors. The factor alternates negative/positive + // to produce lighter/darker colors. + + // Reset the variation after every few cycles, or else + // it will end up producing only white or black colors. 
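+            // (Editorial note: with the default 5-color palette and no
+            // per-series colors, this yields the base colors for series 1-5,
+            // darker variants (scale 0.8) for series 6-10, lighter ones
+            // (scale 1.2) for 11-15, and so on, alternating as described
+            // above.)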
+ + if (i % colorPoolSize == 0 && i) { + if (variation >= 0) { + if (variation < 0.5) { + variation = -variation - 0.2; + } else variation = 0; + } else variation = -variation; + } + + colors[i] = c.scale('rgb', 1 + variation); + } + + // Finalize the series options, filling in their colors + + var colori = 0, s; + for (i = 0; i < series.length; ++i) { + s = series[i]; + + // assign colors + if (s.color == null) { + s.color = colors[colori].toString(); + ++colori; + } + else if (typeof s.color == "number") + s.color = colors[s.color].toString(); + + // turn on lines automatically in case nothing is set + if (s.lines.show == null) { + var v, show = true; + for (v in s) + if (s[v] && s[v].show) { + show = false; + break; + } + if (show) + s.lines.show = true; + } + + // If nothing was provided for lines.zero, default it to match + // lines.fill, since areas by default should extend to zero. + + if (s.lines.zero == null) { + s.lines.zero = !!s.lines.fill; + } + + // setup axes + s.xaxis = getOrCreateAxis(xaxes, axisNumber(s, "x")); + s.yaxis = getOrCreateAxis(yaxes, axisNumber(s, "y")); + } + } + + function processData() { + var topSentry = Number.POSITIVE_INFINITY, + bottomSentry = Number.NEGATIVE_INFINITY, + fakeInfinity = Number.MAX_VALUE, + i, j, k, m, length, + s, points, ps, x, y, axis, val, f, p, + data, format; + + function updateAxis(axis, min, max) { + if (min < axis.datamin && min != -fakeInfinity) + axis.datamin = min; + if (max > axis.datamax && max != fakeInfinity) + axis.datamax = max; + } + + $.each(allAxes(), function (_, axis) { + // init axis + axis.datamin = topSentry; + axis.datamax = bottomSentry; + axis.used = false; + }); + + for (i = 0; i < series.length; ++i) { + s = series[i]; + s.datapoints = { points: [] }; + + executeHooks(hooks.processRawData, [ s, s.data, s.datapoints ]); + } + + // first pass: clean and copy data + for (i = 0; i < series.length; ++i) { + s = series[i]; + + data = s.data; + format = s.datapoints.format; + + if (!format) { + format = []; + // find out how to copy + format.push({ x: true, number: true, required: true }); + format.push({ y: true, number: true, required: true }); + + if (s.bars.show || (s.lines.show && s.lines.fill)) { + var autoscale = !!((s.bars.show && s.bars.zero) || (s.lines.show && s.lines.zero)); + format.push({ y: true, number: true, required: false, defaultValue: 0, autoscale: autoscale }); + if (s.bars.horizontal) { + delete format[format.length - 1].y; + format[format.length - 1].x = true; + } + } + + s.datapoints.format = format; + } + + if (s.datapoints.pointsize != null) + continue; // already filled in + + s.datapoints.pointsize = format.length; + + ps = s.datapoints.pointsize; + points = s.datapoints.points; + + var insertSteps = s.lines.show && s.lines.steps; + s.xaxis.used = s.yaxis.used = true; + + for (j = k = 0; j < data.length; ++j, k += ps) { + p = data[j]; + + var nullify = p == null; + if (!nullify) { + for (m = 0; m < ps; ++m) { + val = p[m]; + f = format[m]; + + if (f) { + if (f.number && val != null) { + val = +val; // convert to number + if (isNaN(val)) + val = null; + else if (val == Infinity) + val = fakeInfinity; + else if (val == -Infinity) + val = -fakeInfinity; + } + + if (val == null) { + if (f.required) + nullify = true; + + if (f.defaultValue != null) + val = f.defaultValue; + } + } + + points[k + m] = val; + } + } + + if (nullify) { + for (m = 0; m < ps; ++m) { + val = points[k + m]; + if (val != null) { + f = format[m]; + // extract min/max info + if (f.autoscale !== false) { + if (f.x) { + 
updateAxis(s.xaxis, val, val); + } + if (f.y) { + updateAxis(s.yaxis, val, val); + } + } + } + points[k + m] = null; + } + } + else { + // a little bit of line specific stuff that + // perhaps shouldn't be here, but lacking + // better means... + if (insertSteps && k > 0 + && points[k - ps] != null + && points[k - ps] != points[k] + && points[k - ps + 1] != points[k + 1]) { + // copy the point to make room for a middle point + for (m = 0; m < ps; ++m) + points[k + ps + m] = points[k + m]; + + // middle point has same y + points[k + 1] = points[k - ps + 1]; + + // we've added a point, better reflect that + k += ps; + } + } + } + } + + // give the hooks a chance to run + for (i = 0; i < series.length; ++i) { + s = series[i]; + + executeHooks(hooks.processDatapoints, [ s, s.datapoints]); + } + + // second pass: find datamax/datamin for auto-scaling + for (i = 0; i < series.length; ++i) { + s = series[i]; + points = s.datapoints.points; + ps = s.datapoints.pointsize; + format = s.datapoints.format; + + var xmin = topSentry, ymin = topSentry, + xmax = bottomSentry, ymax = bottomSentry; + + for (j = 0; j < points.length; j += ps) { + if (points[j] == null) + continue; + + for (m = 0; m < ps; ++m) { + val = points[j + m]; + f = format[m]; + if (!f || f.autoscale === false || val == fakeInfinity || val == -fakeInfinity) + continue; + + if (f.x) { + if (val < xmin) + xmin = val; + if (val > xmax) + xmax = val; + } + if (f.y) { + if (val < ymin) + ymin = val; + if (val > ymax) + ymax = val; + } + } + } + + if (s.bars.show) { + // make sure we got room for the bar on the dancing floor + var delta; + + switch (s.bars.align) { + case "left": + delta = 0; + break; + case "right": + delta = -s.bars.barWidth; + break; + default: + delta = -s.bars.barWidth / 2; + } + + if (s.bars.horizontal) { + ymin += delta; + ymax += delta + s.bars.barWidth; + } + else { + xmin += delta; + xmax += delta + s.bars.barWidth; + } + } + + updateAxis(s.xaxis, xmin, xmax); + updateAxis(s.yaxis, ymin, ymax); + } + + $.each(allAxes(), function (_, axis) { + if (axis.datamin == topSentry) + axis.datamin = null; + if (axis.datamax == bottomSentry) + axis.datamax = null; + }); + } + + function setupCanvases() { + + // Make sure the placeholder is clear of everything except canvases + // from a previous plot in this container that we'll try to re-use. + + placeholder.css("padding", 0) // padding messes up the positioning + .children().filter(function(){ + return !$(this).hasClass("flot-overlay") && !$(this).hasClass('flot-base'); + }).remove(); + + if (placeholder.css("position") == 'static') + placeholder.css("position", "relative"); // for positioning labels and overlay + + surface = new Canvas("flot-base", placeholder); + overlay = new Canvas("flot-overlay", placeholder); // overlay canvas for interactive features + + ctx = surface.context; + octx = overlay.context; + + // define which element we're listening for events on + eventHolder = $(overlay.element).unbind(); + + // If we're re-using a plot object, shut down the old one + + var existing = placeholder.data("plot"); + + if (existing) { + existing.shutdown(); + overlay.clear(); + } + + // save in case we get replotted + placeholder.data("plot", plot); + } + + function bindEvents() { + // bind events + if (options.grid.hoverable) { + eventHolder.mousemove(onMouseMove); + + // Use bind, rather than .mouseleave, because we officially + // still support jQuery 1.2.6, which doesn't define a shortcut + // for mouseenter or mouseleave. 
This was a bug/oversight that + // was fixed somewhere around 1.3.x. We can return to using + // .mouseleave when we drop support for 1.2.6. + + eventHolder.bind("mouseleave", onMouseLeave); + } + + if (options.grid.clickable) + eventHolder.click(onClick); + + executeHooks(hooks.bindEvents, [eventHolder]); + } + + function shutdown() { + if (redrawTimeout) + clearTimeout(redrawTimeout); + + eventHolder.unbind("mousemove", onMouseMove); + eventHolder.unbind("mouseleave", onMouseLeave); + eventHolder.unbind("click", onClick); + + executeHooks(hooks.shutdown, [eventHolder]); + } + + function setTransformationHelpers(axis) { + // set helper functions on the axis, assumes plot area + // has been computed already + + function identity(x) { return x; } + + var s, m, t = axis.options.transform || identity, + it = axis.options.inverseTransform; + + // precompute how much the axis is scaling a point + // in canvas space + if (axis.direction == "x") { + s = axis.scale = plotWidth / Math.abs(t(axis.max) - t(axis.min)); + m = Math.min(t(axis.max), t(axis.min)); + } + else { + s = axis.scale = plotHeight / Math.abs(t(axis.max) - t(axis.min)); + s = -s; + m = Math.max(t(axis.max), t(axis.min)); + } + + // data point to canvas coordinate + if (t == identity) // slight optimization + axis.p2c = function (p) { return (p - m) * s; }; + else + axis.p2c = function (p) { return (t(p) - m) * s; }; + // canvas coordinate to data point + if (!it) + axis.c2p = function (c) { return m + c / s; }; + else + axis.c2p = function (c) { return it(m + c / s); }; + } + + function measureTickLabels(axis) { + + var opts = axis.options, + ticks = axis.ticks || [], + labelWidth = opts.labelWidth || 0, + labelHeight = opts.labelHeight || 0, + maxWidth = labelWidth || (axis.direction == "x" ? Math.floor(surface.width / (ticks.length || 1)) : null), + legacyStyles = axis.direction + "Axis " + axis.direction + axis.n + "Axis", + layer = "flot-" + axis.direction + "-axis flot-" + axis.direction + axis.n + "-axis " + legacyStyles, + font = opts.font || "flot-tick-label tickLabel"; + + for (var i = 0; i < ticks.length; ++i) { + + var t = ticks[i]; + + if (!t.label) + continue; + + var info = surface.getTextInfo(layer, t.label, font, null, maxWidth); + + labelWidth = Math.max(labelWidth, info.width); + labelHeight = Math.max(labelHeight, info.height); + } + + axis.labelWidth = opts.labelWidth || labelWidth; + axis.labelHeight = opts.labelHeight || labelHeight; + } + + function allocateAxisBoxFirstPhase(axis) { + // find the bounding box of the axis by looking at label + // widths/heights and ticks, make room by diminishing the + // plotOffset; this first phase only looks at one + // dimension per axis, the other dimension depends on the + // other axes so will have to wait + + var lw = axis.labelWidth, + lh = axis.labelHeight, + pos = axis.options.position, + isXAxis = axis.direction === "x", + tickLength = axis.options.tickLength, + axisMargin = options.grid.axisMargin, + padding = options.grid.labelMargin, + innermost = true, + outermost = true, + first = true, + found = false; + + // Determine the axis's position in its direction and on its side + + $.each(isXAxis ? 
xaxes : yaxes, function(i, a) { + if (a && (a.show || a.reserveSpace)) { + if (a === axis) { + found = true; + } else if (a.options.position === pos) { + if (found) { + outermost = false; + } else { + innermost = false; + } + } + if (!found) { + first = false; + } + } + }); + + // The outermost axis on each side has no margin + + if (outermost) { + axisMargin = 0; + } + + // The ticks for the first axis in each direction stretch across + + if (tickLength == null) { + tickLength = first ? "full" : 5; + } + + if (!isNaN(+tickLength)) + padding += +tickLength; + + if (isXAxis) { + lh += padding; + + if (pos == "bottom") { + plotOffset.bottom += lh + axisMargin; + axis.box = { top: surface.height - plotOffset.bottom, height: lh }; + } + else { + axis.box = { top: plotOffset.top + axisMargin, height: lh }; + plotOffset.top += lh + axisMargin; + } + } + else { + lw += padding; + + if (pos == "left") { + axis.box = { left: plotOffset.left + axisMargin, width: lw }; + plotOffset.left += lw + axisMargin; + } + else { + plotOffset.right += lw + axisMargin; + axis.box = { left: surface.width - plotOffset.right, width: lw }; + } + } + + // save for future reference + axis.position = pos; + axis.tickLength = tickLength; + axis.box.padding = padding; + axis.innermost = innermost; + } + + function allocateAxisBoxSecondPhase(axis) { + // now that all axis boxes have been placed in one + // dimension, we can set the remaining dimension coordinates + if (axis.direction == "x") { + axis.box.left = plotOffset.left - axis.labelWidth / 2; + axis.box.width = surface.width - plotOffset.left - plotOffset.right + axis.labelWidth; + } + else { + axis.box.top = plotOffset.top - axis.labelHeight / 2; + axis.box.height = surface.height - plotOffset.bottom - plotOffset.top + axis.labelHeight; + } + } + + function adjustLayoutForThingsStickingOut() { + // possibly adjust plot offset to ensure everything stays + // inside the canvas and isn't clipped off + + var minMargin = options.grid.minBorderMargin, + axis, i; + + // check stuff from the plot (FIXME: this should just read + // a value from the series, otherwise it's impossible to + // customize) + if (minMargin == null) { + minMargin = 0; + for (i = 0; i < series.length; ++i) + minMargin = Math.max(minMargin, 2 * (series[i].points.radius + series[i].points.lineWidth/2)); + } + + var margins = { + left: minMargin, + right: minMargin, + top: minMargin, + bottom: minMargin + }; + + // check axis labels, note we don't check the actual + // labels but instead use the overall width/height to not + // jump as much around with replots + $.each(allAxes(), function (_, axis) { + if (axis.reserveSpace && axis.ticks && axis.ticks.length) { + if (axis.direction === "x") { + margins.left = Math.max(margins.left, axis.labelWidth / 2); + margins.right = Math.max(margins.right, axis.labelWidth / 2); + } else { + margins.bottom = Math.max(margins.bottom, axis.labelHeight / 2); + margins.top = Math.max(margins.top, axis.labelHeight / 2); + } + } + }); + + plotOffset.left = Math.ceil(Math.max(margins.left, plotOffset.left)); + plotOffset.right = Math.ceil(Math.max(margins.right, plotOffset.right)); + plotOffset.top = Math.ceil(Math.max(margins.top, plotOffset.top)); + plotOffset.bottom = Math.ceil(Math.max(margins.bottom, plotOffset.bottom)); + } + + function setupGrid() { + var i, axes = allAxes(), showGrid = options.grid.show; + + // Initialize the plot's offset from the edge of the canvas + + for (var a in plotOffset) { + var margin = options.grid.margin || 0; + plotOffset[a] = typeof 
margin == "number" ? margin : margin[a] || 0; + } + + executeHooks(hooks.processOffset, [plotOffset]); + + // If the grid is visible, add its border width to the offset + + for (var a in plotOffset) { + if(typeof(options.grid.borderWidth) == "object") { + plotOffset[a] += showGrid ? options.grid.borderWidth[a] : 0; + } + else { + plotOffset[a] += showGrid ? options.grid.borderWidth : 0; + } + } + + $.each(axes, function (_, axis) { + var axisOpts = axis.options; + axis.show = axisOpts.show == null ? axis.used : axisOpts.show; + axis.reserveSpace = axisOpts.reserveSpace == null ? axis.show : axisOpts.reserveSpace; + setRange(axis); + }); + + if (showGrid) { + + var allocatedAxes = $.grep(axes, function (axis) { + return axis.show || axis.reserveSpace; + }); + + $.each(allocatedAxes, function (_, axis) { + // make the ticks + setupTickGeneration(axis); + setTicks(axis); + snapRangeToTicks(axis, axis.ticks); + // find labelWidth/Height for axis + measureTickLabels(axis); + }); + + // with all dimensions calculated, we can compute the + // axis bounding boxes, start from the outside + // (reverse order) + for (i = allocatedAxes.length - 1; i >= 0; --i) + allocateAxisBoxFirstPhase(allocatedAxes[i]); + + // make sure we've got enough space for things that + // might stick out + adjustLayoutForThingsStickingOut(); + + $.each(allocatedAxes, function (_, axis) { + allocateAxisBoxSecondPhase(axis); + }); + } + + plotWidth = surface.width - plotOffset.left - plotOffset.right; + plotHeight = surface.height - plotOffset.bottom - plotOffset.top; + + // now we got the proper plot dimensions, we can compute the scaling + $.each(axes, function (_, axis) { + setTransformationHelpers(axis); + }); + + if (showGrid) { + drawAxisLabels(); + } + + insertLegend(); + } + + function setRange(axis) { + var opts = axis.options, + min = +(opts.min != null ? opts.min : axis.datamin), + max = +(opts.max != null ? opts.max : axis.datamax), + delta = max - min; + + if (delta == 0.0) { + // degenerate case + var widen = max == 0 ? 1 : 0.01; + + if (opts.min == null) + min -= widen; + // always widen max if we couldn't widen min to ensure we + // don't fall into min == max which doesn't work + if (opts.max == null || opts.min != null) + max += widen; + } + else { + // consider autoscaling + var margin = opts.autoscaleMargin; + if (margin != null) { + if (opts.min == null) { + min -= delta * margin; + // make sure we don't go below zero if all values + // are positive + if (min < 0 && axis.datamin != null && axis.datamin >= 0) + min = 0; + } + if (opts.max == null) { + max += delta * margin; + if (max > 0 && axis.datamax != null && axis.datamax <= 0) + max = 0; + } + } + } + axis.min = min; + axis.max = max; + } + + function setupTickGeneration(axis) { + var opts = axis.options; + + // estimate number of ticks + var noTicks; + if (typeof opts.ticks == "number" && opts.ticks > 0) + noTicks = opts.ticks; + else + // heuristic based on the model a*sqrt(x) fitted to + // some data points that seemed reasonable + noTicks = 0.3 * Math.sqrt(axis.direction == "x" ? 
surface.width : surface.height); + + var delta = (axis.max - axis.min) / noTicks, + dec = -Math.floor(Math.log(delta) / Math.LN10), + maxDec = opts.tickDecimals; + + if (maxDec != null && dec > maxDec) { + dec = maxDec; + } + + var magn = Math.pow(10, -dec), + norm = delta / magn, // norm is between 1.0 and 10.0 + size; + + if (norm < 1.5) { + size = 1; + } else if (norm < 3) { + size = 2; + // special case for 2.5, requires an extra decimal + if (norm > 2.25 && (maxDec == null || dec + 1 <= maxDec)) { + size = 2.5; + ++dec; + } + } else if (norm < 7.5) { + size = 5; + } else { + size = 10; + } + + size *= magn; + + if (opts.minTickSize != null && size < opts.minTickSize) { + size = opts.minTickSize; + } + + axis.delta = delta; + axis.tickDecimals = Math.max(0, maxDec != null ? maxDec : dec); + axis.tickSize = opts.tickSize || size; + + // Time mode was moved to a plug-in in 0.8, and since so many people use it + // we'll add an especially friendly reminder to make sure they included it. + + if (opts.mode == "time" && !axis.tickGenerator) { + throw new Error("Time mode requires the flot.time plugin."); + } + + // Flot supports base-10 axes; any other mode else is handled by a plug-in, + // like flot.time.js. + + if (!axis.tickGenerator) { + + axis.tickGenerator = function (axis) { + + var ticks = [], + start = floorInBase(axis.min, axis.tickSize), + i = 0, + v = Number.NaN, + prev; + + do { + prev = v; + v = start + i * axis.tickSize; + ticks.push(v); + ++i; + } while (v < axis.max && v != prev); + return ticks; + }; + + axis.tickFormatter = function (value, axis) { + + var factor = axis.tickDecimals ? Math.pow(10, axis.tickDecimals) : 1; + var formatted = "" + Math.round(value * factor) / factor; + + // If tickDecimals was specified, ensure that we have exactly that + // much precision; otherwise default to the value's own precision. + + if (axis.tickDecimals != null) { + var decimal = formatted.indexOf("."); + var precision = decimal == -1 ? 0 : formatted.length - decimal - 1; + if (precision < axis.tickDecimals) { + return (precision ? formatted : formatted + ".") + ("" + factor).substr(1, axis.tickDecimals - precision); + } + } + + return formatted; + }; + } + + if ($.isFunction(opts.tickFormatter)) + axis.tickFormatter = function (v, axis) { return "" + opts.tickFormatter(v, axis); }; + + if (opts.alignTicksWithAxis != null) { + var otherAxis = (axis.direction == "x" ? 
xaxes : yaxes)[opts.alignTicksWithAxis - 1]; + if (otherAxis && otherAxis.used && otherAxis != axis) { + // consider snapping min/max to outermost nice ticks + var niceTicks = axis.tickGenerator(axis); + if (niceTicks.length > 0) { + if (opts.min == null) + axis.min = Math.min(axis.min, niceTicks[0]); + if (opts.max == null && niceTicks.length > 1) + axis.max = Math.max(axis.max, niceTicks[niceTicks.length - 1]); + } + + axis.tickGenerator = function (axis) { + // copy ticks, scaled to this axis + var ticks = [], v, i; + for (i = 0; i < otherAxis.ticks.length; ++i) { + v = (otherAxis.ticks[i].v - otherAxis.min) / (otherAxis.max - otherAxis.min); + v = axis.min + v * (axis.max - axis.min); + ticks.push(v); + } + return ticks; + }; + + // we might need an extra decimal since forced + // ticks don't necessarily fit naturally + if (!axis.mode && opts.tickDecimals == null) { + var extraDec = Math.max(0, -Math.floor(Math.log(axis.delta) / Math.LN10) + 1), + ts = axis.tickGenerator(axis); + + // only proceed if the tick interval rounded + // with an extra decimal doesn't give us a + // zero at end + if (!(ts.length > 1 && /\..*0$/.test((ts[1] - ts[0]).toFixed(extraDec)))) + axis.tickDecimals = extraDec; + } + } + } + } + + function setTicks(axis) { + var oticks = axis.options.ticks, ticks = []; + if (oticks == null || (typeof oticks == "number" && oticks > 0)) + ticks = axis.tickGenerator(axis); + else if (oticks) { + if ($.isFunction(oticks)) + // generate the ticks + ticks = oticks(axis); + else + ticks = oticks; + } + + // clean up/labelify the supplied ticks, copy them over + var i, v; + axis.ticks = []; + for (i = 0; i < ticks.length; ++i) { + var label = null; + var t = ticks[i]; + if (typeof t == "object") { + v = +t[0]; + if (t.length > 1) + label = t[1]; + } + else + v = +t; + if (label == null) + label = axis.tickFormatter(v, axis); + if (!isNaN(v)) + axis.ticks.push({ v: v, label: label }); + } + } + + function snapRangeToTicks(axis, ticks) { + if (axis.options.autoscaleMargin && ticks.length > 0) { + // snap to ticks + if (axis.options.min == null) + axis.min = Math.min(axis.min, ticks[0].v); + if (axis.options.max == null && ticks.length > 1) + axis.max = Math.max(axis.max, ticks[ticks.length - 1].v); + } + } + + function draw() { + + surface.clear(); + + executeHooks(hooks.drawBackground, [ctx]); + + var grid = options.grid; + + // draw background, if any + if (grid.show && grid.backgroundColor) + drawBackground(); + + if (grid.show && !grid.aboveData) { + drawGrid(); + } + + for (var i = 0; i < series.length; ++i) { + executeHooks(hooks.drawSeries, [ctx, series[i]]); + drawSeries(series[i]); + } + + executeHooks(hooks.draw, [ctx]); + + if (grid.show && grid.aboveData) { + drawGrid(); + } + + surface.render(); + + // A draw implies that either the axes or data have changed, so we + // should probably update the overlay highlights as well. + + triggerRedrawOverlay(); + } + + function extractRange(ranges, coord) { + var axis, from, to, key, axes = allAxes(); + + for (var i = 0; i < axes.length; ++i) { + axis = axes[i]; + if (axis.direction == coord) { + key = coord + axis.n + "axis"; + if (!ranges[key] && axis.n == 1) + key = coord + "axis"; // support x1axis as xaxis + if (ranges[key]) { + from = ranges[key].from; + to = ranges[key].to; + break; + } + } + } + + // backwards-compat stuff - to be removed in future + if (!ranges[key]) { + axis = coord == "x" ? 
xaxes[0] : yaxes[0]; + from = ranges[coord + "1"]; + to = ranges[coord + "2"]; + } + + // auto-reverse as an added bonus + if (from != null && to != null && from > to) { + var tmp = from; + from = to; + to = tmp; + } + + return { from: from, to: to, axis: axis }; + } + + function drawBackground() { + ctx.save(); + ctx.translate(plotOffset.left, plotOffset.top); + + ctx.fillStyle = getColorOrGradient(options.grid.backgroundColor, plotHeight, 0, "rgba(255, 255, 255, 0)"); + ctx.fillRect(0, 0, plotWidth, plotHeight); + ctx.restore(); + } + + function drawGrid() { + var i, axes, bw, bc; + + ctx.save(); + ctx.translate(plotOffset.left, plotOffset.top); + + // draw markings + var markings = options.grid.markings; + if (markings) { + if ($.isFunction(markings)) { + axes = plot.getAxes(); + // xmin etc. is backwards compatibility, to be + // removed in the future + axes.xmin = axes.xaxis.min; + axes.xmax = axes.xaxis.max; + axes.ymin = axes.yaxis.min; + axes.ymax = axes.yaxis.max; + + markings = markings(axes); + } + + for (i = 0; i < markings.length; ++i) { + var m = markings[i], + xrange = extractRange(m, "x"), + yrange = extractRange(m, "y"); + + // fill in missing + if (xrange.from == null) + xrange.from = xrange.axis.min; + if (xrange.to == null) + xrange.to = xrange.axis.max; + if (yrange.from == null) + yrange.from = yrange.axis.min; + if (yrange.to == null) + yrange.to = yrange.axis.max; + + // clip + if (xrange.to < xrange.axis.min || xrange.from > xrange.axis.max || + yrange.to < yrange.axis.min || yrange.from > yrange.axis.max) + continue; + + xrange.from = Math.max(xrange.from, xrange.axis.min); + xrange.to = Math.min(xrange.to, xrange.axis.max); + yrange.from = Math.max(yrange.from, yrange.axis.min); + yrange.to = Math.min(yrange.to, yrange.axis.max); + + var xequal = xrange.from === xrange.to, + yequal = yrange.from === yrange.to; + + if (xequal && yequal) { + continue; + } + + // then draw + xrange.from = Math.floor(xrange.axis.p2c(xrange.from)); + xrange.to = Math.floor(xrange.axis.p2c(xrange.to)); + yrange.from = Math.floor(yrange.axis.p2c(yrange.from)); + yrange.to = Math.floor(yrange.axis.p2c(yrange.to)); + + if (xequal || yequal) { + var lineWidth = m.lineWidth || options.grid.markingsLineWidth, + subPixel = lineWidth % 2 ? 0.5 : 0; + ctx.beginPath(); + ctx.strokeStyle = m.color || options.grid.markingsColor; + ctx.lineWidth = lineWidth; + if (xequal) { + ctx.moveTo(xrange.to + subPixel, yrange.from); + ctx.lineTo(xrange.to + subPixel, yrange.to); + } else { + ctx.moveTo(xrange.from, yrange.to + subPixel); + ctx.lineTo(xrange.to, yrange.to + subPixel); + } + ctx.stroke(); + } else { + ctx.fillStyle = m.color || options.grid.markingsColor; + ctx.fillRect(xrange.from, yrange.to, + xrange.to - xrange.from, + yrange.from - yrange.to); + } + } + } + + // draw the ticks + axes = allAxes(); + bw = options.grid.borderWidth; + + for (var j = 0; j < axes.length; ++j) { + var axis = axes[j], box = axis.box, + t = axis.tickLength, x, y, xoff, yoff; + if (!axis.show || axis.ticks.length == 0) + continue; + + ctx.lineWidth = 1; + + // find the edges + if (axis.direction == "x") { + x = 0; + if (t == "full") + y = (axis.position == "top" ? 0 : plotHeight); + else + y = box.top - plotOffset.top + (axis.position == "top" ? box.height : 0); + } + else { + y = 0; + if (t == "full") + x = (axis.position == "left" ? 0 : plotWidth); + else + x = box.left - plotOffset.left + (axis.position == "left" ? 
box.width : 0); + } + + // draw tick bar + if (!axis.innermost) { + ctx.strokeStyle = axis.options.color; + ctx.beginPath(); + xoff = yoff = 0; + if (axis.direction == "x") + xoff = plotWidth + 1; + else + yoff = plotHeight + 1; + + if (ctx.lineWidth == 1) { + if (axis.direction == "x") { + y = Math.floor(y) + 0.5; + } else { + x = Math.floor(x) + 0.5; + } + } + + ctx.moveTo(x, y); + ctx.lineTo(x + xoff, y + yoff); + ctx.stroke(); + } + + // draw ticks + + ctx.strokeStyle = axis.options.tickColor; + + ctx.beginPath(); + for (i = 0; i < axis.ticks.length; ++i) { + var v = axis.ticks[i].v; + + xoff = yoff = 0; + + if (isNaN(v) || v < axis.min || v > axis.max + // skip those lying on the axes if we got a border + || (t == "full" + && ((typeof bw == "object" && bw[axis.position] > 0) || bw > 0) + && (v == axis.min || v == axis.max))) + continue; + + if (axis.direction == "x") { + x = axis.p2c(v); + yoff = t == "full" ? -plotHeight : t; + + if (axis.position == "top") + yoff = -yoff; + } + else { + y = axis.p2c(v); + xoff = t == "full" ? -plotWidth : t; + + if (axis.position == "left") + xoff = -xoff; + } + + if (ctx.lineWidth == 1) { + if (axis.direction == "x") + x = Math.floor(x) + 0.5; + else + y = Math.floor(y) + 0.5; + } + + ctx.moveTo(x, y); + ctx.lineTo(x + xoff, y + yoff); + } + + ctx.stroke(); + } + + + // draw border + if (bw) { + // If either borderWidth or borderColor is an object, then draw the border + // line by line instead of as one rectangle + bc = options.grid.borderColor; + if(typeof bw == "object" || typeof bc == "object") { + if (typeof bw !== "object") { + bw = {top: bw, right: bw, bottom: bw, left: bw}; + } + if (typeof bc !== "object") { + bc = {top: bc, right: bc, bottom: bc, left: bc}; + } + + if (bw.top > 0) { + ctx.strokeStyle = bc.top; + ctx.lineWidth = bw.top; + ctx.beginPath(); + ctx.moveTo(0 - bw.left, 0 - bw.top/2); + ctx.lineTo(plotWidth, 0 - bw.top/2); + ctx.stroke(); + } + + if (bw.right > 0) { + ctx.strokeStyle = bc.right; + ctx.lineWidth = bw.right; + ctx.beginPath(); + ctx.moveTo(plotWidth + bw.right / 2, 0 - bw.top); + ctx.lineTo(plotWidth + bw.right / 2, plotHeight); + ctx.stroke(); + } + + if (bw.bottom > 0) { + ctx.strokeStyle = bc.bottom; + ctx.lineWidth = bw.bottom; + ctx.beginPath(); + ctx.moveTo(plotWidth + bw.right, plotHeight + bw.bottom / 2); + ctx.lineTo(0, plotHeight + bw.bottom / 2); + ctx.stroke(); + } + + if (bw.left > 0) { + ctx.strokeStyle = bc.left; + ctx.lineWidth = bw.left; + ctx.beginPath(); + ctx.moveTo(0 - bw.left/2, plotHeight + bw.bottom); + ctx.lineTo(0- bw.left/2, 0); + ctx.stroke(); + } + } + else { + ctx.lineWidth = bw; + ctx.strokeStyle = options.grid.borderColor; + ctx.strokeRect(-bw/2, -bw/2, plotWidth + bw, plotHeight + bw); + } + } + + ctx.restore(); + } + + function drawAxisLabels() { + + $.each(allAxes(), function (_, axis) { + var box = axis.box, + legacyStyles = axis.direction + "Axis " + axis.direction + axis.n + "Axis", + layer = "flot-" + axis.direction + "-axis flot-" + axis.direction + axis.n + "-axis " + legacyStyles, + font = axis.options.font || "flot-tick-label tickLabel", + tick, x, y, halign, valign; + + // Remove text before checking for axis.show and ticks.length; + // otherwise plugins, like flot-tickrotor, that draw their own + // tick labels will end up with both theirs and the defaults. 
+ + surface.removeText(layer); + + if (!axis.show || axis.ticks.length == 0) + return; + + for (var i = 0; i < axis.ticks.length; ++i) { + + tick = axis.ticks[i]; + if (!tick.label || tick.v < axis.min || tick.v > axis.max) + continue; + + if (axis.direction == "x") { + halign = "center"; + x = plotOffset.left + axis.p2c(tick.v); + if (axis.position == "bottom") { + y = box.top + box.padding; + } else { + y = box.top + box.height - box.padding; + valign = "bottom"; + } + } else { + valign = "middle"; + y = plotOffset.top + axis.p2c(tick.v); + if (axis.position == "left") { + x = box.left + box.width - box.padding; + halign = "right"; + } else { + x = box.left + box.padding; + } + } + + surface.addText(layer, x, y, tick.label, font, null, null, halign, valign); + } + }); + } + + function drawSeries(series) { + if (series.lines.show) + drawSeriesLines(series); + if (series.bars.show) + drawSeriesBars(series); + if (series.points.show) + drawSeriesPoints(series); + } + + function drawSeriesLines(series) { + function plotLine(datapoints, xoffset, yoffset, axisx, axisy) { + var points = datapoints.points, + ps = datapoints.pointsize, + prevx = null, prevy = null; + + ctx.beginPath(); + for (var i = ps; i < points.length; i += ps) { + var x1 = points[i - ps], y1 = points[i - ps + 1], + x2 = points[i], y2 = points[i + 1]; + + if (x1 == null || x2 == null) + continue; + + // clip with ymin + if (y1 <= y2 && y1 < axisy.min) { + if (y2 < axisy.min) + continue; // line segment is outside + // compute new intersection point + x1 = (axisy.min - y1) / (y2 - y1) * (x2 - x1) + x1; + y1 = axisy.min; + } + else if (y2 <= y1 && y2 < axisy.min) { + if (y1 < axisy.min) + continue; + x2 = (axisy.min - y1) / (y2 - y1) * (x2 - x1) + x1; + y2 = axisy.min; + } + + // clip with ymax + if (y1 >= y2 && y1 > axisy.max) { + if (y2 > axisy.max) + continue; + x1 = (axisy.max - y1) / (y2 - y1) * (x2 - x1) + x1; + y1 = axisy.max; + } + else if (y2 >= y1 && y2 > axisy.max) { + if (y1 > axisy.max) + continue; + x2 = (axisy.max - y1) / (y2 - y1) * (x2 - x1) + x1; + y2 = axisy.max; + } + + // clip with xmin + if (x1 <= x2 && x1 < axisx.min) { + if (x2 < axisx.min) + continue; + y1 = (axisx.min - x1) / (x2 - x1) * (y2 - y1) + y1; + x1 = axisx.min; + } + else if (x2 <= x1 && x2 < axisx.min) { + if (x1 < axisx.min) + continue; + y2 = (axisx.min - x1) / (x2 - x1) * (y2 - y1) + y1; + x2 = axisx.min; + } + + // clip with xmax + if (x1 >= x2 && x1 > axisx.max) { + if (x2 > axisx.max) + continue; + y1 = (axisx.max - x1) / (x2 - x1) * (y2 - y1) + y1; + x1 = axisx.max; + } + else if (x2 >= x1 && x2 > axisx.max) { + if (x1 > axisx.max) + continue; + y2 = (axisx.max - x1) / (x2 - x1) * (y2 - y1) + y1; + x2 = axisx.max; + } + + if (x1 != prevx || y1 != prevy) + ctx.moveTo(axisx.p2c(x1) + xoffset, axisy.p2c(y1) + yoffset); + + prevx = x2; + prevy = y2; + ctx.lineTo(axisx.p2c(x2) + xoffset, axisy.p2c(y2) + yoffset); + } + ctx.stroke(); + } + + function plotLineArea(datapoints, axisx, axisy) { + var points = datapoints.points, + ps = datapoints.pointsize, + bottom = Math.min(Math.max(0, axisy.min), axisy.max), + i = 0, top, areaOpen = false, + ypos = 1, segmentStart = 0, segmentEnd = 0; + + // we process each segment in two turns, first forward + // direction to sketch out top, then once we hit the + // end we go backwards to sketch the bottom + while (true) { + if (ps > 0 && i > points.length + ps) + break; + + i += ps; // ps is negative if going backwards + + var x1 = points[i - ps], + y1 = points[i - ps + ypos], + x2 = points[i], y2 = 
points[i + ypos]; + + if (areaOpen) { + if (ps > 0 && x1 != null && x2 == null) { + // at turning point + segmentEnd = i; + ps = -ps; + ypos = 2; + continue; + } + + if (ps < 0 && i == segmentStart + ps) { + // done with the reverse sweep + ctx.fill(); + areaOpen = false; + ps = -ps; + ypos = 1; + i = segmentStart = segmentEnd + ps; + continue; + } + } + + if (x1 == null || x2 == null) + continue; + + // clip x values + + // clip with xmin + if (x1 <= x2 && x1 < axisx.min) { + if (x2 < axisx.min) + continue; + y1 = (axisx.min - x1) / (x2 - x1) * (y2 - y1) + y1; + x1 = axisx.min; + } + else if (x2 <= x1 && x2 < axisx.min) { + if (x1 < axisx.min) + continue; + y2 = (axisx.min - x1) / (x2 - x1) * (y2 - y1) + y1; + x2 = axisx.min; + } + + // clip with xmax + if (x1 >= x2 && x1 > axisx.max) { + if (x2 > axisx.max) + continue; + y1 = (axisx.max - x1) / (x2 - x1) * (y2 - y1) + y1; + x1 = axisx.max; + } + else if (x2 >= x1 && x2 > axisx.max) { + if (x1 > axisx.max) + continue; + y2 = (axisx.max - x1) / (x2 - x1) * (y2 - y1) + y1; + x2 = axisx.max; + } + + if (!areaOpen) { + // open area + ctx.beginPath(); + ctx.moveTo(axisx.p2c(x1), axisy.p2c(bottom)); + areaOpen = true; + } + + // now first check the case where both is outside + if (y1 >= axisy.max && y2 >= axisy.max) { + ctx.lineTo(axisx.p2c(x1), axisy.p2c(axisy.max)); + ctx.lineTo(axisx.p2c(x2), axisy.p2c(axisy.max)); + continue; + } + else if (y1 <= axisy.min && y2 <= axisy.min) { + ctx.lineTo(axisx.p2c(x1), axisy.p2c(axisy.min)); + ctx.lineTo(axisx.p2c(x2), axisy.p2c(axisy.min)); + continue; + } + + // else it's a bit more complicated, there might + // be a flat maxed out rectangle first, then a + // triangular cutout or reverse; to find these + // keep track of the current x values + var x1old = x1, x2old = x2; + + // clip the y values, without shortcutting, we + // go through all cases in turn + + // clip with ymin + if (y1 <= y2 && y1 < axisy.min && y2 >= axisy.min) { + x1 = (axisy.min - y1) / (y2 - y1) * (x2 - x1) + x1; + y1 = axisy.min; + } + else if (y2 <= y1 && y2 < axisy.min && y1 >= axisy.min) { + x2 = (axisy.min - y1) / (y2 - y1) * (x2 - x1) + x1; + y2 = axisy.min; + } + + // clip with ymax + if (y1 >= y2 && y1 > axisy.max && y2 <= axisy.max) { + x1 = (axisy.max - y1) / (y2 - y1) * (x2 - x1) + x1; + y1 = axisy.max; + } + else if (y2 >= y1 && y2 > axisy.max && y1 <= axisy.max) { + x2 = (axisy.max - y1) / (y2 - y1) * (x2 - x1) + x1; + y2 = axisy.max; + } + + // if the x value was changed we got a rectangle + // to fill + if (x1 != x1old) { + ctx.lineTo(axisx.p2c(x1old), axisy.p2c(y1)); + // it goes to (x1, y1), but we fill that below + } + + // fill triangular section, this sometimes result + // in redundant points if (x1, y1) hasn't changed + // from previous line to, but we just ignore that + ctx.lineTo(axisx.p2c(x1), axisy.p2c(y1)); + ctx.lineTo(axisx.p2c(x2), axisy.p2c(y2)); + + // fill the other rectangle if it's there + if (x2 != x2old) { + ctx.lineTo(axisx.p2c(x2), axisy.p2c(y2)); + ctx.lineTo(axisx.p2c(x2old), axisy.p2c(y2)); + } + } + } + + ctx.save(); + ctx.translate(plotOffset.left, plotOffset.top); + ctx.lineJoin = "round"; + + var lw = series.lines.lineWidth, + sw = series.shadowSize; + // FIXME: consider another form of shadow when filling is turned on + if (lw > 0 && sw > 0) { + // draw shadow as a thick and thin line with transparency + ctx.lineWidth = sw; + ctx.strokeStyle = "rgba(0,0,0,0.1)"; + // position shadow at angle from the mid of line + var angle = Math.PI/18; + plotLine(series.datapoints, Math.sin(angle) * 
(lw/2 + sw/2), Math.cos(angle) * (lw/2 + sw/2), series.xaxis, series.yaxis); + ctx.lineWidth = sw/2; + plotLine(series.datapoints, Math.sin(angle) * (lw/2 + sw/4), Math.cos(angle) * (lw/2 + sw/4), series.xaxis, series.yaxis); + } + + ctx.lineWidth = lw; + ctx.strokeStyle = series.color; + var fillStyle = getFillStyle(series.lines, series.color, 0, plotHeight); + if (fillStyle) { + ctx.fillStyle = fillStyle; + plotLineArea(series.datapoints, series.xaxis, series.yaxis); + } + + if (lw > 0) + plotLine(series.datapoints, 0, 0, series.xaxis, series.yaxis); + ctx.restore(); + } + + function drawSeriesPoints(series) { + function plotPoints(datapoints, radius, fillStyle, offset, shadow, axisx, axisy, symbol) { + var points = datapoints.points, ps = datapoints.pointsize; + + for (var i = 0; i < points.length; i += ps) { + var x = points[i], y = points[i + 1]; + if (x == null || x < axisx.min || x > axisx.max || y < axisy.min || y > axisy.max) + continue; + + ctx.beginPath(); + x = axisx.p2c(x); + y = axisy.p2c(y) + offset; + if (symbol == "circle") + ctx.arc(x, y, radius, 0, shadow ? Math.PI : Math.PI * 2, false); + else + symbol(ctx, x, y, radius, shadow); + ctx.closePath(); + + if (fillStyle) { + ctx.fillStyle = fillStyle; + ctx.fill(); + } + ctx.stroke(); + } + } + + ctx.save(); + ctx.translate(plotOffset.left, plotOffset.top); + + var lw = series.points.lineWidth, + sw = series.shadowSize, + radius = series.points.radius, + symbol = series.points.symbol; + + // If the user sets the line width to 0, we change it to a very + // small value. A line width of 0 seems to force the default of 1. + // Doing the conditional here allows the shadow setting to still be + // optional even with a lineWidth of 0. + + if( lw == 0 ) + lw = 0.0001; + + if (lw > 0 && sw > 0) { + // draw shadow in two steps + var w = sw / 2; + ctx.lineWidth = w; + ctx.strokeStyle = "rgba(0,0,0,0.1)"; + plotPoints(series.datapoints, radius, null, w + w/2, true, + series.xaxis, series.yaxis, symbol); + + ctx.strokeStyle = "rgba(0,0,0,0.2)"; + plotPoints(series.datapoints, radius, null, w/2, true, + series.xaxis, series.yaxis, symbol); + } + + ctx.lineWidth = lw; + ctx.strokeStyle = series.color; + plotPoints(series.datapoints, radius, + getFillStyle(series.points, series.color), 0, false, + series.xaxis, series.yaxis, symbol); + ctx.restore(); + } + + function drawBar(x, y, b, barLeft, barRight, fillStyleCallback, axisx, axisy, c, horizontal, lineWidth) { + var left, right, bottom, top, + drawLeft, drawRight, drawTop, drawBottom, + tmp; + + // in horizontal mode, we start the bar from the left + // instead of from the bottom so it appears to be + // horizontal rather than vertical + if (horizontal) { + drawBottom = drawRight = drawTop = true; + drawLeft = false; + left = b; + right = x; + top = y + barLeft; + bottom = y + barRight; + + // account for negative bars + if (right < left) { + tmp = right; + right = left; + left = tmp; + drawLeft = true; + drawRight = false; + } + } + else { + drawLeft = drawRight = drawTop = true; + drawBottom = false; + left = x + barLeft; + right = x + barRight; + bottom = b; + top = y; + + // account for negative bars + if (top < bottom) { + tmp = top; + top = bottom; + bottom = tmp; + drawBottom = true; + drawTop = false; + } + } + + // clip + if (right < axisx.min || left > axisx.max || + top < axisy.min || bottom > axisy.max) + return; + + if (left < axisx.min) { + left = axisx.min; + drawLeft = false; + } + + if (right > axisx.max) { + right = axisx.max; + drawRight = false; + } + + if (bottom < 
axisy.min) { + bottom = axisy.min; + drawBottom = false; + } + + if (top > axisy.max) { + top = axisy.max; + drawTop = false; + } + + left = axisx.p2c(left); + bottom = axisy.p2c(bottom); + right = axisx.p2c(right); + top = axisy.p2c(top); + + // fill the bar + if (fillStyleCallback) { + c.fillStyle = fillStyleCallback(bottom, top); + c.fillRect(left, top, right - left, bottom - top) + } + + // draw outline + if (lineWidth > 0 && (drawLeft || drawRight || drawTop || drawBottom)) { + c.beginPath(); + + // FIXME: inline moveTo is buggy with excanvas + c.moveTo(left, bottom); + if (drawLeft) + c.lineTo(left, top); + else + c.moveTo(left, top); + if (drawTop) + c.lineTo(right, top); + else + c.moveTo(right, top); + if (drawRight) + c.lineTo(right, bottom); + else + c.moveTo(right, bottom); + if (drawBottom) + c.lineTo(left, bottom); + else + c.moveTo(left, bottom); + c.stroke(); + } + } + + function drawSeriesBars(series) { + function plotBars(datapoints, barLeft, barRight, fillStyleCallback, axisx, axisy) { + var points = datapoints.points, ps = datapoints.pointsize; + + for (var i = 0; i < points.length; i += ps) { + if (points[i] == null) + continue; + drawBar(points[i], points[i + 1], points[i + 2], barLeft, barRight, fillStyleCallback, axisx, axisy, ctx, series.bars.horizontal, series.bars.lineWidth); + } + } + + ctx.save(); + ctx.translate(plotOffset.left, plotOffset.top); + + // FIXME: figure out a way to add shadows (for instance along the right edge) + ctx.lineWidth = series.bars.lineWidth; + ctx.strokeStyle = series.color; + + var barLeft; + + switch (series.bars.align) { + case "left": + barLeft = 0; + break; + case "right": + barLeft = -series.bars.barWidth; + break; + default: + barLeft = -series.bars.barWidth / 2; + } + + var fillStyleCallback = series.bars.fill ? function (bottom, top) { return getFillStyle(series.bars, series.color, bottom, top); } : null; + plotBars(series.datapoints, barLeft, barLeft + series.bars.barWidth, fillStyleCallback, series.xaxis, series.yaxis); + ctx.restore(); + } + + function getFillStyle(filloptions, seriesColor, bottom, top) { + var fill = filloptions.fill; + if (!fill) + return null; + + if (filloptions.fillColor) + return getColorOrGradient(filloptions.fillColor, bottom, top, seriesColor); + + var c = $.color.parse(seriesColor); + c.a = typeof fill == "number" ? fill : 0.4; + c.normalize(); + return c.toString(); + } + + function insertLegend() { + + if (options.legend.container != null) { + $(options.legend.container).html(""); + } else { + placeholder.find(".legend").remove(); + } + + if (!options.legend.show) { + return; + } + + var fragments = [], entries = [], rowStarted = false, + lf = options.legend.labelFormatter, s, label; + + // Build a list of legend entries, with each having a label and a color + + for (var i = 0; i < series.length; ++i) { + s = series[i]; + if (s.label) { + label = lf ? lf(s.label, s) : s.label; + if (label) { + entries.push({ + label: label, + color: s.color + }); + } + } + } + + // Sort the legend using either the default or a custom comparator + + if (options.legend.sorted) { + if ($.isFunction(options.legend.sorted)) { + entries.sort(options.legend.sorted); + } else if (options.legend.sorted == "reverse") { + entries.reverse(); + } else { + var ascending = options.legend.sorted != "descending"; + entries.sort(function(a, b) { + return a.label == b.label ? 0 : ( + (a.label < b.label) != ascending ? 
1 : -1 // Logical XOR + ); + }); + } + } + + // Generate markup for the list of entries, in their final order + + for (var i = 0; i < entries.length; ++i) { + + var entry = entries[i]; + + if (i % options.legend.noColumns == 0) { + if (rowStarted) + fragments.push('</tr>'); + fragments.push('<tr>'); + rowStarted = true; + } + + fragments.push( + '<td class="legendColorBox"><div style="border:1px solid ' + options.legend.labelBoxBorderColor + ';padding:1px"><div style="width:4px;height:0;border:5px solid ' + entry.color + ';overflow:hidden"></div></div></td>' + + '<td class="legendLabel">' + entry.label + '</td>' + ); + } + + if (rowStarted) + fragments.push('</tr>'); + + if (fragments.length == 0) + return; + + var table = '<table style="font-size:smaller;color:' + options.grid.color + '">' + fragments.join("") + '</table>'; + if (options.legend.container != null) + $(options.legend.container).html(table); + else { + var pos = "", + p = options.legend.position, + m = options.legend.margin; + if (m[0] == null) + m = [m, m]; + if (p.charAt(0) == "n") + pos += 'top:' + (m[1] + plotOffset.top) + 'px;'; + else if (p.charAt(0) == "s") + pos += 'bottom:' + (m[1] + plotOffset.bottom) + 'px;'; + if (p.charAt(1) == "e") + pos += 'right:' + (m[0] + plotOffset.right) + 'px;'; + else if (p.charAt(1) == "w") + pos += 'left:' + (m[0] + plotOffset.left) + 'px;'; + var legend = $('<div class="legend">' + table.replace('style="', 'style="position:absolute;' + pos +';') + '</div>').appendTo(placeholder); + if (options.legend.backgroundOpacity != 0.0) { + // put in the transparent background + // separately to avoid blended labels and + // label boxes + var c = options.legend.backgroundColor; + if (c == null) { + c = options.grid.backgroundColor; + if (c && typeof c == "string") + c = $.color.parse(c); + else + c = $.color.extract(legend, 'background-color'); + c.a = 1; + c = c.toString(); + } + var div = legend.children(); + $('<div style="position:absolute;width:' + div.width() + 'px;height:' + div.height() + 'px;' + pos +'background-color:' + c + ';"> </div>').prependTo(legend).css('opacity', options.legend.backgroundOpacity); + } + } + } + + + // interactive features + + var highlights = [], + redrawTimeout = null; + + // returns the data item the mouse is over, or null if none is found + function findNearbyItem(mouseX, mouseY, seriesFilter) { + var maxDistance = options.grid.mouseActiveRadius, + smallestDistance = maxDistance * maxDistance + 1, + item = null, foundPoint = false, i, j, ps; + + for (i = series.length - 1; i >= 0; --i) { + if (!seriesFilter(series[i])) + continue; + + var s = series[i], + axisx = s.xaxis, + axisy = s.yaxis, + points = s.datapoints.points, + mx = axisx.c2p(mouseX), // precompute some stuff to make the loop faster + my = axisy.c2p(mouseY), + maxx = maxDistance / axisx.scale, + maxy = maxDistance / axisy.scale; + + ps = s.datapoints.pointsize; + // with inverse transforms, we can't use the maxx/maxy + // optimization, sadly + if (axisx.options.inverseTransform) + maxx = Number.MAX_VALUE; + if (axisy.options.inverseTransform) + maxy = Number.MAX_VALUE; + + if (s.lines.show || s.points.show) { + for (j = 0; j < points.length; j += ps) { + var x = points[j], y = points[j + 1]; + if (x == null) + continue; + + // For points and lines, the cursor must be within a + // certain distance to the data point + if (x - mx > maxx || x - mx < -maxx || + y - my > maxy || y - my < -maxy) + continue; + + // We have to calculate distances in pixels, not in + // data units, because the scales of 
the axes may be different + var dx = Math.abs(axisx.p2c(x) - mouseX), + dy = Math.abs(axisy.p2c(y) - mouseY), + dist = dx * dx + dy * dy; // we save the sqrt + + // use <= to ensure last point takes precedence + // (last generally means on top of) + if (dist < smallestDistance) { + smallestDistance = dist; + item = [i, j / ps]; + } + } + } + + if (s.bars.show && !item) { // no other point can be nearby + + var barLeft, barRight; + + switch (s.bars.align) { + case "left": + barLeft = 0; + break; + case "right": + barLeft = -s.bars.barWidth; + break; + default: + barLeft = -s.bars.barWidth / 2; + } + + barRight = barLeft + s.bars.barWidth; + + for (j = 0; j < points.length; j += ps) { + var x = points[j], y = points[j + 1], b = points[j + 2]; + if (x == null) + continue; + + // for a bar graph, the cursor must be inside the bar + if (series[i].bars.horizontal ? + (mx <= Math.max(b, x) && mx >= Math.min(b, x) && + my >= y + barLeft && my <= y + barRight) : + (mx >= x + barLeft && mx <= x + barRight && + my >= Math.min(b, y) && my <= Math.max(b, y))) + item = [i, j / ps]; + } + } + } + + if (item) { + i = item[0]; + j = item[1]; + ps = series[i].datapoints.pointsize; + + return { datapoint: series[i].datapoints.points.slice(j * ps, (j + 1) * ps), + dataIndex: j, + series: series[i], + seriesIndex: i }; + } + + return null; + } + + function onMouseMove(e) { + if (options.grid.hoverable) + triggerClickHoverEvent("plothover", e, + function (s) { return s["hoverable"] != false; }); + } + + function onMouseLeave(e) { + if (options.grid.hoverable) + triggerClickHoverEvent("plothover", e, + function (s) { return false; }); + } + + function onClick(e) { + triggerClickHoverEvent("plotclick", e, + function (s) { return s["clickable"] != false; }); + } + + // trigger click or hover event (they send the same parameters + // so we share their code) + function triggerClickHoverEvent(eventname, event, seriesFilter) { + var offset = eventHolder.offset(), + canvasX = event.pageX - offset.left - plotOffset.left, + canvasY = event.pageY - offset.top - plotOffset.top, + pos = canvasToAxisCoords({ left: canvasX, top: canvasY }); + + pos.pageX = event.pageX; + pos.pageY = event.pageY; + + var item = findNearbyItem(canvasX, canvasY, seriesFilter); + + if (item) { + // fill in mouse pos for any listeners out there + item.pageX = parseInt(item.series.xaxis.p2c(item.datapoint[0]) + offset.left + plotOffset.left, 10); + item.pageY = parseInt(item.series.yaxis.p2c(item.datapoint[1]) + offset.top + plotOffset.top, 10); + } + + if (options.grid.autoHighlight) { + // clear auto-highlights + for (var i = 0; i < highlights.length; ++i) { + var h = highlights[i]; + if (h.auto == eventname && + !(item && h.series == item.series && + h.point[0] == item.datapoint[0] && + h.point[1] == item.datapoint[1])) + unhighlight(h.series, h.point); + } + + if (item) + highlight(item.series, item.datapoint, eventname); + } + + placeholder.trigger(eventname, [ pos, item ]); + } + + function triggerRedrawOverlay() { + var t = options.interaction.redrawOverlayInterval; + if (t == -1) { // skip event queue + drawOverlay(); + return; + } + + if (!redrawTimeout) + redrawTimeout = setTimeout(drawOverlay, t); + } + + function drawOverlay() { + redrawTimeout = null; + + // draw highlights + octx.save(); + overlay.clear(); + octx.translate(plotOffset.left, plotOffset.top); + + var i, hi; + for (i = 0; i < highlights.length; ++i) { + hi = highlights[i]; + + if (hi.series.bars.show) + drawBarHighlight(hi.series, hi.point); + else + 
drawPointHighlight(hi.series, hi.point); + } + octx.restore(); + + executeHooks(hooks.drawOverlay, [octx]); + } + + function highlight(s, point, auto) { + if (typeof s == "number") + s = series[s]; + + if (typeof point == "number") { + var ps = s.datapoints.pointsize; + point = s.datapoints.points.slice(ps * point, ps * (point + 1)); + } + + var i = indexOfHighlight(s, point); + if (i == -1) { + highlights.push({ series: s, point: point, auto: auto }); + + triggerRedrawOverlay(); + } + else if (!auto) + highlights[i].auto = false; + } + + function unhighlight(s, point) { + if (s == null && point == null) { + highlights = []; + triggerRedrawOverlay(); + return; + } + + if (typeof s == "number") + s = series[s]; + + if (typeof point == "number") { + var ps = s.datapoints.pointsize; + point = s.datapoints.points.slice(ps * point, ps * (point + 1)); + } + + var i = indexOfHighlight(s, point); + if (i != -1) { + highlights.splice(i, 1); + + triggerRedrawOverlay(); + } + } + + function indexOfHighlight(s, p) { + for (var i = 0; i < highlights.length; ++i) { + var h = highlights[i]; + if (h.series == s && h.point[0] == p[0] + && h.point[1] == p[1]) + return i; + } + return -1; + } + + function drawPointHighlight(series, point) { + var x = point[0], y = point[1], + axisx = series.xaxis, axisy = series.yaxis, + highlightColor = (typeof series.highlightColor === "string") ? series.highlightColor : $.color.parse(series.color).scale('a', 0.5).toString(); + + if (x < axisx.min || x > axisx.max || y < axisy.min || y > axisy.max) + return; + + var pointRadius = series.points.radius + series.points.lineWidth / 2; + octx.lineWidth = pointRadius; + octx.strokeStyle = highlightColor; + var radius = 1.5 * pointRadius; + x = axisx.p2c(x); + y = axisy.p2c(y); + + octx.beginPath(); + if (series.points.symbol == "circle") + octx.arc(x, y, radius, 0, 2 * Math.PI, false); + else + series.points.symbol(octx, x, y, radius, false); + octx.closePath(); + octx.stroke(); + } + + function drawBarHighlight(series, point) { + var highlightColor = (typeof series.highlightColor === "string") ? 
series.highlightColor : $.color.parse(series.color).scale('a', 0.5).toString(), + fillStyle = highlightColor, + barLeft; + + switch (series.bars.align) { + case "left": + barLeft = 0; + break; + case "right": + barLeft = -series.bars.barWidth; + break; + default: + barLeft = -series.bars.barWidth / 2; + } + + octx.lineWidth = series.bars.lineWidth; + octx.strokeStyle = highlightColor; + + drawBar(point[0], point[1], point[2] || 0, barLeft, barLeft + series.bars.barWidth, + function () { return fillStyle; }, series.xaxis, series.yaxis, octx, series.bars.horizontal, series.bars.lineWidth); + } + + function getColorOrGradient(spec, bottom, top, defaultColor) { + if (typeof spec == "string") + return spec; + else { + // assume this is a gradient spec; IE currently only + // supports a simple vertical gradient properly, so that's + // what we support too + var gradient = ctx.createLinearGradient(0, top, 0, bottom); + + for (var i = 0, l = spec.colors.length; i < l; ++i) { + var c = spec.colors[i]; + if (typeof c != "string") { + var co = $.color.parse(defaultColor); + if (c.brightness != null) + co = co.scale('rgb', c.brightness); + if (c.opacity != null) + co.a *= c.opacity; + c = co.toString(); + } + gradient.addColorStop(i / (l - 1), c); + } + + return gradient; + } + } + } + + // Add the plot function to the top level of the jQuery object + + $.plot = function(placeholder, data, options) { + //var t0 = new Date(); + var plot = new Plot($(placeholder), data, options, $.plot.plugins); + //(window.console ? console.log : alert)("time used (msecs): " + ((new Date()).getTime() - t0.getTime())); + return plot; + }; + + $.plot.version = "0.8.3"; + + $.plot.plugins = []; + + // Also add the plot function as a chainable property + + $.fn.plot = function(data, options) { + return this.each(function() { + $.plot(this, data, options); + }); + }; + + // round to nearby lower multiple of base + function floorInBase(n, base) { + return base * Math.floor(n / base); + } + +})(jQuery); diff --git a/qa/workunits/erasure-code/jquery.js b/qa/workunits/erasure-code/jquery.js new file mode 100644 index 000000000..8c24ffc61 --- /dev/null +++ b/qa/workunits/erasure-code/jquery.js @@ -0,0 +1,9472 @@ +/*! 
+ * jQuery JavaScript Library v1.8.3 + * http://jquery.com/ + * + * Includes Sizzle.js + * http://sizzlejs.com/ + * + * Copyright 2012 jQuery Foundation and other contributors + * Released under the MIT license + * http://jquery.org/license + * + * Date: Tue Nov 13 2012 08:20:33 GMT-0500 (Eastern Standard Time) + */ +(function( window, undefined ) { +var + // A central reference to the root jQuery(document) + rootjQuery, + + // The deferred used on DOM ready + readyList, + + // Use the correct document accordingly with window argument (sandbox) + document = window.document, + location = window.location, + navigator = window.navigator, + + // Map over jQuery in case of overwrite + _jQuery = window.jQuery, + + // Map over the $ in case of overwrite + _$ = window.$, + + // Save a reference to some core methods + core_push = Array.prototype.push, + core_slice = Array.prototype.slice, + core_indexOf = Array.prototype.indexOf, + core_toString = Object.prototype.toString, + core_hasOwn = Object.prototype.hasOwnProperty, + core_trim = String.prototype.trim, + + // Define a local copy of jQuery + jQuery = function( selector, context ) { + // The jQuery object is actually just the init constructor 'enhanced' + return new jQuery.fn.init( selector, context, rootjQuery ); + }, + + // Used for matching numbers + core_pnum = /[\-+]?(?:\d*\.|)\d+(?:[eE][\-+]?\d+|)/.source, + + // Used for detecting and trimming whitespace + core_rnotwhite = /\S/, + core_rspace = /\s+/, + + // Make sure we trim BOM and NBSP (here's looking at you, Safari 5.0 and IE) + rtrim = /^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g, + + // A simple way to check for HTML strings + // Prioritize #id over <tag> to avoid XSS via location.hash (#9521) + rquickExpr = /^(?:[^#<]*(<[\w\W]+>)[^>]*$|#([\w\-]*)$)/, + + // Match a standalone tag + rsingleTag = /^<(\w+)\s*\/?>(?:<\/\1>|)$/, + + // JSON RegExp + rvalidchars = /^[\],:{}\s]*$/, + rvalidbraces = /(?:^|:|,)(?:\s*\[)+/g, + rvalidescape = /\\(?:["\\\/bfnrt]|u[\da-fA-F]{4})/g, + rvalidtokens = /"[^"\\\r\n]*"|true|false|null|-?(?:\d\d*\.|)\d+(?:[eE][\-+]?\d+|)/g, + + // Matches dashed string for camelizing + rmsPrefix = /^-ms-/, + rdashAlpha = /-([\da-z])/gi, + + // Used by jQuery.camelCase as callback to replace() + fcamelCase = function( all, letter ) { + return ( letter + "" ).toUpperCase(); + }, + + // The ready event handler and self cleanup method + DOMContentLoaded = function() { + if ( document.addEventListener ) { + document.removeEventListener( "DOMContentLoaded", DOMContentLoaded, false ); + jQuery.ready(); + } else if ( document.readyState === "complete" ) { + // we're here because readyState === "complete" in oldIE + // which is good enough for us to call the dom ready! 
+ document.detachEvent( "onreadystatechange", DOMContentLoaded ); + jQuery.ready(); + } + }, + + // [[Class]] -> type pairs + class2type = {}; + +jQuery.fn = jQuery.prototype = { + constructor: jQuery, + init: function( selector, context, rootjQuery ) { + var match, elem, ret, doc; + + // Handle $(""), $(null), $(undefined), $(false) + if ( !selector ) { + return this; + } + + // Handle $(DOMElement) + if ( selector.nodeType ) { + this.context = this[0] = selector; + this.length = 1; + return this; + } + + // Handle HTML strings + if ( typeof selector === "string" ) { + if ( selector.charAt(0) === "<" && selector.charAt( selector.length - 1 ) === ">" && selector.length >= 3 ) { + // Assume that strings that start and end with <> are HTML and skip the regex check + match = [ null, selector, null ]; + + } else { + match = rquickExpr.exec( selector ); + } + + // Match html or make sure no context is specified for #id + if ( match && (match[1] || !context) ) { + + // HANDLE: $(html) -> $(array) + if ( match[1] ) { + context = context instanceof jQuery ? context[0] : context; + doc = ( context && context.nodeType ? context.ownerDocument || context : document ); + + // scripts is true for back-compat + selector = jQuery.parseHTML( match[1], doc, true ); + if ( rsingleTag.test( match[1] ) && jQuery.isPlainObject( context ) ) { + this.attr.call( selector, context, true ); + } + + return jQuery.merge( this, selector ); + + // HANDLE: $(#id) + } else { + elem = document.getElementById( match[2] ); + + // Check parentNode to catch when Blackberry 4.6 returns + // nodes that are no longer in the document #6963 + if ( elem && elem.parentNode ) { + // Handle the case where IE and Opera return items + // by name instead of ID + if ( elem.id !== match[2] ) { + return rootjQuery.find( selector ); + } + + // Otherwise, we inject the element directly into the jQuery object + this.length = 1; + this[0] = elem; + } + + this.context = document; + this.selector = selector; + return this; + } + + // HANDLE: $(expr, $(...)) + } else if ( !context || context.jquery ) { + return ( context || rootjQuery ).find( selector ); + + // HANDLE: $(expr, context) + // (which is just equivalent to: $(context).find(expr) + } else { + return this.constructor( context ).find( selector ); + } + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( jQuery.isFunction( selector ) ) { + return rootjQuery.ready( selector ); + } + + if ( selector.selector !== undefined ) { + this.selector = selector.selector; + this.context = selector.context; + } + + return jQuery.makeArray( selector, this ); + }, + + // Start with an empty selector + selector: "", + + // The current version of jQuery being used + jquery: "1.8.3", + + // The default length of a jQuery object is 0 + length: 0, + + // The number of elements contained in the matched element set + size: function() { + return this.length; + }, + + toArray: function() { + return core_slice.call( this ); + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num == null ? + + // Return a 'clean' array + this.toArray() : + + // Return just the object + ( num < 0 ? 
this[ this.length + num ] : this[ num ] ); + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems, name, selector ) { + + // Build a new jQuery matched element set + var ret = jQuery.merge( this.constructor(), elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + ret.context = this.context; + + if ( name === "find" ) { + ret.selector = this.selector + ( this.selector ? " " : "" ) + selector; + } else if ( name ) { + ret.selector = this.selector + "." + name + "(" + selector + ")"; + } + + // Return the newly-formed element set + return ret; + }, + + // Execute a callback for every element in the matched set. + // (You can seed the arguments with an array of args, but this is + // only used internally.) + each: function( callback, args ) { + return jQuery.each( this, callback, args ); + }, + + ready: function( fn ) { + // Add the callback + jQuery.ready.promise().done( fn ); + + return this; + }, + + eq: function( i ) { + i = +i; + return i === -1 ? + this.slice( i ) : + this.slice( i, i + 1 ); + }, + + first: function() { + return this.eq( 0 ); + }, + + last: function() { + return this.eq( -1 ); + }, + + slice: function() { + return this.pushStack( core_slice.apply( this, arguments ), + "slice", core_slice.call(arguments).join(",") ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map(this, function( elem, i ) { + return callback.call( elem, i, elem ); + })); + }, + + end: function() { + return this.prevObject || this.constructor(null); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. + push: core_push, + sort: [].sort, + splice: [].splice +}; + +// Give the init function the jQuery prototype for later instantiation +jQuery.fn.init.prototype = jQuery.fn; + +jQuery.extend = jQuery.fn.extend = function() { + var options, name, src, copy, copyIsArray, clone, + target = arguments[0] || {}, + i = 1, + length = arguments.length, + deep = false; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + target = arguments[1] || {}; + // skip the boolean and the target + i = 2; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction(target) ) { + target = {}; + } + + // extend jQuery itself if only one argument is passed + if ( length === i ) { + target = this; + --i; + } + + for ( ; i < length; i++ ) { + // Only deal with non-null/undefined values + if ( (options = arguments[ i ]) != null ) { + // Extend the base object + for ( name in options ) { + src = target[ name ]; + copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) { + continue; + } + + // Recurse if we're merging plain objects or arrays + if ( deep && copy && ( jQuery.isPlainObject(copy) || (copyIsArray = jQuery.isArray(copy)) ) ) { + if ( copyIsArray ) { + copyIsArray = false; + clone = src && jQuery.isArray(src) ? src : []; + + } else { + clone = src && jQuery.isPlainObject(src) ? 
src : {}; + } + + // Never move original objects, clone them + target[ name ] = jQuery.extend( deep, clone, copy ); + + // Don't bring in undefined values + } else if ( copy !== undefined ) { + target[ name ] = copy; + } + } + } + } + + // Return the modified object + return target; +}; + +jQuery.extend({ + noConflict: function( deep ) { + if ( window.$ === jQuery ) { + window.$ = _$; + } + + if ( deep && window.jQuery === jQuery ) { + window.jQuery = _jQuery; + } + + return jQuery; + }, + + // Is the DOM ready to be used? Set to true once it occurs. + isReady: false, + + // A counter to track how many items to wait for before + // the ready event fires. See #6781 + readyWait: 1, + + // Hold (or release) the ready event + holdReady: function( hold ) { + if ( hold ) { + jQuery.readyWait++; + } else { + jQuery.ready( true ); + } + }, + + // Handle when the DOM is ready + ready: function( wait ) { + + // Abort if there are pending holds or we're already ready + if ( wait === true ? --jQuery.readyWait : jQuery.isReady ) { + return; + } + + // Make sure body exists, at least, in case IE gets a little overzealous (ticket #5443). + if ( !document.body ) { + return setTimeout( jQuery.ready, 1 ); + } + + // Remember that the DOM is ready + jQuery.isReady = true; + + // If a normal DOM Ready event fired, decrement, and wait if need be + if ( wait !== true && --jQuery.readyWait > 0 ) { + return; + } + + // If there are functions bound, to execute + readyList.resolveWith( document, [ jQuery ] ); + + // Trigger any bound ready events + if ( jQuery.fn.trigger ) { + jQuery( document ).trigger("ready").off("ready"); + } + }, + + // See test/unit/core.js for details concerning isFunction. + // Since version 1.3, DOM methods and functions like alert + // aren't supported. They return false on IE (#2968). + isFunction: function( obj ) { + return jQuery.type(obj) === "function"; + }, + + isArray: Array.isArray || function( obj ) { + return jQuery.type(obj) === "array"; + }, + + isWindow: function( obj ) { + return obj != null && obj == obj.window; + }, + + isNumeric: function( obj ) { + return !isNaN( parseFloat(obj) ) && isFinite( obj ); + }, + + type: function( obj ) { + return obj == null ? + String( obj ) : + class2type[ core_toString.call(obj) ] || "object"; + }, + + isPlainObject: function( obj ) { + // Must be an Object. + // Because of IE, we also have to check the presence of the constructor property. + // Make sure that DOM nodes and window objects don't pass through, as well + if ( !obj || jQuery.type(obj) !== "object" || obj.nodeType || jQuery.isWindow( obj ) ) { + return false; + } + + try { + // Not own constructor property must be Object + if ( obj.constructor && + !core_hasOwn.call(obj, "constructor") && + !core_hasOwn.call(obj.constructor.prototype, "isPrototypeOf") ) { + return false; + } + } catch ( e ) { + // IE8,9 Will throw exceptions on certain host objects #9897 + return false; + } + + // Own properties are enumerated firstly, so to speed up, + // if last one is own, then all properties are own. 
+ + var key; + for ( key in obj ) {} + + return key === undefined || core_hasOwn.call( obj, key ); + }, + + isEmptyObject: function( obj ) { + var name; + for ( name in obj ) { + return false; + } + return true; + }, + + error: function( msg ) { + throw new Error( msg ); + }, + + // data: string of html + // context (optional): If specified, the fragment will be created in this context, defaults to document + // scripts (optional): If true, will include scripts passed in the html string + parseHTML: function( data, context, scripts ) { + var parsed; + if ( !data || typeof data !== "string" ) { + return null; + } + if ( typeof context === "boolean" ) { + scripts = context; + context = 0; + } + context = context || document; + + // Single tag + if ( (parsed = rsingleTag.exec( data )) ) { + return [ context.createElement( parsed[1] ) ]; + } + + parsed = jQuery.buildFragment( [ data ], context, scripts ? null : [] ); + return jQuery.merge( [], + (parsed.cacheable ? jQuery.clone( parsed.fragment ) : parsed.fragment).childNodes ); + }, + + parseJSON: function( data ) { + if ( !data || typeof data !== "string") { + return null; + } + + // Make sure leading/trailing whitespace is removed (IE can't handle it) + data = jQuery.trim( data ); + + // Attempt to parse using the native JSON parser first + if ( window.JSON && window.JSON.parse ) { + return window.JSON.parse( data ); + } + + // Make sure the incoming data is actual JSON + // Logic borrowed from http://json.org/json2.js + if ( rvalidchars.test( data.replace( rvalidescape, "@" ) + .replace( rvalidtokens, "]" ) + .replace( rvalidbraces, "")) ) { + + return ( new Function( "return " + data ) )(); + + } + jQuery.error( "Invalid JSON: " + data ); + }, + + // Cross-browser xml parsing + parseXML: function( data ) { + var xml, tmp; + if ( !data || typeof data !== "string" ) { + return null; + } + try { + if ( window.DOMParser ) { // Standard + tmp = new DOMParser(); + xml = tmp.parseFromString( data , "text/xml" ); + } else { // IE + xml = new ActiveXObject( "Microsoft.XMLDOM" ); + xml.async = "false"; + xml.loadXML( data ); + } + } catch( e ) { + xml = undefined; + } + if ( !xml || !xml.documentElement || xml.getElementsByTagName( "parsererror" ).length ) { + jQuery.error( "Invalid XML: " + data ); + } + return xml; + }, + + noop: function() {}, + + // Evaluates a script in a global context + // Workarounds based on findings by Jim Driscoll + // http://weblogs.java.net/blog/driscoll/archive/2009/09/08/eval-javascript-global-context + globalEval: function( data ) { + if ( data && core_rnotwhite.test( data ) ) { + // We use execScript on Internet Explorer + // We use an anonymous function so that context is window + // rather than jQuery in Firefox + ( window.execScript || function( data ) { + window[ "eval" ].call( window, data ); + } )( data ); + } + }, + + // Convert dashed to camelCase; used by the css and data modules + // Microsoft forgot to hump their vendor prefix (#9572) + camelCase: function( string ) { + return string.replace( rmsPrefix, "ms-" ).replace( rdashAlpha, fcamelCase ); + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toLowerCase() === name.toLowerCase(); + }, + + // args is for internal usage only + each: function( obj, callback, args ) { + var name, + i = 0, + length = obj.length, + isObj = length === undefined || jQuery.isFunction( obj ); + + if ( args ) { + if ( isObj ) { + for ( name in obj ) { + if ( callback.apply( obj[ name ], args ) === false ) { + break; + } + } + } else { + for ( ; i 
< length; ) { + if ( callback.apply( obj[ i++ ], args ) === false ) { + break; + } + } + } + + // A special, fast, case for the most common use of each + } else { + if ( isObj ) { + for ( name in obj ) { + if ( callback.call( obj[ name ], name, obj[ name ] ) === false ) { + break; + } + } + } else { + for ( ; i < length; ) { + if ( callback.call( obj[ i ], i, obj[ i++ ] ) === false ) { + break; + } + } + } + } + + return obj; + }, + + // Use native String.trim function wherever possible + trim: core_trim && !core_trim.call("\uFEFF\xA0") ? + function( text ) { + return text == null ? + "" : + core_trim.call( text ); + } : + + // Otherwise use our own trimming functionality + function( text ) { + return text == null ? + "" : + ( text + "" ).replace( rtrim, "" ); + }, + + // results is for internal usage only + makeArray: function( arr, results ) { + var type, + ret = results || []; + + if ( arr != null ) { + // The window, strings (and functions) also have 'length' + // Tweaked logic slightly to handle Blackberry 4.7 RegExp issues #6930 + type = jQuery.type( arr ); + + if ( arr.length == null || type === "string" || type === "function" || type === "regexp" || jQuery.isWindow( arr ) ) { + core_push.call( ret, arr ); + } else { + jQuery.merge( ret, arr ); + } + } + + return ret; + }, + + inArray: function( elem, arr, i ) { + var len; + + if ( arr ) { + if ( core_indexOf ) { + return core_indexOf.call( arr, elem, i ); + } + + len = arr.length; + i = i ? i < 0 ? Math.max( 0, len + i ) : i : 0; + + for ( ; i < len; i++ ) { + // Skip accessing in sparse arrays + if ( i in arr && arr[ i ] === elem ) { + return i; + } + } + } + + return -1; + }, + + merge: function( first, second ) { + var l = second.length, + i = first.length, + j = 0; + + if ( typeof l === "number" ) { + for ( ; j < l; j++ ) { + first[ i++ ] = second[ j ]; + } + + } else { + while ( second[j] !== undefined ) { + first[ i++ ] = second[ j++ ]; + } + } + + first.length = i; + + return first; + }, + + grep: function( elems, callback, inv ) { + var retVal, + ret = [], + i = 0, + length = elems.length; + inv = !!inv; + + // Go through the array, only saving the items + // that pass the validator function + for ( ; i < length; i++ ) { + retVal = !!callback( elems[ i ], i ); + if ( inv !== retVal ) { + ret.push( elems[ i ] ); + } + } + + return ret; + }, + + // arg is for internal usage only + map: function( elems, callback, arg ) { + var value, key, + ret = [], + i = 0, + length = elems.length, + // jquery objects are treated as arrays + isArray = elems instanceof jQuery || length !== undefined && typeof length === "number" && ( ( length > 0 && elems[ 0 ] && elems[ length -1 ] ) || length === 0 || jQuery.isArray( elems ) ) ; + + // Go through the array, translating each of the items to their + if ( isArray ) { + for ( ; i < length; i++ ) { + value = callback( elems[ i ], i, arg ); + + if ( value != null ) { + ret[ ret.length ] = value; + } + } + + // Go through every key on the object, + } else { + for ( key in elems ) { + value = callback( elems[ key ], key, arg ); + + if ( value != null ) { + ret[ ret.length ] = value; + } + } + } + + // Flatten any nested arrays + return ret.concat.apply( [], ret ); + }, + + // A global GUID counter for objects + guid: 1, + + // Bind a function to a context, optionally partially applying any + // arguments. 
+ proxy: function( fn, context ) { + var tmp, args, proxy; + + if ( typeof context === "string" ) { + tmp = fn[ context ]; + context = fn; + fn = tmp; + } + + // Quick check to determine if target is callable, in the spec + // this throws a TypeError, but we will just return undefined. + if ( !jQuery.isFunction( fn ) ) { + return undefined; + } + + // Simulated bind + args = core_slice.call( arguments, 2 ); + proxy = function() { + return fn.apply( context, args.concat( core_slice.call( arguments ) ) ); + }; + + // Set the guid of unique handler to the same of original handler, so it can be removed + proxy.guid = fn.guid = fn.guid || jQuery.guid++; + + return proxy; + }, + + // Multifunctional method to get and set values of a collection + // The value/s can optionally be executed if it's a function + access: function( elems, fn, key, value, chainable, emptyGet, pass ) { + var exec, + bulk = key == null, + i = 0, + length = elems.length; + + // Sets many values + if ( key && typeof key === "object" ) { + for ( i in key ) { + jQuery.access( elems, fn, i, key[i], 1, emptyGet, value ); + } + chainable = 1; + + // Sets one value + } else if ( value !== undefined ) { + // Optionally, function values get executed if exec is true + exec = pass === undefined && jQuery.isFunction( value ); + + if ( bulk ) { + // Bulk operations only iterate when executing function values + if ( exec ) { + exec = fn; + fn = function( elem, key, value ) { + return exec.call( jQuery( elem ), value ); + }; + + // Otherwise they run against the entire set + } else { + fn.call( elems, value ); + fn = null; + } + } + + if ( fn ) { + for (; i < length; i++ ) { + fn( elems[i], key, exec ? value.call( elems[i], i, fn( elems[i], key ) ) : value, pass ); + } + } + + chainable = 1; + } + + return chainable ? + elems : + + // Gets + bulk ? + fn.call( elems ) : + length ? fn( elems[0], key ) : emptyGet; + }, + + now: function() { + return ( new Date() ).getTime(); + } +}); + +jQuery.ready.promise = function( obj ) { + if ( !readyList ) { + + readyList = jQuery.Deferred(); + + // Catch cases where $(document).ready() is called after the browser event has already occurred. 
+ // we once tried to use readyState "interactive" here, but it caused issues like the one + // discovered by ChrisS here: http://bugs.jquery.com/ticket/12282#comment:15 + if ( document.readyState === "complete" ) { + // Handle it asynchronously to allow scripts the opportunity to delay ready + setTimeout( jQuery.ready, 1 ); + + // Standards-based browsers support DOMContentLoaded + } else if ( document.addEventListener ) { + // Use the handy event callback + document.addEventListener( "DOMContentLoaded", DOMContentLoaded, false ); + + // A fallback to window.onload, that will always work + window.addEventListener( "load", jQuery.ready, false ); + + // If IE event model is used + } else { + // Ensure firing before onload, maybe late but safe also for iframes + document.attachEvent( "onreadystatechange", DOMContentLoaded ); + + // A fallback to window.onload, that will always work + window.attachEvent( "onload", jQuery.ready ); + + // If IE and not a frame + // continually check to see if the document is ready + var top = false; + + try { + top = window.frameElement == null && document.documentElement; + } catch(e) {} + + if ( top && top.doScroll ) { + (function doScrollCheck() { + if ( !jQuery.isReady ) { + + try { + // Use the trick by Diego Perini + // http://javascript.nwbox.com/IEContentLoaded/ + top.doScroll("left"); + } catch(e) { + return setTimeout( doScrollCheck, 50 ); + } + + // and execute any waiting functions + jQuery.ready(); + } + })(); + } + } + } + return readyList.promise( obj ); +}; + +// Populate the class2type map +jQuery.each("Boolean Number String Function Array Date RegExp Object".split(" "), function(i, name) { + class2type[ "[object " + name + "]" ] = name.toLowerCase(); +}); + +// All jQuery objects should point back to these +rootjQuery = jQuery(document); +// String to Object options format cache +var optionsCache = {}; + +// Convert String-formatted options into Object-formatted ones and store in cache +function createOptions( options ) { + var object = optionsCache[ options ] = {}; + jQuery.each( options.split( core_rspace ), function( _, flag ) { + object[ flag ] = true; + }); + return object; +} + +/* + * Create a callback list using the following parameters: + * + * options: an optional list of space-separated options that will change how + * the callback list behaves or a more traditional option object + * + * By default a callback list will act like an event callback list and can be + * "fired" multiple times. + * + * Possible options: + * + * once: will ensure the callback list can only be fired once (like a Deferred) + * + * memory: will keep track of previous values and will call any callback added + * after the list has been fired right away with the latest "memorized" + * values (like a Deferred) + * + * unique: will ensure a callback can only be added once (no duplicate in the list) + * + * stopOnFalse: interrupt callings when a callback returns false + * + */ +jQuery.Callbacks = function( options ) { + + // Convert options from String-formatted to Object-formatted if needed + // (we check in cache first) + options = typeof options === "string" ? 
+ ( optionsCache[ options ] || createOptions( options ) ) : + jQuery.extend( {}, options ); + + var // Last fire value (for non-forgettable lists) + memory, + // Flag to know if list was already fired + fired, + // Flag to know if list is currently firing + firing, + // First callback to fire (used internally by add and fireWith) + firingStart, + // End of the loop when firing + firingLength, + // Index of currently firing callback (modified by remove if needed) + firingIndex, + // Actual callback list + list = [], + // Stack of fire calls for repeatable lists + stack = !options.once && [], + // Fire callbacks + fire = function( data ) { + memory = options.memory && data; + fired = true; + firingIndex = firingStart || 0; + firingStart = 0; + firingLength = list.length; + firing = true; + for ( ; list && firingIndex < firingLength; firingIndex++ ) { + if ( list[ firingIndex ].apply( data[ 0 ], data[ 1 ] ) === false && options.stopOnFalse ) { + memory = false; // To prevent further calls using add + break; + } + } + firing = false; + if ( list ) { + if ( stack ) { + if ( stack.length ) { + fire( stack.shift() ); + } + } else if ( memory ) { + list = []; + } else { + self.disable(); + } + } + }, + // Actual Callbacks object + self = { + // Add a callback or a collection of callbacks to the list + add: function() { + if ( list ) { + // First, we save the current length + var start = list.length; + (function add( args ) { + jQuery.each( args, function( _, arg ) { + var type = jQuery.type( arg ); + if ( type === "function" ) { + if ( !options.unique || !self.has( arg ) ) { + list.push( arg ); + } + } else if ( arg && arg.length && type !== "string" ) { + // Inspect recursively + add( arg ); + } + }); + })( arguments ); + // Do we need to add the callbacks to the + // current firing batch? + if ( firing ) { + firingLength = list.length; + // With memory, if we're not firing then + // we should call right away + } else if ( memory ) { + firingStart = start; + fire( memory ); + } + } + return this; + }, + // Remove a callback from the list + remove: function() { + if ( list ) { + jQuery.each( arguments, function( _, arg ) { + var index; + while( ( index = jQuery.inArray( arg, list, index ) ) > -1 ) { + list.splice( index, 1 ); + // Handle firing indexes + if ( firing ) { + if ( index <= firingLength ) { + firingLength--; + } + if ( index <= firingIndex ) { + firingIndex--; + } + } + } + }); + } + return this; + }, + // Control if a given callback is in the list + has: function( fn ) { + return jQuery.inArray( fn, list ) > -1; + }, + // Remove all callbacks from the list + empty: function() { + list = []; + return this; + }, + // Have the list do nothing anymore + disable: function() { + list = stack = memory = undefined; + return this; + }, + // Is it disabled? + disabled: function() { + return !list; + }, + // Lock the list in its current state + lock: function() { + stack = undefined; + if ( !memory ) { + self.disable(); + } + return this; + }, + // Is it locked? + locked: function() { + return !stack; + }, + // Call all callbacks with the given context and arguments + fireWith: function( context, args ) { + args = args || []; + args = [ context, args.slice ? 
args.slice() : args ]; + if ( list && ( !fired || stack ) ) { + if ( firing ) { + stack.push( args ); + } else { + fire( args ); + } + } + return this; + }, + // Call all the callbacks with the given arguments + fire: function() { + self.fireWith( this, arguments ); + return this; + }, + // To know if the callbacks have already been called at least once + fired: function() { + return !!fired; + } + }; + + return self; +}; +jQuery.extend({ + + Deferred: function( func ) { + var tuples = [ + // action, add listener, listener list, final state + [ "resolve", "done", jQuery.Callbacks("once memory"), "resolved" ], + [ "reject", "fail", jQuery.Callbacks("once memory"), "rejected" ], + [ "notify", "progress", jQuery.Callbacks("memory") ] + ], + state = "pending", + promise = { + state: function() { + return state; + }, + always: function() { + deferred.done( arguments ).fail( arguments ); + return this; + }, + then: function( /* fnDone, fnFail, fnProgress */ ) { + var fns = arguments; + return jQuery.Deferred(function( newDefer ) { + jQuery.each( tuples, function( i, tuple ) { + var action = tuple[ 0 ], + fn = fns[ i ]; + // deferred[ done | fail | progress ] for forwarding actions to newDefer + deferred[ tuple[1] ]( jQuery.isFunction( fn ) ? + function() { + var returned = fn.apply( this, arguments ); + if ( returned && jQuery.isFunction( returned.promise ) ) { + returned.promise() + .done( newDefer.resolve ) + .fail( newDefer.reject ) + .progress( newDefer.notify ); + } else { + newDefer[ action + "With" ]( this === deferred ? newDefer : this, [ returned ] ); + } + } : + newDefer[ action ] + ); + }); + fns = null; + }).promise(); + }, + // Get a promise for this deferred + // If obj is provided, the promise aspect is added to the object + promise: function( obj ) { + return obj != null ? jQuery.extend( obj, promise ) : promise; + } + }, + deferred = {}; + + // Keep pipe for back-compat + promise.pipe = promise.then; + + // Add list-specific methods + jQuery.each( tuples, function( i, tuple ) { + var list = tuple[ 2 ], + stateString = tuple[ 3 ]; + + // promise[ done | fail | progress ] = list.add + promise[ tuple[1] ] = list.add; + + // Handle state + if ( stateString ) { + list.add(function() { + // state = [ resolved | rejected ] + state = stateString; + + // [ reject_list | resolve_list ].disable; progress_list.lock + }, tuples[ i ^ 1 ][ 2 ].disable, tuples[ 2 ][ 2 ].lock ); + } + + // deferred[ resolve | reject | notify ] = list.fire + deferred[ tuple[0] ] = list.fire; + deferred[ tuple[0] + "With" ] = list.fireWith; + }); + + // Make the deferred a promise + promise.promise( deferred ); + + // Call given func if any + if ( func ) { + func.call( deferred, deferred ); + } + + // All done! + return deferred; + }, + + // Deferred helper + when: function( subordinate /* , ..., subordinateN */ ) { + var i = 0, + resolveValues = core_slice.call( arguments ), + length = resolveValues.length, + + // the count of uncompleted subordinates + remaining = length !== 1 || ( subordinate && jQuery.isFunction( subordinate.promise ) ) ? length : 0, + + // the master Deferred. If resolveValues consist of only a single Deferred, just use that. + deferred = remaining === 1 ? subordinate : jQuery.Deferred(), + + // Update function for both resolve and progress values + updateFunc = function( i, contexts, values ) { + return function( value ) { + contexts[ i ] = this; + values[ i ] = arguments.length > 1 ? 
core_slice.call( arguments ) : value; + if( values === progressValues ) { + deferred.notifyWith( contexts, values ); + } else if ( !( --remaining ) ) { + deferred.resolveWith( contexts, values ); + } + }; + }, + + progressValues, progressContexts, resolveContexts; + + // add listeners to Deferred subordinates; treat others as resolved + if ( length > 1 ) { + progressValues = new Array( length ); + progressContexts = new Array( length ); + resolveContexts = new Array( length ); + for ( ; i < length; i++ ) { + if ( resolveValues[ i ] && jQuery.isFunction( resolveValues[ i ].promise ) ) { + resolveValues[ i ].promise() + .done( updateFunc( i, resolveContexts, resolveValues ) ) + .fail( deferred.reject ) + .progress( updateFunc( i, progressContexts, progressValues ) ); + } else { + --remaining; + } + } + } + + // if we're not waiting on anything, resolve the master + if ( !remaining ) { + deferred.resolveWith( resolveContexts, resolveValues ); + } + + return deferred.promise(); + } +}); +jQuery.support = (function() { + + var support, + all, + a, + select, + opt, + input, + fragment, + eventName, + i, + isSupported, + clickFn, + div = document.createElement("div"); + + // Setup + div.setAttribute( "className", "t" ); + div.innerHTML = " <link/><table></table><a href='/a'>a</a><input type='checkbox'/>"; + + // Support tests won't run in some limited or non-browser environments + all = div.getElementsByTagName("*"); + a = div.getElementsByTagName("a")[ 0 ]; + if ( !all || !a || !all.length ) { + return {}; + } + + // First batch of tests + select = document.createElement("select"); + opt = select.appendChild( document.createElement("option") ); + input = div.getElementsByTagName("input")[ 0 ]; + + a.style.cssText = "top:1px;float:left;opacity:.5"; + support = { + // IE strips leading whitespace when .innerHTML is used + leadingWhitespace: ( div.firstChild.nodeType === 3 ), + + // Make sure that tbody elements aren't automatically inserted + // IE will insert them into empty tables + tbody: !div.getElementsByTagName("tbody").length, + + // Make sure that link elements get serialized correctly by innerHTML + // This requires a wrapper element in IE + htmlSerialize: !!div.getElementsByTagName("link").length, + + // Get the style information from getAttribute + // (IE uses .cssText instead) + style: /top/.test( a.getAttribute("style") ), + + // Make sure that URLs aren't manipulated + // (IE normalizes it by default) + hrefNormalized: ( a.getAttribute("href") === "/a" ), + + // Make sure that element opacity exists + // (IE uses filter instead) + // Use a regex to work around a WebKit issue. See #5145 + opacity: /^0.5/.test( a.style.opacity ), + + // Verify style float existence + // (IE uses styleFloat instead of cssFloat) + cssFloat: !!a.style.cssFloat, + + // Make sure that if no value is specified for a checkbox + // that it defaults to "on". + // (WebKit defaults to "" instead) + checkOn: ( input.value === "on" ), + + // Make sure that a selected-by-default option has a working selected property. + // (WebKit defaults to false instead of true, IE too, if it's in an optgroup) + optSelected: opt.selected, + + // Test setAttribute on camelCase class. 
If it works, we need attrFixes when doing get/setAttribute (ie6/7) + getSetAttribute: div.className !== "t", + + // Tests for enctype support on a form (#6743) + enctype: !!document.createElement("form").enctype, + + // Makes sure cloning an html5 element does not cause problems + // Where outerHTML is undefined, this still works + html5Clone: document.createElement("nav").cloneNode( true ).outerHTML !== "<:nav></:nav>", + + // jQuery.support.boxModel DEPRECATED in 1.8 since we don't support Quirks Mode + boxModel: ( document.compatMode === "CSS1Compat" ), + + // Will be defined later + submitBubbles: true, + changeBubbles: true, + focusinBubbles: false, + deleteExpando: true, + noCloneEvent: true, + inlineBlockNeedsLayout: false, + shrinkWrapBlocks: false, + reliableMarginRight: true, + boxSizingReliable: true, + pixelPosition: false + }; + + // Make sure checked status is properly cloned + input.checked = true; + support.noCloneChecked = input.cloneNode( true ).checked; + + // Make sure that the options inside disabled selects aren't marked as disabled + // (WebKit marks them as disabled) + select.disabled = true; + support.optDisabled = !opt.disabled; + + // Test to see if it's possible to delete an expando from an element + // Fails in Internet Explorer + try { + delete div.test; + } catch( e ) { + support.deleteExpando = false; + } + + if ( !div.addEventListener && div.attachEvent && div.fireEvent ) { + div.attachEvent( "onclick", clickFn = function() { + // Cloning a node shouldn't copy over any + // bound event handlers (IE does this) + support.noCloneEvent = false; + }); + div.cloneNode( true ).fireEvent("onclick"); + div.detachEvent( "onclick", clickFn ); + } + + // Check if a radio maintains its value + // after being appended to the DOM + input = document.createElement("input"); + input.value = "t"; + input.setAttribute( "type", "radio" ); + support.radioValue = input.value === "t"; + + input.setAttribute( "checked", "checked" ); + + // #11217 - WebKit loses check when the name is after the checked attribute + input.setAttribute( "name", "t" ); + + div.appendChild( input ); + fragment = document.createDocumentFragment(); + fragment.appendChild( div.lastChild ); + + // WebKit doesn't clone checked state correctly in fragments + support.checkClone = fragment.cloneNode( true ).cloneNode( true ).lastChild.checked; + + // Check if a disconnected checkbox will retain its checked + // value of true after appended to the DOM (IE6/7) + support.appendChecked = input.checked; + + fragment.removeChild( input ); + fragment.appendChild( div ); + + // Technique from Juriy Zaytsev + // http://perfectionkills.com/detecting-event-support-without-browser-sniffing/ + // We only care about the case where non-standard event systems + // are used, namely in IE. Short-circuiting here helps us to + // avoid an eval call (in setAttribute) which can cause CSP + // to go haywire. 
See: https://developer.mozilla.org/en/Security/CSP + if ( div.attachEvent ) { + for ( i in { + submit: true, + change: true, + focusin: true + }) { + eventName = "on" + i; + isSupported = ( eventName in div ); + if ( !isSupported ) { + div.setAttribute( eventName, "return;" ); + isSupported = ( typeof div[ eventName ] === "function" ); + } + support[ i + "Bubbles" ] = isSupported; + } + } + + // Run tests that need a body at doc ready + jQuery(function() { + var container, div, tds, marginDiv, + divReset = "padding:0;margin:0;border:0;display:block;overflow:hidden;", + body = document.getElementsByTagName("body")[0]; + + if ( !body ) { + // Return for frameset docs that don't have a body + return; + } + + container = document.createElement("div"); + container.style.cssText = "visibility:hidden;border:0;width:0;height:0;position:static;top:0;margin-top:1px"; + body.insertBefore( container, body.firstChild ); + + // Construct the test element + div = document.createElement("div"); + container.appendChild( div ); + + // Check if table cells still have offsetWidth/Height when they are set + // to display:none and there are still other visible table cells in a + // table row; if so, offsetWidth/Height are not reliable for use when + // determining if an element has been hidden directly using + // display:none (it is still safe to use offsets if a parent element is + // hidden; don safety goggles and see bug #4512 for more information). + // (only IE 8 fails this test) + div.innerHTML = "<table><tr><td></td><td>t</td></tr></table>"; + tds = div.getElementsByTagName("td"); + tds[ 0 ].style.cssText = "padding:0;margin:0;border:0;display:none"; + isSupported = ( tds[ 0 ].offsetHeight === 0 ); + + tds[ 0 ].style.display = ""; + tds[ 1 ].style.display = "none"; + + // Check if empty table cells still have offsetWidth/Height + // (IE <= 8 fail this test) + support.reliableHiddenOffsets = isSupported && ( tds[ 0 ].offsetHeight === 0 ); + + // Check box-sizing and margin behavior + div.innerHTML = ""; + div.style.cssText = "box-sizing:border-box;-moz-box-sizing:border-box;-webkit-box-sizing:border-box;padding:1px;border:1px;display:block;width:4px;margin-top:1%;position:absolute;top:1%;"; + support.boxSizing = ( div.offsetWidth === 4 ); + support.doesNotIncludeMarginInBodyOffset = ( body.offsetTop !== 1 ); + + // NOTE: To any future maintainer, we've window.getComputedStyle + // because jsdom on node.js will break without it. + if ( window.getComputedStyle ) { + support.pixelPosition = ( window.getComputedStyle( div, null ) || {} ).top !== "1%"; + support.boxSizingReliable = ( window.getComputedStyle( div, null ) || { width: "4px" } ).width === "4px"; + + // Check if div with explicit width and no margin-right incorrectly + // gets computed margin-right based on width of container. 
For more + // info see bug #3333 + // Fails in WebKit before Feb 2011 nightlies + // WebKit Bug 13343 - getComputedStyle returns wrong value for margin-right + marginDiv = document.createElement("div"); + marginDiv.style.cssText = div.style.cssText = divReset; + marginDiv.style.marginRight = marginDiv.style.width = "0"; + div.style.width = "1px"; + div.appendChild( marginDiv ); + support.reliableMarginRight = + !parseFloat( ( window.getComputedStyle( marginDiv, null ) || {} ).marginRight ); + } + + if ( typeof div.style.zoom !== "undefined" ) { + // Check if natively block-level elements act like inline-block + // elements when setting their display to 'inline' and giving + // them layout + // (IE < 8 does this) + div.innerHTML = ""; + div.style.cssText = divReset + "width:1px;padding:1px;display:inline;zoom:1"; + support.inlineBlockNeedsLayout = ( div.offsetWidth === 3 ); + + // Check if elements with layout shrink-wrap their children + // (IE 6 does this) + div.style.display = "block"; + div.style.overflow = "visible"; + div.innerHTML = "<div></div>"; + div.firstChild.style.width = "5px"; + support.shrinkWrapBlocks = ( div.offsetWidth !== 3 ); + + container.style.zoom = 1; + } + + // Null elements to avoid leaks in IE + body.removeChild( container ); + container = div = tds = marginDiv = null; + }); + + // Null elements to avoid leaks in IE + fragment.removeChild( div ); + all = a = select = opt = input = fragment = div = null; + + return support; +})(); +var rbrace = /(?:\{[\s\S]*\}|\[[\s\S]*\])$/, + rmultiDash = /([A-Z])/g; + +jQuery.extend({ + cache: {}, + + deletedIds: [], + + // Remove at next major release (1.9/2.0) + uuid: 0, + + // Unique for each copy of jQuery on the page + // Non-digits removed to match rinlinejQuery + expando: "jQuery" + ( jQuery.fn.jquery + Math.random() ).replace( /\D/g, "" ), + + // The following elements throw uncatchable exceptions if you + // attempt to add expando properties to them. + noData: { + "embed": true, + // Ban all objects except for Flash (which handle expandos) + "object": "clsid:D27CDB6E-AE6D-11cf-96B8-444553540000", + "applet": true + }, + + hasData: function( elem ) { + elem = elem.nodeType ? jQuery.cache[ elem[jQuery.expando] ] : elem[ jQuery.expando ]; + return !!elem && !isEmptyDataObject( elem ); + }, + + data: function( elem, name, data, pvt /* Internal Use Only */ ) { + if ( !jQuery.acceptData( elem ) ) { + return; + } + + var thisCache, ret, + internalKey = jQuery.expando, + getByName = typeof name === "string", + + // We have to handle DOM nodes and JS objects differently because IE6-7 + // can't GC object references properly across the DOM-JS boundary + isNode = elem.nodeType, + + // Only DOM nodes need the global jQuery cache; JS object data is + // attached directly to the object so GC can occur automatically + cache = isNode ? jQuery.cache : elem, + + // Only defining an ID for JS objects if its cache already exists allows + // the code to shortcut on the same path as a DOM node with no cache + id = isNode ? 
elem[ internalKey ] : elem[ internalKey ] && internalKey; + + // Avoid doing any more work than we need to when trying to get data on an + // object that has no data at all + if ( (!id || !cache[id] || (!pvt && !cache[id].data)) && getByName && data === undefined ) { + return; + } + + if ( !id ) { + // Only DOM nodes need a new unique ID for each element since their data + // ends up in the global cache + if ( isNode ) { + elem[ internalKey ] = id = jQuery.deletedIds.pop() || jQuery.guid++; + } else { + id = internalKey; + } + } + + if ( !cache[ id ] ) { + cache[ id ] = {}; + + // Avoids exposing jQuery metadata on plain JS objects when the object + // is serialized using JSON.stringify + if ( !isNode ) { + cache[ id ].toJSON = jQuery.noop; + } + } + + // An object can be passed to jQuery.data instead of a key/value pair; this gets + // shallow copied over onto the existing cache + if ( typeof name === "object" || typeof name === "function" ) { + if ( pvt ) { + cache[ id ] = jQuery.extend( cache[ id ], name ); + } else { + cache[ id ].data = jQuery.extend( cache[ id ].data, name ); + } + } + + thisCache = cache[ id ]; + + // jQuery data() is stored in a separate object inside the object's internal data + // cache in order to avoid key collisions between internal data and user-defined + // data. + if ( !pvt ) { + if ( !thisCache.data ) { + thisCache.data = {}; + } + + thisCache = thisCache.data; + } + + if ( data !== undefined ) { + thisCache[ jQuery.camelCase( name ) ] = data; + } + + // Check for both converted-to-camel and non-converted data property names + // If a data property was specified + if ( getByName ) { + + // First Try to find as-is property data + ret = thisCache[ name ]; + + // Test for null|undefined property data + if ( ret == null ) { + + // Try to find the camelCased property + ret = thisCache[ jQuery.camelCase( name ) ]; + } + } else { + ret = thisCache; + } + + return ret; + }, + + removeData: function( elem, name, pvt /* Internal Use Only */ ) { + if ( !jQuery.acceptData( elem ) ) { + return; + } + + var thisCache, i, l, + + isNode = elem.nodeType, + + // See jQuery.data for more information + cache = isNode ? jQuery.cache : elem, + id = isNode ? elem[ jQuery.expando ] : jQuery.expando; + + // If there is already no cache entry for this object, there is no + // purpose in continuing + if ( !cache[ id ] ) { + return; + } + + if ( name ) { + + thisCache = pvt ? cache[ id ] : cache[ id ].data; + + if ( thisCache ) { + + // Support array or space separated string names for data keys + if ( !jQuery.isArray( name ) ) { + + // try the string as a key before any manipulation + if ( name in thisCache ) { + name = [ name ]; + } else { + + // split the camel cased version by spaces unless a key with the spaces exists + name = jQuery.camelCase( name ); + if ( name in thisCache ) { + name = [ name ]; + } else { + name = name.split(" "); + } + } + } + + for ( i = 0, l = name.length; i < l; i++ ) { + delete thisCache[ name[i] ]; + } + + // If there is no data left in the cache, we want to continue + // and let the cache object itself get destroyed + if ( !( pvt ? 
isEmptyDataObject : jQuery.isEmptyObject )( thisCache ) ) { + return; + } + } + } + + // See jQuery.data for more information + if ( !pvt ) { + delete cache[ id ].data; + + // Don't destroy the parent cache unless the internal data object + // had been the only thing left in it + if ( !isEmptyDataObject( cache[ id ] ) ) { + return; + } + } + + // Destroy the cache + if ( isNode ) { + jQuery.cleanData( [ elem ], true ); + + // Use delete when supported for expandos or `cache` is not a window per isWindow (#10080) + } else if ( jQuery.support.deleteExpando || cache != cache.window ) { + delete cache[ id ]; + + // When all else fails, null + } else { + cache[ id ] = null; + } + }, + + // For internal use only. + _data: function( elem, name, data ) { + return jQuery.data( elem, name, data, true ); + }, + + // A method for determining if a DOM node can handle the data expando + acceptData: function( elem ) { + var noData = elem.nodeName && jQuery.noData[ elem.nodeName.toLowerCase() ]; + + // nodes accept data unless otherwise specified; rejection can be conditional + return !noData || noData !== true && elem.getAttribute("classid") === noData; + } +}); + +jQuery.fn.extend({ + data: function( key, value ) { + var parts, part, attr, name, l, + elem = this[0], + i = 0, + data = null; + + // Gets all values + if ( key === undefined ) { + if ( this.length ) { + data = jQuery.data( elem ); + + if ( elem.nodeType === 1 && !jQuery._data( elem, "parsedAttrs" ) ) { + attr = elem.attributes; + for ( l = attr.length; i < l; i++ ) { + name = attr[i].name; + + if ( !name.indexOf( "data-" ) ) { + name = jQuery.camelCase( name.substring(5) ); + + dataAttr( elem, name, data[ name ] ); + } + } + jQuery._data( elem, "parsedAttrs", true ); + } + } + + return data; + } + + // Sets multiple values + if ( typeof key === "object" ) { + return this.each(function() { + jQuery.data( this, key ); + }); + } + + parts = key.split( ".", 2 ); + parts[1] = parts[1] ? "." + parts[1] : ""; + part = parts[1] + "!"; + + return jQuery.access( this, function( value ) { + + if ( value === undefined ) { + data = this.triggerHandler( "getData" + part, [ parts[0] ] ); + + // Try to fetch any internally stored data first + if ( data === undefined && elem ) { + data = jQuery.data( elem, key ); + data = dataAttr( elem, key, data ); + } + + return data === undefined && parts[1] ? + this.data( parts[0] ) : + data; + } + + parts[1] = value; + this.each(function() { + var self = jQuery( this ); + + self.triggerHandler( "setData" + part, parts ); + jQuery.data( this, key, value ); + self.triggerHandler( "changeData" + part, parts ); + }); + }, null, value, arguments.length > 1, null, false ); + }, + + removeData: function( key ) { + return this.each(function() { + jQuery.removeData( this, key ); + }); + } +}); + +function dataAttr( elem, key, data ) { + // If nothing was found internally, try to fetch any + // data from the HTML5 data-* attribute + if ( data === undefined && elem.nodeType === 1 ) { + + var name = "data-" + key.replace( rmultiDash, "-$1" ).toLowerCase(); + + data = elem.getAttribute( name ); + + if ( typeof data === "string" ) { + try { + data = data === "true" ? true : + data === "false" ? false : + data === "null" ? null : + // Only convert to a number if it doesn't change the string + +data + "" === data ? +data : + rbrace.test( data ) ? 
jQuery.parseJSON( data ) : + data; + } catch( e ) {} + + // Make sure we set the data so it isn't changed later + jQuery.data( elem, key, data ); + + } else { + data = undefined; + } + } + + return data; +} + +// checks a cache object for emptiness +function isEmptyDataObject( obj ) { + var name; + for ( name in obj ) { + + // if the public data object is empty, the private is still empty + if ( name === "data" && jQuery.isEmptyObject( obj[name] ) ) { + continue; + } + if ( name !== "toJSON" ) { + return false; + } + } + + return true; +} +jQuery.extend({ + queue: function( elem, type, data ) { + var queue; + + if ( elem ) { + type = ( type || "fx" ) + "queue"; + queue = jQuery._data( elem, type ); + + // Speed up dequeue by getting out quickly if this is just a lookup + if ( data ) { + if ( !queue || jQuery.isArray(data) ) { + queue = jQuery._data( elem, type, jQuery.makeArray(data) ); + } else { + queue.push( data ); + } + } + return queue || []; + } + }, + + dequeue: function( elem, type ) { + type = type || "fx"; + + var queue = jQuery.queue( elem, type ), + startLength = queue.length, + fn = queue.shift(), + hooks = jQuery._queueHooks( elem, type ), + next = function() { + jQuery.dequeue( elem, type ); + }; + + // If the fx queue is dequeued, always remove the progress sentinel + if ( fn === "inprogress" ) { + fn = queue.shift(); + startLength--; + } + + if ( fn ) { + + // Add a progress sentinel to prevent the fx queue from being + // automatically dequeued + if ( type === "fx" ) { + queue.unshift( "inprogress" ); + } + + // clear up the last queue stop function + delete hooks.stop; + fn.call( elem, next, hooks ); + } + + if ( !startLength && hooks ) { + hooks.empty.fire(); + } + }, + + // not intended for public consumption - generates a queueHooks object, or returns the current one + _queueHooks: function( elem, type ) { + var key = type + "queueHooks"; + return jQuery._data( elem, key ) || jQuery._data( elem, key, { + empty: jQuery.Callbacks("once memory").add(function() { + jQuery.removeData( elem, type + "queue", true ); + jQuery.removeData( elem, key, true ); + }) + }); + } +}); + +jQuery.fn.extend({ + queue: function( type, data ) { + var setter = 2; + + if ( typeof type !== "string" ) { + data = type; + type = "fx"; + setter--; + } + + if ( arguments.length < setter ) { + return jQuery.queue( this[0], type ); + } + + return data === undefined ? + this : + this.each(function() { + var queue = jQuery.queue( this, type, data ); + + // ensure a hooks for this queue + jQuery._queueHooks( this, type ); + + if ( type === "fx" && queue[0] !== "inprogress" ) { + jQuery.dequeue( this, type ); + } + }); + }, + dequeue: function( type ) { + return this.each(function() { + jQuery.dequeue( this, type ); + }); + }, + // Based off of the plugin by Clint Helfers, with permission. + // http://blindsignals.com/index.php/2009/07/jquery-delay/ + delay: function( time, type ) { + time = jQuery.fx ? 
jQuery.fx.speeds[ time ] || time : time; + type = type || "fx"; + + return this.queue( type, function( next, hooks ) { + var timeout = setTimeout( next, time ); + hooks.stop = function() { + clearTimeout( timeout ); + }; + }); + }, + clearQueue: function( type ) { + return this.queue( type || "fx", [] ); + }, + // Get a promise resolved when queues of a certain type + // are emptied (fx is the type by default) + promise: function( type, obj ) { + var tmp, + count = 1, + defer = jQuery.Deferred(), + elements = this, + i = this.length, + resolve = function() { + if ( !( --count ) ) { + defer.resolveWith( elements, [ elements ] ); + } + }; + + if ( typeof type !== "string" ) { + obj = type; + type = undefined; + } + type = type || "fx"; + + while( i-- ) { + tmp = jQuery._data( elements[ i ], type + "queueHooks" ); + if ( tmp && tmp.empty ) { + count++; + tmp.empty.add( resolve ); + } + } + resolve(); + return defer.promise( obj ); + } +}); +var nodeHook, boolHook, fixSpecified, + rclass = /[\t\r\n]/g, + rreturn = /\r/g, + rtype = /^(?:button|input)$/i, + rfocusable = /^(?:button|input|object|select|textarea)$/i, + rclickable = /^a(?:rea|)$/i, + rboolean = /^(?:autofocus|autoplay|async|checked|controls|defer|disabled|hidden|loop|multiple|open|readonly|required|scoped|selected)$/i, + getSetAttribute = jQuery.support.getSetAttribute; + +jQuery.fn.extend({ + attr: function( name, value ) { + return jQuery.access( this, jQuery.attr, name, value, arguments.length > 1 ); + }, + + removeAttr: function( name ) { + return this.each(function() { + jQuery.removeAttr( this, name ); + }); + }, + + prop: function( name, value ) { + return jQuery.access( this, jQuery.prop, name, value, arguments.length > 1 ); + }, + + removeProp: function( name ) { + name = jQuery.propFix[ name ] || name; + return this.each(function() { + // try/catch handles cases where IE balks (such as removing a property on window) + try { + this[ name ] = undefined; + delete this[ name ]; + } catch( e ) {} + }); + }, + + addClass: function( value ) { + var classNames, i, l, elem, + setClass, c, cl; + + if ( jQuery.isFunction( value ) ) { + return this.each(function( j ) { + jQuery( this ).addClass( value.call(this, j, this.className) ); + }); + } + + if ( value && typeof value === "string" ) { + classNames = value.split( core_rspace ); + + for ( i = 0, l = this.length; i < l; i++ ) { + elem = this[ i ]; + + if ( elem.nodeType === 1 ) { + if ( !elem.className && classNames.length === 1 ) { + elem.className = value; + + } else { + setClass = " " + elem.className + " "; + + for ( c = 0, cl = classNames.length; c < cl; c++ ) { + if ( setClass.indexOf( " " + classNames[ c ] + " " ) < 0 ) { + setClass += classNames[ c ] + " "; + } + } + elem.className = jQuery.trim( setClass ); + } + } + } + } + + return this; + }, + + removeClass: function( value ) { + var removes, className, elem, c, cl, i, l; + + if ( jQuery.isFunction( value ) ) { + return this.each(function( j ) { + jQuery( this ).removeClass( value.call(this, j, this.className) ); + }); + } + if ( (value && typeof value === "string") || value === undefined ) { + removes = ( value || "" ).split( core_rspace ); + + for ( i = 0, l = this.length; i < l; i++ ) { + elem = this[ i ]; + if ( elem.nodeType === 1 && elem.className ) { + + className = (" " + elem.className + " ").replace( rclass, " " ); + + // loop over each item in the removal list + for ( c = 0, cl = removes.length; c < cl; c++ ) { + // Remove until there is nothing to remove, + while ( className.indexOf(" " + removes[ c ] + " 
") >= 0 ) { + className = className.replace( " " + removes[ c ] + " " , " " ); + } + } + elem.className = value ? jQuery.trim( className ) : ""; + } + } + } + + return this; + }, + + toggleClass: function( value, stateVal ) { + var type = typeof value, + isBool = typeof stateVal === "boolean"; + + if ( jQuery.isFunction( value ) ) { + return this.each(function( i ) { + jQuery( this ).toggleClass( value.call(this, i, this.className, stateVal), stateVal ); + }); + } + + return this.each(function() { + if ( type === "string" ) { + // toggle individual class names + var className, + i = 0, + self = jQuery( this ), + state = stateVal, + classNames = value.split( core_rspace ); + + while ( (className = classNames[ i++ ]) ) { + // check each className given, space separated list + state = isBool ? state : !self.hasClass( className ); + self[ state ? "addClass" : "removeClass" ]( className ); + } + + } else if ( type === "undefined" || type === "boolean" ) { + if ( this.className ) { + // store className if set + jQuery._data( this, "__className__", this.className ); + } + + // toggle whole className + this.className = this.className || value === false ? "" : jQuery._data( this, "__className__" ) || ""; + } + }); + }, + + hasClass: function( selector ) { + var className = " " + selector + " ", + i = 0, + l = this.length; + for ( ; i < l; i++ ) { + if ( this[i].nodeType === 1 && (" " + this[i].className + " ").replace(rclass, " ").indexOf( className ) >= 0 ) { + return true; + } + } + + return false; + }, + + val: function( value ) { + var hooks, ret, isFunction, + elem = this[0]; + + if ( !arguments.length ) { + if ( elem ) { + hooks = jQuery.valHooks[ elem.type ] || jQuery.valHooks[ elem.nodeName.toLowerCase() ]; + + if ( hooks && "get" in hooks && (ret = hooks.get( elem, "value" )) !== undefined ) { + return ret; + } + + ret = elem.value; + + return typeof ret === "string" ? + // handle most common string cases + ret.replace(rreturn, "") : + // handle cases where value is null/undef or number + ret == null ? "" : ret; + } + + return; + } + + isFunction = jQuery.isFunction( value ); + + return this.each(function( i ) { + var val, + self = jQuery(this); + + if ( this.nodeType !== 1 ) { + return; + } + + if ( isFunction ) { + val = value.call( this, i, self.val() ); + } else { + val = value; + } + + // Treat null/undefined as ""; convert numbers to string + if ( val == null ) { + val = ""; + } else if ( typeof val === "number" ) { + val += ""; + } else if ( jQuery.isArray( val ) ) { + val = jQuery.map(val, function ( value ) { + return value == null ? "" : value + ""; + }); + } + + hooks = jQuery.valHooks[ this.type ] || jQuery.valHooks[ this.nodeName.toLowerCase() ]; + + // If set returns undefined, fall back to normal setting + if ( !hooks || !("set" in hooks) || hooks.set( this, val, "value" ) === undefined ) { + this.value = val; + } + }); + } +}); + +jQuery.extend({ + valHooks: { + option: { + get: function( elem ) { + // attributes.value is undefined in Blackberry 4.7 but + // uses .value. See #6932 + var val = elem.attributes.value; + return !val || val.specified ? elem.value : elem.text; + } + }, + select: { + get: function( elem ) { + var value, option, + options = elem.options, + index = elem.selectedIndex, + one = elem.type === "select-one" || index < 0, + values = one ? null : [], + max = one ? index + 1 : options.length, + i = index < 0 ? + max : + one ? 
index : 0; + + // Loop through all the selected options + for ( ; i < max; i++ ) { + option = options[ i ]; + + // oldIE doesn't update selected after form reset (#2551) + if ( ( option.selected || i === index ) && + // Don't return options that are disabled or in a disabled optgroup + ( jQuery.support.optDisabled ? !option.disabled : option.getAttribute("disabled") === null ) && + ( !option.parentNode.disabled || !jQuery.nodeName( option.parentNode, "optgroup" ) ) ) { + + // Get the specific value for the option + value = jQuery( option ).val(); + + // We don't need an array for one selects + if ( one ) { + return value; + } + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + }, + + set: function( elem, value ) { + var values = jQuery.makeArray( value ); + + jQuery(elem).find("option").each(function() { + this.selected = jQuery.inArray( jQuery(this).val(), values ) >= 0; + }); + + if ( !values.length ) { + elem.selectedIndex = -1; + } + return values; + } + } + }, + + // Unused in 1.8, left in so attrFn-stabbers won't die; remove in 1.9 + attrFn: {}, + + attr: function( elem, name, value, pass ) { + var ret, hooks, notxml, + nType = elem.nodeType; + + // don't get/set attributes on text, comment and attribute nodes + if ( !elem || nType === 3 || nType === 8 || nType === 2 ) { + return; + } + + if ( pass && jQuery.isFunction( jQuery.fn[ name ] ) ) { + return jQuery( elem )[ name ]( value ); + } + + // Fallback to prop when attributes are not supported + if ( typeof elem.getAttribute === "undefined" ) { + return jQuery.prop( elem, name, value ); + } + + notxml = nType !== 1 || !jQuery.isXMLDoc( elem ); + + // All attributes are lowercase + // Grab necessary hook if one is defined + if ( notxml ) { + name = name.toLowerCase(); + hooks = jQuery.attrHooks[ name ] || ( rboolean.test( name ) ? boolHook : nodeHook ); + } + + if ( value !== undefined ) { + + if ( value === null ) { + jQuery.removeAttr( elem, name ); + return; + + } else if ( hooks && "set" in hooks && notxml && (ret = hooks.set( elem, value, name )) !== undefined ) { + return ret; + + } else { + elem.setAttribute( name, value + "" ); + return value; + } + + } else if ( hooks && "get" in hooks && notxml && (ret = hooks.get( elem, name )) !== null ) { + return ret; + + } else { + + ret = elem.getAttribute( name ); + + // Non-existent attributes return null, we normalize to undefined + return ret === null ? + undefined : + ret; + } + }, + + removeAttr: function( elem, value ) { + var propName, attrNames, name, isBool, + i = 0; + + if ( value && elem.nodeType === 1 ) { + + attrNames = value.split( core_rspace ); + + for ( ; i < attrNames.length; i++ ) { + name = attrNames[ i ]; + + if ( name ) { + propName = jQuery.propFix[ name ] || name; + isBool = rboolean.test( name ); + + // See #9699 for explanation of this approach (setting first, then removal) + // Do not do this for boolean attributes (see #10870) + if ( !isBool ) { + jQuery.attr( elem, name, "" ); + } + elem.removeAttribute( getSetAttribute ? 
name : propName ); + + // Set corresponding property to false for boolean attributes + if ( isBool && propName in elem ) { + elem[ propName ] = false; + } + } + } + } + }, + + attrHooks: { + type: { + set: function( elem, value ) { + // We can't allow the type property to be changed (since it causes problems in IE) + if ( rtype.test( elem.nodeName ) && elem.parentNode ) { + jQuery.error( "type property can't be changed" ); + } else if ( !jQuery.support.radioValue && value === "radio" && jQuery.nodeName(elem, "input") ) { + // Setting the type on a radio button after the value resets the value in IE6-9 + // Reset value to it's default in case type is set after value + // This is for element creation + var val = elem.value; + elem.setAttribute( "type", value ); + if ( val ) { + elem.value = val; + } + return value; + } + } + }, + // Use the value property for back compat + // Use the nodeHook for button elements in IE6/7 (#1954) + value: { + get: function( elem, name ) { + if ( nodeHook && jQuery.nodeName( elem, "button" ) ) { + return nodeHook.get( elem, name ); + } + return name in elem ? + elem.value : + null; + }, + set: function( elem, value, name ) { + if ( nodeHook && jQuery.nodeName( elem, "button" ) ) { + return nodeHook.set( elem, value, name ); + } + // Does not return so that setAttribute is also used + elem.value = value; + } + } + }, + + propFix: { + tabindex: "tabIndex", + readonly: "readOnly", + "for": "htmlFor", + "class": "className", + maxlength: "maxLength", + cellspacing: "cellSpacing", + cellpadding: "cellPadding", + rowspan: "rowSpan", + colspan: "colSpan", + usemap: "useMap", + frameborder: "frameBorder", + contenteditable: "contentEditable" + }, + + prop: function( elem, name, value ) { + var ret, hooks, notxml, + nType = elem.nodeType; + + // don't get/set properties on text, comment and attribute nodes + if ( !elem || nType === 3 || nType === 8 || nType === 2 ) { + return; + } + + notxml = nType !== 1 || !jQuery.isXMLDoc( elem ); + + if ( notxml ) { + // Fix name and attach hooks + name = jQuery.propFix[ name ] || name; + hooks = jQuery.propHooks[ name ]; + } + + if ( value !== undefined ) { + if ( hooks && "set" in hooks && (ret = hooks.set( elem, value, name )) !== undefined ) { + return ret; + + } else { + return ( elem[ name ] = value ); + } + + } else { + if ( hooks && "get" in hooks && (ret = hooks.get( elem, name )) !== null ) { + return ret; + + } else { + return elem[ name ]; + } + } + }, + + propHooks: { + tabIndex: { + get: function( elem ) { + // elem.tabIndex doesn't always return the correct value when it hasn't been explicitly set + // http://fluidproject.org/blog/2008/01/09/getting-setting-and-removing-tabindex-values-with-javascript/ + var attributeNode = elem.getAttributeNode("tabindex"); + + return attributeNode && attributeNode.specified ? + parseInt( attributeNode.value, 10 ) : + rfocusable.test( elem.nodeName ) || rclickable.test( elem.nodeName ) && elem.href ? + 0 : + undefined; + } + } + } +}); + +// Hook for boolean attributes +boolHook = { + get: function( elem, name ) { + // Align boolean attributes with corresponding properties + // Fall back to attribute presence where some booleans are not supported + var attrNode, + property = jQuery.prop( elem, name ); + return property === true || typeof property !== "boolean" && ( attrNode = elem.getAttributeNode(name) ) && attrNode.nodeValue !== false ? 
+ name.toLowerCase() : + undefined; + }, + set: function( elem, value, name ) { + var propName; + if ( value === false ) { + // Remove boolean attributes when set to false + jQuery.removeAttr( elem, name ); + } else { + // value is true since we know at this point it's type boolean and not false + // Set boolean attributes to the same name and set the DOM property + propName = jQuery.propFix[ name ] || name; + if ( propName in elem ) { + // Only set the IDL specifically if it already exists on the element + elem[ propName ] = true; + } + + elem.setAttribute( name, name.toLowerCase() ); + } + return name; + } +}; + +// IE6/7 do not support getting/setting some attributes with get/setAttribute +if ( !getSetAttribute ) { + + fixSpecified = { + name: true, + id: true, + coords: true + }; + + // Use this for any attribute in IE6/7 + // This fixes almost every IE6/7 issue + nodeHook = jQuery.valHooks.button = { + get: function( elem, name ) { + var ret; + ret = elem.getAttributeNode( name ); + return ret && ( fixSpecified[ name ] ? ret.value !== "" : ret.specified ) ? + ret.value : + undefined; + }, + set: function( elem, value, name ) { + // Set the existing or create a new attribute node + var ret = elem.getAttributeNode( name ); + if ( !ret ) { + ret = document.createAttribute( name ); + elem.setAttributeNode( ret ); + } + return ( ret.value = value + "" ); + } + }; + + // Set width and height to auto instead of 0 on empty string( Bug #8150 ) + // This is for removals + jQuery.each([ "width", "height" ], function( i, name ) { + jQuery.attrHooks[ name ] = jQuery.extend( jQuery.attrHooks[ name ], { + set: function( elem, value ) { + if ( value === "" ) { + elem.setAttribute( name, "auto" ); + return value; + } + } + }); + }); + + // Set contenteditable to false on removals(#10429) + // Setting to empty string throws an error as an invalid value + jQuery.attrHooks.contenteditable = { + get: nodeHook.get, + set: function( elem, value, name ) { + if ( value === "" ) { + value = "false"; + } + nodeHook.set( elem, value, name ); + } + }; +} + + +// Some attributes require a special call on IE +if ( !jQuery.support.hrefNormalized ) { + jQuery.each([ "href", "src", "width", "height" ], function( i, name ) { + jQuery.attrHooks[ name ] = jQuery.extend( jQuery.attrHooks[ name ], { + get: function( elem ) { + var ret = elem.getAttribute( name, 2 ); + return ret === null ? 
undefined : ret; + } + }); + }); +} + +if ( !jQuery.support.style ) { + jQuery.attrHooks.style = { + get: function( elem ) { + // Return undefined in the case of empty string + // Normalize to lowercase since IE uppercases css property names + return elem.style.cssText.toLowerCase() || undefined; + }, + set: function( elem, value ) { + return ( elem.style.cssText = value + "" ); + } + }; +} + +// Safari mis-reports the default selected property of an option +// Accessing the parent's selectedIndex property fixes it +if ( !jQuery.support.optSelected ) { + jQuery.propHooks.selected = jQuery.extend( jQuery.propHooks.selected, { + get: function( elem ) { + var parent = elem.parentNode; + + if ( parent ) { + parent.selectedIndex; + + // Make sure that it also works with optgroups, see #5701 + if ( parent.parentNode ) { + parent.parentNode.selectedIndex; + } + } + return null; + } + }); +} + +// IE6/7 call enctype encoding +if ( !jQuery.support.enctype ) { + jQuery.propFix.enctype = "encoding"; +} + +// Radios and checkboxes getter/setter +if ( !jQuery.support.checkOn ) { + jQuery.each([ "radio", "checkbox" ], function() { + jQuery.valHooks[ this ] = { + get: function( elem ) { + // Handle the case where in Webkit "" is returned instead of "on" if a value isn't specified + return elem.getAttribute("value") === null ? "on" : elem.value; + } + }; + }); +} +jQuery.each([ "radio", "checkbox" ], function() { + jQuery.valHooks[ this ] = jQuery.extend( jQuery.valHooks[ this ], { + set: function( elem, value ) { + if ( jQuery.isArray( value ) ) { + return ( elem.checked = jQuery.inArray( jQuery(elem).val(), value ) >= 0 ); + } + } + }); +}); +var rformElems = /^(?:textarea|input|select)$/i, + rtypenamespace = /^([^\.]*|)(?:\.(.+)|)$/, + rhoverHack = /(?:^|\s)hover(\.\S+|)\b/, + rkeyEvent = /^key/, + rmouseEvent = /^(?:mouse|contextmenu)|click/, + rfocusMorph = /^(?:focusinfocus|focusoutblur)$/, + hoverHack = function( events ) { + return jQuery.event.special.hover ? events : events.replace( rhoverHack, "mouseenter$1 mouseleave$1" ); + }; + +/* + * Helper functions for managing events -- not part of the public interface. + * Props to Dean Edwards' addEvent library for many of the ideas. + */ +jQuery.event = { + + add: function( elem, types, handler, data, selector ) { + + var elemData, eventHandle, events, + t, tns, type, namespaces, handleObj, + handleObjIn, handlers, special; + + // Don't attach events to noData or text/comment nodes (allow plain objects tho) + if ( elem.nodeType === 3 || elem.nodeType === 8 || !types || !handler || !(elemData = jQuery._data( elem )) ) { + return; + } + + // Caller can pass in an object of custom data in lieu of the handler + if ( handler.handler ) { + handleObjIn = handler; + handler = handleObjIn.handler; + selector = handleObjIn.selector; + } + + // Make sure that the handler has a unique ID, used to find/remove it later + if ( !handler.guid ) { + handler.guid = jQuery.guid++; + } + + // Init the element's event structure and main handler, if this is the first + events = elemData.events; + if ( !events ) { + elemData.events = events = {}; + } + eventHandle = elemData.handle; + if ( !eventHandle ) { + elemData.handle = eventHandle = function( e ) { + // Discard the second event of a jQuery.event.trigger() and + // when an event is called after a page has unloaded + return typeof jQuery !== "undefined" && (!e || jQuery.event.triggered !== e.type) ? 
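+ // Sketch (illustrative, not upstream code): jQuery.event.add() stores handlers
+ // under jQuery._data( elem ).events and binds this single eventHandle per
+ // element, so both calls below end up routed through jQuery.event.dispatch().
+ // "#log" and the handlers are hypothetical.
+ //
+ //     $( "#log" ).on( "click", function () { /* directly bound handler */ } );
+ //     $( "#log" ).on( "click", ".row", function () { /* delegated handler */ } );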
+ jQuery.event.dispatch.apply( eventHandle.elem, arguments ) : + undefined; + }; + // Add elem as a property of the handle fn to prevent a memory leak with IE non-native events + eventHandle.elem = elem; + } + + // Handle multiple events separated by a space + // jQuery(...).bind("mouseover mouseout", fn); + types = jQuery.trim( hoverHack(types) ).split( " " ); + for ( t = 0; t < types.length; t++ ) { + + tns = rtypenamespace.exec( types[t] ) || []; + type = tns[1]; + namespaces = ( tns[2] || "" ).split( "." ).sort(); + + // If event changes its type, use the special event handlers for the changed type + special = jQuery.event.special[ type ] || {}; + + // If selector defined, determine special event api type, otherwise given type + type = ( selector ? special.delegateType : special.bindType ) || type; + + // Update special based on newly reset type + special = jQuery.event.special[ type ] || {}; + + // handleObj is passed to all event handlers + handleObj = jQuery.extend({ + type: type, + origType: tns[1], + data: data, + handler: handler, + guid: handler.guid, + selector: selector, + needsContext: selector && jQuery.expr.match.needsContext.test( selector ), + namespace: namespaces.join(".") + }, handleObjIn ); + + // Init the event handler queue if we're the first + handlers = events[ type ]; + if ( !handlers ) { + handlers = events[ type ] = []; + handlers.delegateCount = 0; + + // Only use addEventListener/attachEvent if the special events handler returns false + if ( !special.setup || special.setup.call( elem, data, namespaces, eventHandle ) === false ) { + // Bind the global event handler to the element + if ( elem.addEventListener ) { + elem.addEventListener( type, eventHandle, false ); + + } else if ( elem.attachEvent ) { + elem.attachEvent( "on" + type, eventHandle ); + } + } + } + + if ( special.add ) { + special.add.call( elem, handleObj ); + + if ( !handleObj.handler.guid ) { + handleObj.handler.guid = handler.guid; + } + } + + // Add to the element's handler list, delegates in front + if ( selector ) { + handlers.splice( handlers.delegateCount++, 0, handleObj ); + } else { + handlers.push( handleObj ); + } + + // Keep track of which events have ever been used, for event optimization + jQuery.event.global[ type ] = true; + } + + // Nullify elem to prevent memory leaks in IE + elem = null; + }, + + global: {}, + + // Detach an event or set of events from an element + remove: function( elem, types, handler, selector, mappedTypes ) { + + var t, tns, type, origType, namespaces, origCount, + j, events, special, eventType, handleObj, + elemData = jQuery.hasData( elem ) && jQuery._data( elem ); + + if ( !elemData || !(events = elemData.events) ) { + return; + } + + // Once for each type.namespace in types; type may be omitted + types = jQuery.trim( hoverHack( types || "" ) ).split(" "); + for ( t = 0; t < types.length; t++ ) { + tns = rtypenamespace.exec( types[t] ) || []; + type = origType = tns[1]; + namespaces = tns[2]; + + // Unbind all events (on this namespace, if provided) for the element + if ( !type ) { + for ( type in events ) { + jQuery.event.remove( elem, type + types[ t ], handler, selector, true ); + } + continue; + } + + special = jQuery.event.special[ type ] || {}; + type = ( selector? special.delegateType : special.bindType ) || type; + eventType = events[ type ] || []; + origCount = eventType.length; + namespaces = namespaces ? 
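+ // Illustrative sketch (not upstream code): the namespace RegExp built here is
+ // what lets a caller unbind only its own handlers. The element, handler and
+ // the "myPlugin" namespace are hypothetical.
+ //
+ //     $( elem ).on( "click.myPlugin keyup.myPlugin", handler );
+ //     $( elem ).off( ".myPlugin" );   // removes both, leaves other handlers alone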
new RegExp("(^|\\.)" + namespaces.split(".").sort().join("\\.(?:.*\\.|)") + "(\\.|$)") : null; + + // Remove matching events + for ( j = 0; j < eventType.length; j++ ) { + handleObj = eventType[ j ]; + + if ( ( mappedTypes || origType === handleObj.origType ) && + ( !handler || handler.guid === handleObj.guid ) && + ( !namespaces || namespaces.test( handleObj.namespace ) ) && + ( !selector || selector === handleObj.selector || selector === "**" && handleObj.selector ) ) { + eventType.splice( j--, 1 ); + + if ( handleObj.selector ) { + eventType.delegateCount--; + } + if ( special.remove ) { + special.remove.call( elem, handleObj ); + } + } + } + + // Remove generic event handler if we removed something and no more handlers exist + // (avoids potential for endless recursion during removal of special event handlers) + if ( eventType.length === 0 && origCount !== eventType.length ) { + if ( !special.teardown || special.teardown.call( elem, namespaces, elemData.handle ) === false ) { + jQuery.removeEvent( elem, type, elemData.handle ); + } + + delete events[ type ]; + } + } + + // Remove the expando if it's no longer used + if ( jQuery.isEmptyObject( events ) ) { + delete elemData.handle; + + // removeData also checks for emptiness and clears the expando if empty + // so use it instead of delete + jQuery.removeData( elem, "events", true ); + } + }, + + // Events that are safe to short-circuit if no handlers are attached. + // Native DOM events should not be added, they may have inline handlers. + customEvent: { + "getData": true, + "setData": true, + "changeData": true + }, + + trigger: function( event, data, elem, onlyHandlers ) { + // Don't do events on text and comment nodes + if ( elem && (elem.nodeType === 3 || elem.nodeType === 8) ) { + return; + } + + // Event object or event type + var cache, exclusive, i, cur, old, ontype, special, handle, eventPath, bubbleType, + type = event.type || event, + namespaces = []; + + // focus/blur morphs to focusin/out; ensure we're not firing them right now + if ( rfocusMorph.test( type + jQuery.event.triggered ) ) { + return; + } + + if ( type.indexOf( "!" ) >= 0 ) { + // Exclusive events trigger only for the exact event (no namespaces) + type = type.slice(0, -1); + exclusive = true; + } + + if ( type.indexOf( "." ) >= 0 ) { + // Namespaced trigger; create a regexp to match event type in handle() + namespaces = type.split("."); + type = namespaces.shift(); + namespaces.sort(); + } + + if ( (!elem || jQuery.event.customEvent[ type ]) && !jQuery.event.global[ type ] ) { + // No jQuery handlers for this event type, and it can't have inline handlers + return; + } + + // Caller can pass in an Event, Object, or just an event type string + event = typeof event === "object" ? + // jQuery.Event object + event[ jQuery.expando ] ? event : + // Object literal + new jQuery.Event( type, event ) : + // Just the event type (string) + new jQuery.Event( type ); + + event.type = type; + event.isTrigger = true; + event.exclusive = exclusive; + event.namespace = namespaces.join( "." ); + event.namespace_re = event.namespace? new RegExp("(^|\\.)" + namespaces.join("\\.(?:.*\\.|)") + "(\\.|$)") : null; + ontype = type.indexOf( ":" ) < 0 ? 
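+ // Sketch (illustrative only): trigger() accepts a bare type, a namespaced type,
+ // or a prebuilt jQuery.Event object; the namespace parsed above restricts which
+ // bound handlers run. "#widget" is hypothetical.
+ //
+ //     $( "#widget" ).trigger( "change" );            // every "change" handler
+ //     $( "#widget" ).trigger( "change.myPlugin" );   // only handlers in that namespace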
"on" + type : ""; + + // Handle a global trigger + if ( !elem ) { + + // TODO: Stop taunting the data cache; remove global events and always attach to document + cache = jQuery.cache; + for ( i in cache ) { + if ( cache[ i ].events && cache[ i ].events[ type ] ) { + jQuery.event.trigger( event, data, cache[ i ].handle.elem, true ); + } + } + return; + } + + // Clean up the event in case it is being reused + event.result = undefined; + if ( !event.target ) { + event.target = elem; + } + + // Clone any incoming data and prepend the event, creating the handler arg list + data = data != null ? jQuery.makeArray( data ) : []; + data.unshift( event ); + + // Allow special events to draw outside the lines + special = jQuery.event.special[ type ] || {}; + if ( special.trigger && special.trigger.apply( elem, data ) === false ) { + return; + } + + // Determine event propagation path in advance, per W3C events spec (#9951) + // Bubble up to document, then to window; watch for a global ownerDocument var (#9724) + eventPath = [[ elem, special.bindType || type ]]; + if ( !onlyHandlers && !special.noBubble && !jQuery.isWindow( elem ) ) { + + bubbleType = special.delegateType || type; + cur = rfocusMorph.test( bubbleType + type ) ? elem : elem.parentNode; + for ( old = elem; cur; cur = cur.parentNode ) { + eventPath.push([ cur, bubbleType ]); + old = cur; + } + + // Only add window if we got to document (e.g., not plain obj or detached DOM) + if ( old === (elem.ownerDocument || document) ) { + eventPath.push([ old.defaultView || old.parentWindow || window, bubbleType ]); + } + } + + // Fire handlers on the event path + for ( i = 0; i < eventPath.length && !event.isPropagationStopped(); i++ ) { + + cur = eventPath[i][0]; + event.type = eventPath[i][1]; + + handle = ( jQuery._data( cur, "events" ) || {} )[ event.type ] && jQuery._data( cur, "handle" ); + if ( handle ) { + handle.apply( cur, data ); + } + // Note that this is a bare JS function and not a jQuery handler + handle = ontype && cur[ ontype ]; + if ( handle && jQuery.acceptData( cur ) && handle.apply && handle.apply( cur, data ) === false ) { + event.preventDefault(); + } + } + event.type = type; + + // If nobody prevented the default action, do it now + if ( !onlyHandlers && !event.isDefaultPrevented() ) { + + if ( (!special._default || special._default.apply( elem.ownerDocument, data ) === false) && + !(type === "click" && jQuery.nodeName( elem, "a" )) && jQuery.acceptData( elem ) ) { + + // Call a native DOM method on the target with the same name name as the event. + // Can't use an .isFunction() check here because IE6/7 fails that test. 
+ // Don't do default actions on window, that's where global variables be (#6170) + // IE<9 dies on focus/blur to hidden element (#1486) + if ( ontype && elem[ type ] && ((type !== "focus" && type !== "blur") || event.target.offsetWidth !== 0) && !jQuery.isWindow( elem ) ) { + + // Don't re-trigger an onFOO event when we call its FOO() method + old = elem[ ontype ]; + + if ( old ) { + elem[ ontype ] = null; + } + + // Prevent re-triggering of the same event, since we already bubbled it above + jQuery.event.triggered = type; + elem[ type ](); + jQuery.event.triggered = undefined; + + if ( old ) { + elem[ ontype ] = old; + } + } + } + } + + return event.result; + }, + + dispatch: function( event ) { + + // Make a writable jQuery.Event from the native event object + event = jQuery.event.fix( event || window.event ); + + var i, j, cur, ret, selMatch, matched, matches, handleObj, sel, related, + handlers = ( (jQuery._data( this, "events" ) || {} )[ event.type ] || []), + delegateCount = handlers.delegateCount, + args = core_slice.call( arguments ), + run_all = !event.exclusive && !event.namespace, + special = jQuery.event.special[ event.type ] || {}, + handlerQueue = []; + + // Use the fix-ed jQuery.Event rather than the (read-only) native event + args[0] = event; + event.delegateTarget = this; + + // Call the preDispatch hook for the mapped type, and let it bail if desired + if ( special.preDispatch && special.preDispatch.call( this, event ) === false ) { + return; + } + + // Determine handlers that should run if there are delegated events + // Avoid non-left-click bubbling in Firefox (#3861) + if ( delegateCount && !(event.button && event.type === "click") ) { + + for ( cur = event.target; cur != this; cur = cur.parentNode || this ) { + + // Don't process clicks (ONLY) on disabled elements (#6911, #8165, #11382, #11764) + if ( cur.disabled !== true || event.type !== "click" ) { + selMatch = {}; + matches = []; + for ( i = 0; i < delegateCount; i++ ) { + handleObj = handlers[ i ]; + sel = handleObj.selector; + + if ( selMatch[ sel ] === undefined ) { + selMatch[ sel ] = handleObj.needsContext ? + jQuery( sel, this ).index( cur ) >= 0 : + jQuery.find( sel, this, null, [ cur ] ).length; + } + if ( selMatch[ sel ] ) { + matches.push( handleObj ); + } + } + if ( matches.length ) { + handlerQueue.push({ elem: cur, matches: matches }); + } + } + } + } + + // Add the remaining (directly-bound) handlers + if ( handlers.length > delegateCount ) { + handlerQueue.push({ elem: this, matches: handlers.slice( delegateCount ) }); + } + + // Run delegates first; they may want to stop propagation beneath us + for ( i = 0; i < handlerQueue.length && !event.isPropagationStopped(); i++ ) { + matched = handlerQueue[ i ]; + event.currentTarget = matched.elem; + + for ( j = 0; j < matched.matches.length && !event.isImmediatePropagationStopped(); j++ ) { + handleObj = matched.matches[ j ]; + + // Triggered event must either 1) be non-exclusive and have no namespace, or + // 2) have namespace(s) a subset or equal to those in the bound event (both can have no namespace). 
+ if ( run_all || (!event.namespace && !handleObj.namespace) || event.namespace_re && event.namespace_re.test( handleObj.namespace ) ) { + + event.data = handleObj.data; + event.handleObj = handleObj; + + ret = ( (jQuery.event.special[ handleObj.origType ] || {}).handle || handleObj.handler ) + .apply( matched.elem, args ); + + if ( ret !== undefined ) { + event.result = ret; + if ( ret === false ) { + event.preventDefault(); + event.stopPropagation(); + } + } + } + } + } + + // Call the postDispatch hook for the mapped type + if ( special.postDispatch ) { + special.postDispatch.call( this, event ); + } + + return event.result; + }, + + // Includes some event props shared by KeyEvent and MouseEvent + // *** attrChange attrName relatedNode srcElement are not normalized, non-W3C, deprecated, will be removed in 1.8 *** + props: "attrChange attrName relatedNode srcElement altKey bubbles cancelable ctrlKey currentTarget eventPhase metaKey relatedTarget shiftKey target timeStamp view which".split(" "), + + fixHooks: {}, + + keyHooks: { + props: "char charCode key keyCode".split(" "), + filter: function( event, original ) { + + // Add which for key events + if ( event.which == null ) { + event.which = original.charCode != null ? original.charCode : original.keyCode; + } + + return event; + } + }, + + mouseHooks: { + props: "button buttons clientX clientY fromElement offsetX offsetY pageX pageY screenX screenY toElement".split(" "), + filter: function( event, original ) { + var eventDoc, doc, body, + button = original.button, + fromElement = original.fromElement; + + // Calculate pageX/Y if missing and clientX/Y available + if ( event.pageX == null && original.clientX != null ) { + eventDoc = event.target.ownerDocument || document; + doc = eventDoc.documentElement; + body = eventDoc.body; + + event.pageX = original.clientX + ( doc && doc.scrollLeft || body && body.scrollLeft || 0 ) - ( doc && doc.clientLeft || body && body.clientLeft || 0 ); + event.pageY = original.clientY + ( doc && doc.scrollTop || body && body.scrollTop || 0 ) - ( doc && doc.clientTop || body && body.clientTop || 0 ); + } + + // Add relatedTarget, if necessary + if ( !event.relatedTarget && fromElement ) { + event.relatedTarget = fromElement === event.target ? original.toElement : fromElement; + } + + // Add which for click: 1 === left; 2 === middle; 3 === right + // Note: button is not normalized, so don't use it + if ( !event.which && button !== undefined ) { + event.which = ( button & 1 ? 1 : ( button & 2 ? 3 : ( button & 4 ? 2 : 0 ) ) ); + } + + return event; + } + }, + + fix: function( event ) { + if ( event[ jQuery.expando ] ) { + return event; + } + + // Create a writable copy of the event object and normalize some properties + var i, prop, + originalEvent = event, + fixHook = jQuery.event.fixHooks[ event.type ] || {}, + copy = fixHook.props ? this.props.concat( fixHook.props ) : this.props; + + event = jQuery.Event( originalEvent ); + + for ( i = copy.length; i; ) { + prop = copy[ --i ]; + event[ prop ] = originalEvent[ prop ]; + } + + // Fix target property, if necessary (#1925, IE 6/7/8 & Safari2) + if ( !event.target ) { + event.target = originalEvent.srcElement || document; + } + + // Target should not be a text node (#504, Safari) + if ( event.target.nodeType === 3 ) { + event.target = event.target.parentNode; + } + + // For mouse/key events, metaKey==false if it's undefined (#3368, #11328; IE6/7/8) + event.metaKey = !!event.metaKey; + + return fixHook.filter? 
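+ // Sketch (illustrative, not upstream code): once fix() and the hooks above have
+ // run, handlers can rely on normalized fields regardless of browser. The
+ // selector is hypothetical.
+ //
+ //     $( document ).on( "mousedown", ".draggable", function( e ) {
+ //         // e.which: 1 = left, 2 = middle, 3 = right (normalized by mouseHooks)
+ //         // e.pageX / e.pageY: derived from clientX/Y plus scroll offsets when missing
+ //     });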
fixHook.filter( event, originalEvent ) : event; + }, + + special: { + load: { + // Prevent triggered image.load events from bubbling to window.load + noBubble: true + }, + + focus: { + delegateType: "focusin" + }, + blur: { + delegateType: "focusout" + }, + + beforeunload: { + setup: function( data, namespaces, eventHandle ) { + // We only want to do this special case on windows + if ( jQuery.isWindow( this ) ) { + this.onbeforeunload = eventHandle; + } + }, + + teardown: function( namespaces, eventHandle ) { + if ( this.onbeforeunload === eventHandle ) { + this.onbeforeunload = null; + } + } + } + }, + + simulate: function( type, elem, event, bubble ) { + // Piggyback on a donor event to simulate a different one. + // Fake originalEvent to avoid donor's stopPropagation, but if the + // simulated event prevents default then we do the same on the donor. + var e = jQuery.extend( + new jQuery.Event(), + event, + { type: type, + isSimulated: true, + originalEvent: {} + } + ); + if ( bubble ) { + jQuery.event.trigger( e, null, elem ); + } else { + jQuery.event.dispatch.call( elem, e ); + } + if ( e.isDefaultPrevented() ) { + event.preventDefault(); + } + } +}; + +// Some plugins are using, but it's undocumented/deprecated and will be removed. +// The 1.7 special event interface should provide all the hooks needed now. +jQuery.event.handle = jQuery.event.dispatch; + +jQuery.removeEvent = document.removeEventListener ? + function( elem, type, handle ) { + if ( elem.removeEventListener ) { + elem.removeEventListener( type, handle, false ); + } + } : + function( elem, type, handle ) { + var name = "on" + type; + + if ( elem.detachEvent ) { + + // #8545, #7054, preventing memory leaks for custom events in IE6-8 + // detachEvent needed property on element, by name of that event, to properly expose it to GC + if ( typeof elem[ name ] === "undefined" ) { + elem[ name ] = null; + } + + elem.detachEvent( name, handle ); + } + }; + +jQuery.Event = function( src, props ) { + // Allow instantiation without the 'new' keyword + if ( !(this instanceof jQuery.Event) ) { + return new jQuery.Event( src, props ); + } + + // Event object + if ( src && src.type ) { + this.originalEvent = src; + this.type = src.type; + + // Events bubbling up the document may have been marked as prevented + // by a handler lower down the tree; reflect the correct value. + this.isDefaultPrevented = ( src.defaultPrevented || src.returnValue === false || + src.getPreventDefault && src.getPreventDefault() ) ? 
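+ // Illustrative sketch (not part of the upstream source): jQuery.Event can be
+ // created directly (with or without `new`) and handed to .trigger(); extra
+ // properties such as keyCode below travel with the synthetic event.
+ //
+ //     var e = jQuery.Event( "keydown", { keyCode: 27 } );
+ //     $( el ).trigger( e );
+ //     if ( e.isDefaultPrevented() ) { /* some handler vetoed it */ }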
returnTrue : returnFalse; + + // Event type + } else { + this.type = src; + } + + // Put explicitly provided properties onto the event object + if ( props ) { + jQuery.extend( this, props ); + } + + // Create a timestamp if incoming event doesn't have one + this.timeStamp = src && src.timeStamp || jQuery.now(); + + // Mark it as fixed + this[ jQuery.expando ] = true; +}; + +function returnFalse() { + return false; +} +function returnTrue() { + return true; +} + +// jQuery.Event is based on DOM3 Events as specified by the ECMAScript Language Binding +// http://www.w3.org/TR/2003/WD-DOM-Level-3-Events-20030331/ecma-script-binding.html +jQuery.Event.prototype = { + preventDefault: function() { + this.isDefaultPrevented = returnTrue; + + var e = this.originalEvent; + if ( !e ) { + return; + } + + // if preventDefault exists run it on the original event + if ( e.preventDefault ) { + e.preventDefault(); + + // otherwise set the returnValue property of the original event to false (IE) + } else { + e.returnValue = false; + } + }, + stopPropagation: function() { + this.isPropagationStopped = returnTrue; + + var e = this.originalEvent; + if ( !e ) { + return; + } + // if stopPropagation exists run it on the original event + if ( e.stopPropagation ) { + e.stopPropagation(); + } + // otherwise set the cancelBubble property of the original event to true (IE) + e.cancelBubble = true; + }, + stopImmediatePropagation: function() { + this.isImmediatePropagationStopped = returnTrue; + this.stopPropagation(); + }, + isDefaultPrevented: returnFalse, + isPropagationStopped: returnFalse, + isImmediatePropagationStopped: returnFalse +}; + +// Create mouseenter/leave events using mouseover/out and event-time checks +jQuery.each({ + mouseenter: "mouseover", + mouseleave: "mouseout" +}, function( orig, fix ) { + jQuery.event.special[ orig ] = { + delegateType: fix, + bindType: fix, + + handle: function( event ) { + var ret, + target = this, + related = event.relatedTarget, + handleObj = event.handleObj, + selector = handleObj.selector; + + // For mousenter/leave call the handler if related is outside the target. + // NB: No relatedTarget if the mouse left/entered the browser window + if ( !related || (related !== target && !jQuery.contains( target, related )) ) { + event.type = handleObj.origType; + ret = handleObj.handler.apply( this, arguments ); + event.type = fix; + } + return ret; + } + }; +}); + +// IE submit delegation +if ( !jQuery.support.submitBubbles ) { + + jQuery.event.special.submit = { + setup: function() { + // Only need this for delegated form submit events + if ( jQuery.nodeName( this, "form" ) ) { + return false; + } + + // Lazy-add a submit handler when a descendant form may potentially be submitted + jQuery.event.add( this, "click._submit keypress._submit", function( e ) { + // Node name check avoids a VML-related crash in IE (#9807) + var elem = e.target, + form = jQuery.nodeName( elem, "input" ) || jQuery.nodeName( elem, "button" ) ? 
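+ // Sketch (illustrative only): the lazily attached click/keypress handler below
+ // is what makes delegated submit work in browsers where submit does not bubble,
+ // so the usual delegation pattern applies unchanged. "#content" is hypothetical.
+ //
+ //     $( "#content" ).on( "submit", "form", function( e ) {
+ //         e.preventDefault();   // handle the form without a full page reload
+ //     });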
elem.form : undefined; + if ( form && !jQuery._data( form, "_submit_attached" ) ) { + jQuery.event.add( form, "submit._submit", function( event ) { + event._submit_bubble = true; + }); + jQuery._data( form, "_submit_attached", true ); + } + }); + // return undefined since we don't need an event listener + }, + + postDispatch: function( event ) { + // If form was submitted by the user, bubble the event up the tree + if ( event._submit_bubble ) { + delete event._submit_bubble; + if ( this.parentNode && !event.isTrigger ) { + jQuery.event.simulate( "submit", this.parentNode, event, true ); + } + } + }, + + teardown: function() { + // Only need this for delegated form submit events + if ( jQuery.nodeName( this, "form" ) ) { + return false; + } + + // Remove delegated handlers; cleanData eventually reaps submit handlers attached above + jQuery.event.remove( this, "._submit" ); + } + }; +} + +// IE change delegation and checkbox/radio fix +if ( !jQuery.support.changeBubbles ) { + + jQuery.event.special.change = { + + setup: function() { + + if ( rformElems.test( this.nodeName ) ) { + // IE doesn't fire change on a check/radio until blur; trigger it on click + // after a propertychange. Eat the blur-change in special.change.handle. + // This still fires onchange a second time for check/radio after blur. + if ( this.type === "checkbox" || this.type === "radio" ) { + jQuery.event.add( this, "propertychange._change", function( event ) { + if ( event.originalEvent.propertyName === "checked" ) { + this._just_changed = true; + } + }); + jQuery.event.add( this, "click._change", function( event ) { + if ( this._just_changed && !event.isTrigger ) { + this._just_changed = false; + } + // Allow triggered, simulated change events (#11500) + jQuery.event.simulate( "change", this, event, true ); + }); + } + return false; + } + // Delegated event; lazy-add a change handler on descendant inputs + jQuery.event.add( this, "beforeactivate._change", function( e ) { + var elem = e.target; + + if ( rformElems.test( elem.nodeName ) && !jQuery._data( elem, "_change_attached" ) ) { + jQuery.event.add( elem, "change._change", function( event ) { + if ( this.parentNode && !event.isSimulated && !event.isTrigger ) { + jQuery.event.simulate( "change", this.parentNode, event, true ); + } + }); + jQuery._data( elem, "_change_attached", true ); + } + }); + }, + + handle: function( event ) { + var elem = event.target; + + // Swallow native change events from checkbox/radio, we already triggered them above + if ( this !== elem || event.isSimulated || event.isTrigger || (elem.type !== "radio" && elem.type !== "checkbox") ) { + return event.handleObj.handler.apply( this, arguments ); + } + }, + + teardown: function() { + jQuery.event.remove( this, "._change" ); + + return !rformElems.test( this.nodeName ); + } + }; +} + +// Create "bubbling" focus and blur events +if ( !jQuery.support.focusinBubbles ) { + jQuery.each({ focus: "focusin", blur: "focusout" }, function( orig, fix ) { + + // Attach a single capturing handler while someone wants focusin/focusout + var attaches = 0, + handler = function( event ) { + jQuery.event.simulate( fix, event.target, jQuery.event.fix( event ), true ); + }; + + jQuery.event.special[ fix ] = { + setup: function() { + if ( attaches++ === 0 ) { + document.addEventListener( orig, handler, true ); + } + }, + teardown: function() { + if ( --attaches === 0 ) { + document.removeEventListener( orig, handler, true ); + } + } + }; + }); +} + +jQuery.fn.extend({ + + on: function( types, selector, data, fn, 
/*INTERNAL*/ one ) { + var origFn, type; + + // Types can be a map of types/handlers + if ( typeof types === "object" ) { + // ( types-Object, selector, data ) + if ( typeof selector !== "string" ) { // && selector != null + // ( types-Object, data ) + data = data || selector; + selector = undefined; + } + for ( type in types ) { + this.on( type, selector, data, types[ type ], one ); + } + return this; + } + + if ( data == null && fn == null ) { + // ( types, fn ) + fn = selector; + data = selector = undefined; + } else if ( fn == null ) { + if ( typeof selector === "string" ) { + // ( types, selector, fn ) + fn = data; + data = undefined; + } else { + // ( types, data, fn ) + fn = data; + data = selector; + selector = undefined; + } + } + if ( fn === false ) { + fn = returnFalse; + } else if ( !fn ) { + return this; + } + + if ( one === 1 ) { + origFn = fn; + fn = function( event ) { + // Can use an empty set, since event contains the info + jQuery().off( event ); + return origFn.apply( this, arguments ); + }; + // Use same guid so caller can remove using origFn + fn.guid = origFn.guid || ( origFn.guid = jQuery.guid++ ); + } + return this.each( function() { + jQuery.event.add( this, types, fn, data, selector ); + }); + }, + one: function( types, selector, data, fn ) { + return this.on( types, selector, data, fn, 1 ); + }, + off: function( types, selector, fn ) { + var handleObj, type; + if ( types && types.preventDefault && types.handleObj ) { + // ( event ) dispatched jQuery.Event + handleObj = types.handleObj; + jQuery( types.delegateTarget ).off( + handleObj.namespace ? handleObj.origType + "." + handleObj.namespace : handleObj.origType, + handleObj.selector, + handleObj.handler + ); + return this; + } + if ( typeof types === "object" ) { + // ( types-object [, selector] ) + for ( type in types ) { + this.off( type, selector, types[ type ] ); + } + return this; + } + if ( selector === false || typeof selector === "function" ) { + // ( types [, fn] ) + fn = selector; + selector = undefined; + } + if ( fn === false ) { + fn = returnFalse; + } + return this.each(function() { + jQuery.event.remove( this, types, fn, selector ); + }); + }, + + bind: function( types, data, fn ) { + return this.on( types, null, data, fn ); + }, + unbind: function( types, fn ) { + return this.off( types, null, fn ); + }, + + live: function( types, data, fn ) { + jQuery( this.context ).on( types, this.selector, data, fn ); + return this; + }, + die: function( types, fn ) { + jQuery( this.context ).off( types, this.selector || "**", fn ); + return this; + }, + + delegate: function( selector, types, data, fn ) { + return this.on( types, selector, data, fn ); + }, + undelegate: function( selector, types, fn ) { + // ( namespace ) or ( selector, types [, fn] ) + return arguments.length === 1 ? 
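+ // Illustrative sketch (not upstream code): .on()/.off() above accept several
+ // argument shapes, and the older bind/delegate/live helpers simply forward to
+ // them. Selectors, data and handlers below are hypothetical.
+ //
+ //     $( "#list" ).on( "click", "li", { who: "list" }, handler );
+ //     $( "#list" ).on( { mouseenter: enterFn, mouseleave: leaveFn }, "li" );
+ //     $( "#list" ).off( "click", "li", handler );
+ //     $( "#list" ).delegate( "li", "click", handler );   // same as .on( "click", "li", handler )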
this.off( selector, "**" ) : this.off( types, selector || "**", fn ); + }, + + trigger: function( type, data ) { + return this.each(function() { + jQuery.event.trigger( type, data, this ); + }); + }, + triggerHandler: function( type, data ) { + if ( this[0] ) { + return jQuery.event.trigger( type, data, this[0], true ); + } + }, + + toggle: function( fn ) { + // Save reference to arguments for access in closure + var args = arguments, + guid = fn.guid || jQuery.guid++, + i = 0, + toggler = function( event ) { + // Figure out which function to execute + var lastToggle = ( jQuery._data( this, "lastToggle" + fn.guid ) || 0 ) % i; + jQuery._data( this, "lastToggle" + fn.guid, lastToggle + 1 ); + + // Make sure that clicks stop + event.preventDefault(); + + // and execute the function + return args[ lastToggle ].apply( this, arguments ) || false; + }; + + // link all the functions, so any of them can unbind this click handler + toggler.guid = guid; + while ( i < args.length ) { + args[ i++ ].guid = guid; + } + + return this.click( toggler ); + }, + + hover: function( fnOver, fnOut ) { + return this.mouseenter( fnOver ).mouseleave( fnOut || fnOver ); + } +}); + +jQuery.each( ("blur focus focusin focusout load resize scroll unload click dblclick " + + "mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave " + + "change select submit keydown keypress keyup error contextmenu").split(" "), function( i, name ) { + + // Handle event binding + jQuery.fn[ name ] = function( data, fn ) { + if ( fn == null ) { + fn = data; + data = null; + } + + return arguments.length > 0 ? + this.on( name, null, data, fn ) : + this.trigger( name ); + }; + + if ( rkeyEvent.test( name ) ) { + jQuery.event.fixHooks[ name ] = jQuery.event.keyHooks; + } + + if ( rmouseEvent.test( name ) ) { + jQuery.event.fixHooks[ name ] = jQuery.event.mouseHooks; + } +}); +/*! 
+ * Sizzle CSS Selector Engine + * Copyright 2012 jQuery Foundation and other contributors + * Released under the MIT license + * http://sizzlejs.com/ + */ +(function( window, undefined ) { + +var cachedruns, + assertGetIdNotName, + Expr, + getText, + isXML, + contains, + compile, + sortOrder, + hasDuplicate, + outermostContext, + + baseHasDuplicate = true, + strundefined = "undefined", + + expando = ( "sizcache" + Math.random() ).replace( ".", "" ), + + Token = String, + document = window.document, + docElem = document.documentElement, + dirruns = 0, + done = 0, + pop = [].pop, + push = [].push, + slice = [].slice, + // Use a stripped-down indexOf if a native one is unavailable + indexOf = [].indexOf || function( elem ) { + var i = 0, + len = this.length; + for ( ; i < len; i++ ) { + if ( this[i] === elem ) { + return i; + } + } + return -1; + }, + + // Augment a function for special use by Sizzle + markFunction = function( fn, value ) { + fn[ expando ] = value == null || value; + return fn; + }, + + createCache = function() { + var cache = {}, + keys = []; + + return markFunction(function( key, value ) { + // Only keep the most recent entries + if ( keys.push( key ) > Expr.cacheLength ) { + delete cache[ keys.shift() ]; + } + + // Retrieve with (key + " ") to avoid collision with native Object.prototype properties (see Issue #157) + return (cache[ key + " " ] = value); + }, cache ); + }, + + classCache = createCache(), + tokenCache = createCache(), + compilerCache = createCache(), + + // Regex + + // Whitespace characters http://www.w3.org/TR/css3-selectors/#whitespace + whitespace = "[\\x20\\t\\r\\n\\f]", + // http://www.w3.org/TR/css3-syntax/#characters + characterEncoding = "(?:\\\\.|[-\\w]|[^\\x00-\\xa0])+", + + // Loosely modeled on CSS identifier characters + // An unquoted value should be a CSS identifier (http://www.w3.org/TR/css3-selectors/#attribute-selectors) + // Proper syntax: http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier + identifier = characterEncoding.replace( "w", "w#" ), + + // Acceptable operators http://www.w3.org/TR/selectors/#attribute-selectors + operators = "([*^$|!~]?=)", + attributes = "\\[" + whitespace + "*(" + characterEncoding + ")" + whitespace + + "*(?:" + operators + whitespace + "*(?:(['\"])((?:\\\\.|[^\\\\])*?)\\3|(" + identifier + ")|)|)" + whitespace + "*\\]", + + // Prefer arguments not in parens/brackets, + // then attribute selectors and non-pseudos (denoted by :), + // then anything else + // These preferences are here to reduce the number of selectors + // needing tokenize in the PSEUDO preFilter + pseudos = ":(" + characterEncoding + ")(?:\\((?:(['\"])((?:\\\\.|[^\\\\])*?)\\2|([^()[\\]]*|(?:(?:" + attributes + ")|[^:]|\\\\.)*|.*))\\)|)", + + // For matchExpr.POS and matchExpr.needsContext + pos = ":(even|odd|eq|gt|lt|nth|first|last)(?:\\(" + whitespace + + "*((?:-\\d)?\\d*)" + whitespace + "*\\)|)(?=[^-]|$)", + + // Leading and non-escaped trailing whitespace, capturing some non-whitespace characters preceding the latter + rtrim = new RegExp( "^" + whitespace + "+|((?:^|[^\\\\])(?:\\\\.)*)" + whitespace + "+$", "g" ), + + rcomma = new RegExp( "^" + whitespace + "*," + whitespace + "*" ), + rcombinators = new RegExp( "^" + whitespace + "*([\\x20\\t\\r\\n\\f>+~])" + whitespace + "*" ), + rpseudo = new RegExp( pseudos ), + + // Easily-parseable/retrievable ID or TAG or CLASS selectors + rquickExpr = /^(?:#([\w\-]+)|(\w+)|\.([\w\-]+))$/, + + rnot = /^:not/, + rsibling = /[\x20\t\r\n\f]*[+~]/, + rendsWithNot = /:not\($/, + + rheader = 
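+ // Sketch (illustrative, not upstream code): the `attributes` and `pseudos`
+ // source strings above are compiled into matchExpr just below; for instance the
+ // ATTR pattern recognizes a fragment like the hypothetical [data-state="open"].
+ //
+ //     matchExpr[ "ATTR" ].exec( '[data-state="open"]' )
+ //     // captures roughly: name "data-state", operator "=", value "open"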
/h\d/i, + rinputs = /input|select|textarea|button/i, + + rbackslash = /\\(?!\\)/g, + + matchExpr = { + "ID": new RegExp( "^#(" + characterEncoding + ")" ), + "CLASS": new RegExp( "^\\.(" + characterEncoding + ")" ), + "NAME": new RegExp( "^\\[name=['\"]?(" + characterEncoding + ")['\"]?\\]" ), + "TAG": new RegExp( "^(" + characterEncoding.replace( "w", "w*" ) + ")" ), + "ATTR": new RegExp( "^" + attributes ), + "PSEUDO": new RegExp( "^" + pseudos ), + "POS": new RegExp( pos, "i" ), + "CHILD": new RegExp( "^:(only|nth|first|last)-child(?:\\(" + whitespace + + "*(even|odd|(([+-]|)(\\d*)n|)" + whitespace + "*(?:([+-]|)" + whitespace + + "*(\\d+)|))" + whitespace + "*\\)|)", "i" ), + // For use in libraries implementing .is() + "needsContext": new RegExp( "^" + whitespace + "*[>+~]|" + pos, "i" ) + }, + + // Support + + // Used for testing something on an element + assert = function( fn ) { + var div = document.createElement("div"); + + try { + return fn( div ); + } catch (e) { + return false; + } finally { + // release memory in IE + div = null; + } + }, + + // Check if getElementsByTagName("*") returns only elements + assertTagNameNoComments = assert(function( div ) { + div.appendChild( document.createComment("") ); + return !div.getElementsByTagName("*").length; + }), + + // Check if getAttribute returns normalized href attributes + assertHrefNotNormalized = assert(function( div ) { + div.innerHTML = "<a href='#'></a>"; + return div.firstChild && typeof div.firstChild.getAttribute !== strundefined && + div.firstChild.getAttribute("href") === "#"; + }), + + // Check if attributes should be retrieved by attribute nodes + assertAttributes = assert(function( div ) { + div.innerHTML = "<select></select>"; + var type = typeof div.lastChild.getAttribute("multiple"); + // IE8 returns a string for some attributes even when not present + return type !== "boolean" && type !== "string"; + }), + + // Check if getElementsByClassName can be trusted + assertUsableClassName = assert(function( div ) { + // Opera can't find a second classname (in 9.6) + div.innerHTML = "<div class='hidden e'></div><div class='hidden'></div>"; + if ( !div.getElementsByClassName || !div.getElementsByClassName("e").length ) { + return false; + } + + // Safari 3.2 caches class attributes and doesn't catch changes + div.lastChild.className = "e"; + return div.getElementsByClassName("e").length === 2; + }), + + // Check if getElementById returns elements by name + // Check if getElementsByName privileges form controls or returns elements by ID + assertUsableName = assert(function( div ) { + // Inject content + div.id = expando + 0; + div.innerHTML = "<a name='" + expando + "'></a><div name='" + expando + "'></div>"; + docElem.insertBefore( div, docElem.firstChild ); + + // Test + var pass = document.getElementsByName && + // buggy browsers will return fewer than the correct 2 + document.getElementsByName( expando ).length === 2 + + // buggy browsers will return more than the correct 0 + document.getElementsByName( expando + 0 ).length; + assertGetIdNotName = !document.getElementById( expando ); + + // Cleanup + docElem.removeChild( div ); + + return pass; + }); + +// If slice is not available, provide a backup +try { + slice.call( docElem.childNodes, 0 )[0].nodeType; +} catch ( e ) { + slice = function( i ) { + var elem, + results = []; + for ( ; (elem = this[i]); i++ ) { + results.push( elem ); + } + return results; + }; +} + +function Sizzle( selector, context, results, seed ) { + results = results || []; + context = context || 
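+ // Illustrative sketch (not upstream code): Sizzle() below is the engine's entry
+ // point (jQuery wires it up as jQuery.find); it appends matches to the results
+ // array it returns. The selectors are hypothetical.
+ //
+ //     Sizzle( "ul.menu > li a", document );          // array of matching elements
+ //     Sizzle.matchesSelector( el, "li.active a" );   // true / false for one element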
document; + var match, elem, xml, m, + nodeType = context.nodeType; + + if ( !selector || typeof selector !== "string" ) { + return results; + } + + if ( nodeType !== 1 && nodeType !== 9 ) { + return []; + } + + xml = isXML( context ); + + if ( !xml && !seed ) { + if ( (match = rquickExpr.exec( selector )) ) { + // Speed-up: Sizzle("#ID") + if ( (m = match[1]) ) { + if ( nodeType === 9 ) { + elem = context.getElementById( m ); + // Check parentNode to catch when Blackberry 4.6 returns + // nodes that are no longer in the document #6963 + if ( elem && elem.parentNode ) { + // Handle the case where IE, Opera, and Webkit return items + // by name instead of ID + if ( elem.id === m ) { + results.push( elem ); + return results; + } + } else { + return results; + } + } else { + // Context is not a document + if ( context.ownerDocument && (elem = context.ownerDocument.getElementById( m )) && + contains( context, elem ) && elem.id === m ) { + results.push( elem ); + return results; + } + } + + // Speed-up: Sizzle("TAG") + } else if ( match[2] ) { + push.apply( results, slice.call(context.getElementsByTagName( selector ), 0) ); + return results; + + // Speed-up: Sizzle(".CLASS") + } else if ( (m = match[3]) && assertUsableClassName && context.getElementsByClassName ) { + push.apply( results, slice.call(context.getElementsByClassName( m ), 0) ); + return results; + } + } + } + + // All others + return select( selector.replace( rtrim, "$1" ), context, results, seed, xml ); +} + +Sizzle.matches = function( expr, elements ) { + return Sizzle( expr, null, null, elements ); +}; + +Sizzle.matchesSelector = function( elem, expr ) { + return Sizzle( expr, null, null, [ elem ] ).length > 0; +}; + +// Returns a function to use in pseudos for input types +function createInputPseudo( type ) { + return function( elem ) { + var name = elem.nodeName.toLowerCase(); + return name === "input" && elem.type === type; + }; +} + +// Returns a function to use in pseudos for buttons +function createButtonPseudo( type ) { + return function( elem ) { + var name = elem.nodeName.toLowerCase(); + return (name === "input" || name === "button") && elem.type === type; + }; +} + +// Returns a function to use in pseudos for positionals +function createPositionalPseudo( fn ) { + return markFunction(function( argument ) { + argument = +argument; + return markFunction(function( seed, matches ) { + var j, + matchIndexes = fn( [], seed.length, argument ), + i = matchIndexes.length; + + // Match elements found at the specified indexes + while ( i-- ) { + if ( seed[ (j = matchIndexes[i]) ] ) { + seed[j] = !(matches[j] = seed[j]); + } + } + }); + }); +} + +/** + * Utility function for retrieving the text value of an array of DOM nodes + * @param {Array|Element} elem + */ +getText = Sizzle.getText = function( elem ) { + var node, + ret = "", + i = 0, + nodeType = elem.nodeType; + + if ( nodeType ) { + if ( nodeType === 1 || nodeType === 9 || nodeType === 11 ) { + // Use textContent for elements + // innerText usage removed for consistency of new lines (see #11153) + if ( typeof elem.textContent === "string" ) { + return elem.textContent; + } else { + // Traverse its children + for ( elem = elem.firstChild; elem; elem = elem.nextSibling ) { + ret += getText( elem ); + } + } + } else if ( nodeType === 3 || nodeType === 4 ) { + return elem.nodeValue; + } + // Do not include comment or processing instruction nodes + } else { + + // If no nodeType, this is expected to be an array + for ( ; (node = elem[i]); i++ ) { + // Do not traverse comment 
nodes + ret += getText( node ); + } + } + return ret; +}; + +isXML = Sizzle.isXML = function( elem ) { + // documentElement is verified for cases where it doesn't yet exist + // (such as loading iframes in IE - #4833) + var documentElement = elem && (elem.ownerDocument || elem).documentElement; + return documentElement ? documentElement.nodeName !== "HTML" : false; +}; + +// Element contains another +contains = Sizzle.contains = docElem.contains ? + function( a, b ) { + var adown = a.nodeType === 9 ? a.documentElement : a, + bup = b && b.parentNode; + return a === bup || !!( bup && bup.nodeType === 1 && adown.contains && adown.contains(bup) ); + } : + docElem.compareDocumentPosition ? + function( a, b ) { + return b && !!( a.compareDocumentPosition( b ) & 16 ); + } : + function( a, b ) { + while ( (b = b.parentNode) ) { + if ( b === a ) { + return true; + } + } + return false; + }; + +Sizzle.attr = function( elem, name ) { + var val, + xml = isXML( elem ); + + if ( !xml ) { + name = name.toLowerCase(); + } + if ( (val = Expr.attrHandle[ name ]) ) { + return val( elem ); + } + if ( xml || assertAttributes ) { + return elem.getAttribute( name ); + } + val = elem.getAttributeNode( name ); + return val ? + typeof elem[ name ] === "boolean" ? + elem[ name ] ? name : null : + val.specified ? val.value : null : + null; +}; + +Expr = Sizzle.selectors = { + + // Can be adjusted by the user + cacheLength: 50, + + createPseudo: markFunction, + + match: matchExpr, + + // IE6/7 return a modified href + attrHandle: assertHrefNotNormalized ? + {} : + { + "href": function( elem ) { + return elem.getAttribute( "href", 2 ); + }, + "type": function( elem ) { + return elem.getAttribute("type"); + } + }, + + find: { + "ID": assertGetIdNotName ? + function( id, context, xml ) { + if ( typeof context.getElementById !== strundefined && !xml ) { + var m = context.getElementById( id ); + // Check parentNode to catch when Blackberry 4.6 returns + // nodes that are no longer in the document #6963 + return m && m.parentNode ? [m] : []; + } + } : + function( id, context, xml ) { + if ( typeof context.getElementById !== strundefined && !xml ) { + var m = context.getElementById( id ); + + return m ? + m.id === id || typeof m.getAttributeNode !== strundefined && m.getAttributeNode("id").value === id ? + [m] : + undefined : + []; + } + }, + + "TAG": assertTagNameNoComments ? 
+ function( tag, context ) { + if ( typeof context.getElementsByTagName !== strundefined ) { + return context.getElementsByTagName( tag ); + } + } : + function( tag, context ) { + var results = context.getElementsByTagName( tag ); + + // Filter out possible comments + if ( tag === "*" ) { + var elem, + tmp = [], + i = 0; + + for ( ; (elem = results[i]); i++ ) { + if ( elem.nodeType === 1 ) { + tmp.push( elem ); + } + } + + return tmp; + } + return results; + }, + + "NAME": assertUsableName && function( tag, context ) { + if ( typeof context.getElementsByName !== strundefined ) { + return context.getElementsByName( name ); + } + }, + + "CLASS": assertUsableClassName && function( className, context, xml ) { + if ( typeof context.getElementsByClassName !== strundefined && !xml ) { + return context.getElementsByClassName( className ); + } + } + }, + + relative: { + ">": { dir: "parentNode", first: true }, + " ": { dir: "parentNode" }, + "+": { dir: "previousSibling", first: true }, + "~": { dir: "previousSibling" } + }, + + preFilter: { + "ATTR": function( match ) { + match[1] = match[1].replace( rbackslash, "" ); + + // Move the given value to match[3] whether quoted or unquoted + match[3] = ( match[4] || match[5] || "" ).replace( rbackslash, "" ); + + if ( match[2] === "~=" ) { + match[3] = " " + match[3] + " "; + } + + return match.slice( 0, 4 ); + }, + + "CHILD": function( match ) { + /* matches from matchExpr["CHILD"] + 1 type (only|nth|...) + 2 argument (even|odd|\d*|\d*n([+-]\d+)?|...) + 3 xn-component of xn+y argument ([+-]?\d*n|) + 4 sign of xn-component + 5 x of xn-component + 6 sign of y-component + 7 y of y-component + */ + match[1] = match[1].toLowerCase(); + + if ( match[1] === "nth" ) { + // nth-child requires argument + if ( !match[2] ) { + Sizzle.error( match[0] ); + } + + // numeric x and y parameters for Expr.filter.CHILD + // remember that false/true cast respectively to 0/1 + match[3] = +( match[3] ? match[4] + (match[5] || 1) : 2 * ( match[2] === "even" || match[2] === "odd" ) ); + match[4] = +( ( match[6] + match[7] ) || match[2] === "odd" ); + + // other types prohibit arguments + } else if ( match[2] ) { + Sizzle.error( match[0] ); + } + + return match; + }, + + "PSEUDO": function( match ) { + var unquoted, excess; + if ( matchExpr["CHILD"].test( match[0] ) ) { + return null; + } + + if ( match[3] ) { + match[2] = match[3]; + } else if ( (unquoted = match[4]) ) { + // Only check arguments that contain a pseudo + if ( rpseudo.test(unquoted) && + // Get excess from tokenize (recursively) + (excess = tokenize( unquoted, true )) && + // advance to the next closing parenthesis + (excess = unquoted.indexOf( ")", unquoted.length - excess ) - unquoted.length) ) { + + // excess is a negative index + unquoted = unquoted.slice( 0, excess ); + match[0] = match[0].slice( 0, excess ); + } + match[2] = unquoted; + } + + // Return only captures needed by the pseudo filter method (type and argument) + return match.slice( 0, 3 ); + } + }, + + filter: { + "ID": assertGetIdNotName ? 
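+ // Worked sketch (illustrative, not upstream code) of the CHILD preFilter above:
+ // for ":nth-child(2n+1)" it normalizes the argument to first = 2 (the cycle
+ // size) and last = 1 (the offset); the CHILD filter below then matches an
+ // element whose 1-based position p makes (p - last) zero or a positive
+ // multiple of first, i.e. positions 1, 3, 5, ...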
+ function( id ) { + id = id.replace( rbackslash, "" ); + return function( elem ) { + return elem.getAttribute("id") === id; + }; + } : + function( id ) { + id = id.replace( rbackslash, "" ); + return function( elem ) { + var node = typeof elem.getAttributeNode !== strundefined && elem.getAttributeNode("id"); + return node && node.value === id; + }; + }, + + "TAG": function( nodeName ) { + if ( nodeName === "*" ) { + return function() { return true; }; + } + nodeName = nodeName.replace( rbackslash, "" ).toLowerCase(); + + return function( elem ) { + return elem.nodeName && elem.nodeName.toLowerCase() === nodeName; + }; + }, + + "CLASS": function( className ) { + var pattern = classCache[ expando ][ className + " " ]; + + return pattern || + (pattern = new RegExp( "(^|" + whitespace + ")" + className + "(" + whitespace + "|$)" )) && + classCache( className, function( elem ) { + return pattern.test( elem.className || (typeof elem.getAttribute !== strundefined && elem.getAttribute("class")) || "" ); + }); + }, + + "ATTR": function( name, operator, check ) { + return function( elem, context ) { + var result = Sizzle.attr( elem, name ); + + if ( result == null ) { + return operator === "!="; + } + if ( !operator ) { + return true; + } + + result += ""; + + return operator === "=" ? result === check : + operator === "!=" ? result !== check : + operator === "^=" ? check && result.indexOf( check ) === 0 : + operator === "*=" ? check && result.indexOf( check ) > -1 : + operator === "$=" ? check && result.substr( result.length - check.length ) === check : + operator === "~=" ? ( " " + result + " " ).indexOf( check ) > -1 : + operator === "|=" ? result === check || result.substr( 0, check.length + 1 ) === check + "-" : + false; + }; + }, + + "CHILD": function( type, argument, first, last ) { + + if ( type === "nth" ) { + return function( elem ) { + var node, diff, + parent = elem.parentNode; + + if ( first === 1 && last === 0 ) { + return true; + } + + if ( parent ) { + diff = 0; + for ( node = parent.firstChild; node; node = node.nextSibling ) { + if ( node.nodeType === 1 ) { + diff++; + if ( elem === node ) { + break; + } + } + } + } + + // Incorporate the offset (or cast to NaN), then check against cycle size + diff -= last; + return diff === first || ( diff % first === 0 && diff / first >= 0 ); + }; + } + + return function( elem ) { + var node = elem; + + switch ( type ) { + case "only": + case "first": + while ( (node = node.previousSibling) ) { + if ( node.nodeType === 1 ) { + return false; + } + } + + if ( type === "first" ) { + return true; + } + + node = elem; + + /* falls through */ + case "last": + while ( (node = node.nextSibling) ) { + if ( node.nodeType === 1 ) { + return false; + } + } + + return true; + } + }; + }, + + "PSEUDO": function( pseudo, argument ) { + // pseudo-class names are case-insensitive + // http://www.w3.org/TR/selectors/#pseudo-classes + // Prioritize by case sensitivity in case custom pseudos are added with uppercase letters + // Remember that setFilters inherits from pseudos + var args, + fn = Expr.pseudos[ pseudo ] || Expr.setFilters[ pseudo.toLowerCase() ] || + Sizzle.error( "unsupported pseudo: " + pseudo ); + + // The user may use createPseudo to indicate that + // arguments are needed to create the filter function + // just as Sizzle does + if ( fn[ expando ] ) { + return fn( argument ); + } + + // But maintain support for old signatures + if ( fn.length > 1 ) { + args = [ pseudo, pseudo, "", argument ]; + return Expr.setFilters.hasOwnProperty( 
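+ // Sketch (illustrative only) of the ATTR filter semantics below, using a
+ // hypothetical attribute value "ceph-qa":
+ //
+ //     [data-x="ceph-qa"]    exact match            [data-x^="ceph"]   prefix
+ //     [data-x$="qa"]        suffix                 [data-x*="ph-q"]   substring
+ //     [data-x~="ceph-qa"]   whitespace-separated word
+ //     [data-x|="ceph"]      equal to "ceph" or starting with "ceph-"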
pseudo.toLowerCase() ) ? + markFunction(function( seed, matches ) { + var idx, + matched = fn( seed, argument ), + i = matched.length; + while ( i-- ) { + idx = indexOf.call( seed, matched[i] ); + seed[ idx ] = !( matches[ idx ] = matched[i] ); + } + }) : + function( elem ) { + return fn( elem, 0, args ); + }; + } + + return fn; + } + }, + + pseudos: { + "not": markFunction(function( selector ) { + // Trim the selector passed to compile + // to avoid treating leading and trailing + // spaces as combinators + var input = [], + results = [], + matcher = compile( selector.replace( rtrim, "$1" ) ); + + return matcher[ expando ] ? + markFunction(function( seed, matches, context, xml ) { + var elem, + unmatched = matcher( seed, null, xml, [] ), + i = seed.length; + + // Match elements unmatched by `matcher` + while ( i-- ) { + if ( (elem = unmatched[i]) ) { + seed[i] = !(matches[i] = elem); + } + } + }) : + function( elem, context, xml ) { + input[0] = elem; + matcher( input, null, xml, results ); + return !results.pop(); + }; + }), + + "has": markFunction(function( selector ) { + return function( elem ) { + return Sizzle( selector, elem ).length > 0; + }; + }), + + "contains": markFunction(function( text ) { + return function( elem ) { + return ( elem.textContent || elem.innerText || getText( elem ) ).indexOf( text ) > -1; + }; + }), + + "enabled": function( elem ) { + return elem.disabled === false; + }, + + "disabled": function( elem ) { + return elem.disabled === true; + }, + + "checked": function( elem ) { + // In CSS3, :checked should return both checked and selected elements + // http://www.w3.org/TR/2011/REC-css3-selectors-20110929/#checked + var nodeName = elem.nodeName.toLowerCase(); + return (nodeName === "input" && !!elem.checked) || (nodeName === "option" && !!elem.selected); + }, + + "selected": function( elem ) { + // Accessing this property makes selected-by-default + // options in Safari work properly + if ( elem.parentNode ) { + elem.parentNode.selectedIndex; + } + + return elem.selected === true; + }, + + "parent": function( elem ) { + return !Expr.pseudos["empty"]( elem ); + }, + + "empty": function( elem ) { + // http://www.w3.org/TR/selectors/#empty-pseudo + // :empty is only affected by element nodes and content nodes(including text(3), cdata(4)), + // not comment, processing instructions, or others + // Thanks to Diego Perini for the nodeName shortcut + // Greater than "@" means alpha characters (specifically not starting with "#" or "?") + var nodeType; + elem = elem.firstChild; + while ( elem ) { + if ( elem.nodeName > "@" || (nodeType = elem.nodeType) === 3 || nodeType === 4 ) { + return false; + } + elem = elem.nextSibling; + } + return true; + }, + + "header": function( elem ) { + return rheader.test( elem.nodeName ); + }, + + "text": function( elem ) { + var type, attr; + // IE6 and 7 will map elem.type to 'text' for new HTML5 types (search, etc) + // use getAttribute instead to test this case + return elem.nodeName.toLowerCase() === "input" && + (type = elem.type) === "text" && + ( (attr = elem.getAttribute("type")) == null || attr.toLowerCase() === type ); + }, + + // Input types + "radio": createInputPseudo("radio"), + "checkbox": createInputPseudo("checkbox"), + "file": createInputPseudo("file"), + "password": createInputPseudo("password"), + "image": createInputPseudo("image"), + + "submit": createButtonPseudo("submit"), + "reset": createButtonPseudo("reset"), + + "button": function( elem ) { + var name = elem.nodeName.toLowerCase(); + return name === "input" 
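+ // Illustrative sketch (not upstream code): besides the standard CSS pseudos,
+ // Sizzle adds the form and positional pseudos defined below. Selectors are
+ // hypothetical.
+ //
+ //     $( ":checked" )             // checked checkboxes/radios and selected <option>s
+ //     $( "li:eq(2)" )             // third list item (0-based index)
+ //     $( "tr:lt(3)" )             // first three rows
+ //     $( "div:contains(error)" )  // divs whose text contains "error"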
&& elem.type === "button" || name === "button"; + }, + + "input": function( elem ) { + return rinputs.test( elem.nodeName ); + }, + + "focus": function( elem ) { + var doc = elem.ownerDocument; + return elem === doc.activeElement && (!doc.hasFocus || doc.hasFocus()) && !!(elem.type || elem.href || ~elem.tabIndex); + }, + + "active": function( elem ) { + return elem === elem.ownerDocument.activeElement; + }, + + // Positional types + "first": createPositionalPseudo(function() { + return [ 0 ]; + }), + + "last": createPositionalPseudo(function( matchIndexes, length ) { + return [ length - 1 ]; + }), + + "eq": createPositionalPseudo(function( matchIndexes, length, argument ) { + return [ argument < 0 ? argument + length : argument ]; + }), + + "even": createPositionalPseudo(function( matchIndexes, length ) { + for ( var i = 0; i < length; i += 2 ) { + matchIndexes.push( i ); + } + return matchIndexes; + }), + + "odd": createPositionalPseudo(function( matchIndexes, length ) { + for ( var i = 1; i < length; i += 2 ) { + matchIndexes.push( i ); + } + return matchIndexes; + }), + + "lt": createPositionalPseudo(function( matchIndexes, length, argument ) { + for ( var i = argument < 0 ? argument + length : argument; --i >= 0; ) { + matchIndexes.push( i ); + } + return matchIndexes; + }), + + "gt": createPositionalPseudo(function( matchIndexes, length, argument ) { + for ( var i = argument < 0 ? argument + length : argument; ++i < length; ) { + matchIndexes.push( i ); + } + return matchIndexes; + }) + } +}; + +function siblingCheck( a, b, ret ) { + if ( a === b ) { + return ret; + } + + var cur = a.nextSibling; + + while ( cur ) { + if ( cur === b ) { + return -1; + } + + cur = cur.nextSibling; + } + + return 1; +} + +sortOrder = docElem.compareDocumentPosition ? + function( a, b ) { + if ( a === b ) { + hasDuplicate = true; + return 0; + } + + return ( !a.compareDocumentPosition || !b.compareDocumentPosition ? + a.compareDocumentPosition : + a.compareDocumentPosition(b) & 4 + ) ? -1 : 1; + } : + function( a, b ) { + // The nodes are identical, we can exit early + if ( a === b ) { + hasDuplicate = true; + return 0; + + // Fallback to using sourceIndex (in IE) if it's available on both nodes + } else if ( a.sourceIndex && b.sourceIndex ) { + return a.sourceIndex - b.sourceIndex; + } + + var al, bl, + ap = [], + bp = [], + aup = a.parentNode, + bup = b.parentNode, + cur = aup; + + // If the nodes are siblings (or identical) we can do a quick check + if ( aup === bup ) { + return siblingCheck( a, b ); + + // If no parents were found then the nodes are disconnected + } else if ( !aup ) { + return -1; + + } else if ( !bup ) { + return 1; + } + + // Otherwise they're somewhere else in the tree so we need + // to build up a full list of the parentNodes for comparison + while ( cur ) { + ap.unshift( cur ); + cur = cur.parentNode; + } + + cur = bup; + + while ( cur ) { + bp.unshift( cur ); + cur = cur.parentNode; + } + + al = ap.length; + bl = bp.length; + + // Start walking down the tree looking for a discrepancy + for ( var i = 0; i < al && i < bl; i++ ) { + if ( ap[i] !== bp[i] ) { + return siblingCheck( ap[i], bp[i] ); + } + } + + // We ended someplace up the tree so do a sibling check + return i === al ? + siblingCheck( a, bp[i], -1 ) : + siblingCheck( ap[i], b, 1 ); + }; + +// Always assume the presence of duplicates if sort doesn't +// pass them to our comparison function (as in Google Chrome). 
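+ // Sketch (illustrative, not upstream code): uniqueSort() below orders a list of
+ // elements into document order and strips duplicates in place; jQuery exposes
+ // it as jQuery.unique. The element array is hypothetical.
+ //
+ //     Sizzle.uniqueSort( [ elB, elA, elB ] );   // -> [ elA, elB ] (document order)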
+[0, 0].sort( sortOrder ); +baseHasDuplicate = !hasDuplicate; + +// Document sorting and removing duplicates +Sizzle.uniqueSort = function( results ) { + var elem, + duplicates = [], + i = 1, + j = 0; + + hasDuplicate = baseHasDuplicate; + results.sort( sortOrder ); + + if ( hasDuplicate ) { + for ( ; (elem = results[i]); i++ ) { + if ( elem === results[ i - 1 ] ) { + j = duplicates.push( i ); + } + } + while ( j-- ) { + results.splice( duplicates[ j ], 1 ); + } + } + + return results; +}; + +Sizzle.error = function( msg ) { + throw new Error( "Syntax error, unrecognized expression: " + msg ); +}; + +function tokenize( selector, parseOnly ) { + var matched, match, tokens, type, + soFar, groups, preFilters, + cached = tokenCache[ expando ][ selector + " " ]; + + if ( cached ) { + return parseOnly ? 0 : cached.slice( 0 ); + } + + soFar = selector; + groups = []; + preFilters = Expr.preFilter; + + while ( soFar ) { + + // Comma and first run + if ( !matched || (match = rcomma.exec( soFar )) ) { + if ( match ) { + // Don't consume trailing commas as valid + soFar = soFar.slice( match[0].length ) || soFar; + } + groups.push( tokens = [] ); + } + + matched = false; + + // Combinators + if ( (match = rcombinators.exec( soFar )) ) { + tokens.push( matched = new Token( match.shift() ) ); + soFar = soFar.slice( matched.length ); + + // Cast descendant combinators to space + matched.type = match[0].replace( rtrim, " " ); + } + + // Filters + for ( type in Expr.filter ) { + if ( (match = matchExpr[ type ].exec( soFar )) && (!preFilters[ type ] || + (match = preFilters[ type ]( match ))) ) { + + tokens.push( matched = new Token( match.shift() ) ); + soFar = soFar.slice( matched.length ); + matched.type = type; + matched.matches = match; + } + } + + if ( !matched ) { + break; + } + } + + // Return the length of the invalid excess + // if we're just parsing + // Otherwise, throw an error or return tokens + return parseOnly ? + soFar.length : + soFar ? + Sizzle.error( selector ) : + // Cache the tokens + tokenCache( selector, groups ).slice( 0 ); +} + +function addCombinator( matcher, combinator, base ) { + var dir = combinator.dir, + checkNonElements = base && combinator.dir === "parentNode", + doneName = done++; + + return combinator.first ? + // Check against closest ancestor/preceding element + function( elem, context, xml ) { + while ( (elem = elem[ dir ]) ) { + if ( checkNonElements || elem.nodeType === 1 ) { + return matcher( elem, context, xml ); + } + } + } : + + // Check against all ancestor/preceding elements + function( elem, context, xml ) { + // We can't set arbitrary data on XML nodes, so they don't benefit from dir caching + if ( !xml ) { + var cache, + dirkey = dirruns + " " + doneName + " ", + cachedkey = dirkey + cachedruns; + while ( (elem = elem[ dir ]) ) { + if ( checkNonElements || elem.nodeType === 1 ) { + if ( (cache = elem[ expando ]) === cachedkey ) { + return elem.sizset; + } else if ( typeof cache === "string" && cache.indexOf(dirkey) === 0 ) { + if ( elem.sizset ) { + return elem; + } + } else { + elem[ expando ] = cachedkey; + if ( matcher( elem, context, xml ) ) { + elem.sizset = true; + return elem; + } + elem.sizset = false; + } + } + } + } else { + while ( (elem = elem[ dir ]) ) { + if ( checkNonElements || elem.nodeType === 1 ) { + if ( matcher( elem, context, xml ) ) { + return elem; + } + } + } + } + }; +} + +function elementMatcher( matchers ) { + return matchers.length > 1 ? 
+ function( elem, context, xml ) { + var i = matchers.length; + while ( i-- ) { + if ( !matchers[i]( elem, context, xml ) ) { + return false; + } + } + return true; + } : + matchers[0]; +} + +function condense( unmatched, map, filter, context, xml ) { + var elem, + newUnmatched = [], + i = 0, + len = unmatched.length, + mapped = map != null; + + for ( ; i < len; i++ ) { + if ( (elem = unmatched[i]) ) { + if ( !filter || filter( elem, context, xml ) ) { + newUnmatched.push( elem ); + if ( mapped ) { + map.push( i ); + } + } + } + } + + return newUnmatched; +} + +function setMatcher( preFilter, selector, matcher, postFilter, postFinder, postSelector ) { + if ( postFilter && !postFilter[ expando ] ) { + postFilter = setMatcher( postFilter ); + } + if ( postFinder && !postFinder[ expando ] ) { + postFinder = setMatcher( postFinder, postSelector ); + } + return markFunction(function( seed, results, context, xml ) { + var temp, i, elem, + preMap = [], + postMap = [], + preexisting = results.length, + + // Get initial elements from seed or context + elems = seed || multipleContexts( selector || "*", context.nodeType ? [ context ] : context, [] ), + + // Prefilter to get matcher input, preserving a map for seed-results synchronization + matcherIn = preFilter && ( seed || !selector ) ? + condense( elems, preMap, preFilter, context, xml ) : + elems, + + matcherOut = matcher ? + // If we have a postFinder, or filtered seed, or non-seed postFilter or preexisting results, + postFinder || ( seed ? preFilter : preexisting || postFilter ) ? + + // ...intermediate processing is necessary + [] : + + // ...otherwise use results directly + results : + matcherIn; + + // Find primary matches + if ( matcher ) { + matcher( matcherIn, matcherOut, context, xml ); + } + + // Apply postFilter + if ( postFilter ) { + temp = condense( matcherOut, postMap ); + postFilter( temp, [], context, xml ); + + // Un-match failing elements by moving them back to matcherIn + i = temp.length; + while ( i-- ) { + if ( (elem = temp[i]) ) { + matcherOut[ postMap[i] ] = !(matcherIn[ postMap[i] ] = elem); + } + } + } + + if ( seed ) { + if ( postFinder || preFilter ) { + if ( postFinder ) { + // Get the final matcherOut by condensing this intermediate into postFinder contexts + temp = []; + i = matcherOut.length; + while ( i-- ) { + if ( (elem = matcherOut[i]) ) { + // Restore matcherIn since elem is not yet a final match + temp.push( (matcherIn[i] = elem) ); + } + } + postFinder( null, (matcherOut = []), temp, xml ); + } + + // Move matched elements from seed to results to keep them synchronized + i = matcherOut.length; + while ( i-- ) { + if ( (elem = matcherOut[i]) && + (temp = postFinder ? indexOf.call( seed, elem ) : preMap[i]) > -1 ) { + + seed[temp] = !(results[temp] = elem); + } + } + } + + // Add elements to results, through postFinder if defined + } else { + matcherOut = condense( + matcherOut === results ? + matcherOut.splice( preexisting, matcherOut.length ) : + matcherOut + ); + if ( postFinder ) { + postFinder( null, results, matcherOut, xml ); + } else { + push.apply( results, matcherOut ); + } + } + }); +} + +function matcherFromTokens( tokens ) { + var checkContext, matcher, j, + len = tokens.length, + leadingRelative = Expr.relative[ tokens[0].type ], + implicitRelative = leadingRelative || Expr.relative[" "], + i = leadingRelative ? 
1 : 0, + + // The foundational matcher ensures that elements are reachable from top-level context(s) + matchContext = addCombinator( function( elem ) { + return elem === checkContext; + }, implicitRelative, true ), + matchAnyContext = addCombinator( function( elem ) { + return indexOf.call( checkContext, elem ) > -1; + }, implicitRelative, true ), + matchers = [ function( elem, context, xml ) { + return ( !leadingRelative && ( xml || context !== outermostContext ) ) || ( + (checkContext = context).nodeType ? + matchContext( elem, context, xml ) : + matchAnyContext( elem, context, xml ) ); + } ]; + + for ( ; i < len; i++ ) { + if ( (matcher = Expr.relative[ tokens[i].type ]) ) { + matchers = [ addCombinator( elementMatcher( matchers ), matcher ) ]; + } else { + matcher = Expr.filter[ tokens[i].type ].apply( null, tokens[i].matches ); + + // Return special upon seeing a positional matcher + if ( matcher[ expando ] ) { + // Find the next relative operator (if any) for proper handling + j = ++i; + for ( ; j < len; j++ ) { + if ( Expr.relative[ tokens[j].type ] ) { + break; + } + } + return setMatcher( + i > 1 && elementMatcher( matchers ), + i > 1 && tokens.slice( 0, i - 1 ).join("").replace( rtrim, "$1" ), + matcher, + i < j && matcherFromTokens( tokens.slice( i, j ) ), + j < len && matcherFromTokens( (tokens = tokens.slice( j )) ), + j < len && tokens.join("") + ); + } + matchers.push( matcher ); + } + } + + return elementMatcher( matchers ); +} + +function matcherFromGroupMatchers( elementMatchers, setMatchers ) { + var bySet = setMatchers.length > 0, + byElement = elementMatchers.length > 0, + superMatcher = function( seed, context, xml, results, expandContext ) { + var elem, j, matcher, + setMatched = [], + matchedCount = 0, + i = "0", + unmatched = seed && [], + outermost = expandContext != null, + contextBackup = outermostContext, + // We must always have either seed elements or context + elems = seed || byElement && Expr.find["TAG"]( "*", expandContext && context.parentNode || context ), + // Nested matchers should use non-integer dirruns + dirrunsUnique = (dirruns += contextBackup == null ? 
1 : Math.E); + + if ( outermost ) { + outermostContext = context !== document && context; + cachedruns = superMatcher.el; + } + + // Add elements passing elementMatchers directly to results + for ( ; (elem = elems[i]) != null; i++ ) { + if ( byElement && elem ) { + for ( j = 0; (matcher = elementMatchers[j]); j++ ) { + if ( matcher( elem, context, xml ) ) { + results.push( elem ); + break; + } + } + if ( outermost ) { + dirruns = dirrunsUnique; + cachedruns = ++superMatcher.el; + } + } + + // Track unmatched elements for set filters + if ( bySet ) { + // They will have gone through all possible matchers + if ( (elem = !matcher && elem) ) { + matchedCount--; + } + + // Lengthen the array for every element, matched or not + if ( seed ) { + unmatched.push( elem ); + } + } + } + + // Apply set filters to unmatched elements + matchedCount += i; + if ( bySet && i !== matchedCount ) { + for ( j = 0; (matcher = setMatchers[j]); j++ ) { + matcher( unmatched, setMatched, context, xml ); + } + + if ( seed ) { + // Reintegrate element matches to eliminate the need for sorting + if ( matchedCount > 0 ) { + while ( i-- ) { + if ( !(unmatched[i] || setMatched[i]) ) { + setMatched[i] = pop.call( results ); + } + } + } + + // Discard index placeholder values to get only actual matches + setMatched = condense( setMatched ); + } + + // Add matches to results + push.apply( results, setMatched ); + + // Seedless set matches succeeding multiple successful matchers stipulate sorting + if ( outermost && !seed && setMatched.length > 0 && + ( matchedCount + setMatchers.length ) > 1 ) { + + Sizzle.uniqueSort( results ); + } + } + + // Override manipulation of globals by nested matchers + if ( outermost ) { + dirruns = dirrunsUnique; + outermostContext = contextBackup; + } + + return unmatched; + }; + + superMatcher.el = 0; + return bySet ? 
+ markFunction( superMatcher ) : + superMatcher; +} + +compile = Sizzle.compile = function( selector, group /* Internal Use Only */ ) { + var i, + setMatchers = [], + elementMatchers = [], + cached = compilerCache[ expando ][ selector + " " ]; + + if ( !cached ) { + // Generate a function of recursive functions that can be used to check each element + if ( !group ) { + group = tokenize( selector ); + } + i = group.length; + while ( i-- ) { + cached = matcherFromTokens( group[i] ); + if ( cached[ expando ] ) { + setMatchers.push( cached ); + } else { + elementMatchers.push( cached ); + } + } + + // Cache the compiled function + cached = compilerCache( selector, matcherFromGroupMatchers( elementMatchers, setMatchers ) ); + } + return cached; +}; + +function multipleContexts( selector, contexts, results ) { + var i = 0, + len = contexts.length; + for ( ; i < len; i++ ) { + Sizzle( selector, contexts[i], results ); + } + return results; +} + +function select( selector, context, results, seed, xml ) { + var i, tokens, token, type, find, + match = tokenize( selector ), + j = match.length; + + if ( !seed ) { + // Try to minimize operations if there is only one group + if ( match.length === 1 ) { + + // Take a shortcut and set the context if the root selector is an ID + tokens = match[0] = match[0].slice( 0 ); + if ( tokens.length > 2 && (token = tokens[0]).type === "ID" && + context.nodeType === 9 && !xml && + Expr.relative[ tokens[1].type ] ) { + + context = Expr.find["ID"]( token.matches[0].replace( rbackslash, "" ), context, xml )[0]; + if ( !context ) { + return results; + } + + selector = selector.slice( tokens.shift().length ); + } + + // Fetch a seed set for right-to-left matching + for ( i = matchExpr["POS"].test( selector ) ? -1 : tokens.length - 1; i >= 0; i-- ) { + token = tokens[i]; + + // Abort if we hit a combinator + if ( Expr.relative[ (type = token.type) ] ) { + break; + } + if ( (find = Expr.find[ type ]) ) { + // Search, expanding context for leading sibling combinators + if ( (seed = find( + token.matches[0].replace( rbackslash, "" ), + rsibling.test( tokens[0].type ) && context.parentNode || context, + xml + )) ) { + + // If seed is empty or no tokens remain, we can return early + tokens.splice( i, 1 ); + selector = seed.length && tokens.join(""); + if ( !selector ) { + push.apply( results, slice.call( seed, 0 ) ); + return results; + } + + break; + } + } + } + } + } + + // Compile and execute a filtering function + // Provide `match` to avoid retokenization if we modified the selector above + compile( selector, match )( + seed, + context, + xml, + results, + rsibling.test( selector ) + ); + return results; +} + +if ( document.querySelectorAll ) { + (function() { + var disconnectedMatch, + oldSelect = select, + rescape = /'|\\/g, + rattributeQuotes = /\=[\x20\t\r\n\f]*([^'"\]]*)[\x20\t\r\n\f]*\]/g, + + // qSa(:focus) reports false when true (Chrome 21), no need to also add to buggyMatches since matches checks buggyQSA + // A support test would require too much code (would include document ready) + rbuggyQSA = [ ":focus" ], + + // matchesSelector(:active) reports false when true (IE9/Opera 11.5) + // A support test would require too much code (would include document ready) + // just skip matchesSelector for :active + rbuggyMatches = [ ":active" ], + matches = docElem.matchesSelector || + docElem.mozMatchesSelector || + docElem.webkitMatchesSelector || + docElem.oMatchesSelector || + docElem.msMatchesSelector; + + // Build QSA regex + // Regex strategy adopted from Diego Perini 
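+ // Each assert() below feeds querySelectorAll deliberately tricky markup and, when the
+ // engine gets a selector wrong, pushes a matching pattern into rbuggyQSA; the select()
+ // override further down routes any selector matching that regex to the non-QSA path.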
+ assert(function( div ) { + // Select is set to empty string on purpose + // This is to test IE's treatment of not explictly + // setting a boolean content attribute, + // since its presence should be enough + // http://bugs.jquery.com/ticket/12359 + div.innerHTML = "<select><option selected=''></option></select>"; + + // IE8 - Some boolean attributes are not treated correctly + if ( !div.querySelectorAll("[selected]").length ) { + rbuggyQSA.push( "\\[" + whitespace + "*(?:checked|disabled|ismap|multiple|readonly|selected|value)" ); + } + + // Webkit/Opera - :checked should return selected option elements + // http://www.w3.org/TR/2011/REC-css3-selectors-20110929/#checked + // IE8 throws error here (do not put tests after this one) + if ( !div.querySelectorAll(":checked").length ) { + rbuggyQSA.push(":checked"); + } + }); + + assert(function( div ) { + + // Opera 10-12/IE9 - ^= $= *= and empty values + // Should not select anything + div.innerHTML = "<p test=''></p>"; + if ( div.querySelectorAll("[test^='']").length ) { + rbuggyQSA.push( "[*^$]=" + whitespace + "*(?:\"\"|'')" ); + } + + // FF 3.5 - :enabled/:disabled and hidden elements (hidden elements are still enabled) + // IE8 throws error here (do not put tests after this one) + div.innerHTML = "<input type='hidden'/>"; + if ( !div.querySelectorAll(":enabled").length ) { + rbuggyQSA.push(":enabled", ":disabled"); + } + }); + + // rbuggyQSA always contains :focus, so no need for a length check + rbuggyQSA = /* rbuggyQSA.length && */ new RegExp( rbuggyQSA.join("|") ); + + select = function( selector, context, results, seed, xml ) { + // Only use querySelectorAll when not filtering, + // when this is not xml, + // and when no QSA bugs apply + if ( !seed && !xml && !rbuggyQSA.test( selector ) ) { + var groups, i, + old = true, + nid = expando, + newContext = context, + newSelector = context.nodeType === 9 && selector; + + // qSA works strangely on Element-rooted queries + // We can work around this by specifying an extra ID on the root + // and working up from there (Thanks to Andrew Dupont for the technique) + // IE 8 doesn't work on object elements + if ( context.nodeType === 1 && context.nodeName.toLowerCase() !== "object" ) { + groups = tokenize( selector ); + + if ( (old = context.getAttribute("id")) ) { + nid = old.replace( rescape, "\\$&" ); + } else { + context.setAttribute( "id", nid ); + } + nid = "[id='" + nid + "'] "; + + i = groups.length; + while ( i-- ) { + groups[i] = nid + groups[i].join(""); + } + newContext = rsibling.test( selector ) && context.parentNode || context; + newSelector = groups.join(","); + } + + if ( newSelector ) { + try { + push.apply( results, slice.call( newContext.querySelectorAll( + newSelector + ), 0 ) ); + return results; + } catch(qsaError) { + } finally { + if ( !old ) { + context.removeAttribute("id"); + } + } + } + } + + return oldSelect( selector, context, results, seed, xml ); + }; + + if ( matches ) { + assert(function( div ) { + // Check to see if it's possible to do matchesSelector + // on a disconnected node (IE 9) + disconnectedMatch = matches.call( div, "div" ); + + // This should fail with an exception + // Gecko does not error, returns false instead + try { + matches.call( div, "[test!='']:sizzle" ); + rbuggyMatches.push( "!=", pseudos ); + } catch ( e ) {} + }); + + // rbuggyMatches always contains :active and :focus, so no need for a length check + rbuggyMatches = /* rbuggyMatches.length && */ new RegExp( rbuggyMatches.join("|") ); + + Sizzle.matchesSelector = function( elem, expr 
) { + // Make sure that attribute selectors are quoted + expr = expr.replace( rattributeQuotes, "='$1']" ); + + // rbuggyMatches always contains :active, so no need for an existence check + if ( !isXML( elem ) && !rbuggyMatches.test( expr ) && !rbuggyQSA.test( expr ) ) { + try { + var ret = matches.call( elem, expr ); + + // IE 9's matchesSelector returns false on disconnected nodes + if ( ret || disconnectedMatch || + // As well, disconnected nodes are said to be in a document + // fragment in IE 9 + elem.document && elem.document.nodeType !== 11 ) { + return ret; + } + } catch(e) {} + } + + return Sizzle( expr, null, null, [ elem ] ).length > 0; + }; + } + })(); +} + +// Deprecated +Expr.pseudos["nth"] = Expr.pseudos["eq"]; + +// Back-compat +function setFilters() {} +Expr.filters = setFilters.prototype = Expr.pseudos; +Expr.setFilters = new setFilters(); + +// Override sizzle attribute retrieval +Sizzle.attr = jQuery.attr; +jQuery.find = Sizzle; +jQuery.expr = Sizzle.selectors; +jQuery.expr[":"] = jQuery.expr.pseudos; +jQuery.unique = Sizzle.uniqueSort; +jQuery.text = Sizzle.getText; +jQuery.isXMLDoc = Sizzle.isXML; +jQuery.contains = Sizzle.contains; + + +})( window ); +var runtil = /Until$/, + rparentsprev = /^(?:parents|prev(?:Until|All))/, + isSimple = /^.[^:#\[\.,]*$/, + rneedsContext = jQuery.expr.match.needsContext, + // methods guaranteed to produce a unique set when starting from a unique set + guaranteedUnique = { + children: true, + contents: true, + next: true, + prev: true + }; + +jQuery.fn.extend({ + find: function( selector ) { + var i, l, length, n, r, ret, + self = this; + + if ( typeof selector !== "string" ) { + return jQuery( selector ).filter(function() { + for ( i = 0, l = self.length; i < l; i++ ) { + if ( jQuery.contains( self[ i ], this ) ) { + return true; + } + } + }); + } + + ret = this.pushStack( "", "find", selector ); + + for ( i = 0, l = this.length; i < l; i++ ) { + length = ret.length; + jQuery.find( selector, this[i], ret ); + + if ( i > 0 ) { + // Make sure that the results are unique + for ( n = length; n < ret.length; n++ ) { + for ( r = 0; r < length; r++ ) { + if ( ret[r] === ret[n] ) { + ret.splice(n--, 1); + break; + } + } + } + } + } + + return ret; + }, + + has: function( target ) { + var i, + targets = jQuery( target, this ), + len = targets.length; + + return this.filter(function() { + for ( i = 0; i < len; i++ ) { + if ( jQuery.contains( this, targets[i] ) ) { + return true; + } + } + }); + }, + + not: function( selector ) { + return this.pushStack( winnow(this, selector, false), "not", selector); + }, + + filter: function( selector ) { + return this.pushStack( winnow(this, selector, true), "filter", selector ); + }, + + is: function( selector ) { + return !!selector && ( + typeof selector === "string" ? + // If this is a positional/relative selector, check membership in the returned set + // so $("p:first").is("p:last") won't return true for a doc with two "p". + rneedsContext.test( selector ) ? + jQuery( selector, this.context ).index( this[0] ) >= 0 : + jQuery.filter( selector, this ).length > 0 : + this.filter( selector ).length > 0 ); + }, + + closest: function( selectors, context ) { + var cur, + i = 0, + l = this.length, + ret = [], + pos = rneedsContext.test( selectors ) || typeof selectors !== "string" ? + jQuery( selectors, context || this.context ) : + 0; + + for ( ; i < l; i++ ) { + cur = this[i]; + + while ( cur && cur.ownerDocument && cur !== context && cur.nodeType !== 11 ) { + if ( pos ? 
pos.index(cur) > -1 : jQuery.find.matchesSelector(cur, selectors) ) { + ret.push( cur ); + break; + } + cur = cur.parentNode; + } + } + + ret = ret.length > 1 ? jQuery.unique( ret ) : ret; + + return this.pushStack( ret, "closest", selectors ); + }, + + // Determine the position of an element within + // the matched set of elements + index: function( elem ) { + + // No argument, return index in parent + if ( !elem ) { + return ( this[0] && this[0].parentNode ) ? this.prevAll().length : -1; + } + + // index in selector + if ( typeof elem === "string" ) { + return jQuery.inArray( this[0], jQuery( elem ) ); + } + + // Locate the position of the desired element + return jQuery.inArray( + // If it receives a jQuery object, the first element is used + elem.jquery ? elem[0] : elem, this ); + }, + + add: function( selector, context ) { + var set = typeof selector === "string" ? + jQuery( selector, context ) : + jQuery.makeArray( selector && selector.nodeType ? [ selector ] : selector ), + all = jQuery.merge( this.get(), set ); + + return this.pushStack( isDisconnected( set[0] ) || isDisconnected( all[0] ) ? + all : + jQuery.unique( all ) ); + }, + + addBack: function( selector ) { + return this.add( selector == null ? + this.prevObject : this.prevObject.filter(selector) + ); + } +}); + +jQuery.fn.andSelf = jQuery.fn.addBack; + +// A painfully simple check to see if an element is disconnected +// from a document (should be improved, where feasible). +function isDisconnected( node ) { + return !node || !node.parentNode || node.parentNode.nodeType === 11; +} + +function sibling( cur, dir ) { + do { + cur = cur[ dir ]; + } while ( cur && cur.nodeType !== 1 ); + + return cur; +} + +jQuery.each({ + parent: function( elem ) { + var parent = elem.parentNode; + return parent && parent.nodeType !== 11 ? parent : null; + }, + parents: function( elem ) { + return jQuery.dir( elem, "parentNode" ); + }, + parentsUntil: function( elem, i, until ) { + return jQuery.dir( elem, "parentNode", until ); + }, + next: function( elem ) { + return sibling( elem, "nextSibling" ); + }, + prev: function( elem ) { + return sibling( elem, "previousSibling" ); + }, + nextAll: function( elem ) { + return jQuery.dir( elem, "nextSibling" ); + }, + prevAll: function( elem ) { + return jQuery.dir( elem, "previousSibling" ); + }, + nextUntil: function( elem, i, until ) { + return jQuery.dir( elem, "nextSibling", until ); + }, + prevUntil: function( elem, i, until ) { + return jQuery.dir( elem, "previousSibling", until ); + }, + siblings: function( elem ) { + return jQuery.sibling( ( elem.parentNode || {} ).firstChild, elem ); + }, + children: function( elem ) { + return jQuery.sibling( elem.firstChild ); + }, + contents: function( elem ) { + return jQuery.nodeName( elem, "iframe" ) ? + elem.contentDocument || elem.contentWindow.document : + jQuery.merge( [], elem.childNodes ); + } +}, function( name, fn ) { + jQuery.fn[ name ] = function( until, selector ) { + var ret = jQuery.map( this, fn, until ); + + if ( !runtil.test( name ) ) { + selector = until; + } + + if ( selector && typeof selector === "string" ) { + ret = jQuery.filter( selector, ret ); + } + + ret = this.length > 1 && !guaranteedUnique[ name ] ? 
jQuery.unique( ret ) : ret; + + if ( this.length > 1 && rparentsprev.test( name ) ) { + ret = ret.reverse(); + } + + return this.pushStack( ret, name, core_slice.call( arguments ).join(",") ); + }; +}); + +jQuery.extend({ + filter: function( expr, elems, not ) { + if ( not ) { + expr = ":not(" + expr + ")"; + } + + return elems.length === 1 ? + jQuery.find.matchesSelector(elems[0], expr) ? [ elems[0] ] : [] : + jQuery.find.matches(expr, elems); + }, + + dir: function( elem, dir, until ) { + var matched = [], + cur = elem[ dir ]; + + while ( cur && cur.nodeType !== 9 && (until === undefined || cur.nodeType !== 1 || !jQuery( cur ).is( until )) ) { + if ( cur.nodeType === 1 ) { + matched.push( cur ); + } + cur = cur[dir]; + } + return matched; + }, + + sibling: function( n, elem ) { + var r = []; + + for ( ; n; n = n.nextSibling ) { + if ( n.nodeType === 1 && n !== elem ) { + r.push( n ); + } + } + + return r; + } +}); + +// Implement the identical functionality for filter and not +function winnow( elements, qualifier, keep ) { + + // Can't pass null or undefined to indexOf in Firefox 4 + // Set to 0 to skip string check + qualifier = qualifier || 0; + + if ( jQuery.isFunction( qualifier ) ) { + return jQuery.grep(elements, function( elem, i ) { + var retVal = !!qualifier.call( elem, i, elem ); + return retVal === keep; + }); + + } else if ( qualifier.nodeType ) { + return jQuery.grep(elements, function( elem, i ) { + return ( elem === qualifier ) === keep; + }); + + } else if ( typeof qualifier === "string" ) { + var filtered = jQuery.grep(elements, function( elem ) { + return elem.nodeType === 1; + }); + + if ( isSimple.test( qualifier ) ) { + return jQuery.filter(qualifier, filtered, !keep); + } else { + qualifier = jQuery.filter( qualifier, filtered ); + } + } + + return jQuery.grep(elements, function( elem, i ) { + return ( jQuery.inArray( elem, qualifier ) >= 0 ) === keep; + }); +} +function createSafeFragment( document ) { + var list = nodeNames.split( "|" ), + safeFrag = document.createDocumentFragment(); + + if ( safeFrag.createElement ) { + while ( list.length ) { + safeFrag.createElement( + list.pop() + ); + } + } + return safeFrag; +} + +var nodeNames = "abbr|article|aside|audio|bdi|canvas|data|datalist|details|figcaption|figure|footer|" + + "header|hgroup|mark|meter|nav|output|progress|section|summary|time|video", + rinlinejQuery = / jQuery\d+="(?:null|\d+)"/g, + rleadingWhitespace = /^\s+/, + rxhtmlTag = /<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:]+)[^>]*)\/>/gi, + rtagName = /<([\w:]+)/, + rtbody = /<tbody/i, + rhtml = /<|&#?\w+;/, + rnoInnerhtml = /<(?:script|style|link)/i, + rnocache = /<(?:script|object|embed|option|style)/i, + rnoshimcache = new RegExp("<(?:" + nodeNames + ")[\\s/>]", "i"), + rcheckableType = /^(?:checkbox|radio)$/, + // checked="checked" or checked + rchecked = /checked\s*(?:[^=]|=\s*.checked.)/i, + rscriptType = /\/(java|ecma)script/i, + rcleanScript = /^\s*<!(?:\[CDATA\[|\-\-)|[\]\-]{2}>\s*$/g, + wrapMap = { + option: [ 1, "<select multiple='multiple'>", "</select>" ], + legend: [ 1, "<fieldset>", "</fieldset>" ], + thead: [ 1, "<table>", "</table>" ], + tr: [ 2, "<table><tbody>", "</tbody></table>" ], + td: [ 3, "<table><tbody><tr>", "</tr></tbody></table>" ], + col: [ 2, "<table><tbody></tbody><colgroup>", "</colgroup></table>" ], + area: [ 1, "<map>", "</map>" ], + _default: [ 0, "", "" ] + }, + safeFragment = createSafeFragment( document ), + fragmentDiv = safeFragment.appendChild( document.createElement("div") ); + +wrapMap.optgroup = 
wrapMap.option; +wrapMap.tbody = wrapMap.tfoot = wrapMap.colgroup = wrapMap.caption = wrapMap.thead; +wrapMap.th = wrapMap.td; + +// IE6-8 can't serialize link, script, style, or any html5 (NoScope) tags, +// unless wrapped in a div with non-breaking characters in front of it. +if ( !jQuery.support.htmlSerialize ) { + wrapMap._default = [ 1, "X<div>", "</div>" ]; +} + +jQuery.fn.extend({ + text: function( value ) { + return jQuery.access( this, function( value ) { + return value === undefined ? + jQuery.text( this ) : + this.empty().append( ( this[0] && this[0].ownerDocument || document ).createTextNode( value ) ); + }, null, value, arguments.length ); + }, + + wrapAll: function( html ) { + if ( jQuery.isFunction( html ) ) { + return this.each(function(i) { + jQuery(this).wrapAll( html.call(this, i) ); + }); + } + + if ( this[0] ) { + // The elements to wrap the target around + var wrap = jQuery( html, this[0].ownerDocument ).eq(0).clone(true); + + if ( this[0].parentNode ) { + wrap.insertBefore( this[0] ); + } + + wrap.map(function() { + var elem = this; + + while ( elem.firstChild && elem.firstChild.nodeType === 1 ) { + elem = elem.firstChild; + } + + return elem; + }).append( this ); + } + + return this; + }, + + wrapInner: function( html ) { + if ( jQuery.isFunction( html ) ) { + return this.each(function(i) { + jQuery(this).wrapInner( html.call(this, i) ); + }); + } + + return this.each(function() { + var self = jQuery( this ), + contents = self.contents(); + + if ( contents.length ) { + contents.wrapAll( html ); + + } else { + self.append( html ); + } + }); + }, + + wrap: function( html ) { + var isFunction = jQuery.isFunction( html ); + + return this.each(function(i) { + jQuery( this ).wrapAll( isFunction ? html.call(this, i) : html ); + }); + }, + + unwrap: function() { + return this.parent().each(function() { + if ( !jQuery.nodeName( this, "body" ) ) { + jQuery( this ).replaceWith( this.childNodes ); + } + }).end(); + }, + + append: function() { + return this.domManip(arguments, true, function( elem ) { + if ( this.nodeType === 1 || this.nodeType === 11 ) { + this.appendChild( elem ); + } + }); + }, + + prepend: function() { + return this.domManip(arguments, true, function( elem ) { + if ( this.nodeType === 1 || this.nodeType === 11 ) { + this.insertBefore( elem, this.firstChild ); + } + }); + }, + + before: function() { + if ( !isDisconnected( this[0] ) ) { + return this.domManip(arguments, false, function( elem ) { + this.parentNode.insertBefore( elem, this ); + }); + } + + if ( arguments.length ) { + var set = jQuery.clean( arguments ); + return this.pushStack( jQuery.merge( set, this ), "before", this.selector ); + } + }, + + after: function() { + if ( !isDisconnected( this[0] ) ) { + return this.domManip(arguments, false, function( elem ) { + this.parentNode.insertBefore( elem, this.nextSibling ); + }); + } + + if ( arguments.length ) { + var set = jQuery.clean( arguments ); + return this.pushStack( jQuery.merge( this, set ), "after", this.selector ); + } + }, + + // keepData is for internal use only--do not document + remove: function( selector, keepData ) { + var elem, + i = 0; + + for ( ; (elem = this[i]) != null; i++ ) { + if ( !selector || jQuery.filter( selector, [ elem ] ).length ) { + if ( !keepData && elem.nodeType === 1 ) { + jQuery.cleanData( elem.getElementsByTagName("*") ); + jQuery.cleanData( [ elem ] ); + } + + if ( elem.parentNode ) { + elem.parentNode.removeChild( elem ); + } + } + } + + return this; + }, + + empty: function() { + var elem, + i = 0; + + for ( 
; (elem = this[i]) != null; i++ ) { + // Remove element nodes and prevent memory leaks + if ( elem.nodeType === 1 ) { + jQuery.cleanData( elem.getElementsByTagName("*") ); + } + + // Remove any remaining nodes + while ( elem.firstChild ) { + elem.removeChild( elem.firstChild ); + } + } + + return this; + }, + + clone: function( dataAndEvents, deepDataAndEvents ) { + dataAndEvents = dataAndEvents == null ? false : dataAndEvents; + deepDataAndEvents = deepDataAndEvents == null ? dataAndEvents : deepDataAndEvents; + + return this.map( function () { + return jQuery.clone( this, dataAndEvents, deepDataAndEvents ); + }); + }, + + html: function( value ) { + return jQuery.access( this, function( value ) { + var elem = this[0] || {}, + i = 0, + l = this.length; + + if ( value === undefined ) { + return elem.nodeType === 1 ? + elem.innerHTML.replace( rinlinejQuery, "" ) : + undefined; + } + + // See if we can take a shortcut and just use innerHTML + if ( typeof value === "string" && !rnoInnerhtml.test( value ) && + ( jQuery.support.htmlSerialize || !rnoshimcache.test( value ) ) && + ( jQuery.support.leadingWhitespace || !rleadingWhitespace.test( value ) ) && + !wrapMap[ ( rtagName.exec( value ) || ["", ""] )[1].toLowerCase() ] ) { + + value = value.replace( rxhtmlTag, "<$1></$2>" ); + + try { + for (; i < l; i++ ) { + // Remove element nodes and prevent memory leaks + elem = this[i] || {}; + if ( elem.nodeType === 1 ) { + jQuery.cleanData( elem.getElementsByTagName( "*" ) ); + elem.innerHTML = value; + } + } + + elem = 0; + + // If using innerHTML throws an exception, use the fallback method + } catch(e) {} + } + + if ( elem ) { + this.empty().append( value ); + } + }, null, value, arguments.length ); + }, + + replaceWith: function( value ) { + if ( !isDisconnected( this[0] ) ) { + // Make sure that the elements are removed from the DOM before they are inserted + // this can help fix replacing a parent with child elements + if ( jQuery.isFunction( value ) ) { + return this.each(function(i) { + var self = jQuery(this), old = self.html(); + self.replaceWith( value.call( this, i, old ) ); + }); + } + + if ( typeof value !== "string" ) { + value = jQuery( value ).detach(); + } + + return this.each(function() { + var next = this.nextSibling, + parent = this.parentNode; + + jQuery( this ).remove(); + + if ( next ) { + jQuery(next).before( value ); + } else { + jQuery(parent).append( value ); + } + }); + } + + return this.length ? + this.pushStack( jQuery(jQuery.isFunction(value) ? value() : value), "replaceWith", value ) : + this; + }, + + detach: function( selector ) { + return this.remove( selector, true ); + }, + + domManip: function( args, table, callback ) { + + // Flatten any nested arrays + args = [].concat.apply( [], args ); + + var results, first, fragment, iNoClone, + i = 0, + value = args[0], + scripts = [], + l = this.length; + + // We can't cloneNode fragments that contain checked, in WebKit + if ( !jQuery.support.checkClone && l > 1 && typeof value === "string" && rchecked.test( value ) ) { + return this.each(function() { + jQuery(this).domManip( args, table, callback ); + }); + } + + if ( jQuery.isFunction(value) ) { + return this.each(function(i) { + var self = jQuery(this); + args[0] = value.call( this, i, table ? 
self.html() : undefined ); + self.domManip( args, table, callback ); + }); + } + + if ( this[0] ) { + results = jQuery.buildFragment( args, this, scripts ); + fragment = results.fragment; + first = fragment.firstChild; + + if ( fragment.childNodes.length === 1 ) { + fragment = first; + } + + if ( first ) { + table = table && jQuery.nodeName( first, "tr" ); + + // Use the original fragment for the last item instead of the first because it can end up + // being emptied incorrectly in certain situations (#8070). + // Fragments from the fragment cache must always be cloned and never used in place. + for ( iNoClone = results.cacheable || l - 1; i < l; i++ ) { + callback.call( + table && jQuery.nodeName( this[i], "table" ) ? + findOrAppend( this[i], "tbody" ) : + this[i], + i === iNoClone ? + fragment : + jQuery.clone( fragment, true, true ) + ); + } + } + + // Fix #11809: Avoid leaking memory + fragment = first = null; + + if ( scripts.length ) { + jQuery.each( scripts, function( i, elem ) { + if ( elem.src ) { + if ( jQuery.ajax ) { + jQuery.ajax({ + url: elem.src, + type: "GET", + dataType: "script", + async: false, + global: false, + "throws": true + }); + } else { + jQuery.error("no ajax"); + } + } else { + jQuery.globalEval( ( elem.text || elem.textContent || elem.innerHTML || "" ).replace( rcleanScript, "" ) ); + } + + if ( elem.parentNode ) { + elem.parentNode.removeChild( elem ); + } + }); + } + } + + return this; + } +}); + +function findOrAppend( elem, tag ) { + return elem.getElementsByTagName( tag )[0] || elem.appendChild( elem.ownerDocument.createElement( tag ) ); +} + +function cloneCopyEvent( src, dest ) { + + if ( dest.nodeType !== 1 || !jQuery.hasData( src ) ) { + return; + } + + var type, i, l, + oldData = jQuery._data( src ), + curData = jQuery._data( dest, oldData ), + events = oldData.events; + + if ( events ) { + delete curData.handle; + curData.events = {}; + + for ( type in events ) { + for ( i = 0, l = events[ type ].length; i < l; i++ ) { + jQuery.event.add( dest, type, events[ type ][ i ] ); + } + } + } + + // make the cloned public data object a copy from the original + if ( curData.data ) { + curData.data = jQuery.extend( {}, curData.data ); + } +} + +function cloneFixAttributes( src, dest ) { + var nodeName; + + // We do not need to do anything for non-Elements + if ( dest.nodeType !== 1 ) { + return; + } + + // clearAttributes removes the attributes, which we don't want, + // but also removes the attachEvent events, which we *do* want + if ( dest.clearAttributes ) { + dest.clearAttributes(); + } + + // mergeAttributes, in contrast, only merges back on the + // original attributes, not the events + if ( dest.mergeAttributes ) { + dest.mergeAttributes( src ); + } + + nodeName = dest.nodeName.toLowerCase(); + + if ( nodeName === "object" ) { + // IE6-10 improperly clones children of object elements using classid. + // IE10 throws NoModificationAllowedError if parent is null, #12132. + if ( dest.parentNode ) { + dest.outerHTML = src.outerHTML; + } + + // This path appears unavoidable for IE9. When cloning an object + // element in IE9, the outerHTML strategy above is not sufficient. + // If the src has innerHTML and the destination does not, + // copy the src.innerHTML into the dest.innerHTML. 
#10324 + if ( jQuery.support.html5Clone && (src.innerHTML && !jQuery.trim(dest.innerHTML)) ) { + dest.innerHTML = src.innerHTML; + } + + } else if ( nodeName === "input" && rcheckableType.test( src.type ) ) { + // IE6-8 fails to persist the checked state of a cloned checkbox + // or radio button. Worse, IE6-7 fail to give the cloned element + // a checked appearance if the defaultChecked value isn't also set + + dest.defaultChecked = dest.checked = src.checked; + + // IE6-7 get confused and end up setting the value of a cloned + // checkbox/radio button to an empty string instead of "on" + if ( dest.value !== src.value ) { + dest.value = src.value; + } + + // IE6-8 fails to return the selected option to the default selected + // state when cloning options + } else if ( nodeName === "option" ) { + dest.selected = src.defaultSelected; + + // IE6-8 fails to set the defaultValue to the correct value when + // cloning other types of input fields + } else if ( nodeName === "input" || nodeName === "textarea" ) { + dest.defaultValue = src.defaultValue; + + // IE blanks contents when cloning scripts + } else if ( nodeName === "script" && dest.text !== src.text ) { + dest.text = src.text; + } + + // Event data gets referenced instead of copied if the expando + // gets copied too + dest.removeAttribute( jQuery.expando ); +} + +jQuery.buildFragment = function( args, context, scripts ) { + var fragment, cacheable, cachehit, + first = args[ 0 ]; + + // Set context from what may come in as undefined or a jQuery collection or a node + // Updated to fix #12266 where accessing context[0] could throw an exception in IE9/10 & + // also doubles as fix for #8950 where plain objects caused createDocumentFragment exception + context = context || document; + context = !context.nodeType && context[0] || context; + context = context.ownerDocument || context; + + // Only cache "small" (1/2 KB) HTML strings that are associated with the main document + // Cloning options loses the selected state, so don't cache them + // IE 6 doesn't like it when you put <object> or <embed> elements in a fragment + // Also, WebKit does not clone 'checked' attributes on cloneNode, so don't cache + // Lastly, IE6,7,8 will not correctly reuse cached fragments that were created from unknown elems #10501 + if ( args.length === 1 && typeof first === "string" && first.length < 512 && context === document && + first.charAt(0) === "<" && !rnocache.test( first ) && + (jQuery.support.checkClone || !rchecked.test( first )) && + (jQuery.support.html5Clone || !rnoshimcache.test( first )) ) { + + // Mark cacheable and look for a hit + cacheable = true; + fragment = jQuery.fragments[ first ]; + cachehit = fragment !== undefined; + } + + if ( !fragment ) { + fragment = context.createDocumentFragment(); + jQuery.clean( args, context, fragment, scripts ); + + // Update the cache, but only store false + // unless this is a second parsing of the same content + if ( cacheable ) { + jQuery.fragments[ first ] = cachehit && fragment; + } + } + + return { fragment: fragment, cacheable: cacheable }; +}; + +jQuery.fragments = {}; + +jQuery.each({ + appendTo: "append", + prependTo: "prepend", + insertBefore: "before", + insertAfter: "after", + replaceAll: "replaceWith" +}, function( name, original ) { + jQuery.fn[ name ] = function( selector ) { + var elems, + i = 0, + ret = [], + insert = jQuery( selector ), + l = insert.length, + parent = this.length === 1 && this[0].parentNode; + + if ( (parent == null || parent && parent.nodeType === 11 && 
parent.childNodes.length === 1) && l === 1 ) { + insert[ original ]( this[0] ); + return this; + } else { + for ( ; i < l; i++ ) { + elems = ( i > 0 ? this.clone(true) : this ).get(); + jQuery( insert[i] )[ original ]( elems ); + ret = ret.concat( elems ); + } + + return this.pushStack( ret, name, insert.selector ); + } + }; +}); + +function getAll( elem ) { + if ( typeof elem.getElementsByTagName !== "undefined" ) { + return elem.getElementsByTagName( "*" ); + + } else if ( typeof elem.querySelectorAll !== "undefined" ) { + return elem.querySelectorAll( "*" ); + + } else { + return []; + } +} + +// Used in clean, fixes the defaultChecked property +function fixDefaultChecked( elem ) { + if ( rcheckableType.test( elem.type ) ) { + elem.defaultChecked = elem.checked; + } +} + +jQuery.extend({ + clone: function( elem, dataAndEvents, deepDataAndEvents ) { + var srcElements, + destElements, + i, + clone; + + if ( jQuery.support.html5Clone || jQuery.isXMLDoc(elem) || !rnoshimcache.test( "<" + elem.nodeName + ">" ) ) { + clone = elem.cloneNode( true ); + + // IE<=8 does not properly clone detached, unknown element nodes + } else { + fragmentDiv.innerHTML = elem.outerHTML; + fragmentDiv.removeChild( clone = fragmentDiv.firstChild ); + } + + if ( (!jQuery.support.noCloneEvent || !jQuery.support.noCloneChecked) && + (elem.nodeType === 1 || elem.nodeType === 11) && !jQuery.isXMLDoc(elem) ) { + // IE copies events bound via attachEvent when using cloneNode. + // Calling detachEvent on the clone will also remove the events + // from the original. In order to get around this, we use some + // proprietary methods to clear the events. Thanks to MooTools + // guys for this hotness. + + cloneFixAttributes( elem, clone ); + + // Using Sizzle here is crazy slow, so we use getElementsByTagName instead + srcElements = getAll( elem ); + destElements = getAll( clone ); + + // Weird iteration because IE will replace the length property + // with an element if you are cloning the body and one of the + // elements on the page has a name or id of "length" + for ( i = 0; srcElements[i]; ++i ) { + // Ensure that the destination node is not null; Fixes #9587 + if ( destElements[i] ) { + cloneFixAttributes( srcElements[i], destElements[i] ); + } + } + } + + // Copy the events from the original to the clone + if ( dataAndEvents ) { + cloneCopyEvent( elem, clone ); + + if ( deepDataAndEvents ) { + srcElements = getAll( elem ); + destElements = getAll( clone ); + + for ( i = 0; srcElements[i]; ++i ) { + cloneCopyEvent( srcElements[i], destElements[i] ); + } + } + } + + srcElements = destElements = null; + + // Return the cloned set + return clone; + }, + + clean: function( elems, context, fragment, scripts ) { + var i, j, elem, tag, wrap, depth, div, hasBody, tbody, len, handleScript, jsTags, + safe = context === document && safeFragment, + ret = []; + + // Ensure that context is a document + if ( !context || typeof context.createDocumentFragment === "undefined" ) { + context = document; + } + + // Use the already-created safe fragment if context permits + for ( i = 0; (elem = elems[i]) != null; i++ ) { + if ( typeof elem === "number" ) { + elem += ""; + } + + if ( !elem ) { + continue; + } + + // Convert html string into DOM nodes + if ( typeof elem === "string" ) { + if ( !rhtml.test( elem ) ) { + elem = context.createTextNode( elem ); + } else { + // Ensure a safe container in which to render the html + safe = safe || createSafeFragment( context ); + div = context.createElement("div"); + safe.appendChild( div ); + + // 
Fix "XHTML"-style tags in all browsers + elem = elem.replace(rxhtmlTag, "<$1></$2>"); + + // Go to html and back, then peel off extra wrappers + tag = ( rtagName.exec( elem ) || ["", ""] )[1].toLowerCase(); + wrap = wrapMap[ tag ] || wrapMap._default; + depth = wrap[0]; + div.innerHTML = wrap[1] + elem + wrap[2]; + + // Move to the right depth + while ( depth-- ) { + div = div.lastChild; + } + + // Remove IE's autoinserted <tbody> from table fragments + if ( !jQuery.support.tbody ) { + + // String was a <table>, *may* have spurious <tbody> + hasBody = rtbody.test(elem); + tbody = tag === "table" && !hasBody ? + div.firstChild && div.firstChild.childNodes : + + // String was a bare <thead> or <tfoot> + wrap[1] === "<table>" && !hasBody ? + div.childNodes : + []; + + for ( j = tbody.length - 1; j >= 0 ; --j ) { + if ( jQuery.nodeName( tbody[ j ], "tbody" ) && !tbody[ j ].childNodes.length ) { + tbody[ j ].parentNode.removeChild( tbody[ j ] ); + } + } + } + + // IE completely kills leading whitespace when innerHTML is used + if ( !jQuery.support.leadingWhitespace && rleadingWhitespace.test( elem ) ) { + div.insertBefore( context.createTextNode( rleadingWhitespace.exec(elem)[0] ), div.firstChild ); + } + + elem = div.childNodes; + + // Take out of fragment container (we need a fresh div each time) + div.parentNode.removeChild( div ); + } + } + + if ( elem.nodeType ) { + ret.push( elem ); + } else { + jQuery.merge( ret, elem ); + } + } + + // Fix #11356: Clear elements from safeFragment + if ( div ) { + elem = div = safe = null; + } + + // Reset defaultChecked for any radios and checkboxes + // about to be appended to the DOM in IE 6/7 (#8060) + if ( !jQuery.support.appendChecked ) { + for ( i = 0; (elem = ret[i]) != null; i++ ) { + if ( jQuery.nodeName( elem, "input" ) ) { + fixDefaultChecked( elem ); + } else if ( typeof elem.getElementsByTagName !== "undefined" ) { + jQuery.grep( elem.getElementsByTagName("input"), fixDefaultChecked ); + } + } + } + + // Append elements to a provided document fragment + if ( fragment ) { + // Special handling of each script element + handleScript = function( elem ) { + // Check if we consider it executable + if ( !elem.type || rscriptType.test( elem.type ) ) { + // Detach the script and store it in the scripts array (if provided) or the fragment + // Return truthy to indicate that it has been handled + return scripts ? + scripts.push( elem.parentNode ? 
elem.parentNode.removeChild( elem ) : elem ) : + fragment.appendChild( elem ); + } + }; + + for ( i = 0; (elem = ret[i]) != null; i++ ) { + // Check if we're done after handling an executable script + if ( !( jQuery.nodeName( elem, "script" ) && handleScript( elem ) ) ) { + // Append to fragment and handle embedded scripts + fragment.appendChild( elem ); + if ( typeof elem.getElementsByTagName !== "undefined" ) { + // handleScript alters the DOM, so use jQuery.merge to ensure snapshot iteration + jsTags = jQuery.grep( jQuery.merge( [], elem.getElementsByTagName("script") ), handleScript ); + + // Splice the scripts into ret after their former ancestor and advance our index beyond them + ret.splice.apply( ret, [i + 1, 0].concat( jsTags ) ); + i += jsTags.length; + } + } + } + } + + return ret; + }, + + cleanData: function( elems, /* internal */ acceptData ) { + var data, id, elem, type, + i = 0, + internalKey = jQuery.expando, + cache = jQuery.cache, + deleteExpando = jQuery.support.deleteExpando, + special = jQuery.event.special; + + for ( ; (elem = elems[i]) != null; i++ ) { + + if ( acceptData || jQuery.acceptData( elem ) ) { + + id = elem[ internalKey ]; + data = id && cache[ id ]; + + if ( data ) { + if ( data.events ) { + for ( type in data.events ) { + if ( special[ type ] ) { + jQuery.event.remove( elem, type ); + + // This is a shortcut to avoid jQuery.event.remove's overhead + } else { + jQuery.removeEvent( elem, type, data.handle ); + } + } + } + + // Remove cache only if it was not already removed by jQuery.event.remove + if ( cache[ id ] ) { + + delete cache[ id ]; + + // IE does not allow us to delete expando properties from nodes, + // nor does it have a removeAttribute function on Document nodes; + // we must handle all of these cases + if ( deleteExpando ) { + delete elem[ internalKey ]; + + } else if ( elem.removeAttribute ) { + elem.removeAttribute( internalKey ); + + } else { + elem[ internalKey ] = null; + } + + jQuery.deletedIds.push( id ); + } + } + } + } + } +}); +// Limit scope pollution from any deprecated API +(function() { + +var matched, browser; + +// Use of jQuery.browser is frowned upon. +// More details: http://api.jquery.com/jQuery.browser +// jQuery.uaMatch maintained for back-compat +jQuery.uaMatch = function( ua ) { + ua = ua.toLowerCase(); + + var match = /(chrome)[ \/]([\w.]+)/.exec( ua ) || + /(webkit)[ \/]([\w.]+)/.exec( ua ) || + /(opera)(?:.*version|)[ \/]([\w.]+)/.exec( ua ) || + /(msie) ([\w.]+)/.exec( ua ) || + ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec( ua ) || + []; + + return { + browser: match[ 1 ] || "", + version: match[ 2 ] || "0" + }; +}; + +matched = jQuery.uaMatch( navigator.userAgent ); +browser = {}; + +if ( matched.browser ) { + browser[ matched.browser ] = true; + browser.version = matched.version; +} + +// Chrome is Webkit, but Webkit is also Safari. 
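+// For example, a user agent string containing "Chrome/21.0" yields
+// { browser: "chrome", version: "21.0" }, so the chrome flag below also implies webkit,
+// while a non-Chrome WebKit UA is additionally flagged as safari.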
+if ( browser.chrome ) { + browser.webkit = true; +} else if ( browser.webkit ) { + browser.safari = true; +} + +jQuery.browser = browser; + +jQuery.sub = function() { + function jQuerySub( selector, context ) { + return new jQuerySub.fn.init( selector, context ); + } + jQuery.extend( true, jQuerySub, this ); + jQuerySub.superclass = this; + jQuerySub.fn = jQuerySub.prototype = this(); + jQuerySub.fn.constructor = jQuerySub; + jQuerySub.sub = this.sub; + jQuerySub.fn.init = function init( selector, context ) { + if ( context && context instanceof jQuery && !(context instanceof jQuerySub) ) { + context = jQuerySub( context ); + } + + return jQuery.fn.init.call( this, selector, context, rootjQuerySub ); + }; + jQuerySub.fn.init.prototype = jQuerySub.fn; + var rootjQuerySub = jQuerySub(document); + return jQuerySub; +}; + +})(); +var curCSS, iframe, iframeDoc, + ralpha = /alpha\([^)]*\)/i, + ropacity = /opacity=([^)]*)/, + rposition = /^(top|right|bottom|left)$/, + // swappable if display is none or starts with table except "table", "table-cell", or "table-caption" + // see here for display values: https://developer.mozilla.org/en-US/docs/CSS/display + rdisplayswap = /^(none|table(?!-c[ea]).+)/, + rmargin = /^margin/, + rnumsplit = new RegExp( "^(" + core_pnum + ")(.*)$", "i" ), + rnumnonpx = new RegExp( "^(" + core_pnum + ")(?!px)[a-z%]+$", "i" ), + rrelNum = new RegExp( "^([-+])=(" + core_pnum + ")", "i" ), + elemdisplay = { BODY: "block" }, + + cssShow = { position: "absolute", visibility: "hidden", display: "block" }, + cssNormalTransform = { + letterSpacing: 0, + fontWeight: 400 + }, + + cssExpand = [ "Top", "Right", "Bottom", "Left" ], + cssPrefixes = [ "Webkit", "O", "Moz", "ms" ], + + eventsToggle = jQuery.fn.toggle; + +// return a css property mapped to a potentially vendor prefixed property +function vendorPropName( style, name ) { + + // shortcut for names that are not vendor prefixed + if ( name in style ) { + return name; + } + + // check for vendor prefixed names + var capName = name.charAt(0).toUpperCase() + name.slice(1), + origName = name, + i = cssPrefixes.length; + + while ( i-- ) { + name = cssPrefixes[ i ] + capName; + if ( name in style ) { + return name; + } + } + + return origName; +} + +function isHidden( elem, el ) { + elem = el || elem; + return jQuery.css( elem, "display" ) === "none" || !jQuery.contains( elem.ownerDocument, elem ); +} + +function showHide( elements, show ) { + var elem, display, + values = [], + index = 0, + length = elements.length; + + for ( ; index < length; index++ ) { + elem = elements[ index ]; + if ( !elem.style ) { + continue; + } + values[ index ] = jQuery._data( elem, "olddisplay" ); + if ( show ) { + // Reset the inline display of this element to learn if it is + // being hidden by cascaded rules or not + if ( !values[ index ] && elem.style.display === "none" ) { + elem.style.display = ""; + } + + // Set elements which have been overridden with display: none + // in a stylesheet to whatever the default browser style is + // for such an element + if ( elem.style.display === "" && isHidden( elem ) ) { + values[ index ] = jQuery._data( elem, "olddisplay", css_defaultDisplay(elem.nodeName) ); + } + } else { + display = curCSS( elem, "display" ); + + if ( !values[ index ] && display !== "none" ) { + jQuery._data( elem, "olddisplay", display ); + } + } + } + + // Set the display of most of the elements in a second loop + // to avoid the constant reflow + for ( index = 0; index < length; index++ ) { + elem = elements[ index ]; + if ( 
!elem.style ) { + continue; + } + if ( !show || elem.style.display === "none" || elem.style.display === "" ) { + elem.style.display = show ? values[ index ] || "" : "none"; + } + } + + return elements; +} + +jQuery.fn.extend({ + css: function( name, value ) { + return jQuery.access( this, function( elem, name, value ) { + return value !== undefined ? + jQuery.style( elem, name, value ) : + jQuery.css( elem, name ); + }, name, value, arguments.length > 1 ); + }, + show: function() { + return showHide( this, true ); + }, + hide: function() { + return showHide( this ); + }, + toggle: function( state, fn2 ) { + var bool = typeof state === "boolean"; + + if ( jQuery.isFunction( state ) && jQuery.isFunction( fn2 ) ) { + return eventsToggle.apply( this, arguments ); + } + + return this.each(function() { + if ( bool ? state : isHidden( this ) ) { + jQuery( this ).show(); + } else { + jQuery( this ).hide(); + } + }); + } +}); + +jQuery.extend({ + // Add in style property hooks for overriding the default + // behavior of getting and setting a style property + cssHooks: { + opacity: { + get: function( elem, computed ) { + if ( computed ) { + // We should always get a number back from opacity + var ret = curCSS( elem, "opacity" ); + return ret === "" ? "1" : ret; + + } + } + } + }, + + // Exclude the following css properties to add px + cssNumber: { + "fillOpacity": true, + "fontWeight": true, + "lineHeight": true, + "opacity": true, + "orphans": true, + "widows": true, + "zIndex": true, + "zoom": true + }, + + // Add in properties whose names you wish to fix before + // setting or getting the value + cssProps: { + // normalize float css property + "float": jQuery.support.cssFloat ? "cssFloat" : "styleFloat" + }, + + // Get and set the style property on a DOM Node + style: function( elem, name, value, extra ) { + // Don't set styles on text and comment nodes + if ( !elem || elem.nodeType === 3 || elem.nodeType === 8 || !elem.style ) { + return; + } + + // Make sure that we're working with the right name + var ret, type, hooks, + origName = jQuery.camelCase( name ), + style = elem.style; + + name = jQuery.cssProps[ origName ] || ( jQuery.cssProps[ origName ] = vendorPropName( style, origName ) ); + + // gets hook for the prefixed version + // followed by the unprefixed version + hooks = jQuery.cssHooks[ name ] || jQuery.cssHooks[ origName ]; + + // Check if we're setting a value + if ( value !== undefined ) { + type = typeof value; + + // convert relative number strings (+= or -=) to relative numbers. #7345 + if ( type === "string" && (ret = rrelNum.exec( value )) ) { + value = ( ret[1] + 1 ) * ret[2] + parseFloat( jQuery.css( elem, name ) ); + // Fixes bug #9237 + type = "number"; + } + + // Make sure that NaN and null values aren't set. 
See: #7116 + if ( value == null || type === "number" && isNaN( value ) ) { + return; + } + + // If a number was passed in, add 'px' to the (except for certain CSS properties) + if ( type === "number" && !jQuery.cssNumber[ origName ] ) { + value += "px"; + } + + // If a hook was provided, use that value, otherwise just set the specified value + if ( !hooks || !("set" in hooks) || (value = hooks.set( elem, value, extra )) !== undefined ) { + // Wrapped to prevent IE from throwing errors when 'invalid' values are provided + // Fixes bug #5509 + try { + style[ name ] = value; + } catch(e) {} + } + + } else { + // If a hook was provided get the non-computed value from there + if ( hooks && "get" in hooks && (ret = hooks.get( elem, false, extra )) !== undefined ) { + return ret; + } + + // Otherwise just get the value from the style object + return style[ name ]; + } + }, + + css: function( elem, name, numeric, extra ) { + var val, num, hooks, + origName = jQuery.camelCase( name ); + + // Make sure that we're working with the right name + name = jQuery.cssProps[ origName ] || ( jQuery.cssProps[ origName ] = vendorPropName( elem.style, origName ) ); + + // gets hook for the prefixed version + // followed by the unprefixed version + hooks = jQuery.cssHooks[ name ] || jQuery.cssHooks[ origName ]; + + // If a hook was provided get the computed value from there + if ( hooks && "get" in hooks ) { + val = hooks.get( elem, true, extra ); + } + + // Otherwise, if a way to get the computed value exists, use that + if ( val === undefined ) { + val = curCSS( elem, name ); + } + + //convert "normal" to computed value + if ( val === "normal" && name in cssNormalTransform ) { + val = cssNormalTransform[ name ]; + } + + // Return, converting to number if forced or a qualifier was provided and val looks numeric + if ( numeric || extra !== undefined ) { + num = parseFloat( val ); + return numeric || jQuery.isNumeric( num ) ? num || 0 : val; + } + return val; + }, + + // A method for quickly swapping in/out CSS properties to get correct calculations + swap: function( elem, options, callback ) { + var ret, name, + old = {}; + + // Remember the old values, and insert the new ones + for ( name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + ret = callback.call( elem ); + + // Revert the old values + for ( name in options ) { + elem.style[ name ] = old[ name ]; + } + + return ret; + } +}); + +// NOTE: To any future maintainer, we've window.getComputedStyle +// because jsdom on node.js will break without it. 
+if ( window.getComputedStyle ) { + curCSS = function( elem, name ) { + var ret, width, minWidth, maxWidth, + computed = window.getComputedStyle( elem, null ), + style = elem.style; + + if ( computed ) { + + // getPropertyValue is only needed for .css('filter') in IE9, see #12537 + ret = computed.getPropertyValue( name ) || computed[ name ]; + + if ( ret === "" && !jQuery.contains( elem.ownerDocument, elem ) ) { + ret = jQuery.style( elem, name ); + } + + // A tribute to the "awesome hack by Dean Edwards" + // Chrome < 17 and Safari 5.0 uses "computed value" instead of "used value" for margin-right + // Safari 5.1.7 (at least) returns percentage for a larger set of values, but width seems to be reliably pixels + // this is against the CSSOM draft spec: http://dev.w3.org/csswg/cssom/#resolved-values + if ( rnumnonpx.test( ret ) && rmargin.test( name ) ) { + width = style.width; + minWidth = style.minWidth; + maxWidth = style.maxWidth; + + style.minWidth = style.maxWidth = style.width = ret; + ret = computed.width; + + style.width = width; + style.minWidth = minWidth; + style.maxWidth = maxWidth; + } + } + + return ret; + }; +} else if ( document.documentElement.currentStyle ) { + curCSS = function( elem, name ) { + var left, rsLeft, + ret = elem.currentStyle && elem.currentStyle[ name ], + style = elem.style; + + // Avoid setting ret to empty string here + // so we don't default to auto + if ( ret == null && style && style[ name ] ) { + ret = style[ name ]; + } + + // From the awesome hack by Dean Edwards + // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291 + + // If we're not dealing with a regular pixel number + // but a number that has a weird ending, we need to convert it to pixels + // but not position css attributes, as those are proportional to the parent element instead + // and we can't measure the parent instead because it might trigger a "stacking dolls" problem + if ( rnumnonpx.test( ret ) && !rposition.test( name ) ) { + + // Remember the original values + left = style.left; + rsLeft = elem.runtimeStyle && elem.runtimeStyle.left; + + // Put in the new values to get a computed value out + if ( rsLeft ) { + elem.runtimeStyle.left = elem.currentStyle.left; + } + style.left = name === "fontSize" ? "1em" : ret; + ret = style.pixelLeft + "px"; + + // Revert the changed values + style.left = left; + if ( rsLeft ) { + elem.runtimeStyle.left = rsLeft; + } + } + + return ret === "" ? "auto" : ret; + }; +} + +function setPositiveNumber( elem, value, subtract ) { + var matches = rnumsplit.exec( value ); + return matches ? + Math.max( 0, matches[ 1 ] - ( subtract || 0 ) ) + ( matches[ 2 ] || "px" ) : + value; +} + +function augmentWidthOrHeight( elem, name, extra, isBorderBox ) { + var i = extra === ( isBorderBox ? "border" : "content" ) ? + // If we already have the right measurement, avoid augmentation + 4 : + // Otherwise initialize for horizontal or vertical properties + name === "width" ? 1 : 0, + + val = 0; + + for ( ; i < 4; i += 2 ) { + // both box models exclude margin, so add it if we want it + if ( extra === "margin" ) { + // we use jQuery.css instead of curCSS here + // because of the reliableMarginRight CSS hook! 
+ val += jQuery.css( elem, extra + cssExpand[ i ], true ); + } + + // From this point on we use curCSS for maximum performance (relevant in animations) + if ( isBorderBox ) { + // border-box includes padding, so remove it if we want content + if ( extra === "content" ) { + val -= parseFloat( curCSS( elem, "padding" + cssExpand[ i ] ) ) || 0; + } + + // at this point, extra isn't border nor margin, so remove border + if ( extra !== "margin" ) { + val -= parseFloat( curCSS( elem, "border" + cssExpand[ i ] + "Width" ) ) || 0; + } + } else { + // at this point, extra isn't content, so add padding + val += parseFloat( curCSS( elem, "padding" + cssExpand[ i ] ) ) || 0; + + // at this point, extra isn't content nor padding, so add border + if ( extra !== "padding" ) { + val += parseFloat( curCSS( elem, "border" + cssExpand[ i ] + "Width" ) ) || 0; + } + } + } + + return val; +} + +function getWidthOrHeight( elem, name, extra ) { + + // Start with offset property, which is equivalent to the border-box value + var val = name === "width" ? elem.offsetWidth : elem.offsetHeight, + valueIsBorderBox = true, + isBorderBox = jQuery.support.boxSizing && jQuery.css( elem, "boxSizing" ) === "border-box"; + + // some non-html elements return undefined for offsetWidth, so check for null/undefined + // svg - https://bugzilla.mozilla.org/show_bug.cgi?id=649285 + // MathML - https://bugzilla.mozilla.org/show_bug.cgi?id=491668 + if ( val <= 0 || val == null ) { + // Fall back to computed then uncomputed css if necessary + val = curCSS( elem, name ); + if ( val < 0 || val == null ) { + val = elem.style[ name ]; + } + + // Computed unit is not pixels. Stop here and return. + if ( rnumnonpx.test(val) ) { + return val; + } + + // we need the check for style in case a browser which returns unreliable values + // for getComputedStyle silently falls back to the reliable elem.style + valueIsBorderBox = isBorderBox && ( jQuery.support.boxSizingReliable || val === elem.style[ name ] ); + + // Normalize "", auto, and prepare for extra + val = parseFloat( val ) || 0; + } + + // use the active box-sizing model to add/subtract irrelevant styles + return ( val + + augmentWidthOrHeight( + elem, + name, + extra || ( isBorderBox ? "border" : "content" ), + valueIsBorderBox + ) + ) + "px"; +} + + +// Try to determine the default display value of an element +function css_defaultDisplay( nodeName ) { + if ( elemdisplay[ nodeName ] ) { + return elemdisplay[ nodeName ]; + } + + var elem = jQuery( "<" + nodeName + ">" ).appendTo( document.body ), + display = elem.css("display"); + elem.remove(); + + // If the simple way fails, + // get element's real default display by attaching it to a temp iframe + if ( display === "none" || display === "" ) { + // Use the already-created iframe if possible + iframe = document.body.appendChild( + iframe || jQuery.extend( document.createElement("iframe"), { + frameBorder: 0, + width: 0, + height: 0 + }) + ); + + // Create a cacheable copy of the iframe document on first call. + // IE and Opera will allow us to reuse the iframeDoc without re-writing the fake HTML + // document to it; WebKit & Firefox won't allow reusing the iframe document. 
+ if ( !iframeDoc || !iframe.createElement ) { + iframeDoc = ( iframe.contentWindow || iframe.contentDocument ).document; + iframeDoc.write("<!doctype html><html><body>"); + iframeDoc.close(); + } + + elem = iframeDoc.body.appendChild( iframeDoc.createElement(nodeName) ); + + display = curCSS( elem, "display" ); + document.body.removeChild( iframe ); + } + + // Store the correct default display + elemdisplay[ nodeName ] = display; + + return display; +} + +jQuery.each([ "height", "width" ], function( i, name ) { + jQuery.cssHooks[ name ] = { + get: function( elem, computed, extra ) { + if ( computed ) { + // certain elements can have dimension info if we invisibly show them + // however, it must have a current display style that would benefit from this + if ( elem.offsetWidth === 0 && rdisplayswap.test( curCSS( elem, "display" ) ) ) { + return jQuery.swap( elem, cssShow, function() { + return getWidthOrHeight( elem, name, extra ); + }); + } else { + return getWidthOrHeight( elem, name, extra ); + } + } + }, + + set: function( elem, value, extra ) { + return setPositiveNumber( elem, value, extra ? + augmentWidthOrHeight( + elem, + name, + extra, + jQuery.support.boxSizing && jQuery.css( elem, "boxSizing" ) === "border-box" + ) : 0 + ); + } + }; +}); + +if ( !jQuery.support.opacity ) { + jQuery.cssHooks.opacity = { + get: function( elem, computed ) { + // IE uses filters for opacity + return ropacity.test( (computed && elem.currentStyle ? elem.currentStyle.filter : elem.style.filter) || "" ) ? + ( 0.01 * parseFloat( RegExp.$1 ) ) + "" : + computed ? "1" : ""; + }, + + set: function( elem, value ) { + var style = elem.style, + currentStyle = elem.currentStyle, + opacity = jQuery.isNumeric( value ) ? "alpha(opacity=" + value * 100 + ")" : "", + filter = currentStyle && currentStyle.filter || style.filter || ""; + + // IE has trouble with opacity if it does not have layout + // Force it by setting the zoom level + style.zoom = 1; + + // if setting opacity to 1, and no other filters exist - attempt to remove filter attribute #6652 + if ( value >= 1 && jQuery.trim( filter.replace( ralpha, "" ) ) === "" && + style.removeAttribute ) { + + // Setting style.filter to null, "" & " " still leave "filter:" in the cssText + // if "filter:" is present at all, clearType is disabled, we want to avoid this + // style.removeAttribute is IE Only, but so apparently is this code path... + style.removeAttribute( "filter" ); + + // if there there is no filter style applied in a css rule, we are done + if ( currentStyle && !currentStyle.filter ) { + return; + } + } + + // otherwise, set new filter values + style.filter = ralpha.test( filter ) ? 
+ filter.replace( ralpha, opacity ) : + filter + " " + opacity; + } + }; +} + +// These hooks cannot be added until DOM ready because the support test +// for it is not run until after DOM ready +jQuery(function() { + if ( !jQuery.support.reliableMarginRight ) { + jQuery.cssHooks.marginRight = { + get: function( elem, computed ) { + // WebKit Bug 13343 - getComputedStyle returns wrong value for margin-right + // Work around by temporarily setting element display to inline-block + return jQuery.swap( elem, { "display": "inline-block" }, function() { + if ( computed ) { + return curCSS( elem, "marginRight" ); + } + }); + } + }; + } + + // Webkit bug: https://bugs.webkit.org/show_bug.cgi?id=29084 + // getComputedStyle returns percent when specified for top/left/bottom/right + // rather than make the css module depend on the offset module, we just check for it here + if ( !jQuery.support.pixelPosition && jQuery.fn.position ) { + jQuery.each( [ "top", "left" ], function( i, prop ) { + jQuery.cssHooks[ prop ] = { + get: function( elem, computed ) { + if ( computed ) { + var ret = curCSS( elem, prop ); + // if curCSS returns percentage, fallback to offset + return rnumnonpx.test( ret ) ? jQuery( elem ).position()[ prop ] + "px" : ret; + } + } + }; + }); + } + +}); + +if ( jQuery.expr && jQuery.expr.filters ) { + jQuery.expr.filters.hidden = function( elem ) { + return ( elem.offsetWidth === 0 && elem.offsetHeight === 0 ) || (!jQuery.support.reliableHiddenOffsets && ((elem.style && elem.style.display) || curCSS( elem, "display" )) === "none"); + }; + + jQuery.expr.filters.visible = function( elem ) { + return !jQuery.expr.filters.hidden( elem ); + }; +} + +// These hooks are used by animate to expand properties +jQuery.each({ + margin: "", + padding: "", + border: "Width" +}, function( prefix, suffix ) { + jQuery.cssHooks[ prefix + suffix ] = { + expand: function( value ) { + var i, + + // assumes a single number if not a string + parts = typeof value === "string" ? value.split(" ") : [ value ], + expanded = {}; + + for ( i = 0; i < 4; i++ ) { + expanded[ prefix + cssExpand[ i ] + suffix ] = + parts[ i ] || parts[ i - 2 ] || parts[ 0 ]; + } + + return expanded; + } + }; + + if ( !rmargin.test( prefix ) ) { + jQuery.cssHooks[ prefix + suffix ].set = setPositiveNumber; + } +}); +var r20 = /%20/g, + rbracket = /\[\]$/, + rCRLF = /\r?\n/g, + rinput = /^(?:color|date|datetime|datetime-local|email|hidden|month|number|password|range|search|tel|text|time|url|week)$/i, + rselectTextarea = /^(?:select|textarea)/i; + +jQuery.fn.extend({ + serialize: function() { + return jQuery.param( this.serializeArray() ); + }, + serializeArray: function() { + return this.map(function(){ + return this.elements ? jQuery.makeArray( this.elements ) : this; + }) + .filter(function(){ + return this.name && !this.disabled && + ( this.checked || rselectTextarea.test( this.nodeName ) || + rinput.test( this.type ) ); + }) + .map(function( i, elem ){ + var val = jQuery( this ).val(); + + return val == null ? + null : + jQuery.isArray( val ) ? + jQuery.map( val, function( val, i ){ + return { name: elem.name, value: val.replace( rCRLF, "\r\n" ) }; + }) : + { name: elem.name, value: val.replace( rCRLF, "\r\n" ) }; + }).get(); + } +}); + +//Serialize an array of form elements or a set of +//key/values into a query string +jQuery.param = function( a, traditional ) { + var prefix, + s = [], + add = function( key, value ) { + // If value is a function, invoke it and return its value + value = jQuery.isFunction( value ) ? 
value() : ( value == null ? "" : value ); + s[ s.length ] = encodeURIComponent( key ) + "=" + encodeURIComponent( value ); + }; + + // Set traditional to true for jQuery <= 1.3.2 behavior. + if ( traditional === undefined ) { + traditional = jQuery.ajaxSettings && jQuery.ajaxSettings.traditional; + } + + // If an array was passed in, assume that it is an array of form elements. + if ( jQuery.isArray( a ) || ( a.jquery && !jQuery.isPlainObject( a ) ) ) { + // Serialize the form elements + jQuery.each( a, function() { + add( this.name, this.value ); + }); + + } else { + // If traditional, encode the "old" way (the way 1.3.2 or older + // did it), otherwise encode params recursively. + for ( prefix in a ) { + buildParams( prefix, a[ prefix ], traditional, add ); + } + } + + // Return the resulting serialization + return s.join( "&" ).replace( r20, "+" ); +}; + +function buildParams( prefix, obj, traditional, add ) { + var name; + + if ( jQuery.isArray( obj ) ) { + // Serialize array item. + jQuery.each( obj, function( i, v ) { + if ( traditional || rbracket.test( prefix ) ) { + // Treat each array item as a scalar. + add( prefix, v ); + + } else { + // If array item is non-scalar (array or object), encode its + // numeric index to resolve deserialization ambiguity issues. + // Note that rack (as of 1.0.0) can't currently deserialize + // nested arrays properly, and attempting to do so may cause + // a server error. Possible fixes are to modify rack's + // deserialization algorithm or to provide an option or flag + // to force array serialization to be shallow. + buildParams( prefix + "[" + ( typeof v === "object" ? i : "" ) + "]", v, traditional, add ); + } + }); + + } else if ( !traditional && jQuery.type( obj ) === "object" ) { + // Serialize object item. + for ( name in obj ) { + buildParams( prefix + "[" + name + "]", obj[ name ], traditional, add ); + } + + } else { + // Serialize scalar item. 
+ add( prefix, obj ); + } +} +var + // Document location + ajaxLocParts, + ajaxLocation, + + rhash = /#.*$/, + rheaders = /^(.*?):[ \t]*([^\r\n]*)\r?$/mg, // IE leaves an \r character at EOL + // #7653, #8125, #8152: local protocol detection + rlocalProtocol = /^(?:about|app|app\-storage|.+\-extension|file|res|widget):$/, + rnoContent = /^(?:GET|HEAD)$/, + rprotocol = /^\/\//, + rquery = /\?/, + rscript = /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, + rts = /([?&])_=[^&]*/, + rurl = /^([\w\+\.\-]+:)(?:\/\/([^\/?#:]*)(?::(\d+)|)|)/, + + // Keep a copy of the old load method + _load = jQuery.fn.load, + + /* Prefilters + * 1) They are useful to introduce custom dataTypes (see ajax/jsonp.js for an example) + * 2) These are called: + * - BEFORE asking for a transport + * - AFTER param serialization (s.data is a string if s.processData is true) + * 3) key is the dataType + * 4) the catchall symbol "*" can be used + * 5) execution will start with transport dataType and THEN continue down to "*" if needed + */ + prefilters = {}, + + /* Transports bindings + * 1) key is the dataType + * 2) the catchall symbol "*" can be used + * 3) selection will start with transport dataType and THEN go to "*" if needed + */ + transports = {}, + + // Avoid comment-prolog char sequence (#10098); must appease lint and evade compression + allTypes = ["*/"] + ["*"]; + +// #8138, IE may throw an exception when accessing +// a field from window.location if document.domain has been set +try { + ajaxLocation = location.href; +} catch( e ) { + // Use the href attribute of an A element + // since IE will modify it given document.location + ajaxLocation = document.createElement( "a" ); + ajaxLocation.href = ""; + ajaxLocation = ajaxLocation.href; +} + +// Segment location into parts +ajaxLocParts = rurl.exec( ajaxLocation.toLowerCase() ) || []; + +// Base "constructor" for jQuery.ajaxPrefilter and jQuery.ajaxTransport +function addToPrefiltersOrTransports( structure ) { + + // dataTypeExpression is optional and defaults to "*" + return function( dataTypeExpression, func ) { + + if ( typeof dataTypeExpression !== "string" ) { + func = dataTypeExpression; + dataTypeExpression = "*"; + } + + var dataType, list, placeBefore, + dataTypes = dataTypeExpression.toLowerCase().split( core_rspace ), + i = 0, + length = dataTypes.length; + + if ( jQuery.isFunction( func ) ) { + // For each dataType in the dataTypeExpression + for ( ; i < length; i++ ) { + dataType = dataTypes[ i ]; + // We control if we're asked to add before + // any existing element + placeBefore = /^\+/.test( dataType ); + if ( placeBefore ) { + dataType = dataType.substr( 1 ) || "*"; + } + list = structure[ dataType ] = structure[ dataType ] || []; + // then we add to the structure accordingly + list[ placeBefore ? "unshift" : "push" ]( func ); + } + } + }; +} + +// Base inspection function for prefilters and transports +function inspectPrefiltersOrTransports( structure, options, originalOptions, jqXHR, + dataType /* internal */, inspected /* internal */ ) { + + dataType = dataType || options.dataTypes[ 0 ]; + inspected = inspected || {}; + + inspected[ dataType ] = true; + + var selection, + list = structure[ dataType ], + i = 0, + length = list ? 
list.length : 0, + executeOnly = ( structure === prefilters ); + + for ( ; i < length && ( executeOnly || !selection ); i++ ) { + selection = list[ i ]( options, originalOptions, jqXHR ); + // If we got redirected to another dataType + // we try there if executing only and not done already + if ( typeof selection === "string" ) { + if ( !executeOnly || inspected[ selection ] ) { + selection = undefined; + } else { + options.dataTypes.unshift( selection ); + selection = inspectPrefiltersOrTransports( + structure, options, originalOptions, jqXHR, selection, inspected ); + } + } + } + // If we're only executing or nothing was selected + // we try the catchall dataType if not done already + if ( ( executeOnly || !selection ) && !inspected[ "*" ] ) { + selection = inspectPrefiltersOrTransports( + structure, options, originalOptions, jqXHR, "*", inspected ); + } + // unnecessary when only executing (prefilters) + // but it'll be ignored by the caller in that case + return selection; +} + +// A special extend for ajax options +// that takes "flat" options (not to be deep extended) +// Fixes #9887 +function ajaxExtend( target, src ) { + var key, deep, + flatOptions = jQuery.ajaxSettings.flatOptions || {}; + for ( key in src ) { + if ( src[ key ] !== undefined ) { + ( flatOptions[ key ] ? target : ( deep || ( deep = {} ) ) )[ key ] = src[ key ]; + } + } + if ( deep ) { + jQuery.extend( true, target, deep ); + } +} + +jQuery.fn.load = function( url, params, callback ) { + if ( typeof url !== "string" && _load ) { + return _load.apply( this, arguments ); + } + + // Don't do a request if no elements are being requested + if ( !this.length ) { + return this; + } + + var selector, type, response, + self = this, + off = url.indexOf(" "); + + if ( off >= 0 ) { + selector = url.slice( off, url.length ); + url = url.slice( 0, off ); + } + + // If it's a function + if ( jQuery.isFunction( params ) ) { + + // We assume that it's the callback + callback = params; + params = undefined; + + // Otherwise, build a param string + } else if ( params && typeof params === "object" ) { + type = "POST"; + } + + // Request the remote document + jQuery.ajax({ + url: url, + + // if "type" variable is undefined, then "GET" method will be used + type: type, + dataType: "html", + data: params, + complete: function( jqXHR, status ) { + if ( callback ) { + self.each( callback, response || [ jqXHR.responseText, status, jqXHR ] ); + } + } + }).done(function( responseText ) { + + // Save response for use in complete callback + response = arguments; + + // See if a selector was specified + self.html( selector ? 
+ + // Create a dummy div to hold the results + jQuery("<div>") + + // inject the contents of the document in, removing the scripts + // to avoid any 'Permission Denied' errors in IE + .append( responseText.replace( rscript, "" ) ) + + // Locate the specified elements + .find( selector ) : + + // If not, just inject the full result + responseText ); + + }); + + return this; +}; + +// Attach a bunch of functions for handling common AJAX events +jQuery.each( "ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend".split( " " ), function( i, o ){ + jQuery.fn[ o ] = function( f ){ + return this.on( o, f ); + }; +}); + +jQuery.each( [ "get", "post" ], function( i, method ) { + jQuery[ method ] = function( url, data, callback, type ) { + // shift arguments if data argument was omitted + if ( jQuery.isFunction( data ) ) { + type = type || callback; + callback = data; + data = undefined; + } + + return jQuery.ajax({ + type: method, + url: url, + data: data, + success: callback, + dataType: type + }); + }; +}); + +jQuery.extend({ + + getScript: function( url, callback ) { + return jQuery.get( url, undefined, callback, "script" ); + }, + + getJSON: function( url, data, callback ) { + return jQuery.get( url, data, callback, "json" ); + }, + + // Creates a full fledged settings object into target + // with both ajaxSettings and settings fields. + // If target is omitted, writes into ajaxSettings. + ajaxSetup: function( target, settings ) { + if ( settings ) { + // Building a settings object + ajaxExtend( target, jQuery.ajaxSettings ); + } else { + // Extending ajaxSettings + settings = target; + target = jQuery.ajaxSettings; + } + ajaxExtend( target, settings ); + return target; + }, + + ajaxSettings: { + url: ajaxLocation, + isLocal: rlocalProtocol.test( ajaxLocParts[ 1 ] ), + global: true, + type: "GET", + contentType: "application/x-www-form-urlencoded; charset=UTF-8", + processData: true, + async: true, + /* + timeout: 0, + data: null, + dataType: null, + username: null, + password: null, + cache: null, + throws: false, + traditional: false, + headers: {}, + */ + + accepts: { + xml: "application/xml, text/xml", + html: "text/html", + text: "text/plain", + json: "application/json, text/javascript", + "*": allTypes + }, + + contents: { + xml: /xml/, + html: /html/, + json: /json/ + }, + + responseFields: { + xml: "responseXML", + text: "responseText" + }, + + // List of data converters + // 1) key format is "source_type destination_type" (a single space in-between) + // 2) the catchall symbol "*" can be used for source_type + converters: { + + // Convert anything to text + "* text": window.String, + + // Text to html (true = no transformation) + "text html": true, + + // Evaluate text as a json expression + "text json": jQuery.parseJSON, + + // Parse text as xml + "text xml": jQuery.parseXML + }, + + // For options that shouldn't be deep extended: + // you can add your own custom options here if + // and when you create one that shouldn't be + // deep extended (see ajaxExtend) + flatOptions: { + context: true, + url: true + } + }, + + ajaxPrefilter: addToPrefiltersOrTransports( prefilters ), + ajaxTransport: addToPrefiltersOrTransports( transports ), + + // Main method + ajax: function( url, options ) { + + // If url is an object, simulate pre-1.5 signature + if ( typeof url === "object" ) { + options = url; + url = undefined; + } + + // Force options to be an object + options = options || {}; + + var // ifModified key + ifModifiedKey, + // Response headers + responseHeadersString, + 
responseHeaders, + // transport + transport, + // timeout handle + timeoutTimer, + // Cross-domain detection vars + parts, + // To know if global events are to be dispatched + fireGlobals, + // Loop variable + i, + // Create the final options object + s = jQuery.ajaxSetup( {}, options ), + // Callbacks context + callbackContext = s.context || s, + // Context for global events + // It's the callbackContext if one was provided in the options + // and if it's a DOM node or a jQuery collection + globalEventContext = callbackContext !== s && + ( callbackContext.nodeType || callbackContext instanceof jQuery ) ? + jQuery( callbackContext ) : jQuery.event, + // Deferreds + deferred = jQuery.Deferred(), + completeDeferred = jQuery.Callbacks( "once memory" ), + // Status-dependent callbacks + statusCode = s.statusCode || {}, + // Headers (they are sent all at once) + requestHeaders = {}, + requestHeadersNames = {}, + // The jqXHR state + state = 0, + // Default abort message + strAbort = "canceled", + // Fake xhr + jqXHR = { + + readyState: 0, + + // Caches the header + setRequestHeader: function( name, value ) { + if ( !state ) { + var lname = name.toLowerCase(); + name = requestHeadersNames[ lname ] = requestHeadersNames[ lname ] || name; + requestHeaders[ name ] = value; + } + return this; + }, + + // Raw string + getAllResponseHeaders: function() { + return state === 2 ? responseHeadersString : null; + }, + + // Builds headers hashtable if needed + getResponseHeader: function( key ) { + var match; + if ( state === 2 ) { + if ( !responseHeaders ) { + responseHeaders = {}; + while( ( match = rheaders.exec( responseHeadersString ) ) ) { + responseHeaders[ match[1].toLowerCase() ] = match[ 2 ]; + } + } + match = responseHeaders[ key.toLowerCase() ]; + } + return match === undefined ? null : match; + }, + + // Overrides response content-type header + overrideMimeType: function( type ) { + if ( !state ) { + s.mimeType = type; + } + return this; + }, + + // Cancel the request + abort: function( statusText ) { + statusText = statusText || strAbort; + if ( transport ) { + transport.abort( statusText ); + } + done( 0, statusText ); + return this; + } + }; + + // Callback for when everything is done + // It is defined here because jslint complains if it is declared + // at the end of the function (which would be more logical and readable) + function done( status, nativeStatusText, responses, headers ) { + var isSuccess, success, error, response, modified, + statusText = nativeStatusText; + + // Called once + if ( state === 2 ) { + return; + } + + // State is "done" now + state = 2; + + // Clear timeout if it exists + if ( timeoutTimer ) { + clearTimeout( timeoutTimer ); + } + + // Dereference transport for early garbage collection + // (no matter how long the jqXHR object will be used) + transport = undefined; + + // Cache response headers + responseHeadersString = headers || ""; + + // Set readyState + jqXHR.readyState = status > 0 ? 4 : 0; + + // Get response data + if ( responses ) { + response = ajaxHandleResponses( s, jqXHR, responses ); + } + + // If successful, handle type chaining + if ( status >= 200 && status < 300 || status === 304 ) { + + // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode. 
+ if ( s.ifModified ) { + + modified = jqXHR.getResponseHeader("Last-Modified"); + if ( modified ) { + jQuery.lastModified[ ifModifiedKey ] = modified; + } + modified = jqXHR.getResponseHeader("Etag"); + if ( modified ) { + jQuery.etag[ ifModifiedKey ] = modified; + } + } + + // If not modified + if ( status === 304 ) { + + statusText = "notmodified"; + isSuccess = true; + + // If we have data + } else { + + isSuccess = ajaxConvert( s, response ); + statusText = isSuccess.state; + success = isSuccess.data; + error = isSuccess.error; + isSuccess = !error; + } + } else { + // We extract error from statusText + // then normalize statusText and status for non-aborts + error = statusText; + if ( !statusText || status ) { + statusText = "error"; + if ( status < 0 ) { + status = 0; + } + } + } + + // Set data for the fake xhr object + jqXHR.status = status; + jqXHR.statusText = ( nativeStatusText || statusText ) + ""; + + // Success/Error + if ( isSuccess ) { + deferred.resolveWith( callbackContext, [ success, statusText, jqXHR ] ); + } else { + deferred.rejectWith( callbackContext, [ jqXHR, statusText, error ] ); + } + + // Status-dependent callbacks + jqXHR.statusCode( statusCode ); + statusCode = undefined; + + if ( fireGlobals ) { + globalEventContext.trigger( "ajax" + ( isSuccess ? "Success" : "Error" ), + [ jqXHR, s, isSuccess ? success : error ] ); + } + + // Complete + completeDeferred.fireWith( callbackContext, [ jqXHR, statusText ] ); + + if ( fireGlobals ) { + globalEventContext.trigger( "ajaxComplete", [ jqXHR, s ] ); + // Handle the global AJAX counter + if ( !( --jQuery.active ) ) { + jQuery.event.trigger( "ajaxStop" ); + } + } + } + + // Attach deferreds + deferred.promise( jqXHR ); + jqXHR.success = jqXHR.done; + jqXHR.error = jqXHR.fail; + jqXHR.complete = completeDeferred.add; + + // Status-dependent callbacks + jqXHR.statusCode = function( map ) { + if ( map ) { + var tmp; + if ( state < 2 ) { + for ( tmp in map ) { + statusCode[ tmp ] = [ statusCode[tmp], map[tmp] ]; + } + } else { + tmp = map[ jqXHR.status ]; + jqXHR.always( tmp ); + } + } + return this; + }; + + // Remove hash character (#7531: and string promotion) + // Add protocol if not provided (#5866: IE7 issue with protocol-less urls) + // We also use the url parameter if available + s.url = ( ( url || s.url ) + "" ).replace( rhash, "" ).replace( rprotocol, ajaxLocParts[ 1 ] + "//" ); + + // Extract dataTypes list + s.dataTypes = jQuery.trim( s.dataType || "*" ).toLowerCase().split( core_rspace ); + + // A cross-domain request is in order when we have a protocol:host:port mismatch + if ( s.crossDomain == null ) { + parts = rurl.exec( s.url.toLowerCase() ); + s.crossDomain = !!( parts && + ( parts[ 1 ] !== ajaxLocParts[ 1 ] || parts[ 2 ] !== ajaxLocParts[ 2 ] || + ( parts[ 3 ] || ( parts[ 1 ] === "http:" ? 80 : 443 ) ) != + ( ajaxLocParts[ 3 ] || ( ajaxLocParts[ 1 ] === "http:" ? 
80 : 443 ) ) ) + ); + } + + // Convert data if not already a string + if ( s.data && s.processData && typeof s.data !== "string" ) { + s.data = jQuery.param( s.data, s.traditional ); + } + + // Apply prefilters + inspectPrefiltersOrTransports( prefilters, s, options, jqXHR ); + + // If request was aborted inside a prefilter, stop there + if ( state === 2 ) { + return jqXHR; + } + + // We can fire global events as of now if asked to + fireGlobals = s.global; + + // Uppercase the type + s.type = s.type.toUpperCase(); + + // Determine if request has content + s.hasContent = !rnoContent.test( s.type ); + + // Watch for a new set of requests + if ( fireGlobals && jQuery.active++ === 0 ) { + jQuery.event.trigger( "ajaxStart" ); + } + + // More options handling for requests with no content + if ( !s.hasContent ) { + + // If data is available, append data to url + if ( s.data ) { + s.url += ( rquery.test( s.url ) ? "&" : "?" ) + s.data; + // #9682: remove data so that it's not used in an eventual retry + delete s.data; + } + + // Get ifModifiedKey before adding the anti-cache parameter + ifModifiedKey = s.url; + + // Add anti-cache in url if needed + if ( s.cache === false ) { + + var ts = jQuery.now(), + // try replacing _= if it is there + ret = s.url.replace( rts, "$1_=" + ts ); + + // if nothing was replaced, add timestamp to the end + s.url = ret + ( ( ret === s.url ) ? ( rquery.test( s.url ) ? "&" : "?" ) + "_=" + ts : "" ); + } + } + + // Set the correct header, if data is being sent + if ( s.data && s.hasContent && s.contentType !== false || options.contentType ) { + jqXHR.setRequestHeader( "Content-Type", s.contentType ); + } + + // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode. + if ( s.ifModified ) { + ifModifiedKey = ifModifiedKey || s.url; + if ( jQuery.lastModified[ ifModifiedKey ] ) { + jqXHR.setRequestHeader( "If-Modified-Since", jQuery.lastModified[ ifModifiedKey ] ); + } + if ( jQuery.etag[ ifModifiedKey ] ) { + jqXHR.setRequestHeader( "If-None-Match", jQuery.etag[ ifModifiedKey ] ); + } + } + + // Set the Accepts header for the server, depending on the dataType + jqXHR.setRequestHeader( + "Accept", + s.dataTypes[ 0 ] && s.accepts[ s.dataTypes[0] ] ? + s.accepts[ s.dataTypes[0] ] + ( s.dataTypes[ 0 ] !== "*" ? 
", " + allTypes + "; q=0.01" : "" ) : + s.accepts[ "*" ] + ); + + // Check for headers option + for ( i in s.headers ) { + jqXHR.setRequestHeader( i, s.headers[ i ] ); + } + + // Allow custom headers/mimetypes and early abort + if ( s.beforeSend && ( s.beforeSend.call( callbackContext, jqXHR, s ) === false || state === 2 ) ) { + // Abort if not done already and return + return jqXHR.abort(); + + } + + // aborting is no longer a cancellation + strAbort = "abort"; + + // Install callbacks on deferreds + for ( i in { success: 1, error: 1, complete: 1 } ) { + jqXHR[ i ]( s[ i ] ); + } + + // Get transport + transport = inspectPrefiltersOrTransports( transports, s, options, jqXHR ); + + // If no transport, we auto-abort + if ( !transport ) { + done( -1, "No Transport" ); + } else { + jqXHR.readyState = 1; + // Send global event + if ( fireGlobals ) { + globalEventContext.trigger( "ajaxSend", [ jqXHR, s ] ); + } + // Timeout + if ( s.async && s.timeout > 0 ) { + timeoutTimer = setTimeout( function(){ + jqXHR.abort( "timeout" ); + }, s.timeout ); + } + + try { + state = 1; + transport.send( requestHeaders, done ); + } catch (e) { + // Propagate exception as error if not done + if ( state < 2 ) { + done( -1, e ); + // Simply rethrow otherwise + } else { + throw e; + } + } + } + + return jqXHR; + }, + + // Counter for holding the number of active queries + active: 0, + + // Last-Modified header cache for next request + lastModified: {}, + etag: {} + +}); + +/* Handles responses to an ajax request: + * - sets all responseXXX fields accordingly + * - finds the right dataType (mediates between content-type and expected dataType) + * - returns the corresponding response + */ +function ajaxHandleResponses( s, jqXHR, responses ) { + + var ct, type, finalDataType, firstDataType, + contents = s.contents, + dataTypes = s.dataTypes, + responseFields = s.responseFields; + + // Fill responseXXX fields + for ( type in responseFields ) { + if ( type in responses ) { + jqXHR[ responseFields[type] ] = responses[ type ]; + } + } + + // Remove auto dataType and get content-type in the process + while( dataTypes[ 0 ] === "*" ) { + dataTypes.shift(); + if ( ct === undefined ) { + ct = s.mimeType || jqXHR.getResponseHeader( "content-type" ); + } + } + + // Check if we're dealing with a known content-type + if ( ct ) { + for ( type in contents ) { + if ( contents[ type ] && contents[ type ].test( ct ) ) { + dataTypes.unshift( type ); + break; + } + } + } + + // Check to see if we have a response for the expected dataType + if ( dataTypes[ 0 ] in responses ) { + finalDataType = dataTypes[ 0 ]; + } else { + // Try convertible dataTypes + for ( type in responses ) { + if ( !dataTypes[ 0 ] || s.converters[ type + " " + dataTypes[0] ] ) { + finalDataType = type; + break; + } + if ( !firstDataType ) { + firstDataType = type; + } + } + // Or just use first one + finalDataType = finalDataType || firstDataType; + } + + // If we found a dataType + // We add the dataType to the list if needed + // and return the corresponding response + if ( finalDataType ) { + if ( finalDataType !== dataTypes[ 0 ] ) { + dataTypes.unshift( finalDataType ); + } + return responses[ finalDataType ]; + } +} + +// Chain conversions given the request and the original response +function ajaxConvert( s, response ) { + + var conv, conv2, current, tmp, + // Work with a copy of dataTypes in case we need to modify it for conversion + dataTypes = s.dataTypes.slice(), + prev = dataTypes[ 0 ], + converters = {}, + i = 0; + + // Apply the dataFilter if provided + 
if ( s.dataFilter ) { + response = s.dataFilter( response, s.dataType ); + } + + // Create converters map with lowercased keys + if ( dataTypes[ 1 ] ) { + for ( conv in s.converters ) { + converters[ conv.toLowerCase() ] = s.converters[ conv ]; + } + } + + // Convert to each sequential dataType, tolerating list modification + for ( ; (current = dataTypes[++i]); ) { + + // There's only work to do if current dataType is non-auto + if ( current !== "*" ) { + + // Convert response if prev dataType is non-auto and differs from current + if ( prev !== "*" && prev !== current ) { + + // Seek a direct converter + conv = converters[ prev + " " + current ] || converters[ "* " + current ]; + + // If none found, seek a pair + if ( !conv ) { + for ( conv2 in converters ) { + + // If conv2 outputs current + tmp = conv2.split(" "); + if ( tmp[ 1 ] === current ) { + + // If prev can be converted to accepted input + conv = converters[ prev + " " + tmp[ 0 ] ] || + converters[ "* " + tmp[ 0 ] ]; + if ( conv ) { + // Condense equivalence converters + if ( conv === true ) { + conv = converters[ conv2 ]; + + // Otherwise, insert the intermediate dataType + } else if ( converters[ conv2 ] !== true ) { + current = tmp[ 0 ]; + dataTypes.splice( i--, 0, current ); + } + + break; + } + } + } + } + + // Apply converter (if not an equivalence) + if ( conv !== true ) { + + // Unless errors are allowed to bubble, catch and return them + if ( conv && s["throws"] ) { + response = conv( response ); + } else { + try { + response = conv( response ); + } catch ( e ) { + return { state: "parsererror", error: conv ? e : "No conversion from " + prev + " to " + current }; + } + } + } + } + + // Update prev for next iteration + prev = current; + } + } + + return { state: "success", data: response }; +} +var oldCallbacks = [], + rquestion = /\?/, + rjsonp = /(=)\?(?=&|$)|\?\?/, + nonce = jQuery.now(); + +// Default jsonp settings +jQuery.ajaxSetup({ + jsonp: "callback", + jsonpCallback: function() { + var callback = oldCallbacks.pop() || ( jQuery.expando + "_" + ( nonce++ ) ); + this[ callback ] = true; + return callback; + } +}); + +// Detect, normalize options and install callbacks for jsonp requests +jQuery.ajaxPrefilter( "json jsonp", function( s, originalSettings, jqXHR ) { + + var callbackName, overwritten, responseContainer, + data = s.data, + url = s.url, + hasCallback = s.jsonp !== false, + replaceInUrl = hasCallback && rjsonp.test( url ), + replaceInData = hasCallback && !replaceInUrl && typeof data === "string" && + !( s.contentType || "" ).indexOf("application/x-www-form-urlencoded") && + rjsonp.test( data ); + + // Handle iff the expected data type is "jsonp" or we have a parameter to set + if ( s.dataTypes[ 0 ] === "jsonp" || replaceInUrl || replaceInData ) { + + // Get callback name, remembering preexisting value associated with it + callbackName = s.jsonpCallback = jQuery.isFunction( s.jsonpCallback ) ? + s.jsonpCallback() : + s.jsonpCallback; + overwritten = window[ callbackName ]; + + // Insert callback into url or form data + if ( replaceInUrl ) { + s.url = url.replace( rjsonp, "$1" + callbackName ); + } else if ( replaceInData ) { + s.data = data.replace( rjsonp, "$1" + callbackName ); + } else if ( hasCallback ) { + s.url += ( rquestion.test( url ) ? "&" : "?" 
) + s.jsonp + "=" + callbackName; + } + + // Use data converter to retrieve json after script execution + s.converters["script json"] = function() { + if ( !responseContainer ) { + jQuery.error( callbackName + " was not called" ); + } + return responseContainer[ 0 ]; + }; + + // force json dataType + s.dataTypes[ 0 ] = "json"; + + // Install callback + window[ callbackName ] = function() { + responseContainer = arguments; + }; + + // Clean-up function (fires after converters) + jqXHR.always(function() { + // Restore preexisting value + window[ callbackName ] = overwritten; + + // Save back as free + if ( s[ callbackName ] ) { + // make sure that re-using the options doesn't screw things around + s.jsonpCallback = originalSettings.jsonpCallback; + + // save the callback name for future use + oldCallbacks.push( callbackName ); + } + + // Call if it was a function and we have a response + if ( responseContainer && jQuery.isFunction( overwritten ) ) { + overwritten( responseContainer[ 0 ] ); + } + + responseContainer = overwritten = undefined; + }); + + // Delegate to script + return "script"; + } +}); +// Install script dataType +jQuery.ajaxSetup({ + accepts: { + script: "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript" + }, + contents: { + script: /javascript|ecmascript/ + }, + converters: { + "text script": function( text ) { + jQuery.globalEval( text ); + return text; + } + } +}); + +// Handle cache's special case and global +jQuery.ajaxPrefilter( "script", function( s ) { + if ( s.cache === undefined ) { + s.cache = false; + } + if ( s.crossDomain ) { + s.type = "GET"; + s.global = false; + } +}); + +// Bind script tag hack transport +jQuery.ajaxTransport( "script", function(s) { + + // This transport only deals with cross domain requests + if ( s.crossDomain ) { + + var script, + head = document.head || document.getElementsByTagName( "head" )[0] || document.documentElement; + + return { + + send: function( _, callback ) { + + script = document.createElement( "script" ); + + script.async = "async"; + + if ( s.scriptCharset ) { + script.charset = s.scriptCharset; + } + + script.src = s.url; + + // Attach handlers for all browsers + script.onload = script.onreadystatechange = function( _, isAbort ) { + + if ( isAbort || !script.readyState || /loaded|complete/.test( script.readyState ) ) { + + // Handle memory leak in IE + script.onload = script.onreadystatechange = null; + + // Remove the script + if ( head && script.parentNode ) { + head.removeChild( script ); + } + + // Dereference the script + script = undefined; + + // Callback if not abort + if ( !isAbort ) { + callback( 200, "success" ); + } + } + }; + // Use insertBefore instead of appendChild to circumvent an IE6 bug. + // This arises when a base node is used (#2709 and #4378). + head.insertBefore( script, head.firstChild ); + }, + + abort: function() { + if ( script ) { + script.onload( 0, 1 ); + } + } + }; + } +}); +var xhrCallbacks, + // #5280: Internet Explorer will keep connections alive if we don't abort on unload + xhrOnUnloadAbort = window.ActiveXObject ? 
function() { + // Abort all pending requests + for ( var key in xhrCallbacks ) { + xhrCallbacks[ key ]( 0, 1 ); + } + } : false, + xhrId = 0; + +// Functions to create xhrs +function createStandardXHR() { + try { + return new window.XMLHttpRequest(); + } catch( e ) {} +} + +function createActiveXHR() { + try { + return new window.ActiveXObject( "Microsoft.XMLHTTP" ); + } catch( e ) {} +} + +// Create the request object +// (This is still attached to ajaxSettings for backward compatibility) +jQuery.ajaxSettings.xhr = window.ActiveXObject ? + /* Microsoft failed to properly + * implement the XMLHttpRequest in IE7 (can't request local files), + * so we use the ActiveXObject when it is available + * Additionally XMLHttpRequest can be disabled in IE7/IE8 so + * we need a fallback. + */ + function() { + return !this.isLocal && createStandardXHR() || createActiveXHR(); + } : + // For all other browsers, use the standard XMLHttpRequest object + createStandardXHR; + +// Determine support properties +(function( xhr ) { + jQuery.extend( jQuery.support, { + ajax: !!xhr, + cors: !!xhr && ( "withCredentials" in xhr ) + }); +})( jQuery.ajaxSettings.xhr() ); + +// Create transport if the browser can provide an xhr +if ( jQuery.support.ajax ) { + + jQuery.ajaxTransport(function( s ) { + // Cross domain only allowed if supported through XMLHttpRequest + if ( !s.crossDomain || jQuery.support.cors ) { + + var callback; + + return { + send: function( headers, complete ) { + + // Get a new xhr + var handle, i, + xhr = s.xhr(); + + // Open the socket + // Passing null username, generates a login popup on Opera (#2865) + if ( s.username ) { + xhr.open( s.type, s.url, s.async, s.username, s.password ); + } else { + xhr.open( s.type, s.url, s.async ); + } + + // Apply custom fields if provided + if ( s.xhrFields ) { + for ( i in s.xhrFields ) { + xhr[ i ] = s.xhrFields[ i ]; + } + } + + // Override mime type if needed + if ( s.mimeType && xhr.overrideMimeType ) { + xhr.overrideMimeType( s.mimeType ); + } + + // X-Requested-With header + // For cross-domain requests, seeing as conditions for a preflight are + // akin to a jigsaw puzzle, we simply never set it to be sure. + // (it can always be set on a per-request basis or even using ajaxSetup) + // For same-domain requests, won't change header if already provided. 
+ if ( !s.crossDomain && !headers["X-Requested-With"] ) { + headers[ "X-Requested-With" ] = "XMLHttpRequest"; + } + + // Need an extra try/catch for cross domain requests in Firefox 3 + try { + for ( i in headers ) { + xhr.setRequestHeader( i, headers[ i ] ); + } + } catch( _ ) {} + + // Do send the request + // This may raise an exception which is actually + // handled in jQuery.ajax (so no try/catch here) + xhr.send( ( s.hasContent && s.data ) || null ); + + // Listener + callback = function( _, isAbort ) { + + var status, + statusText, + responseHeaders, + responses, + xml; + + // Firefox throws exceptions when accessing properties + // of an xhr when a network error occurred + // http://helpful.knobs-dials.com/index.php/Component_returned_failure_code:_0x80040111_(NS_ERROR_NOT_AVAILABLE) + try { + + // Was never called and is aborted or complete + if ( callback && ( isAbort || xhr.readyState === 4 ) ) { + + // Only called once + callback = undefined; + + // Do not keep as active anymore + if ( handle ) { + xhr.onreadystatechange = jQuery.noop; + if ( xhrOnUnloadAbort ) { + delete xhrCallbacks[ handle ]; + } + } + + // If it's an abort + if ( isAbort ) { + // Abort it manually if needed + if ( xhr.readyState !== 4 ) { + xhr.abort(); + } + } else { + status = xhr.status; + responseHeaders = xhr.getAllResponseHeaders(); + responses = {}; + xml = xhr.responseXML; + + // Construct response list + if ( xml && xml.documentElement /* #4958 */ ) { + responses.xml = xml; + } + + // When requesting binary data, IE6-9 will throw an exception + // on any attempt to access responseText (#11426) + try { + responses.text = xhr.responseText; + } catch( e ) { + } + + // Firefox throws an exception when accessing + // statusText for faulty cross-domain requests + try { + statusText = xhr.statusText; + } catch( e ) { + // We normalize with Webkit giving an empty statusText + statusText = ""; + } + + // Filter status for non standard behaviors + + // If the request is local and we have data: assume a success + // (success with no data won't get notified, that's the best we + // can do given current implementations) + if ( !status && s.isLocal && !s.crossDomain ) { + status = responses.text ? 
200 : 404; + // IE - #1450: sometimes returns 1223 when it should be 204 + } else if ( status === 1223 ) { + status = 204; + } + } + } + } catch( firefoxAccessException ) { + if ( !isAbort ) { + complete( -1, firefoxAccessException ); + } + } + + // Call complete if needed + if ( responses ) { + complete( status, statusText, responses, responseHeaders ); + } + }; + + if ( !s.async ) { + // if we're in sync mode we fire the callback + callback(); + } else if ( xhr.readyState === 4 ) { + // (IE6 & IE7) if it's in cache and has been + // retrieved directly we need to fire the callback + setTimeout( callback, 0 ); + } else { + handle = ++xhrId; + if ( xhrOnUnloadAbort ) { + // Create the active xhrs callbacks list if needed + // and attach the unload handler + if ( !xhrCallbacks ) { + xhrCallbacks = {}; + jQuery( window ).unload( xhrOnUnloadAbort ); + } + // Add to list of active xhrs callbacks + xhrCallbacks[ handle ] = callback; + } + xhr.onreadystatechange = callback; + } + }, + + abort: function() { + if ( callback ) { + callback(0,1); + } + } + }; + } + }); +} +var fxNow, timerId, + rfxtypes = /^(?:toggle|show|hide)$/, + rfxnum = new RegExp( "^(?:([-+])=|)(" + core_pnum + ")([a-z%]*)$", "i" ), + rrun = /queueHooks$/, + animationPrefilters = [ defaultPrefilter ], + tweeners = { + "*": [function( prop, value ) { + var end, unit, + tween = this.createTween( prop, value ), + parts = rfxnum.exec( value ), + target = tween.cur(), + start = +target || 0, + scale = 1, + maxIterations = 20; + + if ( parts ) { + end = +parts[2]; + unit = parts[3] || ( jQuery.cssNumber[ prop ] ? "" : "px" ); + + // We need to compute starting value + if ( unit !== "px" && start ) { + // Iteratively approximate from a nonzero starting point + // Prefer the current property, because this process will be trivial if it uses the same units + // Fallback to end or a simple constant + start = jQuery.css( tween.elem, prop, true ) || end || 1; + + do { + // If previous iteration zeroed out, double until we get *something* + // Use a string for doubling factor so we don't accidentally see scale as unchanged below + scale = scale || ".5"; + + // Adjust and apply + start = start / scale; + jQuery.style( tween.elem, prop, start + unit ); + + // Update scale, tolerating zero or NaN from tween.cur() + // And breaking the loop if scale is unchanged or perfect, or if we've just had enough + } while ( scale !== (scale = tween.cur() / target) && scale !== 1 && --maxIterations ); + } + + tween.unit = unit; + tween.start = start; + // If a +=/-= token was provided, we're doing a relative animation + tween.end = parts[1] ? 
start + ( parts[1] + 1 ) * end : end; + } + return tween; + }] + }; + +// Animations created synchronously will run synchronously +function createFxNow() { + setTimeout(function() { + fxNow = undefined; + }, 0 ); + return ( fxNow = jQuery.now() ); +} + +function createTweens( animation, props ) { + jQuery.each( props, function( prop, value ) { + var collection = ( tweeners[ prop ] || [] ).concat( tweeners[ "*" ] ), + index = 0, + length = collection.length; + for ( ; index < length; index++ ) { + if ( collection[ index ].call( animation, prop, value ) ) { + + // we're done with this property + return; + } + } + }); +} + +function Animation( elem, properties, options ) { + var result, + index = 0, + tweenerIndex = 0, + length = animationPrefilters.length, + deferred = jQuery.Deferred().always( function() { + // don't match elem in the :animated selector + delete tick.elem; + }), + tick = function() { + var currentTime = fxNow || createFxNow(), + remaining = Math.max( 0, animation.startTime + animation.duration - currentTime ), + // archaic crash bug won't allow us to use 1 - ( 0.5 || 0 ) (#12497) + temp = remaining / animation.duration || 0, + percent = 1 - temp, + index = 0, + length = animation.tweens.length; + + for ( ; index < length ; index++ ) { + animation.tweens[ index ].run( percent ); + } + + deferred.notifyWith( elem, [ animation, percent, remaining ]); + + if ( percent < 1 && length ) { + return remaining; + } else { + deferred.resolveWith( elem, [ animation ] ); + return false; + } + }, + animation = deferred.promise({ + elem: elem, + props: jQuery.extend( {}, properties ), + opts: jQuery.extend( true, { specialEasing: {} }, options ), + originalProperties: properties, + originalOptions: options, + startTime: fxNow || createFxNow(), + duration: options.duration, + tweens: [], + createTween: function( prop, end, easing ) { + var tween = jQuery.Tween( elem, animation.opts, prop, end, + animation.opts.specialEasing[ prop ] || animation.opts.easing ); + animation.tweens.push( tween ); + return tween; + }, + stop: function( gotoEnd ) { + var index = 0, + // if we are going to the end, we want to run all the tweens + // otherwise we skip this part + length = gotoEnd ? 
animation.tweens.length : 0; + + for ( ; index < length ; index++ ) { + animation.tweens[ index ].run( 1 ); + } + + // resolve when we played the last frame + // otherwise, reject + if ( gotoEnd ) { + deferred.resolveWith( elem, [ animation, gotoEnd ] ); + } else { + deferred.rejectWith( elem, [ animation, gotoEnd ] ); + } + return this; + } + }), + props = animation.props; + + propFilter( props, animation.opts.specialEasing ); + + for ( ; index < length ; index++ ) { + result = animationPrefilters[ index ].call( animation, elem, props, animation.opts ); + if ( result ) { + return result; + } + } + + createTweens( animation, props ); + + if ( jQuery.isFunction( animation.opts.start ) ) { + animation.opts.start.call( elem, animation ); + } + + jQuery.fx.timer( + jQuery.extend( tick, { + anim: animation, + queue: animation.opts.queue, + elem: elem + }) + ); + + // attach callbacks from options + return animation.progress( animation.opts.progress ) + .done( animation.opts.done, animation.opts.complete ) + .fail( animation.opts.fail ) + .always( animation.opts.always ); +} + +function propFilter( props, specialEasing ) { + var index, name, easing, value, hooks; + + // camelCase, specialEasing and expand cssHook pass + for ( index in props ) { + name = jQuery.camelCase( index ); + easing = specialEasing[ name ]; + value = props[ index ]; + if ( jQuery.isArray( value ) ) { + easing = value[ 1 ]; + value = props[ index ] = value[ 0 ]; + } + + if ( index !== name ) { + props[ name ] = value; + delete props[ index ]; + } + + hooks = jQuery.cssHooks[ name ]; + if ( hooks && "expand" in hooks ) { + value = hooks.expand( value ); + delete props[ name ]; + + // not quite $.extend, this wont overwrite keys already present. + // also - reusing 'index' from above because we have the correct "name" + for ( index in value ) { + if ( !( index in props ) ) { + props[ index ] = value[ index ]; + specialEasing[ index ] = easing; + } + } + } else { + specialEasing[ name ] = easing; + } + } +} + +jQuery.Animation = jQuery.extend( Animation, { + + tweener: function( props, callback ) { + if ( jQuery.isFunction( props ) ) { + callback = props; + props = [ "*" ]; + } else { + props = props.split(" "); + } + + var prop, + index = 0, + length = props.length; + + for ( ; index < length ; index++ ) { + prop = props[ index ]; + tweeners[ prop ] = tweeners[ prop ] || []; + tweeners[ prop ].unshift( callback ); + } + }, + + prefilter: function( callback, prepend ) { + if ( prepend ) { + animationPrefilters.unshift( callback ); + } else { + animationPrefilters.push( callback ); + } + } +}); + +function defaultPrefilter( elem, props, opts ) { + var index, prop, value, length, dataShow, toggle, tween, hooks, oldfire, + anim = this, + style = elem.style, + orig = {}, + handled = [], + hidden = elem.nodeType && isHidden( elem ); + + // handle queue: false promises + if ( !opts.queue ) { + hooks = jQuery._queueHooks( elem, "fx" ); + if ( hooks.unqueued == null ) { + hooks.unqueued = 0; + oldfire = hooks.empty.fire; + hooks.empty.fire = function() { + if ( !hooks.unqueued ) { + oldfire(); + } + }; + } + hooks.unqueued++; + + anim.always(function() { + // doing this makes sure that the complete handler will be called + // before this completes + anim.always(function() { + hooks.unqueued--; + if ( !jQuery.queue( elem, "fx" ).length ) { + hooks.empty.fire(); + } + }); + }); + } + + // height/width overflow pass + if ( elem.nodeType === 1 && ( "height" in props || "width" in props ) ) { + // Make sure that nothing sneaks out + // 
Record all 3 overflow attributes because IE does not + // change the overflow attribute when overflowX and + // overflowY are set to the same value + opts.overflow = [ style.overflow, style.overflowX, style.overflowY ]; + + // Set display property to inline-block for height/width + // animations on inline elements that are having width/height animated + if ( jQuery.css( elem, "display" ) === "inline" && + jQuery.css( elem, "float" ) === "none" ) { + + // inline-level elements accept inline-block; + // block-level elements need to be inline with layout + if ( !jQuery.support.inlineBlockNeedsLayout || css_defaultDisplay( elem.nodeName ) === "inline" ) { + style.display = "inline-block"; + + } else { + style.zoom = 1; + } + } + } + + if ( opts.overflow ) { + style.overflow = "hidden"; + if ( !jQuery.support.shrinkWrapBlocks ) { + anim.done(function() { + style.overflow = opts.overflow[ 0 ]; + style.overflowX = opts.overflow[ 1 ]; + style.overflowY = opts.overflow[ 2 ]; + }); + } + } + + + // show/hide pass + for ( index in props ) { + value = props[ index ]; + if ( rfxtypes.exec( value ) ) { + delete props[ index ]; + toggle = toggle || value === "toggle"; + if ( value === ( hidden ? "hide" : "show" ) ) { + continue; + } + handled.push( index ); + } + } + + length = handled.length; + if ( length ) { + dataShow = jQuery._data( elem, "fxshow" ) || jQuery._data( elem, "fxshow", {} ); + if ( "hidden" in dataShow ) { + hidden = dataShow.hidden; + } + + // store state if its toggle - enables .stop().toggle() to "reverse" + if ( toggle ) { + dataShow.hidden = !hidden; + } + if ( hidden ) { + jQuery( elem ).show(); + } else { + anim.done(function() { + jQuery( elem ).hide(); + }); + } + anim.done(function() { + var prop; + jQuery.removeData( elem, "fxshow", true ); + for ( prop in orig ) { + jQuery.style( elem, prop, orig[ prop ] ); + } + }); + for ( index = 0 ; index < length ; index++ ) { + prop = handled[ index ]; + tween = anim.createTween( prop, hidden ? dataShow[ prop ] : 0 ); + orig[ prop ] = dataShow[ prop ] || jQuery.style( elem, prop ); + + if ( !( prop in dataShow ) ) { + dataShow[ prop ] = tween.start; + if ( hidden ) { + tween.end = tween.start; + tween.start = prop === "width" || prop === "height" ? 1 : 0; + } + } + } + } +} + +function Tween( elem, options, prop, end, easing ) { + return new Tween.prototype.init( elem, options, prop, end, easing ); +} +jQuery.Tween = Tween; + +Tween.prototype = { + constructor: Tween, + init: function( elem, options, prop, end, easing, unit ) { + this.elem = elem; + this.prop = prop; + this.easing = easing || "swing"; + this.options = options; + this.start = this.now = this.cur(); + this.end = end; + this.unit = unit || ( jQuery.cssNumber[ prop ] ? "" : "px" ); + }, + cur: function() { + var hooks = Tween.propHooks[ this.prop ]; + + return hooks && hooks.get ? 
+ hooks.get( this ) : + Tween.propHooks._default.get( this ); + }, + run: function( percent ) { + var eased, + hooks = Tween.propHooks[ this.prop ]; + + if ( this.options.duration ) { + this.pos = eased = jQuery.easing[ this.easing ]( + percent, this.options.duration * percent, 0, 1, this.options.duration + ); + } else { + this.pos = eased = percent; + } + this.now = ( this.end - this.start ) * eased + this.start; + + if ( this.options.step ) { + this.options.step.call( this.elem, this.now, this ); + } + + if ( hooks && hooks.set ) { + hooks.set( this ); + } else { + Tween.propHooks._default.set( this ); + } + return this; + } +}; + +Tween.prototype.init.prototype = Tween.prototype; + +Tween.propHooks = { + _default: { + get: function( tween ) { + var result; + + if ( tween.elem[ tween.prop ] != null && + (!tween.elem.style || tween.elem.style[ tween.prop ] == null) ) { + return tween.elem[ tween.prop ]; + } + + // passing any value as a 4th parameter to .css will automatically + // attempt a parseFloat and fallback to a string if the parse fails + // so, simple values such as "10px" are parsed to Float. + // complex values such as "rotate(1rad)" are returned as is. + result = jQuery.css( tween.elem, tween.prop, false, "" ); + // Empty strings, null, undefined and "auto" are converted to 0. + return !result || result === "auto" ? 0 : result; + }, + set: function( tween ) { + // use step hook for back compat - use cssHook if its there - use .style if its + // available and use plain properties where available + if ( jQuery.fx.step[ tween.prop ] ) { + jQuery.fx.step[ tween.prop ]( tween ); + } else if ( tween.elem.style && ( tween.elem.style[ jQuery.cssProps[ tween.prop ] ] != null || jQuery.cssHooks[ tween.prop ] ) ) { + jQuery.style( tween.elem, tween.prop, tween.now + tween.unit ); + } else { + tween.elem[ tween.prop ] = tween.now; + } + } + } +}; + +// Remove in 2.0 - this supports IE8's panic based approach +// to setting things on disconnected nodes + +Tween.propHooks.scrollTop = Tween.propHooks.scrollLeft = { + set: function( tween ) { + if ( tween.elem.nodeType && tween.elem.parentNode ) { + tween.elem[ tween.prop ] = tween.now; + } + } +}; + +jQuery.each([ "toggle", "show", "hide" ], function( i, name ) { + var cssFn = jQuery.fn[ name ]; + jQuery.fn[ name ] = function( speed, easing, callback ) { + return speed == null || typeof speed === "boolean" || + // special check for .toggle( handler, handler, ... ) + ( !i && jQuery.isFunction( speed ) && jQuery.isFunction( easing ) ) ? + cssFn.apply( this, arguments ) : + this.animate( genFx( name, true ), speed, easing, callback ); + }; +}); + +jQuery.fn.extend({ + fadeTo: function( speed, to, easing, callback ) { + + // show any hidden elements after setting opacity to 0 + return this.filter( isHidden ).css( "opacity", 0 ).show() + + // animate to the value specified + .end().animate({ opacity: to }, speed, easing, callback ); + }, + animate: function( prop, speed, easing, callback ) { + var empty = jQuery.isEmptyObject( prop ), + optall = jQuery.speed( speed, easing, callback ), + doAnimation = function() { + // Operate on a copy of prop so per-property easing won't be lost + var anim = Animation( this, jQuery.extend( {}, prop ), optall ); + + // Empty animations resolve immediately + if ( empty ) { + anim.stop( true ); + } + }; + + return empty || optall.queue === false ? 
+ this.each( doAnimation ) : + this.queue( optall.queue, doAnimation ); + }, + stop: function( type, clearQueue, gotoEnd ) { + var stopQueue = function( hooks ) { + var stop = hooks.stop; + delete hooks.stop; + stop( gotoEnd ); + }; + + if ( typeof type !== "string" ) { + gotoEnd = clearQueue; + clearQueue = type; + type = undefined; + } + if ( clearQueue && type !== false ) { + this.queue( type || "fx", [] ); + } + + return this.each(function() { + var dequeue = true, + index = type != null && type + "queueHooks", + timers = jQuery.timers, + data = jQuery._data( this ); + + if ( index ) { + if ( data[ index ] && data[ index ].stop ) { + stopQueue( data[ index ] ); + } + } else { + for ( index in data ) { + if ( data[ index ] && data[ index ].stop && rrun.test( index ) ) { + stopQueue( data[ index ] ); + } + } + } + + for ( index = timers.length; index--; ) { + if ( timers[ index ].elem === this && (type == null || timers[ index ].queue === type) ) { + timers[ index ].anim.stop( gotoEnd ); + dequeue = false; + timers.splice( index, 1 ); + } + } + + // start the next in the queue if the last step wasn't forced + // timers currently will call their complete callbacks, which will dequeue + // but only if they were gotoEnd + if ( dequeue || !gotoEnd ) { + jQuery.dequeue( this, type ); + } + }); + } +}); + +// Generate parameters to create a standard animation +function genFx( type, includeWidth ) { + var which, + attrs = { height: type }, + i = 0; + + // if we include width, step value is 1 to do all cssExpand values, + // if we don't include width, step value is 2 to skip over Left and Right + includeWidth = includeWidth? 1 : 0; + for( ; i < 4 ; i += 2 - includeWidth ) { + which = cssExpand[ i ]; + attrs[ "margin" + which ] = attrs[ "padding" + which ] = type; + } + + if ( includeWidth ) { + attrs.opacity = attrs.width = type; + } + + return attrs; +} + +// Generate shortcuts for custom animations +jQuery.each({ + slideDown: genFx("show"), + slideUp: genFx("hide"), + slideToggle: genFx("toggle"), + fadeIn: { opacity: "show" }, + fadeOut: { opacity: "hide" }, + fadeToggle: { opacity: "toggle" } +}, function( name, props ) { + jQuery.fn[ name ] = function( speed, easing, callback ) { + return this.animate( props, speed, easing, callback ); + }; +}); + +jQuery.speed = function( speed, easing, fn ) { + var opt = speed && typeof speed === "object" ? jQuery.extend( {}, speed ) : { + complete: fn || !fn && easing || + jQuery.isFunction( speed ) && speed, + duration: speed, + easing: fn && easing || easing && !jQuery.isFunction( easing ) && easing + }; + + opt.duration = jQuery.fx.off ? 0 : typeof opt.duration === "number" ? opt.duration : + opt.duration in jQuery.fx.speeds ? 
jQuery.fx.speeds[ opt.duration ] : jQuery.fx.speeds._default; + + // normalize opt.queue - true/undefined/null -> "fx" + if ( opt.queue == null || opt.queue === true ) { + opt.queue = "fx"; + } + + // Queueing + opt.old = opt.complete; + + opt.complete = function() { + if ( jQuery.isFunction( opt.old ) ) { + opt.old.call( this ); + } + + if ( opt.queue ) { + jQuery.dequeue( this, opt.queue ); + } + }; + + return opt; +}; + +jQuery.easing = { + linear: function( p ) { + return p; + }, + swing: function( p ) { + return 0.5 - Math.cos( p*Math.PI ) / 2; + } +}; + +jQuery.timers = []; +jQuery.fx = Tween.prototype.init; +jQuery.fx.tick = function() { + var timer, + timers = jQuery.timers, + i = 0; + + fxNow = jQuery.now(); + + for ( ; i < timers.length; i++ ) { + timer = timers[ i ]; + // Checks the timer has not already been removed + if ( !timer() && timers[ i ] === timer ) { + timers.splice( i--, 1 ); + } + } + + if ( !timers.length ) { + jQuery.fx.stop(); + } + fxNow = undefined; +}; + +jQuery.fx.timer = function( timer ) { + if ( timer() && jQuery.timers.push( timer ) && !timerId ) { + timerId = setInterval( jQuery.fx.tick, jQuery.fx.interval ); + } +}; + +jQuery.fx.interval = 13; + +jQuery.fx.stop = function() { + clearInterval( timerId ); + timerId = null; +}; + +jQuery.fx.speeds = { + slow: 600, + fast: 200, + // Default speed + _default: 400 +}; + +// Back Compat <1.8 extension point +jQuery.fx.step = {}; + +if ( jQuery.expr && jQuery.expr.filters ) { + jQuery.expr.filters.animated = function( elem ) { + return jQuery.grep(jQuery.timers, function( fn ) { + return elem === fn.elem; + }).length; + }; +} +var rroot = /^(?:body|html)$/i; + +jQuery.fn.offset = function( options ) { + if ( arguments.length ) { + return options === undefined ? + this : + this.each(function( i ) { + jQuery.offset.setOffset( this, options, i ); + }); + } + + var docElem, body, win, clientTop, clientLeft, scrollTop, scrollLeft, + box = { top: 0, left: 0 }, + elem = this[ 0 ], + doc = elem && elem.ownerDocument; + + if ( !doc ) { + return; + } + + if ( (body = doc.body) === elem ) { + return jQuery.offset.bodyOffset( elem ); + } + + docElem = doc.documentElement; + + // Make sure it's not a disconnected DOM node + if ( !jQuery.contains( docElem, elem ) ) { + return box; + } + + // If we don't have gBCR, just use 0,0 rather than error + // BlackBerry 5, iOS 3 (original iPhone) + if ( typeof elem.getBoundingClientRect !== "undefined" ) { + box = elem.getBoundingClientRect(); + } + win = getWindow( doc ); + clientTop = docElem.clientTop || body.clientTop || 0; + clientLeft = docElem.clientLeft || body.clientLeft || 0; + scrollTop = win.pageYOffset || docElem.scrollTop; + scrollLeft = win.pageXOffset || docElem.scrollLeft; + return { + top: box.top + scrollTop - clientTop, + left: box.left + scrollLeft - clientLeft + }; +}; + +jQuery.offset = { + + bodyOffset: function( body ) { + var top = body.offsetTop, + left = body.offsetLeft; + + if ( jQuery.support.doesNotIncludeMarginInBodyOffset ) { + top += parseFloat( jQuery.css(body, "marginTop") ) || 0; + left += parseFloat( jQuery.css(body, "marginLeft") ) || 0; + } + + return { top: top, left: left }; + }, + + setOffset: function( elem, options, i ) { + var position = jQuery.css( elem, "position" ); + + // set position first, in-case top/left are set even on static elem + if ( position === "static" ) { + elem.style.position = "relative"; + } + + var curElem = jQuery( elem ), + curOffset = curElem.offset(), + curCSSTop = jQuery.css( elem, "top" ), + curCSSLeft = 
jQuery.css( elem, "left" ), + calculatePosition = ( position === "absolute" || position === "fixed" ) && jQuery.inArray("auto", [curCSSTop, curCSSLeft]) > -1, + props = {}, curPosition = {}, curTop, curLeft; + + // need to be able to calculate position if either top or left is auto and position is either absolute or fixed + if ( calculatePosition ) { + curPosition = curElem.position(); + curTop = curPosition.top; + curLeft = curPosition.left; + } else { + curTop = parseFloat( curCSSTop ) || 0; + curLeft = parseFloat( curCSSLeft ) || 0; + } + + if ( jQuery.isFunction( options ) ) { + options = options.call( elem, i, curOffset ); + } + + if ( options.top != null ) { + props.top = ( options.top - curOffset.top ) + curTop; + } + if ( options.left != null ) { + props.left = ( options.left - curOffset.left ) + curLeft; + } + + if ( "using" in options ) { + options.using.call( elem, props ); + } else { + curElem.css( props ); + } + } +}; + + +jQuery.fn.extend({ + + position: function() { + if ( !this[0] ) { + return; + } + + var elem = this[0], + + // Get *real* offsetParent + offsetParent = this.offsetParent(), + + // Get correct offsets + offset = this.offset(), + parentOffset = rroot.test(offsetParent[0].nodeName) ? { top: 0, left: 0 } : offsetParent.offset(); + + // Subtract element margins + // note: when an element has margin: auto the offsetLeft and marginLeft + // are the same in Safari causing offset.left to incorrectly be 0 + offset.top -= parseFloat( jQuery.css(elem, "marginTop") ) || 0; + offset.left -= parseFloat( jQuery.css(elem, "marginLeft") ) || 0; + + // Add offsetParent borders + parentOffset.top += parseFloat( jQuery.css(offsetParent[0], "borderTopWidth") ) || 0; + parentOffset.left += parseFloat( jQuery.css(offsetParent[0], "borderLeftWidth") ) || 0; + + // Subtract the two offsets + return { + top: offset.top - parentOffset.top, + left: offset.left - parentOffset.left + }; + }, + + offsetParent: function() { + return this.map(function() { + var offsetParent = this.offsetParent || document.body; + while ( offsetParent && (!rroot.test(offsetParent.nodeName) && jQuery.css(offsetParent, "position") === "static") ) { + offsetParent = offsetParent.offsetParent; + } + return offsetParent || document.body; + }); + } +}); + + +// Create scrollLeft and scrollTop methods +jQuery.each( {scrollLeft: "pageXOffset", scrollTop: "pageYOffset"}, function( method, prop ) { + var top = /Y/.test( prop ); + + jQuery.fn[ method ] = function( val ) { + return jQuery.access( this, function( elem, method, val ) { + var win = getWindow( elem ); + + if ( val === undefined ) { + return win ? (prop in win) ? win[ prop ] : + win.document.documentElement[ method ] : + elem[ method ]; + } + + if ( win ) { + win.scrollTo( + !top ? val : jQuery( win ).scrollLeft(), + top ? val : jQuery( win ).scrollTop() + ); + + } else { + elem[ method ] = val; + } + }, method, val, arguments.length, null ); + }; +}); + +function getWindow( elem ) { + return jQuery.isWindow( elem ) ? + elem : + elem.nodeType === 9 ? 
+ elem.defaultView || elem.parentWindow : + false; +} +// Create innerHeight, innerWidth, height, width, outerHeight and outerWidth methods +jQuery.each( { Height: "height", Width: "width" }, function( name, type ) { + jQuery.each( { padding: "inner" + name, content: type, "": "outer" + name }, function( defaultExtra, funcName ) { + // margin is only for outerHeight, outerWidth + jQuery.fn[ funcName ] = function( margin, value ) { + var chainable = arguments.length && ( defaultExtra || typeof margin !== "boolean" ), + extra = defaultExtra || ( margin === true || value === true ? "margin" : "border" ); + + return jQuery.access( this, function( elem, type, value ) { + var doc; + + if ( jQuery.isWindow( elem ) ) { + // As of 5/8/2012 this will yield incorrect results for Mobile Safari, but there + // isn't a whole lot we can do. See pull request at this URL for discussion: + // https://github.com/jquery/jquery/pull/764 + return elem.document.documentElement[ "client" + name ]; + } + + // Get document width or height + if ( elem.nodeType === 9 ) { + doc = elem.documentElement; + + // Either scroll[Width/Height] or offset[Width/Height] or client[Width/Height], whichever is greatest + // unfortunately, this causes bug #3838 in IE6/8 only, but there is currently no good, small way to fix it. + return Math.max( + elem.body[ "scroll" + name ], doc[ "scroll" + name ], + elem.body[ "offset" + name ], doc[ "offset" + name ], + doc[ "client" + name ] + ); + } + + return value === undefined ? + // Get width or height on the element, requesting but not forcing parseFloat + jQuery.css( elem, type, value, extra ) : + + // Set width or height on the element + jQuery.style( elem, type, value, extra ); + }, type, chainable ? margin : undefined, chainable, null ); + }; + }); +}); +// Expose jQuery to the global object +window.jQuery = window.$ = jQuery; + +// Expose jQuery as an AMD module, but only for AMD loaders that +// understand the issues with loading multiple versions of jQuery +// in a page that all might call define(). The loader will indicate +// they have special allowances for multiple jQuery versions by +// specifying define.amd.jQuery = true. Register as a named module, +// since jQuery can be concatenated with other files that may use define, +// but not use a proper concatenation script that understands anonymous +// AMD modules. A named AMD is safest and most robust way to register. +// Lowercase jquery is used because AMD module names are derived from +// file names, and jQuery is normally delivered in a lowercase file name. +// Do this after creating the global so that if an AMD module wants to call +// noConflict to hide this version of jQuery, it will work. 
+if ( typeof define === "function" && define.amd && define.amd.jQuery ) { + define( "jquery", [], function () { return jQuery; } ); +} + +})( window ); diff --git a/qa/workunits/erasure-code/plot.js b/qa/workunits/erasure-code/plot.js new file mode 100644 index 000000000..bd2bba5bb --- /dev/null +++ b/qa/workunits/erasure-code/plot.js @@ -0,0 +1,82 @@ +$(function() { + encode = []; + if (typeof encode_vandermonde_isa != 'undefined') { + encode.push({ + data: encode_vandermonde_isa, + label: "ISA, Vandermonde", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof encode_vandermonde_jerasure != 'undefined') { + encode.push({ + data: encode_vandermonde_jerasure, + label: "Jerasure Generic, Vandermonde", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof encode_cauchy_isa != 'undefined') { + encode.push({ + data: encode_cauchy_isa, + label: "ISA, Cauchy", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof encode_cauchy_jerasure != 'undefined') { + encode.push({ + data: encode_cauchy_jerasure, + label: "Jerasure, Cauchy", + points: { show: true }, + lines: { show: true }, + }); + } + $.plot("#encode", encode, { + xaxis: { + mode: "categories", + tickLength: 0 + }, + }); + + decode = []; + if (typeof decode_vandermonde_isa != 'undefined') { + decode.push({ + data: decode_vandermonde_isa, + label: "ISA, Vandermonde", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof decode_vandermonde_jerasure != 'undefined') { + decode.push({ + data: decode_vandermonde_jerasure, + label: "Jerasure Generic, Vandermonde", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof decode_cauchy_isa != 'undefined') { + decode.push({ + data: decode_cauchy_isa, + label: "ISA, Cauchy", + points: { show: true }, + lines: { show: true }, + }); + } + if (typeof decode_cauchy_jerasure != 'undefined') { + decode.push({ + data: decode_cauchy_jerasure, + label: "Jerasure, Cauchy", + points: { show: true }, + lines: { show: true }, + }); + } + $.plot("#decode", decode, { + xaxis: { + mode: "categories", + tickLength: 0 + }, + }); + +}); diff --git a/qa/workunits/false.sh b/qa/workunits/false.sh new file mode 100644 index 000000000..8a961b329 --- /dev/null +++ b/qa/workunits/false.sh @@ -0,0 +1,3 @@ +#!/bin/sh -ex + +false
\ No newline at end of file diff --git a/qa/workunits/fs/.gitignore b/qa/workunits/fs/.gitignore new file mode 100644 index 000000000..f7f7a0614 --- /dev/null +++ b/qa/workunits/fs/.gitignore @@ -0,0 +1 @@ +test_o_trunc diff --git a/qa/workunits/fs/Makefile b/qa/workunits/fs/Makefile new file mode 100644 index 000000000..c9934254d --- /dev/null +++ b/qa/workunits/fs/Makefile @@ -0,0 +1,11 @@ +CFLAGS = -Wall -Wextra -D_GNU_SOURCE + +TARGETS = test_o_trunc + +.c: + $(CC) $(CFLAGS) $@.c -o $@ + +all: $(TARGETS) + +clean: + rm $(TARGETS) diff --git a/qa/workunits/fs/cephfs_mirror_ha_gen.sh b/qa/workunits/fs/cephfs_mirror_ha_gen.sh new file mode 100755 index 000000000..35ee9d4c7 --- /dev/null +++ b/qa/workunits/fs/cephfs_mirror_ha_gen.sh @@ -0,0 +1,69 @@ +#!/bin/bash -ex +# +# cephfs_mirror_ha_gen.sh - generate workload to synchronize +# + +. $(dirname $0)/cephfs_mirror_helpers.sh + +cleanup() +{ + for i in `seq 1 $NR_DIRECTORIES` + do + local repo_name="${REPO_PATH_PFX}_$i" + for j in `seq 1 $NR_SNAPSHOTS` + do + snap_name=$repo_name/.snap/snap_$j + if test -d $snap_name; then + rmdir $snap_name + fi + done + done + exit 1 +} +trap cleanup EXIT + +configure_peer() +{ + ceph mgr module enable mirroring + ceph fs snapshot mirror enable $PRIMARY_FS + ceph fs snapshot mirror peer_add $PRIMARY_FS client.mirror_remote@ceph $BACKUP_FS + + for i in `seq 1 $NR_DIRECTORIES` + do + local repo_name="${REPO_PATH_PFX}_$i" + ceph fs snapshot mirror add $PRIMARY_FS "$MIRROR_SUBDIR/$repo_name" + done +} + +create_snaps() +{ + for i in `seq 1 $NR_DIRECTORIES` + do + local repo_name="${REPO_PATH_PFX}_$i" + for j in `seq 1 $NR_SNAPSHOTS` + do + snap_name=$repo_name/.snap/snap_$j + r=$(( $RANDOM % 100 + 5 )) + arr=($repo_name "reset" "--hard" "HEAD~$r") + exec_git_cmd "${arr[@]}" + mkdir $snap_name + store_checksum $snap_name + done + done +} + +unset CEPH_CLI_TEST_DUP_COMMAND + +echo "running generator on prmary file system..." + +# setup git repos to be used as data set +setup_repos + +# turn on mirroring, add peers... +configure_peer + +# snapshots on primary +create_snaps + +# do not cleanup when exiting on success.. +trap - EXIT diff --git a/qa/workunits/fs/cephfs_mirror_ha_verify.sh b/qa/workunits/fs/cephfs_mirror_ha_verify.sh new file mode 100755 index 000000000..8d8b3859c --- /dev/null +++ b/qa/workunits/fs/cephfs_mirror_ha_verify.sh @@ -0,0 +1,40 @@ +#!/bin/bash -ex +# +# cephfs_mirror_ha_verify.sh - verify synchronized snapshots +# + +. $(dirname $0)/cephfs_mirror_helpers.sh + +echo "running verifier on secondary file system..." + +for i in `seq 1 $NR_DIRECTORIES` +do + repo_name="${REPO_PATH_PFX}_$i" + for j in `seq 1 $NR_SNAPSHOTS` + do + for s in 1 1 2 4 4 4 4 4 8 8 8 8 16 16 32 64 64 128 128 + do + sleep $s + snap_name=$repo_name/.snap/snap_$j + if test -d $repo_name; then + echo "checking snapshot [$snap_name] in $repo_name" + if test -d $snap_name; then + echo "generating hash for $snap_name" + cksum='' + calc_checksum $snap_name cksum + ret=$(compare_checksum $cksum $snap_name) + if [ $ret -ne 0 ]; then + echo "checksum failed $snap_name ($cksum)" + return $ret + else + echo "checksum matched $snap_name ($cksum)" + break + fi + fi + fi + done + echo "couldn't complete verification for: $snap_name" + done +done + +echo "verify done!" 
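A sketch of how the two mirroring scripts above fit together (illustrative only; mount points and script paths below are made up): cephfs_mirror_ha_gen.sh is run against a mount of the primary file system ('dc'), enables mirroring, registers the test directories as mirrored paths, and records an md5 checksum under /tmp for every snapshot it creates; cephfs_mirror_ha_verify.sh is run against the backup file system ('dc-backup') and, with increasing sleeps, waits for each snapshot to be synchronized and compares it against the recorded checksum. This assumes both scripts run on the same client, so the /tmp checksum files written by the generator are visible to the verifier.

    cd /mnt/dc-primary/mirror                            # hypothetical primary mount, mirrored subdirectory
    /path/to/qa/workunits/fs/cephfs_mirror_ha_gen.sh     # create repos, enable mirroring, snapshot, record checksums
    cd /mnt/dc-backup/mirror                             # hypothetical backup mount
    /path/to/qa/workunits/fs/cephfs_mirror_ha_verify.sh  # wait for sync, compare checksums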
diff --git a/qa/workunits/fs/cephfs_mirror_helpers.sh b/qa/workunits/fs/cephfs_mirror_helpers.sh new file mode 100644 index 000000000..69f1c6f3d --- /dev/null +++ b/qa/workunits/fs/cephfs_mirror_helpers.sh @@ -0,0 +1,66 @@ +PRIMARY_FS='dc' +BACKUP_FS='dc-backup' + +REPO=ceph-qa-suite +REPO_DIR=ceph_repo +REPO_PATH_PFX="$REPO_DIR/$REPO" + +NR_DIRECTORIES=4 +NR_SNAPSHOTS=4 +MIRROR_SUBDIR='/mirror' + +calc_checksum() +{ + local path=$1 + local -n ref=$2 + ref=`find -L $path -type f -exec md5sum {} + | awk '{ print $1 }' | md5sum | awk '{ print $1 }'` +} + +store_checksum() +{ + local path=$1 + local cksum='' #something invalid + local fhash=`echo -n $path | md5sum | awk '{ print $1 }'` + calc_checksum $path cksum + echo -n $cksum > "/tmp/primary-$fhash" +} + +compare_checksum() +{ + local ret=0 + local cksum=$1 + local path=$2 + local fhash=`echo -n $path | md5sum | awk '{ print $1 }'` + local cksum_ondisk=`cat /tmp/primary-$fhash` + if [ $cksum != $cksum_ondisk ]; then + echo "$cksum <> $cksum_ondisk" + ret=1 + fi + echo $ret +} + +exec_git_cmd() +{ + local arg=("$@") + local repo_name=${arg[0]} + local cmd=${arg[@]:1} + git --git-dir "$repo_name/.git" $cmd +} + +clone_repo() +{ + local repo_name=$1 + git clone --branch giant "http://github.com/ceph/$REPO" $repo_name +} + +setup_repos() +{ + mkdir "$REPO_DIR" + + for i in `seq 1 $NR_DIRECTORIES` + do + local repo_name="${REPO_PATH_PFX}_$i" + mkdir $repo_name + clone_repo $repo_name + done +} diff --git a/qa/workunits/fs/damage/test-first-damage.sh b/qa/workunits/fs/damage/test-first-damage.sh new file mode 100755 index 000000000..57447b957 --- /dev/null +++ b/qa/workunits/fs/damage/test-first-damage.sh @@ -0,0 +1,194 @@ +#!/bin/bash + +set -ex + +FIRST_DAMAGE="first-damage.py" +FS=cephfs +METADATA_POOL=cephfs_meta +MOUNT=~/mnt/mnt.0 +PYTHON=python3 + +function usage { + printf '%s: [--fs=<fs_name>] [--metadata-pool=<pool>] [--first-damage=</path/to/first-damage.py>]\n' + exit 1 +} + + +function create { + ceph config set mds mds_bal_fragment_dirs 0 + mkdir dir + DIR_INODE=$(stat -c '%i' dir) + touch dir/a + touch dir/"a space" + touch -- $(printf 'dir/\xff') + mkdir dir/.snap/1 + mkdir dir/.snap/2 + # two snaps + rm dir/a + mkdir dir/.snap/3 + # not present in HEAD + touch dir/a + mkdir dir/.snap/4 + # one snap + rm dir/a + touch dir/a + mkdir dir/.snap/5 + # unlink then create + rm dir/a + touch dir/a + # unlink then create, HEAD not snapped + ls dir/.snap/*/ + mkdir big + BIG_DIR_INODE=$(stat -c '%i' big) + for i in `seq 1 15000`; do + touch $(printf 'big/%08d' $i) + done +} + +function flush { + ceph tell mds."$FS":0 flush journal +} + +function damage { + local IS=$(printf '%llx.%08llx' "$DIR_INODE" 0) + local LS=$(ceph tell mds."$FS":0 dump snaps | jq .last_created) + + local T=$(mktemp -p /tmp) + + # nuke snap 1 version of "a" + rados --pool="$METADATA_POOL" getomapval "$IS" a_$(printf %x $((LS-4))) "$T" + printf '\xff\xff\xff\xf0' | dd of="$T" count=4 bs=1 conv=notrunc,nocreat + rados --pool="$METADATA_POOL" setomapval "$IS" a_$(printf %x $((LS-4))) --input-file="$T" + + # nuke snap 4 version of "a" + rados --pool="$METADATA_POOL" getomapval "$IS" a_$(printf %x $((LS-1))) "$T" + printf '\xff\xff\xff\xff' | dd of="$T" count=4 bs=1 conv=notrunc,nocreat + rados --pool="$METADATA_POOL" setomapval "$IS" a_$(printf %x $((LS-1))) --input-file="$T" + + # screw up HEAD + rados --pool="$METADATA_POOL" getomapval "$IS" a_head "$T" + printf '\xfe\xff\xff\xff' | dd of="$T" count=4 bs=1 conv=notrunc,nocreat + rados --pool="$METADATA_POOL" 
setomapval "$IS" a_head --input-file="$T" + + # screw up HEAD on what dentry in big + IS=$(printf '%llx.%08llx' "$BIG_DIR_INODE" 0) + rados --pool="$METADATA_POOL" getomapval "$IS" 00009999_head "$T" + printf '\xfe\xff\xff\xff' | dd of="$T" count=4 bs=1 conv=notrunc,nocreat + rados --pool="$METADATA_POOL" setomapval "$IS" 00009999_head --input-file="$T" + + rm -f "$T" +} + +function recover { + flush + ceph fs fail "$FS" + sleep 5 + cephfs-journal-tool --rank="$FS":0 event recover_dentries summary + cephfs-journal-tool --rank="$FS":0 journal reset + "$PYTHON" $FIRST_DAMAGE --debug /tmp/debug1 --memo /tmp/memo1 "$METADATA_POOL" + "$PYTHON" $FIRST_DAMAGE --debug /tmp/debug2 --memo /tmp/memo2 --repair-nosnap "$METADATA_POOL" + "$PYTHON" $FIRST_DAMAGE --debug /tmp/debug3 --memo /tmp/memo3 --remove "$METADATA_POOL" + ceph fs set "$FS" joinable true +} + +function check { + stat dir || exit 1 + stat dir/a || exit 1 + for i in `seq 1 5`; do + stat dir/.snap/$i || exit 2 + done + stat dir/.snap/2/a || exit 3 + stat dir/.snap/5/a || exit 4 + if stat dir/.snap/1/a; then + echo should be gone + exit 5 + fi + if stat dir/.snap/3/a; then + echo should not ever exist + exit 6 + fi + if stat dir/.snap/4/a; then + echo should be gone + exit 7 + fi +} + +function cleanup { + rmdir dir/.snap/* + find dir + rm -rf dir +} + +function mount { + sudo --preserve-env=CEPH_CONF bin/mount.ceph :/ "$MOUNT" -o name=admin,noshare + df -h "$MOUNT" +} + +function main { + eval set -- $(getopt --name "$0" --options '' --longoptions 'help,fs:,metadata-pool:,first-damage:,mount:,python:' -- "$@") + + while [ "$#" -gt 0 ]; do + echo "$*" + echo "$1" + case "$1" in + -h|--help) + usage + ;; + --fs) + FS="$2" + shift 2 + ;; + --metadata-pool) + METADATA_POOL="$2" + shift 2 + ;; + --mount) + MOUNT="$2" + shift 2 + ;; + --first-damage) + FIRST_DAMAGE="$2" + shift 2 + ;; + --python) + PYTHON="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + usage + ;; + esac + done + + mount + + pushd "$MOUNT" + create + popd + + sudo umount -f "$MOUNT" + + # flush dentries/inodes to omap + flush + + damage + + recover + + sleep 5 # for mds to join + + mount + + pushd "$MOUNT" + check + cleanup + popd + + sudo umount -f "$MOUNT" +} + +main "$@" diff --git a/qa/workunits/fs/fscrypt.sh b/qa/workunits/fs/fscrypt.sh new file mode 100755 index 000000000..ca856a62e --- /dev/null +++ b/qa/workunits/fs/fscrypt.sh @@ -0,0 +1,119 @@ +#!/usr/bin/env bash + +set -xe + +mydir=`dirname $0` + +if [ $# -ne 2 ] +then + echo "2 parameters are required!\n" + echo "Usage:" + echo " fscrypt.sh <type> <testdir>" + echo " type: should be any of 'none', 'unlocked' or 'locked'" + echo " testdir: the test direcotry name" + exit 1 +fi + +fscrypt=$1 +testcase=$2 +testdir=fscrypt_test_${fscrypt}_${testcase} +mkdir $testdir + +XFSPROGS_DIR='xfprogs-dev-dir' +XFSTESTS_DIR='xfstest-dev-dir' +export XFS_IO_PROG="$(type -P xfs_io)" + +# Setup the xfstests env +setup_xfstests_env() +{ + git clone https://git.ceph.com/xfstests-dev.git $XFSTESTS_DIR --depth 1 + pushd $XFSTESTS_DIR + . 
common/encrypt + popd +} + +install_deps() +{ + local system_value=$(sudo lsb_release -is | awk '{print tolower($0)}') + case $system_value in + "centos" | "centosstream" | "fedora") + sudo yum install -y inih-devel userspace-rcu-devel \ + libblkid-devel gettext libedit-devel \ + libattr-devel device-mapper-devel libicu-devel + ;; + "ubuntu" | "debian") + sudo apt-get install -y libinih-dev liburcu-dev \ + libblkid-dev gettext libedit-dev libattr1-dev \ + libdevmapper-dev libicu-dev pkg-config + ;; + *) + echo "Unsupported distro $system_value" + exit 1 + ;; + esac +} + +# Install xfsprogs-dev from source to support "add_enckey" for xfs_io +install_xfsprogs() +{ + local install_xfsprogs=0 + + xfs_io -c "help add_enckey" | grep -q 'not found' && install_xfsprogs=1 + + if [ $install_xfsprogs -eq 1 ]; then + install_deps + + git clone https://git.ceph.com/xfsprogs-dev.git $XFSPROGS_DIR --depth 1 + pushd $XFSPROGS_DIR + make + sudo make install + popd + fi +} + +clean_up() +{ + rm -rf $XFSPROGS_DIR + rm -rf $XFSTESTS_DIR + rm -rf $testdir +} + +# For now will test the V2 encryption policy only as the +# V1 encryption policy is deprecated + +install_xfsprogs +setup_xfstests_env + +# Generate a fixed keying identifier +raw_key=$(_generate_raw_encryption_key) +keyid=$(_add_enckey $testdir "$raw_key" | awk '{print $NF}') + +case ${fscrypt} in + "none") + # do nothing for the test directory and will test it + # as one non-encrypted directory. + pushd $testdir + ${mydir}/../suites/${testcase}.sh + popd + clean_up + ;; + "unlocked") + # set encrypt policy with the key provided and then + # the test directory will be encrypted & unlocked + _set_encpolicy $testdir $keyid + pushd $testdir + ${mydir}/../suites/${testcase}.sh + popd + clean_up + ;; + "locked") + # remove the key, then the test directory will be locked + # and any modification will be denied by requiring the key + _rm_enckey $testdir $keyid + clean_up + ;; + *) + clean_up + echo "Unknown parameter $1" + exit 1 +esac diff --git a/qa/workunits/fs/full/subvolume_clone.sh b/qa/workunits/fs/full/subvolume_clone.sh new file mode 100755 index 000000000..a11131215 --- /dev/null +++ b/qa/workunits/fs/full/subvolume_clone.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env bash +set -ex + +# This testcase tests the 'ceph fs subvolume snapshot clone' when the osd is full. +# The clone fails with 'MetadataMgrException: -28 (error in write)' and +# truncates the config file of corresponding subvolume while updating the config file. +# Hence the subsequent subvolume commands on the clone fails with +# 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)' traceback. + +# The osd is of the size 1GB. The full-ratios are set so that osd is treated full +# at around 600MB. The subvolume is created and 100MB is written. +# The subvolume is snapshotted and cloned ten times. Since the clone delay is set to 15 seconds, +# all the clones reach pending state for sure. Among ten clones, only few succeed and rest fails +# with ENOSPACE. + +# At this stage, ".meta" config file of the failed clones are checked if it's truncated. +# and clone status command is checked for traceback. + +# Note that the failed clones would be in retry loop and it's state would be 'pending' or 'in-progress'. +# It's state is not updated to 'failed' as the config update to gets ENOSPACE too. 
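# Illustrative arithmetic (not part of this workunit) behind the "around 600MB"
# figure above: with a 1GB OSD and a full-ratio of 0.6, roughly
#     1024MB * 0.6 ~= 614MB
# can be written before OSD_FULL is raised, while the source subvolume plus ten
# 100MB clones would need on the order of 1100MB. Only a few clones can therefore
# complete; the rest are guaranteed to hit ENOSPC, which is the condition this
# test exercises.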
+ +set -e +ignore_failure() { + if "$@"; then return 0; else return 0; fi +} + +expect_failure() { + if "$@"; then return 1; else return 0; fi +} + +NUM_CLONES=10 + +ceph fs subvolume create cephfs sub_0 +subvol_path_0=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null) + +# For debugging +echo "Before ratios are set" +df $CEPH_MNT +ceph osd df + +ceph osd set-full-ratio 0.6 +ceph osd set-nearfull-ratio 0.50 +ceph osd set-backfillfull-ratio 0.55 + +# For debugging +echo "After ratios are set" +df -h +ceph osd df + +for i in {1..100};do sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path_0/1MB_file-$i status=progress bs=1M count=1 conv=fdatasync;done + +# For debugging +echo "After subvolumes are written" +df -h $CEPH_MNT +ceph osd df + +# snapshot +ceph fs subvolume snapshot create cephfs sub_0 snap_0 + +# Set clone snapshot delay +ceph config set mgr mgr/volumes/snapshot_clone_delay 15 + +# Schedule few clones, some would fail with no space +for i in $(eval echo {1..$NUM_CLONES});do ceph fs subvolume snapshot clone cephfs sub_0 snap_0 clone_$i;done + +# Wait for osd is full +timeout=90 +while [ $timeout -gt 0 ] +do + health=$(ceph health detail) + [[ $health = *"OSD_FULL"* ]] && echo "OSD is full" && break + echo "Wating for osd to be full: $timeout" + sleep 1 + let "timeout-=1" +done + +# For debugging +echo "After osd is full" +df -h $CEPH_MNT +ceph osd df + +# Check clone status, this should not crash +for i in $(eval echo {1..$NUM_CLONES}) +do + ignore_failure ceph fs clone status cephfs clone_$i >/tmp/out_${PID}_file 2>/tmp/error_${PID}_file + cat /tmp/error_${PID}_file + if grep "complete" /tmp/out_${PID}_file; then + echo "The clone_$i is completed" + else + #in-progress/pending clones, No traceback should be found in stderr + echo clone_$i in PENDING/IN-PROGRESS + expect_failure sudo grep "Traceback" /tmp/error_${PID}_file + #config file should not be truncated and GLOBAL section should be found + sudo grep "GLOBAL" $CEPH_MNT/volumes/_nogroup/clone_$i/.meta + fi +done + +# Hard cleanup +ignore_failure sudo rm -rf $CEPH_MNT/_index/clone/* +ignore_failure sudo rm -rf $CEPH_MNT/volumes/_nogroup/clone_* +ignore_failure sudo rmdir $CEPH_MNT/volumes/_nogroup/sub_0/.snap/snap_0 +ignore_failure sudo rm -rf $CEPH_MNT/volumes/_nogroup/sub_0 + +#Set the ratios back for other full tests to run +ceph osd set-full-ratio 0.95 +ceph osd set-nearfull-ratio 0.95 +ceph osd set-backfillfull-ratio 0.95 + +#After test +echo "After test" +df -h $CEPH_MNT +ceph osd df + +echo OK diff --git a/qa/workunits/fs/full/subvolume_rm.sh b/qa/workunits/fs/full/subvolume_rm.sh new file mode 100755 index 000000000..a464e30f5 --- /dev/null +++ b/qa/workunits/fs/full/subvolume_rm.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +set -ex + +# This testcase tests the scenario of the 'ceph fs subvolume rm' mgr command +# when the osd is full. The command used to hang. The osd is of the size 1GB. +# The subvolume is created and 500MB file is written. The full-ratios are +# set below 500MB such that the osd is treated as full. Now the subvolume is +# is removed. This should be successful with the introduction of FULL +# capabilities which the mgr holds. 
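# Sketch only (not part of this workunit): the "wait until the cluster reports
# OSD_FULL" loop used below is repeated verbatim across the full/ workunits and
# could be factored into a shared helper along these lines (names are made up):
wait_for_health() {
    # usage: wait_for_health OSD_FULL 30
    local flag=$1 timeout=${2:-30}
    while [ "$timeout" -gt 0 ]; do
        ceph health detail | grep -q "$flag" && return 0
        sleep 1
        timeout=$((timeout - 1))
    done
    return 1
}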
+ +set -e +expect_failure() { + if "$@"; then return 1; else return 0; fi +} + +ceph fs subvolume create cephfs sub_0 +subvol_path=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null) + +#For debugging +echo "Before write" +df -h +ceph osd df + +sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/500MB_file-1 status=progress bs=1M count=500 + +ceph osd set-full-ratio 0.2 +ceph osd set-nearfull-ratio 0.16 +ceph osd set-backfillfull-ratio 0.18 + +timeout=30 +while [ $timeout -gt 0 ] +do + health=$(ceph health detail) + [[ $health = *"OSD_FULL"* ]] && echo "OSD is full" && break + echo "Wating for osd to be full: $timeout" + sleep 1 + let "timeout-=1" +done + +#For debugging +echo "After ratio set" +df -h +ceph osd df + +#Delete subvolume +ceph fs subvolume rm cephfs sub_0 + +#Validate subvolume is deleted +expect_failure ceph fs subvolume info cephfs sub_0 + +#Wait for subvolume to delete data +trashdir=$CEPH_MNT/volumes/_deleting +timeout=30 +while [ $timeout -gt 0 ] +do + [ -z "$(sudo ls -A $trashdir)" ] && echo "Trash directory $trashdir is empty" && break + echo "Wating for trash dir to be empty: $timeout" + sleep 1 + let "timeout-=1" +done + +#Set the ratios back for other full tests to run +ceph osd set-full-ratio 0.95 +ceph osd set-nearfull-ratio 0.95 +ceph osd set-backfillfull-ratio 0.95 + +#After test +echo "After test" +df -h +ceph osd df + +echo OK diff --git a/qa/workunits/fs/full/subvolume_snapshot_rm.sh b/qa/workunits/fs/full/subvolume_snapshot_rm.sh new file mode 100755 index 000000000..f6d0add9f --- /dev/null +++ b/qa/workunits/fs/full/subvolume_snapshot_rm.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash +set -ex + +# This testcase tests the 'ceph fs subvolume snapshot rm' when the osd is full. +# The snapshot rm fails with 'MetadataMgrException: -28 (error in write)' and +# truncates the config file of corresponding subvolume. Hence the subsequent +# snapshot rm of the same snapshot fails with 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)' +# traceback. + +# The osd is of the size 1GB. The subvolume is created and 800MB file is written. +# Then full-ratios are set below 500MB such that the osd is treated as full. +# The subvolume snapshot is taken which succeeds as no extra space is required +# for snapshot. Now, the removal of the snapshot fails with ENOSPACE as it +# fails to remove the snapshot metadata set. The snapshot removal fails +# but should not traceback and truncate the config file. 
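# For context on the checks below (indicative only, not part of this workunit):
# each subvolume keeps a small INI-style ".meta" config; loosely it looks like
#     [GLOBAL]
#     version = 2
#     type = subvolume
#     path = /volumes/_nogroup/sub_0/<uuid>
#     state = complete
# A truncating write under ENOSPC would lose the [GLOBAL] section entirely, which
# is why the test greps for it after the snapshot removal fails.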
+ +set -e +expect_failure() { + if "$@"; then return 1; else return 0; fi +} + +ignore_failure() { + if "$@"; then return 0; else return 0; fi +} + +ceph fs subvolume create cephfs sub_0 +subvol_path=$(ceph fs subvolume getpath cephfs sub_0 2>/dev/null) + +#For debugging +echo "Before write" +df $CEPH_MNT +ceph osd df + +# Write 800MB file and set full ratio to around 200MB +ignore_failure sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/800MB_file-1 status=progress bs=1M count=800 conv=fdatasync + +ceph osd set-full-ratio 0.2 +ceph osd set-nearfull-ratio 0.16 +ceph osd set-backfillfull-ratio 0.18 + +timeout=30 +while [ $timeout -gt 0 ] +do + health=$(ceph health detail) + [[ $health = *"OSD_FULL"* ]] && echo "OSD is full" && break + echo "Wating for osd to be full: $timeout" + sleep 1 + let "timeout-=1" +done + +#Take snapshot +ceph fs subvolume snapshot create cephfs sub_0 snap_0 + +#Remove snapshot fails but should not throw traceback +expect_failure ceph fs subvolume snapshot rm cephfs sub_0 snap_0 2>/tmp/error_${PID}_file +cat /tmp/error_${PID}_file + +# No traceback should be found +expect_failure grep "Traceback" /tmp/error_${PID}_file + +# Validate config file is not truncated and GLOBAL section exists +sudo grep "GLOBAL" $CEPH_MNT/volumes/_nogroup/sub_0/.meta + +#For debugging +echo "After write" +df $CEPH_MNT +ceph osd df + +# Snapshot removal with force option should succeed +ceph fs subvolume snapshot rm cephfs sub_0 snap_0 --force + +#Cleanup from backend +ignore_failure sudo rm -f /tmp/error_${PID}_file +ignore_failure sudo rm -rf $CEPH_MNT/volumes/_nogroup/sub_0 + +#Set the ratios back for other full tests to run +ceph osd set-full-ratio 0.95 +ceph osd set-nearfull-ratio 0.95 +ceph osd set-backfillfull-ratio 0.95 + +#After test +echo "After test" +df -h $CEPH_MNT +ceph osd df + +echo OK diff --git a/qa/workunits/fs/maxentries/maxentries.sh b/qa/workunits/fs/maxentries/maxentries.sh new file mode 100755 index 000000000..d48fd956e --- /dev/null +++ b/qa/workunits/fs/maxentries/maxentries.sh @@ -0,0 +1,155 @@ +#!/usr/bin/env bash + +set -ex + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +function make_files() +{ + set +x + temp_dir=`mktemp -d mkfile_test_XXXXXX` + for i in $(seq 1 $1) + do + echo -n | dd of="${temp_dir}/file_$i" conv=fsync || return 1 + sync "${temp_dir}" || return 1 + done + set -x + return 0 +} + +function make_dirs() +{ + set +x + temp_dir=`mktemp -d mkdir_test_XXXXXX` + for i in $(seq 1 $1) + do + mkdir -p ${temp_dir}/dir_${i} || return 1 + sync "${temp_dir}" || return 1 + done + set -x + return 0 +} + +function make_nodes() +{ + set +x + temp_dir=`mktemp -d mknod_test_XXXXXX` + for i in $(seq 1 $1) + do + mknod ${temp_dir}/fifo_${i} p || return 1 + sync "${temp_dir}" || return 1 + done + set -x + return 0 +} + +function rename_files() +{ + set +x + temp_dir=`mktemp -d rename_test_XXXXXX` + mkdir -p ${temp_dir}/rename + + for i in $(seq 1 $1) + do + touch ${temp_dir}/file_${i} || return 1 + + mv ${temp_dir}/file_${i} ${temp_dir}/rename/ || return 1 + sync "${temp_dir}" || return 1 + done + set -x + return 0 +} + +function make_symlinks() +{ + set +x + temp_dir=`mktemp -d symlink_test_XXXXXX` + mkdir -p ${temp_dir}/symlink + + touch ${temp_dir}/file + + for i in $(seq 1 $1) + do + ln -s ../file ${temp_dir}/symlink/sym_${i} || return 1 + sync "${temp_dir}" || return 1 + done + set -x + return 0 +} + +function make_links() +{ + set +x + temp_dir=`mktemp -d link_test_XXXXXX` + mkdir -p ${temp_dir}/link + + touch 
${temp_dir}/file + + for i in $(seq 1 $1) + do + ln ${temp_dir}/file ${temp_dir}/link/link_${i} || return 1 + sync "${temp_dir}" || return 1 + done + set -x + return 0 +} + +function cleanup() +{ + rm -rf * +} + +test_dir="max_entries" +mkdir -p $test_dir +pushd $test_dir + +dir_max_entries=100 +ceph config set mds mds_dir_max_entries $dir_max_entries + +ok_dir_max_entries=$dir_max_entries +fail_dir_max_entries=$((dir_max_entries+1)) + +# make files test +make_files $ok_dir_max_entries +expect_false make_files $fail_dir_max_entries + +# make dirs test +make_dirs $ok_dir_max_entries +expect_false make_dirs $fail_dir_max_entries + +# make nodes test +make_nodes $ok_dir_max_entries +expect_false make_nodes $fail_dir_max_entries + +# rename files test +rename_files $ok_dir_max_entries +expect_false rename_files $fail_dir_max_entries + +# symlink files test +make_symlinks $ok_dir_max_entries +expect_false make_symlinks $fail_dir_max_entries + +# link files test +make_links $ok_dir_max_entries +expect_false make_links $fail_dir_max_entries + +# no limit (e.g., default value) +dir_max_entries=0 +ceph config set mds mds_dir_max_entries $dir_max_entries + +make_files 500 +make_dirs 500 +make_nodes 500 +rename_files 500 +make_symlinks 500 +make_links 500 + +cleanup + +popd # $test_dir + +echo OK diff --git a/qa/workunits/fs/misc/acl.sh b/qa/workunits/fs/misc/acl.sh new file mode 100755 index 000000000..198b05671 --- /dev/null +++ b/qa/workunits/fs/misc/acl.sh @@ -0,0 +1,50 @@ +#!/bin/sh -x + +set -e +mkdir -p testdir +cd testdir + +set +e +setfacl -d -m u:nobody:rw . +if test $? != 0; then + echo "Filesystem does not support ACL" + exit 0 +fi + +expect_failure() { + if "$@"; then return 1; else return 0; fi +} + +set -e +c=0 +while [ $c -lt 100 ] +do + c=`expr $c + 1` + # inherited ACL from parent directory's default ACL + mkdir d1 + c1=`getfacl d1 | grep -c "nobody:rw"` + echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null + c2=`getfacl d1 | grep -c "nobody:rw"` + rmdir d1 + if [ $c1 -ne 2 ] || [ $c2 -ne 2 ] + then + echo "ERROR: incorrect ACLs" + exit 1 + fi +done + +mkdir d1 + +# The ACL xattr only contains ACL header. ACL should be removed +# in this case. +setfattr -n system.posix_acl_access -v 0x02000000 d1 +setfattr -n system.posix_acl_default -v 0x02000000 . + +expect_failure getfattr -n system.posix_acl_access d1 +expect_failure getfattr -n system.posix_acl_default . + + +rmdir d1 +cd .. +rmdir testdir +echo OK diff --git a/qa/workunits/fs/misc/chmod.sh b/qa/workunits/fs/misc/chmod.sh new file mode 100755 index 000000000..de66776f1 --- /dev/null +++ b/qa/workunits/fs/misc/chmod.sh @@ -0,0 +1,60 @@ +#!/bin/sh -x + +set -e + +check_perms() { + + file=$1 + r=$(ls -la ${file}) + if test $? != 0; then + echo "ERROR: File listing/stat failed" + exit 1 + fi + + perms=$2 + if test "${perms}" != $(echo ${r} | awk '{print $1}') && \ + test "${perms}." != $(echo ${r} | awk '{print $1}') && \ + test "${perms}+" != $(echo ${r} | awk '{print $1}'); then + echo "ERROR: Permissions should be ${perms}" + exit 1 + fi +} + +file=test_chmod.$$ + +echo "foo" > ${file} +if test $? != 0; then + echo "ERROR: Failed to create file ${file}" + exit 1 +fi + +chmod 400 ${file} +if test $? != 0; then + echo "ERROR: Failed to change mode of ${file}" + exit 1 +fi + +check_perms ${file} "-r--------" + +set +e +echo "bar" >> ${file} +if test $? = 0; then + echo "ERROR: Write to read-only file should Fail" + exit 1 +fi + +set -e +chmod 600 ${file} +echo "bar" >> ${file} +if test $? 
!= 0; then + echo "ERROR: Write to writeable file failed" + exit 1 +fi + +check_perms ${file} "-rw-------" + +echo "foo" >> ${file} +if test $? != 0; then + echo "ERROR: Failed to write to file" + exit 1 +fi diff --git a/qa/workunits/fs/misc/dac_override.sh b/qa/workunits/fs/misc/dac_override.sh new file mode 100755 index 000000000..dfb1a9091 --- /dev/null +++ b/qa/workunits/fs/misc/dac_override.sh @@ -0,0 +1,19 @@ +#!/bin/sh -x + +expect_failure() { + if "$@"; then return 1; else return 0; fi +} + +set -e + +mkdir -p testdir +file=test_chmod.$$ + +echo "foo" > testdir/${file} +sudo chmod 600 testdir + +# only root can read +expect_failure cat testdir/${file} + +# directory read/write DAC override for root should allow read +sudo cat testdir/${file} diff --git a/qa/workunits/fs/misc/direct_io.py b/qa/workunits/fs/misc/direct_io.py new file mode 100755 index 000000000..f7d59d95a --- /dev/null +++ b/qa/workunits/fs/misc/direct_io.py @@ -0,0 +1,42 @@ +#!/usr/bin/python3 + +import mmap +import os +import subprocess + +def main(): + path = "testfile" + fd = os.open(path, os.O_RDWR | os.O_CREAT | os.O_TRUNC | os.O_DIRECT, 0o644) + + ino = os.fstat(fd).st_ino + obj_name = "{ino:x}.00000000".format(ino=ino) + pool_name = os.getxattr(path, "ceph.file.layout.pool") + + buf = mmap.mmap(-1, 1) + buf.write(b'1') + os.write(fd, buf) + + proc = subprocess.Popen(['rados', '-p', pool_name, 'get', obj_name, 'tmpfile']) + proc.wait() + + with open('tmpfile', 'rb') as tmpf: + out = tmpf.read(1) + if out != b'1': + raise RuntimeError("data were not written to object store directly") + + with open('tmpfile', 'wb') as tmpf: + tmpf.write(b'2') + + proc = subprocess.Popen(['rados', '-p', pool_name, 'put', obj_name, 'tmpfile']) + proc.wait() + + os.lseek(fd, 0, os.SEEK_SET) + out = os.read(fd, 1) + if out != b'2': + raise RuntimeError("data were not directly read from object store") + + os.close(fd) + print('ok') + + +main() diff --git a/qa/workunits/fs/misc/dirfrag.sh b/qa/workunits/fs/misc/dirfrag.sh new file mode 100755 index 000000000..eea0ec3bc --- /dev/null +++ b/qa/workunits/fs/misc/dirfrag.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +set -e + +DEPTH=5 +COUNT=10000 + +kill_jobs() { + jobs -p | xargs kill +} +trap kill_jobs INT + +create_files() { + for i in `seq 1 $COUNT` + do + touch file$i + done +} + +delete_files() { + for i in `ls -f` + do + if [[ ${i}a = file*a ]] + then + rm -f $i + fi + done +} + +rm -rf testdir +mkdir testdir +cd testdir + +echo "creating folder hierarchy" +for i in `seq 1 $DEPTH`; do + mkdir dir$i + cd dir$i + create_files & +done +wait + +echo "created hierarchy, now cleaning up" + +for i in `seq 1 $DEPTH`; do + delete_files & + cd .. +done +wait + +echo "cleaned up hierarchy" +cd .. 
+rm -rf testdir diff --git a/qa/workunits/fs/misc/filelock_deadlock.py b/qa/workunits/fs/misc/filelock_deadlock.py new file mode 100755 index 000000000..398902f6c --- /dev/null +++ b/qa/workunits/fs/misc/filelock_deadlock.py @@ -0,0 +1,72 @@ +#!/usr/bin/python3 + +import errno +import fcntl +import os +import signal +import struct +import time + + +def handler(signum, frame): + pass + + +def lock_two(f1, f2): + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 10, 0, 0) + fcntl.fcntl(f1, fcntl.F_SETLKW, lockdata) + time.sleep(10) + + # don't wait forever + signal.signal(signal.SIGALRM, handler) + signal.alarm(10) + exitcode = 0 + try: + fcntl.fcntl(f2, fcntl.F_SETLKW, lockdata) + except IOError as e: + if e.errno == errno.EDEADLK: + exitcode = 1 + elif e.errno == errno.EINTR: + exitcode = 2 + else: + exitcode = 3 + os._exit(exitcode) + + +def main(): + pid1 = os.fork() + if pid1 == 0: + f1 = open("testfile1", 'w') + f2 = open("testfile2", 'w') + lock_two(f1, f2) + + pid2 = os.fork() + if pid2 == 0: + f1 = open("testfile2", 'w') + f2 = open("testfile3", 'w') + lock_two(f1, f2) + + pid3 = os.fork() + if pid3 == 0: + f1 = open("testfile3", 'w') + f2 = open("testfile1", 'w') + lock_two(f1, f2) + + deadlk_count = 0 + i = 0 + while i < 3: + pid, status = os.wait() + exitcode = status >> 8 + if exitcode == 1: + deadlk_count += 1 + elif exitcode != 0: + raise RuntimeError("unexpect exit code of child") + i += 1 + + if deadlk_count != 1: + raise RuntimeError("unexpect count of EDEADLK") + + print('ok') + + +main() diff --git a/qa/workunits/fs/misc/filelock_interrupt.py b/qa/workunits/fs/misc/filelock_interrupt.py new file mode 100755 index 000000000..b261d74fb --- /dev/null +++ b/qa/workunits/fs/misc/filelock_interrupt.py @@ -0,0 +1,94 @@ +#!/usr/bin/python3 + +from contextlib import contextmanager +import errno +import fcntl +import signal +import struct + +@contextmanager +def timeout(seconds): + def timeout_handler(signum, frame): + raise InterruptedError + + orig_handler = signal.signal(signal.SIGALRM, timeout_handler) + try: + signal.alarm(seconds) + yield + finally: + signal.alarm(0) + signal.signal(signal.SIGALRM, orig_handler) + + +""" +introduced by Linux 3.15 +""" +setattr(fcntl, "F_OFD_GETLK", 36) +setattr(fcntl, "F_OFD_SETLK", 37) +setattr(fcntl, "F_OFD_SETLKW", 38) + + +def main(): + f1 = open("testfile", 'w') + f2 = open("testfile", 'w') + + fcntl.flock(f1, fcntl.LOCK_SH | fcntl.LOCK_NB) + + """ + is flock interruptible? + """ + with timeout(5): + try: + fcntl.flock(f2, fcntl.LOCK_EX) + except InterruptedError: + pass + else: + raise RuntimeError("expect flock to block") + + fcntl.flock(f1, fcntl.LOCK_UN) + + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 10, 0, 0) + try: + fcntl.fcntl(f1, fcntl.F_OFD_SETLK, lockdata) + except IOError as e: + if e.errno != errno.EINVAL: + raise + else: + print('kernel does not support fcntl.F_OFD_SETLK') + return + + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 10, 10, 0, 0) + fcntl.fcntl(f2, fcntl.F_OFD_SETLK, lockdata) + + """ + is posix lock interruptible? 
+ """ + with timeout(5): + try: + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0) + fcntl.fcntl(f2, fcntl.F_OFD_SETLKW, lockdata) + except InterruptedError: + pass + else: + raise RuntimeError("expect posix lock to block") + + """ + file handler 2 should still hold lock on 10~10 + """ + try: + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 10, 10, 0, 0) + fcntl.fcntl(f1, fcntl.F_OFD_SETLK, lockdata) + except IOError as e: + if e.errno == errno.EAGAIN: + pass + else: + raise RuntimeError("expect file handler 2 to hold lock on 10~10") + + lockdata = struct.pack('hhllhh', fcntl.F_UNLCK, 0, 0, 0, 0, 0) + fcntl.fcntl(f1, fcntl.F_OFD_SETLK, lockdata) + fcntl.fcntl(f2, fcntl.F_OFD_SETLK, lockdata) + + print('ok') + + +main() diff --git a/qa/workunits/fs/misc/i_complete_vs_rename.sh b/qa/workunits/fs/misc/i_complete_vs_rename.sh new file mode 100755 index 000000000..a9b98271d --- /dev/null +++ b/qa/workunits/fs/misc/i_complete_vs_rename.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +set -e + +mkdir x +cd x +touch a +touch b +touch c +touch d +ls +chmod 777 . +stat e || true +touch f +touch g + +# over existing file +echo attempting rename over existing file... +touch ../xx +mv ../xx f +ls | grep f || false +echo rename over existing file is okay + +# over negative dentry +echo attempting rename over negative dentry... +touch ../xx +mv ../xx e +ls | grep e || false +echo rename over negative dentry is ok + +echo OK diff --git a/qa/workunits/fs/misc/layout_vxattrs.sh b/qa/workunits/fs/misc/layout_vxattrs.sh new file mode 100755 index 000000000..811336273 --- /dev/null +++ b/qa/workunits/fs/misc/layout_vxattrs.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash + +set -ex + +# detect data pool +datapool= +dir=. +while true ; do + echo $dir + datapool=$(getfattr -n ceph.dir.layout.pool $dir --only-values) && break + dir=$dir/.. 
+done + +# file +rm -f file file2 +touch file file2 + +getfattr -n ceph.file.layout file +getfattr -n ceph.file.layout file | grep -q object_size= +getfattr -n ceph.file.layout file | grep -q stripe_count= +getfattr -n ceph.file.layout file | grep -q stripe_unit= +getfattr -n ceph.file.layout file | grep -q pool= +getfattr -n ceph.file.layout.pool file +getfattr -n ceph.file.layout.pool_namespace file +getfattr -n ceph.file.layout.stripe_unit file +getfattr -n ceph.file.layout.stripe_count file +getfattr -n ceph.file.layout.object_size file + +getfattr -n ceph.file.layout.bogus file 2>&1 | grep -q 'No such attribute' +getfattr -n ceph.dir.layout file 2>&1 | grep -q 'No such attribute' + +setfattr -n ceph.file.layout.stripe_unit -v 1048576 file2 +setfattr -n ceph.file.layout.stripe_count -v 8 file2 +setfattr -n ceph.file.layout.object_size -v 10485760 file2 + +setfattr -n ceph.file.layout.pool -v $datapool file2 +getfattr -n ceph.file.layout.pool file2 | grep -q $datapool +setfattr -n ceph.file.layout.pool_namespace -v foons file2 +getfattr -n ceph.file.layout.pool_namespace file2 | grep -q foons +setfattr -x ceph.file.layout.pool_namespace file2 +getfattr -n ceph.file.layout.pool_namespace file2 | grep -q -v foons + +getfattr -n ceph.file.layout.stripe_unit file2 | grep -q 1048576 +getfattr -n ceph.file.layout.stripe_count file2 | grep -q 8 +getfattr -n ceph.file.layout.object_size file2 | grep -q 10485760 + +setfattr -n ceph.file.layout -v "stripe_unit=4194304 stripe_count=16 object_size=41943040 pool=$datapool pool_namespace=foons" file2 +getfattr -n ceph.file.layout.stripe_unit file2 | grep -q 4194304 +getfattr -n ceph.file.layout.stripe_count file2 | grep -q 16 +getfattr -n ceph.file.layout.object_size file2 | grep -q 41943040 +getfattr -n ceph.file.layout.pool file2 | grep -q $datapool +getfattr -n ceph.file.layout.pool_namespace file2 | grep -q foons + +setfattr -n ceph.file.layout -v "stripe_unit=1048576" file2 +getfattr -n ceph.file.layout.stripe_unit file2 | grep -q 1048576 +getfattr -n ceph.file.layout.stripe_count file2 | grep -q 16 +getfattr -n ceph.file.layout.object_size file2 | grep -q 41943040 +getfattr -n ceph.file.layout.pool file2 | grep -q $datapool +getfattr -n ceph.file.layout.pool_namespace file2 | grep -q foons + +setfattr -n ceph.file.layout -v "stripe_unit=2097152 stripe_count=4 object_size=2097152 pool=$datapool pool_namespace=barns" file2 +getfattr -n ceph.file.layout.stripe_unit file2 | grep -q 2097152 +getfattr -n ceph.file.layout.stripe_count file2 | grep -q 4 +getfattr -n ceph.file.layout.object_size file2 | grep -q 2097152 +getfattr -n ceph.file.layout.pool file2 | grep -q $datapool +getfattr -n ceph.file.layout.pool_namespace file2 | grep -q barns + +# dir +rm -f dir/file || true +rmdir dir || true +mkdir -p dir + +getfattr -d -m - dir | grep -q ceph.dir.layout && exit 1 || true +getfattr -d -m - dir | grep -q ceph.file.layout && exit 1 || true +getfattr -n ceph.dir.layout dir && exit 1 || true + +setfattr -n ceph.dir.layout.stripe_unit -v 1048576 dir +setfattr -n ceph.dir.layout.stripe_count -v 8 dir +setfattr -n ceph.dir.layout.object_size -v 10485760 dir +setfattr -n ceph.dir.layout.pool -v $datapool dir +setfattr -n ceph.dir.layout.pool_namespace -v dirns dir + +getfattr -n ceph.dir.layout dir +getfattr -n ceph.dir.layout dir | grep -q object_size=10485760 +getfattr -n ceph.dir.layout dir | grep -q stripe_count=8 +getfattr -n ceph.dir.layout dir | grep -q stripe_unit=1048576 +getfattr -n ceph.dir.layout dir | grep -q pool=$datapool +getfattr -n 
ceph.dir.layout dir | grep -q pool_namespace=dirns +getfattr -n ceph.dir.layout.pool dir | grep -q $datapool +getfattr -n ceph.dir.layout.stripe_unit dir | grep -q 1048576 +getfattr -n ceph.dir.layout.stripe_count dir | grep -q 8 +getfattr -n ceph.dir.layout.object_size dir | grep -q 10485760 +getfattr -n ceph.dir.layout.pool_namespace dir | grep -q dirns + + +setfattr -n ceph.file.layout -v "stripe_count=16" file2 +getfattr -n ceph.file.layout.stripe_count file2 | grep -q 16 +setfattr -n ceph.file.layout -v "object_size=10485760 stripe_count=8 stripe_unit=1048576 pool=$datapool pool_namespace=dirns" file2 +getfattr -n ceph.file.layout.stripe_count file2 | grep -q 8 + +touch dir/file +getfattr -n ceph.file.layout.pool dir/file | grep -q $datapool +getfattr -n ceph.file.layout.stripe_unit dir/file | grep -q 1048576 +getfattr -n ceph.file.layout.stripe_count dir/file | grep -q 8 +getfattr -n ceph.file.layout.object_size dir/file | grep -q 10485760 +getfattr -n ceph.file.layout.pool_namespace dir/file | grep -q dirns + +setfattr -x ceph.dir.layout.pool_namespace dir +getfattr -n ceph.dir.layout dir | grep -q -v pool_namespace=dirns + +setfattr -x ceph.dir.layout dir +getfattr -n ceph.dir.layout dir 2>&1 | grep -q 'No such attribute' + +echo OK + diff --git a/qa/workunits/fs/misc/mkpool_layout_vxattrs.sh b/qa/workunits/fs/misc/mkpool_layout_vxattrs.sh new file mode 100755 index 000000000..6b2fecbc0 --- /dev/null +++ b/qa/workunits/fs/misc/mkpool_layout_vxattrs.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +set -e + +touch foo.$$ +ceph osd pool create foo.$$ 8 +ceph fs add_data_pool cephfs foo.$$ +setfattr -n ceph.file.layout.pool -v foo.$$ foo.$$ + +# cleanup +rm foo.$$ +ceph fs rm_data_pool cephfs foo.$$ +ceph osd pool rm foo.$$ foo.$$ --yes-i-really-really-mean-it + +echo OK diff --git a/qa/workunits/fs/misc/multiple_rsync.sh b/qa/workunits/fs/misc/multiple_rsync.sh new file mode 100755 index 000000000..4397c1e7f --- /dev/null +++ b/qa/workunits/fs/misc/multiple_rsync.sh @@ -0,0 +1,25 @@ +#!/bin/sh -ex + + +# Populate with some arbitrary files from the local system. Take +# a copy to protect against false fails from system updates during test. +export PAYLOAD=/tmp/multiple_rsync_payload.$$ +sudo cp -r /usr/lib/ $PAYLOAD + +set -e + +sudo rsync -av $PAYLOAD payload.1 +sudo rsync -av $PAYLOAD payload.2 + +# this shouldn't transfer any additional files +echo we should get 4 here if no additional files are transferred +sudo rsync -auv $PAYLOAD payload.1 | tee /tmp/$$ +hexdump -C /tmp/$$ +wc -l /tmp/$$ | grep 4 +sudo rsync -auv $PAYLOAD payload.2 | tee /tmp/$$ +hexdump -C /tmp/$$ +wc -l /tmp/$$ | grep 4 +echo OK + +rm /tmp/$$ +sudo rm -rf $PAYLOAD diff --git a/qa/workunits/fs/misc/rstats.sh b/qa/workunits/fs/misc/rstats.sh new file mode 100755 index 000000000..e6b3eddf2 --- /dev/null +++ b/qa/workunits/fs/misc/rstats.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash + +set -x + +timeout=30 +old_value="" +new_value="" + +wait_until_changed() { + name=$1 + wait=0 + while [ $wait -lt $timeout ]; do + new_value=`getfattr --only-value -n ceph.dir.$name .` + [ $new_value == $old_value ] || return 0 + sleep 1 + wait=$(($wait + 1)) + done + return 1 +} + +check_rctime() { + old_sec=$(echo $old_value | cut -d. -f1) + old_nsec=$(echo $old_value | cut -d. -f2) + new_sec=$(echo $new_value | cut -d. -f1) + new_nsec=$(echo $new_value | cut -d. 
-f2) + [ "$old_sec" -lt "$new_sec" ] && return 0 + [ "$old_sec" -gt "$new_sec" ] && return 1 + [ "$old_nsec" -lt "$new_nsec" ] && return 0 + return 1 +} + +# sync(3) does not make ceph-fuse flush dirty caps, because fuse kernel module +# does not notify ceph-fuse about it. Use fsync(3) instead. +fsync_path() { + cmd="import os; fd=os.open(\"$1\", os.O_RDONLY); os.fsync(fd); os.close(fd)" + python3 -c "$cmd" +} + +set -e + +mkdir -p rstats_testdir/d1/d2 +cd rstats_testdir + +# rfiles +old_value=`getfattr --only-value -n ceph.dir.rfiles .` +[ $old_value == 0 ] || false +touch d1/d2/f1 +wait_until_changed rfiles +[ $new_value == $(($old_value + 1)) ] || false + +# rsubdirs +old_value=`getfattr --only-value -n ceph.dir.rsubdirs .` +[ $old_value == 3 ] || false +mkdir d1/d2/d3 +wait_until_changed rsubdirs +[ $new_value == $(($old_value + 1)) ] || false + +# rbytes +old_value=`getfattr --only-value -n ceph.dir.rbytes .` +[ $old_value == 0 ] || false +echo hello > d1/d2/f2 +fsync_path d1/d2/f2 +wait_until_changed rbytes +[ $new_value == $(($old_value + 6)) ] || false + +#rctime +old_value=`getfattr --only-value -n ceph.dir.rctime .` +touch d1/d2/d3 # touch existing file +fsync_path d1/d2/d3 +wait_until_changed rctime +check_rctime + +old_value=`getfattr --only-value -n ceph.dir.rctime .` +touch d1/d2/f3 # create new file +wait_until_changed rctime +check_rctime + +cd .. +rm -rf rstats_testdir +echo OK diff --git a/qa/workunits/fs/misc/trivial_sync.sh b/qa/workunits/fs/misc/trivial_sync.sh new file mode 100755 index 000000000..7c8c4e2b4 --- /dev/null +++ b/qa/workunits/fs/misc/trivial_sync.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +set -e + +mkdir foo +echo foo > bar +sync diff --git a/qa/workunits/fs/misc/xattrs.sh b/qa/workunits/fs/misc/xattrs.sh new file mode 100755 index 000000000..fcd94d22c --- /dev/null +++ b/qa/workunits/fs/misc/xattrs.sh @@ -0,0 +1,14 @@ +#!/bin/sh -x + +set -e + +touch file + +setfattr -n user.foo -v foo file +setfattr -n user.bar -v bar file +setfattr -n user.empty file +getfattr -d file | grep foo +getfattr -d file | grep bar +getfattr -d file | grep empty + +echo OK. 
diff --git a/qa/workunits/fs/multiclient_sync_read_eof.py b/qa/workunits/fs/multiclient_sync_read_eof.py new file mode 100755 index 000000000..15ecbb825 --- /dev/null +++ b/qa/workunits/fs/multiclient_sync_read_eof.py @@ -0,0 +1,42 @@ +#!/usr/bin/python3 + +import argparse +import os + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('mnt1') + parser.add_argument('mnt2') + parser.add_argument('fn') + args = parser.parse_args() + + open(os.path.join(args.mnt1, args.fn), 'w') + f1 = open(os.path.join(args.mnt1, args.fn), 'r+') + f2 = open(os.path.join(args.mnt2, args.fn), 'r+') + + f1.write('foo') + f1.flush() + a = f2.read(3) + print('got "%s"' % a) + assert a == 'foo' + f2.write('bar') + f2.flush() + a = f1.read(3) + print('got "%s"' % a) + assert a == 'bar' + + ## test short reads + f1.write('short') + f1.flush() + a = f2.read(100) + print('got "%s"' % a) + assert a == 'short' + f2.write('longer') + f2.flush() + a = f1.read(1000) + print('got "%s"' % a) + assert a == 'longer' + + print('ok') + +main() diff --git a/qa/workunits/fs/norstats/kernel_untar_tar.sh b/qa/workunits/fs/norstats/kernel_untar_tar.sh new file mode 100755 index 000000000..6a175dcd9 --- /dev/null +++ b/qa/workunits/fs/norstats/kernel_untar_tar.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# check if there is file changed while being archived + +set -e + +KERNEL=linux-4.0.5 + +wget -q http://download.ceph.com/qa/$KERNEL.tar.xz + +mkdir untar_tar +cd untar_tar + +tar Jxvf ../$KERNEL.tar.xz $KERNEL/Documentation/ +tar cf doc.tar $KERNEL + +tar xf doc.tar +sync +tar c $KERNEL >/dev/null + +rm -rf $KERNEL + +tar xf doc.tar +sync +tar c $KERNEL >/dev/null + +echo Ok diff --git a/qa/workunits/fs/quota/quota.sh b/qa/workunits/fs/quota/quota.sh new file mode 100755 index 000000000..1315be6d8 --- /dev/null +++ b/qa/workunits/fs/quota/quota.sh @@ -0,0 +1,128 @@ +#!/usr/bin/env bash + +set -ex + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +function write_file() +{ + set +x + for ((i=1;i<=$2;i++)) + do + dd if=/dev/zero of=$1 bs=1M count=1 conv=notrunc oflag=append 2>/dev/null >/dev/null + if [ $? != 0 ]; then + echo Try to write $(($i * 1048576)) + set -x + return 1 + fi + sleep 0.05 + done + set -x + return 0 +} + +mkdir quota-test +cd quota-test + +# bytes +setfattr . -n ceph.quota.max_bytes -v 100000000 # 100m +expect_false write_file big 1000 # 1g +expect_false write_file second 10 +setfattr . -n ceph.quota.max_bytes -v 0 +dd if=/dev/zero of=third bs=1M count=10 +dd if=/dev/zero of=big2 bs=1M count=100 + + +rm -rf * + +# files +setfattr . -n ceph.quota.max_files -v 5 +mkdir ok +touch ok/1 +touch ok/2 +touch 3 +expect_false touch shouldbefail # 5 files will include the "." +expect_false touch ok/shouldbefail # 5 files will include the "." +setfattr . 
-n ceph.quota.max_files -v 0 +touch shouldbecreated +touch shouldbecreated2 + + +rm -rf * + +# mix +mkdir bytes bytes/files + +setfattr bytes -n ceph.quota.max_bytes -v 10000000 #10m +setfattr bytes/files -n ceph.quota.max_files -v 5 +dd if=/dev/zero of=bytes/files/1 bs=1M count=4 +dd if=/dev/zero of=bytes/files/2 bs=1M count=4 +expect_false write_file bytes/files/3 1000 +expect_false write_file bytes/files/4 1000 +expect_false write_file bytes/files/5 1000 +stat --printf="%n %s\n" bytes/files/1 #4M +stat --printf="%n %s\n" bytes/files/2 #4M +stat --printf="%n %s\n" bytes/files/3 #bigger than 2M +stat --printf="%n %s\n" bytes/files/4 #should be zero +expect_false stat bytes/files/5 #shouldn't be exist + + + + +rm -rf * + +#mv +mkdir files limit +truncate files/file -s 10G +setfattr limit -n ceph.quota.max_bytes -v 1000000 #1m +expect_false mv files limit/ + + + +rm -rf * + +#limit by ancestor + +mkdir -p ancestor/p1/p2/parent/p3 +setfattr ancestor -n ceph.quota.max_bytes -v 1000000 +setfattr ancestor/p1/p2/parent -n ceph.quota.max_bytes -v 1000000000 #1g +expect_false write_file ancestor/p1/p2/parent/p3/file1 900 #900m +stat --printf="%n %s\n" ancestor/p1/p2/parent/p3/file1 + + +#get/set attribute + +setfattr -n ceph.quota.max_bytes -v 0 . +setfattr -n ceph.quota.max_bytes -v 1 . +setfattr -n ceph.quota.max_bytes -v 9223372036854775807 . +expect_false setfattr -n ceph.quota.max_bytes -v 9223372036854775808 . +expect_false setfattr -n ceph.quota.max_bytes -v -1 . +expect_false setfattr -n ceph.quota.max_bytes -v -9223372036854775808 . +expect_false setfattr -n ceph.quota.max_bytes -v -9223372036854775809 . + +setfattr -n ceph.quota.max_files -v 0 . +setfattr -n ceph.quota.max_files -v 1 . +setfattr -n ceph.quota.max_files -v 9223372036854775807 . +expect_false setfattr -n ceph.quota.max_files -v 9223372036854775808 . +expect_false setfattr -n ceph.quota.max_files -v -1 . +expect_false setfattr -n ceph.quota.max_files -v -9223372036854775808 . +expect_false setfattr -n ceph.quota.max_files -v -9223372036854775809 . + +setfattr -n ceph.quota -v "max_bytes=0 max_files=0" . +setfattr -n ceph.quota -v "max_bytes=1 max_files=0" . +setfattr -n ceph.quota -v "max_bytes=0 max_files=1" . +setfattr -n ceph.quota -v "max_bytes=1 max_files=1" . +expect_false setfattr -n ceph.quota -v "max_bytes=-1 max_files=0" . +expect_false setfattr -n ceph.quota -v "max_bytes=0 max_files=-1" . +expect_false setfattr -n ceph.quota -v "max_bytes=-1 max_files=-1" . + +#addme + +cd .. 
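+# (the largest quota accepted above, 9223372036854775807, is 2^63-1; anything
+# bigger or negative is expected to be rejected, matching a signed 64-bit range)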
+rm -rf quota-test + +echo OK diff --git a/qa/workunits/fs/snap-hierarchy.sh b/qa/workunits/fs/snap-hierarchy.sh new file mode 100755 index 000000000..67f0e014b --- /dev/null +++ b/qa/workunits/fs/snap-hierarchy.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +set -ex + +if [ -d "$1" ]; then + mkdir -p -- "$1" && cd "$1" +fi + +[ "$VERIFY" != verify ] && mkdir 1 +[ "$VERIFY" != verify ] && mkdir 1/.snap/first +stat 1/.snap/first +[ "$VERIFY" != verify ] && mkdir 1/2 +stat 1/.snap/first/2 && exit 1 +[ "$VERIFY" != verify ] && mkdir 1/2/.snap/second +stat 1/2/.snap/second +[ "$VERIFY" != verify ] && touch 1/foo +stat 1/.snap/first/foo && exit 1 +[ "$VERIFY" != verify ] && mkdir 1/.snap/third +stat 1/.snap/third/foo || exit 1 +[ "$VERIFY" != verify ] && mkdir 1/2/3 +[ "$VERIFY" != verify ] && mkdir 1/2/.snap/fourth +stat 1/2/.snap/fourth/3 + +exit 0 diff --git a/qa/workunits/fs/snaps/snap-rm-diff.sh b/qa/workunits/fs/snaps/snap-rm-diff.sh new file mode 100755 index 000000000..30ffa9113 --- /dev/null +++ b/qa/workunits/fs/snaps/snap-rm-diff.sh @@ -0,0 +1,10 @@ +#!/bin/sh -ex + +wget -q http://download.ceph.com/qa/linux-2.6.33.tar.bz2 +mkdir foo +cp linux* foo +mkdir foo/.snap/barsnap +rm foo/linux* +diff -q foo/.snap/barsnap/linux* linux* && echo "passed: files are identical" +rmdir foo/.snap/barsnap +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-1.sh b/qa/workunits/fs/snaps/snaptest-1.sh new file mode 100755 index 000000000..431e83387 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-1.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +set -ex + +echo 1 > file1 +echo 2 > file2 +echo 3 > file3 +[ -e file4 ] && rm file4 +mkdir .snap/snap1 +echo 4 > file4 +now=`ls` +then=`ls .snap/snap1` +rmdir .snap/snap1 +if [ "$now" = "$then" ]; then + echo live and snap contents are identical? + false +fi + +# do it again +echo 1 > file1 +echo 2 > file2 +echo 3 > file3 +mkdir .snap/snap1 +echo 4 > file4 +rmdir .snap/snap1 + +rm file? + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-2.sh b/qa/workunits/fs/snaps/snaptest-2.sh new file mode 100755 index 000000000..11fe9316a --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-2.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +echo "Create dir 100 to 199 ..." +for i in $(seq 100 199); do + echo " create dir $i" + mkdir "$i" + for y in $(seq 10 20); do + echo "This is a test file before any snapshot was taken." >"$i/$y" + done +done + +echo "Take first snapshot .snap/test1" +mkdir .snap/test1 + +echo "Create dir 200 to 299 ..." +for i in $(seq 200 299); do + echo " create dir $i" + mkdir $i + for y in $(seq 20 29); do + echo "This is a test file. Created after .snap/test1" >"$i/$y" + done +done + +echo "Create a snapshot in every first level dir ..." +for dir in $(ls); do + echo " create $dir/.snap/snap-subdir-test" + mkdir "$dir/.snap/snap-subdir-test" + for y in $(seq 30 39); do + echo " create $dir/$y file after the snapshot" + echo "This is a test file. Created after $dir/.snap/snap-subdir-test" >"$dir/$y" + done +done + +echo "Take second snapshot .snap/test2" +mkdir .snap/test2 + +echo "Copy content of .snap/test1 to copyofsnap1 ..." +mkdir copyofsnap1 +cp -Rv .snap/test1 copyofsnap1/ + + +echo "Take third snapshot .snap/test3" +mkdir .snap/test3 + +echo "Delete the snapshots..." + +find ./ -type d -print | \ + xargs -I% -n1 find %/.snap -mindepth 1 -maxdepth 1 \ + \( ! -name "_*" \) -print 2>/dev/null + +find ./ -type d -print | \ + xargs -I% -n1 find %/.snap -mindepth 1 -maxdepth 1 \ + \( ! 
-name "_*" \) -print 2>/dev/null | \ + xargs -n1 rmdir + +echo "Delete all the files and directories ..." +rm -Rfv ./* + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-authwb.sh b/qa/workunits/fs/snaps/snaptest-authwb.sh new file mode 100755 index 000000000..965ee8512 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-authwb.sh @@ -0,0 +1,12 @@ +#!/bin/sh -x + +set -e + +touch foo +chmod +x foo +mkdir .snap/s +find .snap/s/foo -executable | grep foo +rmdir .snap/s +rm foo + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-capwb.sh b/qa/workunits/fs/snaps/snaptest-capwb.sh new file mode 100755 index 000000000..d26f324b6 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-capwb.sh @@ -0,0 +1,33 @@ +#!/bin/sh -x + +set -e + +mkdir foo + +# make sure mds handles it when the client does not send flushsnap +echo x > foo/x +sync +mkdir foo/.snap/ss +ln foo/x foo/xx +cat foo/.snap/ss/x +rmdir foo/.snap/ss + +# +echo a > foo/a +echo b > foo/b +mkdir foo/.snap/s +r=`cat foo/.snap/s/a` +[ -z "$r" ] && echo "a appears empty in snapshot" && false + +ln foo/b foo/b2 +cat foo/.snap/s/b + +echo "this used to hang:" +echo more >> foo/b2 +echo "oh, it didn't hang! good job." +cat foo/b +rmdir foo/.snap/s + +rm -r foo + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-dir-rename.sh b/qa/workunits/fs/snaps/snaptest-dir-rename.sh new file mode 100755 index 000000000..3bbd9a11e --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-dir-rename.sh @@ -0,0 +1,17 @@ +#!/bin/sh -x + +set -e + +# +# make sure we keep an existing dn's seq +# + +mkdir a +mkdir .snap/bar +mkdir a/.snap/foo +rmdir a/.snap/foo +rmdir a +stat .snap/bar/a +rmdir .snap/bar + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-double-null.sh b/qa/workunits/fs/snaps/snaptest-double-null.sh new file mode 100755 index 000000000..cdf32e4f0 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-double-null.sh @@ -0,0 +1,23 @@ +#!/bin/sh -x + +set -e + +# multiple intervening snapshots with no modifications, and thus no +# snapflush client_caps messages. make sure the mds can handle this. + +for f in `seq 1 20` ; do + +mkdir a +cat > a/foo & +mkdir a/.snap/one +mkdir a/.snap/two +chmod 777 a/foo +sync # this might crash the mds +ps +rmdir a/.snap/* +rm a/foo +rmdir a + +done + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-estale.sh b/qa/workunits/fs/snaps/snaptest-estale.sh new file mode 100755 index 000000000..a4fb94368 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-estale.sh @@ -0,0 +1,13 @@ +#!/bin/sh -x + +mkdir .snap/foo + +echo "We want ENOENT, not ESTALE, here." +for f in `seq 1 100` +do + stat .snap/foo/$f 2>&1 | grep 'No such file' +done + +rmdir .snap/foo + +echo "OK" diff --git a/qa/workunits/fs/snaps/snaptest-git-ceph.sh b/qa/workunits/fs/snaps/snaptest-git-ceph.sh new file mode 100755 index 000000000..12c1f0fdc --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-git-ceph.sh @@ -0,0 +1,52 @@ +#!/bin/sh -x + +set -e + +# try it again if the clone is slow and the second time +retried=false +trap -- 'retry' EXIT +retry() { + rm -rf ceph + # double the timeout value + timeout 3600 git clone https://git.ceph.com/ceph.git +} +rm -rf ceph +timeout 1800 git clone https://git.ceph.com/ceph.git +trap - EXIT +cd ceph + +versions=`seq 1 90` + +for v in $versions +do + if [ $v -eq 48 ]; then + continue + fi + ver="v0.$v" + echo $ver + git reset --hard $ver + mkdir .snap/$ver +done + +for v in $versions +do + if [ $v -eq 48 ]; then + continue + fi + ver="v0.$v" + echo checking $ver + cd .snap/$ver + git diff --exit-code + cd ../.. 
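+    # 'git diff --exit-code' run inside the snapshot must report no changes:
+    # each .snap/$ver was taken right after 'git reset --hard $ver', so the
+    # snapshotted tree has to match that tag exactly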
+done + +for v in $versions +do + if [ $v -eq 48 ]; then + continue + fi + ver="v0.$v" + rmdir .snap/$ver +done + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-hardlink.sh b/qa/workunits/fs/snaps/snaptest-hardlink.sh new file mode 100755 index 000000000..90f3583b1 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-hardlink.sh @@ -0,0 +1,25 @@ +#!/bin/sh -x + +set -e + +mkdir 1 2 +echo asdf >1/file1 +echo asdf >1/file2 + +ln 1/file1 2/file1 +ln 1/file2 2/file2 + +mkdir 2/.snap/s1 + +echo qwer >1/file1 +grep asdf 2/.snap/s1/file1 + +rm -f 1/file2 +grep asdf 2/.snap/s1/file2 +rm -f 2/file2 +grep asdf 2/.snap/s1/file2 + +rmdir 2/.snap/s1 +rm -rf 1 2 + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-intodir.sh b/qa/workunits/fs/snaps/snaptest-intodir.sh new file mode 100755 index 000000000..d6a220f73 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-intodir.sh @@ -0,0 +1,22 @@ +#!/bin/sh -ex + +# this tests fix for #1399 +mkdir foo +mkdir foo/.snap/one +touch bar +mv bar foo +sync +# should not crash :) + +mkdir baz +mkdir baz/.snap/two +mv baz foo +sync +# should not crash :) + +# clean up. +rmdir foo/baz/.snap/two +rmdir foo/.snap/one +rm -r foo + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-multiple-capsnaps.sh b/qa/workunits/fs/snaps/snaptest-multiple-capsnaps.sh new file mode 100755 index 000000000..5ebc852cf --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-multiple-capsnaps.sh @@ -0,0 +1,42 @@ +#!/bin/sh -x + +set -e + +echo asdf > a +mkdir .snap/1 +chmod 777 a +mkdir .snap/2 +echo qwer > a +mkdir .snap/3 +chmod 666 a +mkdir .snap/4 +echo zxcv > a +mkdir .snap/5 + +ls -al .snap/?/a + +grep asdf .snap/1/a +stat .snap/1/a | grep 'Size: 5' + +grep asdf .snap/2/a +stat .snap/2/a | grep 'Size: 5' +stat .snap/2/a | grep -- '-rwxrwxrwx' + +grep qwer .snap/3/a +stat .snap/3/a | grep 'Size: 5' +stat .snap/3/a | grep -- '-rwxrwxrwx' + +grep qwer .snap/4/a +stat .snap/4/a | grep 'Size: 5' +stat .snap/4/a | grep -- '-rw-rw-rw-' + +grep zxcv .snap/5/a +stat .snap/5/a | grep 'Size: 5' +stat .snap/5/a | grep -- '-rw-rw-rw-' + +rmdir .snap/[12345] + +echo "OK" + + + diff --git a/qa/workunits/fs/snaps/snaptest-name-limits.sh b/qa/workunits/fs/snaps/snaptest-name-limits.sh new file mode 100755 index 000000000..f40d0231e --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-name-limits.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# +# This tests snapshot names limits: names have to be < 240 chars +# + +function cleanup () +{ + rmdir d1/.snap/* + rm -rf d1 +} + +function fail () +{ + echo $@ + cleanup + exit 1 +} + +mkdir d1 + +longname=$(printf "%.241d" 2) +mkdir d1/.snap/$longname 2> /dev/null +[ -d d1/.snap/$longname ] && fail "Invalid snapshot exists: $longname" + +cleanup + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-parents.sh b/qa/workunits/fs/snaps/snaptest-parents.sh new file mode 100755 index 000000000..7ab1ba7cf --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-parents.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +set -e + +echo "making directory tree and files" +mkdir -p 1/a/b/c/ +echo "i'm file1" > 1/a/file1 +echo "i'm file2" > 1/a/b/file2 +echo "i'm file3" > 1/a/b/c/file3 +echo "snapshotting" +mkdir 1/.snap/foosnap1 +mkdir 2 +echo "moving tree" +mv 1/a 2 +echo "checking snapshot contains tree..." +dir1=`find 1/.snap/foosnap1 | wc -w` +dir2=`find 2/ | wc -w` +#diff $dir1 $dir2 && echo "Success!" +test $dir1==$dir2 && echo "Success!" +echo "adding folder and file to tree..." +mkdir 2/a/b/c/d +echo "i'm file 4!" 
> 2/a/b/c/d/file4 +echo "snapshotting tree 2" +mkdir 2/.snap/barsnap2 +echo "comparing snapshots" +dir1=`find 1/.snap/foosnap1/ -maxdepth 2 | wc -w` +dir2=`find 2/.snap/barsnap2/ -maxdepth 2 | wc -w` +#diff $dir1 $dir2 && echo "Success!" +test $dir1==$dir2 && echo "Success!" +echo "moving subtree to first folder" +mv 2/a/b/c 1 +echo "comparing snapshots and new tree" +dir1=`find 1/ | wc -w` +dir2=`find 2/.snap/barsnap2/a/b/c | wc -w` +#diff $dir1 $dir2 && echo "Success!" +test $dir1==$dir2 && echo "Success!" +rmdir 1/.snap/* +rmdir 2/.snap/* +echo "OK" diff --git a/qa/workunits/fs/snaps/snaptest-realm-split.sh b/qa/workunits/fs/snaps/snaptest-realm-split.sh new file mode 100755 index 000000000..300cca21d --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-realm-split.sh @@ -0,0 +1,31 @@ +#!/bin/sh -x + +set -e + +mkdir -p 1/a +exec 3<> 1/a/file1 + +echo -n a >&3 + +mkdir 1/.snap/s1 + +echo -n b >&3 + +mkdir 2 +# create new snaprealm at dir a, file1's cap should be attached to the new snaprealm +mv 1/a 2 + +mkdir 2/.snap/s2 + +echo -n c >&3 + +exec 3>&- + +grep '^a$' 1/.snap/s1/a/file1 +grep '^ab$' 2/.snap/s2/a/file1 +grep '^abc$' 2/a/file1 + +rmdir 1/.snap/s1 +rmdir 2/.snap/s2 +rm -rf 1 2 +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-snap-rename.sh b/qa/workunits/fs/snaps/snaptest-snap-rename.sh new file mode 100755 index 000000000..aa7325b92 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-snap-rename.sh @@ -0,0 +1,33 @@ +#!/bin/sh -x + +expect_failure() { + if "$@"; then return 1; else return 0; fi +} +set -e + +mkdir -p d1/d2 +mkdir -p d1/d3 +mkdir d1/.snap/foo +mkdir d1/d2/.snap/foo +mkdir d1/d3/.snap/foo +mkdir d1/d3/.snap/bar +mv d1/d2/.snap/foo d1/d2/.snap/bar +# snapshot name can't start with _ +expect_failure mv d1/d2/.snap/bar d1/d2/.snap/_bar +# can't rename parent snapshot +expect_failure mv d1/d2/.snap/_foo_* d1/d2/.snap/foo +expect_failure mv d1/d2/.snap/_foo_* d1/d2/.snap/_foo_1 +# can't rename snapshot to different directroy +expect_failure mv d1/d2/.snap/bar d1/.snap/ +# can't overwrite existing snapshot +expect_failure python3 -c "import os; os.rename('d1/d3/.snap/foo', 'd1/d3/.snap/bar')" +# can't move snaphost out of snapdir +expect_failure python3 -c "import os; os.rename('d1/.snap/foo', 'd1/foo')" + +rmdir d1/.snap/foo +rmdir d1/d2/.snap/bar +rmdir d1/d3/.snap/foo +rmdir d1/d3/.snap/bar +rm -rf d1 + +echo OK diff --git a/qa/workunits/fs/snaps/snaptest-snap-rm-cmp.sh b/qa/workunits/fs/snaps/snaptest-snap-rm-cmp.sh new file mode 100755 index 000000000..88a0e8ae5 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-snap-rm-cmp.sh @@ -0,0 +1,24 @@ +#!/bin/sh -x + +set -e + +file=linux-2.6.33.tar.bz2 +wget -q http://download.ceph.com/qa/$file + +real=`md5sum $file | awk '{print $1}'` + +for f in `seq 1 20` +do + echo $f + cp $file a + mkdir .snap/s + rm a + cp .snap/s/a /tmp/a + cur=`md5sum /tmp/a | awk '{print $1}'` + if [ "$cur" != "$real" ]; then + echo "FAIL: bad match, /tmp/a $cur != real $real" + false + fi + rmdir .snap/s +done +rm $file diff --git a/qa/workunits/fs/snaps/snaptest-upchildrealms.sh b/qa/workunits/fs/snaps/snaptest-upchildrealms.sh new file mode 100755 index 000000000..4e531a966 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-upchildrealms.sh @@ -0,0 +1,28 @@ +#!/bin/sh -x + +set -e + +# +# verify that a snap update on a parent realm will induce +# snap cap writeback for inodes child realms +# + +mkdir a +mkdir a/b +mkdir a/.snap/a1 +mkdir a/b/.snap/b1 +echo asdf > a/b/foo +mkdir a/.snap/a2 +# client _should_ have just queued a capsnap for 
writeback +ln a/b/foo a/b/bar # make the server cow the inode + +echo "this should not hang..." +cat a/b/.snap/_a2_*/foo +echo "good, it did not hang." + +rmdir a/b/.snap/b1 +rmdir a/.snap/a1 +rmdir a/.snap/a2 +rm -r a + +echo "OK" diff --git a/qa/workunits/fs/snaps/snaptest-xattrwb.sh b/qa/workunits/fs/snaps/snaptest-xattrwb.sh new file mode 100755 index 000000000..e503aed77 --- /dev/null +++ b/qa/workunits/fs/snaps/snaptest-xattrwb.sh @@ -0,0 +1,29 @@ +#!/bin/sh -x + +set -e + +echo "testing simple xattr wb" +touch x +setfattr -n user.foo x +mkdir .snap/s1 +getfattr -n user.foo .snap/s1/x | grep user.foo +rm x +rmdir .snap/s1 + +echo "testing wb with pre-wb server cow" +mkdir a +mkdir a/b +mkdir a/b/c +# b now has As but not Ax +setfattr -n user.foo a/b +mkdir a/.snap/s +mkdir a/b/cc +# b now has been cowed on the server, but we still have dirty xattr caps +getfattr -n user.foo a/b # there they are... +getfattr -n user.foo a/.snap/s/b | grep user.foo # should be there, too! + +# ok, clean up +rmdir a/.snap/s +rm -r a + +echo OK diff --git a/qa/workunits/fs/snaps/untar_snap_rm.sh b/qa/workunits/fs/snaps/untar_snap_rm.sh new file mode 100755 index 000000000..8a8412e66 --- /dev/null +++ b/qa/workunits/fs/snaps/untar_snap_rm.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +set -e + +do_tarball() { + wget http://download.ceph.com/qa/$1 + tar xvf$2 $1 + mkdir .snap/k + sync + rm -rv $3 + cp -av .snap/k . + rmdir .snap/k + rm -rv k + rm $1 +} + +do_tarball coreutils_8.5.orig.tar.gz z coreutils-8.5 +do_tarball linux-2.6.33.tar.bz2 j linux-2.6.33 diff --git a/qa/workunits/fs/test_o_trunc.c b/qa/workunits/fs/test_o_trunc.c new file mode 100644 index 000000000..1ce19e4bb --- /dev/null +++ b/qa/workunits/fs/test_o_trunc.c @@ -0,0 +1,45 @@ +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <stdlib.h> + +int main(int argc, char *argv[]) +{ + char obuf[32], ibuf[1024]; + int n, max = 0; + + if (argc > 2) + max = atoi(argv[2]); + if (!max) + max = 600; + + memset(obuf, 0xff, sizeof(obuf)); + + for (n = 1; n <= max; ++n) { + int fd, ret; + fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0644); + printf("%d/%d: open fd = %d\n", n, max, fd); + + ret = write(fd, obuf, sizeof(obuf)); + printf("write ret = %d\n", ret); + + sleep(1); + + ret = write(fd, obuf, sizeof(obuf)); + printf("write ret = %d\n", ret); + + ret = pread(fd, ibuf, sizeof(ibuf), 0); + printf("pread ret = %d\n", ret); + + if (memcmp(obuf, ibuf, sizeof(obuf))) { + printf("mismatch\n"); + close(fd); + break; + } + close(fd); + } + return 0; +} diff --git a/qa/workunits/fs/test_o_trunc.sh b/qa/workunits/fs/test_o_trunc.sh new file mode 100755 index 000000000..90a72600d --- /dev/null +++ b/qa/workunits/fs/test_o_trunc.sh @@ -0,0 +1,7 @@ +#!/bin/sh -ex + +mydir=`dirname $0` +$mydir/test_o_trunc trunc.foo 600 + +echo OK + diff --git a/qa/workunits/fs/test_python.sh b/qa/workunits/fs/test_python.sh new file mode 100755 index 000000000..6e39b95a4 --- /dev/null +++ b/qa/workunits/fs/test_python.sh @@ -0,0 +1,6 @@ +#!/bin/sh -ex + +# Running as root because the filesystem root directory will be +# owned by uid 0, and that's where we're writing. 
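+# The ../../../ in the path below walks from qa/workunits/fs back to the root
+# of the ceph checkout, where the pybind test module lives.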
+sudo python3 -m pytest -v $(dirname $0)/../../../src/test/pybind/test_cephfs.py +exit 0 diff --git a/qa/workunits/hadoop/repl.sh b/qa/workunits/hadoop/repl.sh new file mode 100755 index 000000000..84f6150ab --- /dev/null +++ b/qa/workunits/hadoop/repl.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +set -e +set -x + +# bail if $TESTDIR is not set as this test will fail in that scenario +[ -z $TESTDIR ] && { echo "\$TESTDIR needs to be set, but is not. Exiting."; exit 1; } + +# if HADOOP_PREFIX is not set, use default +[ -z $HADOOP_PREFIX ] && { HADOOP_PREFIX=$TESTDIR/hadoop; } + +# create pools with different replication factors +for repl in 2 3 7 8 9; do + name=hadoop.$repl + ceph osd pool create $name 8 8 + ceph osd pool set $name size $repl + + id=`ceph osd dump | sed -n "s/^pool \([0-9]*\) '$name'.*/\1/p"` + ceph fs add_data_pool cephfs $id +done + +# create a file in each of the pools +for repl in 2 3 7 8 9; do + name=hadoop.$repl + $HADOOP_PREFIX/bin/hadoop fs -rm -f /$name.dat + dd if=/dev/zero bs=1048576 count=1 | \ + $HADOOP_PREFIX/bin/hadoop fs -Dceph.data.pools="$name" \ + -put - /$name.dat +done + +# check that hadoop reports replication matching +# that of the pool the file was written into +for repl in 2 3 7 8 9; do + name=hadoop.$repl + repl2=$($HADOOP_PREFIX/bin/hadoop fs -ls /$name.dat | awk '{print $2}') + if [ $repl -ne $repl2 ]; then + echo "replication factors didn't match!" + exit 1 + fi +done + +exit 0 diff --git a/qa/workunits/hadoop/terasort.sh b/qa/workunits/hadoop/terasort.sh new file mode 100755 index 000000000..3d6988a21 --- /dev/null +++ b/qa/workunits/hadoop/terasort.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash + +set -e +set -x + +INPUT=/terasort-input +OUTPUT=/terasort-output +REPORT=/tersort-report + +num_records=100000 +[ ! -z $NUM_RECORDS ] && num_records=$NUM_RECORDS + +# bail if $TESTDIR is not set as this test will fail in that scenario +[ -z $TESTDIR ] && { echo "\$TESTDIR needs to be set, but is not. 
Exiting."; exit 1; } + +# if HADOOP_PREFIX is not set, use default +[ -z $HADOOP_PREFIX ] && { HADOOP_PREFIX=$TESTDIR/hadoop; } + +# Nuke hadoop directories +$HADOOP_PREFIX/bin/hadoop fs -rm -r $INPUT $OUTPUT $REPORT || true + +# Generate terasort data +# +#-Ddfs.blocksize=512M \ +#-Dio.file.buffer.size=131072 \ +#-Dmapreduce.map.java.opts=-Xmx1536m \ +#-Dmapreduce.map.memory.mb=2048 \ +#-Dmapreduce.task.io.sort.mb=256 \ +#-Dyarn.app.mapreduce.am.resource.mb=1024 \ +#-Dmapred.map.tasks=64 \ +$HADOOP_PREFIX/bin/hadoop jar \ + $HADOOP_PREFIX/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \ + teragen \ + -Dmapred.map.tasks=9 \ + $num_records \ + $INPUT + +# Run the sort job +# +#-Ddfs.blocksize=512M \ +#-Dio.file.buffer.size=131072 \ +#-Dmapreduce.map.java.opts=-Xmx1536m \ +#-Dmapreduce.map.memory.mb=2048 \ +#-Dmapreduce.map.output.compress=true \ +#-Dmapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.Lz4Codec \ +#-Dmapreduce.reduce.java.opts=-Xmx1536m \ +#-Dmapreduce.reduce.memory.mb=2048 \ +#-Dmapreduce.task.io.sort.factor=100 \ +#-Dmapreduce.task.io.sort.mb=768 \ +#-Dyarn.app.mapreduce.am.resource.mb=1024 \ +#-Dmapred.reduce.tasks=100 \ +#-Dmapreduce.terasort.output.replication=1 \ +$HADOOP_PREFIX/bin/hadoop jar \ + $HADOOP_PREFIX/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \ + terasort \ + -Dmapred.reduce.tasks=10 \ + $INPUT $OUTPUT + +# Validate the sorted data +# +#-Ddfs.blocksize=512M \ +#-Dio.file.buffer.size=131072 \ +#-Dmapreduce.map.java.opts=-Xmx1536m \ +#-Dmapreduce.map.memory.mb=2048 \ +#-Dmapreduce.reduce.java.opts=-Xmx1536m \ +#-Dmapreduce.reduce.memory.mb=2048 \ +#-Dmapreduce.task.io.sort.mb=256 \ +#-Dyarn.app.mapreduce.am.resource.mb=1024 \ +#-Dmapred.reduce.tasks=1 \ +$HADOOP_PREFIX/bin/hadoop jar \ + $HADOOP_PREFIX/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \ + teravalidate \ + -Dmapred.reduce.tasks=1 \ + $OUTPUT $REPORT + +exit 0 diff --git a/qa/workunits/hadoop/wordcount.sh b/qa/workunits/hadoop/wordcount.sh new file mode 100755 index 000000000..616b08af2 --- /dev/null +++ b/qa/workunits/hadoop/wordcount.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +set -e +set -x + +WC_INPUT=/wc_input +WC_OUTPUT=/wc_output +DATA_INPUT=$(mktemp -d) + +echo "starting hadoop-wordcount test" + +# bail if $TESTDIR is not set as this test will fail in that scenario +[ -z $TESTDIR ] && { echo "\$TESTDIR needs to be set, but is not. Exiting."; exit 1; } + +# if HADOOP_PREFIX is not set, use default +[ -z $HADOOP_PREFIX ] && { HADOOP_PREFIX=$TESTDIR/hadoop; } + +# Nuke hadoop directories +$HADOOP_PREFIX/bin/hadoop fs -rm -r $WC_INPUT $WC_OUTPUT || true + +# Fetch and import testing data set +curl http://download.ceph.com/qa/hadoop_input_files.tar | tar xf - -C $DATA_INPUT +$HADOOP_PREFIX/bin/hadoop fs -copyFromLocal $DATA_INPUT $WC_INPUT +rm -rf $DATA_INPUT + +# Run the job +$HADOOP_PREFIX/bin/hadoop jar \ + $HADOOP_PREFIX/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \ + wordcount $WC_INPUT $WC_OUTPUT + +# Cleanup +$HADOOP_PREFIX/bin/hadoop fs -rm -r $WC_INPUT $WC_OUTPUT || true + +echo "completed hadoop-wordcount test" +exit 0 diff --git a/qa/workunits/kernel_untar_build.sh b/qa/workunits/kernel_untar_build.sh new file mode 100755 index 000000000..9b60f065c --- /dev/null +++ b/qa/workunits/kernel_untar_build.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +set -e + +wget -O linux.tar.gz http://download.ceph.com/qa/linux-5.4.tar.gz + +mkdir t +cd t +tar xzf ../linux.tar.gz +cd linux* +make defconfig +make -j`grep -c processor /proc/cpuinfo` +cd .. 
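+# the recursive rm of the build tree must remove everything cleanly; if it
+# fails, dump whatever was left behind so the failure is easy to diagnose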
+if ! rm -rv linux* ; then + echo "uh oh rm -r failed, it left behind:" + find . + exit 1 +fi +cd .. +rm -rv t linux* diff --git a/qa/workunits/libcephfs/test.sh b/qa/workunits/libcephfs/test.sh new file mode 100755 index 000000000..c53fe893c --- /dev/null +++ b/qa/workunits/libcephfs/test.sh @@ -0,0 +1,10 @@ +#!/bin/sh -e + +ceph_test_libcephfs +ceph_test_libcephfs_access +ceph_test_libcephfs_reclaim +ceph_test_libcephfs_lazyio +ceph_test_libcephfs_newops +ceph_test_libcephfs_suidsgid + +exit 0 diff --git a/qa/workunits/mgr/test_localpool.sh b/qa/workunits/mgr/test_localpool.sh new file mode 100755 index 000000000..40a749e8d --- /dev/null +++ b/qa/workunits/mgr/test_localpool.sh @@ -0,0 +1,21 @@ +#!/bin/sh -ex + +ceph config set mgr mgr/localpool/subtree host +ceph config set mgr mgr/localpool/failure_domain osd +ceph mgr module enable localpool + +while ! ceph osd pool ls | grep '^by-host-' +do + sleep 5 +done + +ceph mgr module disable localpool +for p in `ceph osd pool ls | grep '^by-host-'` +do + ceph osd pool rm $p $p --yes-i-really-really-mean-it +done + +ceph config rm mgr mgr/localpool/subtree +ceph config rm mgr mgr/localpool/failure_domain + +echo OK diff --git a/qa/workunits/mgr/test_per_module_finisher.sh b/qa/workunits/mgr/test_per_module_finisher.sh new file mode 100755 index 000000000..dc66bce23 --- /dev/null +++ b/qa/workunits/mgr/test_per_module_finisher.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +set -ex + +# This testcase tests the per module finisher stats for enabled modules +# using check counter (qa/tasks/check_counter.py). + +# 'balancer' commands +ceph balancer pool ls + +# 'crash' commands +ceph crash ls +ceph crash ls-new + +# 'device' commands +ceph device query-daemon-health-metrics mon.a + +# 'iostat' command +ceph iostat & +pid=$! +sleep 3 +kill -SIGTERM $pid + +# 'pg_autoscaler' command +ceph osd pool autoscale-status + +# 'progress' command +ceph progress +ceph progress json + +# 'status' commands +ceph fs status +ceph osd status + +# 'telemetry' commands +ceph telemetry status +ceph telemetry diff + +echo OK diff --git a/qa/workunits/mon/auth_caps.sh b/qa/workunits/mon/auth_caps.sh new file mode 100755 index 000000000..1f59ae1f7 --- /dev/null +++ b/qa/workunits/mon/auth_caps.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash + +set -e +set -x +declare -A keymap + +combinations="r w x rw rx wx rwx" + +for i in ${combinations}; do + k="foo_$i" + k=`ceph auth get-or-create-key client.$i mon "allow $i"` || exit 1 + keymap["$i"]=$k +done + +# add special caps +keymap["all"]=`ceph auth get-or-create-key client.all mon 'allow *'` || exit 1 + +tmp=`mktemp` +ceph auth export > $tmp + +trap "rm $tmp" INT ERR EXIT QUIT 0 + +expect() { + + set +e + + local expected_ret=$1 + local ret + + shift + cmd=$@ + + eval $cmd + ret=$? 
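+  # record the command's exit status while errexit is disabled; it is compared
+  # against the caller's expectation once 'set -e' is restored below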
+ + set -e + + if [[ $ret -ne $expected_ret ]]; then + echo "ERROR: running \'$cmd\': expected $expected_ret got $ret" + return 1 + fi + + return 0 +} + +read_ops() { + local caps=$1 + local has_read=1 has_exec=1 + local ret + local args + + ( echo $caps | grep 'r' ) || has_read=0 + ( echo $caps | grep 'x' ) || has_exec=0 + + if [[ "$caps" == "all" ]]; then + has_read=1 + has_exec=1 + fi + + ret=13 + if [[ $has_read -gt 0 && $has_exec -gt 0 ]]; then + ret=0 + fi + + args="--id $caps --key ${keymap[$caps]}" + + expect $ret ceph auth get client.admin $args + expect $ret ceph auth get-key client.admin $args + expect $ret ceph auth export $args + expect $ret ceph auth export client.admin $args + expect $ret ceph auth ls $args + expect $ret ceph auth print-key client.admin $args + expect $ret ceph auth print_key client.admin $args +} + +write_ops() { + + local caps=$1 + local has_read=1 has_write=1 has_exec=1 + local ret + local args + + ( echo $caps | grep 'r' ) || has_read=0 + ( echo $caps | grep 'w' ) || has_write=0 + ( echo $caps | grep 'x' ) || has_exec=0 + + if [[ "$caps" == "all" ]]; then + has_read=1 + has_write=1 + has_exec=1 + fi + + ret=13 + if [[ $has_read -gt 0 && $has_write -gt 0 && $has_exec -gt 0 ]]; then + ret=0 + fi + + args="--id $caps --key ${keymap[$caps]}" + + expect $ret ceph auth add client.foo $args + expect $ret "ceph auth caps client.foo mon 'allow *' $args" + expect $ret ceph auth get-or-create client.admin $args + expect $ret ceph auth get-or-create-key client.admin $args + expect $ret ceph auth get-or-create-key client.baz $args + expect $ret ceph auth del client.foo $args + expect $ret ceph auth del client.baz $args + expect $ret ceph auth import -i $tmp $args +} + +echo "running combinations: ${!keymap[@]}" + +subcmd=$1 + +for i in ${!keymap[@]}; do + echo "caps: $i" + if [[ -z "$subcmd" || "$subcmd" == "read" || "$subcmd" == "all" ]]; then + read_ops $i + fi + + if [[ -z "$subcmd" || "$subcmd" == "write" || "$subcmd" == "all" ]]; then + write_ops $i + fi +done + +# cleanup +for i in ${combinations} all; do + ceph auth del client.$i || exit 1 +done + +echo "OK" diff --git a/qa/workunits/mon/auth_key_rotation.sh b/qa/workunits/mon/auth_key_rotation.sh new file mode 100755 index 000000000..1a53bab6d --- /dev/null +++ b/qa/workunits/mon/auth_key_rotation.sh @@ -0,0 +1,58 @@ +#!/usr/bin/bash -ex + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + + +ceph auth export +ceph auth rm client.rot + +ceph auth get-or-create client.rot mon 'allow rwx' +ceph auth export client.rot | grep key +ceph auth export client.rot | expect_false grep pending.key + +ceph auth get-or-create-pending client.rot +ceph auth export client.rot | grep key +ceph auth export client.rot | grep pending.key + +ceph auth clear-pending client.rot +ceph auth export client.rot | expect_false grep pending.key + +ceph auth get-or-create-pending client.rot +ceph auth export client.rot | grep key +ceph auth export client.rot | grep pending.key +K=$(ceph auth export client.rot | grep 'key = ' | head -n 1 | awk '{print $3}') +PK=$(ceph auth export client.rot | grep pending.key | awk '{print $4}') +echo "K is $K" +echo "PK is $PK" +ceph -n client.rot --key $K -s + +ceph auth commit-pending client.rot +ceph auth export client.rot | expect_false grep pending.key +ceph auth export client.rot | grep key | grep $PK + +ceph auth get-or-create-pending client.rot +ceph auth export client.rot | grep key +ceph auth export client.rot | grep pending.key +K=$(ceph auth export client.rot | 
grep 'key = ' | head -n 1 | awk '{print $3}') +PK=$(ceph auth export client.rot | grep pending.key | awk '{print $4}') +echo "2, K is $K" +echo "2, PK is $PK" + +ceph auth export client.rot + +while ceph -n client.rot --key $K -s ; do + ceph auth export client.rot + ceph -n client.rot --key $PK -s + sleep 1 +done + +ceph auth export client.rot | expect_false grep pending.key +ceph auth export client.rot | grep key | grep $PK + +ceph -n client.rot --key $PK -s + +echo ok diff --git a/qa/workunits/mon/caps.py b/qa/workunits/mon/caps.py new file mode 100644 index 000000000..26c0cd14c --- /dev/null +++ b/qa/workunits/mon/caps.py @@ -0,0 +1,359 @@ +#!/usr/bin/python3 + +from __future__ import print_function + +import subprocess +import shlex +import errno +import sys +import os +import io +import re + +from ceph_argparse import * # noqa + +keyring_base = '/tmp/cephtest-caps.keyring' + +class UnexpectedReturn(Exception): + def __init__(self, cmd, ret, expected, msg): + if isinstance(cmd, list): + self.cmd = ' '.join(cmd) + else: + assert isinstance(cmd, str), 'cmd needs to be either a list or a str' + self.cmd = cmd + self.cmd = str(self.cmd) + self.ret = int(ret) + self.expected = int(expected) + self.msg = str(msg) + + def __str__(self): + return repr('{c}: expected return {e}, got {r} ({o})'.format( + c=self.cmd, e=self.expected, r=self.ret, o=self.msg)) + +def call(cmd): + if isinstance(cmd, list): + args = cmd + elif isinstance(cmd, str): + args = shlex.split(cmd) + else: + assert False, 'cmd is not a string/unicode nor a list!' + + print('call: {0}'.format(args)) + proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = proc.wait() + + return (ret, proc) + +def expect(cmd, expected_ret): + + try: + (r, p) = call(cmd) + except ValueError as e: + print('unable to run {c}: {err}'.format(c=repr(cmd), err=e.message), + file=sys.stderr) + return errno.EINVAL + + assert r == p.returncode, \ + 'wth? r was supposed to match returncode!' + + if r != expected_ret: + raise UnexpectedReturn(repr(cmd), r, expected_ret, str(p.stderr.read())) + + return p + +def expect_to_file(cmd, expected_ret, out_file): + + # Let the exception be propagated to the caller + p = expect(cmd, expected_ret) + assert p.returncode == expected_ret, \ + 'expected result doesn\'t match and no exception was thrown!' + + with io.open(out_file, 'ab') as file: + file.write(p.stdout.read()) + + return p + +class Command: + def __init__(self, cid, j): + self.cid = cid[3:] + self.perms = j['perm'] + self.module = j['module'] + + self.sig = '' + self.args = [] + for s in j['sig']: + if not isinstance(s, dict): + assert isinstance(s, str), \ + 'malformatted signature cid {0}: {1}\n{2}'.format(cid,s,j) + if len(self.sig) > 0: + self.sig += ' ' + self.sig += s + else: + self.args.append(s) + + def __str__(self): + return repr('command {0}: {1} (requires \'{2}\')'.format(self.cid,\ + self.sig, self.perms)) + + +def destroy_keyring(path): + if not os.path.exists(path): + raise Exception('oops! 
cannot remove inexistent keyring {0}'.format(path)) + + # grab all client entities from the keyring + entities = [m.group(1) for m in [re.match(r'\[client\.(.*)\]', l) + for l in [str(line.strip()) + for line in io.open(path,'r')]] if m is not None] + + # clean up and make sure each entity is gone + for e in entities: + expect('ceph auth del client.{0}'.format(e), 0) + expect('ceph auth get client.{0}'.format(e), errno.ENOENT) + + # remove keyring + os.unlink(path) + + return True + +def test_basic_auth(): + # make sure we can successfully add/del entities, change their caps + # and import/export keyrings. + + expect('ceph auth add client.basicauth', 0) + expect('ceph auth caps client.basicauth mon \'allow *\'', 0) + # entity exists and caps do not match + expect('ceph auth add client.basicauth', errno.EINVAL) + # this command attempts to change an existing state and will fail + expect('ceph auth add client.basicauth mon \'allow w\'', errno.EINVAL) + expect('ceph auth get-or-create client.basicauth', 0) + expect('ceph auth get-key client.basicauth', 0) + expect('ceph auth get-or-create client.basicauth2', 0) + # cleanup + expect('ceph auth del client.basicauth', 0) + expect('ceph auth del client.basicauth2', 0) + + return True + +def gen_module_keyring(module): + module_caps = [ + ('all', '{t} \'allow service {s} rwx\'', 0), + ('none', '', errno.EACCES), + ('wrong', '{t} \'allow service foobar rwx\'', errno.EACCES), + ('right', '{t} \'allow service {s} {p}\'', 0), + ('no-execute', '{t} \'allow service {s} x\'', errno.EACCES) + ] + + keyring = '{0}.service-{1}'.format(keyring_base,module) + for perms in 'r rw x'.split(): + for (n,p,r) in module_caps: + c = p.format(t='mon', s=module, p=perms) + expect_to_file( + 'ceph auth get-or-create client.{cn}-{cp} {caps}'.format( + cn=n,cp=perms,caps=c), 0, keyring) + + return keyring + + +def test_all(): + + + perms = { + 'good': { + 'broad':[ + ('rwx', 'allow *'), + ('r', 'allow r'), + ('rw', 'allow rw'), + ('x', 'allow x'), + ], + 'service':[ + ('rwx', 'allow service {s} rwx'), + ('r', 'allow service {s} r'), + ('rw', 'allow service {s} rw'), + ('x', 'allow service {s} x'), + ], + 'command':[ + ('rwx', 'allow command "{c}"'), + ], + 'command-with':[ + ('rwx', 'allow command "{c}" with {kv}') + ], + 'command-with-prefix':[ + ('rwx', 'allow command "{c}" with {key} prefix {val}') + ] + }, + 'bad': { + 'broad':[ + ('none', ''), + ], + 'service':[ + ('none1', 'allow service foo rwx'), + ('none2', 'allow service foo r'), + ('none3', 'allow service foo rw'), + ('none4', 'allow service foo x'), + ], + 'command':[ + ('none', 'allow command foo'), + ], + 'command-with':[ + ('none', 'allow command "{c}" with foo=bar'), + ], + 'command-with-prefix':[ + ('none', 'allow command "{c}" with foo prefix bar'), + ], + } + } + + cmds = { + '':[ + { + 'cmd':('status', '', 'r') + }, + { + 'pre':'heap start_profiler', + 'cmd':('heap', 'heapcmd=stats', 'rw'), + 'post':'heap stop_profiler' + } + ], + 'auth':[ + { + 'pre':'', + 'cmd':('auth ls', '', 'r'), + 'post':'' + }, + { + 'pre':'auth get-or-create client.foo mon \'allow *\'', + 'cmd':('auth caps', 'entity="client.foo"', 'rw'), + 'post':'auth del client.foo' + } + ], + 'pg':[ + { + 'cmd':('pg getmap', '', 'r'), + }, + ], + 'mds':[ + { + 'cmd':('mds getmap', '', 'r'), + }, + ], + 'mon':[ + { + 'cmd':('mon getmap', '', 'r') + }, + { + 'cmd':('mon remove', 'name=a', 'rw') + } + ], + 'osd':[ + { + 'cmd':('osd getmap', '', 'r'), + }, + { + 'cmd':('osd pause', '', 'rw'), + 'post':'osd unpause' + }, + { + 'cmd':('osd crush 
dump', '', 'r') + }, + ], + 'config-key':[ + { + 'pre':'config-key set foo bar', + 'cmd':('config-key get', 'key=foo', 'r') + }, + { + 'pre':'config-key set foo bar', + 'cmd':('config-key del', 'key=foo', 'rw') + } + ] + } + + for (module,cmd_lst) in cmds.items(): + k = keyring_base + '.' + module + for cmd in cmd_lst: + + (cmd_cmd, cmd_args, cmd_perm) = cmd['cmd'] + cmd_args_key = '' + cmd_args_val = '' + if len(cmd_args) > 0: + (cmd_args_key, cmd_args_val) = cmd_args.split('=') + + print('generating keyring for {m}/{c}'.format(m=module,c=cmd_cmd)) + # gen keyring + for (good_or_bad,kind_map) in perms.items(): + for (kind,lst) in kind_map.items(): + for (perm, cap) in lst: + cap_formatted = cap.format( + s=module, + c=cmd_cmd, + kv=cmd_args, + key=cmd_args_key, + val=cmd_args_val) + + if len(cap_formatted) == 0: + run_cap = '' + else: + run_cap = 'mon \'{fc}\''.format(fc=cap_formatted) + + cname = 'client.{gb}-{kind}-{p}'.format( + gb=good_or_bad,kind=kind,p=perm) + expect_to_file( + 'ceph auth get-or-create {n} {c}'.format( + n=cname,c=run_cap), 0, k) + # keyring generated + print('testing {m}/{c}'.format(m=module,c=cmd_cmd)) + + # test + for good_bad in perms.keys(): + for (kind,lst) in perms[good_bad].items(): + for (perm,_) in lst: + cname = 'client.{gb}-{k}-{p}'.format(gb=good_bad,k=kind,p=perm) + + if good_bad == 'good': + expect_ret = 0 + else: + expect_ret = errno.EACCES + + if ( cmd_perm not in perm ): + expect_ret = errno.EACCES + if 'with' in kind and len(cmd_args) == 0: + expect_ret = errno.EACCES + if 'service' in kind and len(module) == 0: + expect_ret = errno.EACCES + + if 'pre' in cmd and len(cmd['pre']) > 0: + expect('ceph {0}'.format(cmd['pre']), 0) + expect('ceph -n {cn} -k {k} {c} {arg_val}'.format( + cn=cname,k=k,c=cmd_cmd,arg_val=cmd_args_val), expect_ret) + if 'post' in cmd and len(cmd['post']) > 0: + expect('ceph {0}'.format(cmd['post']), 0) + # finish testing + destroy_keyring(k) + + + return True + + +def test_misc(): + + k = keyring_base + '.misc' + expect_to_file( + 'ceph auth get-or-create client.caps mon \'allow command "auth caps"' \ + ' with entity="client.caps"\'', 0, k) + expect('ceph -n client.caps -k {kf} quorum_status'.format(kf=k), errno.EACCES) + expect('ceph -n client.caps -k {kf} auth caps client.caps mon \'allow *\''.format(kf=k), 0) + expect('ceph -n client.caps -k {kf} quorum_status'.format(kf=k), 0) + destroy_keyring(k) + +def main(): + + test_basic_auth() + test_all() + test_misc() + + print('OK') + + return 0 + +if __name__ == '__main__': + main() diff --git a/qa/workunits/mon/caps.sh b/qa/workunits/mon/caps.sh new file mode 100755 index 000000000..eae5d8665 --- /dev/null +++ b/qa/workunits/mon/caps.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash + +set -x + +tmp=/tmp/cephtest-mon-caps-madness + +exit_on_error=1 + +[[ ! -z $TEST_EXIT_ON_ERROR ]] && exit_on_error=$TEST_EXIT_ON_ERROR + +if [ `uname` = FreeBSD ]; then + ETIMEDOUT=60 +else + ETIMEDOUT=110 +fi + +expect() +{ + cmd=$1 + expected_ret=$2 + + echo $cmd + eval $cmd >&/dev/null + ret=$? 
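+  # compare the captured status with the caller's expectation; a mismatch is
+  # reported and, unless exit_on_error was disabled via TEST_EXIT_ON_ERROR
+  # above, aborts the whole test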
+ + if [[ $ret -ne $expected_ret ]]; then + echo "Error: Expected return $expected_ret, got $ret" + [[ $exit_on_error -eq 1 ]] && exit 1 + return 1 + fi + + return 0 +} + +expect "ceph auth get-or-create client.bazar > $tmp.bazar.keyring" 0 +expect "ceph -k $tmp.bazar.keyring --user bazar quorum_status" 13 +ceph auth del client.bazar + +c="'allow command \"auth ls\", allow command quorum_status'" +expect "ceph auth get-or-create client.foo mon $c > $tmp.foo.keyring" 0 +expect "ceph -k $tmp.foo.keyring --user foo quorum_status" 0 +expect "ceph -k $tmp.foo.keyring --user foo auth ls" 0 +expect "ceph -k $tmp.foo.keyring --user foo auth export" 13 +expect "ceph -k $tmp.foo.keyring --user foo auth del client.bazar" 13 +expect "ceph -k $tmp.foo.keyring --user foo osd dump" 13 + +# monitor drops the subscribe message from client if it does not have enough caps +# for read from mon. in that case, the client will be waiting for mgrmap in vain, +# if it is instructed to send a command to mgr. "pg dump" is served by mgr. so, +# we need to set a timeout for testing this scenario. +# +# leave plenty of time here because the mons might be thrashing. +export CEPH_ARGS='--rados-mon-op-timeout=300' +expect "ceph -k $tmp.foo.keyring --user foo pg dump" $ETIMEDOUT +export CEPH_ARGS='' + +ceph auth del client.foo +expect "ceph -k $tmp.foo.keyring --user foo quorum_status" 13 + +c="'allow command service with prefix=list, allow command quorum_status'" +expect "ceph auth get-or-create client.bar mon $c > $tmp.bar.keyring" 0 +expect "ceph -k $tmp.bar.keyring --user bar quorum_status" 0 +expect "ceph -k $tmp.bar.keyring --user bar auth ls" 13 +expect "ceph -k $tmp.bar.keyring --user bar auth export" 13 +expect "ceph -k $tmp.bar.keyring --user bar auth del client.foo" 13 +expect "ceph -k $tmp.bar.keyring --user bar osd dump" 13 + +# again, we'll need to timeout. 
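+# (same reasoning as the mon-op-timeout block above: without read caps on the
+# mon the subscribe is dropped, the client never receives a mgrmap, and the
+# mgr-served 'pg dump' can only time out)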
+export CEPH_ARGS='--rados-mon-op-timeout=300' +expect "ceph -k $tmp.bar.keyring --user bar pg dump" $ETIMEDOUT +export CEPH_ARGS='' + +ceph auth del client.bar +expect "ceph -k $tmp.bar.keyring --user bar quorum_status" 13 + +rm $tmp.bazar.keyring $tmp.foo.keyring $tmp.bar.keyring + +# invalid caps health warning +cat <<EOF | ceph auth import -i - +[client.bad] + caps mon = this is wrong + caps osd = does not parse + caps mds = also does not parse +EOF +ceph health | grep AUTH_BAD_CAP +ceph health detail | grep client.bad +ceph auth rm client.bad +expect "ceph auth health | grep AUTH_BAD_CAP" 1 + +echo OK diff --git a/qa/workunits/mon/config.sh b/qa/workunits/mon/config.sh new file mode 100755 index 000000000..1b00201ae --- /dev/null +++ b/qa/workunits/mon/config.sh @@ -0,0 +1,136 @@ +#!/bin/bash -ex + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +ceph config dump + +# value validation +ceph config set mon.a debug_asok 22 +ceph config set mon.a debug_asok 22/33 +ceph config get mon.a debug_asok | grep 22 +ceph config set mon.a debug_asok 1/2 +expect_false ceph config set mon.a debug_asok foo +expect_false ceph config set mon.a debug_asok -10 +ceph config rm mon.a debug_asok + +ceph config set global log_graylog_port 123 +expect_false ceph config set global log_graylog_port asdf +ceph config rm global log_graylog_port + +ceph config set mon mon_cluster_log_to_stderr true +ceph config get mon.a mon_cluster_log_to_stderr | grep true +ceph config set mon mon_cluster_log_to_stderr 2 +ceph config get mon.a mon_cluster_log_to_stderr | grep true +ceph config set mon mon_cluster_log_to_stderr 1 +ceph config get mon.a mon_cluster_log_to_stderr | grep true +ceph config set mon mon_cluster_log_to_stderr false +ceph config get mon.a mon_cluster_log_to_stderr | grep false +ceph config set mon mon_cluster_log_to_stderr 0 +ceph config get mon.a mon_cluster_log_to_stderr | grep false +expect_false ceph config set mon mon_cluster_log_to_stderr fiddle +expect_false ceph config set mon mon_cluster_log_to_stderr '' +ceph config rm mon mon_cluster_log_to_stderr + +expect_false ceph config set mon.a osd_pool_default_type foo +ceph config set mon.a osd_pool_default_type replicated +ceph config rm mon.a osd_pool_default_type + +# scoping +ceph config set global debug_asok 33 +ceph config get mon.a debug_asok | grep 33 +ceph config set mon debug_asok 11 +ceph config get mon.a debug_asok | grep 11 +ceph config set mon.a debug_asok 22 +ceph config get mon.a debug_asok | grep 22 +ceph config rm mon.a debug_asok +ceph config get mon.a debug_asok | grep 11 +ceph config rm mon debug_asok +ceph config get mon.a debug_asok | grep 33 +# nested .-prefix scoping +ceph config set client.foo debug_asok 44 +ceph config get client.foo.bar debug_asok | grep 44 +ceph config get client.foo.bar.baz debug_asok | grep 44 +ceph config set client.foo.bar debug_asok 55 +ceph config get client.foo.bar.baz debug_asok | grep 55 +ceph config rm client.foo debug_asok +ceph config get client.foo.bar.baz debug_asok | grep 55 +ceph config rm client.foo.bar debug_asok +ceph config get client.foo.bar.baz debug_asok | grep 33 +ceph config rm global debug_asok + +# whitespace keys +ceph config set client.foo 'debug asok' 44 +ceph config get client.foo 'debug asok' | grep 44 +ceph config set client.foo debug_asok 55 +ceph config get client.foo 'debug asok' | grep 55 +ceph config set client.foo 'debug asok' 66 +ceph config get client.foo debug_asok | grep 66 +ceph config rm client.foo debug_asok +ceph config 
set client.foo debug_asok 66 +ceph config rm client.foo 'debug asok' + +# help +ceph config help debug_asok | grep debug_asok + +# show +ceph config set osd.0 debug_asok 33 +while ! ceph config show osd.0 | grep debug_asok | grep 33 | grep mon +do + sleep 1 +done +ceph config set osd.0 debug_asok 22 +while ! ceph config show osd.0 | grep debug_asok | grep 22 | grep mon +do + sleep 1 +done + +ceph tell osd.0 config set debug_asok 99 +while ! ceph config show osd.0 | grep debug_asok | grep 99 +do + sleep 1 +done +ceph config show osd.0 | grep debug_asok | grep 'override mon' +ceph tell osd.0 config unset debug_asok +ceph tell osd.0 config unset debug_asok + +ceph config rm osd.0 debug_asok +while ceph config show osd.0 | grep debug_asok | grep mon +do + sleep 1 +done +ceph config show osd.0 | grep -c debug_asok | grep 0 + +ceph config set osd.0 osd_scrub_cost 123 +while ! ceph config show osd.0 | grep osd_scrub_cost | grep mon +do + sleep 1 +done +ceph config rm osd.0 osd_scrub_cost + +# show-with-defaults +ceph config show-with-defaults osd.0 | grep debug_asok + +# assimilate +t1=`mktemp` +t2=`mktemp` +cat <<EOF > $t1 +[osd.0] +keyring = foo +debug_asok = 66 +EOF +ceph config assimilate-conf -i $t1 | tee $t2 + +grep keyring $t2 +expect_false grep debug_asok $t2 +rm -f $t1 $t2 + +expect_false ceph config reset +expect_false ceph config reset -1 +# we are at end of testing, so it's okay to revert everything +ceph config reset 0 + +echo OK diff --git a/qa/workunits/mon/crush_ops.sh b/qa/workunits/mon/crush_ops.sh new file mode 100755 index 000000000..a68761985 --- /dev/null +++ b/qa/workunits/mon/crush_ops.sh @@ -0,0 +1,237 @@ +#!/usr/bin/env bash + +set -ex + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +ceph osd crush dump + +# rules +ceph osd crush rule dump +ceph osd crush rule ls +ceph osd crush rule list + +ceph osd crush rule create-simple foo default host +ceph osd crush rule create-simple foo default host +ceph osd crush rule create-simple bar default host + +ceph osd crush rm-device-class all +ceph osd crush set-device-class ssd osd.0 +ceph osd crush set-device-class hdd osd.1 +ceph osd crush rule create-replicated foo-ssd default host ssd +ceph osd crush rule create-replicated foo-hdd default host hdd +ceph osd crush rule ls-by-class ssd | grep 'foo-ssd' +ceph osd crush rule ls-by-class ssd | expect_false grep 'foo-hdd' +ceph osd crush rule ls-by-class hdd | grep 'foo-hdd' +ceph osd crush rule ls-by-class hdd | expect_false grep 'foo-ssd' + +ceph osd erasure-code-profile set ec-foo-ssd crush-device-class=ssd m=2 k=2 +ceph osd pool create ec-foo 2 erasure ec-foo-ssd +ceph osd pool rm ec-foo ec-foo --yes-i-really-really-mean-it + +ceph osd crush rule ls | grep foo + +ceph osd crush rule rename foo foo-asdf +ceph osd crush rule rename foo foo-asdf # idempotent +ceph osd crush rule rename bar bar-asdf +ceph osd crush rule ls | grep 'foo-asdf' +ceph osd crush rule ls | grep 'bar-asdf' +ceph osd crush rule rm foo 2>&1 | grep 'does not exist' +ceph osd crush rule rm bar 2>&1 | grep 'does not exist' +ceph osd crush rule rename foo-asdf foo +ceph osd crush rule rename foo-asdf foo # idempotent +ceph osd crush rule rename bar-asdf bar +ceph osd crush rule ls | expect_false grep 'foo-asdf' +ceph osd crush rule ls | expect_false grep 'bar-asdf' +ceph osd crush rule rm foo +ceph osd crush rule rm foo # idempotent +ceph osd crush rule rm bar + +# can't delete in-use rules, tho: +ceph osd pool create pinning_pool 1 +expect_false ceph osd crush rule rm 
replicated_rule +ceph osd pool rm pinning_pool pinning_pool --yes-i-really-really-mean-it + +# build a simple map +expect_false ceph osd crush add-bucket foo osd +ceph osd crush add-bucket foo root +o1=`ceph osd create` +o2=`ceph osd create` +ceph osd crush add $o1 1 host=host1 root=foo +ceph osd crush add $o1 1 host=host1 root=foo # idempotent +ceph osd crush add $o2 1 host=host2 root=foo +ceph osd crush add $o2 1 host=host2 root=foo # idempotent +ceph osd crush add-bucket bar root +ceph osd crush add-bucket bar root # idempotent +ceph osd crush link host1 root=bar +ceph osd crush link host1 root=bar # idempotent +ceph osd crush link host2 root=bar +ceph osd crush link host2 root=bar # idempotent + +ceph osd tree | grep -c osd.$o1 | grep -q 2 +ceph osd tree | grep -c host1 | grep -q 2 +ceph osd tree | grep -c osd.$o2 | grep -q 2 +ceph osd tree | grep -c host2 | grep -q 2 +expect_false ceph osd crush rm host1 foo # not empty +ceph osd crush unlink host1 foo +ceph osd crush unlink host1 foo +ceph osd tree | grep -c host1 | grep -q 1 + +expect_false ceph osd crush rm foo # not empty +expect_false ceph osd crush rm bar # not empty +ceph osd crush unlink host1 bar +ceph osd tree | grep -c host1 | grep -q 1 # now an orphan +ceph osd crush rm osd.$o1 host1 +ceph osd crush rm host1 +ceph osd tree | grep -c host1 | grep -q 0 +expect_false ceph osd tree-from host1 +ceph osd tree-from host2 +expect_false ceph osd tree-from osd.$o2 + +expect_false ceph osd crush rm bar # not empty +ceph osd crush unlink host2 + +ceph osd crush add-bucket host-for-test host root=root-for-test rack=rack-for-test +ceph osd tree | grep host-for-test +ceph osd tree | grep rack-for-test +ceph osd tree | grep root-for-test +ceph osd crush rm host-for-test +ceph osd crush rm rack-for-test +ceph osd crush rm root-for-test + +# reference foo and bar with a rule +ceph osd crush rule create-simple foo-rule foo host firstn +expect_false ceph osd crush rm foo +ceph osd crush rule rm foo-rule + +ceph osd crush rm bar +ceph osd crush rm foo +ceph osd crush rm osd.$o2 host2 +ceph osd crush rm host2 + +ceph osd crush add-bucket foo host +ceph osd crush move foo root=default rack=localrack + +ceph osd crush create-or-move osd.$o1 1.0 root=default +ceph osd crush move osd.$o1 host=foo +ceph osd find osd.$o1 | grep host | grep foo + +ceph osd crush rm osd.$o1 +ceph osd crush rm osd.$o2 + +ceph osd crush rm foo + +# test reweight +o3=`ceph osd create` +ceph osd crush add $o3 123 root=default +ceph osd tree | grep osd.$o3 | grep 123 +ceph osd crush reweight osd.$o3 113 +expect_false ceph osd crush reweight osd.$o3 123456 +ceph osd tree | grep osd.$o3 | grep 113 +ceph osd crush rm osd.$o3 +ceph osd rm osd.$o3 + +# test reweight-subtree +o4=`ceph osd create` +o5=`ceph osd create` +ceph osd crush add $o4 123 root=default host=foobaz +ceph osd crush add $o5 123 root=default host=foobaz +ceph osd tree | grep osd.$o4 | grep 123 +ceph osd tree | grep osd.$o5 | grep 123 +ceph osd crush reweight-subtree foobaz 155 +expect_false ceph osd crush reweight-subtree foobaz 123456 +ceph osd tree | grep osd.$o4 | grep 155 +ceph osd tree | grep osd.$o5 | grep 155 +ceph osd crush rm osd.$o4 +ceph osd crush rm osd.$o5 +ceph osd rm osd.$o4 +ceph osd rm osd.$o5 + +# weight sets +# make sure we require luminous before testing weight-sets +ceph osd set-require-min-compat-client luminous +ceph osd crush weight-set dump +ceph osd crush weight-set ls +expect_false ceph osd crush weight-set reweight fooset osd.0 .9 +ceph osd pool create fooset 8 +ceph osd pool create 
barset 8 +ceph osd pool set barset size 3 +expect_false ceph osd crush weight-set reweight fooset osd.0 .9 +ceph osd crush weight-set create fooset flat +ceph osd crush weight-set create barset positional +ceph osd crush weight-set ls | grep fooset +ceph osd crush weight-set ls | grep barset +ceph osd crush weight-set dump +ceph osd crush weight-set reweight fooset osd.0 .9 +expect_false ceph osd crush weight-set reweight fooset osd.0 .9 .9 +expect_false ceph osd crush weight-set reweight barset osd.0 .9 +ceph osd crush weight-set reweight barset osd.0 .9 .9 .9 +ceph osd crush weight-set ls | grep -c fooset | grep -q 1 +ceph osd crush weight-set rm fooset +ceph osd crush weight-set ls | grep -c fooset | grep -q 0 +ceph osd crush weight-set ls | grep barset +ceph osd crush weight-set rm barset +ceph osd crush weight-set ls | grep -c barset | grep -q 0 +ceph osd crush weight-set create-compat +ceph osd crush weight-set ls | grep '(compat)' +ceph osd crush weight-set rm-compat + +# weight set vs device classes +ceph osd pool create cool 2 +ceph osd pool create cold 2 +ceph osd pool set cold size 2 +ceph osd crush weight-set create-compat +ceph osd crush weight-set create cool flat +ceph osd crush weight-set create cold positional +ceph osd crush rm-device-class osd.0 +ceph osd crush weight-set reweight-compat osd.0 10.5 +ceph osd crush weight-set reweight cool osd.0 11.5 +ceph osd crush weight-set reweight cold osd.0 12.5 12.4 +ceph osd crush set-device-class fish osd.0 +ceph osd crush tree --show-shadow | grep osd\\.0 | grep fish | grep 10\\. +ceph osd crush tree --show-shadow | grep osd\\.0 | grep fish | grep 11\\. +ceph osd crush tree --show-shadow | grep osd\\.0 | grep fish | grep 12\\. +ceph osd crush rm-device-class osd.0 +ceph osd crush set-device-class globster osd.0 +ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 10\\. +ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 11\\. +ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 12\\. +ceph osd crush weight-set reweight-compat osd.0 7.5 +ceph osd crush weight-set reweight cool osd.0 8.5 +ceph osd crush weight-set reweight cold osd.0 6.5 6.6 +ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 7\\. +ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 8\\. +ceph osd crush tree --show-shadow | grep osd\\.0 | grep globster | grep 6\\. 
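+# The --show-shadow greps above check the device-class shadow tree: the
+# compat, flat (cool) and positional (cold) weight-set values given to osd.0
+# are expected to survive the class changes (fish, then globster), and the
+# follow-up reweights (7.5 / 8.5 / 6.5 6.6) should show up there as well.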
+ceph osd crush rm-device-class osd.0 +ceph osd pool rm cool cool --yes-i-really-really-mean-it +ceph osd pool rm cold cold --yes-i-really-really-mean-it +ceph osd crush weight-set rm-compat + +# weight set vs device classes vs move +ceph osd crush weight-set create-compat +ceph osd crush add-bucket fooo host +ceph osd crush move fooo root=default +ceph osd crush add-bucket barr rack +ceph osd crush move barr root=default +ceph osd crush move fooo rack=barr +ceph osd crush rm fooo +ceph osd crush rm barr +ceph osd crush weight-set rm-compat + +# this sequence would crash at one point +ceph osd crush weight-set create-compat +ceph osd crush add-bucket r1 rack root=default +for f in `seq 1 32`; do + ceph osd crush add-bucket h$f host rack=r1 +done +for f in `seq 1 32`; do + ceph osd crush rm h$f +done +ceph osd crush rm r1 +ceph osd crush weight-set rm-compat + +echo OK diff --git a/qa/workunits/mon/osd.sh b/qa/workunits/mon/osd.sh new file mode 100755 index 000000000..535d6c137 --- /dev/null +++ b/qa/workunits/mon/osd.sh @@ -0,0 +1,24 @@ +#!/bin/sh -x + +set -e + +ua=`uuidgen` +ub=`uuidgen` + +# should get same id with same uuid +na=`ceph osd create $ua` +test $na -eq `ceph osd create $ua` + +nb=`ceph osd create $ub` +test $nb -eq `ceph osd create $ub` +test $nb -ne $na + +ceph osd rm $na +ceph osd rm $na +ceph osd rm $nb +ceph osd rm 1000 + +na2=`ceph osd create $ua` + +echo OK + diff --git a/qa/workunits/mon/pg_autoscaler.sh b/qa/workunits/mon/pg_autoscaler.sh new file mode 100755 index 000000000..4cf71a31c --- /dev/null +++ b/qa/workunits/mon/pg_autoscaler.sh @@ -0,0 +1,156 @@ +#!/bin/bash -ex + +NUM_OSDS=$(ceph osd ls | wc -l) +if [ $NUM_OSDS -lt 6 ]; then + echo "test requires at least 6 OSDs" + exit 1 +fi + +NUM_POOLS=$(ceph osd pool ls | wc -l) +if [ $NUM_POOLS -gt 0 ]; then + echo "test requires no preexisting pools" + exit 1 +fi + +function wait_for() { + local sec=$1 + local cmd=$2 + + while true ; do + if bash -c "$cmd" ; then + break + fi + sec=$(( $sec - 1 )) + if [ $sec -eq 0 ]; then + echo failed + return 1 + fi + sleep 1 + done + return 0 +} + +function power2() { echo "x=l($1)/l(2); scale=0; 2^((x+0.5)/1)" | bc -l;} + +function eval_actual_expected_val() { + local actual_value=$1 + local expected_value=$2 + if [[ $actual_value = $expected_value ]] + then + echo "Success: " $actual_value "=" $expected_value + else + echo "Error: " $actual_value "!=" $expected_value + exit 1 + fi +} + +# enable +ceph config set mgr mgr/pg_autoscaler/sleep_interval 60 +ceph mgr module enable pg_autoscaler +# ceph config set global osd_pool_default_pg_autoscale_mode on + +# pg_num_min +ceph osd pool create meta0 16 +ceph osd pool create bulk0 16 --bulk +ceph osd pool create bulk1 16 --bulk +ceph osd pool create bulk2 16 --bulk +ceph osd pool set meta0 pg_autoscale_mode on +ceph osd pool set bulk0 pg_autoscale_mode on +ceph osd pool set bulk1 pg_autoscale_mode on +ceph osd pool set bulk2 pg_autoscale_mode on +# set pool size +ceph osd pool set meta0 size 2 +ceph osd pool set bulk0 size 2 +ceph osd pool set bulk1 size 2 +ceph osd pool set bulk2 size 2 + +# get num pools again since we created more pools +NUM_POOLS=$(ceph osd pool ls | wc -l) + +# get bulk flag of each pool through the command ceph osd pool autoscale-status +BULK_FLAG_1=$(ceph osd pool autoscale-status | grep 'meta0' | grep -o -m 1 'True\|False' || true) +BULK_FLAG_2=$(ceph osd pool autoscale-status | grep 'bulk0' | grep -o -m 1 'True\|False' || true) +BULK_FLAG_3=$(ceph osd pool autoscale-status | grep 'bulk1' | grep -o -m 1 
'True\|False' || true) +BULK_FLAG_4=$(ceph osd pool autoscale-status | grep 'bulk2' | grep -o -m 1 'True\|False' || true) + +# evaluate the accuracy of ceph osd pool autoscale-status, specifically the `BULK` column + +eval_actual_expected_val $BULK_FLAG_1 'False' +eval_actual_expected_val $BULK_FLAG_2 'True' +eval_actual_expected_val $BULK_FLAG_3 'True' +eval_actual_expected_val $BULK_FLAG_4 'True' + +# The rest of this test evaluates the accuracy of the autoscaler + +# get pool size +POOL_SIZE_1=$(ceph osd pool get meta0 size| grep -Eo '[0-9]{1,4}') +POOL_SIZE_2=$(ceph osd pool get bulk0 size| grep -Eo '[0-9]{1,4}') +POOL_SIZE_3=$(ceph osd pool get bulk1 size| grep -Eo '[0-9]{1,4}') +POOL_SIZE_4=$(ceph osd pool get bulk2 size| grep -Eo '[0-9]{1,4}') + +# Calculate the target pg_num of each pool +# The first pool is non-bulk, so handle it first. +# Since its capacity ratio is 0, the meta pool keeps its current pg_num + +TARGET_PG_1=$(ceph osd pool get meta0 pg_num| grep -Eo '[0-9]{1,4}') +PG_LEFT=$NUM_OSDS*100 +NUM_POOLS_LEFT=$NUM_POOLS-1 +# The remaining pools are bulk pools of equal size, so the +# calculation is straightforward. +TARGET_PG_2=$(power2 $((($PG_LEFT)/($NUM_POOLS_LEFT)/($POOL_SIZE_2)))) +TARGET_PG_3=$(power2 $((($PG_LEFT)/($NUM_POOLS_LEFT)/($POOL_SIZE_3)))) +TARGET_PG_4=$(power2 $((($PG_LEFT)/($NUM_POOLS_LEFT)/($POOL_SIZE_4)))) + +# evaluate target_pg against the pg_num of each pool +wait_for 300 "ceph osd pool get meta0 pg_num | grep $TARGET_PG_1" +wait_for 300 "ceph osd pool get bulk0 pg_num | grep $TARGET_PG_2" +wait_for 300 "ceph osd pool get bulk1 pg_num | grep $TARGET_PG_3" +wait_for 300 "ceph osd pool get bulk2 pg_num | grep $TARGET_PG_4" + +# target ratio +ceph osd pool set meta0 target_size_ratio 5 +ceph osd pool set bulk0 target_size_ratio 1 +sleep 60 +APGS=$(ceph osd dump -f json-pretty | jq '.pools[0].pg_num_target') +BPGS=$(ceph osd dump -f json-pretty | jq '.pools[1].pg_num_target') +test $APGS -gt 100 +test $BPGS -gt 10 + +# small ratio change does not change pg_num +ceph osd pool set meta0 target_size_ratio 7 +ceph osd pool set bulk0 target_size_ratio 2 +sleep 60 +APGS2=$(ceph osd dump -f json-pretty | jq '.pools[0].pg_num_target') +BPGS2=$(ceph osd dump -f json-pretty | jq '.pools[1].pg_num_target') +test $APGS -eq $APGS2 +test $BPGS -eq $BPGS2 + +# target_size +ceph osd pool set meta0 target_size_bytes 1000000000000000 +ceph osd pool set bulk0 target_size_bytes 1000000000000000 +ceph osd pool set meta0 target_size_ratio 0 +ceph osd pool set bulk0 target_size_ratio 0 +wait_for 60 "ceph health detail | grep POOL_TARGET_SIZE_BYTES_OVERCOMMITTED" + +ceph osd pool set meta0 target_size_bytes 1000 +ceph osd pool set bulk0 target_size_bytes 1000 +ceph osd pool set meta0 target_size_ratio 1 +wait_for 60 "ceph health detail | grep POOL_HAS_TARGET_SIZE_BYTES_AND_RATIO" + +# test autoscale warn + +ceph osd pool create warn0 1 --autoscale-mode=warn +wait_for 120 "ceph health detail | grep POOL_TOO_FEW_PGS" + +ceph osd pool create warn1 256 --autoscale-mode=warn +wait_for 120 "ceph health detail | grep POOL_TOO_MANY_PGS" + +ceph osd pool rm meta0 meta0 --yes-i-really-really-mean-it +ceph osd pool rm bulk0 bulk0 --yes-i-really-really-mean-it +ceph osd pool rm bulk1 bulk1 --yes-i-really-really-mean-it +ceph osd pool rm bulk2 bulk2 --yes-i-really-really-mean-it +ceph osd pool rm warn0 warn0 --yes-i-really-really-mean-it +ceph osd pool rm warn1 warn1 --yes-i-really-really-mean-it + +echo OK + diff --git a/qa/workunits/mon/pg_autoscaler.sh b/qa/workunits/mon/ping.py new file mode 100755 index 
000000000..1f6d0a1dd --- /dev/null +++ b/qa/workunits/mon/ping.py @@ -0,0 +1,106 @@ +#!/usr/bin/python3 + +import json +import shlex +import subprocess + + +class UnexpectedReturn(Exception): + def __init__(self, cmd, ret, expected, msg): + if isinstance(cmd, list): + self.cmd = ' '.join(cmd) + else: + assert isinstance(cmd, str), \ + 'cmd needs to be either a list or a str' + self.cmd = cmd + self.cmd = str(self.cmd) + self.ret = int(ret) + self.expected = int(expected) + self.msg = str(msg) + + def __str__(self): + return repr('{c}: expected return {e}, got {r} ({o})'.format( + c=self.cmd, e=self.expected, r=self.ret, o=self.msg)) + + +def call(cmd): + if isinstance(cmd, list): + args = cmd + elif isinstance(cmd, str): + args = shlex.split(cmd) + else: + assert False, 'cmd is not a string/unicode nor a list!' + + print('call: {0}'.format(args)) + proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + procout, procerr = proc.communicate(None) + + return proc.returncode, procout, procerr + + +def expect(cmd, expected_ret): + try: + (r, out, err) = call(cmd) + except ValueError as e: + assert False, \ + 'unable to run {c}: {err}'.format(c=repr(cmd), err=str(e)) + + if r != expected_ret: + raise UnexpectedReturn(repr(cmd), r, expected_ret, err) + + return out.decode() if isinstance(out, bytes) else out + + +def get_quorum_status(timeout=300): + cmd = 'ceph quorum_status' + if timeout > 0: + cmd += ' --connect-timeout {0}'.format(timeout) + + out = expect(cmd, 0) + j = json.loads(out) + return j + + +def main(): + quorum_status = get_quorum_status() + mon_names = [mon['name'] for mon in quorum_status['monmap']['mons']] + + print('ping all monitors') + for m in mon_names: + print('ping mon.{0}'.format(m)) + out = expect('ceph ping mon.{0}'.format(m), 0) + reply = json.loads(out) + + assert reply['mon_status']['name'] == m, \ + 'reply obtained from mon.{0}, expected mon.{1}'.format( + reply['mon_status']['name'], m) + + print('test out-of-quorum reply') + for m in mon_names: + print('testing mon.{0}'.format(m)) + expect('ceph daemon mon.{0} quorum exit'.format(m), 0) + + quorum_status = get_quorum_status() + assert m not in quorum_status['quorum_names'], \ + 'mon.{0} was not supposed to be in quorum ({1})'.format( + m, quorum_status['quorum_names']) + + out = expect('ceph ping mon.{0}'.format(m), 0) + reply = json.loads(out) + mon_status = reply['mon_status'] + + assert mon_status['name'] == m, \ + 'reply obtained from mon.{0}, expected mon.{1}'.format( + mon_status['name'], m) + + assert mon_status['state'] == 'electing', \ + 'mon.{0} is in state {1}, expected electing'.format( + m, mon_status['state']) + + expect('ceph daemon mon.{0} quorum enter'.format(m), 0) + + print('OK') + + +if __name__ == '__main__': + main() diff --git a/qa/workunits/mon/pool_ops.sh b/qa/workunits/mon/pool_ops.sh new file mode 100755 index 000000000..23bb3c0be --- /dev/null +++ b/qa/workunits/mon/pool_ops.sh @@ -0,0 +1,104 @@ +#!/usr/bin/env bash + +set -ex + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +function get_config_value_or_die() +{ + local pool_name config_opt raw val + + pool_name=$1 + config_opt=$2 + + raw="`$SUDO ceph osd pool get $pool_name $config_opt 2>/dev/null`" + if [[ $? 
-ne 0 ]]; then + echo "error obtaining config opt '$config_opt' from '$pool_name': $raw" + exit 1 + fi + + raw=`echo $raw | sed -e 's/[{} "]//g'` + val=`echo $raw | cut -f2 -d:` + + echo "$val" + return 0 +} + +function expect_config_value() +{ + local pool_name config_opt expected_val val + pool_name=$1 + config_opt=$2 + expected_val=$3 + + val=$(get_config_value_or_die $pool_name $config_opt) + + if [[ "$val" != "$expected_val" ]]; then + echo "expected '$expected_val', got '$val'" + exit 1 + fi +} + +# pg_num min/max +TEST_POOL=testpool1234 +ceph osd pool create testpool1234 8 --autoscale-mode off +ceph osd pool set $TEST_POOL pg_num_min 2 +ceph osd pool get $TEST_POOL pg_num_min | grep 2 +ceph osd pool set $TEST_POOL pg_num_max 33 +ceph osd pool get $TEST_POOL pg_num_max | grep 33 +expect_false ceph osd pool set $TEST_POOL pg_num_min 9 +expect_false ceph osd pool set $TEST_POOL pg_num_max 7 +expect_false ceph osd pool set $TEST_POOL pg_num 1 +expect_false ceph osd pool set $TEST_POOL pg_num 44 +ceph osd pool set $TEST_POOL pg_num_min 0 +expect_false ceph osd pool get $TEST_POOL pg_num_min +ceph osd pool set $TEST_POOL pg_num_max 0 +expect_false ceph osd pool get $TEST_POOL pg_num_max +ceph osd pool delete $TEST_POOL $TEST_POOL --yes-i-really-really-mean-it + +# note: we need to pass the other args or ceph_argparse.py will take +# 'invalid' that is not replicated|erasure and assume it is the next +# argument, which is a string. +expect_false ceph osd pool create foo 123 123 invalid foo-profile foo-rule + +ceph osd pool create foo 123 123 replicated +ceph osd pool create fooo 123 123 erasure default +ceph osd pool create foooo 123 + +ceph osd pool create foo 123 # idempotent + +ceph osd pool set foo size 1 --yes-i-really-mean-it +expect_config_value "foo" "min_size" 1 +ceph osd pool set foo size 4 +expect_config_value "foo" "min_size" 2 +ceph osd pool set foo size 10 +expect_config_value "foo" "min_size" 5 +expect_false ceph osd pool set foo size 0 +expect_false ceph osd pool set foo size 20 + +# should fail due to safety interlock +expect_false ceph osd pool delete foo +expect_false ceph osd pool delete foo foo +expect_false ceph osd pool delete foo foo --force +expect_false ceph osd pool delete foo fooo --yes-i-really-mean-it +expect_false ceph osd pool delete foo --yes-i-really-mean-it foo + +ceph osd pool delete foooo foooo --yes-i-really-really-mean-it +ceph osd pool delete fooo fooo --yes-i-really-really-mean-it +ceph osd pool delete foo foo --yes-i-really-really-mean-it + +# idempotent +ceph osd pool delete foo foo --yes-i-really-really-mean-it +ceph osd pool delete fooo fooo --yes-i-really-really-mean-it +ceph osd pool delete fooo fooo --yes-i-really-really-mean-it + +# non-existent pool +ceph osd pool delete fuggg fuggg --yes-i-really-really-mean-it + +echo OK + + diff --git a/qa/workunits/mon/rbd_snaps_ops.sh b/qa/workunits/mon/rbd_snaps_ops.sh new file mode 100755 index 000000000..eb88565ea --- /dev/null +++ b/qa/workunits/mon/rbd_snaps_ops.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash + +# attempt to trigger #6047 + + +cmd_no=0 +expect() +{ + cmd_no=$(($cmd_no+1)) + cmd="$1" + expected=$2 + echo "[$cmd_no] $cmd" + eval $cmd + ret=$? 
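+ # $cmd is run through eval so that quoting inside the command string is
+ # honored; its exit status is captured in $ret and compared against the
+ # caller-supplied expected value below.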
+ if [[ $ret -ne $expected ]]; then + echo "[$cmd_no] unexpected return '$ret', expected '$expected'" + exit 1 + fi +} + +ceph osd pool delete test test --yes-i-really-really-mean-it || true +expect 'ceph osd pool create test 8 8' 0 +expect 'ceph osd pool application enable test rbd' +expect 'ceph osd pool mksnap test snapshot' 0 +expect 'ceph osd pool rmsnap test snapshot' 0 + +expect 'rbd --pool=test --rbd_validate_pool=false create --size=102400 image' 0 +expect 'rbd --pool=test snap create image@snapshot' 22 + +expect 'ceph osd pool delete test test --yes-i-really-really-mean-it' 0 +expect 'ceph osd pool create test 8 8' 0 +expect 'rbd --pool=test pool init' 0 +expect 'rbd --pool=test create --size=102400 image' 0 +expect 'rbd --pool=test snap create image@snapshot' 0 +expect 'rbd --pool=test snap ls image' 0 +expect 'rbd --pool=test snap rm image@snapshot' 0 + +expect 'ceph osd pool mksnap test snapshot' 22 + +expect 'ceph osd pool delete test test --yes-i-really-really-mean-it' 0 + +# reproduce 7210 and expect it to be fixed +# basically create such a scenario where we end up deleting what used to +# be an unmanaged snapshot from a not-unmanaged pool + +ceph osd pool delete test-foo test-foo --yes-i-really-really-mean-it || true +expect 'ceph osd pool create test-foo 8' 0 +expect 'ceph osd pool application enable test-foo rbd' +expect 'rbd --pool test-foo create --size 1024 image' 0 +expect 'rbd --pool test-foo snap create image@snapshot' 0 + +ceph osd pool delete test-bar test-bar --yes-i-really-really-mean-it || true +expect 'ceph osd pool create test-bar 8' 0 +expect 'ceph osd pool application enable test-bar rbd' +expect 'rados cppool test-foo test-bar --yes-i-really-mean-it' 0 +expect 'rbd --pool test-bar snap rm image@snapshot' 95 +expect 'ceph osd pool delete test-foo test-foo --yes-i-really-really-mean-it' 0 +expect 'ceph osd pool delete test-bar test-bar --yes-i-really-really-mean-it' 0 + + +echo OK diff --git a/qa/workunits/mon/test_config_key_caps.sh b/qa/workunits/mon/test_config_key_caps.sh new file mode 100755 index 000000000..77b4b53b7 --- /dev/null +++ b/qa/workunits/mon/test_config_key_caps.sh @@ -0,0 +1,201 @@ +#!/usr/bin/env bash + +set -x +set -e + +tmp=$(mktemp -d -p /tmp test_mon_config_key_caps.XXXXX) +entities=() + +function cleanup() +{ + set +e + set +x + if [[ -e $tmp/keyring ]] && [[ -e $tmp/keyring.orig ]]; then + grep '\[.*\..*\]' $tmp/keyring.orig > $tmp/entities.orig + for e in $(grep '\[.*\..*\]' $tmp/keyring | \ + diff $tmp/entities.orig - | \ + sed -n 's/^.*\[\(.*\..*\)\]/\1/p'); + do + ceph auth rm $e 2>&1 >& /dev/null + done + fi + #rm -fr $tmp +} + +trap cleanup 0 # cleanup on exit + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +# for cleanup purposes +ceph auth export -o $tmp/keyring.orig + +k=$tmp/keyring + +# setup a few keys +ceph config-key ls +ceph config-key set daemon-private/osd.123/test-foo +ceph config-key set mgr/test-foo +ceph config-key set device/test-foo +ceph config-key set test/foo + +allow_aa=client.allow_aa +allow_bb=client.allow_bb +allow_cc=client.allow_cc + +mgr_a=mgr.a +mgr_b=mgr.b +osd_a=osd.100 +osd_b=osd.200 + +prefix_aa=client.prefix_aa +prefix_bb=client.prefix_bb +prefix_cc=client.prefix_cc +match_aa=client.match_aa +match_bb=client.match_bb + +fail_aa=client.fail_aa +fail_bb=client.fail_bb +fail_cc=client.fail_cc +fail_dd=client.fail_dd +fail_ee=client.fail_ee +fail_ff=client.fail_ff +fail_gg=client.fail_gg +fail_writes=client.fail_writes + +ceph auth get-or-create $allow_aa 
mon 'allow *' +ceph auth get-or-create $allow_bb mon 'allow service config-key rwx' +ceph auth get-or-create $allow_cc mon 'allow command "config-key get"' + +ceph auth get-or-create $mgr_a mon 'allow profile mgr' +ceph auth get-or-create $mgr_b mon 'allow profile mgr' +ceph auth get-or-create $osd_a mon 'allow profile osd' +ceph auth get-or-create $osd_b mon 'allow profile osd' + +ceph auth get-or-create $prefix_aa mon \ + "allow command \"config-key get\" with key prefix client/$prefix_aa" + +cap="allow command \"config-key set\" with key prefix client/" +cap="$cap,allow command \"config-key get\" with key prefix client/$prefix_bb" +ceph auth get-or-create $prefix_bb mon "$cap" + +cap="allow command \"config-key get\" with key prefix client/" +cap="$cap, allow command \"config-key set\" with key prefix client/" +cap="$cap, allow command \"config-key ls\"" +ceph auth get-or-create $prefix_cc mon "$cap" + +cap="allow command \"config-key get\" with key=client/$match_aa/foo" +ceph auth get-or-create $match_aa mon "$cap" +cap="allow command \"config-key get\" with key=client/$match_bb/foo" +cap="$cap,allow command \"config-key set\" with key=client/$match_bb/foo" +ceph auth get-or-create $match_bb mon "$cap" + +ceph auth get-or-create $fail_aa mon 'allow rx' +ceph auth get-or-create $fail_bb mon 'allow r,allow w' +ceph auth get-or-create $fail_cc mon 'allow rw' +ceph auth get-or-create $fail_dd mon 'allow rwx' +ceph auth get-or-create $fail_ee mon 'allow profile bootstrap-rgw' +ceph auth get-or-create $fail_ff mon 'allow profile bootstrap-rbd' +# write commands will require rw; wx is not enough +ceph auth get-or-create $fail_gg mon 'allow service config-key wx' +# read commands will only require 'r'; 'rx' should be enough. +ceph auth get-or-create $fail_writes mon 'allow service config-key rx' + +# grab keyring +ceph auth export -o $k + +# keys will all the caps can do whatever +for c in $allow_aa $allow_bb $allow_cc $mgr_a $mgr_b; do + ceph -k $k --name $c config-key get daemon-private/osd.123/test-foo + ceph -k $k --name $c config-key get mgr/test-foo + ceph -k $k --name $c config-key get device/test-foo + ceph -k $k --name $c config-key get test/foo +done + +for c in $osd_a $osd_b; do + ceph -k $k --name $c config-key put daemon-private/$c/test-foo + ceph -k $k --name $c config-key get daemon-private/$c/test-foo + expect_false ceph -k $k --name $c config-key ls + expect_false ceph -k $k --name $c config-key get mgr/test-foo + expect_false ceph -k $k --name $c config-key get device/test-foo + expect_false ceph -k $k --name $c config-key get test/foo +done + +expect_false ceph -k $k --name $osd_a get daemon-private/$osd_b/test-foo +expect_false ceph -k $k --name $osd_b get daemon-private/$osd_a/test-foo + +expect_false ceph -k $k --name $prefix_aa \ + config-key ls +expect_false ceph -k $k --name $prefix_aa \ + config-key get daemon-private/osd.123/test-foo +expect_false ceph -k $k --name $prefix_aa \ + config-key set test/bar +expect_false ceph -k $k --name $prefix_aa \ + config-key set client/$prefix_aa/foo + +# write something so we can read, use a custom entity +ceph -k $k --name $allow_bb config-key set client/$prefix_aa/foo +ceph -k $k --name $prefix_aa config-key get client/$prefix_aa/foo +# check one writes to the other's prefix, the other is able to read +ceph -k $k --name $prefix_bb config-key set client/$prefix_aa/bar +ceph -k $k --name $prefix_aa config-key get client/$prefix_aa/bar + +ceph -k $k --name $prefix_bb config-key set client/$prefix_bb/foo +ceph -k $k --name 
$prefix_bb config-key get client/$prefix_bb/foo + +expect_false ceph -k $k --name $prefix_bb config-key get client/$prefix_aa/bar +expect_false ceph -k $k --name $prefix_bb config-key ls +expect_false ceph -k $k --name $prefix_bb \ + config-key get daemon-private/osd.123/test-foo +expect_false ceph -k $k --name $prefix_bb config-key get mgr/test-foo +expect_false ceph -k $k --name $prefix_bb config-key get device/test-foo +expect_false ceph -k $k --name $prefix_bb config-key get test/bar +expect_false ceph -k $k --name $prefix_bb config-key set test/bar + +ceph -k $k --name $prefix_cc config-key set client/$match_aa/foo +ceph -k $k --name $prefix_cc config-key set client/$match_bb/foo +ceph -k $k --name $prefix_cc config-key get client/$match_aa/foo +ceph -k $k --name $prefix_cc config-key get client/$match_bb/foo +expect_false ceph -k $k --name $prefix_cc config-key set other/prefix +expect_false ceph -k $k --name $prefix_cc config-key get mgr/test-foo +ceph -k $k --name $prefix_cc config-key ls >& /dev/null + +ceph -k $k --name $match_aa config-key get client/$match_aa/foo +expect_false ceph -k $k --name $match_aa config-key get client/$match_bb/foo +expect_false ceph -k $k --name $match_aa config-key set client/$match_aa/foo +ceph -k $k --name $match_bb config-key get client/$match_bb/foo +ceph -k $k --name $match_bb config-key set client/$match_bb/foo +expect_false ceph -k $k --name $match_bb config-key get client/$match_aa/foo +expect_false ceph -k $k --name $match_bb config-key set client/$match_aa/foo + +keys=(daemon-private/osd.123/test-foo + mgr/test-foo + device/test-foo + test/foo + client/$prefix_aa/foo + client/$prefix_bb/foo + client/$match_aa/foo + client/$match_bb/foo +) +# expect these all to fail accessing config-key +for c in $fail_aa $fail_bb $fail_cc \ + $fail_dd $fail_ee $fail_ff \ + $fail_gg; do + for m in get set; do + for key in ${keys[*]} client/$prefix_aa/foo client/$prefix_bb/foo; do + expect_false ceph -k $k --name $c config-key $m $key + done + done +done + +# fail writes but succeed on reads +expect_false ceph -k $k --name $fail_writes config-key set client/$match_aa/foo +expect_false ceph -k $k --name $fail_writes config-key set test/foo +ceph -k $k --name $fail_writes config-key ls +ceph -k $k --name $fail_writes config-key get client/$match_aa/foo +ceph -k $k --name $fail_writes config-key get daemon-private/osd.123/test-foo + +echo "OK" diff --git a/qa/workunits/mon/test_mon_config_key.py b/qa/workunits/mon/test_mon_config_key.py new file mode 100755 index 000000000..f81804c8a --- /dev/null +++ b/qa/workunits/mon/test_mon_config_key.py @@ -0,0 +1,463 @@ +#!/usr/bin/python3 +# +# test_mon_config_key - Test 'ceph config-key' interface +# +# Copyright (C) 2013 Inktank +# +# This is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License version 2.1, as published by the Free Software +# Foundation. See file COPYING. 
+# +import argparse +import base64 +import errno +import json +import logging +import os +import random +import string +import subprocess +import sys +import time +from typing import List, Dict + +# +# Accepted Environment variables: +# CEPH_TEST_VERBOSE - be more verbose; '1' enables; '0' disables +# CEPH_TEST_DURATION - test duration in seconds +# CEPH_TEST_SEED - seed to be used during the test +# +# Accepted arguments and options (see --help): +# -v, --verbose - be more verbose +# -d, --duration SECS - test duration in seconds +# -s, --seed SEED - seed to be used during the test +# + + +LOG = logging.getLogger(os.path.basename(sys.argv[0].replace('.py', ''))) + +SIZES = [ + (0, 0), + (10, 0), + (25, 0), + (50, 0), + (100, 0), + (1000, 0), + (64 * 1024, 0), + (64 * 1024 + 1, -errno.EFBIG), + (128 * 1024, -errno.EFBIG) +] + +# tests will be randomly selected from the keys here, and the test +# suboperation will be randomly selected from the list in the values +# here. i.e. 'exists/existing' would test that a key the test put into +# the store earlier actually does still exist in the config store, +# and that's a separate test case from 'exists/enoent', which tests +# nonexistence of a key known to not be present. + +OPS = { + 'put': ['existing', 'new'], + 'del': ['existing', 'enoent'], + 'exists': ['existing', 'enoent'], + 'get': ['existing', 'enoent'], + 'list': ['existing', 'enoent'], + 'dump': ['existing', 'enoent'], +} + +CONFIG_PUT: List[str] = [] # list: keys +CONFIG_DEL: List[str] = [] # list: keys +CONFIG_EXISTING: Dict[str, int] = {} # map: key -> size + + +def run_cmd(cmd, expects=0): + full_cmd = ['ceph', 'config-key'] + cmd + + if expects < 0: + expects = -expects + + cmdlog = LOG.getChild('run_cmd') + cmdlog.debug('{fc}'.format(fc=' '.join(full_cmd))) + + proc = subprocess.run(full_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True) + if proc.returncode != expects: + cmdlog.error(f'cmd > {proc.args}') + cmdlog.error(f'expected return "{expects}" got "{proc.returncode}"') + cmdlog.error('stdout') + cmdlog.error(proc.stdout) + cmdlog.error('stderr') + cmdlog.error(proc.stderr) + + +def gen_data(size, rnd): + chars = string.ascii_letters + string.digits + return ''.join(rnd.choice(chars) for _ in range(size)) + + +def gen_key(rnd): + return gen_data(20, rnd) + + +def gen_tmp_file_path(rnd): + file_name = gen_data(20, rnd) + file_path = os.path.join('/tmp', 'ceph-test.' 
+ file_name) + return file_path + + +def destroy_tmp_file(fpath): + if os.path.exists(fpath) and os.path.isfile(fpath): + os.unlink(fpath) + + +def write_data_file(data, rnd): + file_path = gen_tmp_file_path(rnd) + data_file = open(file_path, 'a+') + data_file.truncate() + data_file.write(data) + data_file.close() + return file_path + + +# end write_data_file + +def choose_random_op(rnd): + op = rnd.choice( + list(OPS.keys()) + ) + sop = rnd.choice(OPS[op]) + return op, sop + + +def parse_args(args): + parser = argparse.ArgumentParser( + description="Test the monitor's 'config-key' API", + ) + parser.add_argument( + '-v', '--verbose', + action='store_true', + help='be more verbose', + ) + parser.add_argument( + '-s', '--seed', + metavar='SEED', + help='use SEED instead of generating it in run-time', + ) + parser.add_argument( + '-d', '--duration', + metavar='SECS', + help='run test for SECS seconds (default: 300)', + ) + parser.set_defaults( + seed=None, + duration=300, + verbose=False, + ) + return parser.parse_args(args) + + +def main(): + args = parse_args(sys.argv[1:]) + + verbose = args.verbose + if os.environ.get('CEPH_TEST_VERBOSE') is not None: + verbose = (os.environ.get('CEPH_TEST_VERBOSE') == '1') + + duration = int(os.environ.get('CEPH_TEST_DURATION', args.duration)) + seed = os.environ.get('CEPH_TEST_SEED', args.seed) + seed = int(time.time()) if seed is None else int(seed) + + rnd = random.Random() + rnd.seed(seed) + + loglevel = logging.INFO + if verbose: + loglevel = logging.DEBUG + + logging.basicConfig(level=loglevel) + + LOG.info('seed: {s}'.format(s=seed)) + + start = time.time() + + while (time.time() - start) < duration: + (op, sop) = choose_random_op(rnd) + + LOG.info('{o}({s})'.format(o=op, s=sop)) + op_log = LOG.getChild('{o}({s})'.format(o=op, s=sop)) + + if op == 'put': + via_file = (rnd.uniform(0, 100) < 50.0) + + expected = 0 + cmd = ['put'] + key = None + + if sop == 'existing': + if len(CONFIG_EXISTING) == 0: + op_log.debug('no existing keys; continue') + continue + key = rnd.choice(CONFIG_PUT) + assert key in CONFIG_EXISTING, \ + "key '{k_}' not in CONFIG_EXISTING".format(k_=key) + + expected = 0 # the store just overrides the value if the key exists + # end if sop == 'existing' + elif sop == 'new': + for x in range(0, 10): + key = gen_key(rnd) + if key not in CONFIG_EXISTING: + break + key = None + if key is None: + op_log.error('unable to generate an unique key -- try again later.') + continue + + assert key not in CONFIG_PUT and key not in CONFIG_EXISTING, \ + 'key {k} was not supposed to exist!'.format(k=key) + + assert key is not None, \ + 'key must be != None' + + cmd += [key] + + (size, error) = rnd.choice(SIZES) + if size > 25: + via_file = True + + data = gen_data(size, rnd) + + if error == 0: # only add if we expect the put to be successful + if sop == 'new': + CONFIG_PUT.append(key) + CONFIG_EXISTING[key] = size + expected = error + + if via_file: + data_file = write_data_file(data, rnd) + cmd += ['-i', data_file] + else: + cmd += [data] + + op_log.debug('size: {sz}, via: {v}'.format( + sz=size, + v='file: {f}'.format(f=data_file) if via_file == True else 'cli') + ) + run_cmd(cmd, expects=expected) + if via_file: + destroy_tmp_file(data_file) + continue + + elif op == 'del': + expected = 0 + cmd = ['del'] + key = None + + if sop == 'existing': + if len(CONFIG_EXISTING) == 0: + op_log.debug('no existing keys; continue') + continue + key = rnd.choice(CONFIG_PUT) + assert key in CONFIG_EXISTING, \ + "key '{k_}' not in CONFIG_EXISTING".format(k_=key) 
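+ # the 'enoent' sub-op below mirrors 'existing': generate a random key
+ # (up to 10 attempts) that is known not to be in the store and run the
+ # op against it; for 'del' this still succeeds (expected 0), while
+ # 'exists' and 'get' expect -ENOENT.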
+ + if sop == 'enoent': + for x in range(0, 10): + key = base64.b64encode(os.urandom(20)).decode() + if key not in CONFIG_EXISTING: + break + key = None + if key is None: + op_log.error('unable to generate an unique key -- try again later.') + continue + assert key not in CONFIG_PUT and key not in CONFIG_EXISTING, \ + 'key {k} was not supposed to exist!'.format(k=key) + expected = 0 # deleting a non-existent key succeeds + + assert key is not None, \ + 'key must be != None' + + cmd += [key] + op_log.debug('key: {k}'.format(k=key)) + run_cmd(cmd, expects=expected) + if sop == 'existing': + CONFIG_DEL.append(key) + CONFIG_PUT.remove(key) + del CONFIG_EXISTING[key] + continue + + elif op == 'exists': + expected = 0 + cmd = ['exists'] + key = None + + if sop == 'existing': + if len(CONFIG_EXISTING) == 0: + op_log.debug('no existing keys; continue') + continue + key = rnd.choice(CONFIG_PUT) + assert key in CONFIG_EXISTING, \ + "key '{k_}' not in CONFIG_EXISTING".format(k_=key) + + if sop == 'enoent': + for x in range(0, 10): + key = base64.b64encode(os.urandom(20)).decode() + if key not in CONFIG_EXISTING: + break + key = None + if key is None: + op_log.error('unable to generate an unique key -- try again later.') + continue + assert key not in CONFIG_PUT and key not in CONFIG_EXISTING, \ + 'key {k} was not supposed to exist!'.format(k=key) + expected = -errno.ENOENT + + assert key is not None, \ + 'key must be != None' + + cmd += [key] + op_log.debug('key: {k}'.format(k=key)) + run_cmd(cmd, expects=expected) + continue + + elif op == 'get': + expected = 0 + cmd = ['get'] + key = None + + if sop == 'existing': + if len(CONFIG_EXISTING) == 0: + op_log.debug('no existing keys; continue') + continue + key = rnd.choice(CONFIG_PUT) + assert key in CONFIG_EXISTING, \ + "key '{k_}' not in CONFIG_EXISTING".format(k_=key) + + if sop == 'enoent': + for x in range(0, 10): + key = base64.b64encode(os.urandom(20)).decode() + if key not in CONFIG_EXISTING: + break + key = None + if key is None: + op_log.error('unable to generate an unique key -- try again later.') + continue + assert key not in CONFIG_PUT and key not in CONFIG_EXISTING, \ + 'key {k} was not supposed to exist!'.format(k=key) + expected = -errno.ENOENT + + assert key is not None, \ + 'key must be != None' + + file_path = gen_tmp_file_path(rnd) + cmd += [key, '-o', file_path] + op_log.debug('key: {k}'.format(k=key)) + run_cmd(cmd, expects=expected) + if sop == 'existing': + try: + temp_file = open(file_path, 'r+') + except IOError as err: + if err.errno == errno.ENOENT: + assert CONFIG_EXISTING[key] == 0, \ + "error opening '{fp}': {e}".format(fp=file_path, e=err) + continue + else: + assert False, \ + 'some error occurred: {e}'.format(e=err) + cnt = 0 + while True: + read_data = temp_file.read() + if read_data == '': + break + cnt += len(read_data) + assert cnt == CONFIG_EXISTING[key], \ + "wrong size from store for key '{k}': {sz}, expected {es}".format( + k=key, sz=cnt, es=CONFIG_EXISTING[key]) + destroy_tmp_file(file_path) + continue + + elif op == 'list' or op == 'dump': + expected = 0 + cmd = [op] + key = None + + if sop == 'existing': + if len(CONFIG_EXISTING) == 0: + op_log.debug('no existing keys; continue') + continue + key = rnd.choice(CONFIG_PUT) + assert key in CONFIG_EXISTING, \ + "key '{k_}' not in CONFIG_EXISTING".format(k_=key) + + if sop == 'enoent': + for x in range(0, 10): + key = base64.b64encode(os.urandom(20)).decode() + if key not in CONFIG_EXISTING: + break + key = None + if key is None: + op_log.error('unable to 
generate an unique key -- try again later.') + continue + assert key not in CONFIG_PUT and key not in CONFIG_EXISTING, \ + 'key {k} was not supposed to exist!'.format(k=key) + + assert key is not None, \ + 'key must be != None' + + file_path = gen_tmp_file_path(rnd) + cmd += ['-o', file_path] + op_log.debug('key: {k}'.format(k=key)) + run_cmd(cmd, expects=expected) + try: + temp_file = open(file_path, 'r+') + except IOError as err: + if err.errno == errno.ENOENT: + assert CONFIG_EXISTING[key] == 0, \ + "error opening '{fp}': {e}".format(fp=file_path, e=err) + continue + else: + assert False, \ + 'some error occurred: {e}'.format(e=err) + cnt = 0 + try: + read_data = json.load(temp_file) + except ValueError: + temp_file.seek(0) + assert False, "{op} output was not valid JSON:\n{filedata}".format( + op=op, filedata=temp_file.readlines()) + + if sop == 'existing': + assert key in read_data, "key '{k}' not found in list/dump output".format(k=key) + if op == 'dump': + cnt = len(read_data[key]) + assert cnt == CONFIG_EXISTING[key], \ + "wrong size from list for key '{k}': {sz}, expected {es}".format( + k=key, sz=cnt, es=CONFIG_EXISTING[key]) + elif sop == 'enoent': + assert key not in read_data, "key '{k}' found in list/dump output".format(k=key) + destroy_tmp_file(file_path) + continue + else: + assert False, 'unknown op {o}'.format(o=op) + + # check if all keys in 'CONFIG_PUT' exist and + # if all keys on 'CONFIG_DEL' don't. + # but first however, remove all keys in CONFIG_PUT that might + # be in CONFIG_DEL as well. + config_put_set = set(CONFIG_PUT) + config_del_set = set(CONFIG_DEL).difference(config_put_set) + + LOG.info('perform sanity checks on store') + + for k in config_put_set: + LOG.getChild('check(puts)').debug('key: {k_}'.format(k_=k)) + run_cmd(['exists', k], expects=0) + for k in config_del_set: + LOG.getChild('check(dels)').debug('key: {k_}'.format(k_=k)) + run_cmd(['exists', k], expects=-errno.ENOENT) + + +if __name__ == "__main__": + main() diff --git a/qa/workunits/mon/test_mon_osdmap_prune.sh b/qa/workunits/mon/test_mon_osdmap_prune.sh new file mode 100755 index 000000000..9cdd72179 --- /dev/null +++ b/qa/workunits/mon/test_mon_osdmap_prune.sh @@ -0,0 +1,205 @@ +#!/bin/bash + +. $(dirname $0)/../../standalone/ceph-helpers.sh + +set -x + +function wait_for_osdmap_manifest() { + + local what=${1:-"true"} + + local -a delays=($(get_timeout_delays $TIMEOUT .1)) + local -i loop=0 + + for ((i=0; i < ${#delays[*]}; ++i)); do + has_manifest=$(ceph report | jq 'has("osdmap_manifest")') + if [[ "$has_manifest" == "$what" ]]; then + return 0 + fi + + sleep ${delays[$i]} + done + + echo "osdmap_manifest never outputted on report" + ceph report + return 1 +} + +function wait_for_trim() { + + local -i epoch=$1 + local -a delays=($(get_timeout_delays $TIMEOUT .1)) + local -i loop=0 + + for ((i=0; i < ${#delays[*]}; ++i)); do + fc=$(ceph report | jq '.osdmap_first_committed') + if [[ $fc -eq $epoch ]]; then + return 0 + fi + sleep ${delays[$i]} + done + + echo "never trimmed up to epoch $epoch" + ceph report + return 1 +} + +function test_osdmap() { + + local epoch=$1 + local ret=0 + + tmp_map=$(mktemp) + ceph osd getmap $epoch -o $tmp_map || return 1 + if ! 
osdmaptool --print $tmp_map | grep "epoch $epoch" ; then + echo "ERROR: failed processing osdmap epoch $epoch" + ret=1 + fi + rm $tmp_map + return $ret +} + +function generate_osdmaps() { + + local -i num=$1 + + cmds=( set unset ) + for ((i=0; i < num; ++i)); do + ceph osd ${cmds[$((i%2))]} noup || return 1 + done + return 0 +} + +function test_mon_osdmap_prune() { + + create_pool foo 32 + wait_for_clean || return 1 + + ceph config set mon mon_debug_block_osdmap_trim true || return 1 + + generate_osdmaps 500 || return 1 + + report="$(ceph report)" + fc=$(jq '.osdmap_first_committed' <<< $report) + lc=$(jq '.osdmap_last_committed' <<< $report) + + [[ $((lc-fc)) -ge 500 ]] || return 1 + + wait_for_osdmap_manifest || return 1 + + manifest="$(ceph report | jq '.osdmap_manifest')" + + first_pinned=$(jq '.first_pinned' <<< $manifest) + last_pinned=$(jq '.last_pinned' <<< $manifest) + pinned_maps=( $(jq '.pinned_maps[]' <<< $manifest) ) + + # validate pinned maps list + [[ $first_pinned -eq ${pinned_maps[0]} ]] || return 1 + [[ $last_pinned -eq ${pinned_maps[-1]} ]] || return 1 + + # validate pinned maps range + [[ $first_pinned -lt $last_pinned ]] || return 1 + [[ $last_pinned -lt $lc ]] || return 1 + [[ $first_pinned -eq $fc ]] || return 1 + + # ensure all the maps are available, and work as expected + # this can take a while... + + for ((i=$first_pinned; i <= $last_pinned; ++i)); do + test_osdmap $i || return 1 + done + + # update pinned maps state: + # the monitor may have pruned & pinned additional maps since we last + # assessed state, given it's an iterative process. + # + manifest="$(ceph report | jq '.osdmap_manifest')" + first_pinned=$(jq '.first_pinned' <<< $manifest) + last_pinned=$(jq '.last_pinned' <<< $manifest) + pinned_maps=( $(jq '.pinned_maps[]' <<< $manifest) ) + + # test trimming maps + # + # we're going to perform the following tests: + # + # 1. force trim to a pinned map + # 2. force trim to a pinned map's previous epoch + # 3. trim all maps except the last 200 or so. + # + + # 1. force trim to a pinned map + # + [[ ${#pinned_maps[@]} -gt 10 ]] || return 1 + + trim_to=${pinned_maps[1]} + ceph config set mon mon_osd_force_trim_to $trim_to + ceph config set mon mon_min_osdmap_epochs 100 + ceph config set mon paxos_service_trim_min 1 + ceph config set mon mon_debug_block_osdmap_trim false + + # generate an epoch so we get to trim maps + ceph osd set noup + ceph osd unset noup + + wait_for_trim $trim_to || return 1 + + report="$(ceph report)" + fc=$(jq '.osdmap_first_committed' <<< $report) + [[ $fc -eq $trim_to ]] || return 1 + + old_first_pinned=$first_pinned + old_last_pinned=$last_pinned + first_pinned=$(jq '.osdmap_manifest.first_pinned' <<< $report) + last_pinned=$(jq '.osdmap_manifest.last_pinned' <<< $report) + [[ $first_pinned -eq $trim_to ]] || return 1 + [[ $first_pinned -gt $old_first_pinned ]] || return 1 + [[ $last_pinned -gt $old_first_pinned ]] || return 1 + + test_osdmap $trim_to || return 1 + test_osdmap $(( trim_to+1 )) || return 1 + + pinned_maps=( $(jq '.osdmap_manifest.pinned_maps[]' <<< $report) ) + + # 2. 
force trim to a pinned map's previous epoch + # + [[ ${#pinned_maps[@]} -gt 2 ]] || return 1 + trim_to=$(( ${pinned_maps[1]} - 1)) + ceph config set mon mon_osd_force_trim_to $trim_to + + # generate an epoch so we get to trim maps + ceph osd set noup + ceph osd unset noup + + wait_for_trim $trim_to || return 1 + + report="$(ceph report)" + fc=$(jq '.osdmap_first_committed' <<< $report) + [[ $fc -eq $trim_to ]] || return 1 + + old_first_pinned=$first_pinned + old_last_pinned=$last_pinned + first_pinned=$(jq '.osdmap_manifest.first_pinned' <<< $report) + last_pinned=$(jq '.osdmap_manifest.last_pinned' <<< $report) + pinned_maps=( $(jq '.osdmap_manifest.pinned_maps[]' <<< $report) ) + [[ $first_pinned -eq $trim_to ]] || return 1 + [[ ${pinned_maps[1]} -eq $(( trim_to+1)) ]] || return 1 + + test_osdmap $first_pinned || return 1 + test_osdmap $(( first_pinned + 1 )) || return 1 + + # 3. trim everything + # + ceph config set mon mon_osd_force_trim_to 0 + + # generate an epoch so we get to trim maps + ceph osd set noup + ceph osd unset noup + + wait_for_osdmap_manifest "false" || return 1 + + return 0 +} + +test_mon_osdmap_prune || exit 1 + +echo "OK" diff --git a/qa/workunits/mon/test_noautoscale_flag.sh b/qa/workunits/mon/test_noautoscale_flag.sh new file mode 100755 index 000000000..e1a45a4d8 --- /dev/null +++ b/qa/workunits/mon/test_noautoscale_flag.sh @@ -0,0 +1,104 @@ +#!/bin/bash -ex + +unset CEPH_CLI_TEST_DUP_COMMAND + +NUM_POOLS=$(ceph osd pool ls | wc -l) + +if [ "$NUM_POOLS" -gt 0 ]; then + echo "test requires no preexisting pools" + exit 1 +fi + +ceph osd pool set noautoscale + +ceph osd pool create pool_a + +echo 'pool_a autoscale_mode:' $(ceph osd pool autoscale-status | grep pool_a | grep -o -m 1 'on\|off') + +NUM_POOLS=$[NUM_POOLS+1] + +sleep 2 + +# Count the number of Pools with AUTOSCALE `off` + +RESULT1=$(ceph osd pool autoscale-status | grep -oe 'off' | wc -l) + +# number of Pools with AUTOSCALE `off` should equal to $NUM_POOLS + +test "$RESULT1" -eq "$NUM_POOLS" + +ceph osd pool unset noautoscale + +echo $(ceph osd pool get noautoscale) + + +ceph osd pool create pool_b + +echo 'pool_a autoscale_mode:' $(ceph osd pool autoscale-status | grep pool_a | grep -o -m 1 'on\|off') + +echo 'pool_b autoscale_mode:' $(ceph osd pool autoscale-status | grep pool_b | grep -o -m 1 'on\|off') + + +NUM_POOLS=$[NUM_POOLS+1] + +sleep 2 + +# Count the number of Pools with AUTOSCALE `on` + +RESULT2=$(ceph osd pool autoscale-status | grep -oe 'on' | wc -l) + +# number of Pools with AUTOSCALE `on` should equal to 3 + +test "$RESULT2" -eq "$[NUM_POOLS-1]" + +ceph osd pool set noautoscale + +ceph osd pool create pool_c + +echo 'pool_a autoscale_mode:' $(ceph osd pool autoscale-status | grep pool_a | grep -o -m 1 'on\|off') + +echo 'pool_b autoscale_mode:' $(ceph osd pool autoscale-status | grep pool_b | grep -o -m 1 'on\|off') + +echo 'pool_c autoscale_mode:' $(ceph osd pool autoscale-status | grep pool_c | grep -o -m 1 'on\|off') + + +NUM_POOLS=$[NUM_POOLS+1] + +sleep 2 + +# Count the number of Pools with AUTOSCALE `off` + +RESULT3=$(ceph osd pool autoscale-status | grep -oe 'off' | wc -l) + +# number of Pools with AUTOSCALE `off` should equal to 4 + +test "$RESULT3" -eq "$NUM_POOLS" + +# Now we test if we retain individual pool state of autoscale mode +# when we set and unset the noautoscale flag. 
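+# Expected outcome of the checks below: after a set/unset cycle of the global
+# noautoscale flag, pool_a should read back as 'on', pool_b as 'warn' and
+# pool_c as 'off' in 'ceph osd pool autoscale-status'.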
+ +ceph osd pool unset noautoscale + +ceph osd pool set pool_a pg_autoscale_mode on + +ceph osd pool set pool_b pg_autoscale_mode warn + +ceph osd pool set noautoscale + +ceph osd pool unset noautoscale + +RESULT4=$(ceph osd pool autoscale-status | grep pool_a | grep -o -m 1 'on\|off\|warn') +RESULT5=$(ceph osd pool autoscale-status | grep pool_b | grep -o -m 1 'on\|off\|warn') +RESULT6=$(ceph osd pool autoscale-status | grep pool_c | grep -o -m 1 'on\|off\|warn') + +test "$RESULT4" == 'on' +test "$RESULT5" == 'warn' +test "$RESULT6" == 'off' + +ceph osd pool rm pool_a pool_a --yes-i-really-really-mean-it + +ceph osd pool rm pool_b pool_b --yes-i-really-really-mean-it + +ceph osd pool rm pool_c pool_c --yes-i-really-really-mean-it + +echo OK diff --git a/qa/workunits/objectstore/test_fuse.sh b/qa/workunits/objectstore/test_fuse.sh new file mode 100755 index 000000000..f1dcbd04f --- /dev/null +++ b/qa/workunits/objectstore/test_fuse.sh @@ -0,0 +1,129 @@ +#!/bin/sh -ex + +if ! id -u | grep -q '^0$'; then + echo "not root, re-running self via sudo" + sudo PATH=$PATH TYPE=$TYPE $0 + exit 0 +fi + +expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +COT=ceph-objectstore-tool +DATA=store_test_fuse_dir +[ -z "$TYPE" ] && TYPE=bluestore +MNT=store_test_fuse_mnt + +rm -rf $DATA +mkdir -p $DATA + +test -d $MNT && fusermount -u $MNT || true +rmdir $MNT || true +mkdir $MNT + +export CEPH_ARGS=--enable_experimental_unrecoverable_data_corrupting_features=bluestore + +$COT --no-mon-config --op mkfs --data-path $DATA --type $TYPE +$COT --no-mon-config --op fuse --data-path $DATA --mountpoint $MNT & + +while ! test -e $MNT/type ; do + echo waiting for $MNT/type to appear + sleep 1 +done + +umask 0 + +grep $TYPE $MNT/type + +# create collection +mkdir $MNT/meta +test -e $MNT/meta/bitwise_hash_start +test -d $MNT/meta/all +test -d $MNT/meta/by_bitwise_hash + +# create object +mkdir $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0# +test -e $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +test -d $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr +test -d $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap +test -e $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/bitwise_hash +test -e $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap_header + +# omap header +echo omap header > $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap_header +grep -q omap $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap_header + +# omap +echo value a > $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keya +echo value b > $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keyb +ls $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap | grep -c key | grep -q 2 +grep 'value a' $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keya +grep 'value b' $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keyb +rm $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keya +test ! -e $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keya +rm $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keyb +test ! -e $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/omap/keyb + +# attr +echo value a > $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keya +echo value b > $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keyb +ls $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr | grep -c key | grep -q 2 +grep 'value a' $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keya +grep 'value b' $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keyb +rm $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keya +test ! 
-e $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keya +rm $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keyb +test ! -e $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/attr/keyb + +# data +test ! -s $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +echo asdfasdfasdf > $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +test -s $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +grep -q asdfasdfasdf $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +truncate --size 4 $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +stat --format=%s $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data | grep -q ^4$ +expect_false grep -q asdfasdfasdf $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +rm $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data +test ! -s $MNT/meta/all/#-1:7b3f43c4:::osd_superblock:0#/data + + +# create pg collection +mkdir --mode 0003 $MNT/0.0_head +grep -q 00000000 $MNT/0.0_head/bitwise_hash_start +if [ "$TYPE" = "bluestore" ]; then + cat $MNT/0.0_head/bitwise_hash_bits + grep -q 3 $MNT/0.0_head/bitwise_hash_bits + grep -q 1fffffff $MNT/0.0_head/bitwise_hash_end +fi +test -d $MNT/0.0_head/all + +mkdir --mode 0003 $MNT/0.1_head +grep -q 80000000 $MNT/0.1_head/bitwise_hash_start +if [ "$TYPE" = "bluestore" ]; then + grep -q 3 $MNT/0.1_head/bitwise_hash_bits + grep -q 9fffffff $MNT/0.1_head/bitwise_hash_end +fi + +# create pg object +mkdir $MNT/0.0_head/all/#0:00000000::::head#/ +mkdir $MNT/0.0_head/all/#0:10000000:::foo:head#/ + +# verify pg bounds check +if [ "$TYPE" = "bluestore" ]; then + expect_false mkdir $MNT/0.0_head/all/#0:20000000:::bar:head#/ +fi + +# remove a collection +expect_false rmdir $MNT/0.0_head +rmdir $MNT/0.0_head/all/#0:10000000:::foo:head#/ +rmdir $MNT/0.0_head/all/#0:00000000::::head#/ +rmdir $MNT/0.0_head +rmdir $MNT/0.1_head + +fusermount -u $MNT +wait + +echo OK diff --git a/qa/workunits/osdc/stress_objectcacher.sh b/qa/workunits/osdc/stress_objectcacher.sh new file mode 100755 index 000000000..67baadc33 --- /dev/null +++ b/qa/workunits/osdc/stress_objectcacher.sh @@ -0,0 +1,28 @@ +#!/bin/sh -ex + +for i in $(seq 1 10) +do + for DELAY in 0 1000 + do + for OPS in 1000 10000 + do + for OBJECTS in 10 50 100 + do + for READS in 0.90 0.50 0.10 + do + for OP_SIZE in 4096 131072 1048576 + do + for MAX_DIRTY in 0 25165824 + do + ceph_test_objectcacher_stress --ops $OPS --percent-read $READS --delay-ns $DELAY --objects $OBJECTS --max-op-size $OP_SIZE --client-oc-max-dirty $MAX_DIRTY --stress-test > /dev/null 2>&1 + done + done + done + done + done + done +done + +ceph_test_objectcacher_stress --correctness-test > /dev/null 2>&1 + +echo OK diff --git a/qa/workunits/post-file.sh b/qa/workunits/post-file.sh new file mode 100755 index 000000000..120fb2634 --- /dev/null +++ b/qa/workunits/post-file.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -ex + +what="$1" +[ -z "$what" ] && what=/etc/udev/rules.d +sudo ceph-post-file -d ceph-test-workunit $what + +echo OK diff --git a/qa/workunits/rados/clone.sh b/qa/workunits/rados/clone.sh new file mode 100755 index 000000000..281e89f71 --- /dev/null +++ b/qa/workunits/rados/clone.sh @@ -0,0 +1,13 @@ +#!/bin/sh -x + +set -e + +rados -p data rm foo || true +rados -p data put foo.tmp /etc/passwd --object-locator foo +rados -p data clonedata foo.tmp foo --object-locator foo +rados -p data get foo /tmp/foo +cmp /tmp/foo /etc/passwd +rados -p data rm foo.tmp --object-locator foo +rados -p data rm foo + +echo OK
\ No newline at end of file diff --git a/qa/workunits/rados/load-gen-big.sh b/qa/workunits/rados/load-gen-big.sh new file mode 100755 index 000000000..6715658ec --- /dev/null +++ b/qa/workunits/rados/load-gen-big.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +rados -p rbd load-gen \ + --num-objects 10240 \ + --min-object-size 1048576 \ + --max-object-size 25600000 \ + --max-ops 1024 \ + --max-backlog 1024 \ + --read-percent 50 \ + --run-length 1200 diff --git a/qa/workunits/rados/load-gen-mix-small-long.sh b/qa/workunits/rados/load-gen-mix-small-long.sh new file mode 100755 index 000000000..593bad51d --- /dev/null +++ b/qa/workunits/rados/load-gen-mix-small-long.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +rados -p rbd load-gen \ + --num-objects 1024 \ + --min-object-size 1 \ + --max-object-size 1048576 \ + --max-ops 128 \ + --max-backlog 128 \ + --read-percent 50 \ + --run-length 1800 diff --git a/qa/workunits/rados/load-gen-mix-small.sh b/qa/workunits/rados/load-gen-mix-small.sh new file mode 100755 index 000000000..02db77bd0 --- /dev/null +++ b/qa/workunits/rados/load-gen-mix-small.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +rados -p rbd load-gen \ + --num-objects 1024 \ + --min-object-size 1 \ + --max-object-size 1048576 \ + --max-ops 128 \ + --max-backlog 128 \ + --read-percent 50 \ + --run-length 600 diff --git a/qa/workunits/rados/load-gen-mix.sh b/qa/workunits/rados/load-gen-mix.sh new file mode 100755 index 000000000..ad3b4be84 --- /dev/null +++ b/qa/workunits/rados/load-gen-mix.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +rados -p rbd load-gen \ + --num-objects 10240 \ + --min-object-size 1 \ + --max-object-size 1048576 \ + --max-ops 128 \ + --max-backlog 128 \ + --read-percent 50 \ + --run-length 600 diff --git a/qa/workunits/rados/load-gen-mostlyread.sh b/qa/workunits/rados/load-gen-mostlyread.sh new file mode 100755 index 000000000..236f82dd4 --- /dev/null +++ b/qa/workunits/rados/load-gen-mostlyread.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +rados -p rbd load-gen \ + --num-objects 51200 \ + --min-object-size 1 \ + --max-object-size 1048576 \ + --max-ops 128 \ + --max-backlog 128 \ + --read-percent 90 \ + --run-length 600 diff --git a/qa/workunits/rados/stress_watch.sh b/qa/workunits/rados/stress_watch.sh new file mode 100755 index 000000000..49f144bbc --- /dev/null +++ b/qa/workunits/rados/stress_watch.sh @@ -0,0 +1,7 @@ +#!/bin/sh -e + +ceph_test_stress_watch +ceph_multi_stress_watch rep reppool repobj +ceph_multi_stress_watch ec ecpool ecobj + +exit 0 diff --git a/qa/workunits/rados/test.sh b/qa/workunits/rados/test.sh new file mode 100755 index 000000000..daa25fe4d --- /dev/null +++ b/qa/workunits/rados/test.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +set -ex + +parallel=1 +[ "$1" = "--serial" ] && parallel=0 + +color="" +[ -t 1 ] && color="--gtest_color=yes" + +function cleanup() { + pkill -P $$ || true +} +trap cleanup EXIT ERR HUP INT QUIT + +declare -A pids + +for f in \ + api_aio api_aio_pp \ + api_io api_io_pp \ + api_asio api_list \ + api_lock api_lock_pp \ + api_misc api_misc_pp \ + api_tier_pp \ + api_pool \ + api_snapshots api_snapshots_pp \ + api_stat api_stat_pp \ + api_watch_notify api_watch_notify_pp \ + api_cmd api_cmd_pp \ + api_service api_service_pp \ + api_c_write_operations \ + api_c_read_operations \ + api_cls_remote_reads \ + list_parallel \ + open_pools_parallel \ + delete_pools_parallel +do + if [ $parallel -eq 1 ]; then + r=`printf '%25s' $f` + ff=`echo $f | awk '{print $1}'` + bash -o pipefail -exc "ceph_test_rados_$f $color 2>&1 | tee ceph_test_rados_$ff.log | sed \"s/^/$r: /\"" & + pid=$! 
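+ # pipefail makes the tee/sed pipeline report the test binary's exit
+ # status rather than sed's; the pid recorded here lets the wait loop at
+ # the bottom attribute failures to individual tests.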
+ echo "test $f on pid $pid" + pids[$f]=$pid + else + ceph_test_rados_$f + fi +done + +ret=0 +if [ $parallel -eq 1 ]; then +for t in "${!pids[@]}" +do + pid=${pids[$t]} + if ! wait $pid + then + echo "error in $t ($pid)" + ret=1 + fi +done +fi + +exit $ret diff --git a/qa/workunits/rados/test_alloc_hint.sh b/qa/workunits/rados/test_alloc_hint.sh new file mode 100755 index 000000000..535201ca3 --- /dev/null +++ b/qa/workunits/rados/test_alloc_hint.sh @@ -0,0 +1,177 @@ +#!/usr/bin/env bash + +set -ex +shopt -s nullglob # fns glob expansion in expect_alloc_hint_eq() + +# +# Helpers +# + +function get_xml_val() { + local xml="$1" + local tag="$2" + + local regex=".*<${tag}>(.*)</${tag}>.*" + if [[ ! "${xml}" =~ ${regex} ]]; then + echo "'${xml}' xml doesn't match '${tag}' tag regex" >&2 + return 2 + fi + + echo "${BASH_REMATCH[1]}" +} + +function get_conf_val() { + set -e + + local entity="$1" + local option="$2" + + local val + val="$(sudo ceph daemon "${entity}" config get --format=xml "${option}")" + val="$(get_xml_val "${val}" "${option}")" + + echo "${val}" +} + +function setup_osd_data() { + for (( i = 0 ; i < "${NUM_OSDS}" ; i++ )); do + OSD_DATA[i]="$(get_conf_val "osd.$i" "osd_data")" + done +} + +function setup_pgid() { + local poolname="$1" + local objname="$2" + + local pgid + pgid="$(ceph osd map "${poolname}" "${objname}" --format=xml)" + pgid="$(get_xml_val "${pgid}" "pgid")" + + PGID="${pgid}" +} + +function expect_alloc_hint_eq() { + export CEPH_ARGS="--osd-objectstore=filestore" + local expected_extsize="$1" + + for (( i = 0 ; i < "${NUM_OSDS}" ; i++ )); do + # Make sure that stuff is flushed from the journal to the store + # by the time we get to it, as we prod the actual files and not + # the journal. + sudo ceph daemon "osd.${i}" "flush_journal" + + # e.g., .../25.6_head/foo__head_7FC1F406__19 + # .../26.bs1_head/bar__head_EFE6384B__1a_ffffffffffffffff_1 + local fns=$(sudo sh -c "ls ${OSD_DATA[i]}/current/${PGID}*_head/${OBJ}_*") + local count="${#fns[@]}" + if [ "${count}" -ne 1 ]; then + echo "bad fns count: ${count}" >&2 + return 2 + fi + + local extsize + extsize="$(sudo xfs_io -c extsize "${fns[0]}")" + local extsize_regex="^\[(.*)\] ${fns[0]}$" + if [[ ! 
"${extsize}" =~ ${extsize_regex} ]]; then + echo "extsize doesn't match extsize_regex: ${extsize}" >&2 + return 2 + fi + extsize="${BASH_REMATCH[1]}" + + if [ "${extsize}" -ne "${expected_extsize}" ]; then + echo "FAIL: alloc_hint: actual ${extsize}, expected ${expected_extsize}" >&2 + return 1 + fi + done +} + +# +# Global setup +# + +EC_K="2" +EC_M="1" +NUM_OSDS="$((EC_K + EC_M))" + +NUM_PG="12" +NUM_PGP="${NUM_PG}" + +LOW_CAP="$(get_conf_val "osd.0" "filestore_max_alloc_hint_size")" +HIGH_CAP="$((LOW_CAP * 10))" # 10M, assuming 1M default cap +SMALL_HINT="$((LOW_CAP / 4))" # 256K, assuming 1M default cap +BIG_HINT="$((LOW_CAP * 6))" # 6M, assuming 1M default cap + +setup_osd_data + +# +# ReplicatedBackend tests +# + +POOL="alloc_hint-rep" +ceph osd pool create "${POOL}" "${NUM_PG}" +ceph osd pool set "${POOL}" size "${NUM_OSDS}" --yes-i-really-mean-it +ceph osd pool application enable "${POOL}" rados + +OBJ="foo" +setup_pgid "${POOL}" "${OBJ}" +rados -p "${POOL}" create "${OBJ}" + +# Empty object, SMALL_HINT - expect SMALL_HINT +rados -p "${POOL}" set-alloc-hint "${OBJ}" "${SMALL_HINT}" "${SMALL_HINT}" +expect_alloc_hint_eq "${SMALL_HINT}" + +# Try changing to BIG_HINT (1) - expect LOW_CAP (BIG_HINT > LOW_CAP) +rados -p "${POOL}" set-alloc-hint "${OBJ}" "${BIG_HINT}" "${BIG_HINT}" +expect_alloc_hint_eq "${LOW_CAP}" + +# Bump the cap to HIGH_CAP +ceph tell 'osd.*' injectargs "--filestore_max_alloc_hint_size ${HIGH_CAP}" + +# Try changing to BIG_HINT (2) - expect BIG_HINT (BIG_HINT < HIGH_CAP) +rados -p "${POOL}" set-alloc-hint "${OBJ}" "${BIG_HINT}" "${BIG_HINT}" +expect_alloc_hint_eq "${BIG_HINT}" + +ceph tell 'osd.*' injectargs "--filestore_max_alloc_hint_size ${LOW_CAP}" + +# Populate object with some data +rados -p "${POOL}" put "${OBJ}" /etc/passwd + +# Try changing back to SMALL_HINT - expect BIG_HINT (non-empty object) +rados -p "${POOL}" set-alloc-hint "${OBJ}" "${SMALL_HINT}" "${SMALL_HINT}" +expect_alloc_hint_eq "${BIG_HINT}" + +OBJ="bar" +setup_pgid "${POOL}" "${OBJ}" + +# Non-existent object, SMALL_HINT - expect SMALL_HINT (object creation) +rados -p "${POOL}" set-alloc-hint "${OBJ}" "${SMALL_HINT}" "${SMALL_HINT}" +expect_alloc_hint_eq "${SMALL_HINT}" + +ceph osd pool delete "${POOL}" "${POOL}" --yes-i-really-really-mean-it + +# +# ECBackend tests +# + +PROFILE="alloc_hint-ecprofile" +POOL="alloc_hint-ec" +ceph osd erasure-code-profile set "${PROFILE}" k=2 m=1 crush-failure-domain=osd +ceph osd erasure-code-profile get "${PROFILE}" # just so it's logged +ceph osd pool create "${POOL}" "${NUM_PG}" "${NUM_PGP}" erasure "${PROFILE}" +ceph osd pool application enable "${POOL}" rados + +OBJ="baz" +setup_pgid "${POOL}" "${OBJ}" +rados -p "${POOL}" create "${OBJ}" + +# Empty object, SMALL_HINT - expect scaled-down SMALL_HINT +rados -p "${POOL}" set-alloc-hint "${OBJ}" "${SMALL_HINT}" "${SMALL_HINT}" +expect_alloc_hint_eq "$((SMALL_HINT / EC_K))" + +ceph osd pool delete "${POOL}" "${POOL}" --yes-i-really-really-mean-it + +# +# Global teardown +# + +echo "OK" diff --git a/qa/workunits/rados/test_cache_pool.sh b/qa/workunits/rados/test_cache_pool.sh new file mode 100755 index 000000000..f4187a98a --- /dev/null +++ b/qa/workunits/rados/test_cache_pool.sh @@ -0,0 +1,170 @@ +#!/usr/bin/env bash + +set -ex + +expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +# create pools, set up tier relationship +ceph osd pool create base_pool 2 +ceph osd pool application enable base_pool rados +ceph osd pool create partial_wrong 2 +ceph osd pool create wrong_cache 2 +ceph 
osd tier add base_pool partial_wrong +ceph osd tier add base_pool wrong_cache + +# populate base_pool with some data +echo "foo" > foo.txt +echo "bar" > bar.txt +echo "baz" > baz.txt +rados -p base_pool put fooobj foo.txt +rados -p base_pool put barobj bar.txt +# fill in wrong_cache backwards so we can tell we read from it +rados -p wrong_cache put fooobj bar.txt +rados -p wrong_cache put barobj foo.txt +# partial_wrong gets barobj backwards so we can check promote and non-promote +rados -p partial_wrong put barobj foo.txt + +# get the objects back before setting a caching pool +rados -p base_pool get fooobj tmp.txt +diff -q tmp.txt foo.txt +rados -p base_pool get barobj tmp.txt +diff -q tmp.txt bar.txt + +# set up redirect and make sure we get backwards results +ceph osd tier set-overlay base_pool wrong_cache +ceph osd tier cache-mode wrong_cache writeback +rados -p base_pool get fooobj tmp.txt +diff -q tmp.txt bar.txt +rados -p base_pool get barobj tmp.txt +diff -q tmp.txt foo.txt + +# switch cache pools and make sure we're doing promote +ceph osd tier remove-overlay base_pool +ceph osd tier set-overlay base_pool partial_wrong +ceph osd tier cache-mode partial_wrong writeback +rados -p base_pool get fooobj tmp.txt +diff -q tmp.txt foo.txt # hurray, it promoted! +rados -p base_pool get barobj tmp.txt +diff -q tmp.txt foo.txt # yep, we read partial_wrong's local object! + +# try a nonexistent object and make sure we get an error +expect_false rados -p base_pool get bazobj tmp.txt + +# drop the cache entirely and make sure contents are still the same +ceph osd tier remove-overlay base_pool +rados -p base_pool get fooobj tmp.txt +diff -q tmp.txt foo.txt +rados -p base_pool get barobj tmp.txt +diff -q tmp.txt bar.txt + +# create an empty cache pool and make sure it has objects after reading +ceph osd pool create empty_cache 2 + +touch empty.txt +rados -p empty_cache ls > tmp.txt +diff -q tmp.txt empty.txt + +ceph osd tier add base_pool empty_cache +ceph osd tier set-overlay base_pool empty_cache +ceph osd tier cache-mode empty_cache writeback +rados -p base_pool get fooobj tmp.txt +rados -p base_pool get barobj tmp.txt +expect_false rados -p base_pool get bazobj tmp.txt + +rados -p empty_cache ls > tmp.txt +expect_false diff -q tmp.txt empty.txt + +# cleanup +ceph osd tier remove-overlay base_pool +ceph osd tier remove base_pool wrong_cache +ceph osd tier remove base_pool partial_wrong +ceph osd tier remove base_pool empty_cache +ceph osd pool delete base_pool base_pool --yes-i-really-really-mean-it +ceph osd pool delete empty_cache empty_cache --yes-i-really-really-mean-it +ceph osd pool delete wrong_cache wrong_cache --yes-i-really-really-mean-it +ceph osd pool delete partial_wrong partial_wrong --yes-i-really-really-mean-it + +## set of base, cache +ceph osd pool create base 8 +ceph osd pool application enable base rados +ceph osd pool create cache 8 + +ceph osd tier add base cache +ceph osd tier cache-mode cache writeback +ceph osd tier set-overlay base cache + +# cache-flush, cache-evict +rados -p base put foo /etc/passwd +expect_false rados -p base cache-evict foo +expect_false rados -p base cache-flush foo +expect_false rados -p cache cache-evict foo +rados -p cache cache-flush foo +rados -p cache cache-evict foo +rados -p cache ls - | wc -l | grep 0 + +# cache-try-flush, cache-evict +rados -p base put foo /etc/passwd +expect_false rados -p base cache-evict foo +expect_false rados -p base cache-flush foo +expect_false rados -p cache cache-evict foo +rados -p cache cache-try-flush foo 
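+# (informal note: cache-try-flush is the best-effort variant of cache-flush;
+#  it is expected to return an error rather than block when the object cannot
+#  be flushed immediately.)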
+rados -p cache cache-evict foo +rados -p cache ls - | wc -l | grep 0 + +# cache-flush-evict-all +rados -p base put bar /etc/passwd +rados -p cache ls - | wc -l | grep 1 +expect_false rados -p base cache-flush-evict-all +rados -p cache cache-flush-evict-all +rados -p cache ls - | wc -l | grep 0 + +# cache-try-flush-evict-all +rados -p base put bar /etc/passwd +rados -p cache ls - | wc -l | grep 1 +expect_false rados -p base cache-flush-evict-all +rados -p cache cache-try-flush-evict-all +rados -p cache ls - | wc -l | grep 0 + +# cache flush/evit when clone objects exist +rados -p base put testclone /etc/passwd +rados -p cache ls - | wc -l | grep 1 +ceph osd pool mksnap base snap +rados -p base put testclone /etc/hosts +rados -p cache cache-flush-evict-all +rados -p cache ls - | wc -l | grep 0 + +ceph osd tier cache-mode cache proxy --yes-i-really-mean-it +rados -p base -s snap get testclone testclone.txt +diff -q testclone.txt /etc/passwd +rados -p base get testclone testclone.txt +diff -q testclone.txt /etc/hosts + +# test --with-clones option +ceph osd tier cache-mode cache writeback +rados -p base put testclone2 /etc/passwd +rados -p cache ls - | wc -l | grep 1 +ceph osd pool mksnap base snap1 +rados -p base put testclone2 /etc/hosts +expect_false rados -p cache cache-flush testclone2 +rados -p cache cache-flush testclone2 --with-clones +expect_false rados -p cache cache-evict testclone2 +rados -p cache cache-evict testclone2 --with-clones +rados -p cache ls - | wc -l | grep 0 + +rados -p base -s snap1 get testclone2 testclone2.txt +diff -q testclone2.txt /etc/passwd +rados -p base get testclone2 testclone2.txt +diff -q testclone2.txt /etc/hosts + +# cleanup +ceph osd tier remove-overlay base +ceph osd tier remove base cache + +ceph osd pool delete cache cache --yes-i-really-really-mean-it +ceph osd pool delete base base --yes-i-really-really-mean-it + +echo OK diff --git a/qa/workunits/rados/test_crash.sh b/qa/workunits/rados/test_crash.sh new file mode 100755 index 000000000..26a4c9bdc --- /dev/null +++ b/qa/workunits/rados/test_crash.sh @@ -0,0 +1,44 @@ +#!/bin/sh + +set -x + +# run on a single-node three-OSD cluster + +sudo killall -ABRT ceph-osd +sleep 5 + +# kill caused coredumps; find them and delete them, carefully, so as +# not to disturb other coredumps, or else teuthology will see them +# and assume test failure. sudos are because the core files are +# root/600 +for f in $(find $TESTDIR/archive/coredump -type f); do + gdb_output=$(echo "quit" | sudo gdb /usr/bin/ceph-osd $f) + if expr match "$gdb_output" ".*generated.*ceph-osd.*" && \ + ( \ + + expr match "$gdb_output" ".*terminated.*signal 6.*" || \ + expr match "$gdb_output" ".*terminated.*signal SIGABRT.*" \ + ) + then + sudo rm $f + fi +done + +# ceph-crash runs as the unprivileged "ceph" user, but when under test +# the ceph osd daemons are running as root, so their crash files aren't +# readable. let's chown them so they behave as they would in real life. +sudo chown -R ceph:ceph /var/lib/ceph/crash + +# let daemon find crashdumps on startup +sudo systemctl restart ceph-crash +sleep 30 + +# must be 3 crashdumps registered and moved to crash/posted +[ $(ceph crash ls | wc -l) = 4 ] || exit 1 # 4 here bc of the table header +[ $(sudo find /var/lib/ceph/crash/posted/ -name meta | wc -l) = 3 ] || exit 1 + +# there should be a health warning +ceph health detail | grep RECENT_CRASH || exit 1 +ceph crash archive-all +sleep 30 +ceph health detail | grep -c RECENT_CRASH | grep 0 # should be gone! 
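+# (for reference, reports can also be archived one at a time instead of via
+#  archive-all; an illustrative sketch, assuming the first column printed by
+#  `ceph crash ls-new` is the crash id:
+#      ceph crash ls-new | awk 'NR>1 {print $1}' | xargs -r -n1 ceph crash archive
+# )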
diff --git a/qa/workunits/rados/test_crushdiff.sh b/qa/workunits/rados/test_crushdiff.sh new file mode 100755 index 000000000..833ecbd0b --- /dev/null +++ b/qa/workunits/rados/test_crushdiff.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash + +set -ex + +REP_POOL= +EC_POOL= +TEMPDIR= + +OSD_NUM=$(ceph osd ls | wc -l) +test ${OSD_NUM} -gt 0 + +setup() { + local pool + + TEMPDIR=`mktemp -d` + + pool=test-crushdiff-rep-$$ + ceph osd pool create ${pool} 32 + REP_POOL=${pool} + rados -p ${REP_POOL} bench 5 write --no-cleanup + + if [ ${OSD_NUM} -gt 3 ]; then + pool=test-crushdiff-ec-$$ + ceph osd pool create ${pool} 32 32 erasure + EC_POOL=${pool} + rados -p ${EC_POOL} bench 5 write --no-cleanup + fi +} + +cleanup() { + set +e + + test -n "${EC_POOL}" && + ceph osd pool delete "${EC_POOL}" "${EC_POOL}" \ + --yes-i-really-really-mean-it + EC_POOL= + + test -n "${REP_POOL}" && + ceph osd pool delete "${REP_POOL}" "${REP_POOL}" \ + --yes-i-really-really-mean-it + REP_POOL= + + test -n "${TEMPDIR}" && rm -Rf ${TEMPDIR} + TEMPDIR= +} + +trap "cleanup" INT TERM EXIT + +setup + +# test without crushmap modification + +crushdiff export ${TEMPDIR}/cm.txt --verbose +crushdiff compare ${TEMPDIR}/cm.txt --verbose +crushdiff import ${TEMPDIR}/cm.txt --verbose + +# test using a compiled crushmap + +crushdiff export ${TEMPDIR}/cm --compiled --verbose +crushdiff compare ${TEMPDIR}/cm --compiled --verbose +crushdiff import ${TEMPDIR}/cm --compiled --verbose + +# test using "offline" osdmap and pg-dump + +ceph osd getmap -o ${TEMPDIR}/osdmap +ceph pg dump --format json > ${TEMPDIR}/pg-dump + +crushdiff export ${TEMPDIR}/cm.txt --osdmap ${TEMPDIR}/osdmap --verbose +crushdiff compare ${TEMPDIR}/cm.txt --osdmap ${TEMPDIR}/osdmap \ + --pg-dump ${TEMPDIR}/pg-dump --verbose | tee ${TEMPDIR}/compare.txt + +# test the diff is zero when the crushmap is not modified + +grep '^0/[0-9]* (0\.00%) pgs affected' ${TEMPDIR}/compare.txt +grep '^0/[0-9]* (0\.00%) objects affected' ${TEMPDIR}/compare.txt +grep '^0/[0-9]* (0\.00%) pg shards to move' ${TEMPDIR}/compare.txt +grep '^0/[0-9]* (0\.00%) pg object shards to move' ${TEMPDIR}/compare.txt +grep '^0\.00/.* (0\.00%) bytes to move' ${TEMPDIR}/compare.txt +crushdiff import ${TEMPDIR}/cm.txt --osdmap ${TEMPDIR}/osdmap --verbose + +if [ ${OSD_NUM} -gt 3 ]; then + + # test the diff is non-zero when the crushmap is modified + + cat ${TEMPDIR}/cm.txt >&2 + + weight=$(awk '/item osd\.0 weight ([0-9.]+)/ {print $4 * 3}' \ + ${TEMPDIR}/cm.txt) + test -n "${weight}" + sed -i -Ee 's/^(.*item osd\.0 weight )[0-9.]+/\1'${weight}'/' \ + ${TEMPDIR}/cm.txt + crushdiff compare ${TEMPDIR}/cm.txt --osdmap ${TEMPDIR}/osdmap \ + --pg-dump ${TEMPDIR}/pg-dump --verbose | tee ${TEMPDIR}/compare.txt + grep '^[1-9][0-9]*/[0-9]* (.*%) pgs affected' ${TEMPDIR}/compare.txt + grep '^[1-9][0-9]*/[0-9]* (.*%) objects affected' ${TEMPDIR}/compare.txt + grep '^[1-9][0-9]*/[0-9]* (.*%) pg shards to move' ${TEMPDIR}/compare.txt + grep '^[1-9][0-9]*/[0-9]* (.*%) pg object shards to move' \ + ${TEMPDIR}/compare.txt + grep '^.*/.* (.*%) bytes to move' ${TEMPDIR}/compare.txt + crushdiff import ${TEMPDIR}/cm.txt --osdmap ${TEMPDIR}/osdmap --verbose +fi + +echo OK diff --git a/qa/workunits/rados/test_dedup_tool.sh b/qa/workunits/rados/test_dedup_tool.sh new file mode 100755 index 000000000..18deb331b --- /dev/null +++ b/qa/workunits/rados/test_dedup_tool.sh @@ -0,0 +1,458 @@ +#!/usr/bin/env bash + +set -x + +die() { + echo "$@" + exit 1 +} + +do_run() { + if [ "$1" == "--tee" ]; then + shift + tee_out="$1" + shift + "$@" | tee 
$tee_out + else + "$@" + fi +} + +run_expect_succ() { + echo "RUN_EXPECT_SUCC: " "$@" + do_run "$@" + [ $? -ne 0 ] && die "expected success, but got failure! cmd: $@" +} + +run() { + echo "RUN: " $@ + do_run "$@" +} + +if [ -n "$CEPH_BIN" ] ; then + # CMake env + RADOS_TOOL="$CEPH_BIN/rados" + CEPH_TOOL="$CEPH_BIN/ceph" + DEDUP_TOOL="$CEPH_BIN/ceph-dedup-tool" +else + # executables should be installed by the QA env + RADOS_TOOL=$(which rados) + CEPH_TOOL=$(which ceph) + DEDUP_TOOL=$(which ceph-dedup-tool) +fi + +POOL=dedup_pool +OBJ=test_rados_obj + +[ -x "$RADOS_TOOL" ] || die "couldn't find $RADOS_TOOL binary to test" +[ -x "$CEPH_TOOL" ] || die "couldn't find $CEPH_TOOL binary to test" + +run_expect_succ "$CEPH_TOOL" osd pool create "$POOL" 8 +sleep 5 + +function test_dedup_ratio_fixed() +{ + # case 1 + dd if=/dev/urandom of=dedup_object_1k bs=1K count=1 + for num in `seq 1 50` + do + dd if=dedup_object_1k of=dedup_object_100k bs=1K oflag=append conv=notrunc + done + for num in `seq 1 50` + do + dd if=/dev/zero of=dedup_object_100k bs=1K count=1 oflag=append conv=notrunc + done + + $RADOS_TOOL -p $POOL put $OBJ ./dedup_object_100k + RESULT=$($DEDUP_TOOL --op estimate --pool $POOL --chunk-size 1024 --chunk-algorithm fixed --fingerprint-algorithm sha1 | grep chunk_size_average | awk '{print$2}' | sed "s/\,//g") + # total size / the number of deduped object = 100K / 1 + if [ 51200 -ne $RESULT ]; + then + die "Estimate failed expecting 51200 result $RESULT" + fi + + # case 2 + dd if=/dev/zero of=dedup_object_10m bs=10M count=1 + + $RADOS_TOOL -p $POOL put $OBJ ./dedup_object_10m + RESULT=$($DEDUP_TOOL --op estimate --pool $POOL --chunk-size 4096 --chunk-algorithm fixed --fingerprint-algorithm sha1 | grep examined_bytes | awk '{print$2}') + # 10485760 + if [ 10485760 -ne $RESULT ]; + then + die "Estimate failed expecting 10485760 result $RESULT" + fi + + # case 3 max_thread + for num in `seq 0 20` + do + dd if=/dev/zero of=dedup_object_$num bs=4M count=1 + $RADOS_TOOL -p $POOL put dedup_object_$num ./dedup_object_$num + done + + RESULT=$($DEDUP_TOOL --op estimate --pool $POOL --chunk-size 4096 --chunk-algorithm fixed --fingerprint-algorithm sha1 --max-thread 4 | grep chunk_size_average | awk '{print$2}' | sed "s/\,//g") + + if [ 98566144 -ne $RESULT ]; + then + die "Estimate failed expecting 98566144 result $RESULT" + fi + + rm -rf ./dedup_object_1k ./dedup_object_100k ./dedup_object_10m + for num in `seq 0 20` + do + rm -rf ./dedup_object_$num + done + $RADOS_TOOL -p $POOL rm $OBJ + for num in `seq 0 20` + do + $RADOS_TOOL -p $POOL rm dedup_object_$num + done +} + +function test_dedup_chunk_scrub() +{ + + CHUNK_POOL=dedup_chunk_pool + run_expect_succ "$CEPH_TOOL" osd pool create "$CHUNK_POOL" 8 + + echo "hi there" > foo + + echo "hi there" > bar + + echo "there" > foo-chunk + + echo "CHUNK" > bar-chunk + + $CEPH_TOOL osd pool set $POOL fingerprint_algorithm sha1 --yes-i-really-mean-it + $CEPH_TOOL osd pool set $POOL dedup_chunk_algorithm fastcdc --yes-i-really-mean-it + $CEPH_TOOL osd pool set $POOL dedup_cdc_chunk_size 4096 --yes-i-really-mean-it + $CEPH_TOOL osd pool set $POOL dedup_tier $CHUNK_POOL --yes-i-really-mean-it + + $RADOS_TOOL -p $POOL put foo ./foo + $RADOS_TOOL -p $POOL put bar ./bar + + $RADOS_TOOL -p $CHUNK_POOL put bar-chunk ./bar-chunk + $RADOS_TOOL -p $CHUNK_POOL put foo-chunk ./foo-chunk + + $RADOS_TOOL -p $POOL set-chunk bar 0 8 --target-pool $CHUNK_POOL bar-chunk 0 --with-reference + + echo -n "There hi" > test_obj + # dirty + $RADOS_TOOL -p $POOL put foo ./test_obj + 
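+ # the set-chunk call below maps bytes 0..8 of "foo" onto chunk object
+ # "foo-chunk" (offset 0) in $CHUNK_POOL; --with-reference also records the
+ # back-reference on the chunk, which is what chunk-scrub later inspects.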
$RADOS_TOOL -p $POOL set-chunk foo 0 8 --target-pool $CHUNK_POOL foo-chunk 0 --with-reference + # flush + $RADOS_TOOL -p $POOL tier-flush foo + sleep 2 + + $RADOS_TOOL ls -p $CHUNK_POOL + CHUNK_OID=$(echo -n "There hi" | sha1sum | awk '{print $1}') + + POOL_ID=$($CEPH_TOOL osd pool ls detail | grep $POOL | awk '{print$2}') + $DEDUP_TOOL --op chunk-get-ref --chunk-pool $CHUNK_POOL --object $CHUNK_OID --target-ref bar --target-ref-pool-id $POOL_ID + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID) + + RESULT=$($DEDUP_TOOL --op chunk-scrub --chunk-pool $CHUNK_POOL | grep "Damaged object" | awk '{print$4}') + if [ $RESULT -ne "1" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Chunk-scrub failed expecting damaged objects is not 1" + fi + + $DEDUP_TOOL --op chunk-put-ref --chunk-pool $CHUNK_POOL --object $CHUNK_OID --target-ref bar --target-ref-pool-id $POOL_ID + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep bar) + if [ -n "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + + rm -rf ./foo ./bar ./foo-chunk ./bar-chunk ./test_obj + $RADOS_TOOL -p $POOL rm foo + $RADOS_TOOL -p $POOL rm bar +} + +function test_dedup_chunk_repair() +{ + + CHUNK_POOL=dedup_chunk_pool + run_expect_succ "$CEPH_TOOL" osd pool create "$CHUNK_POOL" 8 + + echo -n "hi there" > foo + + echo -n "hi there" > bar + + echo -n "there" > foo-chunk + + echo -n "CHUNK" > bar-chunk + + $CEPH_TOOL osd pool set $POOL fingerprint_algorithm sha1 --yes-i-really-mean-it + $CEPH_TOOL osd pool set $POOL dedup_chunk_algorithm fastcdc --yes-i-really-mean-it + $CEPH_TOOL osd pool set $POOL dedup_cdc_chunk_size 4096 --yes-i-really-mean-it + $CEPH_TOOL osd pool set $POOL dedup_tier $CHUNK_POOL --yes-i-really-mean-it + + $RADOS_TOOL -p $POOL put foo ./foo + $RADOS_TOOL -p $POOL put bar ./bar + + $RADOS_TOOL -p $CHUNK_POOL put bar-chunk ./bar-chunk + $RADOS_TOOL -p $CHUNK_POOL put foo-chunk ./foo-chunk + + $RADOS_TOOL ls -p $CHUNK_POOL + CHUNK_OID=$(echo -n "hi there" | sha1sum | awk '{print $1}') + + POOL_ID=$($CEPH_TOOL osd pool ls detail | grep $POOL | awk '{print$2}') + $RADOS_TOOL -p $CHUNK_POOL put $CHUNK_OID ./foo + + # increase ref count by two, resuling in mismatch + $DEDUP_TOOL --op chunk-get-ref --chunk-pool $CHUNK_POOL --object $CHUNK_OID --target-ref foo --target-ref-pool-id $POOL_ID + $DEDUP_TOOL --op chunk-get-ref --chunk-pool $CHUNK_POOL --object $CHUNK_OID --target-ref foo --target-ref-pool-id $POOL_ID + $DEDUP_TOOL --op chunk-get-ref --chunk-pool $CHUNK_POOL --object $CHUNK_OID --target-ref foo --target-ref-pool-id $POOL_ID + $DEDUP_TOOL --op chunk-get-ref --chunk-pool $CHUNK_POOL --object $CHUNK_OID --target-ref foo --target-ref-pool-id $POOL_ID + $DEDUP_TOOL --op chunk-get-ref --chunk-pool $CHUNK_POOL --object bar-chunk --target-ref bar --target-ref-pool-id $POOL_ID + $DEDUP_TOOL --op chunk-get-ref --chunk-pool $CHUNK_POOL --object bar-chunk --target-ref bar --target-ref-pool-id $POOL_ID + + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID) + RESULT=$($DEDUP_TOOL --op chunk-scrub --chunk-pool $CHUNK_POOL | grep "Damaged object" | 
awk '{print$4}') + if [ $RESULT -ne "2" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Chunk-scrub failed expecting damaged objects is not 1" + fi + + $DEDUP_TOOL --op chunk-repair --chunk-pool $CHUNK_POOL --object $CHUNK_OID --target-ref foo --target-ref-pool-id $POOL_ID + $DEDUP_TOOL --op chunk-repair --chunk-pool $CHUNK_POOL --object bar-chunk --target-ref bar --target-ref-pool-id $POOL_ID + + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep foo | wc -l) + if [ 0 -ne "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object bar-chunk | grep bar | wc -l) + if [ 0 -ne "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + + rm -rf ./foo ./bar ./foo-chunk ./bar-chunk ./test_obj + $RADOS_TOOL -p $POOL rm foo + $RADOS_TOOL -p $POOL rm bar +} + +function test_dedup_object() +{ + + CHUNK_POOL=dedup_chunk_pool + run_expect_succ "$CEPH_TOOL" osd pool create "$CHUNK_POOL" 8 + + echo "There hiHI" > foo + + $CEPH_TOOL osd pool set $POOL dedup_tier $CHUNK_POOL --yes-i-really-mean-it + $RADOS_TOOL -p $POOL put foo ./foo + + sleep 2 + + rados ls -p $CHUNK_POOL + + RESULT=$($DEDUP_TOOL --pool $POOL --op chunk-dedup --object foo --chunk-pool $CHUNK_POOL --source-off 0 --source-length 10 --fingerprint-algorithm sha1 ) + + POOL_ID=$($CEPH_TOOL osd pool ls detail | grep $POOL | awk '{print$2}') + CHUNK_OID=$(echo -n "There hiHI" | sha1sum | awk '{print $1}') + + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep foo) + + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + + $RADOS_TOOL -p $CHUNK_POOL get $CHUNK_OID ./chunk + VERIFY=$(cat ./chunk | sha1sum | awk '{print $1}') + if [ "$CHUNK_OID" != "$VERIFY" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Comparing failed expecting chunk mismatch" + fi + + echo -n "There hihiHI" > bar + + $RADOS_TOOL -p $POOL put bar ./bar + RESULT=$($DEDUP_TOOL --pool $POOL --op object-dedup --object bar --chunk-pool $CHUNK_POOL --fingerprint-algorithm sha1 --dedup-cdc-chunk-size 4096) + + CHUNK_OID=$(echo -n "There hihiHI" | sha1sum | awk '{print $1}') + + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep bar) + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + + $RADOS_TOOL -p $CHUNK_POOL get $CHUNK_OID ./chunk + VERIFY=$(cat ./chunk | sha1sum | awk '{print $1}') + if [ "$CHUNK_OID" != "$VERIFY" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL 
--yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Comparing failed expecting chunk mismatch" + fi + + echo -n "THERE HIHIHI" > bar + $RADOS_TOOL -p $POOL put bar ./bar + $RADOS_TOOL -p $POOL mksnap mysnap + + echo -n "There HIHIHI" > bar + $RADOS_TOOL -p $POOL put bar ./bar + + RESULT=$($DEDUP_TOOL --pool $POOL --op object-dedup --object bar --chunk-pool $CHUNK_POOL --fingerprint-algorithm sha1 --dedup-cdc-chunk-size 4096 --snap) + + CHUNK_OID=$(echo -n "THERE HIHIHI" | sha1sum | awk '{print $1}') + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep bar) + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + + CHUNK_OID=$(echo -n "There HIHIHI" | sha1sum | awk '{print $1}') + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep bar) + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + # rerun tier-flush + + RESULT=$($DEDUP_TOOL --pool $POOL --op object-dedup --object bar --chunk-pool $CHUNK_POOL --fingerprint-algorithm sha1 --dedup-cdc-chunk-size 4096) + + CHUNK_OID=$(echo -n "There HIHIHI" | sha1sum | awk '{print $1}') + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID | grep bar) + if [ -z "$RESULT" ] ; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Scrub failed expecting bar is removed" + fi + + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + + rm -rf ./foo ./bar ./chunk + $RADOS_TOOL -p $POOL rm foo + $RADOS_TOOL -p $POOL rm bar +} + +function test_sample_dedup() +{ + CHUNK_POOL=dedup_chunk_pool + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + + sleep 2 + + run_expect_succ "$CEPH_TOOL" osd pool create "$POOL" 8 + run_expect_succ "$CEPH_TOOL" osd pool create "$CHUNK_POOL" 8 + run_expect_succ "$CEPH_TOOL" osd pool set "$POOL" dedup_tier "$CHUNK_POOL" + run_expect_succ "$CEPH_TOOL" osd pool set "$POOL" dedup_chunk_algorithm fastcdc + run_expect_succ "$CEPH_TOOL" osd pool set "$POOL" dedup_cdc_chunk_size 8192 + run_expect_succ "$CEPH_TOOL" osd pool set "$POOL" fingerprint_algorithm sha1 + + # 8 Dedupable objects + CONTENT_1="There hiHI" + echo $CONTENT_1 > foo + for num in `seq 1 8` + do + $RADOS_TOOL -p $POOL put foo_$num ./foo + done + + # 1 Unique object + CONTENT_3="There hiHI3" + echo $CONTENT_3 > foo3 + $RADOS_TOOL -p $POOL put foo3_1 ./foo3 + + sleep 2 + + # Execute dedup crawler + RESULT=$($DEDUP_TOOL --pool $POOL --chunk-pool $CHUNK_POOL --op sample-dedup --chunk-algorithm fastcdc --fingerprint-algorithm sha1 --chunk-dedup-threshold 3 --sampling-ratio 50) + + CHUNK_OID_1=$(echo $CONTENT_1 | sha1sum | awk '{print $1}') + CHUNK_OID_3=$(echo $CONTENT_3 | sha1sum | awk '{print $1}') + + # Find chunk object has references of 8 dedupable meta objects + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID_1) + DEDUP_COUNT=0 + for num in `seq 1 8` + do + 
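+ # count how many of the 8 identical objects ended up referencing the
+ # shared chunk; sample-dedup above ran with --sampling-ratio 50, so only
+ # a subset is expected to be deduped (hence the >= 2 check below).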
GREP_RESULT=$(echo $RESULT | grep foo_$num) + if [ -n "$GREP_RESULT" ]; then + DEDUP_COUNT=$(($DEDUP_COUNT + 1)) + fi + done + if [ $DEDUP_COUNT -lt 2 ]; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Chunk object has no reference of first meta object" + fi + + # 7 Duplicated objects but less than chunk dedup threshold + CONTENT_2="There hiHI2" + echo $CONTENT_2 > foo2 + for num in `seq 1 7` + do + $RADOS_TOOL -p $POOL put foo2_$num ./foo2 + done + CHUNK_OID_2=$(echo $CONTENT_2 | sha1sum | awk '{print $1}') + + RESULT=$($DEDUP_TOOL --pool $POOL --chunk-pool $CHUNK_POOL --op sample-dedup --chunk-algorithm fastcdc --fingerprint-algorithm sha1 --sampling-ratio 100 --chunk-dedup-threshold 2) + + # Objects duplicates less than chunk dedup threshold should be deduplicated because of they satisfies object-dedup-threshold + # The only object, which is crawled at the very first, should not be deduplicated because it was not duplicated at initial time + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID_2) + DEDUP_COUNT=0 + for num in `seq 1 7` + do + GREP_RESULT=$(echo $RESULT | grep foo2_$num) + if [ -n "$GREP_RESULT" ]; then + DEDUP_COUNT=$(($DEDUP_COUNT + 1)) + fi + done + if [ $DEDUP_COUNT -ne 6 ]; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Chunk object has no reference of first meta object" + fi + + # Unique object should not be deduplicated + RESULT=$($DEDUP_TOOL --op dump-chunk-refs --chunk-pool $CHUNK_POOL --object $CHUNK_OID_3) + GREP_RESULT=$($RESULT | grep $CHUNK_OID_3) + if [ -n "$GREP_RESULT" ]; then + $CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it + die "Chunk object has no reference of second meta object" + fi + + rm -rf ./foo ./foo2 ./foo3 + for num in `seq 1 8` + do + $RADOS_TOOL -p $POOL rm foo_$num + done + for num in `seq 1 2` + do + $RADOS_TOOL -p $POOL rm foo2_$num + done + $RADOS_TOOL -p $POOL rm foo3_1 + + $CEPH_TOOL osd pool delete $CHUNK_POOL $CHUNK_POOL --yes-i-really-really-mean-it +} + +test_dedup_ratio_fixed +test_dedup_chunk_scrub +test_dedup_chunk_repair +test_dedup_object +test_sample_dedup + +$CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it + +echo "SUCCESS!" 
+exit 0 diff --git a/qa/workunits/rados/test_envlibrados_for_rocksdb.sh b/qa/workunits/rados/test_envlibrados_for_rocksdb.sh new file mode 100755 index 000000000..371452f40 --- /dev/null +++ b/qa/workunits/rados/test_envlibrados_for_rocksdb.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +set -ex + +############################################ +# Helper functions +############################################ +source $(dirname $0)/../ceph-helpers-root.sh + +############################################ +# Install required tools +############################################ +echo "Install required tools" + +CURRENT_PATH=`pwd` + +############################################ +# Compile&Start RocksDB +############################################ +# install prerequisites +# for rocksdb +case $(distro_id) in + ubuntu|debian|devuan|softiron) + install git g++ libsnappy-dev zlib1g-dev libbz2-dev libradospp-dev cmake + ;; + centos|fedora|rhel) + case $(distro_id) in + rhel) + # RHEL needs CRB repo for snappy-devel + sudo subscription-manager repos --enable "codeready-builder-for-rhel-8-x86_64-rpms" + ;; + esac + install git gcc-c++.x86_64 snappy-devel zlib zlib-devel bzip2 bzip2-devel libradospp-devel.x86_64 cmake libarchive-3.3.3 + ;; + opensuse*|suse|sles) + install git gcc-c++ snappy-devel zlib-devel libbz2-devel libradospp-devel + ;; + *) + echo "$(distro_id) is unknown, $@ will have to be installed manually." + ;; +esac + +# # gflags +# sudo yum install gflags-devel +# +# wget https://github.com/schuhschuh/gflags/archive/master.zip +# unzip master.zip +# cd gflags-master +# mkdir build && cd build +# export CXXFLAGS="-fPIC" && cmake .. && make VERBOSE=1 +# make && make install + +# # snappy-devel + + +echo "Compile rocksdb" +if [ -e rocksdb ]; then + rm -fr rocksdb +fi + +pushd $(dirname /home/ubuntu/cephtest/clone.client.0/qa/workunits/rados/bash.sh)/../../../ +git submodule update --init src/rocksdb +popd +git clone $(dirname /home/ubuntu/cephtest/clone.client.0/qa/workunits/rados/bash.sh)/../../../src/rocksdb rocksdb + +# compile code +cd rocksdb +if type cmake3 > /dev/null 2>&1 ; then + CMAKE=cmake3 +else + CMAKE=cmake +fi + +[ -z "$BUILD_DIR" ] && BUILD_DIR=build +mkdir ${BUILD_DIR} && cd ${BUILD_DIR} && ${CMAKE} -DCMAKE_BUILD_TYPE=Debug -DWITH_TESTS=ON -DWITH_LIBRADOS=ON -DWITH_SNAPPY=ON -DWITH_GFLAGS=OFF -DFAIL_ON_WARNINGS=OFF .. 
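+# (the cmake flags above are taken as-is from this test: WITH_LIBRADOS=ON is
+#  what enables the EnvLibrados backend exercised by rocksdb_env_librados_test,
+#  while GFLAGS and FAIL_ON_WARNINGS are turned off to keep the build simple.)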
+make rocksdb_env_librados_test -j8 + +echo "Copy ceph.conf" +# prepare ceph.conf +mkdir -p ../ceph/src/ +if [ -f "/etc/ceph/ceph.conf" ]; then + cp /etc/ceph/ceph.conf ../ceph/src/ +elif [ -f "/etc/ceph/ceph/ceph.conf" ]; then + cp /etc/ceph/ceph/ceph.conf ../ceph/src/ +else + echo "/etc/ceph/ceph/ceph.conf doesn't exist" +fi + +echo "Run EnvLibrados test" +# run test +if [ -f "../ceph/src/ceph.conf" ] + then + cp env_librados_test ~/cephtest/archive + ./env_librados_test +else + echo "../ceph/src/ceph.conf doesn't exist" +fi +cd ${CURRENT_PATH} diff --git a/qa/workunits/rados/test_hang.sh b/qa/workunits/rados/test_hang.sh new file mode 100755 index 000000000..724e0bb82 --- /dev/null +++ b/qa/workunits/rados/test_hang.sh @@ -0,0 +1,8 @@ +#!/bin/sh -ex + +# Hang forever for manual testing using the thrasher +while(true) +do + sleep 300 +done +exit 0 diff --git a/qa/workunits/rados/test_health_warnings.sh b/qa/workunits/rados/test_health_warnings.sh new file mode 100755 index 000000000..d393e5c68 --- /dev/null +++ b/qa/workunits/rados/test_health_warnings.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash + +set -uex + +# number of osds = 10 +crushtool -o crushmap --build --num_osds 10 host straw 2 rack straw 2 row straw 2 root straw 0 +ceph osd setcrushmap -i crushmap +ceph osd tree +ceph tell osd.* injectargs --osd_max_markdown_count 1024 --osd_max_markdown_period 1 +ceph osd set noout + +wait_for_healthy() { + while ceph health | grep down + do + sleep 1 + done +} + +test_mark_two_osds_same_host_down() { + ceph osd set noup + ceph osd down osd.0 osd.1 + ceph health detail + ceph health | grep "1 host" + ceph health | grep "2 osds" + ceph health detail | grep "osd.0" + ceph health detail | grep "osd.1" + ceph osd unset noup + wait_for_healthy +} + +test_mark_two_osds_same_rack_down() { + ceph osd set noup + ceph osd down osd.8 osd.9 + ceph health detail + ceph health | grep "1 host" + ceph health | grep "1 rack" + ceph health | grep "1 row" + ceph health | grep "2 osds" + ceph health detail | grep "osd.8" + ceph health detail | grep "osd.9" + ceph osd unset noup + wait_for_healthy +} + +test_mark_all_but_last_osds_down() { + ceph osd set noup + ceph osd down $(ceph osd ls | sed \$d) + ceph health detail + ceph health | grep "1 row" + ceph health | grep "2 racks" + ceph health | grep "4 hosts" + ceph health | grep "9 osds" + ceph osd unset noup + wait_for_healthy +} + +test_mark_two_osds_same_host_down_with_classes() { + ceph osd set noup + ceph osd crush set-device-class ssd osd.0 osd.2 osd.4 osd.6 osd.8 + ceph osd crush set-device-class hdd osd.1 osd.3 osd.5 osd.7 osd.9 + ceph osd down osd.0 osd.1 + ceph health detail + ceph health | grep "1 host" + ceph health | grep "2 osds" + ceph health detail | grep "osd.0" + ceph health detail | grep "osd.1" + ceph osd unset noup + wait_for_healthy +} + +test_mark_two_osds_same_host_down +test_mark_two_osds_same_rack_down +test_mark_all_but_last_osds_down +test_mark_two_osds_same_host_down_with_classes + +exit 0 diff --git a/qa/workunits/rados/test_large_omap_detection.py b/qa/workunits/rados/test_large_omap_detection.py new file mode 100755 index 000000000..b5c81a8d8 --- /dev/null +++ b/qa/workunits/rados/test_large_omap_detection.py @@ -0,0 +1,134 @@ +#!/usr/bin/python3 +# -*- mode:python -*- +# vim: ts=4 sw=4 smarttab expandtab +# +# Copyright (C) 2017 Red Hat <contact@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software 
Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +import json +import rados +import shlex +import subprocess +import time + +def cleanup(cluster): + cluster.delete_pool('large-omap-test-pool') + cluster.shutdown() + +def init(): + # For local testing + #cluster = rados.Rados(conffile='./ceph.conf') + cluster = rados.Rados(conffile='/etc/ceph/ceph.conf') + cluster.connect() + print("\nCluster ID: " + cluster.get_fsid()) + cluster.create_pool('large-omap-test-pool') + ioctx = cluster.open_ioctx('large-omap-test-pool') + ioctx.write_full('large-omap-test-object1', b"Lorem ipsum") + op = ioctx.create_write_op() + + keys = [] + values = [] + for x in range(20001): + keys.append(str(x)) + values.append(b"X") + + ioctx.set_omap(op, tuple(keys), tuple(values)) + ioctx.operate_write_op(op, 'large-omap-test-object1', 0) + ioctx.release_write_op(op) + + ioctx.write_full('large-omap-test-object2', b"Lorem ipsum dolor") + op = ioctx.create_write_op() + + buffer = ("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do " + "eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut " + "enim ad minim veniam, quis nostrud exercitation ullamco laboris " + "nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in " + "reprehenderit in voluptate velit esse cillum dolore eu fugiat " + "nulla pariatur. Excepteur sint occaecat cupidatat non proident, " + "sunt in culpa qui officia deserunt mollit anim id est laborum.") + + keys = [] + values = [] + for x in range(20000): + keys.append(str(x)) + values.append(buffer.encode()) + + ioctx.set_omap(op, tuple(keys), tuple(values)) + ioctx.operate_write_op(op, 'large-omap-test-object2', 0) + ioctx.release_write_op(op) + ioctx.close() + return cluster + +def get_deep_scrub_timestamp(pgid): + cmd = ['ceph', 'pg', 'dump', '--format=json-pretty'] + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE) + out = proc.communicate()[0] + try: + pgstats = json.loads(out)['pg_map']['pg_stats'] + except KeyError: + pgstats = json.loads(out)['pg_stats'] + for stat in pgstats: + if stat['pgid'] == pgid: + return stat['last_deep_scrub_stamp'] + +def wait_for_scrub(): + osds = set(); + pgs = dict(); + cmd = ['ceph', 'osd', 'map', 'large-omap-test-pool', + 'large-omap-test-object1', '--format=json-pretty'] + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE) + out = proc.communicate()[0] + osds.add(json.loads(out)['acting_primary']) + pgs[json.loads(out)['pgid']] = get_deep_scrub_timestamp(json.loads(out)['pgid']) + cmd = ['ceph', 'osd', 'map', 'large-omap-test-pool', + 'large-omap-test-object2', '--format=json-pretty'] + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE) + out = proc.communicate()[0] + osds.add(json.loads(out)['acting_primary']) + pgs[json.loads(out)['pgid']] = get_deep_scrub_timestamp(json.loads(out)['pgid']) + + for pg in pgs: + command = "ceph pg deep-scrub " + str(pg) + subprocess.check_call(shlex.split(command)) + + for pg in pgs: + RETRIES = 0 + while RETRIES < 60 and pgs[pg] == get_deep_scrub_timestamp(pg): + time.sleep(10) + RETRIES += 1 + +def check_health_output(): + RETRIES = 0 + result = 0 + while RETRIES < 6 and result != 2: + result = 0 + RETRIES += 1 + output = subprocess.check_output(["ceph", "health", "detail"]) + for line in output.splitlines(): + 
result += int(line.find(b'2 large omap objects') != -1) + time.sleep(10) + + if result != 2: + print("Error, got invalid output:") + print(output) + raise Exception + +def main(): + cluster = init() + wait_for_scrub() + check_health_output() + + cleanup(cluster) + +if __name__ == '__main__': + main() diff --git a/qa/workunits/rados/test_libcephsqlite.sh b/qa/workunits/rados/test_libcephsqlite.sh new file mode 100755 index 000000000..1810a3f3f --- /dev/null +++ b/qa/workunits/rados/test_libcephsqlite.sh @@ -0,0 +1,136 @@ +#!/bin/bash -ex + +# The main point of these tests beyond ceph_test_libcephsqlite is to: +# +# - Ensure you can load the Ceph VFS via the dynamic load extension mechanism +# in SQLite. +# - Check the behavior of a dead application, that it does not hold locks +# indefinitely. + +pool="$1" +ns="$(basename $0)" + +function sqlite { + background="$1" + if [ "$background" = b ]; then + shift + fi + a=$(cat) + printf "%s" "$a" >&2 + # We're doing job control gymnastics here to make sure that sqlite3 is the + # main process (i.e. the process group leader) in the background, not a bash + # function or job pipeline. + sqlite3 -cmd '.output /dev/null' -cmd '.load libcephsqlite.so' -cmd 'pragma journal_mode = PERSIST' -cmd ".open file:///$pool:$ns/baz.db?vfs=ceph" -cmd '.output stdout' <<<"$a" & + if [ "$background" != b ]; then + wait + fi +} + +function striper { + rados --pool=$pool --namespace="$ns" --striper "$@" +} + +function repeat { + n=$1 + shift + for ((i = 0; i < "$n"; ++i)); do + echo "$*" + done +} + +striper rm baz.db || true + +time sqlite <<EOF +create table if not exists foo (a INT); +insert into foo (a) values (RANDOM()); +drop table foo; +EOF + +striper stat baz.db +striper rm baz.db + +time sqlite <<EOF +CREATE TABLE IF NOT EXISTS rand(text BLOB NOT NULL); +$(repeat 10 'INSERT INTO rand (text) VALUES (RANDOMBLOB(4096));') +SELECT LENGTH(text) FROM rand; +DROP TABLE rand; +EOF + +time sqlite <<EOF +BEGIN TRANSACTION; +CREATE TABLE IF NOT EXISTS rand(text BLOB NOT NULL); +$(repeat 100 'INSERT INTO rand (text) VALUES (RANDOMBLOB(4096));') +COMMIT; +SELECT LENGTH(text) FROM rand; +DROP TABLE rand; +EOF + +# Connection death drops the lock: + +striper rm baz.db +date +sqlite b <<EOF +CREATE TABLE foo (a BLOB); +INSERT INTO foo VALUES ("start"); +WITH RECURSIVE c(x) AS + ( + VALUES(1) + UNION ALL + SELECT x+1 + FROM c + ) +INSERT INTO foo (a) + SELECT RANDOMBLOB(1<<20) + FROM c + LIMIT (1<<20); +EOF + +# Let it chew on that INSERT for a while so it writes data, it will not finish as it's trying to write 2^40 bytes... +sleep 10 +echo done + +jobs -l +kill -KILL -- $(jobs -p) +date +wait +date + +n=$(sqlite <<<"SELECT COUNT(*) FROM foo;") +[ "$n" -eq 1 ] + +# Connection "hang" loses the lock and cannot reacquire it: + +striper rm baz.db +date +sqlite b <<EOF +CREATE TABLE foo (a BLOB); +INSERT INTO foo VALUES ("start"); +WITH RECURSIVE c(x) AS + ( + VALUES(1) + UNION ALL + SELECT x+1 + FROM c + ) +INSERT INTO foo (a) + SELECT RANDOMBLOB(1<<20) + FROM c + LIMIT (1<<20); +EOF + +# Same thing, let it chew on the INSERT for a while... 
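+# (here the writer is paused with SIGSTOP for longer than the lock renewal
+#  timeout (30s, per the comment below), so once resumed it should discover
+#  it lost the RADOS lock and exit with an error instead of finishing.)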
+sleep 20 +jobs -l +kill -STOP -- $(jobs -p) +# cephsqlite_lock_renewal_timeout is 30s +sleep 45 +date +kill -CONT -- $(jobs -p) +sleep 10 +date +# it should exit with an error as it lost the lock +wait +date + +n=$(sqlite <<<"SELECT COUNT(*) FROM foo;") +[ "$n" -eq 1 ] diff --git a/qa/workunits/rados/test_librados_build.sh b/qa/workunits/rados/test_librados_build.sh new file mode 100755 index 000000000..14e332515 --- /dev/null +++ b/qa/workunits/rados/test_librados_build.sh @@ -0,0 +1,87 @@ +#!/bin/bash -ex +# +# Compile and run a librados application outside of the ceph build system, so +# that we can be sure librados.h[pp] is still usable and hasn't accidentally +# started depending on internal headers. +# +# The script assumes all dependencies - e.g. curl, make, gcc, librados headers, +# libradosstriper headers, boost headers, etc. - are already installed. +# + +source $(dirname $0)/../ceph-helpers-root.sh + +trap cleanup EXIT + +SOURCES="hello_radosstriper.cc +hello_world_c.c +hello_world.cc +Makefile +" +BINARIES_TO_RUN="hello_world_c +hello_world_cpp +" +BINARIES="${BINARIES_TO_RUN}hello_radosstriper_cpp +" +# parse output like "octopus (dev)" +case $(librados-config --release | grep -Po ' \(\K[^\)]+') in + dev) + BRANCH=main;; + rc|stable) + BRANCH=$(librados-config --release | cut -d' ' -f1);; + *) + echo "unknown release '$(librados-config --release)'" >&2 + return 1;; +esac +DL_PREFIX="http://git.ceph.com/?p=ceph.git;a=blob_plain;hb=${BRANCH};f=examples/librados/" +#DL_PREFIX="https://raw.githubusercontent.com/ceph/ceph/master/examples/librados/" +DESTDIR=$(pwd) + +function cleanup () { + for f in $BINARIES$SOURCES ; do + rm -f "${DESTDIR}/$f" + done +} + +function get_sources () { + for s in $SOURCES ; do + curl --progress-bar --output $s -L ${DL_PREFIX}$s + done +} + +function check_sources () { + for s in $SOURCES ; do + test -f $s + done +} + +function check_binaries () { + for b in $BINARIES ; do + file $b + test -f $b + done +} + +function run_binaries () { + for b in $BINARIES_TO_RUN ; do + ./$b -c /etc/ceph/ceph.conf + done +} + +pushd $DESTDIR +case $(distro_id) in + centos|fedora|rhel|opensuse*|suse|sles) + install gcc-c++ make libradospp-devel librados-devel;; + ubuntu) + install gcc-11 g++-11 make libradospp-dev librados-dev + export CXX_FLAGS="-std=c++20";; + debian|devuan|softiron) + install g++ make libradospp-dev librados-dev;; + *) + echo "$(distro_id) is unknown, $@ will have to be installed manually." 
+esac +get_sources +check_sources +make all-system +check_binaries +run_binaries +popd diff --git a/qa/workunits/rados/test_pool_access.sh b/qa/workunits/rados/test_pool_access.sh new file mode 100755 index 000000000..4082870bc --- /dev/null +++ b/qa/workunits/rados/test_pool_access.sh @@ -0,0 +1,108 @@ +#!/usr/bin/env bash + +set -ex + +KEYRING=$(mktemp) +trap cleanup EXIT ERR HUP INT QUIT + +cleanup() { + (ceph auth del client.mon_read || true) >/dev/null 2>&1 + (ceph auth del client.mon_write || true) >/dev/null 2>&1 + + rm -f $KEYRING +} + +expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +create_pool_op() { + ID=$1 + POOL=$2 + + cat << EOF | CEPH_ARGS="-k $KEYRING" python3 +import rados + +cluster = rados.Rados(conffile="", rados_id="${ID}") +cluster.connect() +cluster.create_pool("${POOL}") +EOF +} + +delete_pool_op() { + ID=$1 + POOL=$2 + + cat << EOF | CEPH_ARGS="-k $KEYRING" python3 +import rados + +cluster = rados.Rados(conffile="", rados_id="${ID}") +cluster.connect() +cluster.delete_pool("${POOL}") +EOF +} + +create_pool_snap_op() { + ID=$1 + POOL=$2 + SNAP=$3 + + cat << EOF | CEPH_ARGS="-k $KEYRING" python3 +import rados + +cluster = rados.Rados(conffile="", rados_id="${ID}") +cluster.connect() +ioctx = cluster.open_ioctx("${POOL}") + +ioctx.create_snap("${SNAP}") +EOF +} + +remove_pool_snap_op() { + ID=$1 + POOL=$2 + SNAP=$3 + + cat << EOF | CEPH_ARGS="-k $KEYRING" python3 +import rados + +cluster = rados.Rados(conffile="", rados_id="${ID}") +cluster.connect() +ioctx = cluster.open_ioctx("${POOL}") + +ioctx.remove_snap("${SNAP}") +EOF +} + +test_pool_op() +{ + ceph auth get-or-create client.mon_read mon 'allow r' >> $KEYRING + ceph auth get-or-create client.mon_write mon 'allow *' >> $KEYRING + + expect_false create_pool_op mon_read pool1 + create_pool_op mon_write pool1 + + expect_false create_pool_snap_op mon_read pool1 snap1 + create_pool_snap_op mon_write pool1 snap1 + + expect_false remove_pool_snap_op mon_read pool1 snap1 + remove_pool_snap_op mon_write pool1 snap1 + + expect_false delete_pool_op mon_read pool1 + delete_pool_op mon_write pool1 +} + +key=`ceph auth get-or-create-key client.poolaccess1 mon 'allow r' osd 'allow *'` +rados --id poolaccess1 --key $key -p rbd ls + +key=`ceph auth get-or-create-key client.poolaccess2 mon 'allow r' osd 'allow * pool=nopool'` +expect_false rados --id poolaccess2 --key $key -p rbd ls + +key=`ceph auth get-or-create-key client.poolaccess3 mon 'allow r' osd 'allow rw pool=nopool'` +expect_false rados --id poolaccess3 --key $key -p rbd ls + +test_pool_op + +echo OK diff --git a/qa/workunits/rados/test_pool_quota.sh b/qa/workunits/rados/test_pool_quota.sh new file mode 100755 index 000000000..0eacefc64 --- /dev/null +++ b/qa/workunits/rados/test_pool_quota.sh @@ -0,0 +1,68 @@ +#!/bin/sh -ex + +p=`uuidgen` + +# objects +ceph osd pool create $p 12 +ceph osd pool set-quota $p max_objects 10 +ceph osd pool application enable $p rados + +for f in `seq 1 10` ; do + rados -p $p put obj$f /etc/passwd +done + +sleep 30 + +rados -p $p put onemore /etc/passwd & +pid=$! + +ceph osd pool set-quota $p max_objects 100 +wait $pid +[ $? -ne 0 ] && exit 1 || true + +rados -p $p put twomore /etc/passwd + +# bytes +ceph osd pool set-quota $p max_bytes 100 +sleep 30 + +rados -p $p put two /etc/passwd & +pid=$! + +ceph osd pool set-quota $p max_bytes 0 +ceph osd pool set-quota $p max_objects 0 +wait $pid +[ $? 
-ne 0 ] && exit 1 || true + +rados -p $p put three /etc/passwd + + +#one pool being full does not block a different pool + +pp=`uuidgen` + +ceph osd pool create $pp 12 +ceph osd pool application enable $pp rados + +# set objects quota +ceph osd pool set-quota $pp max_objects 10 +sleep 30 + +for f in `seq 1 10` ; do + rados -p $pp put obj$f /etc/passwd +done + +sleep 30 + +rados -p $p put threemore /etc/passwd + +ceph osd pool set-quota $p max_bytes 0 +ceph osd pool set-quota $p max_objects 0 + +sleep 30 +# done +ceph osd pool delete $p $p --yes-i-really-really-mean-it +ceph osd pool delete $pp $pp --yes-i-really-really-mean-it + +echo OK + diff --git a/qa/workunits/rados/test_python.sh b/qa/workunits/rados/test_python.sh new file mode 100755 index 000000000..cf4597a41 --- /dev/null +++ b/qa/workunits/rados/test_python.sh @@ -0,0 +1,5 @@ +#!/bin/sh -ex + +ceph osd pool create rbd +${PYTHON:-python3} -m pytest -v $(dirname $0)/../../../src/test/pybind/test_rados.py "$@" +exit 0 diff --git a/qa/workunits/rados/test_rados_timeouts.sh b/qa/workunits/rados/test_rados_timeouts.sh new file mode 100755 index 000000000..327c7ab32 --- /dev/null +++ b/qa/workunits/rados/test_rados_timeouts.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +set -x + +delay_mon() { + MSGTYPE=$1 + shift + $@ --rados-mon-op-timeout 1 --ms-inject-delay-type mon --ms-inject-delay-max 10000000 --ms-inject-delay-probability 1 --ms-inject-delay-msg-type $MSGTYPE + if [ $? -eq 0 ]; then + exit 1 + fi +} + +delay_osd() { + MSGTYPE=$1 + shift + $@ --rados-osd-op-timeout 1 --ms-inject-delay-type osd --ms-inject-delay-max 10000000 --ms-inject-delay-probability 1 --ms-inject-delay-msg-type $MSGTYPE + if [ $? -eq 0 ]; then + exit 2 + fi +} + +# pool ops +delay_mon omap rados lspools +delay_mon poolopreply ceph osd pool create test 8 +delay_mon poolopreply rados mksnap -p test snap +delay_mon poolopreply ceph osd pool rm test test --yes-i-really-really-mean-it + +# other mon ops +delay_mon getpoolstats rados df +delay_mon mon_command ceph df +delay_mon omap ceph osd dump +delay_mon omap ceph -s + +# osd ops +delay_osd osd_op_reply rados -p data put ls /bin/ls +delay_osd osd_op_reply rados -p data get ls - >/dev/null +delay_osd osd_op_reply rados -p data ls +delay_osd command_reply ceph tell osd.0 bench 1 1 + +# rbd commands, using more kinds of osd ops +rbd create -s 1 test +delay_osd osd_op_reply rbd watch test +delay_osd osd_op_reply rbd info test +delay_osd osd_op_reply rbd snap create test@snap +delay_osd osd_op_reply rbd import /bin/ls ls +rbd rm test + +echo OK diff --git a/qa/workunits/rados/test_rados_tool.sh b/qa/workunits/rados/test_rados_tool.sh new file mode 100755 index 000000000..9d025eee8 --- /dev/null +++ b/qa/workunits/rados/test_rados_tool.sh @@ -0,0 +1,924 @@ +#!/usr/bin/env bash + +set -x + +die() { + echo "$@" + exit 1 +} + +usage() { + cat <<EOF +test_rados_tool.sh: tests rados_tool +-c: RADOS configuration file to use [optional] +-k: keep temp files +-h: this help message +-p: set temporary pool to use [optional] +EOF +} + +do_run() { + if [ "$1" == "--tee" ]; then + shift + tee_out="$1" + shift + "$@" | tee $tee_out + else + "$@" + fi +} + +run_expect_fail() { + echo "RUN_EXPECT_FAIL: " "$@" + do_run "$@" + [ $? -eq 0 ] && die "expected failure, but got success! cmd: $@" +} + +run_expect_succ() { + echo "RUN_EXPECT_SUCC: " "$@" + do_run "$@" + [ $? -ne 0 ] && die "expected success, but got failure! cmd: $@" +} + +run_expect_nosignal() { + echo "RUN_EXPECT_NOSIGNAL: " "$@" + do_run "$@" + [ $? 
-ge 128 ] && die "expected success or fail, but got signal! cmd: $@" +} + +run() { + echo "RUN: " $@ + do_run "$@" +} + +if [ -n "$CEPH_BIN" ] ; then + # CMake env + RADOS_TOOL="$CEPH_BIN/rados" + CEPH_TOOL="$CEPH_BIN/ceph" +else + # executables should be installed by the QA env + RADOS_TOOL=$(which rados) + CEPH_TOOL=$(which ceph) +fi + +KEEP_TEMP_FILES=0 +POOL=trs_pool +POOL_CP_TARGET=trs_pool.2 +POOL_EC=trs_pool_ec + +[ -x "$RADOS_TOOL" ] || die "couldn't find $RADOS_TOOL binary to test" +[ -x "$CEPH_TOOL" ] || die "couldn't find $CEPH_TOOL binary to test" + +while getopts "c:hkp:" flag; do + case $flag in + c) RADOS_TOOL="$RADOS_TOOL -c $OPTARG";; + k) KEEP_TEMP_FILES=1;; + h) usage; exit 0;; + p) POOL=$OPTARG;; + *) echo; usage; exit 1;; + esac +done + +TDIR=`mktemp -d -t test_rados_tool.XXXXXXXXXX` || die "mktemp failed" +[ $KEEP_TEMP_FILES -eq 0 ] && trap "rm -rf ${TDIR}; exit" INT TERM EXIT + +# ensure rados doesn't segfault without --pool +run_expect_nosignal "$RADOS_TOOL" --snap "asdf" ls +run_expect_nosignal "$RADOS_TOOL" --snapid "0" ls +run_expect_nosignal "$RADOS_TOOL" --object-locator "asdf" ls +run_expect_nosignal "$RADOS_TOOL" --namespace "asdf" ls + +run_expect_succ "$CEPH_TOOL" osd pool create "$POOL" 8 +run_expect_succ "$CEPH_TOOL" osd erasure-code-profile set myprofile k=2 m=1 stripe_unit=2K crush-failure-domain=osd --force +run_expect_succ "$CEPH_TOOL" osd pool create "$POOL_EC" 100 100 erasure myprofile + + +# expb happens to be the empty export for legacy reasons +run_expect_succ "$RADOS_TOOL" -p "$POOL" export "$TDIR/expb" + +# expa has objects foo, foo2 and bar +run_expect_succ "$RADOS_TOOL" -p "$POOL" put foo /etc/fstab +run_expect_succ "$RADOS_TOOL" -p "$POOL" put foo2 /etc/fstab +run_expect_succ "$RADOS_TOOL" -p "$POOL" put bar /etc/fstab +run_expect_succ "$RADOS_TOOL" -p "$POOL" export "$TDIR/expa" + +# expc has foo and foo2 with some attributes and omaps set +run_expect_succ "$RADOS_TOOL" -p "$POOL" rm bar +run_expect_succ "$RADOS_TOOL" -p "$POOL" setxattr foo "rados.toothbrush" "toothbrush" +run_expect_succ "$RADOS_TOOL" -p "$POOL" setxattr foo "rados.toothpaste" "crest" +run_expect_succ "$RADOS_TOOL" -p "$POOL" setomapval foo "rados.floss" "myfloss" +run_expect_succ "$RADOS_TOOL" -p "$POOL" setxattr foo2 "rados.toothbrush" "green" +run_expect_succ "$RADOS_TOOL" -p "$POOL" setomapheader foo2 "foo2.header" +run_expect_succ "$RADOS_TOOL" -p "$POOL" export "$TDIR/expc" + +# make sure that --create works +run "$CEPH_TOOL" osd pool rm "$POOL" "$POOL" --yes-i-really-really-mean-it +run_expect_succ "$RADOS_TOOL" -p "$POOL" --create import "$TDIR/expa" + +# make sure that lack of --create fails +run_expect_succ "$CEPH_TOOL" osd pool rm "$POOL" "$POOL" --yes-i-really-really-mean-it +run_expect_fail "$RADOS_TOOL" -p "$POOL" import "$TDIR/expa" + +run_expect_succ "$RADOS_TOOL" -p "$POOL" --create import "$TDIR/expa" + +# inaccessible import src should fail +run_expect_fail "$RADOS_TOOL" -p "$POOL" import "$TDIR/dir_nonexistent" + +# export an empty pool to test purge +run_expect_succ "$RADOS_TOOL" purge "$POOL" --yes-i-really-really-mean-it +run_expect_succ "$RADOS_TOOL" -p "$POOL" export "$TDIR/empty" +cmp -s "$TDIR/expb" "$TDIR/empty" \ + || die "failed to export the same stuff we imported!" 
+rm -f "$TDIR/empty" + +# import some stuff with extended attributes on it +run_expect_succ "$RADOS_TOOL" -p "$POOL" import "$TDIR/expc" +VAL=`"$RADOS_TOOL" -p "$POOL" getxattr foo "rados.toothbrush"` +[ ${VAL} = "toothbrush" ] || die "Invalid attribute after import" + +# the second time, the xattrs should match, so there should be nothing to do. +run_expect_succ "$RADOS_TOOL" -p "$POOL" import "$TDIR/expc" +VAL=`"$RADOS_TOOL" -p "$POOL" getxattr foo "rados.toothbrush"` +[ "${VAL}" = "toothbrush" ] || die "Invalid attribute after second import" + +# Now try with --no-overwrite option after changing an attribute +run_expect_succ "$RADOS_TOOL" -p "$POOL" setxattr foo "rados.toothbrush" "dentist" +run_expect_succ "$RADOS_TOOL" -p "$POOL" import --no-overwrite "$TDIR/expc" +VAL=`"$RADOS_TOOL" -p "$POOL" getxattr foo "rados.toothbrush"` +[ "${VAL}" = "dentist" ] || die "Invalid attribute after second import" + +# now force it to copy everything +run_expect_succ "$RADOS_TOOL" -p "$POOL" import "$TDIR/expc" +VAL=`"$RADOS_TOOL" -p "$POOL" getxattr foo "rados.toothbrush"` +[ "${VAL}" = "toothbrush" ] || die "Invalid attribute after second import" + +# test copy pool +run "$CEPH_TOOL" osd pool rm "$POOL" "$POOL" --yes-i-really-really-mean-it +run "$CEPH_TOOL" osd pool rm "$POOL_CP_TARGET" "$POOL_CP_TARGET" --yes-i-really-really-mean-it +run_expect_succ "$CEPH_TOOL" osd pool create "$POOL" 8 +run_expect_succ "$CEPH_TOOL" osd pool create "$POOL_CP_TARGET" 8 + +# create src files +mkdir -p "$TDIR/dir_cp_src" +for i in `seq 1 5`; do + fname="$TDIR/dir_cp_src/f.$i" + objname="f.$i" + dd if=/dev/urandom of="$fname" bs=$((1024*1024)) count=$i + run_expect_succ "$RADOS_TOOL" -p "$POOL" put $objname "$fname" + +# a few random attrs + for j in `seq 1 4`; do + rand_str=`dd if=/dev/urandom bs=4 count=1 | hexdump -x` + run_expect_succ "$RADOS_TOOL" -p "$POOL" setxattr $objname attr.$j "$rand_str" + run_expect_succ --tee "$fname.attr.$j" "$RADOS_TOOL" -p "$POOL" getxattr $objname attr.$j + done + + rand_str=`dd if=/dev/urandom bs=4 count=1 | hexdump -x` + run_expect_succ "$RADOS_TOOL" -p "$POOL" setomapheader $objname "$rand_str" + run_expect_succ --tee "$fname.omap.header" "$RADOS_TOOL" -p "$POOL" getomapheader $objname + +# a few random omap keys + for j in `seq 1 4`; do + rand_str=`dd if=/dev/urandom bs=4 count=1 | hexdump -x` + run_expect_succ "$RADOS_TOOL" -p "$POOL" setomapval $objname key.$j "$rand_str" + done + run_expect_succ --tee "$fname.omap.vals" "$RADOS_TOOL" -p "$POOL" listomapvals $objname +done + +run_expect_succ "$RADOS_TOOL" cppool "$POOL" "$POOL_CP_TARGET" + +mkdir -p "$TDIR/dir_cp_dst" +for i in `seq 1 5`; do + fname="$TDIR/dir_cp_dst/f.$i" + objname="f.$i" + run_expect_succ "$RADOS_TOOL" -p "$POOL_CP_TARGET" get $objname "$fname" + +# a few random attrs + for j in `seq 1 4`; do + run_expect_succ --tee "$fname.attr.$j" "$RADOS_TOOL" -p "$POOL_CP_TARGET" getxattr $objname attr.$j + done + + run_expect_succ --tee "$fname.omap.header" "$RADOS_TOOL" -p "$POOL_CP_TARGET" getomapheader $objname + run_expect_succ --tee "$fname.omap.vals" "$RADOS_TOOL" -p "$POOL_CP_TARGET" listomapvals $objname +done + +diff -q -r "$TDIR/dir_cp_src" "$TDIR/dir_cp_dst" \ + || die "copy pool validation failed!" 
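+# The cppool check above relies on the --tee mode of do_run(): each getxattr,
+# getomapheader and listomapvals result is captured to a file next to the data
+# file while the command runs, so the source and destination trees can be
+# compared with a single 'diff -q -r' instead of re-reading every object.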
+ +for opt in \ + block-size \ + concurrent-ios \ + min-object-size \ + max-object-size \ + min-op-len \ + max-op-len \ + max-ops \ + max-backlog \ + target-throughput \ + read-percent \ + num-objects \ + run-length \ + ; do + run_expect_succ "$RADOS_TOOL" --$opt 4 df + run_expect_fail "$RADOS_TOOL" --$opt 4k df +done + +run_expect_succ "$RADOS_TOOL" lock list f.1 --lock-duration 4 --pool "$POOL" +echo # previous command doesn't output an end of line: issue #9735 +run_expect_fail "$RADOS_TOOL" lock list f.1 --lock-duration 4k --pool "$POOL" + +run_expect_succ "$RADOS_TOOL" mksnap snap1 --pool "$POOL" +snapid=$("$RADOS_TOOL" lssnap --pool "$POOL" | grep snap1 | cut -f1) +[ $? -ne 0 ] && die "expected success, but got failure! cmd: \"$RADOS_TOOL\" lssnap --pool \"$POOL\" | grep snap1 | cut -f1" +run_expect_succ "$RADOS_TOOL" ls --pool "$POOL" --snapid="$snapid" +run_expect_fail "$RADOS_TOOL" ls --pool "$POOL" --snapid="$snapid"k + +run_expect_succ "$RADOS_TOOL" truncate f.1 0 --pool "$POOL" +run_expect_fail "$RADOS_TOOL" truncate f.1 0k --pool "$POOL" + +run "$CEPH_TOOL" osd pool rm delete_me_mkpool_test delete_me_mkpool_test --yes-i-really-really-mean-it +run_expect_succ "$CEPH_TOOL" osd pool create delete_me_mkpool_test 1 + +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 1 write +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 1k write +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 1 write --format json --output "$TDIR/bench.json" +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 1 write --output "$TDIR/bench.json" +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --format json --no-cleanup +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 1 rand --format json +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 1 rand -f json +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 1 seq --format json +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 1 seq -f json +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --write-omap +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --write-object +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --write-xattr +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --write-xattr --write-object +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --write-xattr --write-omap +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --write-omap --write-object +run_expect_succ "$RADOS_TOOL" --pool "$POOL" bench 5 write --write-xattr --write-omap --write-object +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 5 read --write-omap +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 5 read --write-object +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 5 read --write-xattr +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 5 read --write-xattr --write-object +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 5 read --write-xattr --write-omap +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 5 read --write-omap --write-object +run_expect_fail "$RADOS_TOOL" --pool "$POOL" bench 5 read --write-xattr --write-omap --write-object + +for i in $("$RADOS_TOOL" --pool "$POOL" ls | grep "benchmark_data"); do + "$RADOS_TOOL" --pool "$POOL" truncate $i 0 +done + +run_expect_nosignal "$RADOS_TOOL" --pool "$POOL" bench 1 rand +run_expect_nosignal "$RADOS_TOOL" --pool "$POOL" bench 1 seq + +set -e + +OBJ=test_rados_obj + +expect_false() +{ + if "$@"; then return 1; else return 0; fi +} + +cleanup() { + $RADOS_TOOL -p $POOL rm $OBJ > /dev/null 2>&1 || true + $RADOS_TOOL -p $POOL_EC rm $OBJ > 
/dev/null 2>&1 || true +} + +test_omap() { + cleanup + for i in $(seq 1 1 10) + do + if [ $(($i % 2)) -eq 0 ]; then + $RADOS_TOOL -p $POOL setomapval $OBJ $i $i + else + echo -n "$i" | $RADOS_TOOL -p $POOL setomapval $OBJ $i + fi + $RADOS_TOOL -p $POOL getomapval $OBJ $i | grep -q "|$i|\$" + done + $RADOS_TOOL -p $POOL listomapvals $OBJ | grep -c value | grep 10 + for i in $(seq 1 1 5) + do + $RADOS_TOOL -p $POOL rmomapkey $OBJ $i + done + $RADOS_TOOL -p $POOL listomapvals $OBJ | grep -c value | grep 5 + $RADOS_TOOL -p $POOL clearomap $OBJ + $RADOS_TOOL -p $POOL listomapvals $OBJ | wc -l | grep 0 + cleanup + + for i in $(seq 1 1 10) + do + dd if=/dev/urandom bs=128 count=1 > $TDIR/omap_key + if [ $(($i % 2)) -eq 0 ]; then + $RADOS_TOOL -p $POOL --omap-key-file $TDIR/omap_key setomapval $OBJ $i + else + echo -n "$i" | $RADOS_TOOL -p $POOL --omap-key-file $TDIR/omap_key setomapval $OBJ + fi + $RADOS_TOOL -p $POOL --omap-key-file $TDIR/omap_key getomapval $OBJ | grep -q "|$i|\$" + $RADOS_TOOL -p $POOL --omap-key-file $TDIR/omap_key rmomapkey $OBJ + $RADOS_TOOL -p $POOL listomapvals $OBJ | grep -c value | grep 0 + done + cleanup +} + +test_xattr() { + cleanup + $RADOS_TOOL -p $POOL put $OBJ /etc/passwd + V1=`mktemp fooattrXXXXXXX` + V2=`mktemp fooattrXXXXXXX` + echo -n fooval > $V1 + expect_false $RADOS_TOOL -p $POOL setxattr $OBJ 2>/dev/null + expect_false $RADOS_TOOL -p $POOL setxattr $OBJ foo fooval extraarg 2>/dev/null + $RADOS_TOOL -p $POOL setxattr $OBJ foo fooval + $RADOS_TOOL -p $POOL getxattr $OBJ foo > $V2 + cmp $V1 $V2 + cat $V1 | $RADOS_TOOL -p $POOL setxattr $OBJ bar + $RADOS_TOOL -p $POOL getxattr $OBJ bar > $V2 + cmp $V1 $V2 + $RADOS_TOOL -p $POOL listxattr $OBJ > $V1 + grep -q foo $V1 + grep -q bar $V1 + [ `cat $V1 | wc -l` -eq 2 ] + rm $V1 $V2 + cleanup +} +test_rmobj() { + p=`uuidgen` + $CEPH_TOOL osd pool create $p 1 + $CEPH_TOOL osd pool set-quota $p max_objects 1 + V1=`mktemp fooattrXXXXXXX` + $RADOS_TOOL put $OBJ $V1 -p $p + while ! 
$CEPH_TOOL osd dump | grep 'full_quota max_objects' + do + sleep 2 + done + $RADOS_TOOL -p $p rm $OBJ --force-full + $CEPH_TOOL osd pool rm $p $p --yes-i-really-really-mean-it + rm $V1 +} + +test_ls() { + echo "Testing rados ls command" + p=`uuidgen` + $CEPH_TOOL osd pool create $p 1 + NS=10 + OBJS=20 + # Include default namespace (0) in the total + TOTAL=$(expr $OBJS \* $(expr $NS + 1)) + + for nsnum in `seq 0 $NS` + do + for onum in `seq 1 $OBJS` + do + if [ "$nsnum" = "0" ]; + then + "$RADOS_TOOL" -p $p put obj${onum} /etc/fstab 2> /dev/null + else + "$RADOS_TOOL" -p $p -N "NS${nsnum}" put obj${onum} /etc/fstab 2> /dev/null + fi + done + done + CHECK=$("$RADOS_TOOL" -p $p ls 2> /dev/null | wc -l) + if [ "$OBJS" -ne "$CHECK" ]; + then + die "Created $OBJS objects in default namespace but saw $CHECK" + fi + TESTNS=NS${NS} + CHECK=$("$RADOS_TOOL" -p $p -N $TESTNS ls 2> /dev/null | wc -l) + if [ "$OBJS" -ne "$CHECK" ]; + then + die "Created $OBJS objects in $TESTNS namespace but saw $CHECK" + fi + CHECK=$("$RADOS_TOOL" -p $p --all ls 2> /dev/null | wc -l) + if [ "$TOTAL" -ne "$CHECK" ]; + then + die "Created $TOTAL objects but saw $CHECK" + fi + + $CEPH_TOOL osd pool rm $p $p --yes-i-really-really-mean-it +} + +test_cleanup() { + echo "Testing rados cleanup command" + p=`uuidgen` + $CEPH_TOOL osd pool create $p 1 + NS=5 + OBJS=4 + # Include default namespace (0) in the total + TOTAL=$(expr $OBJS \* $(expr $NS + 1)) + + for nsnum in `seq 0 $NS` + do + for onum in `seq 1 $OBJS` + do + if [ "$nsnum" = "0" ]; + then + "$RADOS_TOOL" -p $p put obj${onum} /etc/fstab 2> /dev/null + else + "$RADOS_TOOL" -p $p -N "NS${nsnum}" put obj${onum} /etc/fstab 2> /dev/null + fi + done + done + + $RADOS_TOOL -p $p --all ls > $TDIR/before.ls.out 2> /dev/null + + $RADOS_TOOL -p $p bench 3 write --no-cleanup 2> /dev/null + $RADOS_TOOL -p $p -N NS1 bench 3 write --no-cleanup 2> /dev/null + $RADOS_TOOL -p $p -N NS2 bench 3 write --no-cleanup 2> /dev/null + $RADOS_TOOL -p $p -N NS3 bench 3 write --no-cleanup 2> /dev/null + # Leave dangling objects without a benchmark_last_metadata in NS4 + expect_false timeout 3 $RADOS_TOOL -p $p -N NS4 bench 30 write --no-cleanup 2> /dev/null + $RADOS_TOOL -p $p -N NS5 bench 3 write --no-cleanup 2> /dev/null + + $RADOS_TOOL -p $p -N NS3 cleanup 2> /dev/null + #echo "Check NS3 after specific cleanup" + CHECK=$($RADOS_TOOL -p $p -N NS3 ls | wc -l) + if [ "$OBJS" -ne "$CHECK" ] ; + then + die "Expected $OBJS objects in NS3 but saw $CHECK" + fi + + #echo "Try to cleanup all" + $RADOS_TOOL -p $p --all cleanup + #echo "Check all namespaces" + $RADOS_TOOL -p $p --all ls > $TDIR/after.ls.out 2> /dev/null + CHECK=$(cat $TDIR/after.ls.out | wc -l) + if [ "$TOTAL" -ne "$CHECK" ]; + then + die "Expected $TOTAL objects but saw $CHECK" + fi + if ! 
diff $TDIR/before.ls.out $TDIR/after.ls.out + then + die "Different objects found after cleanup" + fi + + set +e + run_expect_fail $RADOS_TOOL -p $p cleanup --prefix illegal_prefix + run_expect_succ $RADOS_TOOL -p $p cleanup --prefix benchmark_data_otherhost + set -e + + $CEPH_TOOL osd pool rm $p $p --yes-i-really-really-mean-it +} + +function test_append() +{ + cleanup + + # create object + touch ./rados_append_null + $RADOS_TOOL -p $POOL append $OBJ ./rados_append_null + $RADOS_TOOL -p $POOL get $OBJ ./rados_append_0_out + cmp ./rados_append_null ./rados_append_0_out + + # append 4k, total size 4k + dd if=/dev/zero of=./rados_append_4k bs=4k count=1 + $RADOS_TOOL -p $POOL append $OBJ ./rados_append_4k + $RADOS_TOOL -p $POOL get $OBJ ./rados_append_4k_out + cmp ./rados_append_4k ./rados_append_4k_out + + # append 4k, total size 8k + $RADOS_TOOL -p $POOL append $OBJ ./rados_append_4k + $RADOS_TOOL -p $POOL get $OBJ ./rados_append_4k_out + read_size=`ls -l ./rados_append_4k_out | awk -F ' ' '{print $5}'` + if [ 8192 -ne $read_size ]; + then + die "Append failed expecting 8192 read $read_size" + fi + + # append 10M, total size 10493952 + dd if=/dev/zero of=./rados_append_10m bs=10M count=1 + $RADOS_TOOL -p $POOL append $OBJ ./rados_append_10m + $RADOS_TOOL -p $POOL get $OBJ ./rados_append_10m_out + read_size=`ls -l ./rados_append_10m_out | awk -F ' ' '{print $5}'` + if [ 10493952 -ne $read_size ]; + then + die "Append failed expecting 10493952 read $read_size" + fi + + # cleanup + cleanup + + # create object + $RADOS_TOOL -p $POOL_EC append $OBJ ./rados_append_null + $RADOS_TOOL -p $POOL_EC get $OBJ ./rados_append_0_out + cmp rados_append_null rados_append_0_out + + # append 4k, total size 4k + $RADOS_TOOL -p $POOL_EC append $OBJ ./rados_append_4k + $RADOS_TOOL -p $POOL_EC get $OBJ ./rados_append_4k_out + cmp rados_append_4k rados_append_4k_out + + # append 4k, total size 8k + $RADOS_TOOL -p $POOL_EC append $OBJ ./rados_append_4k + $RADOS_TOOL -p $POOL_EC get $OBJ ./rados_append_4k_out + read_size=`ls -l ./rados_append_4k_out | awk -F ' ' '{print $5}'` + if [ 8192 -ne $read_size ]; + then + die "Append failed expecting 8192 read $read_size" + fi + + # append 10M, total size 10493952 + $RADOS_TOOL -p $POOL_EC append $OBJ ./rados_append_10m + $RADOS_TOOL -p $POOL_EC get $OBJ ./rados_append_10m_out + read_size=`ls -l ./rados_append_10m_out | awk -F ' ' '{print $5}'` + if [ 10493952 -ne $read_size ]; + then + die "Append failed expecting 10493952 read $read_size" + fi + + cleanup + rm -rf ./rados_append_null ./rados_append_0_out + rm -rf ./rados_append_4k ./rados_append_4k_out ./rados_append_10m ./rados_append_10m_out +} + +function test_put() +{ + # rados put test: + cleanup + + # create file in local fs + dd if=/dev/urandom of=rados_object_10k bs=1K count=10 + + # test put command + $RADOS_TOOL -p $POOL put $OBJ ./rados_object_10k + $RADOS_TOOL -p $POOL get $OBJ ./rados_object_10k_out + cmp ./rados_object_10k ./rados_object_10k_out + cleanup + + # test put command with offset 0 + $RADOS_TOOL -p $POOL put $OBJ ./rados_object_10k --offset 0 + $RADOS_TOOL -p $POOL get $OBJ ./rados_object_offset_0_out + cmp ./rados_object_10k ./rados_object_offset_0_out + cleanup + + # test put command with offset 1000 + $RADOS_TOOL -p $POOL put $OBJ ./rados_object_10k --offset 1000 + $RADOS_TOOL -p $POOL get $OBJ ./rados_object_offset_1000_out + cmp ./rados_object_10k ./rados_object_offset_1000_out 0 1000 + cleanup + + rm -rf ./rados_object_10k ./rados_object_10k_out ./rados_object_offset_0_out 
./rados_object_offset_1000_out +} + +function test_stat() +{ + bluestore=$("$CEPH_TOOL" osd metadata | grep '"osd_objectstore": "bluestore"' | cut -f1) + # create file in local fs + dd if=/dev/urandom of=rados_object_128k bs=64K count=2 + + # rados df test (replicated_pool): + $RADOS_TOOL purge $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool rm $POOL $POOL --yes-i-really-really-mean-it + $CEPH_TOOL osd pool create $POOL 8 + $CEPH_TOOL osd pool set $POOL size 3 + + # put object with 1 MB gap in front + $RADOS_TOOL -p $POOL put $OBJ ./rados_object_128k --offset=1048576 + MATCH_CNT=0 + if [ "" == "$bluestore" ]; + then + STORED=1.1 + STORED_UNIT="MiB" + else + STORED=384 + STORED_UNIT="KiB" + fi + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL df | grep $POOL ; [[ ! -z $? ]] && echo "") + [[ -z $IN ]] && sleep 1 && continue + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 3 + if [ ${VALS[1]} == $STORED ] && [ ${VALS[2]} == $STORED_UNIT ] && [ ${VALS[3]} == "1" ] && [ ${VALS[5]} == "3" ] && [ ${VALS[12]} == "1" ] && [ ${VALS[13]} == 128 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + [[ -z $IN ]] && die "Failed to retrieve any pool stats within 60 seconds" + if [ ${VALS[1]} != $STORED ] || [ ${VALS[2]} != $STORED_UNIT ] || [ ${VALS[3]} != "1" ] || [ ${VALS[5]} != "3" ] || [ ${VALS[12]} != "1" ] || [ ${VALS[13]} != 128 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + # overwrite data at 1MB offset + $RADOS_TOOL -p $POOL put $OBJ ./rados_object_128k --offset=1048576 + MATCH_CNT=0 + if [ "" == "$bluestore" ]; + then + STORED=1.1 + STORED_UNIT="MiB" + else + STORED=384 + STORED_UNIT="KiB" + fi + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL df | grep $POOL ; [[ ! -z $? ]] && echo "") + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 3 + if [ ${VALS[1]} == $STORED ] && [ ${VALS[2]} == $STORED_UNIT ] && [ ${VALS[3]} == "1" ] && [ ${VALS[5]} == "3" ] && [ ${VALS[12]} == "2" ] && [ ${VALS[13]} == 256 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + if [ ${VALS[1]} != $STORED ] || [ ${VALS[2]} != $STORED_UNIT ] || [ ${VALS[3]} != "1" ] || [ ${VALS[5]} != "3" ] || [ ${VALS[12]} != "2" ] || [ ${VALS[13]} != 256 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + # write data at 64K offset + $RADOS_TOOL -p $POOL put $OBJ ./rados_object_128k --offset=65536 + MATCH_CNT=0 + if [ "" == "$bluestore" ]; + then + STORED=1.1 + STORED_UNIT="MiB" + else + STORED=768 + STORED_UNIT="KiB" + fi + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL df | grep $POOL ; [[ ! -z $? 
]] && echo "") + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 3 + if [ ${VALS[1]} == $STORED ] && [ ${VALS[2]} == $STORED_UNIT ] && [ ${VALS[3]} == "1" ] && [ ${VALS[5]} == "3" ] && [ ${VALS[12]} == "3" ] && [ ${VALS[13]} == 384 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + if [ ${VALS[1]} != $STORED ] || [ ${VALS[2]} != $STORED_UNIT ] || [ ${VALS[3]} != "1" ] || [ ${VALS[5]} != "3" ] || [ ${VALS[12]} != "3" ] || [ ${VALS[13]} != 384 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + # overwrite object totally + $RADOS_TOOL -p $POOL put $OBJ ./rados_object_128k + MATCH_CNT=0 + if [ "" == "$bluestore" ]; + then + STORED=128 + STORED_UNIT="KiB" + else + STORED=384 + STORED_UNIT="KiB" + fi + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL df | grep $POOL ; [[ ! -z $? ]] && echo "") + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 3 + if [ ${VALS[1]} == $STORED ] && [ ${VALS[2]} == $STORED_UNIT ] && [ ${VALS[3]} == "1" ] && [ ${VALS[5]} == "3" ] && [ ${VALS[12]} == "4" ] && [ ${VALS[13]} == 512 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + if [ ${VALS[1]} != $STORED ] || [ ${VALS[2]} != $STORED_UNIT ] || [ ${VALS[3]} != "1" ] || [ ${VALS[5]} != "3" ] || [ ${VALS[12]} != "4" ] || [ ${VALS[13]} != 512 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + cleanup + + # after cleanup? + MATCH_CNT=0 + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL df | grep $POOL ; [[ ! -z $? 
]] && echo "") + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 3 + if [ ${VALS[1]} == 0 ] && [ ${VALS[2]} == "B" ] && [ ${VALS[3]} == "0" ] && [ ${VALS[5]} == "0" ] && [ ${VALS[12]} == "5" ] && [ ${VALS[13]} == 512 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + if [ ${VALS[1]} != 0 ] || [ ${VALS[2]} != "B" ] || [ ${VALS[3]} != "0" ] || [ ${VALS[5]} != "0" ] || [ ${VALS[12]} != "5" ] || [ ${VALS[13]} != 512 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + ############ rados df test (EC pool): ############## + $RADOS_TOOL purge $POOL_EC --yes-i-really-really-mean-it + $CEPH_TOOL osd pool rm $POOL_EC $POOL_EC --yes-i-really-really-mean-it + $CEPH_TOOL osd erasure-code-profile set myprofile k=2 m=1 stripe_unit=2K crush-failure-domain=osd --force + $CEPH_TOOL osd pool create $POOL_EC 8 8 erasure + + # put object + $RADOS_TOOL -p $POOL_EC put $OBJ ./rados_object_128k + MATCH_CNT=0 + if [ "" == "$bluestore" ]; + then + STORED=128 + STORED_UNIT="KiB" + else + STORED=192 + STORED_UNIT="KiB" + fi + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL_EC df | grep $POOL_EC ; [[ ! -z $? ]] && echo "") + [[ -z $IN ]] && sleep 1 && continue + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 2+1 + if [ ${VALS[1]} == $STORED ] && [ ${VALS[2]} == $STORED_UNIT ] && [ ${VALS[3]} == "1" ] && [ ${VALS[5]} == "3" ] && [ ${VALS[12]} == "1" ] && [ ${VALS[13]} == 128 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + [[ -z $IN ]] && die "Failed to retrieve any pool stats within 60 seconds" + if [ ${VALS[1]} != $STORED ] || [ ${VALS[2]} != $STORED_UNIT ] || [ ${VALS[3]} != "1" ] || [ ${VALS[5]} != "3" ] || [ ${VALS[12]} != "1" ] || [ ${VALS[13]} != 128 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + # overwrite object + $RADOS_TOOL -p $POOL_EC put $OBJ ./rados_object_128k + MATCH_CNT=0 + if [ "" == "$bluestore" ]; + then + STORED=128 + STORED_UNIT="KiB" + else + STORED=192 + STORED_UNIT="KiB" + fi + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL_EC df | grep $POOL_EC ; [[ ! -z $? 
]] && echo "") + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 2+1 + if [ ${VALS[1]} == $STORED ] && [ ${VALS[2]} == $STORED_UNIT ] && [ ${VALS[3]} == "1" ] && [ ${VALS[5]} == "3" ] && [ ${VALS[12]} == "2" ] && [ ${VALS[13]} == 256 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + if [ ${VALS[1]} != $STORED ] || [ ${VALS[2]} != $STORED_UNIT ] || [ ${VALS[3]} != "1" ] || [ ${VALS[5]} != "3" ] || [ ${VALS[12]} != "2" ] || [ ${VALS[13]} != 256 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + cleanup + + # after cleanup? + MATCH_CNT=0 + for i in {1..60} + do + IN=$($RADOS_TOOL -p $POOL_EC df | grep $POOL_EC ; [[ ! -z $? ]] && echo "") + IFS=' ' read -ra VALS <<< "$IN" + + # verification is a bit tricky due to stats report's eventual model + # VALS[1] - STORED + # VALS[2] - STORED units + # VALS[3] - OBJECTS + # VALS[5] - COPIES + # VALS[12] - WR_OPS + # VALS[13] - WR + # VALS[14] - WR uints + # implies replication factor 2+1 + if [ ${VALS[1]} == 0 ] && [ ${VALS[2]} == "B" ] && [ ${VALS[3]} == "0" ] && [ ${VALS[5]} == "0" ] && [ ${VALS[12]} == "3" ] && [ ${VALS[13]} == 256 ] && [ ${VALS[14]} == "KiB" ] + then + # enforce multiple match to make sure stats aren't changing any more + MATCH_CNT=$((MATCH_CNT+1)) + [[ $MATCH_CNT == 3 ]] && break + sleep 1 + continue + fi + MATCH_CNT=0 + sleep 1 + continue + done + if [ ${VALS[1]} != 0 ] || [ ${VALS[2]} != "B" ] || [ ${VALS[3]} != "0" ] || [ ${VALS[5]} != "0" ] || [ ${VALS[12]} != "3" ] || [ ${VALS[13]} != 256 ] || [ ${VALS[14]} != "KiB" ] + then + die "Failed to retrieve proper pool stats within 60 seconds" + fi + + rm -rf ./rados_object_128k +} + +test_xattr +test_omap +test_rmobj +test_ls +test_cleanup +test_append +test_put +test_stat + +# clean up environment, delete pool +$CEPH_TOOL osd pool delete $POOL $POOL --yes-i-really-really-mean-it +$CEPH_TOOL osd pool delete $POOL_EC $POOL_EC --yes-i-really-really-mean-it +$CEPH_TOOL osd pool delete $POOL_CP_TARGET $POOL_CP_TARGET --yes-i-really-really-mean-it + +echo "SUCCESS!" +exit 0 diff --git a/qa/workunits/rados/version_number_sanity.sh b/qa/workunits/rados/version_number_sanity.sh new file mode 100755 index 000000000..e7eb9be64 --- /dev/null +++ b/qa/workunits/rados/version_number_sanity.sh @@ -0,0 +1,30 @@ +#!/bin/bash -ex +# +# test that ceph RPM/DEB package version matches "ceph --version" +# (for a loose definition of "matches") +# +source /etc/os-release +case $ID in +debian|ubuntu) + RPMDEB='DEB' + dpkg-query --show ceph-common + PKG_NAME_AND_VERSION=$(dpkg-query --show ceph-common) + ;; +centos|fedora|rhel|opensuse*|suse|sles) + RPMDEB='RPM' + rpm -q ceph + PKG_NAME_AND_VERSION=$(rpm -q ceph) + ;; +*) + echo "Unsupported distro ->$ID<-! Bailing out." 
+ exit 1 +esac +PKG_CEPH_VERSION=$(perl -e '"'"$PKG_NAME_AND_VERSION"'" =~ m/(\d+(\.\d+)+)/; print "$1\n";') +echo "According to $RPMDEB package, the ceph version under test is ->$PKG_CEPH_VERSION<-" +test -n "$PKG_CEPH_VERSION" +ceph --version +BUFFER=$(ceph --version) +CEPH_CEPH_VERSION=$(perl -e '"'"$BUFFER"'" =~ m/ceph version (\d+(\.\d+)+)/; print "$1\n";') +echo "According to \"ceph --version\", the ceph version under test is ->$CEPH_CEPH_VERSION<-" +test -n "$CEPH_CEPH_VERSION" +test "$PKG_CEPH_VERSION" = "$CEPH_CEPH_VERSION" diff --git a/qa/workunits/rbd/cli_generic.sh b/qa/workunits/rbd/cli_generic.sh new file mode 100755 index 000000000..57279d26d --- /dev/null +++ b/qa/workunits/rbd/cli_generic.sh @@ -0,0 +1,1715 @@ +#!/usr/bin/env bash +set -ex + +. $(dirname $0)/../../standalone/ceph-helpers.sh + +export RBD_FORCE_ALLOW_V1=1 + +# make sure rbd pool is EMPTY.. this is a test script!! +rbd ls | wc -l | grep -v '^0$' && echo "nonempty rbd pool, aborting! run this script on an empty test cluster only." && exit 1 + +IMGS="testimg1 testimg2 testimg3 testimg4 testimg5 testimg6 testimg-diff1 testimg-diff2 testimg-diff3 foo foo2 bar bar2 test1 test2 test3 test4 clone2" + +expect_fail() { + "$@" && return 1 || return 0 +} + +tiered=0 +if ceph osd dump | grep ^pool | grep "'rbd'" | grep tier; then + tiered=1 +fi + +remove_images() { + for img in $IMGS + do + (rbd snap purge $img || true) >/dev/null 2>&1 + (rbd rm $img || true) >/dev/null 2>&1 + done +} + +test_others() { + echo "testing import, export, resize, and snapshots..." + TMP_FILES="/tmp/img1 /tmp/img1.new /tmp/img2 /tmp/img2.new /tmp/img3 /tmp/img3.new /tmp/img-diff1.new /tmp/img-diff2.new /tmp/img-diff3.new /tmp/img1.snap1 /tmp/img1.snap1 /tmp/img-diff1.snap1" + + remove_images + rm -f $TMP_FILES + + # create an image + dd if=/bin/sh of=/tmp/img1 bs=1k count=1 seek=10 + dd if=/bin/dd of=/tmp/img1 bs=1k count=10 seek=100 + dd if=/bin/rm of=/tmp/img1 bs=1k count=100 seek=1000 + dd if=/bin/ls of=/tmp/img1 bs=1k seek=10000 + dd if=/bin/ln of=/tmp/img1 bs=1k seek=100000 + + # import, snapshot + rbd import $RBD_CREATE_ARGS /tmp/img1 testimg1 + rbd resize testimg1 --size=256 --allow-shrink + rbd export testimg1 /tmp/img2 + rbd snap create testimg1 --snap=snap1 + rbd resize testimg1 --size=128 && exit 1 || true # shrink should fail + rbd resize testimg1 --size=128 --allow-shrink + rbd export testimg1 /tmp/img3 + + # info + rbd info testimg1 | grep 'size 128 MiB' + rbd info --snap=snap1 testimg1 | grep 'size 256 MiB' + + # export-diff + rm -rf /tmp/diff-testimg1-1 /tmp/diff-testimg1-2 + rbd export-diff testimg1 --snap=snap1 /tmp/diff-testimg1-1 + rbd export-diff testimg1 --from-snap=snap1 /tmp/diff-testimg1-2 + + # import-diff + rbd create $RBD_CREATE_ARGS --size=1 testimg-diff1 + rbd import-diff --sparse-size 8K /tmp/diff-testimg1-1 testimg-diff1 + rbd import-diff --sparse-size 8K /tmp/diff-testimg1-2 testimg-diff1 + + # info + rbd info testimg1 | grep 'size 128 MiB' + rbd info --snap=snap1 testimg1 | grep 'size 256 MiB' + rbd info testimg-diff1 | grep 'size 128 MiB' + rbd info --snap=snap1 testimg-diff1 | grep 'size 256 MiB' + + # make copies + rbd copy testimg1 --snap=snap1 testimg2 + rbd copy testimg1 testimg3 + rbd copy testimg-diff1 --sparse-size 768K --snap=snap1 testimg-diff2 + rbd copy testimg-diff1 --sparse-size 768K testimg-diff3 + + # verify the result + rbd info testimg2 | grep 'size 256 MiB' + rbd info testimg3 | grep 'size 128 MiB' + rbd info testimg-diff2 | grep 'size 256 MiB' + rbd info testimg-diff3 | grep 'size 128 
MiB' + + # deep copies + rbd deep copy testimg1 testimg4 + rbd deep copy testimg1 --snap=snap1 testimg5 + rbd info testimg4 | grep 'size 128 MiB' + rbd info testimg5 | grep 'size 256 MiB' + rbd snap ls testimg4 | grep -v 'SNAPID' | wc -l | grep 1 + rbd snap ls testimg4 | grep '.*snap1.*' + + rbd export testimg1 /tmp/img1.new + rbd export testimg2 /tmp/img2.new + rbd export testimg3 /tmp/img3.new + rbd export testimg-diff1 /tmp/img-diff1.new + rbd export testimg-diff2 /tmp/img-diff2.new + rbd export testimg-diff3 /tmp/img-diff3.new + + cmp /tmp/img2 /tmp/img2.new + cmp /tmp/img3 /tmp/img3.new + cmp /tmp/img2 /tmp/img-diff2.new + cmp /tmp/img3 /tmp/img-diff3.new + + # rollback + rbd snap rollback --snap=snap1 testimg1 + rbd snap rollback --snap=snap1 testimg-diff1 + rbd info testimg1 | grep 'size 256 MiB' + rbd info testimg-diff1 | grep 'size 256 MiB' + rbd export testimg1 /tmp/img1.snap1 + rbd export testimg-diff1 /tmp/img-diff1.snap1 + cmp /tmp/img2 /tmp/img1.snap1 + cmp /tmp/img2 /tmp/img-diff1.snap1 + + # test create, copy of zero-length images + rbd rm testimg2 + rbd rm testimg3 + rbd create testimg2 -s 0 + rbd cp testimg2 testimg3 + rbd deep cp testimg2 testimg6 + + # remove snapshots + rbd snap rm --snap=snap1 testimg1 + rbd snap rm --snap=snap1 testimg-diff1 + rbd info --snap=snap1 testimg1 2>&1 | grep 'error setting snapshot context: (2) No such file or directory' + rbd info --snap=snap1 testimg-diff1 2>&1 | grep 'error setting snapshot context: (2) No such file or directory' + + # sparsify + rbd sparsify testimg1 + + remove_images + rm -f $TMP_FILES +} + +test_rename() { + echo "testing rename..." + remove_images + + rbd create --image-format 1 -s 1 foo + rbd create --image-format 2 -s 1 bar + rbd rename foo foo2 + rbd rename foo2 bar 2>&1 | grep exists + rbd rename bar bar2 + rbd rename bar2 foo2 2>&1 | grep exists + + ceph osd pool create rbd2 8 + rbd pool init rbd2 + rbd create -p rbd2 -s 1 foo + rbd rename rbd2/foo rbd2/bar + rbd -p rbd2 ls | grep bar + rbd rename rbd2/bar foo + rbd rename --pool rbd2 foo bar + ! rbd rename rbd2/bar --dest-pool rbd foo + rbd rename --pool rbd2 bar --dest-pool rbd2 foo + rbd -p rbd2 ls | grep foo + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it + + remove_images +} + +test_ls() { + echo "testing ls..." 
+ remove_images + + rbd create --image-format 1 -s 1 test1 + rbd create --image-format 1 -s 1 test2 + rbd ls | grep test1 + rbd ls | grep test2 + rbd ls | wc -l | grep 2 + # look for fields in output of ls -l without worrying about space + rbd ls -l | grep 'test1.*1 MiB.*1' + rbd ls -l | grep 'test2.*1 MiB.*1' + + rbd rm test1 + rbd rm test2 + + rbd create --image-format 2 -s 1 test1 + rbd create --image-format 2 -s 1 test2 + rbd ls | grep test1 + rbd ls | grep test2 + rbd ls | wc -l | grep 2 + rbd ls -l | grep 'test1.*1 MiB.*2' + rbd ls -l | grep 'test2.*1 MiB.*2' + + rbd rm test1 + rbd rm test2 + + rbd create --image-format 2 -s 1 test1 + rbd create --image-format 1 -s 1 test2 + rbd ls | grep test1 + rbd ls | grep test2 + rbd ls | wc -l | grep 2 + rbd ls -l | grep 'test1.*1 MiB.*2' + rbd ls -l | grep 'test2.*1 MiB.*1' + remove_images + + # test that many images can be shown by ls + for i in $(seq -w 00 99); do + rbd create image.$i -s 1 + done + rbd ls | wc -l | grep 100 + rbd ls -l | grep image | wc -l | grep 100 + for i in $(seq -w 00 99); do + rbd rm image.$i + done + + for i in $(seq -w 00 99); do + rbd create image.$i --image-format 2 -s 1 + done + rbd ls | wc -l | grep 100 + rbd ls -l | grep image | wc -l | grep 100 + for i in $(seq -w 00 99); do + rbd rm image.$i + done +} + +test_remove() { + echo "testing remove..." + remove_images + + rbd remove "NOT_EXIST" && exit 1 || true # remove should fail + rbd create --image-format 1 -s 1 test1 + rbd rm test1 + rbd ls | wc -l | grep "^0$" + + rbd create --image-format 2 -s 1 test2 + rbd rm test2 + rbd ls | wc -l | grep "^0$" + + # check that remove succeeds even if it's + # interrupted partway through. simulate this + # by removing some objects manually. + + # remove with header missing (old format) + rbd create --image-format 1 -s 1 test1 + rados rm -p rbd test1.rbd + rbd rm test1 + rbd ls | wc -l | grep "^0$" + + if [ $tiered -eq 0 ]; then + # remove with header missing + rbd create --image-format 2 -s 1 test2 + HEADER=$(rados -p rbd ls | grep '^rbd_header') + rados -p rbd rm $HEADER + rbd rm test2 + rbd ls | wc -l | grep "^0$" + + # remove with id missing + rbd create --image-format 2 -s 1 test2 + rados -p rbd rm rbd_id.test2 + rbd rm test2 + rbd ls | wc -l | grep "^0$" + + # remove with header and id missing + rbd create --image-format 2 -s 1 test2 + HEADER=$(rados -p rbd ls | grep '^rbd_header') + rados -p rbd rm $HEADER + rados -p rbd rm rbd_id.test2 + rbd rm test2 + rbd ls | wc -l | grep "^0$" + fi + + # remove with rbd_children object missing (and, by extension, + # with child not mentioned in rbd_children) + rbd create --image-format 2 -s 1 test2 + rbd snap create test2@snap + rbd snap protect test2@snap + rbd clone test2@snap clone --rbd-default-clone-format 1 + + rados -p rbd rm rbd_children + rbd rm clone + rbd ls | grep clone | wc -l | grep '^0$' + + rbd snap unprotect test2@snap + rbd snap rm test2@snap + rbd rm test2 +} + +test_locking() { + echo "testing locking..." 
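+ # Advisory lock round trip exercised below (the locker id is taken from the
+ # 'rbd lock list' output; values are illustrative):
+ #   rbd lock add <image> <id> [--shared <tag>]
+ #   rbd lock list <image>
+ #   rbd lock remove <image> <id> <locker>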
+ remove_images + + rbd create $RBD_CREATE_ARGS -s 1 test1 + rbd lock list test1 | wc -l | grep '^0$' + rbd lock add test1 id + rbd lock list test1 | grep ' 1 ' + LOCKER=$(rbd lock list test1 | tail -n 1 | awk '{print $1;}') + rbd lock remove test1 id $LOCKER + rbd lock list test1 | wc -l | grep '^0$' + + rbd lock add test1 id --shared tag + rbd lock list test1 | grep ' 1 ' + rbd lock add test1 id --shared tag + rbd lock list test1 | grep ' 2 ' + rbd lock add test1 id2 --shared tag + rbd lock list test1 | grep ' 3 ' + rbd lock list test1 | tail -n 1 | awk '{print $2, $1;}' | xargs rbd lock remove test1 + if rbd info test1 | grep -qE "features:.*exclusive" + then + # new locking functionality requires all locks to be released + while [ -n "$(rbd lock list test1)" ] + do + rbd lock list test1 | tail -n 1 | awk '{print $2, $1;}' | xargs rbd lock remove test1 + done + fi + rbd rm test1 +} + +test_pool_image_args() { + echo "testing pool and image args..." + remove_images + + ceph osd pool delete test test --yes-i-really-really-mean-it || true + ceph osd pool create test 32 + rbd pool init test + truncate -s 1 /tmp/empty /tmp/empty@snap + + rbd ls | wc -l | grep 0 + rbd create -s 1 test1 + rbd ls | grep -q test1 + rbd import --image test2 /tmp/empty + rbd ls | grep -q test2 + rbd --dest test3 import /tmp/empty + rbd ls | grep -q test3 + rbd import /tmp/empty foo + rbd ls | grep -q foo + + # should fail due to "destination snapname specified" + rbd import --dest test/empty@snap /tmp/empty && exit 1 || true + rbd import /tmp/empty test/empty@snap && exit 1 || true + rbd import --image test/empty@snap /tmp/empty && exit 1 || true + rbd import /tmp/empty@snap && exit 1 || true + + rbd ls test | wc -l | grep 0 + rbd import /tmp/empty test/test1 + rbd ls test | grep -q test1 + rbd -p test import /tmp/empty test2 + rbd ls test | grep -q test2 + rbd --image test3 -p test import /tmp/empty + rbd ls test | grep -q test3 + rbd --image test4 -p test import /tmp/empty + rbd ls test | grep -q test4 + rbd --dest test5 -p test import /tmp/empty + rbd ls test | grep -q test5 + rbd --dest test6 --dest-pool test import /tmp/empty + rbd ls test | grep -q test6 + rbd --image test7 --dest-pool test import /tmp/empty + rbd ls test | grep -q test7 + rbd --image test/test8 import /tmp/empty + rbd ls test | grep -q test8 + rbd --dest test/test9 import /tmp/empty + rbd ls test | grep -q test9 + rbd import --pool test /tmp/empty + rbd ls test | grep -q empty + + # copy with no explicit pool goes to pool rbd + rbd copy test/test9 test10 + rbd ls test | grep -qv test10 + rbd ls | grep -q test10 + rbd copy test/test9 test/test10 + rbd ls test | grep -q test10 + rbd copy --pool test test10 --dest-pool test test11 + rbd ls test | grep -q test11 + rbd copy --dest-pool rbd --pool test test11 test12 + rbd ls | grep test12 + rbd ls test | grep -qv test12 + + rm -f /tmp/empty /tmp/empty@snap + ceph osd pool delete test test --yes-i-really-really-mean-it + + for f in foo test1 test10 test12 test2 test3 ; do + rbd rm $f + done +} + +test_clone() { + echo "testing clone..." 
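+ # The same source file is imported repeatedly below only to prove that the
+ # pool/image pair can be given in several equivalent ways: as 'pool/image',
+ # via -p/--pool plus an image name, or via --image/--dest/--dest-pool, and
+ # that copy falls back to the 'rbd' pool when no pool is given.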
+ remove_images + rbd create test1 $RBD_CREATE_ARGS -s 1 + rbd snap create test1@s1 + rbd snap protect test1@s1 + + ceph osd pool create rbd2 8 + rbd pool init rbd2 + rbd clone test1@s1 rbd2/clone + rbd -p rbd2 ls | grep clone + rbd -p rbd2 ls -l | grep clone | grep test1@s1 + rbd ls | grep -v clone + rbd flatten rbd2/clone + rbd snap create rbd2/clone@s1 + rbd snap protect rbd2/clone@s1 + rbd clone rbd2/clone@s1 clone2 + rbd ls | grep clone2 + rbd ls -l | grep clone2 | grep rbd2/clone@s1 + rbd -p rbd2 ls | grep -v clone2 + + rbd rm clone2 + rbd snap unprotect rbd2/clone@s1 + rbd snap rm rbd2/clone@s1 + rbd rm rbd2/clone + rbd snap unprotect test1@s1 + rbd snap rm test1@s1 + rbd rm test1 + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it +} + +test_trash() { + echo "testing trash..." + remove_images + + rbd create $RBD_CREATE_ARGS -s 1 test1 + rbd create $RBD_CREATE_ARGS -s 1 test2 + rbd ls | grep test1 + rbd ls | grep test2 + rbd ls | wc -l | grep 2 + rbd ls -l | grep 'test1.*2.*' + rbd ls -l | grep 'test2.*2.*' + + rbd trash mv test1 + rbd ls | grep test2 + rbd ls | wc -l | grep 1 + rbd ls -l | grep 'test2.*2.*' + + rbd trash ls | grep test1 + rbd trash ls | wc -l | grep 1 + rbd trash ls -l | grep 'test1.*USER.*' + rbd trash ls -l | grep -v 'protected until' + + ID=`rbd trash ls | cut -d ' ' -f 1` + rbd trash rm $ID + + rbd trash mv test2 + ID=`rbd trash ls | cut -d ' ' -f 1` + rbd info --image-id $ID | grep "rbd image 'test2'" + + rbd trash restore $ID + rbd ls | grep test2 + rbd ls | wc -l | grep 1 + rbd ls -l | grep 'test2.*2.*' + + rbd trash mv test2 --expires-at "3600 sec" + rbd trash ls | grep test2 + rbd trash ls | wc -l | grep 1 + rbd trash ls -l | grep 'test2.*USER.*protected until' + + rbd trash rm $ID 2>&1 | grep 'Deferment time has not expired' + rbd trash rm --image-id $ID --force + + rbd create $RBD_CREATE_ARGS -s 1 test1 + rbd snap create test1@snap1 + rbd snap protect test1@snap1 + rbd trash mv test1 + + rbd trash ls | grep test1 + rbd trash ls | wc -l | grep 1 + rbd trash ls -l | grep 'test1.*USER.*' + rbd trash ls -l | grep -v 'protected until' + + ID=`rbd trash ls | cut -d ' ' -f 1` + rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 1 + rbd snap ls --image-id $ID | grep '.*snap1.*' + + rbd snap unprotect --image-id $ID --snap snap1 + rbd snap rm --image-id $ID --snap snap1 + rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 0 + + rbd trash restore $ID + rbd snap create test1@snap1 + rbd snap create test1@snap2 + rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 2 + rbd snap purge --image-id $ID + rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 0 + + rbd rm --rbd_move_to_trash_on_remove=true --rbd_move_to_trash_on_remove_expire_seconds=3600 test1 + rbd trash ls | grep test1 + rbd trash ls | wc -l | grep 1 + rbd trash ls -l | grep 'test1.*USER.*protected until' + rbd trash rm $ID 2>&1 | grep 'Deferment time has not expired' + rbd trash rm --image-id $ID --force + + remove_images +} + +test_purge() { + echo "testing trash purge..." 
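+ # 'rbd trash purge' only removes images whose deferment time has expired and
+ # that are not pinned; images that still have snapshots or linked clones stay
+ # behind and the command warns 'some expired images could not be removed'.
+ # --expired-before limits the purge to images whose deferment ends before the
+ # given time.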
+ remove_images + + rbd trash ls | wc -l | grep 0 + rbd trash purge + + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd create $RBD_CREATE_ARGS --size 256 testimg2 + rbd trash mv testimg1 + rbd trash mv testimg2 + rbd trash ls | wc -l | grep 2 + rbd trash purge + rbd trash ls | wc -l | grep 0 + + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd create $RBD_CREATE_ARGS --size 256 testimg2 + rbd trash mv testimg1 --expires-at "1 hour" + rbd trash mv testimg2 --expires-at "3 hours" + rbd trash ls | wc -l | grep 2 + rbd trash purge + rbd trash ls | wc -l | grep 2 + rbd trash purge --expired-before "now + 2 hours" + rbd trash ls | wc -l | grep 1 + rbd trash ls | grep testimg2 + rbd trash purge --expired-before "now + 4 hours" + rbd trash ls | wc -l | grep 0 + + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd snap create testimg1@snap # pin testimg1 + rbd create $RBD_CREATE_ARGS --size 256 testimg2 + rbd create $RBD_CREATE_ARGS --size 256 testimg3 + rbd trash mv testimg1 + rbd trash mv testimg2 + rbd trash mv testimg3 + rbd trash ls | wc -l | grep 3 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 1 + rbd trash ls | grep testimg1 + ID=$(rbd trash ls | awk '{ print $1 }') + rbd snap purge --image-id $ID + rbd trash purge + rbd trash ls | wc -l | grep 0 + + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd create $RBD_CREATE_ARGS --size 256 testimg2 + rbd snap create testimg2@snap # pin testimg2 + rbd create $RBD_CREATE_ARGS --size 256 testimg3 + rbd trash mv testimg1 + rbd trash mv testimg2 + rbd trash mv testimg3 + rbd trash ls | wc -l | grep 3 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 1 + rbd trash ls | grep testimg2 + ID=$(rbd trash ls | awk '{ print $1 }') + rbd snap purge --image-id $ID + rbd trash purge + rbd trash ls | wc -l | grep 0 + + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd create $RBD_CREATE_ARGS --size 256 testimg2 + rbd create $RBD_CREATE_ARGS --size 256 testimg3 + rbd snap create testimg3@snap # pin testimg3 + rbd trash mv testimg1 + rbd trash mv testimg2 + rbd trash mv testimg3 + rbd trash ls | wc -l | grep 3 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 1 + rbd trash ls | grep testimg3 + ID=$(rbd trash ls | awk '{ print $1 }') + rbd snap purge --image-id $ID + rbd trash purge + rbd trash ls | wc -l | grep 0 + + # test purging a clone with a chain of parents + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd snap create testimg1@snap + rbd clone --rbd-default-clone-format=2 testimg1@snap testimg2 + rbd snap rm testimg1@snap + rbd create $RBD_CREATE_ARGS --size 256 testimg3 + rbd snap create testimg2@snap + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg4 + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg5 + rbd snap rm testimg2@snap + rbd snap create testimg4@snap + rbd clone --rbd-default-clone-format=2 testimg4@snap testimg6 + rbd snap rm testimg4@snap + rbd trash mv testimg1 + rbd trash mv testimg2 + rbd trash mv testimg3 + rbd trash mv testimg4 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 3 + rbd trash ls | grep testimg1 + rbd trash ls | grep testimg2 + rbd trash ls | grep testimg4 + rbd trash mv testimg6 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 2 + rbd trash ls | 
grep testimg1 + rbd trash ls | grep testimg2 + rbd trash mv testimg5 + rbd trash ls | wc -l | grep 3 + rbd trash purge + rbd trash ls | wc -l | grep 0 + + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd snap create testimg1@snap + rbd clone --rbd-default-clone-format=2 testimg1@snap testimg2 + rbd snap rm testimg1@snap + rbd create $RBD_CREATE_ARGS --size 256 testimg3 + rbd snap create testimg3@snap # pin testimg3 + rbd snap create testimg2@snap + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg4 + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg5 + rbd snap rm testimg2@snap + rbd snap create testimg4@snap + rbd clone --rbd-default-clone-format=2 testimg4@snap testimg6 + rbd snap rm testimg4@snap + rbd trash mv testimg1 + rbd trash mv testimg2 + rbd trash mv testimg3 + rbd trash mv testimg4 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 4 + rbd trash mv testimg6 + rbd trash ls | wc -l | grep 5 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 3 + rbd trash ls | grep testimg1 + rbd trash ls | grep testimg2 + rbd trash ls | grep testimg3 + rbd trash mv testimg5 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 1 + rbd trash ls | grep testimg3 + ID=$(rbd trash ls | awk '{ print $1 }') + rbd snap purge --image-id $ID + rbd trash purge + rbd trash ls | wc -l | grep 0 + + # test purging a clone with a chain of auto-delete parents + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd snap create testimg1@snap + rbd clone --rbd-default-clone-format=2 testimg1@snap testimg2 + rbd snap rm testimg1@snap + rbd create $RBD_CREATE_ARGS --size 256 testimg3 + rbd snap create testimg2@snap + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg4 + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg5 + rbd snap rm testimg2@snap + rbd snap create testimg4@snap + rbd clone --rbd-default-clone-format=2 testimg4@snap testimg6 + rbd snap rm testimg4@snap + rbd rm --rbd_move_parent_to_trash_on_remove=true testimg1 + rbd rm --rbd_move_parent_to_trash_on_remove=true testimg2 + rbd trash mv testimg3 + rbd rm --rbd_move_parent_to_trash_on_remove=true testimg4 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 3 + rbd trash ls | grep testimg1 + rbd trash ls | grep testimg2 + rbd trash ls | grep testimg4 + rbd trash mv testimg6 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 2 + rbd trash ls | grep testimg1 + rbd trash ls | grep testimg2 + rbd trash mv testimg5 + rbd trash ls | wc -l | grep 3 + rbd trash purge + rbd trash ls | wc -l | grep 0 + + rbd create $RBD_CREATE_ARGS --size 256 testimg1 + rbd snap create testimg1@snap + rbd clone --rbd-default-clone-format=2 testimg1@snap testimg2 + rbd snap rm testimg1@snap + rbd create $RBD_CREATE_ARGS --size 256 testimg3 + rbd snap create testimg3@snap # pin testimg3 + rbd snap create testimg2@snap + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg4 + rbd clone --rbd-default-clone-format=2 testimg2@snap testimg5 + rbd snap rm testimg2@snap + rbd snap create testimg4@snap + rbd clone --rbd-default-clone-format=2 testimg4@snap testimg6 + rbd snap rm testimg4@snap + rbd rm --rbd_move_parent_to_trash_on_remove=true testimg1 + rbd rm 
--rbd_move_parent_to_trash_on_remove=true testimg2 + rbd trash mv testimg3 + rbd rm --rbd_move_parent_to_trash_on_remove=true testimg4 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 4 + rbd trash mv testimg6 + rbd trash ls | wc -l | grep 5 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 3 + rbd trash ls | grep testimg1 + rbd trash ls | grep testimg2 + rbd trash ls | grep testimg3 + rbd trash mv testimg5 + rbd trash ls | wc -l | grep 4 + rbd trash purge 2>&1 | grep 'some expired images could not be removed' + rbd trash ls | wc -l | grep 1 + rbd trash ls | grep testimg3 + ID=$(rbd trash ls | awk '{ print $1 }') + rbd snap purge --image-id $ID + rbd trash purge + rbd trash ls | wc -l | grep 0 +} + +test_deep_copy_clone() { + echo "testing deep copy clone..." + remove_images + + rbd create testimg1 $RBD_CREATE_ARGS --size 256 + rbd snap create testimg1 --snap=snap1 + rbd snap protect testimg1@snap1 + rbd clone testimg1@snap1 testimg2 + rbd snap create testimg2@snap2 + rbd deep copy testimg2 testimg3 + rbd info testimg3 | grep 'size 256 MiB' + rbd info testimg3 | grep 'parent: rbd/testimg1@snap1' + rbd snap ls testimg3 | grep -v 'SNAPID' | wc -l | grep 1 + rbd snap ls testimg3 | grep '.*snap2.*' + rbd info testimg2 | grep 'features:.*deep-flatten' || rbd snap rm testimg2@snap2 + rbd info testimg3 | grep 'features:.*deep-flatten' || rbd snap rm testimg3@snap2 + rbd flatten testimg2 + rbd flatten testimg3 + rbd snap unprotect testimg1@snap1 + rbd snap purge testimg2 + rbd snap purge testimg3 + rbd rm testimg2 + rbd rm testimg3 + + rbd snap protect testimg1@snap1 + rbd clone testimg1@snap1 testimg2 + rbd snap create testimg2@snap2 + rbd deep copy --flatten testimg2 testimg3 + rbd info testimg3 | grep 'size 256 MiB' + rbd info testimg3 | grep -v 'parent:' + rbd snap ls testimg3 | grep -v 'SNAPID' | wc -l | grep 1 + rbd snap ls testimg3 | grep '.*snap2.*' + rbd info testimg2 | grep 'features:.*deep-flatten' || rbd snap rm testimg2@snap2 + rbd flatten testimg2 + rbd snap unprotect testimg1@snap1 + + remove_images +} + +test_clone_v2() { + echo "testing clone v2..." 
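+ # Clone format 1 requires the parent snapshot to be protected first, while
+ # format 2 (--rbd-default-clone-format=2) does not; removing a parent snapshot
+ # that still has v2 children moves it to the trash, which shows up as
+ # 'trash (1)' in 'rbd snap list --all'.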
+ remove_images + + rbd create $RBD_CREATE_ARGS -s 1 test1 + rbd snap create test1@1 + rbd clone --rbd-default-clone-format=1 test1@1 test2 && exit 1 || true + rbd clone --rbd-default-clone-format=2 test1@1 test2 + rbd clone --rbd-default-clone-format=2 test1@1 test3 + + rbd snap protect test1@1 + rbd clone --rbd-default-clone-format=1 test1@1 test4 + + rbd children test1@1 | sort | tr '\n' ' ' | grep -E "test2.*test3.*test4" + rbd children --descendants test1 | sort | tr '\n' ' ' | grep -E "test2.*test3.*test4" + + rbd remove test4 + rbd snap unprotect test1@1 + + rbd snap remove test1@1 + rbd snap list --all test1 | grep -E "trash \(1\) *$" + + rbd snap create test1@2 + rbd rm test1 2>&1 | grep 'image has snapshots' + + rbd snap rm test1@2 + rbd rm test1 2>&1 | grep 'linked clones' + + rbd rm test3 + rbd rm test1 2>&1 | grep 'linked clones' + + rbd flatten test2 + rbd snap list --all test1 | wc -l | grep '^0$' + rbd rm test1 + rbd rm test2 + + rbd create $RBD_CREATE_ARGS -s 1 test1 + rbd snap create test1@1 + rbd snap create test1@2 + rbd clone test1@1 test2 --rbd-default-clone-format 2 + rbd clone test1@2 test3 --rbd-default-clone-format 2 + rbd snap rm test1@1 + rbd snap rm test1@2 + expect_fail rbd rm test1 + rbd rm test1 --rbd-move-parent-to-trash-on-remove=true + rbd trash ls -a | grep test1 + rbd rm test2 + rbd trash ls -a | grep test1 + rbd rm test3 + rbd trash ls -a | expect_fail grep test1 +} + +test_thick_provision() { + echo "testing thick provision..." + remove_images + + # Try to create small and large thick-pro image and + # check actual size. (64M and 4G) + + # Small thick-pro image test + rbd create $RBD_CREATE_ARGS --thick-provision -s 64M test1 + count=0 + ret="" + while [ $count -lt 10 ] + do + rbd du|grep test1|tr -s " "|cut -d " " -f 4-5|grep '^64 MiB' && ret=$? + if [ "$ret" = "0" ] + then + break; + fi + count=`expr $count + 1` + sleep 2 + done + rbd du + if [ "$ret" != "0" ] + then + exit 1 + fi + rbd rm test1 + rbd ls | grep test1 | wc -l | grep '^0$' + + # Large thick-pro image test + rbd create $RBD_CREATE_ARGS --thick-provision -s 4G test1 + count=0 + ret="" + while [ $count -lt 10 ] + do + rbd du|grep test1|tr -s " "|cut -d " " -f 4-5|grep '^4 GiB' && ret=$? + if [ "$ret" = "0" ] + then + break; + fi + count=`expr $count + 1` + sleep 2 + done + rbd du + if [ "$ret" != "0" ] + then + exit 1 + fi + rbd rm test1 + rbd ls | grep test1 | wc -l | grep '^0$' +} + +test_namespace() { + echo "testing namespace..." 
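+ # Images in a namespace are addressed as pool/namespace/image (or with
+ # --pool/--namespace). As exercised below, format 2 clones may cross
+ # namespaces within the pool, format 1 clones only work inside a single
+ # namespace, and a namespace cannot be removed while it still holds images.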
+ remove_images + + rbd namespace ls | wc -l | grep '^0$' + rbd namespace create rbd/test1 + rbd namespace create --pool rbd --namespace test2 + rbd namespace create --namespace test3 + expect_fail rbd namespace create rbd/test3 + + rbd namespace list | grep 'test' | wc -l | grep '^3$' + + expect_fail rbd namespace remove --pool rbd missing + + rbd create $RBD_CREATE_ARGS --size 1G rbd/test1/image1 + + # default test1 ns to test2 ns clone + rbd bench --io-type write --io-pattern rand --io-total 32M --io-size 4K rbd/test1/image1 + rbd snap create rbd/test1/image1@1 + rbd clone --rbd-default-clone-format 2 rbd/test1/image1@1 rbd/test2/image1 + rbd snap rm rbd/test1/image1@1 + cmp <(rbd export rbd/test1/image1 -) <(rbd export rbd/test2/image1 -) + rbd rm rbd/test2/image1 + + # default ns to test1 ns clone + rbd create $RBD_CREATE_ARGS --size 1G rbd/image2 + rbd bench --io-type write --io-pattern rand --io-total 32M --io-size 4K rbd/image2 + rbd snap create rbd/image2@1 + rbd clone --rbd-default-clone-format 2 rbd/image2@1 rbd/test2/image2 + rbd snap rm rbd/image2@1 + cmp <(rbd export rbd/image2 -) <(rbd export rbd/test2/image2 -) + expect_fail rbd rm rbd/image2 + rbd rm rbd/test2/image2 + rbd rm rbd/image2 + + # v1 clones are supported within the same namespace + rbd create $RBD_CREATE_ARGS --size 1G rbd/test1/image3 + rbd snap create rbd/test1/image3@1 + rbd snap protect rbd/test1/image3@1 + rbd clone --rbd-default-clone-format 1 rbd/test1/image3@1 rbd/test1/image4 + rbd rm rbd/test1/image4 + rbd snap unprotect rbd/test1/image3@1 + rbd snap rm rbd/test1/image3@1 + rbd rm rbd/test1/image3 + + rbd create $RBD_CREATE_ARGS --size 1G --namespace test1 image2 + expect_fail rbd namespace remove rbd/test1 + + rbd group create rbd/test1/group1 + rbd group image add rbd/test1/group1 rbd/test1/image1 + rbd group rm rbd/test1/group1 + + rbd trash move rbd/test1/image1 + ID=`rbd trash --namespace test1 ls | cut -d ' ' -f 1` + rbd trash rm rbd/test1/${ID} + + rbd remove rbd/test1/image2 + + rbd namespace remove --pool rbd --namespace test1 + rbd namespace remove --namespace test3 + + rbd namespace list | grep 'test' | wc -l | grep '^1$' + rbd namespace remove rbd/test2 +} + +get_migration_state() { + local image=$1 + + rbd --format xml status $image | + $XMLSTARLET sel -t -v '//status/migration/state' +} + +test_migration() { + echo "testing migration..." 
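+ # Live migration walks prepare -> execute -> commit; until commit the
+ # migration can be rolled back with 'rbd migration abort'. The current phase
+ # is read back via get_migration_state() above, which parses the XML output
+ # of 'rbd status'.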
+ remove_images + ceph osd pool create rbd2 8 + rbd pool init rbd2 + + # Convert to new format + rbd create --image-format 1 -s 128M test1 + rbd info test1 | grep 'format: 1' + rbd migration prepare test1 --image-format 2 + test "$(get_migration_state test1)" = prepared + rbd info test1 | grep 'format: 2' + rbd rm test1 && exit 1 || true + rbd migration execute test1 + test "$(get_migration_state test1)" = executed + rbd migration commit test1 + get_migration_state test1 && exit 1 || true + + # Enable layering (and some other features) + rbd info test1 | grep 'features: .*layering' && exit 1 || true + rbd migration prepare test1 --image-feature \ + layering,exclusive-lock,object-map,fast-diff,deep-flatten + rbd info test1 | grep 'features: .*layering' + rbd migration execute test1 + rbd migration commit test1 + + # Migration to other pool + rbd migration prepare test1 rbd2/test1 + test "$(get_migration_state rbd2/test1)" = prepared + rbd ls | wc -l | grep '^0$' + rbd -p rbd2 ls | grep test1 + rbd migration execute test1 + test "$(get_migration_state rbd2/test1)" = executed + rbd rm rbd2/test1 && exit 1 || true + rbd migration commit test1 + + # Migration to other namespace + rbd namespace create rbd2/ns1 + rbd namespace create rbd2/ns2 + rbd migration prepare rbd2/test1 rbd2/ns1/test1 + test "$(get_migration_state rbd2/ns1/test1)" = prepared + rbd migration execute rbd2/test1 + test "$(get_migration_state rbd2/ns1/test1)" = executed + rbd migration commit rbd2/test1 + rbd migration prepare rbd2/ns1/test1 rbd2/ns2/test1 + rbd migration execute rbd2/ns2/test1 + rbd migration commit rbd2/ns2/test1 + + # Enable data pool + rbd create -s 128M test1 + rbd migration prepare test1 --data-pool rbd2 + rbd info test1 | grep 'data_pool: rbd2' + rbd migration execute test1 + rbd migration commit test1 + + # testing trash + rbd migration prepare test1 + expect_fail rbd trash mv test1 + ID=`rbd trash ls -a | cut -d ' ' -f 1` + expect_fail rbd trash rm $ID + expect_fail rbd trash restore $ID + rbd migration abort test1 + + # Migrate parent + rbd remove test1 + dd if=/dev/urandom bs=1M count=1 | rbd --image-format 2 import - test1 + md5sum=$(rbd export test1 - | md5sum) + rbd snap create test1@snap1 + rbd snap protect test1@snap1 + rbd snap create test1@snap2 + rbd clone test1@snap1 clone_v1 --rbd_default_clone_format=1 + rbd clone test1@snap2 clone_v2 --rbd_default_clone_format=2 + rbd info clone_v1 | fgrep 'parent: rbd/test1@snap1' + rbd info clone_v2 | fgrep 'parent: rbd/test1@snap2' + rbd info clone_v2 |grep 'op_features: clone-child' + test "$(rbd export clone_v1 - | md5sum)" = "${md5sum}" + test "$(rbd export clone_v2 - | md5sum)" = "${md5sum}" + test "$(rbd children test1@snap1)" = "rbd/clone_v1" + test "$(rbd children test1@snap2)" = "rbd/clone_v2" + rbd migration prepare test1 rbd2/test2 + rbd info clone_v1 | fgrep 'parent: rbd2/test2@snap1' + rbd info clone_v2 | fgrep 'parent: rbd2/test2@snap2' + rbd info clone_v2 | fgrep 'op_features: clone-child' + test "$(rbd children rbd2/test2@snap1)" = "rbd/clone_v1" + test "$(rbd children rbd2/test2@snap2)" = "rbd/clone_v2" + rbd migration execute test1 + expect_fail rbd migration commit test1 + rbd migration commit test1 --force + test "$(rbd export clone_v1 - | md5sum)" = "${md5sum}" + test "$(rbd export clone_v2 - | md5sum)" = "${md5sum}" + rbd migration prepare rbd2/test2 test1 + rbd info clone_v1 | fgrep 'parent: rbd/test1@snap1' + rbd info clone_v2 | fgrep 'parent: rbd/test1@snap2' + rbd info clone_v2 | fgrep 'op_features: clone-child' + test "$(rbd 
children test1@snap1)" = "rbd/clone_v1" + test "$(rbd children test1@snap2)" = "rbd/clone_v2" + rbd migration execute test1 + expect_fail rbd migration commit test1 + rbd migration commit test1 --force + test "$(rbd export clone_v1 - | md5sum)" = "${md5sum}" + test "$(rbd export clone_v2 - | md5sum)" = "${md5sum}" + rbd remove clone_v1 + rbd remove clone_v2 + rbd snap unprotect test1@snap1 + rbd snap purge test1 + rbd rm test1 + + for format in 1 2; do + # Abort migration after successful prepare + rbd create -s 128M --image-format ${format} test2 + rbd migration prepare test2 --data-pool rbd2 + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd migration abort test2 + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd rm test2 + + # Abort migration after successful execute + rbd create -s 128M --image-format ${format} test2 + rbd migration prepare test2 --data-pool rbd2 + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd migration execute test2 + rbd migration abort test2 + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd rm test2 + + # Migration is automatically aborted if prepare failed + rbd create -s 128M --image-format ${format} test2 + rbd migration prepare test2 --data-pool INVALID_DATA_POOL && exit 1 || true + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd rm test2 + + # Abort migration to other pool + rbd create -s 128M --image-format ${format} test2 + rbd migration prepare test2 rbd2/test2 + rbd bench --io-type write --io-size 1024 --io-total 1024 rbd2/test2 + rbd migration abort test2 + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd rm test2 + + # The same but abort using destination image + rbd create -s 128M --image-format ${format} test2 + rbd migration prepare test2 rbd2/test2 + rbd migration abort rbd2/test2 + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd rm test2 + + test $format = 1 && continue + + # Abort migration to other namespace + rbd create -s 128M --image-format ${format} test2 + rbd migration prepare test2 rbd2/ns1/test3 + rbd bench --io-type write --io-size 1024 --io-total 1024 rbd2/ns1/test3 + rbd migration abort test2 + rbd bench --io-type write --io-size 1024 --io-total 1024 test2 + rbd rm test2 + done + + remove_images + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it +} + +test_config() { + echo "testing config..." 
+ remove_images + + expect_fail rbd config global set osd rbd_cache true + expect_fail rbd config global set global debug_ms 10 + expect_fail rbd config global set global rbd_UNKNOWN false + expect_fail rbd config global set global rbd_cache INVALID + rbd config global set global rbd_cache false + rbd config global set client rbd_cache true + rbd config global set client.123 rbd_cache false + rbd config global get global rbd_cache | grep '^false$' + rbd config global get client rbd_cache | grep '^true$' + rbd config global get client.123 rbd_cache | grep '^false$' + expect_fail rbd config global get client.UNKNOWN rbd_cache + rbd config global list global | grep '^rbd_cache * false * global *$' + rbd config global list client | grep '^rbd_cache * true * client *$' + rbd config global list client.123 | grep '^rbd_cache * false * client.123 *$' + rbd config global list client.UNKNOWN | grep '^rbd_cache * true * client *$' + rbd config global rm client rbd_cache + expect_fail rbd config global get client rbd_cache + rbd config global list client | grep '^rbd_cache * false * global *$' + rbd config global rm client.123 rbd_cache + rbd config global rm global rbd_cache + + rbd config pool set rbd rbd_cache true + rbd config pool list rbd | grep '^rbd_cache * true * pool *$' + rbd config pool get rbd rbd_cache | grep '^true$' + + rbd create $RBD_CREATE_ARGS -s 1 test1 + + rbd config image list rbd/test1 | grep '^rbd_cache * true * pool *$' + rbd config image set rbd/test1 rbd_cache false + rbd config image list rbd/test1 | grep '^rbd_cache * false * image *$' + rbd config image get rbd/test1 rbd_cache | grep '^false$' + rbd config image remove rbd/test1 rbd_cache + expect_fail rbd config image get rbd/test1 rbd_cache + rbd config image list rbd/test1 | grep '^rbd_cache * true * pool *$' + + rbd config pool remove rbd rbd_cache + expect_fail rbd config pool get rbd rbd_cache + rbd config pool list rbd | grep '^rbd_cache * true * config *$' + + rbd rm test1 +} + +test_trash_purge_schedule() { + echo "testing trash purge schedule..." 
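+ # Background for the checks below: trash purge schedules are handled by the
+ # mgr rbd_support module and can be added at the cluster, pool or namespace
+ # level; 'ls -R' lists them recursively while 'status' shows what the module
+ # has actually picked up, hence the polling loops with 'sleep' further down.
+ # Illustrative sketch with a hypothetical pool name, not part of the test flow:
+ #   rbd trash purge schedule add -p mypool 1d 01:30   # purge daily at 01:30
+ #   rbd trash purge schedule ls -p mypool -R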
+ remove_images + ceph osd pool create rbd2 8 + rbd pool init rbd2 + rbd namespace create rbd2/ns1 + + test "$(ceph rbd trash purge schedule list)" = "{}" + ceph rbd trash purge schedule status | fgrep '"scheduled": []' + + expect_fail rbd trash purge schedule ls + test "$(rbd trash purge schedule ls -R --format json)" = "[]" + + rbd trash purge schedule add -p rbd 1d 01:30 + + rbd trash purge schedule ls -p rbd | grep 'every 1d starting at 01:30' + expect_fail rbd trash purge schedule ls + rbd trash purge schedule ls -R | grep 'every 1d starting at 01:30' + rbd trash purge schedule ls -R -p rbd | grep 'every 1d starting at 01:30' + expect_fail rbd trash purge schedule ls -p rbd2 + test "$(rbd trash purge schedule ls -p rbd2 -R --format json)" = "[]" + + rbd trash purge schedule add -p rbd2/ns1 2d + test "$(rbd trash purge schedule ls -p rbd2 -R --format json)" != "[]" + rbd trash purge schedule ls -p rbd2 -R | grep 'rbd2 *ns1 *every 2d' + rbd trash purge schedule rm -p rbd2/ns1 + test "$(rbd trash purge schedule ls -p rbd2 -R --format json)" = "[]" + + for i in `seq 12`; do + test "$(rbd trash purge schedule status --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool')" = 'rbd' && break + sleep 10 + done + rbd trash purge schedule status + test "$(rbd trash purge schedule status --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool')" = 'rbd' + test "$(rbd trash purge schedule status -p rbd --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool')" = 'rbd' + + rbd trash purge schedule add 2d 00:17 + rbd trash purge schedule ls | grep 'every 2d starting at 00:17' + rbd trash purge schedule ls -R | grep 'every 2d starting at 00:17' + expect_fail rbd trash purge schedule ls -p rbd2 + rbd trash purge schedule ls -p rbd2 -R | grep 'every 2d starting at 00:17' + rbd trash purge schedule ls -p rbd2/ns1 -R | grep 'every 2d starting at 00:17' + test "$(rbd trash purge schedule ls -R -p rbd2/ns1 --format xml | + $XMLSTARLET sel -t -v '//schedules/schedule/pool')" = "-" + test "$(rbd trash purge schedule ls -R -p rbd2/ns1 --format xml | + $XMLSTARLET sel -t -v '//schedules/schedule/namespace')" = "-" + test "$(rbd trash purge schedule ls -R -p rbd2/ns1 --format xml | + $XMLSTARLET sel -t -v '//schedules/schedule/items/item/start_time')" = "00:17:00" + + for i in `seq 12`; do + rbd trash purge schedule status --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool' | grep 'rbd2' && break + sleep 10 + done + rbd trash purge schedule status + rbd trash purge schedule status --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool' | grep 'rbd2' + echo $(rbd trash purge schedule status --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool') | grep 'rbd rbd2 rbd2' + test "$(rbd trash purge schedule status -p rbd --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool')" = 'rbd' + test "$(echo $(rbd trash purge schedule status -p rbd2 --format xml | + $XMLSTARLET sel -t -v '//scheduled/item/pool'))" = 'rbd2 rbd2' + + test "$(echo $(rbd trash purge schedule ls -R --format xml | + $XMLSTARLET sel -t -v '//schedules/schedule/items'))" = "2d00:17:00 1d01:30:00" + + rbd trash purge schedule add 1d + rbd trash purge schedule ls | grep 'every 2d starting at 00:17' + rbd trash purge schedule ls | grep 'every 1d' + + rbd trash purge schedule ls -R --format xml | + $XMLSTARLET sel -t -v '//schedules/schedule/items' | grep '2d00:17' + + rbd trash purge schedule rm 1d + rbd trash purge schedule ls | grep 'every 2d starting at 00:17' + rbd trash purge schedule rm 2d 
00:17 + expect_fail rbd trash purge schedule ls + + for p in rbd2 rbd2/ns1; do + rbd create $RBD_CREATE_ARGS -s 1 rbd2/ns1/test1 + rbd trash mv rbd2/ns1/test1 + rbd trash ls rbd2/ns1 | wc -l | grep '^1$' + + rbd trash purge schedule add -p $p 1m + rbd trash purge schedule list -p rbd2 -R | grep 'every 1m' + rbd trash purge schedule list -p rbd2/ns1 -R | grep 'every 1m' + + for i in `seq 12`; do + rbd trash ls rbd2/ns1 | wc -l | grep '^1$' || break + sleep 10 + done + rbd trash ls rbd2/ns1 | wc -l | grep '^0$' + + # repeat with kicked in schedule, see https://tracker.ceph.com/issues/53915 + rbd trash purge schedule list -p rbd2 -R | grep 'every 1m' + rbd trash purge schedule list -p rbd2/ns1 -R | grep 'every 1m' + + rbd trash purge schedule status | grep 'rbd2 *ns1' + rbd trash purge schedule status -p rbd2 | grep 'rbd2 *ns1' + rbd trash purge schedule status -p rbd2/ns1 | grep 'rbd2 *ns1' + + rbd trash purge schedule rm -p $p 1m + done + + # Negative tests + rbd trash purge schedule add 2m + expect_fail rbd trash purge schedule add -p rbd dummy + expect_fail rbd trash purge schedule add dummy + expect_fail rbd trash purge schedule remove -p rbd dummy + expect_fail rbd trash purge schedule remove dummy + rbd trash purge schedule ls -p rbd | grep 'every 1d starting at 01:30' + rbd trash purge schedule ls | grep 'every 2m' + rbd trash purge schedule remove -p rbd 1d 01:30 + rbd trash purge schedule remove 2m + test "$(rbd trash purge schedule ls -R --format json)" = "[]" + + remove_images + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it +} + +test_trash_purge_schedule_recovery() { + echo "testing recovery of trash_purge_schedule handler after module's RADOS client is blocklisted..." + remove_images + ceph osd pool create rbd3 8 + rbd pool init rbd3 + rbd namespace create rbd3/ns1 + + rbd trash purge schedule add -p rbd3/ns1 2d + rbd trash purge schedule ls -p rbd3 -R | grep 'rbd3 *ns1 *every 2d' + + # Fetch and blocklist the rbd_support module's RADOS client + CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] | + jq 'select(.name == "rbd_support")' | + jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') + ceph osd blocklist add $CLIENT_ADDR + ceph osd blocklist ls | grep $CLIENT_ADDR + + # Check that you can add a trash purge schedule after a few retries + expect_fail rbd trash purge schedule add -p rbd3 10m + sleep 10 + for i in `seq 24`; do + rbd trash purge schedule add -p rbd3 10m && break + sleep 10 + done + + rbd trash purge schedule ls -p rbd3 -R | grep 'every 10m' + # Verify that the schedule present before client blocklisting is preserved + rbd trash purge schedule ls -p rbd3 -R | grep 'rbd3 *ns1 *every 2d' + + rbd trash purge schedule remove -p rbd3 10m + rbd trash purge schedule remove -p rbd3/ns1 2d + rbd trash purge schedule ls -p rbd3 -R | expect_fail grep 'every 10m' + rbd trash purge schedule ls -p rbd3 -R | expect_fail grep 'rbd3 *ns1 *every 2d' + + ceph osd pool rm rbd3 rbd3 --yes-i-really-really-mean-it + +} + +test_mirror_snapshot_schedule() { + echo "testing mirror snapshot schedule..." 
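+ # Background for the checks below: with snapshot-based mirroring the mgr
+ # rbd_support module creates mirror snapshots at the scheduled interval, so
+ # the test counts 'mirror.primary' entries in the image status to confirm
+ # the scheduler is running and expects it to stop once the image is demoted
+ # or removed. Illustrative sketch with hypothetical names, not part of the
+ # test flow:
+ #   rbd mirror image enable mypool/myimg snapshot
+ #   rbd mirror snapshot schedule add -p mypool --image myimg 1m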
+ remove_images + ceph osd pool create rbd2 8 + rbd pool init rbd2 + rbd namespace create rbd2/ns1 + + rbd mirror pool enable rbd2 image + rbd mirror pool enable rbd2/ns1 image + rbd mirror pool peer add rbd2 cluster1 + + test "$(ceph rbd mirror snapshot schedule list)" = "{}" + ceph rbd mirror snapshot schedule status | fgrep '"scheduled_images": []' + + expect_fail rbd mirror snapshot schedule ls + test "$(rbd mirror snapshot schedule ls -R --format json)" = "[]" + + rbd create $RBD_CREATE_ARGS -s 1 rbd2/ns1/test1 + + test "$(rbd mirror image status rbd2/ns1/test1 | + grep -c mirror.primary)" = '0' + + rbd mirror image enable rbd2/ns1/test1 snapshot + + test "$(rbd mirror image status rbd2/ns1/test1 | + grep -c mirror.primary)" = '1' + + rbd mirror snapshot schedule add -p rbd2/ns1 --image test1 1m + expect_fail rbd mirror snapshot schedule ls + rbd mirror snapshot schedule ls -R | grep 'rbd2 *ns1 *test1 *every 1m' + expect_fail rbd mirror snapshot schedule ls -p rbd2 + rbd mirror snapshot schedule ls -p rbd2 -R | grep 'rbd2 *ns1 *test1 *every 1m' + expect_fail rbd mirror snapshot schedule ls -p rbd2/ns1 + rbd mirror snapshot schedule ls -p rbd2/ns1 -R | grep 'rbd2 *ns1 *test1 *every 1m' + test "$(rbd mirror snapshot schedule ls -p rbd2/ns1 --image test1)" = 'every 1m' + + for i in `seq 12`; do + test "$(rbd mirror image status rbd2/ns1/test1 | + grep -c mirror.primary)" -gt '1' && break + sleep 10 + done + + test "$(rbd mirror image status rbd2/ns1/test1 | + grep -c mirror.primary)" -gt '1' + + # repeat with kicked in schedule, see https://tracker.ceph.com/issues/53915 + expect_fail rbd mirror snapshot schedule ls + rbd mirror snapshot schedule ls -R | grep 'rbd2 *ns1 *test1 *every 1m' + expect_fail rbd mirror snapshot schedule ls -p rbd2 + rbd mirror snapshot schedule ls -p rbd2 -R | grep 'rbd2 *ns1 *test1 *every 1m' + expect_fail rbd mirror snapshot schedule ls -p rbd2/ns1 + rbd mirror snapshot schedule ls -p rbd2/ns1 -R | grep 'rbd2 *ns1 *test1 *every 1m' + test "$(rbd mirror snapshot schedule ls -p rbd2/ns1 --image test1)" = 'every 1m' + + rbd mirror snapshot schedule status + test "$(rbd mirror snapshot schedule status --format xml | + $XMLSTARLET sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' + test "$(rbd mirror snapshot schedule status -p rbd2 --format xml | + $XMLSTARLET sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' + test "$(rbd mirror snapshot schedule status -p rbd2/ns1 --format xml | + $XMLSTARLET sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' + test "$(rbd mirror snapshot schedule status -p rbd2/ns1 --image test1 --format xml | + $XMLSTARLET sel -t -v '//scheduled_images/image/image')" = 'rbd2/ns1/test1' + + rbd mirror image demote rbd2/ns1/test1 + for i in `seq 12`; do + rbd mirror snapshot schedule status | grep 'rbd2/ns1/test1' || break + sleep 10 + done + rbd mirror snapshot schedule status | expect_fail grep 'rbd2/ns1/test1' + + rbd mirror image promote rbd2/ns1/test1 + for i in `seq 12`; do + rbd mirror snapshot schedule status | grep 'rbd2/ns1/test1' && break + sleep 10 + done + rbd mirror snapshot schedule status | grep 'rbd2/ns1/test1' + + rbd mirror snapshot schedule add 1h 00:15 + test "$(rbd mirror snapshot schedule ls)" = 'every 1h starting at 00:15:00' + rbd mirror snapshot schedule ls -R | grep 'every 1h starting at 00:15:00' + rbd mirror snapshot schedule ls -R | grep 'rbd2 *ns1 *test1 *every 1m' + expect_fail rbd mirror snapshot schedule ls -p rbd2 + rbd mirror snapshot schedule ls -p rbd2 -R | grep 
'every 1h starting at 00:15:00' + rbd mirror snapshot schedule ls -p rbd2 -R | grep 'rbd2 *ns1 *test1 *every 1m' + expect_fail rbd mirror snapshot schedule ls -p rbd2/ns1 + rbd mirror snapshot schedule ls -p rbd2/ns1 -R | grep 'every 1h starting at 00:15:00' + rbd mirror snapshot schedule ls -p rbd2/ns1 -R | grep 'rbd2 *ns1 *test1 *every 1m' + test "$(rbd mirror snapshot schedule ls -p rbd2/ns1 --image test1)" = 'every 1m' + + # Negative tests + expect_fail rbd mirror snapshot schedule add dummy + expect_fail rbd mirror snapshot schedule add -p rbd2/ns1 --image test1 dummy + expect_fail rbd mirror snapshot schedule remove dummy + expect_fail rbd mirror snapshot schedule remove -p rbd2/ns1 --image test1 dummy + test "$(rbd mirror snapshot schedule ls)" = 'every 1h starting at 00:15:00' + test "$(rbd mirror snapshot schedule ls -p rbd2/ns1 --image test1)" = 'every 1m' + + rbd rm rbd2/ns1/test1 + for i in `seq 12`; do + rbd mirror snapshot schedule status | grep 'rbd2/ns1/test1' || break + sleep 10 + done + rbd mirror snapshot schedule status | expect_fail grep 'rbd2/ns1/test1' + + rbd mirror snapshot schedule remove + test "$(rbd mirror snapshot schedule ls -R --format json)" = "[]" + + remove_images + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it +} + +test_mirror_snapshot_schedule_recovery() { + echo "testing recovery of mirror snapshot scheduler after module's RADOS client is blocklisted..." + remove_images + ceph osd pool create rbd3 8 + rbd pool init rbd3 + rbd namespace create rbd3/ns1 + + rbd mirror pool enable rbd3 image + rbd mirror pool enable rbd3/ns1 image + rbd mirror pool peer add rbd3 cluster1 + + rbd create $RBD_CREATE_ARGS -s 1 rbd3/ns1/test1 + rbd mirror image enable rbd3/ns1/test1 snapshot + test "$(rbd mirror image status rbd3/ns1/test1 | + grep -c mirror.primary)" = '1' + + rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 1m + test "$(rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1)" = 'every 1m' + + # Fetch and blocklist rbd_support module's RADOS client + CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] | + jq 'select(.name == "rbd_support")' | + jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') + ceph osd blocklist add $CLIENT_ADDR + ceph osd blocklist ls | grep $CLIENT_ADDR + + # Check that you can add a mirror snapshot schedule after a few retries + expect_fail rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 2m + sleep 10 + for i in `seq 24`; do + rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 2m && break + sleep 10 + done + + rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | grep 'every 2m' + # Verify that the schedule present before client blocklisting is preserved + rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | grep 'every 1m' + + rbd mirror snapshot schedule rm -p rbd3/ns1 --image test1 2m + rbd mirror snapshot schedule rm -p rbd3/ns1 --image test1 1m + rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | expect_fail grep 'every 2m' + rbd mirror snapshot schedule ls -p rbd3/ns1 --image test1 | expect_fail grep 'every 1m' + + rbd snap purge rbd3/ns1/test1 + rbd rm rbd3/ns1/test1 + ceph osd pool rm rbd3 rbd3 --yes-i-really-really-mean-it +} + +test_perf_image_iostat() { + echo "testing perf image iostat..." 
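+ # Background for the checks below: 'rbd perf image iostat' reports per-image
+ # I/O statistics that the mgr gathers from the OSDs, and the pool/namespace
+ # to report on can be given either as a spec argument or via
+ # --pool/--namespace; omitting both reports across all pools. The jq
+ # expressions below only extract the image names from the JSON output.
+ # Illustrative sketch with a hypothetical pool name, not part of the test flow:
+ #   rbd perf image iostat --format json mypool | jq -r 'map(.image) | join(" ")'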
+ remove_images + + ceph osd pool create rbd1 8 + rbd pool init rbd1 + rbd namespace create rbd1/ns + ceph osd pool create rbd2 8 + rbd pool init rbd2 + rbd namespace create rbd2/ns + + IMAGE_SPECS=("test1" "rbd1/test2" "rbd1/ns/test3" "rbd2/test4" "rbd2/ns/test5") + for spec in "${IMAGE_SPECS[@]}"; do + # ensure all images are created without a separate data pool + # as we filter iostat by specific pool specs below + rbd create $RBD_CREATE_ARGS --size 10G --rbd-default-data-pool '' $spec + done + + BENCH_PIDS=() + for spec in "${IMAGE_SPECS[@]}"; do + rbd bench --io-type write --io-pattern rand --io-total 10G --io-threads 1 \ + --rbd-cache false $spec >/dev/null 2>&1 & + BENCH_PIDS+=($!) + done + + # test specifying pool spec via spec syntax + test "$(rbd perf image iostat --format json rbd1 | + jq -r 'map(.image) | sort | join(" ")')" = 'test2' + test "$(rbd perf image iostat --format json rbd1/ns | + jq -r 'map(.image) | sort | join(" ")')" = 'test3' + test "$(rbd perf image iostat --format json --rbd-default-pool rbd1 /ns | + jq -r 'map(.image) | sort | join(" ")')" = 'test3' + + # test specifying pool spec via options + test "$(rbd perf image iostat --format json --pool rbd2 | + jq -r 'map(.image) | sort | join(" ")')" = 'test4' + test "$(rbd perf image iostat --format json --pool rbd2 --namespace ns | + jq -r 'map(.image) | sort | join(" ")')" = 'test5' + test "$(rbd perf image iostat --format json --rbd-default-pool rbd2 --namespace ns | + jq -r 'map(.image) | sort | join(" ")')" = 'test5' + + # test omitting pool spec (-> GLOBAL_POOL_KEY) + test "$(rbd perf image iostat --format json | + jq -r 'map(.image) | sort | join(" ")')" = 'test1 test2 test3 test4 test5' + + for pid in "${BENCH_PIDS[@]}"; do + kill $pid + done + wait + + remove_images + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it + ceph osd pool rm rbd1 rbd1 --yes-i-really-really-mean-it +} + +test_perf_image_iostat_recovery() { + echo "testing recovery of perf handler after module's RADOS client is blocklisted..." + remove_images + + ceph osd pool create rbd3 8 + rbd pool init rbd3 + rbd namespace create rbd3/ns + + IMAGE_SPECS=("rbd3/test1" "rbd3/ns/test2") + for spec in "${IMAGE_SPECS[@]}"; do + # ensure all images are created without a separate data pool + # as we filter iostat by specific pool specs below + rbd create $RBD_CREATE_ARGS --size 10G --rbd-default-data-pool '' $spec + done + + BENCH_PIDS=() + for spec in "${IMAGE_SPECS[@]}"; do + rbd bench --io-type write --io-pattern rand --io-total 10G --io-threads 1 \ + --rbd-cache false $spec >/dev/null 2>&1 & + BENCH_PIDS+=($!) + done + + test "$(rbd perf image iostat --format json rbd3 | + jq -r 'map(.image) | sort | join(" ")')" = 'test1' + + # Fetch and blocklist the rbd_support module's RADOS client + CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] | + jq 'select(.name == "rbd_support")' | + jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') + ceph osd blocklist add $CLIENT_ADDR + ceph osd blocklist ls | grep $CLIENT_ADDR + + expect_fail rbd perf image iostat --format json rbd3/ns + sleep 10 + for i in `seq 24`; do + test "$(rbd perf image iostat --format json rbd3/ns | + jq -r 'map(.image) | sort | join(" ")')" = 'test2' && break + sleep 10 + done + + for pid in "${BENCH_PIDS[@]}"; do + kill $pid + done + wait + + remove_images + ceph osd pool rm rbd3 rbd3 --yes-i-really-really-mean-it +} + +test_mirror_pool_peer_bootstrap_create() { + echo "testing mirror pool peer bootstrap create..." 
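+ # Background for the checks below: the bootstrap token is base64-encoded
+ # JSON carrying the local cluster's fsid, a generated client id and key, and
+ # the monitor addresses, which is why it is decoded with 'base64 -d' and
+ # picked apart with jq; the same cluster-wide token is expected for every pool.
+ # Illustrative sketch, not part of the test flow:
+ #   rbd mirror pool peer bootstrap create rbd1 | base64 -d | jq .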
+ remove_images + + ceph osd pool create rbd1 8 + rbd pool init rbd1 + rbd mirror pool enable rbd1 image + ceph osd pool create rbd2 8 + rbd pool init rbd2 + rbd mirror pool enable rbd2 pool + + readarray -t MON_ADDRS < <(ceph mon dump | + sed -n 's/^[0-9]: \(.*\) mon\.[a-z]$/\1/p') + + # check that all monitors make it to the token even if only one + # valid monitor is specified + BAD_MON_ADDR="1.2.3.4:6789" + MON_HOST="${MON_ADDRS[0]},$BAD_MON_ADDR" + TOKEN="$(rbd mirror pool peer bootstrap create \ + --mon-host "$MON_HOST" rbd1 | base64 -d)" + TOKEN_FSID="$(jq -r '.fsid' <<< "$TOKEN")" + TOKEN_CLIENT_ID="$(jq -r '.client_id' <<< "$TOKEN")" + TOKEN_KEY="$(jq -r '.key' <<< "$TOKEN")" + TOKEN_MON_HOST="$(jq -r '.mon_host' <<< "$TOKEN")" + + test "$TOKEN_FSID" = "$(ceph fsid)" + test "$TOKEN_KEY" = "$(ceph auth get-key client.$TOKEN_CLIENT_ID)" + for addr in "${MON_ADDRS[@]}"; do + fgrep "$addr" <<< "$TOKEN_MON_HOST" + done + expect_fail fgrep "$BAD_MON_ADDR" <<< "$TOKEN_MON_HOST" + + # check that the token does not change, including across pools + test "$(rbd mirror pool peer bootstrap create \ + --mon-host "$MON_HOST" rbd1 | base64 -d)" = "$TOKEN" + test "$(rbd mirror pool peer bootstrap create \ + rbd1 | base64 -d)" = "$TOKEN" + test "$(rbd mirror pool peer bootstrap create \ + --mon-host "$MON_HOST" rbd2 | base64 -d)" = "$TOKEN" + test "$(rbd mirror pool peer bootstrap create \ + rbd2 | base64 -d)" = "$TOKEN" + + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it + ceph osd pool rm rbd1 rbd1 --yes-i-really-really-mean-it +} + +test_tasks_removed_pool() { + echo "testing removing pool under running tasks..." + remove_images + + ceph osd pool create rbd2 8 + rbd pool init rbd2 + + rbd create $RBD_CREATE_ARGS --size 1G foo + rbd snap create foo@snap + rbd snap protect foo@snap + rbd clone foo@snap bar + + rbd create $RBD_CREATE_ARGS --size 1G rbd2/dummy + rbd bench --io-type write --io-pattern seq --io-size 1M --io-total 1G rbd2/dummy + rbd snap create rbd2/dummy@snap + rbd snap protect rbd2/dummy@snap + for i in {1..5}; do + rbd clone rbd2/dummy@snap rbd2/dummy$i + done + + # queue flattens on a few dummy images and remove that pool + test "$(ceph rbd task list)" = "[]" + for i in {1..5}; do + ceph rbd task add flatten rbd2/dummy$i + done + ceph osd pool delete rbd2 rbd2 --yes-i-really-really-mean-it + test "$(ceph rbd task list)" != "[]" + + # queue flatten on another image and check that it completes + rbd info bar | grep 'parent: ' + expect_fail rbd snap unprotect foo@snap + ceph rbd task add flatten bar + for i in {1..12}; do + rbd info bar | grep 'parent: ' || break + sleep 10 + done + rbd info bar | expect_fail grep 'parent: ' + rbd snap unprotect foo@snap + + # check that flattens disrupted by pool removal are cleaned up + for i in {1..12}; do + test "$(ceph rbd task list)" = "[]" && break + sleep 10 + done + test "$(ceph rbd task list)" = "[]" + + remove_images +} + +test_tasks_recovery() { + echo "testing task handler recovery after module's RADOS client is blocklisted..." 
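+ # Background for the checks below: blocklisting the rbd_support module's
+ # RADOS client simulates the module losing its cluster connection; the
+ # module is expected to notice, recreate its client and recover, so the
+ # first request after blocklisting may fail and the test retries until one
+ # succeeds. Illustrative sketch of how the client address is located, not
+ # part of the test flow:
+ #   ceph mgr dump | jq '.active_clients[] | select(.name == "rbd_support")'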
+ remove_images + + ceph osd pool create rbd2 8 + rbd pool init rbd2 + + rbd create $RBD_CREATE_ARGS --size 1G rbd2/img1 + rbd bench --io-type write --io-pattern seq --io-size 1M --io-total 1G rbd2/img1 + rbd snap create rbd2/img1@snap + rbd snap protect rbd2/img1@snap + rbd clone rbd2/img1@snap rbd2/clone1 + + # Fetch and blocklist rbd_support module's RADOS client + CLIENT_ADDR=$(ceph mgr dump | jq .active_clients[] | + jq 'select(.name == "rbd_support")' | + jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') + ceph osd blocklist add $CLIENT_ADDR + ceph osd blocklist ls | grep $CLIENT_ADDR + + expect_fail ceph rbd task add flatten rbd2/clone1 + sleep 10 + for i in `seq 24`; do + ceph rbd task add flatten rbd2/clone1 && break + sleep 10 + done + test "$(ceph rbd task list)" != "[]" + + for i in {1..12}; do + rbd info rbd2/clone1 | grep 'parent: ' || break + sleep 10 + done + rbd info rbd2/clone1 | expect_fail grep 'parent: ' + rbd snap unprotect rbd2/img1@snap + + test "$(ceph rbd task list)" = "[]" + ceph osd pool rm rbd2 rbd2 --yes-i-really-really-mean-it +} + +test_pool_image_args +test_rename +test_ls +test_remove +test_migration +test_config +RBD_CREATE_ARGS="" +test_others +test_locking +test_thick_provision +RBD_CREATE_ARGS="--image-format 2" +test_others +test_locking +test_clone +test_trash +test_purge +test_deep_copy_clone +test_clone_v2 +test_thick_provision +test_namespace +test_trash_purge_schedule +test_trash_purge_schedule_recovery +test_mirror_snapshot_schedule +test_mirror_snapshot_schedule_recovery +test_perf_image_iostat +test_perf_image_iostat_recovery +test_mirror_pool_peer_bootstrap_create +test_tasks_removed_pool +test_tasks_recovery + +echo OK diff --git a/qa/workunits/rbd/cli_migration.sh b/qa/workunits/rbd/cli_migration.sh new file mode 100755 index 000000000..be8e031fd --- /dev/null +++ b/qa/workunits/rbd/cli_migration.sh @@ -0,0 +1,357 @@ +#!/usr/bin/env bash +set -ex + +. $(dirname $0)/../../standalone/ceph-helpers.sh + +TEMPDIR= +IMAGE1=image1 +IMAGE2=image2 +IMAGE3=image3 +IMAGES="${IMAGE1} ${IMAGE2} ${IMAGE3}" + +cleanup() { + cleanup_tempdir + remove_images +} + +setup_tempdir() { + TEMPDIR=`mktemp -d` +} + +cleanup_tempdir() { + rm -rf ${TEMPDIR} +} + +create_base_image() { + local image=$1 + + rbd create --size 1G ${image} + rbd bench --io-type write --io-pattern rand --io-size=4K --io-total 256M ${image} + rbd snap create ${image}@1 + rbd bench --io-type write --io-pattern rand --io-size=4K --io-total 64M ${image} + rbd snap create ${image}@2 + rbd bench --io-type write --io-pattern rand --io-size=4K --io-total 128M ${image} +} + +export_raw_image() { + local image=$1 + + rm -rf "${TEMPDIR}/${image}" + rbd export ${image} "${TEMPDIR}/${image}" +} + +export_base_image() { + local image=$1 + + export_raw_image "${image}" + export_raw_image "${image}@1" + export_raw_image "${image}@2" +} + +remove_image() { + local image=$1 + + (rbd migration abort $image || true) >/dev/null 2>&1 + (rbd snap purge $image || true) >/dev/null 2>&1 + (rbd rm $image || true) >/dev/null 2>&1 +} + +remove_images() { + for image in ${IMAGES} + do + remove_image ${image} + done +} + +show_diff() +{ + local file1=$1 + local file2=$2 + + xxd "${file1}" > "${file1}.xxd" + xxd "${file2}" > "${file2}.xxd" + sdiff -s "${file1}.xxd" "${file2}.xxd" | head -n 64 + rm -f "${file1}.xxd" "${file2}.xxd" +} + +compare_images() { + local src_image=$1 + local dst_image=$2 + local ret=0 + + export_raw_image ${dst_image} + if ! 
cmp "${TEMPDIR}/${src_image}" "${TEMPDIR}/${dst_image}" + then + show_diff "${TEMPDIR}/${src_image}" "${TEMPDIR}/${dst_image}" + ret=1 + fi + return ${ret} +} + +test_import_native_format() { + local base_image=$1 + local dest_image=$2 + + rbd migration prepare --import-only "rbd/${base_image}@2" ${dest_image} + rbd migration abort ${dest_image} + + local pool_id=$(ceph osd pool ls detail --format xml | xmlstarlet sel -t -v "//pools/pool[pool_name='rbd']/pool_id") + cat > ${TEMPDIR}/spec.json <<EOF +{ + "type": "native", + "pool_id": ${pool_id}, + "pool_namespace": "", + "image_name": "${base_image}", + "snap_name": "2" +} +EOF + cat ${TEMPDIR}/spec.json + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + + compare_images "${base_image}@1" "${dest_image}@1" + compare_images "${base_image}@2" "${dest_image}@2" + + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + rbd migration execute ${dest_image} + + compare_images "${base_image}@1" "${dest_image}@1" + compare_images "${base_image}@2" "${dest_image}@2" + + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec "{\"type\": \"native\", \"pool_id\": "${pool_id}", \"image_name\": \"${base_image}\", \"snap_name\": \"2\"}" \ + ${dest_image} + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec "{\"type\": \"native\", \"pool_name\": \"rbd\", \"image_name\": \"${base_image}\", \"snap_name\": \"2\"}" \ + ${dest_image} + rbd migration execute ${dest_image} + rbd migration commit ${dest_image} + + compare_images "${base_image}@1" "${dest_image}@1" + compare_images "${base_image}@2" "${dest_image}@2" + + remove_image "${dest_image}" +} + +test_import_qcow_format() { + local base_image=$1 + local dest_image=$2 + + if ! qemu-img convert -f raw -O qcow rbd:rbd/${base_image} ${TEMPDIR}/${base_image}.qcow; then + echo "skipping QCOW test" + return 0 + fi + qemu-img info -f qcow ${TEMPDIR}/${base_image}.qcow + + cat > ${TEMPDIR}/spec.json <<EOF +{ + "type": "qcow", + "stream": { + "type": "file", + "file_path": "${TEMPDIR}/${base_image}.qcow" + } +} +EOF + cat ${TEMPDIR}/spec.json + + set +e + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + local error_code=$? 
+ set -e + + if [ $error_code -eq 95 ]; then + echo "skipping QCOW test (librbd support disabled)" + return 0 + fi + test $error_code -eq 0 + + compare_images "${base_image}" "${dest_image}" + + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + + compare_images "${base_image}" "${dest_image}" + + rbd migration execute ${dest_image} + + compare_images "${base_image}" "${dest_image}" + + rbd migration commit ${dest_image} + + compare_images "${base_image}" "${dest_image}" + + remove_image "${dest_image}" +} + +test_import_qcow2_format() { + local base_image=$1 + local dest_image=$2 + + # create new image via qemu-img and its bench tool since we cannot + # import snapshot deltas into QCOW2 + qemu-img create -f qcow2 ${TEMPDIR}/${base_image}.qcow2 1G + + qemu-img bench -f qcow2 -w -c 65536 -d 16 --pattern 65 -s 4096 \ + -S $((($RANDOM % 262144) * 4096)) ${TEMPDIR}/${base_image}.qcow2 + qemu-img convert -f qcow2 -O raw ${TEMPDIR}/${base_image}.qcow2 \ + "${TEMPDIR}/${base_image}@snap1" + qemu-img snapshot -c "snap1" ${TEMPDIR}/${base_image}.qcow2 + + qemu-img bench -f qcow2 -w -c 16384 -d 16 --pattern 66 -s 4096 \ + -S $((($RANDOM % 262144) * 4096)) ${TEMPDIR}/${base_image}.qcow2 + qemu-img convert -f qcow2 -O raw ${TEMPDIR}/${base_image}.qcow2 \ + "${TEMPDIR}/${base_image}@snap2" + qemu-img snapshot -c "snap2" ${TEMPDIR}/${base_image}.qcow2 + + qemu-img bench -f qcow2 -w -c 32768 -d 16 --pattern 67 -s 4096 \ + -S $((($RANDOM % 262144) * 4096)) ${TEMPDIR}/${base_image}.qcow2 + qemu-img convert -f qcow2 -O raw ${TEMPDIR}/${base_image}.qcow2 \ + ${TEMPDIR}/${base_image} + + qemu-img info -f qcow2 ${TEMPDIR}/${base_image}.qcow2 + + cat > ${TEMPDIR}/spec.json <<EOF +{ + "type": "qcow", + "stream": { + "type": "file", + "file_path": "${TEMPDIR}/${base_image}.qcow2" + } +} +EOF + cat ${TEMPDIR}/spec.json + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + + compare_images "${base_image}@snap1" "${dest_image}@snap1" + compare_images "${base_image}@snap2" "${dest_image}@snap2" + compare_images "${base_image}" "${dest_image}" + + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + + compare_images "${base_image}@snap1" "${dest_image}@snap1" + compare_images "${base_image}@snap2" "${dest_image}@snap2" + compare_images "${base_image}" "${dest_image}" + + rbd migration execute ${dest_image} + + compare_images "${base_image}@snap1" "${dest_image}@snap1" + compare_images "${base_image}@snap2" "${dest_image}@snap2" + compare_images "${base_image}" "${dest_image}" + + rbd migration commit ${dest_image} + + compare_images "${base_image}@snap1" "${dest_image}@snap1" + compare_images "${base_image}@snap2" "${dest_image}@snap2" + compare_images "${base_image}" "${dest_image}" + + remove_image "${dest_image}" +} + +test_import_raw_format() { + local base_image=$1 + local dest_image=$2 + + cat > ${TEMPDIR}/spec.json <<EOF +{ + "type": "raw", + "stream": { + "type": "file", + "file_path": "${TEMPDIR}/${base_image}" + } +} +EOF + cat ${TEMPDIR}/spec.json + + cat ${TEMPDIR}/spec.json | rbd migration prepare --import-only \ + --source-spec-path - ${dest_image} + compare_images ${base_image} ${dest_image} + rbd migration abort ${dest_image} + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + rbd migration execute ${dest_image} + rbd migration commit ${dest_image} 
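+ # After 'commit' the source stream is detached and the destination is a
+ # self-contained image, so the comparison below runs against the final image.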
+ + compare_images ${base_image} ${dest_image} + + remove_image "${dest_image}" + + cat > ${TEMPDIR}/spec.json <<EOF +{ + "type": "raw", + "stream": { + "type": "file", + "file_path": "${TEMPDIR}/${base_image}" + }, + "snapshots": [{ + "type": "raw", + "name": "snap1", + "stream": { + "type": "file", + "file_path": "${TEMPDIR}/${base_image}@1" + } + }, { + "type": "raw", + "name": "snap2", + "stream": { + "type": "file", + "file_path": "${TEMPDIR}/${base_image}@2" + } + }] +} +EOF + cat ${TEMPDIR}/spec.json + + rbd migration prepare --import-only \ + --source-spec-path ${TEMPDIR}/spec.json ${dest_image} + + rbd snap create ${dest_image}@head + rbd bench --io-type write --io-pattern rand --io-size=32K --io-total=32M ${dest_image} + + compare_images "${base_image}" "${dest_image}@head" + compare_images "${base_image}@1" "${dest_image}@snap1" + compare_images "${base_image}@2" "${dest_image}@snap2" + compare_images "${base_image}" "${dest_image}@head" + + rbd migration execute ${dest_image} + + compare_images "${base_image}@1" "${dest_image}@snap1" + compare_images "${base_image}@2" "${dest_image}@snap2" + compare_images "${base_image}" "${dest_image}@head" + + rbd migration commit ${dest_image} + + remove_image "${dest_image}" +} + +# make sure rbd pool is EMPTY.. this is a test script!! +rbd ls 2>&1 | wc -l | grep -v '^0$' && echo "nonempty rbd pool, aborting! run this script on an empty test cluster only." && exit 1 + +setup_tempdir +trap 'cleanup $?' INT TERM EXIT + +create_base_image ${IMAGE1} +export_base_image ${IMAGE1} + +test_import_native_format ${IMAGE1} ${IMAGE2} +test_import_qcow_format ${IMAGE1} ${IMAGE2} +test_import_qcow2_format ${IMAGE2} ${IMAGE3} +test_import_raw_format ${IMAGE1} ${IMAGE2} + +echo OK diff --git a/qa/workunits/rbd/concurrent.sh b/qa/workunits/rbd/concurrent.sh new file mode 100755 index 000000000..abaad75f5 --- /dev/null +++ b/qa/workunits/rbd/concurrent.sh @@ -0,0 +1,375 @@ +#!/usr/bin/env bash + +# Copyright (C) 2013 Inktank Storage, Inc. +# +# This is free software; see the source for copying conditions. +# There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# This is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as +# published by the Free Software Foundation version 2. + +# Alex Elder <elder@inktank.com> +# January 29, 2013 + +################################################################ + +# The purpose of this test is to exercise paths through the rbd +# code, making sure no bad pointer references or invalid reference +# count operations occur in the face of concurrent activity. +# +# Each pass of the test creates an rbd image, maps it, and writes +# some data into the image. It also reads some data from all of the +# other images that exist at the time the pass executes. Finally, +# the image is unmapped and removed. The image removal completes in +# the background. +# +# An iteration of the test consists of performing some number of +# passes, initating each pass as a background job, and finally +# sleeping for a variable delay. The delay is initially a specified +# value, but each iteration shortens that proportionally, such that +# the last iteration will not delay at all. +# +# The result exercises concurrent creates and deletes of rbd images, +# writes to new images, reads from both written and unwritten image +# data (including reads concurrent with writes), and attempts to +# unmap images being read. 
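+
+# A typical invocation (illustrative values only) might be:
+#     concurrent.sh -i 10 -c 3 -d 5
+# i.e. 10 iterations, 3 images created per iteration, and an initial 5 second
+# delay that shrinks to zero by the last iteration; the defaults below apply
+# otherwise.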
+ +# Usage: concurrent [-i <iter>] [-c <count>] [-d <delay>] +# +# Exit status: +# 0: success +# 1: usage error +# 2: other runtime error +# 99: argument count error (programming error) +# 100: getopt error (internal error) + +################################################################ + +set -ex + +# Default flag values; RBD_CONCURRENT_ITER names are intended +# to be used in yaml scripts to pass in alternate values, e.g.: +# env: +# RBD_CONCURRENT_ITER: 20 +# RBD_CONCURRENT_COUNT: 5 +# RBD_CONCURRENT_DELAY: 3 +ITER_DEFAULT=${RBD_CONCURRENT_ITER:-100} +COUNT_DEFAULT=${RBD_CONCURRENT_COUNT:-5} +DELAY_DEFAULT=${RBD_CONCURRENT_DELAY:-5} # seconds + +CEPH_SECRET_FILE=${CEPH_SECRET_FILE:-} +CEPH_ID=${CEPH_ID:-admin} +SECRET_ARGS="" +if [ "${CEPH_SECRET_FILE}" ]; then + SECRET_ARGS="--secret $CEPH_SECRET_FILE" +fi + +################################################################ + +function setup() { + ID_MAX_DIR=$(mktemp -d /tmp/image_max_id.XXXXX) + ID_COUNT_DIR=$(mktemp -d /tmp/image_ids.XXXXXX) + NAMES_DIR=$(mktemp -d /tmp/image_names.XXXXXX) + SOURCE_DATA=$(mktemp /tmp/source_data.XXXXXX) + + # Use urandom to generate SOURCE_DATA + dd if=/dev/urandom of=${SOURCE_DATA} bs=2048 count=66 \ + >/dev/null 2>&1 + + # List of rbd id's *not* created by this script + export INITIAL_RBD_IDS=$(ls /sys/bus/rbd/devices) + + # Set up some environment for normal teuthology test setup. + # This really should not be necessary but I found it was. + + export CEPH_ARGS=" --name client.0" +} + +function cleanup() { + [ ! "${ID_MAX_DIR}" ] && return + local id + local image + + # Unmap mapped devices + for id in $(rbd_ids); do + image=$(cat "/sys/bus/rbd/devices/${id}/name") + rbd_unmap_image "${id}" + rbd_destroy_image "${image}" + done + # Get any leftover images + for image in $(rbd ls 2>/dev/null); do + rbd_destroy_image "${image}" + done + wait + sync + rm -f "${SOURCE_DATA}" + [ -d "${NAMES_DIR}" ] && rmdir "${NAMES_DIR}" + echo "Max concurrent rbd image count was $(get_max "${ID_COUNT_DIR}")" + rm -rf "${ID_COUNT_DIR}" + echo "Max rbd image id was $(get_max "${ID_MAX_DIR}")" + rm -rf "${ID_MAX_DIR}" +} + +function get_max() { + [ $# -eq 1 ] || exit 99 + local dir="$1" + + ls -U "${dir}" | sort -n | tail -1 +} + +trap cleanup HUP INT QUIT + +# print a usage message and quit +# +# if a message is supplied, print that first, and then exit +# with non-zero status +function usage() { + if [ $# -gt 0 ]; then + echo "" >&2 + echo "$@" >&2 + fi + + echo "" >&2 + echo "Usage: ${PROGNAME} <options> <tests>" >&2 + echo "" >&2 + echo " options:" >&2 + echo " -h or --help" >&2 + echo " show this message" >&2 + echo " -i or --iterations" >&2 + echo " iteration count (1 or more)" >&2 + echo " -c or --count" >&2 + echo " images created per iteration (1 or more)" >&2 + echo " -d or --delay" >&2 + echo " maximum delay between iterations" >&2 + echo "" >&2 + echo " defaults:" >&2 + echo " iterations: ${ITER_DEFAULT}" + echo " count: ${COUNT_DEFAULT}" + echo " delay: ${DELAY_DEFAULT} (seconds)" + echo "" >&2 + + [ $# -gt 0 ] && exit 1 + + exit 0 # This is used for a --help +} + +# parse command line arguments +function parseargs() { + ITER="${ITER_DEFAULT}" + COUNT="${COUNT_DEFAULT}" + DELAY="${DELAY_DEFAULT}" + + # Short option flags + SHORT_OPTS="" + SHORT_OPTS="${SHORT_OPTS},h" + SHORT_OPTS="${SHORT_OPTS},i:" + SHORT_OPTS="${SHORT_OPTS},c:" + SHORT_OPTS="${SHORT_OPTS},d:" + + # Short option flags + LONG_OPTS="" + LONG_OPTS="${LONG_OPTS},help" + LONG_OPTS="${LONG_OPTS},iterations:" + LONG_OPTS="${LONG_OPTS},count:" 
+ LONG_OPTS="${LONG_OPTS},delay:" + + TEMP=$(getopt --name "${PROGNAME}" \ + --options "${SHORT_OPTS}" \ + --longoptions "${LONG_OPTS}" \ + -- "$@") + eval set -- "$TEMP" + + while [ "$1" != "--" ]; do + case "$1" in + -h|--help) + usage + ;; + -i|--iterations) + ITER="$2" + [ "${ITER}" -lt 1 ] && + usage "bad iterations value" + shift + ;; + -c|--count) + COUNT="$2" + [ "${COUNT}" -lt 1 ] && + usage "bad count value" + shift + ;; + -d|--delay) + DELAY="$2" + shift + ;; + *) + exit 100 # Internal error + ;; + esac + shift + done + shift +} + +function rbd_ids() { + [ $# -eq 0 ] || exit 99 + local ids + local i + + [ -d /sys/bus/rbd ] || return + ids=" $(echo $(ls /sys/bus/rbd/devices)) " + for i in ${INITIAL_RBD_IDS}; do + ids=${ids/ ${i} / } + done + echo ${ids} +} + +function update_maxes() { + local ids="$@" + local last_id + # These aren't 100% safe against concurrent updates but it + # should be pretty close + count=$(echo ${ids} | wc -w) + touch "${ID_COUNT_DIR}/${count}" + last_id=${ids% } + last_id=${last_id##* } + touch "${ID_MAX_DIR}/${last_id}" +} + +function rbd_create_image() { + [ $# -eq 0 ] || exit 99 + local image=$(basename $(mktemp "${NAMES_DIR}/image.XXXXXX")) + + rbd create "${image}" --size=1024 + echo "${image}" +} + +function rbd_image_id() { + [ $# -eq 1 ] || exit 99 + local image="$1" + + grep -l "${image}" /sys/bus/rbd/devices/*/name 2>/dev/null | + cut -d / -f 6 +} + +function rbd_map_image() { + [ $# -eq 1 ] || exit 99 + local image="$1" + local id + + sudo rbd map "${image}" --user "${CEPH_ID}" ${SECRET_ARGS} \ + > /dev/null 2>&1 + + id=$(rbd_image_id "${image}") + echo "${id}" +} + +function rbd_write_image() { + [ $# -eq 1 ] || exit 99 + local id="$1" + + # Offset and size here are meant to ensure beginning and end + # cross both (4K or 64K) page and (4MB) rbd object boundaries. + # It assumes the SOURCE_DATA file has size 66 * 2048 bytes + dd if="${SOURCE_DATA}" of="/dev/rbd${id}" bs=2048 seek=2015 \ + > /dev/null 2>&1 +} + +# All starting and ending offsets here are selected so they are not +# aligned on a (4 KB or 64 KB) page boundary +function rbd_read_image() { + [ $# -eq 1 ] || exit 99 + local id="$1" + + # First read starting and ending at an offset before any + # written data. The osd zero-fills data read from an + # existing rbd object, but before any previously-written + # data. + dd if="/dev/rbd${id}" of=/dev/null bs=2048 count=34 skip=3 \ + > /dev/null 2>&1 + # Next read starting at an offset before any written data, + # but ending at an offset that includes data that's been + # written. The osd zero-fills unwritten data at the + # beginning of a read. + dd if="/dev/rbd${id}" of=/dev/null bs=2048 count=34 skip=1983 \ + > /dev/null 2>&1 + # Read the data at offset 2015 * 2048 bytes (where it was + # written) and make sure it matches the original data. + cmp --quiet "${SOURCE_DATA}" "/dev/rbd${id}" 0 4126720 || + echo "MISMATCH!!!" + # Now read starting within the pre-written data, but ending + # beyond it. The rbd client zero-fills the unwritten + # portion at the end of a read. + dd if="/dev/rbd${id}" of=/dev/null bs=2048 count=34 skip=2079 \ + > /dev/null 2>&1 + # Now read starting from an unwritten range within a written + # rbd object. The rbd client zero-fills this. + dd if="/dev/rbd${id}" of=/dev/null bs=2048 count=34 skip=2115 \ + > /dev/null 2>&1 + # Finally read from an unwritten region which would reside + # in a different (non-existent) osd object. The osd client + # zero-fills unwritten data when the target object doesn't + # exist. 
+ dd if="/dev/rbd${id}" of=/dev/null bs=2048 count=34 skip=4098 \ + > /dev/null 2>&1 +} + +function rbd_unmap_image() { + [ $# -eq 1 ] || exit 99 + local id="$1" + + sudo rbd unmap "/dev/rbd${id}" +} + +function rbd_destroy_image() { + [ $# -eq 1 ] || exit 99 + local image="$1" + + # Don't wait for it to complete, to increase concurrency + rbd rm "${image}" >/dev/null 2>&1 & + rm -f "${NAMES_DIR}/${image}" +} + +function one_pass() { + [ $# -eq 0 ] || exit 99 + local image + local id + local ids + local i + + image=$(rbd_create_image) + id=$(rbd_map_image "${image}") + ids=$(rbd_ids) + update_maxes "${ids}" + for i in ${rbd_ids}; do + if [ "${i}" -eq "${id}" ]; then + rbd_write_image "${i}" + else + rbd_read_image "${i}" + fi + done + rbd_unmap_image "${id}" + rbd_destroy_image "${image}" +} + +################################################################ + +parseargs "$@" + +setup + +for iter in $(seq 1 "${ITER}"); do + for count in $(seq 1 "${COUNT}"); do + one_pass & + done + # Sleep longer at first, overlap iterations more later. + # Use awk to get sub-second granularity (see sleep(1)). + sleep $(echo "${DELAY}" "${iter}" "${ITER}" | + awk '{ printf("%.2f\n", $1 - $1 * $2 / $3);}') + +done +wait + +cleanup + +exit 0 diff --git a/qa/workunits/rbd/crimson/test_crimson_librbd.sh b/qa/workunits/rbd/crimson/test_crimson_librbd.sh new file mode 100755 index 000000000..fb308de41 --- /dev/null +++ b/qa/workunits/rbd/crimson/test_crimson_librbd.sh @@ -0,0 +1,35 @@ +#!/bin/sh -e + +if [ -n "${VALGRIND}" ]; then + valgrind ${VALGRIND} --suppressions=${TESTDIR}/valgrind.supp \ + --error-exitcode=1 ceph_test_librbd +else + # Run test cases indivually to allow better selection + # of ongoing Crimson development. + # Disabled test groups are tracked here: + # https://tracker.ceph.com/issues/58791 + ceph_test_librbd --gtest_filter='TestLibRBD.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/0.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/1.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/2.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/3.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/4.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/5.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/6.*' + ceph_test_librbd --gtest_filter='EncryptedFlattenTest/7.*' + # ceph_test_librbd --gtest_filter='DiffIterateTest/0.*' + # ceph_test_librbd --gtest_filter='DiffIterateTest/1.*' + ceph_test_librbd --gtest_filter='TestImageWatcher.*' + ceph_test_librbd --gtest_filter='TestInternal.*' + ceph_test_librbd --gtest_filter='TestMirroring.*' + # ceph_test_librbd --gtest_filter='TestDeepCopy.*' + ceph_test_librbd --gtest_filter='TestGroup.*' + # ceph_test_librbd --gtest_filter='TestMigration.*' + ceph_test_librbd --gtest_filter='TestMirroringWatcher.*' + ceph_test_librbd --gtest_filter='TestObjectMap.*' + ceph_test_librbd --gtest_filter='TestOperations.*' + ceph_test_librbd --gtest_filter='TestTrash.*' + ceph_test_librbd --gtest_filter='TestJournalEntries.*' + ceph_test_librbd --gtest_filter='TestJournalReplay.*' +fi +exit 0 diff --git a/qa/workunits/rbd/diff.sh b/qa/workunits/rbd/diff.sh new file mode 100755 index 000000000..fbd6e0642 --- /dev/null +++ b/qa/workunits/rbd/diff.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +set -ex + +function cleanup() { + rbd snap purge foo || : + rbd rm foo || : + rbd snap purge foo.copy || : + rbd rm foo.copy || : + rbd snap purge foo.copy2 || : + rbd rm foo.copy2 || : + rm -f foo.diff foo.out +} + +cleanup + +rbd 
create foo --size 1000 +rbd bench --io-type write foo --io-size 4096 --io-threads 5 --io-total 4096000 --io-pattern rand + +#rbd cp foo foo.copy +rbd create foo.copy --size 1000 +rbd export-diff foo - | rbd import-diff - foo.copy + +rbd snap create foo --snap=two +rbd bench --io-type write foo --io-size 4096 --io-threads 5 --io-total 4096000 --io-pattern rand +rbd snap create foo --snap=three +rbd snap create foo.copy --snap=two + +rbd export-diff foo@two --from-snap three foo.diff && exit 1 || true # wrong snap order +rm -f foo.diff + +rbd export-diff foo@three --from-snap two foo.diff +rbd import-diff foo.diff foo.copy +rbd import-diff foo.diff foo.copy && exit 1 || true # this should fail with EEXIST on the end snap +rbd snap ls foo.copy | grep three + +rbd create foo.copy2 --size 1000 +rbd import-diff foo.diff foo.copy2 && exit 1 || true # this should fail bc the start snap dne + +rbd export foo foo.out +orig=`md5sum foo.out | awk '{print $1}'` +rm foo.out +rbd export foo.copy foo.out +copy=`md5sum foo.out | awk '{print $1}'` + +if [ "$orig" != "$copy" ]; then + echo does not match + exit 1 +fi + +cleanup + +echo OK + diff --git a/qa/workunits/rbd/diff_continuous.sh b/qa/workunits/rbd/diff_continuous.sh new file mode 100755 index 000000000..fd1785e07 --- /dev/null +++ b/qa/workunits/rbd/diff_continuous.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash + +set -ex +set -o pipefail + +function untar_workload() { + local i + for ((i = 0; i < 10; i++)); do + pv -L 10M linux-5.4.tar.gz > "${MOUNT}/linux-5.4.tar.gz" + tar -C "${MOUNT}" -xzf "${MOUNT}/linux-5.4.tar.gz" + sync "${MOUNT}" + rm -rf "${MOUNT}"/linux-5.4* + done +} + +function check_object_map() { + local spec="$1" + + rbd object-map check "${spec}" + + local flags + flags="$(rbd info "${spec}" | grep 'flags: ')" + if [[ "${flags}" =~ object\ map\ invalid ]]; then + echo "Object map invalid at ${spec}" + exit 1 + fi + if [[ "${flags}" =~ fast\ diff\ invalid ]]; then + echo "Fast diff invalid at ${spec}" + exit 1 + fi +} + +# RBD_DEVICE_TYPE is intended to be set from yaml, default to krbd +readonly DEVICE_TYPE="${RBD_DEVICE_TYPE:-krbd}" + +BASE_UUID="$(uuidgen)" +readonly BASE_UUID + +readonly SIZE="2G" +readonly SRC="${BASE_UUID}-src" +readonly DST="${BASE_UUID}-dst" +readonly MOUNT="${BASE_UUID}-mnt" + +rbd create -s "${SIZE}" --stripe-unit 64K --stripe-count 8 \ + --image-feature exclusive-lock,object-map,fast-diff "${SRC}" +rbd create -s "${SIZE}" --object-size 512K "${DST}" + +dev="$(sudo rbd device map -t "${DEVICE_TYPE}" "${SRC}")" +sudo mkfs.ext4 "${dev}" +mkdir "${MOUNT}" +sudo mount "${dev}" "${MOUNT}" +sudo chown "$(whoami)" "${MOUNT}" + +# start untar in the background +wget https://download.ceph.com/qa/linux-5.4.tar.gz +untar_workload & +untar_pid=$! + +# export initial incremental +snap_num=1 +rbd snap create "${SRC}@snap${snap_num}" +rbd export-diff "${SRC}@snap${snap_num}" "${BASE_UUID}@snap${snap_num}.diff" + +# keep exporting successive incrementals while untar is running +while kill -0 "${untar_pid}"; do + snap_num=$((snap_num + 1)) + rbd snap create "${SRC}@snap${snap_num}" + sleep $((RANDOM % 4 + 1)) + rbd export-diff --whole-object --from-snap "snap$((snap_num - 1))" \ + "${SRC}@snap${snap_num}" "${BASE_UUID}@snap${snap_num}.diff" +done + +sudo umount "${MOUNT}" +sudo rbd device unmap -t "${DEVICE_TYPE}" "${dev}" + +if ! 
wait "${untar_pid}"; then + echo "untar_workload failed" + exit 1 +fi + +echo "Exported ${snap_num} incrementals" +if ((snap_num < 30)); then + echo "Too few incrementals" + exit 1 +fi + +# validate +for ((i = 1; i <= snap_num; i++)); do + rbd import-diff "${BASE_UUID}@snap${i}.diff" "${DST}" + src_sum="$(rbd export "${SRC}@snap${i}" - | md5sum | awk '{print $1}')" + dst_sum="$(rbd export "${DST}@snap${i}" - | md5sum | awk '{print $1}')" + if [[ "${src_sum}" != "${dst_sum}" ]]; then + echo "Mismatch at snap${i}: ${src_sum} != ${dst_sum}" + exit 1 + fi + check_object_map "${SRC}@snap${i}" + # FIXME: this reproduces http://tracker.ceph.com/issues/37876 + # there is no fstrim involved but "rbd import-diff" can produce + # write-zeroes requests which turn into discards under the hood + # actual: EXISTS, expected: EXISTS_CLEAN inconsistency is harmless + # from a data integrity POV and data is validated above regardless, + # so just waive it for now + #check_object_map "${DST}@snap${i}" +done + +echo OK diff --git a/qa/workunits/rbd/huge-tickets.sh b/qa/workunits/rbd/huge-tickets.sh new file mode 100755 index 000000000..22853c07a --- /dev/null +++ b/qa/workunits/rbd/huge-tickets.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +# This is a test for http://tracker.ceph.com/issues/8979 and the fallout +# from triaging it. #8979 itself was random crashes on corrupted memory +# due to a buffer overflow (for tickets larger than 256 bytes), further +# inspection showed that vmalloced tickets weren't handled correctly as +# well. +# +# What we are doing here is generating three huge keyrings and feeding +# them to libceph (through 'rbd map' on a scratch image). Bad kernels +# will crash reliably either on corrupted memory somewhere or a bad page +# fault in scatterwalk_pagedone(). + +set -ex + +function generate_keyring() { + local user=$1 + local n=$2 + + ceph-authtool -C -n client.$user --cap mon 'allow *' --gen-key /tmp/keyring-$user + + set +x # don't pollute trace with echos + echo -en "\tcaps osd = \"allow rwx pool=rbd" >>/tmp/keyring-$user + for i in $(seq 1 $n); do + echo -n ", allow rwx pool=pool$i" >>/tmp/keyring-$user + done + echo "\"" >>/tmp/keyring-$user + set -x +} + +generate_keyring foo 1000 # ~25K, kmalloc +generate_keyring bar 20000 # ~500K, vmalloc +generate_keyring baz 300000 # ~8M, vmalloc + sg chaining + +rbd create --size 1 test + +for user in {foo,bar,baz}; do + ceph auth import -i /tmp/keyring-$user + DEV=$(sudo rbd map -n client.$user --keyring /tmp/keyring-$user test) + sudo rbd unmap $DEV +done diff --git a/qa/workunits/rbd/image_read.sh b/qa/workunits/rbd/image_read.sh new file mode 100755 index 000000000..ddca8356e --- /dev/null +++ b/qa/workunits/rbd/image_read.sh @@ -0,0 +1,680 @@ +#!/usr/bin/env bash + +# Copyright (C) 2013 Inktank Storage, Inc. +# +# This is free software; see the source for copying conditions. +# There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. +# +# This is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as +# published by the Free Software Foundation version 2. + +# Alex Elder <elder@inktank.com> +# April 10, 2013 + +################################################################ + +# The purpose of this test is to validate that data read from a +# mapped rbd image is what it's expected to be. +# +# By default it creates an image and fills it with some data. 
It +# then reads back the data at a series of offsets known to cover +# various situations (such as reading the beginning, end, or the +# entirety of an object, or doing a read that spans multiple +# objects), and stashes the results in a set of local files. +# +# It also creates and maps a snapshot of the original image after +# it's been filled, and reads back the same ranges of data from the +# snapshot. It then compares the data read back with what was read +# back from the original image, verifying they match. +# +# Clone functionality is tested as well, in which case a clone is +# made of the snapshot, and the same ranges of data are again read +# and compared with the original. In addition, a snapshot of that +# clone is created, and a clone of *that* snapshot is put through +# the same set of tests. (Clone testing can be optionally skipped.) + +################################################################ + +# Default parameter values. Environment variables, if set, will +# supercede these defaults. Such variables have names that begin +# with "IMAGE_READ_", for e.g. use IMAGE_READ_PAGE_SIZE=65536 +# to use 65536 as the page size. +set -e + +DEFAULT_VERBOSE=true +DEFAULT_TEST_CLONES=true +DEFAULT_LOCAL_FILES=false +DEFAULT_FORMAT=2 +DEFAULT_DOUBLE_ORDER=true +DEFAULT_HALF_ORDER=false +DEFAULT_PAGE_SIZE=4096 +DEFAULT_OBJECT_ORDER=22 +MIN_OBJECT_ORDER=12 # technically 9, but the rbd CLI enforces 12 +MAX_OBJECT_ORDER=32 + +RBD_FORCE_ALLOW_V1=1 + +PROGNAME=$(basename $0) + +ORIGINAL=original-$$ +SNAP1=snap1-$$ +CLONE1=clone1-$$ +SNAP2=snap2-$$ +CLONE2=clone2-$$ + +function err() { + if [ $# -gt 0 ]; then + echo "${PROGNAME}: $@" >&2 + fi + exit 2 +} + +function usage() { + if [ $# -gt 0 ]; then + echo "" >&2 + echo "${PROGNAME}: $@" >&2 + fi + echo "" >&2 + echo "Usage: ${PROGNAME} [<options>]" >&2 + echo "" >&2 + echo "options are:" >&2 + echo " -o object_order" >&2 + echo " must be ${MIN_OBJECT_ORDER}..${MAX_OBJECT_ORDER}" >&2 + echo " -p page_size (in bytes)" >&2 + echo " note: there must be at least 4 pages per object" >&2 + echo " -1" >&2 + echo " test using format 1 rbd images (default)" >&2 + echo " -2" >&2 + echo " test using format 2 rbd images" >&2 + echo " -c" >&2 + echo " also test rbd clone images (implies format 2)" >&2 + echo " -d" >&2 + echo " clone object order double its parent's (format 2)" >&2 + echo " -h" >&2 + echo " clone object order half of its parent's (format 2)" >&2 + echo " -l" >&2 + echo " use local files rather than rbd images" >&2 + echo " -v" >&2 + echo " disable reporting of what's going on" >&2 + echo "" >&2 + exit 1 +} + +function verbose() { + [ "${VERBOSE}" = true ] && echo "$@" + true # Don't let the verbose test spoil our return value +} + +function quiet() { + "$@" 2> /dev/null +} + +function boolean_toggle() { + [ $# -eq 1 ] || exit 99 + test "$1" = "true" && echo false || echo true +} + +function parseargs() { + local opts="o:p:12clv" + local lopts="order:,page_size:,local,clone,verbose" + local parsed + local clone_order_msg + + # use values from environment if available + VERBOSE="${IMAGE_READ_VERBOSE:-${DEFAULT_VERBOSE}}" + TEST_CLONES="${IMAGE_READ_TEST_CLONES:-${DEFAULT_TEST_CLONES}}" + LOCAL_FILES="${IMAGE_READ_LOCAL_FILES:-${DEFAULT_LOCAL_FILES}}" + DOUBLE_ORDER="${IMAGE_READ_DOUBLE_ORDER:-${DEFAULT_DOUBLE_ORDER}}" + HALF_ORDER="${IMAGE_READ_HALF_ORDER:-${DEFAULT_HALF_ORDER}}" + FORMAT="${IMAGE_READ_FORMAT:-${DEFAULT_FORMAT}}" + PAGE_SIZE="${IMAGE_READ_PAGE_SIZE:-${DEFAULT_PAGE_SIZE}}" + 
OBJECT_ORDER="${IMAGE_READ_OBJECT_ORDER:-${DEFAULT_OBJECT_ORDER}}" + + parsed=$(getopt -o "${opts}" -l "${lopts}" -n "${PROGNAME}" -- "$@") || + usage + eval set -- "${parsed}" + while true; do + case "$1" in + -v|--verbose) + VERBOSE=$(boolean_toggle "${VERBOSE}");; + -c|--clone) + TEST_CLONES=$(boolean_toggle "${TEST_CLONES}");; + -d|--double) + DOUBLE_ORDER=$(boolean_toggle "${DOUBLE_ORDER}");; + -h|--half) + HALF_ORDER=$(boolean_toggle "${HALF_ORDER}");; + -l|--local) + LOCAL_FILES=$(boolean_toggle "${LOCAL_FILES}");; + -1|-2) + FORMAT="${1:1}";; + -p|--page_size) + PAGE_SIZE="$2"; shift;; + -o|--order) + OBJECT_ORDER="$2"; shift;; + --) + shift; break;; + *) + err "getopt internal error" + esac + shift + done + [ $# -gt 0 ] && usage "excess arguments ($*)" + + if [ "${TEST_CLONES}" = true ]; then + # If we're using different object orders for clones, + # make sure the limits are updated accordingly. If + # both "half" and "double" are specified, just + # ignore them both. + if [ "${DOUBLE_ORDER}" = true ]; then + if [ "${HALF_ORDER}" = true ]; then + DOUBLE_ORDER=false + HALF_ORDER=false + else + ((MAX_OBJECT_ORDER -= 2)) + fi + elif [ "${HALF_ORDER}" = true ]; then + ((MIN_OBJECT_ORDER += 2)) + fi + fi + + [ "${OBJECT_ORDER}" -lt "${MIN_OBJECT_ORDER}" ] && + usage "object order (${OBJECT_ORDER}) must be" \ + "at least ${MIN_OBJECT_ORDER}" + [ "${OBJECT_ORDER}" -gt "${MAX_OBJECT_ORDER}" ] && + usage "object order (${OBJECT_ORDER}) must be" \ + "at most ${MAX_OBJECT_ORDER}" + + if [ "${TEST_CLONES}" = true ]; then + if [ "${DOUBLE_ORDER}" = true ]; then + ((CLONE1_ORDER = OBJECT_ORDER + 1)) + ((CLONE2_ORDER = OBJECT_ORDER + 2)) + clone_order_msg="double" + elif [ "${HALF_ORDER}" = true ]; then + ((CLONE1_ORDER = OBJECT_ORDER - 1)) + ((CLONE2_ORDER = OBJECT_ORDER - 2)) + clone_order_msg="half of" + else + CLONE1_ORDER="${OBJECT_ORDER}" + CLONE2_ORDER="${OBJECT_ORDER}" + clone_order_msg="the same as" + fi + fi + + [ "${TEST_CLONES}" != true ] || FORMAT=2 + + OBJECT_SIZE=$(echo "2 ^ ${OBJECT_ORDER}" | bc) + OBJECT_PAGES=$(echo "${OBJECT_SIZE} / ${PAGE_SIZE}" | bc) + IMAGE_SIZE=$((2 * 16 * OBJECT_SIZE / (1024 * 1024))) + [ "${IMAGE_SIZE}" -lt 1 ] && IMAGE_SIZE=1 + IMAGE_OBJECTS=$((IMAGE_SIZE * (1024 * 1024) / OBJECT_SIZE)) + + [ "${OBJECT_PAGES}" -lt 4 ] && + usage "object size (${OBJECT_SIZE}) must be" \ + "at least 4 * page size (${PAGE_SIZE})" + + echo "parameters for this run:" + echo " format ${FORMAT} images will be tested" + echo " object order is ${OBJECT_ORDER}, so" \ + "objects are ${OBJECT_SIZE} bytes" + echo " page size is ${PAGE_SIZE} bytes, so" \ + "there are are ${OBJECT_PAGES} pages in an object" + echo " derived image size is ${IMAGE_SIZE} MB, so" \ + "there are ${IMAGE_OBJECTS} objects in an image" + if [ "${TEST_CLONES}" = true ]; then + echo " clone functionality will be tested" + echo " object size for a clone will be ${clone_order_msg}" + echo " the object size of its parent image" + fi + + true # Don't let the clones test spoil our return value +} + +function image_dev_path() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + + if [ "${LOCAL_FILES}" = true ]; then + echo "${TEMP}/${image_name}" + return + fi + + echo "/dev/rbd/rbd/${image_name}" +} + +function out_data_dir() { + [ $# -lt 2 ] || exit 99 + local out_data="${TEMP}/data" + local image_name + + if [ $# -eq 1 ]; then + image_name="$1" + echo "${out_data}/${image_name}" + else + echo "${out_data}" + fi +} + +function setup() { + verbose "===== setting up =====" + TEMP=$(mktemp -d 
/tmp/rbd_image_read.XXXXX) + mkdir -p $(out_data_dir) + + # create and fill the original image with some data + create_image "${ORIGINAL}" + map_image "${ORIGINAL}" + fill_original + + # create a snapshot of the original + create_image_snap "${ORIGINAL}" "${SNAP1}" + map_image_snap "${ORIGINAL}" "${SNAP1}" + + if [ "${TEST_CLONES}" = true ]; then + # create a clone of the original snapshot + create_snap_clone "${ORIGINAL}" "${SNAP1}" \ + "${CLONE1}" "${CLONE1_ORDER}" + map_image "${CLONE1}" + + # create a snapshot of that clone + create_image_snap "${CLONE1}" "${SNAP2}" + map_image_snap "${CLONE1}" "${SNAP2}" + + # create a clone of that clone's snapshot + create_snap_clone "${CLONE1}" "${SNAP2}" \ + "${CLONE2}" "${CLONE2_ORDER}" + map_image "${CLONE2}" + fi +} + +function teardown() { + verbose "===== cleaning up =====" + if [ "${TEST_CLONES}" = true ]; then + unmap_image "${CLONE2}" || true + destroy_snap_clone "${CLONE1}" "${SNAP2}" "${CLONE2}" || true + + unmap_image_snap "${CLONE1}" "${SNAP2}" || true + destroy_image_snap "${CLONE1}" "${SNAP2}" || true + + unmap_image "${CLONE1}" || true + destroy_snap_clone "${ORIGINAL}" "${SNAP1}" "${CLONE1}" || true + fi + unmap_image_snap "${ORIGINAL}" "${SNAP1}" || true + destroy_image_snap "${ORIGINAL}" "${SNAP1}" || true + unmap_image "${ORIGINAL}" || true + destroy_image "${ORIGINAL}" || true + + rm -rf $(out_data_dir) + rmdir "${TEMP}" +} + +function create_image() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + local image_path + local bytes + + verbose "creating image \"${image_name}\"" + if [ "${LOCAL_FILES}" = true ]; then + image_path=$(image_dev_path "${image_name}") + bytes=$(echo "${IMAGE_SIZE} * 1024 * 1024 - 1" | bc) + quiet dd if=/dev/zero bs=1 count=1 seek="${bytes}" \ + of="${image_path}" + return + fi + + rbd create "${image_name}" --image-format "${FORMAT}" \ + --size "${IMAGE_SIZE}" --order "${OBJECT_ORDER}" \ + --image-shared +} + +function destroy_image() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + local image_path + + verbose "destroying image \"${image_name}\"" + if [ "${LOCAL_FILES}" = true ]; then + image_path=$(image_dev_path "${image_name}") + rm -f "${image_path}" + return + fi + + rbd rm "${image_name}" +} + +function map_image() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" # can be image@snap too + + if [ "${LOCAL_FILES}" = true ]; then + return + fi + + sudo rbd map "${image_name}" +} + +function unmap_image() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" # can be image@snap too + local image_path + + if [ "${LOCAL_FILES}" = true ]; then + return + fi + image_path=$(image_dev_path "${image_name}") + + if [ -e "${image_path}" ]; then + sudo rbd unmap "${image_path}" + fi +} + +function map_image_snap() { + [ $# -eq 2 ] || exit 99 + local image_name="$1" + local snap_name="$2" + local image_snap + + if [ "${LOCAL_FILES}" = true ]; then + return + fi + + image_snap="${image_name}@${snap_name}" + map_image "${image_snap}" +} + +function unmap_image_snap() { + [ $# -eq 2 ] || exit 99 + local image_name="$1" + local snap_name="$2" + local image_snap + + if [ "${LOCAL_FILES}" = true ]; then + return + fi + + image_snap="${image_name}@${snap_name}" + unmap_image "${image_snap}" +} + +function create_image_snap() { + [ $# -eq 2 ] || exit 99 + local image_name="$1" + local snap_name="$2" + local image_snap="${image_name}@${snap_name}" + local image_path + local snap_path + + verbose "creating snapshot \"${snap_name}\"" \ + "of image \"${image_name}\"" + if [ "${LOCAL_FILES}" = true ]; then + 
image_path=$(image_dev_path "${image_name}") + snap_path=$(image_dev_path "${image_snap}") + + cp "${image_path}" "${snap_path}" + return + fi + + rbd snap create "${image_snap}" +} + +function destroy_image_snap() { + [ $# -eq 2 ] || exit 99 + local image_name="$1" + local snap_name="$2" + local image_snap="${image_name}@${snap_name}" + local snap_path + + verbose "destroying snapshot \"${snap_name}\"" \ + "of image \"${image_name}\"" + if [ "${LOCAL_FILES}" = true ]; then + snap_path=$(image_dev_path "${image_snap}") + rm -rf "${snap_path}" + return + fi + + rbd snap rm "${image_snap}" +} + +function create_snap_clone() { + [ $# -eq 4 ] || exit 99 + local image_name="$1" + local snap_name="$2" + local clone_name="$3" + local clone_order="$4" + local image_snap="${image_name}@${snap_name}" + local snap_path + local clone_path + + verbose "creating clone image \"${clone_name}\"" \ + "of image snapshot \"${image_name}@${snap_name}\"" + if [ "${LOCAL_FILES}" = true ]; then + snap_path=$(image_dev_path "${image_name}@${snap_name}") + clone_path=$(image_dev_path "${clone_name}") + + cp "${snap_path}" "${clone_path}" + return + fi + + rbd snap protect "${image_snap}" + rbd clone --order "${clone_order}" --image-shared \ + "${image_snap}" "${clone_name}" +} + +function destroy_snap_clone() { + [ $# -eq 3 ] || exit 99 + local image_name="$1" + local snap_name="$2" + local clone_name="$3" + local image_snap="${image_name}@${snap_name}" + local clone_path + + verbose "destroying clone image \"${clone_name}\"" + if [ "${LOCAL_FILES}" = true ]; then + clone_path=$(image_dev_path "${clone_name}") + + rm -rf "${clone_path}" + return + fi + + rbd rm "${clone_name}" + rbd snap unprotect "${image_snap}" +} + +# function that produces "random" data with which to fill the image +function source_data() { + while quiet dd if=/bin/bash skip=$(($$ % 199)) bs="${PAGE_SIZE}"; do + : # Just do the dd + done +} + +function fill_original() { + local image_path=$(image_dev_path "${ORIGINAL}") + + verbose "filling original image" + # Fill 16 objects worth of "random" data + source_data | + quiet dd bs="${PAGE_SIZE}" count=$((16 * OBJECT_PAGES)) \ + of="${image_path}" +} + +function do_read() { + [ $# -eq 3 -o $# -eq 4 ] || exit 99 + local image_name="$1" + local offset="$2" + local length="$3" + [ "${length}" -gt 0 ] || err "do_read: length must be non-zero" + local image_path=$(image_dev_path "${image_name}") + local out_data=$(out_data_dir "${image_name}") + local range=$(printf "%06u~%04u" "${offset}" "${length}") + local out_file + + [ $# -eq 4 ] && offset=$((offset + 16 * OBJECT_PAGES)) + + verbose "reading \"${image_name}\" pages ${range}" + + out_file="${out_data}/pages_${range}" + + quiet dd bs="${PAGE_SIZE}" skip="${offset}" count="${length}" \ + if="${image_path}" of="${out_file}" +} + +function one_pass() { + [ $# -eq 1 -o $# -eq 2 ] || exit 99 + local image_name="$1" + local extended + [ $# -eq 2 ] && extended="true" + local offset + local length + + offset=0 + + # +-----------+-----------+--- + # |X:X:X...X:X| : : ... : | : + # +-----------+-----------+--- + length="${OBJECT_PAGES}" + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+--- + # : |X: : ... : | : + # ---+-----------+--- + length=1 + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+--- + # : | :X: ... 
: | : + # ---+-----------+--- + length=1 + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+--- + # : | : :X...X: | : + # ---+-----------+--- + length=$((OBJECT_PAGES - 3)) + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+--- + # : | : : ... :X| : + # ---+-----------+--- + length=1 + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+--- + # : |X:X:X...X:X| : + # ---+-----------+--- + length="${OBJECT_PAGES}" + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + offset=$((offset + 1)) # skip 1 + + # ---+-----------+--- + # : | :X:X...X:X| : + # ---+-----------+--- + length=$((OBJECT_PAGES - 1)) + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+-----------+--- + # : |X:X:X...X:X|X: : ... : | : + # ---+-----------+-----------+--- + length=$((OBJECT_PAGES + 1)) + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+-----------+--- + # : | :X:X...X:X|X: : ... : | : + # ---+-----------+-----------+--- + length="${OBJECT_PAGES}" + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+-----------+--- + # : | :X:X...X:X|X:X: ... : | : + # ---+-----------+-----------+--- + length=$((OBJECT_PAGES + 1)) + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # ---+-----------+-----------+--- + # : | : :X...X:X|X:X:X...X:X| : + # ---+-----------+-----------+--- + length=$((2 * OBJECT_PAGES + 2)) + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + offset=$((offset + 1)) # skip 1 + + # ---+-----------+-----------+----- + # : | :X:X...X:X|X:X:X...X:X|X: : + # ---+-----------+-----------+----- + length=$((2 * OBJECT_PAGES)) + do_read "${image_name}" "${offset}" "${length}" ${extended} + offset=$((offset + length)) + + # --+-----------+-----------+-------- + # : | :X:X...X:X|X:X:X...X:X|X:X: : + # --+-----------+-----------+-------- + length=2049 + length=$((2 * OBJECT_PAGES + 1)) + do_read "${image_name}" "${offset}" "${length}" ${extended} + # offset=$((offset + length)) +} + +function run_using() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + local out_data=$(out_data_dir "${image_name}") + + verbose "===== running using \"${image_name}\" =====" + mkdir -p "${out_data}" + one_pass "${image_name}" + one_pass "${image_name}" extended +} + +function compare() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + local out_data=$(out_data_dir "${image_name}") + local original=$(out_data_dir "${ORIGINAL}") + + verbose "===== comparing \"${image_name}\" =====" + for i in $(ls "${original}"); do + verbose compare "\"${image_name}\" \"${i}\"" + cmp "${original}/${i}" "${out_data}/${i}" + done + [ "${image_name}" = "${ORIGINAL}" ] || rm -rf "${out_data}" +} + +function doit() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + + run_using "${image_name}" + compare "${image_name}" +} + +########## Start + +parseargs "$@" + +trap teardown EXIT HUP INT +setup + +run_using "${ORIGINAL}" +doit "${ORIGINAL}@${SNAP1}" +if [ "${TEST_CLONES}" = true ]; then + doit "${CLONE1}" + doit "${CLONE1}@${SNAP2}" + doit "${CLONE2}" +fi +rm -rf $(out_data_dir "${ORIGINAL}") + +echo "Success!" 
+ +exit 0 diff --git a/qa/workunits/rbd/import_export.sh b/qa/workunits/rbd/import_export.sh new file mode 100755 index 000000000..89e8d35cf --- /dev/null +++ b/qa/workunits/rbd/import_export.sh @@ -0,0 +1,259 @@ +#!/bin/sh -ex + +# V1 image unsupported but required for testing purposes +export RBD_FORCE_ALLOW_V1=1 + +# returns data pool for a given image +get_image_data_pool () { + image=$1 + data_pool=$(rbd info $image | grep "data_pool: " | awk -F':' '{ print $NF }') + if [ -z $data_pool ]; then + data_pool='rbd' + fi + + echo $data_pool +} + +# return list of object numbers populated in image +objects () { + image=$1 + prefix=$(rbd info $image | grep block_name_prefix | awk '{print $NF;}') + + # strip off prefix and leading zeros from objects; sort, although + # it doesn't necessarily make sense as they're hex, at least it makes + # the list repeatable and comparable + objects=$(rados ls -p $(get_image_data_pool $image) | grep $prefix | \ + sed -e 's/'$prefix'\.//' -e 's/^0*\([0-9a-f]\)/\1/' | sort -u) + echo $objects +} + +# return false if either files don't compare or their ondisk +# sizes don't compare + +compare_files_and_ondisk_sizes () { + cmp -l $1 $2 || return 1 + origsize=$(stat $1 --format %b) + exportsize=$(stat $2 --format %b) + difference=$(($exportsize - $origsize)) + difference=${difference#-} # absolute value + test $difference -ge 0 -a $difference -lt 4096 +} + +TMPDIR=/tmp/rbd_import_export_$$ +rm -rf $TMPDIR +mkdir $TMPDIR +trap "rm -rf $TMPDIR" INT TERM EXIT + +# cannot import a dir +mkdir foo.$$ +rbd import foo.$$ foo.dir && exit 1 || true # should fail +rmdir foo.$$ + +# create a sparse file +dd if=/bin/sh of=${TMPDIR}/img bs=1k count=1 seek=10 +dd if=/bin/dd of=${TMPDIR}/img bs=1k count=10 seek=100 +dd if=/bin/rm of=${TMPDIR}/img bs=1k count=100 seek=1000 +dd if=/bin/ls of=${TMPDIR}/img bs=1k seek=10000 +dd if=/bin/ln of=${TMPDIR}/img bs=1k seek=100000 +dd if=/bin/grep of=${TMPDIR}/img bs=1k seek=1000000 + +rbd rm testimg || true + +rbd import $RBD_CREATE_ARGS ${TMPDIR}/img testimg +rbd export testimg ${TMPDIR}/img2 +rbd export testimg - > ${TMPDIR}/img3 +rbd rm testimg +cmp ${TMPDIR}/img ${TMPDIR}/img2 +cmp ${TMPDIR}/img ${TMPDIR}/img3 +rm ${TMPDIR}/img2 ${TMPDIR}/img3 + +# try again, importing from stdin +rbd import $RBD_CREATE_ARGS - testimg < ${TMPDIR}/img +rbd export testimg ${TMPDIR}/img2 +rbd export testimg - > ${TMPDIR}/img3 +rbd rm testimg +cmp ${TMPDIR}/img ${TMPDIR}/img2 +cmp ${TMPDIR}/img ${TMPDIR}/img3 + +rm ${TMPDIR}/img ${TMPDIR}/img2 ${TMPDIR}/img3 + +if rbd help export | grep -q export-format; then + # try with --export-format for snapshots + dd if=/bin/dd of=${TMPDIR}/img bs=1k count=10 seek=100 + rbd import $RBD_CREATE_ARGS ${TMPDIR}/img testimg + rbd snap create testimg@snap + rbd image-meta set testimg key1 value1 + IMAGEMETA_BEFORE=`rbd image-meta list testimg` + rbd export --export-format 2 testimg ${TMPDIR}/img_v2 + rbd import --export-format 2 ${TMPDIR}/img_v2 testimg_import + rbd info testimg_import + rbd info testimg_import@snap + IMAGEMETA_AFTER=`rbd image-meta list testimg_import` + [ "$IMAGEMETA_BEFORE" = "$IMAGEMETA_AFTER" ] + + # compare the contents between testimg and testimg_import + rbd export testimg_import ${TMPDIR}/img_import + compare_files_and_ondisk_sizes ${TMPDIR}/img ${TMPDIR}/img_import + + rbd export testimg@snap ${TMPDIR}/img_snap + rbd export testimg_import@snap ${TMPDIR}/img_snap_import + compare_files_and_ondisk_sizes ${TMPDIR}/img_snap ${TMPDIR}/img_snap_import + + rm ${TMPDIR}/img_v2 + rm ${TMPDIR}/img_import + 
rm ${TMPDIR}/img_snap + rm ${TMPDIR}/img_snap_import + + rbd snap rm testimg_import@snap + rbd remove testimg_import + rbd snap rm testimg@snap + rbd rm testimg + + # order + rbd import --order 20 ${TMPDIR}/img testimg + rbd export --export-format 2 testimg ${TMPDIR}/img_v2 + rbd import --export-format 2 ${TMPDIR}/img_v2 testimg_import + rbd info testimg_import|grep order|awk '{print $2}'|grep 20 + + rm ${TMPDIR}/img_v2 + + rbd remove testimg_import + rbd remove testimg + + # features + rbd import --image-feature layering ${TMPDIR}/img testimg + FEATURES_BEFORE=`rbd info testimg|grep features` + rbd export --export-format 2 testimg ${TMPDIR}/img_v2 + rbd import --export-format 2 ${TMPDIR}/img_v2 testimg_import + FEATURES_AFTER=`rbd info testimg_import|grep features` + if [ "$FEATURES_BEFORE" != "$FEATURES_AFTER" ]; then + false + fi + + rm ${TMPDIR}/img_v2 + + rbd remove testimg_import + rbd remove testimg + + # stripe + rbd import --stripe-count 1000 --stripe-unit 4096 ${TMPDIR}/img testimg + rbd export --export-format 2 testimg ${TMPDIR}/img_v2 + rbd import --export-format 2 ${TMPDIR}/img_v2 testimg_import + rbd info testimg_import|grep "stripe unit"|grep -Ei '(4 KiB|4096)' + rbd info testimg_import|grep "stripe count"|awk '{print $3}'|grep 1000 + + rm ${TMPDIR}/img_v2 + + rbd remove testimg_import + rbd remove testimg + + # snap protect + rbd import --image-format=2 ${TMPDIR}/img testimg + rbd snap create testimg@snap1 + rbd snap create testimg@snap2 + rbd snap protect testimg@snap2 + rbd export --export-format 2 testimg ${TMPDIR}/snap_protect + rbd import --export-format 2 ${TMPDIR}/snap_protect testimg_import + rbd info testimg_import@snap1 | grep 'protected: False' + rbd info testimg_import@snap2 | grep 'protected: True' + + rm ${TMPDIR}/snap_protect + + rbd snap unprotect testimg@snap2 + rbd snap unprotect testimg_import@snap2 + rbd snap purge testimg + rbd snap purge testimg_import + rbd remove testimg + rbd remove testimg_import +fi + +tiered=0 +if ceph osd dump | grep ^pool | grep "'rbd'" | grep tier; then + tiered=1 +fi + +# create specifically sparse files +# 1 1M block of sparse, 1 1M block of random +dd if=/dev/urandom bs=1M seek=1 count=1 of=${TMPDIR}/sparse1 + +# 1 1M block of random, 1 1M block of sparse +dd if=/dev/urandom bs=1M count=1 of=${TMPDIR}/sparse2; truncate ${TMPDIR}/sparse2 -s 2M + +# 1M-block images; validate resulting blocks + +# 1M sparse, 1M data +rbd rm sparse1 || true +rbd import $RBD_CREATE_ARGS --order 20 ${TMPDIR}/sparse1 +rbd ls -l | grep sparse1 | grep -Ei '(2 MiB|2048k)' +[ $tiered -eq 1 -o "$(objects sparse1)" = '1' ] + +# export, compare contents and on-disk size +rbd export sparse1 ${TMPDIR}/sparse1.out +compare_files_and_ondisk_sizes ${TMPDIR}/sparse1 ${TMPDIR}/sparse1.out +rm ${TMPDIR}/sparse1.out +rbd rm sparse1 + +# 1M data, 1M sparse +rbd rm sparse2 || true +rbd import $RBD_CREATE_ARGS --order 20 ${TMPDIR}/sparse2 +rbd ls -l | grep sparse2 | grep -Ei '(2 MiB|2048k)' +[ $tiered -eq 1 -o "$(objects sparse2)" = '0' ] +rbd export sparse2 ${TMPDIR}/sparse2.out +compare_files_and_ondisk_sizes ${TMPDIR}/sparse2 ${TMPDIR}/sparse2.out +rm ${TMPDIR}/sparse2.out +rbd rm sparse2 + +# extend sparse1 to 10 1M blocks, sparse at the end +truncate ${TMPDIR}/sparse1 -s 10M +# import from stdin just for fun, verify still sparse +rbd import $RBD_CREATE_ARGS --order 20 - sparse1 < ${TMPDIR}/sparse1 +rbd ls -l | grep sparse1 | grep -Ei '(10 MiB|10240k)' +[ $tiered -eq 1 -o "$(objects sparse1)" = '1' ] +rbd export sparse1 ${TMPDIR}/sparse1.out 
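+# content must be identical and the allocated block counts must differ by
+# less than 4096 (see compare_files_and_ondisk_sizes above)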
+compare_files_and_ondisk_sizes ${TMPDIR}/sparse1 ${TMPDIR}/sparse1.out +rm ${TMPDIR}/sparse1.out +rbd rm sparse1 + +# extend sparse2 to 4M total with two more nonsparse megs +dd if=/dev/urandom bs=2M count=1 of=${TMPDIR}/sparse2 oflag=append conv=notrunc +# again from stding +rbd import $RBD_CREATE_ARGS --order 20 - sparse2 < ${TMPDIR}/sparse2 +rbd ls -l | grep sparse2 | grep -Ei '(4 MiB|4096k)' +[ $tiered -eq 1 -o "$(objects sparse2)" = '0 2 3' ] +rbd export sparse2 ${TMPDIR}/sparse2.out +compare_files_and_ondisk_sizes ${TMPDIR}/sparse2 ${TMPDIR}/sparse2.out +rm ${TMPDIR}/sparse2.out +rbd rm sparse2 + +# zeros import to a sparse image. Note: all zeros currently +# doesn't work right now due to the way we handle 'empty' fiemaps; +# the image ends up zero-filled. + +echo "partially-sparse file imports to partially-sparse image" +rbd import $RBD_CREATE_ARGS --order 20 ${TMPDIR}/sparse1 sparse +[ $tiered -eq 1 -o "$(objects sparse)" = '1' ] +rbd rm sparse + +echo "zeros import through stdin to sparse image" +# stdin +dd if=/dev/zero bs=1M count=4 | rbd import $RBD_CREATE_ARGS - sparse +[ $tiered -eq 1 -o "$(objects sparse)" = '' ] +rbd rm sparse + +echo "zeros export to sparse file" +# Must be tricky to make image "by hand" ; import won't create a zero image +rbd create $RBD_CREATE_ARGS sparse --size 4 +prefix=$(rbd info sparse | grep block_name_prefix | awk '{print $NF;}') +# drop in 0 object directly +dd if=/dev/zero bs=4M count=1 | rados -p $(get_image_data_pool sparse) \ + put ${prefix}.000000000000 - +[ $tiered -eq 1 -o "$(objects sparse)" = '0' ] +# 1 object full of zeros; export should still create 0-disk-usage file +rm ${TMPDIR}/sparse || true +rbd export sparse ${TMPDIR}/sparse +[ $(stat ${TMPDIR}/sparse --format=%b) = '0' ] +rbd rm sparse + +rm ${TMPDIR}/sparse ${TMPDIR}/sparse1 ${TMPDIR}/sparse2 ${TMPDIR}/sparse3 || true + +echo OK diff --git a/qa/workunits/rbd/issue-20295.sh b/qa/workunits/rbd/issue-20295.sh new file mode 100755 index 000000000..3d617a066 --- /dev/null +++ b/qa/workunits/rbd/issue-20295.sh @@ -0,0 +1,18 @@ +#!/bin/sh -ex + +TEST_POOL=ecpool +TEST_IMAGE=test1 +PGS=12 + +ceph osd pool create $TEST_POOL $PGS $PGS erasure +ceph osd pool application enable $TEST_POOL rbd +ceph osd pool set $TEST_POOL allow_ec_overwrites true +rbd --data-pool $TEST_POOL create --size 1024G $TEST_IMAGE +rbd bench \ + --io-type write \ + --io-size 4096 \ + --io-pattern=rand \ + --io-total 100M \ + $TEST_IMAGE + +echo "OK" diff --git a/qa/workunits/rbd/journal.sh b/qa/workunits/rbd/journal.sh new file mode 100755 index 000000000..ba89e75c9 --- /dev/null +++ b/qa/workunits/rbd/journal.sh @@ -0,0 +1,326 @@ +#!/usr/bin/env bash +set -e + +. $(dirname $0)/../../standalone/ceph-helpers.sh + +function list_tests() +{ + echo "AVAILABLE TESTS" + for i in $TESTS; do + echo " $i" + done +} + +function usage() +{ + echo "usage: $0 [-h|-l|-t <testname> [-t <testname>...] 
[--no-cleanup]]" +} + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +function save_commit_position() +{ + local journal=$1 + + rados -p rbd getomapval journal.${journal} client_ \ + $TMPDIR/${journal}.client_.omap +} + +function restore_commit_position() +{ + local journal=$1 + + rados -p rbd setomapval journal.${journal} client_ \ + < $TMPDIR/${journal}.client_.omap +} + +test_rbd_journal() +{ + local image=testrbdjournal$$ + + rbd create --image-feature exclusive-lock --image-feature journaling \ + --size 128 ${image} + local journal=$(rbd info ${image} --format=xml 2>/dev/null | + $XMLSTARLET sel -t -v "//image/journal") + test -n "${journal}" + rbd journal info ${journal} + rbd journal info --journal ${journal} + rbd journal info --image ${image} + + rbd feature disable ${image} journaling + + rbd info ${image} --format=xml 2>/dev/null | + expect_false $XMLSTARLET sel -t -v "//image/journal" + expect_false rbd journal info ${journal} + expect_false rbd journal info --image ${image} + + rbd feature enable ${image} journaling + + local journal1=$(rbd info ${image} --format=xml 2>/dev/null | + $XMLSTARLET sel -t -v "//image/journal") + test "${journal}" = "${journal1}" + + rbd journal info ${journal} + + rbd journal status ${journal} + + local count=10 + save_commit_position ${journal} + rbd bench --io-type write ${image} --io-size 4096 --io-threads 1 \ + --io-total $((4096 * count)) --io-pattern seq + rbd journal status --image ${image} | fgrep "tid=$((count - 1))" + restore_commit_position ${journal} + rbd journal status --image ${image} | fgrep "positions=[]" + local count1=$(rbd journal inspect --verbose ${journal} | + grep -c 'event_type.*AioWrite') + test "${count}" -eq "${count1}" + + rbd journal export ${journal} $TMPDIR/journal.export + local size=$(stat -c "%s" $TMPDIR/journal.export) + test "${size}" -gt 0 + + rbd export ${image} $TMPDIR/${image}.export + + local image1=${image}1 + rbd create --image-feature exclusive-lock --image-feature journaling \ + --size 128 ${image1} + journal1=$(rbd info ${image1} --format=xml 2>/dev/null | + $XMLSTARLET sel -t -v "//image/journal") + + save_commit_position ${journal1} + rbd journal import --dest ${image1} $TMPDIR/journal.export + rbd snap create ${image1}@test + restore_commit_position ${journal1} + # check that commit position is properly updated: the journal should contain + # 14 entries (2 AioFlush + 10 AioWrite + 1 SnapCreate + 1 OpFinish) and + # commit position set to tid=14 + rbd journal inspect --image ${image1} --verbose | awk ' + /AioFlush/ {a++} # match: "event_type": "AioFlush", + /AioWrite/ {w++} # match: "event_type": "AioWrite", + /SnapCreate/ {s++} # match: "event_type": "SnapCreate", + /OpFinish/ {f++} # match: "event_type": "OpFinish", + /entries inspected/ {t=$1; e=$4} # match: 14 entries inspected, 0 errors + {print} # for diagnostic + END { + if (a != 2 || w != 10 || s != 1 || f != 1 || t != 14 || e != 0) exit(1) + } + ' + + rbd export ${image1}@test $TMPDIR/${image1}.export + cmp $TMPDIR/${image}.export $TMPDIR/${image1}.export + + rbd journal reset ${journal} + + rbd journal inspect --verbose ${journal} | expect_false grep 'event_type' + + rbd snap purge ${image1} + rbd remove ${image1} + rbd remove ${image} +} + + +rbd_assert_eq() { + local image=$1 + local cmd=$2 + local param=$3 + local expected_val=$4 + + local val=$(rbd --format xml ${cmd} --image ${image} | + $XMLSTARLET sel -t -v "${param}") + test "${val}" = "${expected_val}" +} + +test_rbd_create() +{ + local 
image=testrbdcreate$$ + + rbd create --image-feature exclusive-lock --image-feature journaling \ + --journal-pool rbd \ + --journal-object-size 20M \ + --journal-splay-width 6 \ + --size 256 ${image} + + rbd_assert_eq ${image} 'journal info' '//journal/order' 25 + rbd_assert_eq ${image} 'journal info' '//journal/splay_width' 6 + rbd_assert_eq ${image} 'journal info' '//journal/object_pool' rbd + + rbd remove ${image} +} + +test_rbd_copy() +{ + local src=testrbdcopys$$ + rbd create --size 256 ${src} + + local image=testrbdcopy$$ + rbd copy --image-feature exclusive-lock --image-feature journaling \ + --journal-pool rbd \ + --journal-object-size 20M \ + --journal-splay-width 6 \ + ${src} ${image} + + rbd remove ${src} + + rbd_assert_eq ${image} 'journal info' '//journal/order' 25 + rbd_assert_eq ${image} 'journal info' '//journal/splay_width' 6 + rbd_assert_eq ${image} 'journal info' '//journal/object_pool' rbd + + rbd remove ${image} +} + +test_rbd_deep_copy() +{ + local src=testrbdcopys$$ + rbd create --size 256 ${src} + rbd snap create ${src}@snap1 + + local dest=testrbdcopy$$ + rbd deep copy --image-feature exclusive-lock --image-feature journaling \ + --journal-pool rbd \ + --journal-object-size 20M \ + --journal-splay-width 6 \ + ${src} ${dest} + + rbd snap purge ${src} + rbd remove ${src} + + rbd_assert_eq ${dest} 'journal info' '//journal/order' 25 + rbd_assert_eq ${dest} 'journal info' '//journal/splay_width' 6 + rbd_assert_eq ${dest} 'journal info' '//journal/object_pool' rbd + + rbd snap purge ${dest} + rbd remove ${dest} +} + +test_rbd_clone() +{ + local parent=testrbdclonep$$ + rbd create --image-feature layering --size 256 ${parent} + rbd snap create ${parent}@snap + rbd snap protect ${parent}@snap + + local image=testrbdclone$$ + rbd clone --image-feature layering --image-feature exclusive-lock --image-feature journaling \ + --journal-pool rbd \ + --journal-object-size 20M \ + --journal-splay-width 6 \ + ${parent}@snap ${image} + + rbd_assert_eq ${image} 'journal info' '//journal/order' 25 + rbd_assert_eq ${image} 'journal info' '//journal/splay_width' 6 + rbd_assert_eq ${image} 'journal info' '//journal/object_pool' rbd + + rbd remove ${image} + rbd snap unprotect ${parent}@snap + rbd snap purge ${parent} + rbd remove ${parent} +} + +test_rbd_import() +{ + local src=testrbdimports$$ + rbd create --size 256 ${src} + + rbd export ${src} $TMPDIR/${src}.export + rbd remove ${src} + + local image=testrbdimport$$ + rbd import --image-feature exclusive-lock --image-feature journaling \ + --journal-pool rbd \ + --journal-object-size 20M \ + --journal-splay-width 6 \ + $TMPDIR/${src}.export ${image} + + rbd_assert_eq ${image} 'journal info' '//journal/order' 25 + rbd_assert_eq ${image} 'journal info' '//journal/splay_width' 6 + rbd_assert_eq ${image} 'journal info' '//journal/object_pool' rbd + + rbd remove ${image} +} + +test_rbd_feature() +{ + local image=testrbdfeature$$ + + rbd create --image-feature exclusive-lock --size 256 ${image} + + rbd feature enable ${image} journaling \ + --journal-pool rbd \ + --journal-object-size 20M \ + --journal-splay-width 6 + + rbd_assert_eq ${image} 'journal info' '//journal/order' 25 + rbd_assert_eq ${image} 'journal info' '//journal/splay_width' 6 + rbd_assert_eq ${image} 'journal info' '//journal/object_pool' rbd + + rbd remove ${image} +} + +TESTS+=" rbd_journal" +TESTS+=" rbd_create" +TESTS+=" rbd_copy" +TESTS+=" rbd_clone" +TESTS+=" rbd_import" +TESTS+=" rbd_feature" + +# +# "main" follows +# + +tests_to_run=() + +cleanup=true + +while [[ $# 
-gt 0 ]]; do + opt=$1 + + case "$opt" in + "-l" ) + do_list=1 + ;; + "--no-cleanup" ) + cleanup=false + ;; + "-t" ) + shift + if [[ -z "$1" ]]; then + echo "missing argument to '-t'" + usage ; + exit 1 + fi + tests_to_run+=" $1" + ;; + "-h" ) + usage ; + exit 0 + ;; + esac + shift +done + +if [[ $do_list -eq 1 ]]; then + list_tests ; + exit 0 +fi + +TMPDIR=/tmp/rbd_journal$$ +mkdir $TMPDIR +if $cleanup; then + trap "rm -fr $TMPDIR" 0 +fi + +if test -z "$tests_to_run" ; then + tests_to_run="$TESTS" +fi + +for i in $tests_to_run; do + set -x + test_${i} + set +x +done + +echo OK diff --git a/qa/workunits/rbd/kernel.sh b/qa/workunits/rbd/kernel.sh new file mode 100755 index 000000000..faa5760ee --- /dev/null +++ b/qa/workunits/rbd/kernel.sh @@ -0,0 +1,100 @@ +#!/usr/bin/env bash +set -ex + +CEPH_SECRET_FILE=${CEPH_SECRET_FILE:-} +CEPH_ID=${CEPH_ID:-admin} +SECRET_ARGS='' +if [ ! -z $CEPH_SECRET_FILE ]; then + SECRET_ARGS="--secret $CEPH_SECRET_FILE" +fi + +TMP_FILES="/tmp/img1 /tmp/img1.small /tmp/img1.snap1 /tmp/img1.export /tmp/img1.trunc" + +function expect_false() { + if "$@"; then return 1; else return 0; fi +} + +function get_device_dir { + local POOL=$1 + local IMAGE=$2 + local SNAP=$3 + rbd device list | tail -n +2 | egrep "\s+$POOL\s+$IMAGE\s+$SNAP\s+" | + awk '{print $1;}' +} + +function clean_up { + [ -e /dev/rbd/rbd/testimg1@snap1 ] && + sudo rbd device unmap /dev/rbd/rbd/testimg1@snap1 + if [ -e /dev/rbd/rbd/testimg1 ]; then + sudo rbd device unmap /dev/rbd/rbd/testimg1 + rbd snap purge testimg1 || true + fi + rbd ls | grep testimg1 > /dev/null && rbd rm testimg1 || true + sudo rm -f $TMP_FILES +} + +clean_up + +trap clean_up INT TERM EXIT + +# create an image +dd if=/bin/sh of=/tmp/img1 bs=1k count=1 seek=10 +dd if=/bin/dd of=/tmp/img1 bs=1k count=10 seek=100 +dd if=/bin/rm of=/tmp/img1 bs=1k count=100 seek=1000 +dd if=/bin/ls of=/tmp/img1 bs=1k seek=10000 +dd if=/bin/ln of=/tmp/img1 bs=1k seek=100000 +dd if=/dev/zero of=/tmp/img1 count=0 seek=150000 + +# import +rbd import /tmp/img1 testimg1 +sudo rbd device map testimg1 --user $CEPH_ID $SECRET_ARGS + +DEV_ID1=$(get_device_dir rbd testimg1 -) +echo "dev_id1 = $DEV_ID1" +cat /sys/bus/rbd/devices/$DEV_ID1/size +cat /sys/bus/rbd/devices/$DEV_ID1/size | grep 76800000 + +sudo dd if=/dev/rbd/rbd/testimg1 of=/tmp/img1.export +cmp /tmp/img1 /tmp/img1.export + +# snapshot +rbd snap create testimg1 --snap=snap1 +sudo rbd device map --snap=snap1 testimg1 --user $CEPH_ID $SECRET_ARGS + +DEV_ID2=$(get_device_dir rbd testimg1 snap1) +cat /sys/bus/rbd/devices/$DEV_ID2/size | grep 76800000 + +sudo dd if=/dev/rbd/rbd/testimg1@snap1 of=/tmp/img1.snap1 +cmp /tmp/img1 /tmp/img1.snap1 + +# resize +rbd resize testimg1 --size=40 --allow-shrink +cat /sys/bus/rbd/devices/$DEV_ID1/size | grep 41943040 +cat /sys/bus/rbd/devices/$DEV_ID2/size | grep 76800000 + +sudo dd if=/dev/rbd/rbd/testimg1 of=/tmp/img1.small +cp /tmp/img1 /tmp/img1.trunc +truncate -s 41943040 /tmp/img1.trunc +cmp /tmp/img1.trunc /tmp/img1.small + +# rollback expects an unlocked image +# (acquire and) release the lock as a side effect +rbd bench --io-type read --io-size 1 --io-threads 1 --io-total 1 testimg1 + +# rollback and check data again +rbd snap rollback --snap=snap1 testimg1 +cat /sys/bus/rbd/devices/$DEV_ID1/size | grep 76800000 +cat /sys/bus/rbd/devices/$DEV_ID2/size | grep 76800000 +sudo rm -f /tmp/img1.snap1 /tmp/img1.export + +sudo dd if=/dev/rbd/rbd/testimg1@snap1 of=/tmp/img1.snap1 +cmp /tmp/img1 /tmp/img1.snap1 +sudo dd if=/dev/rbd/rbd/testimg1 of=/tmp/img1.export +cmp 
/tmp/img1 /tmp/img1.export + +# zeros are returned if an image or a snapshot is removed +expect_false cmp -n 76800000 /dev/rbd/rbd/testimg1@snap1 /dev/zero +rbd snap rm --snap=snap1 testimg1 +cmp -n 76800000 /dev/rbd/rbd/testimg1@snap1 /dev/zero + +echo OK diff --git a/qa/workunits/rbd/krbd_data_pool.sh b/qa/workunits/rbd/krbd_data_pool.sh new file mode 100755 index 000000000..8eada88bb --- /dev/null +++ b/qa/workunits/rbd/krbd_data_pool.sh @@ -0,0 +1,206 @@ +#!/usr/bin/env bash + +set -ex + +export RBD_FORCE_ALLOW_V1=1 + +function fill_image() { + local spec=$1 + + local dev + dev=$(sudo rbd map $spec) + xfs_io -c "pwrite -b $OBJECT_SIZE -S 0x78 -W 0 $IMAGE_SIZE" $dev + sudo rbd unmap $dev +} + +function create_clones() { + local spec=$1 + + rbd snap create $spec@snap + rbd snap protect $spec@snap + + local pool=${spec%/*} # pool/image is assumed + local image=${spec#*/} + local child_pool + for child_pool in $pool clonesonly; do + rbd clone $spec@snap $child_pool/$pool-$image-clone1 + rbd clone $spec@snap --data-pool repdata $child_pool/$pool-$image-clone2 + rbd clone $spec@snap --data-pool ecdata $child_pool/$pool-$image-clone3 + done +} + +function trigger_copyup() { + local spec=$1 + + local dev + dev=$(sudo rbd map $spec) + local i + { + for ((i = 0; i < $NUM_OBJECTS; i++)); do + echo pwrite -b $OBJECT_SIZE -S 0x59 $((i * OBJECT_SIZE + OBJECT_SIZE / 2)) $((OBJECT_SIZE / 2)) + done + echo fsync + echo quit + } | xfs_io $dev + sudo rbd unmap $dev +} + +function compare() { + local spec=$1 + local object=$2 + + local dev + dev=$(sudo rbd map $spec) + local i + for ((i = 0; i < $NUM_OBJECTS; i++)); do + dd if=$dev bs=$OBJECT_SIZE count=1 skip=$i | cmp $object - + done + sudo rbd unmap $dev +} + +function mkfs_and_mount() { + local spec=$1 + + local dev + dev=$(sudo rbd map $spec) + blkdiscard $dev + mkfs.ext4 -q -E nodiscard $dev + sudo mount $dev /mnt + sudo umount /mnt + sudo rbd unmap $dev +} + +function list_HEADs() { + local pool=$1 + + rados -p $pool ls | while read obj; do + if rados -p $pool stat $obj >/dev/null 2>&1; then + echo $obj + fi + done +} + +function count_data_objects() { + local spec=$1 + + local pool + pool=$(rbd info $spec | grep 'data_pool: ' | awk '{ print $NF }') + if [[ -z $pool ]]; then + pool=${spec%/*} # pool/image is assumed + fi + + local prefix + prefix=$(rbd info $spec | grep 'block_name_prefix: ' | awk '{ print $NF }') + rados -p $pool ls | grep -c $prefix +} + +function get_num_clones() { + local pool=$1 + + rados -p $pool --format=json df | + python3 -c 'import sys, json; print(json.load(sys.stdin)["pools"][0]["num_object_clones"])' +} + +ceph osd pool create repdata 24 24 +rbd pool init repdata +ceph osd erasure-code-profile set teuthologyprofile crush-failure-domain=osd m=1 k=2 +ceph osd pool create ecdata 24 24 erasure teuthologyprofile +rbd pool init ecdata +ceph osd pool set ecdata allow_ec_overwrites true +ceph osd pool create rbdnonzero 24 24 +rbd pool init rbdnonzero +ceph osd pool create clonesonly 24 24 +rbd pool init clonesonly + +for pool in rbd rbdnonzero; do + rbd create --size 200 --image-format 1 $pool/img0 + rbd create --size 200 $pool/img1 + rbd create --size 200 --data-pool repdata $pool/img2 + rbd create --size 200 --data-pool ecdata $pool/img3 +done + +IMAGE_SIZE=$(rbd info --format=json img1 | python3 -c 'import sys, json; print(json.load(sys.stdin)["size"])') +OBJECT_SIZE=$(rbd info --format=json img1 | python3 -c 'import sys, json; print(json.load(sys.stdin)["object_size"])') +NUM_OBJECTS=$((IMAGE_SIZE / OBJECT_SIZE)) +[[ 
$((IMAGE_SIZE % OBJECT_SIZE)) -eq 0 ]] + +OBJECT_X=$(mktemp) # xxxx +xfs_io -c "pwrite -b $OBJECT_SIZE -S 0x78 0 $OBJECT_SIZE" $OBJECT_X + +OBJECT_XY=$(mktemp) # xxYY +xfs_io -c "pwrite -b $OBJECT_SIZE -S 0x78 0 $((OBJECT_SIZE / 2))" \ + -c "pwrite -b $OBJECT_SIZE -S 0x59 $((OBJECT_SIZE / 2)) $((OBJECT_SIZE / 2))" \ + $OBJECT_XY + +for pool in rbd rbdnonzero; do + for i in {0..3}; do + fill_image $pool/img$i + if [[ $i -ne 0 ]]; then + create_clones $pool/img$i + for child_pool in $pool clonesonly; do + for j in {1..3}; do + trigger_copyup $child_pool/$pool-img$i-clone$j + done + done + fi + done +done + +# rbd_directory, rbd_children, rbd_info + img0 header + ... +NUM_META_RBDS=$((3 + 1 + 3 * (1*2 + 3*2))) +# rbd_directory, rbd_children, rbd_info + ... +NUM_META_CLONESONLY=$((3 + 2 * 3 * (3*2))) + +[[ $(rados -p rbd ls | wc -l) -eq $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]] +[[ $(rados -p repdata ls | wc -l) -eq $((1 + 14 * NUM_OBJECTS)) ]] +[[ $(rados -p ecdata ls | wc -l) -eq $((1 + 14 * NUM_OBJECTS)) ]] +[[ $(rados -p rbdnonzero ls | wc -l) -eq $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]] +[[ $(rados -p clonesonly ls | wc -l) -eq $((NUM_META_CLONESONLY + 6 * NUM_OBJECTS)) ]] + +for pool in rbd rbdnonzero; do + for i in {0..3}; do + [[ $(count_data_objects $pool/img$i) -eq $NUM_OBJECTS ]] + if [[ $i -ne 0 ]]; then + for child_pool in $pool clonesonly; do + for j in {1..3}; do + [[ $(count_data_objects $child_pool/$pool-img$i-clone$j) -eq $NUM_OBJECTS ]] + done + done + fi + done +done + +[[ $(get_num_clones rbd) -eq 0 ]] +[[ $(get_num_clones repdata) -eq 0 ]] +[[ $(get_num_clones ecdata) -eq 0 ]] +[[ $(get_num_clones rbdnonzero) -eq 0 ]] +[[ $(get_num_clones clonesonly) -eq 0 ]] + +for pool in rbd rbdnonzero; do + for i in {0..3}; do + compare $pool/img$i $OBJECT_X + mkfs_and_mount $pool/img$i + if [[ $i -ne 0 ]]; then + for child_pool in $pool clonesonly; do + for j in {1..3}; do + compare $child_pool/$pool-img$i-clone$j $OBJECT_XY + done + done + fi + done +done + +# mkfs_and_mount should discard some objects everywhere but in clonesonly +[[ $(list_HEADs rbd | wc -l) -lt $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]] +[[ $(list_HEADs repdata | wc -l) -lt $((1 + 14 * NUM_OBJECTS)) ]] +[[ $(list_HEADs ecdata | wc -l) -lt $((1 + 14 * NUM_OBJECTS)) ]] +[[ $(list_HEADs rbdnonzero | wc -l) -lt $((NUM_META_RBDS + 5 * NUM_OBJECTS)) ]] +[[ $(list_HEADs clonesonly | wc -l) -eq $((NUM_META_CLONESONLY + 6 * NUM_OBJECTS)) ]] + +[[ $(get_num_clones rbd) -eq $NUM_OBJECTS ]] +[[ $(get_num_clones repdata) -eq $((2 * NUM_OBJECTS)) ]] +[[ $(get_num_clones ecdata) -eq $((2 * NUM_OBJECTS)) ]] +[[ $(get_num_clones rbdnonzero) -eq $NUM_OBJECTS ]] +[[ $(get_num_clones clonesonly) -eq 0 ]] + +echo OK diff --git a/qa/workunits/rbd/krbd_exclusive_option.sh b/qa/workunits/rbd/krbd_exclusive_option.sh new file mode 100755 index 000000000..f8493ce98 --- /dev/null +++ b/qa/workunits/rbd/krbd_exclusive_option.sh @@ -0,0 +1,233 @@ +#!/usr/bin/env bash + +set -ex + +function expect_false() { + if "$@"; then return 1; else return 0; fi +} + +function assert_locked() { + local dev_id="${1#/dev/rbd}" + + local client_addr + client_addr="$(< $SYSFS_DIR/$dev_id/client_addr)" + + local client_id + client_id="$(< $SYSFS_DIR/$dev_id/client_id)" + # client4324 -> client.4324 + client_id="client.${client_id#client}" + + local watch_cookie + watch_cookie="$(rados -p rbd listwatchers rbd_header.$IMAGE_ID | + grep $client_id | cut -d ' ' -f 3 | cut -d '=' -f 2)" + [[ $(echo -n "$watch_cookie" | grep -c '^') -eq 1 ]] + + local actual + 
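+    # both the actual and the expected JSON are normalized with
+    # "python3 -m json.tool --sort-keys" so that the string comparison
+    # below does not depend on key order or whitespace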
actual="$(rados -p rbd --format=json lock info rbd_header.$IMAGE_ID rbd_lock | + python3 -m json.tool --sort-keys)" + + local expected + expected="$(cat <<EOF | python3 -m json.tool --sort-keys +{ + "lockers": [ + { + "addr": "$client_addr", + "cookie": "auto $watch_cookie", + "description": "", + "expiration": "0.000000", + "name": "$client_id" + } + ], + "name": "rbd_lock", + "tag": "internal", + "type": "exclusive" +} +EOF + )" + + [ "$actual" = "$expected" ] +} + +function assert_unlocked() { + rados -p rbd --format=json lock info rbd_header.$IMAGE_ID rbd_lock | + grep '"lockers":\[\]' +} + +function blocklist_add() { + local dev_id="${1#/dev/rbd}" + + local client_addr + client_addr="$(< $SYSFS_DIR/$dev_id/client_addr)" + + ceph osd blocklist add $client_addr +} + +SYSFS_DIR="/sys/bus/rbd/devices" +IMAGE_NAME="exclusive-option-test" + +rbd create --size 1 --image-feature '' $IMAGE_NAME + +IMAGE_ID="$(rbd info --format=json $IMAGE_NAME | + python3 -c "import sys, json; print(json.load(sys.stdin)['block_name_prefix'].split('.')[1])")" + +DEV=$(sudo rbd map $IMAGE_NAME) +assert_unlocked +sudo rbd unmap $DEV +assert_unlocked + +expect_false sudo rbd map -o exclusive $IMAGE_NAME +assert_unlocked + +expect_false sudo rbd map -o lock_on_read $IMAGE_NAME +assert_unlocked + +rbd feature enable $IMAGE_NAME exclusive-lock +rbd snap create $IMAGE_NAME@snap + +DEV=$(sudo rbd map $IMAGE_NAME) +assert_locked $DEV +[[ $(blockdev --getro $DEV) -eq 0 ]] +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map $IMAGE_NAME@snap) +assert_unlocked +[[ $(blockdev --getro $DEV) -eq 1 ]] +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o ro $IMAGE_NAME) +assert_unlocked +[[ $(blockdev --getro $DEV) -eq 1 ]] +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o exclusive $IMAGE_NAME) +assert_locked $DEV +[[ $(blockdev --getro $DEV) -eq 0 ]] +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o exclusive $IMAGE_NAME@snap) +assert_unlocked +[[ $(blockdev --getro $DEV) -eq 1 ]] +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o exclusive,ro $IMAGE_NAME) +assert_unlocked +[[ $(blockdev --getro $DEV) -eq 1 ]] +sudo rbd unmap $DEV +assert_unlocked + +# alternate syntax +DEV=$(sudo rbd map --exclusive --read-only $IMAGE_NAME) +assert_unlocked +[[ $(blockdev --getro $DEV) -eq 1 ]] +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map $IMAGE_NAME) +assert_locked $DEV +OTHER_DEV=$(sudo rbd map -o noshare $IMAGE_NAME) +assert_locked $OTHER_DEV +dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct +assert_locked $DEV +dd if=/dev/urandom of=$OTHER_DEV bs=4k count=10 oflag=direct +assert_locked $OTHER_DEV +sudo rbd unmap $DEV +sudo rbd unmap $OTHER_DEV +assert_unlocked + +DEV=$(sudo rbd map $IMAGE_NAME) +assert_locked $DEV +OTHER_DEV=$(sudo rbd map -o noshare,exclusive $IMAGE_NAME) +assert_locked $OTHER_DEV +dd if=$DEV of=/dev/null bs=4k count=10 iflag=direct +expect_false dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct +assert_locked $OTHER_DEV +sudo rbd unmap $OTHER_DEV +assert_unlocked +dd if=$DEV of=/dev/null bs=4k count=10 iflag=direct +assert_unlocked +dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct +assert_locked $DEV +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o lock_on_read $IMAGE_NAME) +assert_locked $DEV +OTHER_DEV=$(sudo rbd map -o noshare,exclusive $IMAGE_NAME) +assert_locked $OTHER_DEV +expect_false dd if=$DEV of=/dev/null bs=4k count=10 iflag=direct +expect_false dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct +sudo 
udevadm settle +assert_locked $OTHER_DEV +sudo rbd unmap $OTHER_DEV +assert_unlocked +dd if=$DEV of=/dev/null bs=4k count=10 iflag=direct +assert_locked $DEV +dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct +assert_locked $DEV +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o exclusive $IMAGE_NAME) +assert_locked $DEV +expect_false sudo rbd map -o noshare $IMAGE_NAME +assert_locked $DEV +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o exclusive $IMAGE_NAME) +assert_locked $DEV +expect_false sudo rbd map -o noshare,exclusive $IMAGE_NAME +assert_locked $DEV +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map $IMAGE_NAME) +assert_locked $DEV +rbd resize --size 1G $IMAGE_NAME +assert_unlocked +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map -o exclusive $IMAGE_NAME) +assert_locked $DEV +expect_false rbd resize --size 2G $IMAGE_NAME +assert_locked $DEV +sudo rbd unmap $DEV +assert_unlocked + +DEV=$(sudo rbd map $IMAGE_NAME) +assert_locked $DEV +dd if=/dev/urandom of=$DEV bs=4k count=10 oflag=direct +{ sleep 10; blocklist_add $DEV; } & +PID=$! +expect_false dd if=/dev/urandom of=$DEV bs=4k count=200000 oflag=direct +wait $PID +# break lock +OTHER_DEV=$(sudo rbd map -o noshare $IMAGE_NAME) +assert_locked $OTHER_DEV +sudo rbd unmap $DEV +assert_locked $OTHER_DEV +sudo rbd unmap $OTHER_DEV +assert_unlocked + +# induce a watch error after 30 seconds +DEV=$(sudo rbd map -o exclusive,osdkeepalive=60 $IMAGE_NAME) +assert_locked $DEV +OLD_WATCHER="$(rados -p rbd listwatchers rbd_header.$IMAGE_ID)" +sleep 40 +assert_locked $DEV +NEW_WATCHER="$(rados -p rbd listwatchers rbd_header.$IMAGE_ID)" +# same client_id, old cookie < new cookie +[ "$(echo "$OLD_WATCHER" | cut -d ' ' -f 2)" = \ + "$(echo "$NEW_WATCHER" | cut -d ' ' -f 2)" ] +[[ $(echo "$OLD_WATCHER" | cut -d ' ' -f 3 | cut -d '=' -f 2) -lt \ + $(echo "$NEW_WATCHER" | cut -d ' ' -f 3 | cut -d '=' -f 2) ]] +sudo rbd unmap $DEV +assert_unlocked + +echo OK diff --git a/qa/workunits/rbd/krbd_fallocate.sh b/qa/workunits/rbd/krbd_fallocate.sh new file mode 100755 index 000000000..79efa1a8b --- /dev/null +++ b/qa/workunits/rbd/krbd_fallocate.sh @@ -0,0 +1,151 @@ +#!/usr/bin/env bash + +# - fallocate -z deallocates because BLKDEV_ZERO_NOUNMAP hint is ignored by +# krbd +# +# - big unaligned blkdiscard and fallocate -z/-p leave the objects in place + +set -ex + +# no blkdiscard(8) in trusty +function py_blkdiscard() { + local offset=$1 + + python3 <<EOF +import fcntl, struct +BLKDISCARD = 0x1277 +with open('$DEV', 'w') as dev: + fcntl.ioctl(dev, BLKDISCARD, struct.pack('QQ', $offset, $IMAGE_SIZE - $offset)) +EOF +} + +# fallocate(1) in trusty doesn't support -z/-p +function py_fallocate() { + local mode=$1 + local offset=$2 + + python3 <<EOF +import os, ctypes, ctypes.util +FALLOC_FL_KEEP_SIZE = 0x01 +FALLOC_FL_PUNCH_HOLE = 0x02 +FALLOC_FL_ZERO_RANGE = 0x10 +libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True) +with open('$DEV', 'w') as dev: + if libc.fallocate(dev.fileno(), ctypes.c_int($mode), ctypes.c_long($offset), ctypes.c_long($IMAGE_SIZE - $offset)): + err = ctypes.get_errno() + raise OSError(err, os.strerror(err)) +EOF +} + +function allocate() { + xfs_io -c "pwrite -b $OBJECT_SIZE -W 0 $IMAGE_SIZE" $DEV + assert_allocated +} + +function assert_allocated() { + cmp <(od -xAx $DEV) - <<EOF +000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd +* +$(printf %x $IMAGE_SIZE) +EOF + [[ $(rados -p rbd ls | grep -c rbd_data.$IMAGE_ID) -eq $NUM_OBJECTS ]] +} + +function assert_zeroes() { + local 
num_objects_expected=$1 + + cmp <(od -xAx $DEV) - <<EOF +000000 0000 0000 0000 0000 0000 0000 0000 0000 +* +$(printf %x $IMAGE_SIZE) +EOF + [[ $(rados -p rbd ls | grep -c rbd_data.$IMAGE_ID) -eq $num_objects_expected ]] +} + +function assert_zeroes_unaligned() { + local num_objects_expected=$1 + + cmp <(od -xAx $DEV) - <<EOF +000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd +* +$(printf %x $((OBJECT_SIZE / 2))) 0000 0000 0000 0000 0000 0000 0000 0000 +* +$(printf %x $IMAGE_SIZE) +EOF + [[ $(rados -p rbd ls | grep -c rbd_data.$IMAGE_ID) -eq $num_objects_expected ]] + for ((i = 0; i < $num_objects_expected; i++)); do + rados -p rbd stat rbd_data.$IMAGE_ID.$(printf %016x $i) | egrep "(size $((OBJECT_SIZE / 2)))|(size 0)" + done +} + +IMAGE_NAME="fallocate-test" + +rbd create --size 200 $IMAGE_NAME + +IMAGE_SIZE=$(rbd info --format=json $IMAGE_NAME | python3 -c 'import sys, json; print(json.load(sys.stdin)["size"])') +OBJECT_SIZE=$(rbd info --format=json $IMAGE_NAME | python3 -c 'import sys, json; print(json.load(sys.stdin)["object_size"])') +NUM_OBJECTS=$((IMAGE_SIZE / OBJECT_SIZE)) +[[ $((IMAGE_SIZE % OBJECT_SIZE)) -eq 0 ]] + +IMAGE_ID="$(rbd info --format=json $IMAGE_NAME | + python3 -c "import sys, json; print(json.load(sys.stdin)['block_name_prefix'].split('.')[1])")" + +DEV=$(sudo rbd map $IMAGE_NAME) + +# make sure -ENOENT is hidden +assert_zeroes 0 +py_blkdiscard 0 +assert_zeroes 0 + +# blkdev_issue_discard +allocate +py_blkdiscard 0 +assert_zeroes 0 + +# blkdev_issue_zeroout w/ BLKDEV_ZERO_NOUNMAP +allocate +py_fallocate FALLOC_FL_ZERO_RANGE\|FALLOC_FL_KEEP_SIZE 0 +assert_zeroes 0 + +# blkdev_issue_zeroout w/ BLKDEV_ZERO_NOFALLBACK +allocate +py_fallocate FALLOC_FL_PUNCH_HOLE\|FALLOC_FL_KEEP_SIZE 0 +assert_zeroes 0 + +# unaligned blkdev_issue_discard +allocate +py_blkdiscard $((OBJECT_SIZE / 2)) +assert_zeroes_unaligned $NUM_OBJECTS + +# unaligned blkdev_issue_zeroout w/ BLKDEV_ZERO_NOUNMAP +allocate +py_fallocate FALLOC_FL_ZERO_RANGE\|FALLOC_FL_KEEP_SIZE $((OBJECT_SIZE / 2)) +assert_zeroes_unaligned $NUM_OBJECTS + +# unaligned blkdev_issue_zeroout w/ BLKDEV_ZERO_NOFALLBACK +allocate +py_fallocate FALLOC_FL_PUNCH_HOLE\|FALLOC_FL_KEEP_SIZE $((OBJECT_SIZE / 2)) +assert_zeroes_unaligned $NUM_OBJECTS + +sudo rbd unmap $DEV + +DEV=$(sudo rbd map -o notrim $IMAGE_NAME) + +# blkdev_issue_discard +allocate +py_blkdiscard 0 |& grep 'Operation not supported' +assert_allocated + +# blkdev_issue_zeroout w/ BLKDEV_ZERO_NOUNMAP +allocate +py_fallocate FALLOC_FL_ZERO_RANGE\|FALLOC_FL_KEEP_SIZE 0 +assert_zeroes $NUM_OBJECTS + +# blkdev_issue_zeroout w/ BLKDEV_ZERO_NOFALLBACK +allocate +py_fallocate FALLOC_FL_PUNCH_HOLE\|FALLOC_FL_KEEP_SIZE 0 |& grep 'Operation not supported' +assert_allocated + +sudo rbd unmap $DEV + +echo OK diff --git a/qa/workunits/rbd/krbd_huge_osdmap.sh b/qa/workunits/rbd/krbd_huge_osdmap.sh new file mode 100755 index 000000000..0a550d674 --- /dev/null +++ b/qa/workunits/rbd/krbd_huge_osdmap.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +# This is a test for https://tracker.ceph.com/issues/40481. +# +# An osdmap with 60000 slots encodes to ~16M, of which the ignored portion +# is ~13M. However in-memory osdmap is larger than ~3M: in-memory osd_addr +# array for 60000 OSDs is ~8M because of sockaddr_storage. +# +# Set mon_max_osd = 60000 in ceph.conf. 
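+#
+# Rough arithmetic behind the figures above, as an illustration (assuming
+# sockaddr_storage is 128 bytes): 60000 OSDs * 128 bytes is ~7.3M for the
+# osd_addr array alone, already more than the ~3M non-ignored portion of
+# the encoded map.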
+ +set -ex + +function expect_false() { + if "$@"; then return 1; else return 0; fi +} + +function run_test() { + local dev + + # initially tiny, grow via incrementals + dev=$(sudo rbd map img) + for max in 8 60 600 6000 60000; do + ceph osd setmaxosd $max + expect_false sudo rbd map wait_for/latest_osdmap + xfs_io -c 'pwrite -w 0 12M' $DEV + done + ceph osd getcrushmap -o /dev/stdout | ceph osd setcrushmap -i /dev/stdin + expect_false sudo rbd map wait_for/latest_osdmap + xfs_io -c 'pwrite -w 0 12M' $DEV + sudo rbd unmap $dev + + # initially huge, shrink via incrementals + dev=$(sudo rbd map img) + for max in 60000 6000 600 60 8; do + ceph osd setmaxosd $max + expect_false sudo rbd map wait_for/latest_osdmap + xfs_io -c 'pwrite -w 0 12M' $DEV + done + ceph osd getcrushmap -o /dev/stdout | ceph osd setcrushmap -i /dev/stdin + expect_false sudo rbd map wait_for/latest_osdmap + xfs_io -c 'pwrite -w 0 12M' $DEV + sudo rbd unmap $dev +} + +rbd create --size 12M img +run_test +# repeat with primary affinity (adds an extra array) +ceph osd primary-affinity osd.0 0.5 +run_test + +echo OK diff --git a/qa/workunits/rbd/krbd_latest_osdmap_on_map.sh b/qa/workunits/rbd/krbd_latest_osdmap_on_map.sh new file mode 100755 index 000000000..f70f38639 --- /dev/null +++ b/qa/workunits/rbd/krbd_latest_osdmap_on_map.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +set -ex + +function run_test() { + ceph osd pool create foo 12 + rbd pool init foo + rbd create --size 1 foo/img + + local dev + dev=$(sudo rbd map foo/img) + sudo rbd unmap $dev + + ceph osd pool delete foo foo --yes-i-really-really-mean-it +} + +NUM_ITER=20 + +for ((i = 0; i < $NUM_ITER; i++)); do + run_test +done + +rbd create --size 1 img +DEV=$(sudo rbd map img) +for ((i = 0; i < $NUM_ITER; i++)); do + run_test +done +sudo rbd unmap $DEV + +echo OK diff --git a/qa/workunits/rbd/krbd_namespaces.sh b/qa/workunits/rbd/krbd_namespaces.sh new file mode 100755 index 000000000..0273d8499 --- /dev/null +++ b/qa/workunits/rbd/krbd_namespaces.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash + +set -ex + +function get_block_name_prefix() { + rbd info --format=json $1 | python3 -c "import sys, json; print(json.load(sys.stdin)['block_name_prefix'])" +} + +function do_pwrite() { + local spec=$1 + local old_byte=$2 + local new_byte=$3 + + local dev + dev=$(sudo rbd map $spec) + cmp <(dd if=/dev/zero bs=1M count=10 | tr \\000 \\$old_byte) $dev + xfs_io -c "pwrite -b 1M -S $new_byte 0 10M" $dev + sudo rbd unmap $dev +} + +function do_cmp() { + local spec=$1 + local byte=$2 + + local dev + dev=$(sudo rbd map $spec) + cmp <(dd if=/dev/zero bs=1M count=10 | tr \\000 \\$byte) $dev + sudo rbd unmap $dev +} + +function gen_child_specs() { + local i=$1 + + local child_specs="foo/img$i-clone1 foo/img$i-clone2 foo/ns1/img$i-clone1 foo/ns1/img$i-clone2" + if [[ $i -ge 3 ]]; then + child_specs="$child_specs foo/ns2/img$i-clone1 foo/ns2/img$i-clone2" + fi + echo $child_specs +} + +ceph osd pool create foo 12 +rbd pool init foo +ceph osd pool create bar 12 +rbd pool init bar + +ceph osd set-require-min-compat-client nautilus +rbd namespace create foo/ns1 +rbd namespace create foo/ns2 + +SPECS=(foo/img1 foo/img2 foo/ns1/img3 foo/ns1/img4) + +COUNT=1 +for spec in "${SPECS[@]}"; do + if [[ $spec =~ img1|img3 ]]; then + rbd create --size 10 $spec + else + rbd create --size 10 --data-pool bar $spec + fi + do_pwrite $spec 000 $(printf %03d $COUNT) + rbd snap create $spec@snap + COUNT=$((COUNT + 1)) +done +for i in {1..4}; do + for child_spec in $(gen_child_specs $i); do + if [[ $child_spec =~ clone1 
]]; then + rbd clone ${SPECS[i - 1]}@snap $child_spec + else + rbd clone --data-pool bar ${SPECS[i - 1]}@snap $child_spec + fi + do_pwrite $child_spec $(printf %03d $i) $(printf %03d $COUNT) + COUNT=$((COUNT + 1)) + done +done + +[[ $(rados -p foo ls | grep -c $(get_block_name_prefix foo/img1)) -eq 3 ]] +[[ $(rados -p bar ls | grep -c $(get_block_name_prefix foo/img2)) -eq 3 ]] +[[ $(rados -p foo -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img3)) -eq 3 ]] +[[ $(rados -p bar -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img4)) -eq 3 ]] + +[[ $(rados -p foo ls | grep -c $(get_block_name_prefix foo/img1-clone1)) -eq 3 ]] +[[ $(rados -p bar ls | grep -c $(get_block_name_prefix foo/img1-clone2)) -eq 3 ]] +[[ $(rados -p foo -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img1-clone1)) -eq 3 ]] +[[ $(rados -p bar -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img1-clone2)) -eq 3 ]] + +[[ $(rados -p foo ls | grep -c $(get_block_name_prefix foo/img2-clone1)) -eq 3 ]] +[[ $(rados -p bar ls | grep -c $(get_block_name_prefix foo/img2-clone2)) -eq 3 ]] +[[ $(rados -p foo -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img2-clone1)) -eq 3 ]] +[[ $(rados -p bar -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img2-clone2)) -eq 3 ]] + +[[ $(rados -p foo ls | grep -c $(get_block_name_prefix foo/img3-clone1)) -eq 3 ]] +[[ $(rados -p bar ls | grep -c $(get_block_name_prefix foo/img3-clone2)) -eq 3 ]] +[[ $(rados -p foo -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img3-clone1)) -eq 3 ]] +[[ $(rados -p bar -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img3-clone2)) -eq 3 ]] +[[ $(rados -p foo -N ns2 ls | grep -c $(get_block_name_prefix foo/ns2/img3-clone1)) -eq 3 ]] +[[ $(rados -p bar -N ns2 ls | grep -c $(get_block_name_prefix foo/ns2/img3-clone2)) -eq 3 ]] + +[[ $(rados -p foo ls | grep -c $(get_block_name_prefix foo/img4-clone1)) -eq 3 ]] +[[ $(rados -p bar ls | grep -c $(get_block_name_prefix foo/img4-clone2)) -eq 3 ]] +[[ $(rados -p foo -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img4-clone1)) -eq 3 ]] +[[ $(rados -p bar -N ns1 ls | grep -c $(get_block_name_prefix foo/ns1/img4-clone2)) -eq 3 ]] +[[ $(rados -p foo -N ns2 ls | grep -c $(get_block_name_prefix foo/ns2/img4-clone1)) -eq 3 ]] +[[ $(rados -p bar -N ns2 ls | grep -c $(get_block_name_prefix foo/ns2/img4-clone2)) -eq 3 ]] + +COUNT=1 +for spec in "${SPECS[@]}"; do + do_cmp $spec $(printf %03d $COUNT) + COUNT=$((COUNT + 1)) +done +for i in {1..4}; do + for child_spec in $(gen_child_specs $i); do + do_cmp $child_spec $(printf %03d $COUNT) + COUNT=$((COUNT + 1)) + done +done + +echo OK diff --git a/qa/workunits/rbd/krbd_rxbounce.sh b/qa/workunits/rbd/krbd_rxbounce.sh new file mode 100755 index 000000000..ad00e3f96 --- /dev/null +++ b/qa/workunits/rbd/krbd_rxbounce.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash + +set -ex + +rbd create --size 256 img + +IMAGE_SIZE=$(rbd info --format=json img | python3 -c 'import sys, json; print(json.load(sys.stdin)["size"])') +OBJECT_SIZE=$(rbd info --format=json img | python3 -c 'import sys, json; print(json.load(sys.stdin)["object_size"])') +NUM_OBJECTS=$((IMAGE_SIZE / OBJECT_SIZE)) +[[ $((IMAGE_SIZE % OBJECT_SIZE)) -eq 0 ]] +OP_SIZE=16384 + +DEV=$(sudo rbd map img) +{ + for ((i = 0; i < $NUM_OBJECTS; i++)); do + echo pwrite -b $OP_SIZE -S $i $((i * OBJECT_SIZE)) $OP_SIZE + done + echo fsync + echo quit +} | xfs_io $DEV +sudo rbd unmap $DEV + +g++ -xc++ -o racereads - -lpthread <<EOF +#include <assert.h> +#include <fcntl.h> +#include <stdlib.h> +#include <sys/types.h> 
+#include <sys/stat.h> +#include <unistd.h> + +#include <thread> +#include <vector> + +const int object_size = $OBJECT_SIZE; +const int num_objects = $NUM_OBJECTS; +const int read_len = $OP_SIZE; +const int num_reads = 1024; + +int main() { + int fd = open("$DEV", O_DIRECT | O_RDONLY); + assert(fd >= 0); + + void *buf; + int r = posix_memalign(&buf, 512, read_len); + assert(r == 0); + + std::vector<std::thread> threads; + for (int i = 0; i < num_objects; i++) { + threads.emplace_back( + [fd, buf, read_off = static_cast<off_t>(i) * object_size]() { + for (int i = 0; i < num_reads; i++) { + auto len = pread(fd, buf, read_len, read_off); + assert(len == read_len); + } + }); + } + + for (auto &t : threads) { + t.join(); + } +} +EOF + +DEV=$(sudo rbd map -o ms_mode=legacy img) +sudo dmesg -C +./racereads +[[ $(dmesg | grep -c 'libceph: osd.* bad crc/signature') -gt 100 ]] +sudo rbd unmap $DEV + +DEV=$(sudo rbd map -o ms_mode=legacy,rxbounce img) +sudo dmesg -C +./racereads +[[ $(dmesg | grep -c 'libceph: osd.* bad crc/signature') -eq 0 ]] +sudo rbd unmap $DEV + +DEV=$(sudo rbd map -o ms_mode=crc img) +sudo dmesg -C +./racereads +[[ $(dmesg | grep -c 'libceph: osd.* integrity error') -gt 100 ]] +sudo rbd unmap $DEV + +DEV=$(sudo rbd map -o ms_mode=crc,rxbounce img) +sudo dmesg -C +./racereads +[[ $(dmesg | grep -c 'libceph: osd.* integrity error') -eq 0 ]] +sudo rbd unmap $DEV + +# rxbounce is a no-op for secure mode +DEV=$(sudo rbd map -o ms_mode=secure img) +sudo dmesg -C +./racereads +[[ $(dmesg | grep -c 'libceph: osd.* integrity error') -eq 0 ]] +sudo rbd unmap $DEV + +DEV=$(sudo rbd map -o ms_mode=secure,rxbounce img) +sudo dmesg -C +./racereads +[[ $(dmesg | grep -c 'libceph: osd.* integrity error') -eq 0 ]] +sudo rbd unmap $DEV + +rbd rm img + +echo OK diff --git a/qa/workunits/rbd/krbd_stable_writes.sh b/qa/workunits/rbd/krbd_stable_writes.sh new file mode 100755 index 000000000..d00e5fd04 --- /dev/null +++ b/qa/workunits/rbd/krbd_stable_writes.sh @@ -0,0 +1,141 @@ +#!/usr/bin/env bash + +set -ex + +function assert_dm() { + local name=$1 + local val=$2 + + local devno + devno=$(sudo dmsetup info -c --noheadings -o Major,Minor $name) + grep -q $val /sys/dev/block/$devno/queue/stable_writes +} + +function dmsetup_reload() { + local name=$1 + + local table + table=$(</dev/stdin) + + sudo dmsetup suspend $name + echo "$table" | sudo dmsetup reload $name + sudo dmsetup resume $name +} + +IMAGE_NAME="stable-writes-test" + +rbd create --size 1 $IMAGE_NAME +DEV=$(sudo rbd map $IMAGE_NAME) + +fallocate -l 1M loopfile +LOOP_DEV=$(sudo losetup -f --show loopfile) + +[[ $(blockdev --getsize64 $DEV) -eq 1048576 ]] +grep -q 1 /sys/block/${DEV#/dev/}/queue/stable_writes + +rbd resize --size 2 $IMAGE_NAME +[[ $(blockdev --getsize64 $DEV) -eq 2097152 ]] +grep -q 1 /sys/block/${DEV#/dev/}/queue/stable_writes + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $LOOP_DEV 0 +EOF +assert_dm tbl 0 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $DEV 0 +EOF +assert_dm tbl 1 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $LOOP_DEV 0 +1024 2048 error +EOF +assert_dm tbl 0 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $DEV 0 +1024 2048 error +EOF +assert_dm tbl 1 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $LOOP_DEV 0 +1024 2048 linear $DEV 0 +EOF +assert_dm tbl 1 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $DEV 0 +1024 2048 linear $LOOP_DEV 0 +EOF 
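+# Whenever any segment of the table is backed by the rbd device, the
+# mapper device must advertise stable_writes=1; tables built only from
+# the loop device stay at 0.  The reload sequences below check that the
+# flag tracks the table as rbd-backed segments are added and removed.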
+assert_dm tbl 1 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $LOOP_DEV 0 +EOF +assert_dm tbl 0 +cat <<EOF | dmsetup_reload tbl +0 1024 linear $LOOP_DEV 0 +1024 2048 linear $DEV 0 +EOF +assert_dm tbl 1 +cat <<EOF | dmsetup_reload tbl +0 1024 linear $LOOP_DEV 0 +EOF +assert_dm tbl 0 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $DEV 0 +EOF +assert_dm tbl 1 +cat <<EOF | dmsetup_reload tbl +0 1024 linear $DEV 0 +1024 2048 linear $LOOP_DEV 0 +EOF +assert_dm tbl 1 +cat <<EOF | dmsetup_reload tbl +0 1024 linear $DEV 0 +EOF +assert_dm tbl 1 +sudo dmsetup remove tbl + +cat <<EOF | sudo dmsetup create tbl +0 1024 linear $DEV 0 +EOF +assert_dm tbl 1 +cat <<EOF | dmsetup_reload tbl +0 1024 linear $DEV 0 +1024 2048 linear $LOOP_DEV 0 +EOF +assert_dm tbl 1 +cat <<EOF | dmsetup_reload tbl +0 1024 error +1024 2048 linear $LOOP_DEV 0 +EOF +assert_dm tbl 0 +cat <<EOF | dmsetup_reload tbl +0 1024 linear $DEV 0 +1024 2048 linear $LOOP_DEV 0 +EOF +assert_dm tbl 1 +cat <<EOF | dmsetup_reload tbl +0 1024 linear $DEV 0 +EOF +assert_dm tbl 1 +sudo dmsetup remove tbl + +sudo losetup -d $LOOP_DEV +rm loopfile + +sudo rbd unmap $DEV +rbd rm $IMAGE_NAME + +echo OK diff --git a/qa/workunits/rbd/krbd_udev_enumerate.sh b/qa/workunits/rbd/krbd_udev_enumerate.sh new file mode 100755 index 000000000..494f958f8 --- /dev/null +++ b/qa/workunits/rbd/krbd_udev_enumerate.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +# This is a test for https://tracker.ceph.com/issues/41036, but it also +# triggers https://tracker.ceph.com/issues/41404 in some environments. + +set -ex + +function assert_exit_codes() { + declare -a pids=($@) + + for pid in ${pids[@]}; do + wait $pid + done +} + +function run_map() { + declare -a pids + + for i in {1..300}; do + sudo rbd map img$i & + pids+=($!) + done + + assert_exit_codes ${pids[@]} + [[ $(rbd showmapped | wc -l) -eq 301 ]] +} + +function run_unmap_by_dev() { + declare -a pids + + run_map + for i in {0..299}; do + sudo rbd unmap /dev/rbd$i & + pids+=($!) + done + + assert_exit_codes ${pids[@]} + [[ $(rbd showmapped | wc -l) -eq 0 ]] +} + +function run_unmap_by_spec() { + declare -a pids + + run_map + for i in {1..300}; do + sudo rbd unmap img$i & + pids+=($!) + done + + assert_exit_codes ${pids[@]} + [[ $(rbd showmapped | wc -l) -eq 0 ]] +} + +# Can't test with exclusive-lock, don't bother enabling deep-flatten. +# See https://tracker.ceph.com/issues/42492. +for i in {1..300}; do + rbd create --size 1 --image-feature '' img$i +done + +for i in {1..30}; do + echo Iteration $i + run_unmap_by_dev + run_unmap_by_spec +done + +echo OK diff --git a/qa/workunits/rbd/krbd_udev_netlink_enobufs.sh b/qa/workunits/rbd/krbd_udev_netlink_enobufs.sh new file mode 100755 index 000000000..7c9c53a2f --- /dev/null +++ b/qa/workunits/rbd/krbd_udev_netlink_enobufs.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +# This is a test for https://tracker.ceph.com/issues/41404, verifying that udev +# events are properly reaped while the image is being (un)mapped in the kernel. +# UDEV_BUF_SIZE is 1M (giving us a 2M socket receive buffer), but modprobe + +# modprobe -r generate ~28M worth of "block" events. + +set -ex + +rbd create --size 1 img + +ceph osd pause +sudo rbd map img & +PID=$! 
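+# The map above is stuck because the OSDs are paused, so the rbd
+# process sits waiting for the udev events for its device.  Loading and
+# then unloading scsi_debug generates far more "block" uevent traffic
+# (~28M, per the header comment) than the 2M socket receive buffer can
+# hold, which is the ENOBUFS condition this test exercises.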
+sudo modprobe scsi_debug max_luns=16 add_host=16 num_parts=1 num_tgts=16 +sudo udevadm settle +sudo modprobe -r scsi_debug +[[ $(rbd showmapped | wc -l) -eq 0 ]] +ceph osd unpause +wait $PID +[[ $(rbd showmapped | wc -l) -eq 2 ]] +sudo rbd unmap img + +echo OK diff --git a/qa/workunits/rbd/krbd_udev_netns.sh b/qa/workunits/rbd/krbd_udev_netns.sh new file mode 100755 index 000000000..e746a682e --- /dev/null +++ b/qa/workunits/rbd/krbd_udev_netns.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash + +set -ex + +sudo ip netns add ns1 +sudo ip link add veth1-ext type veth peer name veth1-int +sudo ip link set veth1-int netns ns1 + +sudo ip netns exec ns1 ip link set dev lo up +sudo ip netns exec ns1 ip addr add 192.168.1.2/24 dev veth1-int +sudo ip netns exec ns1 ip link set veth1-int up +sudo ip netns exec ns1 ip route add default via 192.168.1.1 + +sudo ip addr add 192.168.1.1/24 dev veth1-ext +sudo ip link set veth1-ext up + +# Enable forwarding between the namespace and the default route +# interface and set up NAT. In case of multiple default routes, +# just pick the first one. +if [[ $(sysctl -n net.ipv4.ip_forward) -eq 0 ]]; then + sudo iptables -P FORWARD DROP + sudo sysctl -w net.ipv4.ip_forward=1 +fi +IFACE="$(ip route list 0.0.0.0/0 | head -n 1 | cut -d ' ' -f 5)" +sudo iptables -A FORWARD -i veth1-ext -o "$IFACE" -j ACCEPT +sudo iptables -A FORWARD -i "$IFACE" -o veth1-ext -j ACCEPT +sudo iptables -t nat -A POSTROUTING -s 192.168.1.2 -o "$IFACE" -j MASQUERADE + +rbd create --size 300 img + +DEV="$(sudo rbd map img)" +mkfs.ext4 "$DEV" +sudo mount "$DEV" /mnt +sudo umount /mnt +sudo rbd unmap "$DEV" + +sudo ip netns exec ns1 bash <<'EOF' + +set -ex + +DEV="/dev/rbd/rbd/img" +[[ ! -e "$DEV" ]] + +# In a network namespace, "rbd map" maps the device and hangs waiting +# for udev add uevents. udev runs as usual (in particular creating the +# symlink which is used here because the device node is never printed), +# but the uevents it sends out never come because they don't cross +# network namespace boundaries. +set +e +timeout 30s rbd map img +RET=$? +set -e +[[ $RET -eq 124 ]] +[[ -L "$DEV" ]] +mkfs.ext4 -F "$DEV" +mount "$DEV" /mnt +umount /mnt + +# In a network namespace, "rbd unmap" unmaps the device and hangs +# waiting for udev remove uevents. udev runs as usual (removing the +# symlink), but the uevents it sends out never come because they don't +# cross network namespace boundaries. +set +e +timeout 30s rbd unmap "$DEV" +RET=$? +set -e +[[ $RET -eq 124 ]] +[[ ! -e "$DEV" ]] + +# Skip waiting for udev uevents with "-o noudev". 
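+# With "-o noudev" the rbd CLI skips waiting for uevents entirely, so
+# map and unmap return promptly inside the namespace and the device
+# node printed by "rbd map" is used directly instead of the
+# udev-managed symlink.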
+DEV="$(rbd map -o noudev img)" +mkfs.ext4 -F "$DEV" +mount "$DEV" /mnt +umount /mnt +rbd unmap -o noudev "$DEV" + +EOF + +rbd rm img + +sudo iptables -t nat -D POSTROUTING -s 192.168.1.2 -o "$IFACE" -j MASQUERADE +sudo iptables -D FORWARD -i "$IFACE" -o veth1-ext -j ACCEPT +sudo iptables -D FORWARD -i veth1-ext -o "$IFACE" -j ACCEPT +sudo ip netns delete ns1 + +echo OK diff --git a/qa/workunits/rbd/krbd_udev_symlinks.sh b/qa/workunits/rbd/krbd_udev_symlinks.sh new file mode 100755 index 000000000..271476527 --- /dev/null +++ b/qa/workunits/rbd/krbd_udev_symlinks.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash + +set -ex + +SPECS=( +rbd/img1 +rbd/img2 +rbd/img2@snap1 +rbd/img3 +rbd/img3@snap1 +rbd/img3@snap2 +rbd/ns1/img1 +rbd/ns1/img2 +rbd/ns1/img2@snap1 +rbd/ns1/img3 +rbd/ns1/img3@snap1 +rbd/ns1/img3@snap2 +rbd/ns2/img1 +rbd/ns2/img2 +rbd/ns2/img2@snap1 +rbd/ns2/img3 +rbd/ns2/img3@snap1 +rbd/ns2/img3@snap2 +custom/img1 +custom/img1@snap1 +custom/img2 +custom/img2@snap1 +custom/img2@snap2 +custom/img3 +custom/ns1/img1 +custom/ns1/img1@snap1 +custom/ns1/img2 +custom/ns1/img2@snap1 +custom/ns1/img2@snap2 +custom/ns1/img3 +custom/ns2/img1 +custom/ns2/img1@snap1 +custom/ns2/img2 +custom/ns2/img2@snap1 +custom/ns2/img2@snap2 +custom/ns2/img3 +) + +ceph osd pool create custom 8 +rbd pool init custom + +ceph osd set-require-min-compat-client nautilus +rbd namespace create rbd/ns1 +rbd namespace create rbd/ns2 +rbd namespace create custom/ns1 +rbd namespace create custom/ns2 + +# create in order, images before snapshots +for spec in "${SPECS[@]}"; do + if [[ "$spec" =~ snap ]]; then + rbd snap create "$spec" + else + rbd create --size 10 "$spec" + DEV="$(sudo rbd map "$spec")" + sudo sfdisk "$DEV" <<EOF +unit: sectors +${DEV}p1 : start= 2048, size= 2, type=83 +${DEV}p2 : start= 4096, size= 2, type=83 +EOF + sudo rbd unmap "$DEV" + fi +done + +[[ ! -e /dev/rbd ]] + +# map in random order +COUNT=${#SPECS[@]} +read -r -a INDEXES < <(python3 <<EOF +import random +l = list(range($COUNT)) +random.shuffle(l) +print(*l) +EOF +) + +DEVS=() +for idx in "${INDEXES[@]}"; do + DEVS+=("$(sudo rbd map "${SPECS[idx]}")") +done + +[[ $(rbd showmapped | wc -l) -eq $((COUNT + 1)) ]] + +for ((i = 0; i < COUNT; i++)); do + [[ "$(readlink -e "/dev/rbd/${SPECS[INDEXES[i]]}")" == "${DEVS[i]}" ]] + [[ "$(readlink -e "/dev/rbd/${SPECS[INDEXES[i]]}-part1")" == "${DEVS[i]}p1" ]] + [[ "$(readlink -e "/dev/rbd/${SPECS[INDEXES[i]]}-part2")" == "${DEVS[i]}p2" ]] +done + +for idx in "${INDEXES[@]}"; do + sudo rbd unmap "/dev/rbd/${SPECS[idx]}" +done + +[[ ! -e /dev/rbd ]] + +# remove in reverse order, snapshots before images +for ((i = COUNT - 1; i >= 0; i--)); do + if [[ "${SPECS[i]}" =~ snap ]]; then + rbd snap rm "${SPECS[i]}" + else + rbd rm "${SPECS[i]}" + fi +done + +rbd namespace rm custom/ns2 +rbd namespace rm custom/ns1 +rbd namespace rm rbd/ns2 +rbd namespace rm rbd/ns1 + +ceph osd pool delete custom custom --yes-i-really-really-mean-it + +echo OK diff --git a/qa/workunits/rbd/krbd_wac.sh b/qa/workunits/rbd/krbd_wac.sh new file mode 100755 index 000000000..134460409 --- /dev/null +++ b/qa/workunits/rbd/krbd_wac.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +set -ex + +wget http://download.ceph.com/qa/wac.c +gcc -o wac wac.c + +rbd create --size 300 img +DEV=$(sudo rbd map img) + +sudo mkfs.ext4 $DEV +sudo mount $DEV /mnt +set +e +sudo timeout 5m ./wac -l 65536 -n 64 -r /mnt/wac-test +RET=$? 
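+# wac is expected to still be running when the 5 minute timeout fires
+# (exit status 124); the check below treats any other exit status,
+# i.e. wac stopping on its own, as a failure.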
+set -e +[[ $RET -eq 124 ]] +sudo killall -w wac || true # wac forks +sudo umount /mnt + +sudo wipefs -a $DEV +sudo vgcreate vg_img $DEV +sudo lvcreate -L 256M -n lv_img vg_img +udevadm settle +sudo mkfs.ext4 /dev/mapper/vg_img-lv_img +sudo mount /dev/mapper/vg_img-lv_img /mnt +set +e +sudo timeout 5m ./wac -l 65536 -n 64 -r /mnt/wac-test +RET=$? +set -e +[[ $RET -eq 124 ]] +sudo killall -w wac || true # wac forks +sudo umount /mnt +sudo vgremove -f vg_img +sudo pvremove $DEV + +sudo rbd unmap $DEV +rbd rm img + +echo OK diff --git a/qa/workunits/rbd/krbd_watch_errors.sh b/qa/workunits/rbd/krbd_watch_errors.sh new file mode 100755 index 000000000..f650d2a74 --- /dev/null +++ b/qa/workunits/rbd/krbd_watch_errors.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash + +set -ex +set -o pipefail + +function refresh_loop() { + local dev_id="$1" + + set +x + + local i + for ((i = 1; ; i++)); do + echo 1 | sudo tee "${SYSFS_DIR}/${dev_id}/refresh" > /dev/null + if ((i % 100 == 0)); then + echo "Refreshed ${i} times" + fi + done +} + +readonly SYSFS_DIR="/sys/bus/rbd/devices" +readonly IMAGE_NAME="watch-errors-test" + +rbd create -s 1G --image-feature exclusive-lock "${IMAGE_NAME}" + +# induce a watch error every 30 seconds +dev="$(sudo rbd device map -o osdkeepalive=60 "${IMAGE_NAME}")" +dev_id="${dev#/dev/rbd}" + +# constantly refresh, not just on watch errors +refresh_loop "${dev_id}" & +refresh_pid=$! + +sudo dmesg -C + +# test that none of the above triggers a deadlock with a workload +fio --name test --filename="${dev}" --ioengine=libaio --direct=1 \ + --rw=randwrite --norandommap --randrepeat=0 --bs=512 --iodepth=128 \ + --time_based --runtime=1h --eta=never + +num_errors="$(dmesg | grep -c "rbd${dev_id}: encountered watch error")" +echo "Recorded ${num_errors} watch errors" + +kill "${refresh_pid}" +wait + +sudo rbd device unmap "${dev}" + +if ((num_errors < 60)); then + echo "Too few watch errors" + exit 1 +fi + +echo OK diff --git a/qa/workunits/rbd/luks-encryption.sh b/qa/workunits/rbd/luks-encryption.sh new file mode 100755 index 000000000..5d3cc68cd --- /dev/null +++ b/qa/workunits/rbd/luks-encryption.sh @@ -0,0 +1,217 @@ +#!/usr/bin/env bash +set -ex + +CEPH_ID=${CEPH_ID:-admin} +TMP_FILES="/tmp/passphrase /tmp/passphrase2 /tmp/testdata1 /tmp/testdata2 /tmp/cmpdata" + +_sudo() +{ + local cmd + + if [ `id -u` -eq 0 ] + then + "$@" + return $? + fi + + # Look for the command in the user path. If it fails run it as is, + # supposing it is in sudo path. 
+ cmd=`which $1 2>/dev/null` || cmd=$1 + shift + sudo -nE "${cmd}" "$@" +} + +function drop_caches { + sudo sync + echo 3 | sudo tee /proc/sys/vm/drop_caches +} + +function expect_false() { + if "$@"; then return 1; else return 0; fi +} + +function test_encryption_format() { + local format=$1 + clean_up_cryptsetup + + # format + rbd encryption format testimg $format /tmp/passphrase + drop_caches + + # open encryption with cryptsetup + sudo cryptsetup open $RAW_DEV --type luks cryptsetupdev -d /tmp/passphrase + sudo chmod 666 /dev/mapper/cryptsetupdev + + # open encryption with librbd + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg -t nbd -o encryption-passphrase-file=/tmp/passphrase) + sudo chmod 666 $LIBRBD_DEV + + # write via librbd && compare + dd if=/tmp/testdata1 of=$LIBRBD_DEV oflag=direct bs=1M + dd if=/dev/mapper/cryptsetupdev of=/tmp/cmpdata iflag=direct bs=4M count=4 + cmp -n 16MB /tmp/cmpdata /tmp/testdata1 + + # write via cryptsetup && compare + dd if=/tmp/testdata2 of=/dev/mapper/cryptsetupdev oflag=direct bs=1M + dd if=$LIBRBD_DEV of=/tmp/cmpdata iflag=direct bs=4M count=4 + cmp -n 16MB /tmp/cmpdata /tmp/testdata2 + + # FIXME: encryption-aware flatten/resize misbehave if proxied to + # RAW_DEV mapping (i.e. if RAW_DEV mapping ows the lock) + # (acquire and) release the lock as a side effect + rbd bench --io-type read --io-size 1 --io-threads 1 --io-total 1 testimg + + # check that encryption-aware resize compensates LUKS header overhead + (( $(sudo blockdev --getsize64 $LIBRBD_DEV) < (32 << 20) )) + expect_false rbd resize --size 32M testimg + rbd resize --size 32M --encryption-passphrase-file /tmp/passphrase testimg + (( $(sudo blockdev --getsize64 $LIBRBD_DEV) == (32 << 20) )) + + _sudo rbd device unmap -t nbd $LIBRBD_DEV +} + +function test_clone_encryption() { + clean_up_cryptsetup + + # write 1MB plaintext + dd if=/tmp/testdata1 of=$RAW_DEV oflag=direct bs=1M count=1 + + # clone (luks1) + rbd snap create testimg@snap + rbd snap protect testimg@snap + rbd clone testimg@snap testimg1 + rbd encryption format testimg1 luks1 /tmp/passphrase + + # open encryption with librbd, write one more MB, close + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg1 -t nbd -o encryption-format=luks1,encryption-passphrase-file=/tmp/passphrase) + sudo chmod 666 $LIBRBD_DEV + dd if=$LIBRBD_DEV of=/tmp/cmpdata iflag=direct bs=1M count=1 + cmp -n 1MB /tmp/cmpdata /tmp/testdata1 + dd if=/tmp/testdata1 of=$LIBRBD_DEV seek=1 skip=1 oflag=direct bs=1M count=1 + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + # second clone (luks2) + rbd snap create testimg1@snap + rbd snap protect testimg1@snap + rbd clone testimg1@snap testimg2 + rbd encryption format testimg2 luks2 /tmp/passphrase2 + + # open encryption with librbd, write one more MB, close + LIBRBD_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd -o encryption-format=luks2,encryption-passphrase-file=/tmp/passphrase2,encryption-format=luks1,encryption-passphrase-file=/tmp/passphrase) + sudo chmod 666 $LIBRBD_DEV + dd if=$LIBRBD_DEV of=/tmp/cmpdata iflag=direct bs=1M count=2 + cmp -n 2MB /tmp/cmpdata /tmp/testdata1 + dd if=/tmp/testdata1 of=$LIBRBD_DEV seek=2 skip=2 oflag=direct bs=1M count=1 + _sudo rbd device unmap -t nbd $LIBRBD_DEV + + # flatten + expect_false rbd flatten testimg2 --encryption-format luks1 --encryption-format luks2 --encryption-passphrase-file /tmp/passphrase2 --encryption-passphrase-file /tmp/passphrase + rbd flatten testimg2 --encryption-format luks2 --encryption-format luks1 --encryption-passphrase-file /tmp/passphrase2 
--encryption-passphrase-file /tmp/passphrase + + # verify with cryptsetup + RAW_FLAT_DEV=$(_sudo rbd -p rbd map testimg2 -t nbd) + sudo cryptsetup open $RAW_FLAT_DEV --type luks cryptsetupdev -d /tmp/passphrase2 + sudo chmod 666 /dev/mapper/cryptsetupdev + dd if=/dev/mapper/cryptsetupdev of=/tmp/cmpdata iflag=direct bs=1M count=3 + cmp -n 3MB /tmp/cmpdata /tmp/testdata1 + _sudo rbd device unmap -t nbd $RAW_FLAT_DEV +} + +function test_clone_and_load_with_a_single_passphrase { + local expectedfail=$1 + + # clone and format + rbd snap create testimg@snap + rbd snap protect testimg@snap + rbd clone testimg@snap testimg1 + rbd encryption format testimg1 luks2 /tmp/passphrase2 + + if [ "$expectedfail" = "true" ] + then + expect_false rbd flatten testimg1 --encryption-passphrase-file /tmp/passphrase2 + rbd flatten testimg1 --encryption-passphrase-file /tmp/passphrase2 --encryption-passphrase-file /tmp/passphrase + else + rbd flatten testimg1 --encryption-passphrase-file /tmp/passphrase2 + fi + + rbd remove testimg1 + rbd snap unprotect testimg@snap + rbd snap remove testimg@snap +} + +function test_plaintext_detection { + # 16k LUKS header + sudo cryptsetup -q luksFormat --type luks2 --luks2-metadata-size 16k $RAW_DEV /tmp/passphrase + test_clone_and_load_with_a_single_passphrase true + + # 4m LUKS header + sudo cryptsetup -q luksFormat --type luks2 --luks2-metadata-size 4m $RAW_DEV /tmp/passphrase + test_clone_and_load_with_a_single_passphrase true + + # no luks header + dd if=/dev/zero of=$RAW_DEV oflag=direct bs=4M count=8 + test_clone_and_load_with_a_single_passphrase false +} + +function get_nbd_device_paths { + rbd device list -t nbd | tail -n +2 | egrep "\s+rbd\s+testimg" | awk '{print $5;}' +} + +function clean_up_cryptsetup() { + ls /dev/mapper/cryptsetupdev && sudo cryptsetup close cryptsetupdev || true +} + +function clean_up { + sudo rm -f $TMP_FILES + clean_up_cryptsetup + for device in $(get_nbd_device_paths); do + _sudo rbd device unmap -t nbd $device + done + + rbd remove testimg2 || true + rbd snap unprotect testimg1@snap || true + rbd snap remove testimg1@snap || true + rbd remove testimg1 || true + rbd snap unprotect testimg@snap || true + rbd snap remove testimg@snap || true + rbd remove testimg || true +} + +if [[ $(uname) != "Linux" ]]; then + echo "LUKS encryption tests only supported on Linux" + exit 0 +fi + + +if [[ $(($(ceph-conf --name client.${CEPH_ID} --show-config-value rbd_default_features) & 64)) != 0 ]]; then + echo "LUKS encryption tests not supported alongside image journaling feature" + exit 0 +fi + +clean_up + +trap clean_up INT TERM EXIT + +# generate test data +dd if=/dev/urandom of=/tmp/testdata1 bs=4M count=4 +dd if=/dev/urandom of=/tmp/testdata2 bs=4M count=4 + +# create passphrase files +printf "pass\0word\n" > /tmp/passphrase +printf "\t password2 " > /tmp/passphrase2 + +# create an image +rbd create testimg --size=32M + +# map raw data to nbd device +RAW_DEV=$(_sudo rbd -p rbd map testimg -t nbd) +sudo chmod 666 $RAW_DEV + +test_plaintext_detection + +test_encryption_format luks1 +test_encryption_format luks2 + +test_clone_encryption + +echo OK diff --git a/qa/workunits/rbd/map-snapshot-io.sh b/qa/workunits/rbd/map-snapshot-io.sh new file mode 100755 index 000000000..a69d84829 --- /dev/null +++ b/qa/workunits/rbd/map-snapshot-io.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +# http://tracker.ceph.com/issues/3964 + +set -ex + +rbd create image -s 100 +DEV=$(sudo rbd map image) +dd if=/dev/zero of=$DEV oflag=direct count=10 +rbd snap create image@s1 +dd if=/dev/zero 
of=$DEV oflag=direct count=10 # used to fail +rbd snap rm image@s1 +dd if=/dev/zero of=$DEV oflag=direct count=10 +sudo rbd unmap $DEV +rbd rm image + +echo OK diff --git a/qa/workunits/rbd/map-unmap.sh b/qa/workunits/rbd/map-unmap.sh new file mode 100755 index 000000000..99863849e --- /dev/null +++ b/qa/workunits/rbd/map-unmap.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +set -ex + +RUN_TIME=300 # approximate duration of run (seconds) + +[ $# -eq 1 ] && RUN_TIME="$1" + +IMAGE_NAME="image-$$" +IMAGE_SIZE="1024" # MB + +function get_time() { + date '+%s' +} + +function times_up() { + local end_time="$1" + + test $(get_time) -ge "${end_time}" +} + +function map_unmap() { + [ $# -eq 1 ] || exit 99 + local image_name="$1" + + local dev + dev="$(sudo rbd map "${image_name}")" + sudo rbd unmap "${dev}" +} + +#### Start + +rbd create "${IMAGE_NAME}" --size="${IMAGE_SIZE}" + +COUNT=0 +START_TIME=$(get_time) +END_TIME=$(expr $(get_time) + ${RUN_TIME}) +while ! times_up "${END_TIME}"; do + map_unmap "${IMAGE_NAME}" + COUNT=$(expr $COUNT + 1) +done +ELAPSED=$(expr "$(get_time)" - "${START_TIME}") + +rbd rm "${IMAGE_NAME}" + +echo "${COUNT} iterations completed in ${ELAPSED} seconds" diff --git a/qa/workunits/rbd/merge_diff.sh b/qa/workunits/rbd/merge_diff.sh new file mode 100755 index 000000000..eb8597304 --- /dev/null +++ b/qa/workunits/rbd/merge_diff.sh @@ -0,0 +1,477 @@ +#!/usr/bin/env bash +set -ex + +export RBD_FORCE_ALLOW_V1=1 + +pool=rbd +gen=$pool/gen +out=$pool/out +testno=1 + +mkdir -p merge_diff_test +pushd merge_diff_test + +function expect_false() +{ + if "$@"; then return 1; else return 0; fi +} + +function clear_all() +{ + fusermount -u mnt || true + + rbd snap purge --no-progress $gen || true + rbd rm --no-progress $gen || true + rbd snap purge --no-progress $out || true + rbd rm --no-progress $out || true + + rm -rf diffs || true +} + +function rebuild() +{ + clear_all + echo Starting test $testno + ((testno++)) + if [[ "$2" -lt "$1" ]] && [[ "$3" -gt "1" ]]; then + rbd create $gen --size 100 --object-size $1 --stripe-unit $2 --stripe-count $3 --image-format $4 + else + rbd create $gen --size 100 --object-size $1 --image-format $4 + fi + rbd create $out --size 1 --object-size 524288 + mkdir -p mnt diffs + # lttng has atexit handlers that need to be fork/clone aware + LD_PRELOAD=liblttng-ust-fork.so.0 rbd-fuse -p $pool mnt +} + +function write() +{ + dd if=/dev/urandom of=mnt/gen bs=1M conv=notrunc seek=$1 count=$2 +} + +function snap() +{ + rbd snap create $gen@$1 +} + +function resize() +{ + rbd resize --no-progress $gen --size $1 --allow-shrink +} + +function export_diff() +{ + if [ $2 == "head" ]; then + target="$gen" + else + target="$gen@$2" + fi + if [ $1 == "null" ]; then + rbd export-diff --no-progress $target diffs/$1.$2 + else + rbd export-diff --no-progress $target --from-snap $1 diffs/$1.$2 + fi +} + +function merge_diff() +{ + rbd merge-diff diffs/$1.$2 diffs/$2.$3 diffs/$1.$3 +} + +function check() +{ + rbd import-diff --no-progress diffs/$1.$2 $out || return -1 + if [ "$2" == "head" ]; then + sum1=`rbd export $gen - | md5sum` + else + sum1=`rbd export $gen@$2 - | md5sum` + fi + sum2=`rbd export $out - | md5sum` + if [ "$sum1" != "$sum2" ]; then + exit -1 + fi + if [ "$2" != "head" ]; then + rbd snap ls $out | awk '{print $2}' | grep "^$2\$" || return -1 + fi +} + +#test f/t header +rebuild 4194304 4194304 1 2 +write 0 1 +snap a +write 1 1 +export_diff null a +export_diff a head +merge_diff null a head +check null head + +rebuild 4194304 4194304 1 2 +write 0 1 +snap a 
+write 1 1 +snap b +write 2 1 +export_diff null a +export_diff a b +export_diff b head +merge_diff null a b +check null b + +rebuild 4194304 4194304 1 2 +write 0 1 +snap a +write 1 1 +snap b +write 2 1 +export_diff null a +export_diff a b +export_diff b head +merge_diff a b head +check null a +check a head + +rebuild 4194304 4194304 1 2 +write 0 1 +snap a +write 1 1 +snap b +write 2 1 +export_diff null a +export_diff a b +export_diff b head +rbd merge-diff diffs/null.a diffs/a.b - | rbd merge-diff - diffs/b.head - > diffs/null.head +check null head + +#data test +rebuild 4194304 4194304 1 2 +write 4 2 +snap s101 +write 0 3 +write 8 2 +snap s102 +export_diff null s101 +export_diff s101 s102 +merge_diff null s101 s102 +check null s102 + +rebuild 4194304 4194304 1 2 +write 0 3 +write 2 5 +write 8 2 +snap s201 +write 0 2 +write 6 3 +snap s202 +export_diff null s201 +export_diff s201 s202 +merge_diff null s201 s202 +check null s202 + +rebuild 4194304 4194304 1 2 +write 0 4 +write 12 6 +snap s301 +write 0 6 +write 10 5 +write 16 4 +snap s302 +export_diff null s301 +export_diff s301 s302 +merge_diff null s301 s302 +check null s302 + +rebuild 4194304 4194304 1 2 +write 0 12 +write 14 2 +write 18 2 +snap s401 +write 1 2 +write 5 6 +write 13 3 +write 18 2 +snap s402 +export_diff null s401 +export_diff s401 s402 +merge_diff null s401 s402 +check null s402 + +rebuild 4194304 4194304 1 2 +write 2 4 +write 10 12 +write 27 6 +write 36 4 +snap s501 +write 0 24 +write 28 4 +write 36 4 +snap s502 +export_diff null s501 +export_diff s501 s502 +merge_diff null s501 s502 +check null s502 + +rebuild 4194304 4194304 1 2 +write 0 8 +resize 5 +snap r1 +resize 20 +write 12 8 +snap r2 +resize 8 +write 4 4 +snap r3 +export_diff null r1 +export_diff r1 r2 +export_diff r2 r3 +merge_diff null r1 r2 +merge_diff null r2 r3 +check null r3 + +rebuild 4194304 4194304 1 2 +write 0 8 +resize 5 +snap r1 +resize 20 +write 12 8 +snap r2 +resize 8 +write 4 4 +snap r3 +resize 10 +snap r4 +export_diff null r1 +export_diff r1 r2 +export_diff r2 r3 +export_diff r3 r4 +merge_diff null r1 r2 +merge_diff null r2 r3 +merge_diff null r3 r4 +check null r4 + +# merge diff doesn't yet support fancy striping +# rebuild 4194304 65536 8 2 +# write 0 32 +# snap r1 +# write 16 32 +# snap r2 +# export_diff null r1 +# export_diff r1 r2 +# expect_false merge_diff null r1 r2 + +rebuild 4194304 4194304 1 2 +write 0 1 +write 2 1 +write 4 1 +write 6 1 +snap s1 +write 1 1 +write 3 1 +write 5 1 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 1 1 +write 3 1 +write 5 1 +snap s1 +write 0 1 +write 2 1 +write 4 1 +write 6 1 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 3 +write 6 3 +write 12 3 +snap s1 +write 1 1 +write 7 1 +write 13 1 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 3 +write 6 3 +write 12 3 +snap s1 +write 0 1 +write 6 1 +write 12 1 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 3 +write 6 3 +write 12 3 +snap s1 +write 2 1 +write 8 1 +write 14 1 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 1 1 +write 7 1 +write 13 1 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 
4194304 4194304 1 2 +write 0 1 +write 6 1 +write 12 1 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 2 1 +write 8 1 +write 14 1 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 3 +write 6 3 +write 12 3 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 2 4 +write 8 4 +write 14 4 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 4 +write 6 4 +write 12 4 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 6 +write 6 6 +write 12 6 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 3 6 +write 9 6 +write 15 6 +snap s1 +write 0 3 +write 6 3 +write 12 3 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 8 +snap s1 +resize 2 +resize 100 +snap s2 +export_diff null s1 +export_diff s1 s2 +merge_diff null s1 s2 +check null s2 + +rebuild 4194304 4194304 1 2 +write 0 8 +snap s1 +resize 2 +resize 100 +snap s2 +write 20 2 +snap s3 +export_diff null s1 +export_diff s1 s2 +export_diff s2 s3 +merge_diff s1 s2 s3 +check null s1 +check s1 s3 + +#addme + +clear_all +popd +rm -rf merge_diff_test + +echo OK diff --git a/qa/workunits/rbd/notify_master.sh b/qa/workunits/rbd/notify_master.sh new file mode 100755 index 000000000..99ccd74db --- /dev/null +++ b/qa/workunits/rbd/notify_master.sh @@ -0,0 +1,5 @@ +#!/bin/sh -ex + +relpath=$(dirname $0)/../../../src/test/librbd +python3 $relpath/test_notify.py master +exit 0 diff --git a/qa/workunits/rbd/notify_slave.sh b/qa/workunits/rbd/notify_slave.sh new file mode 100755 index 000000000..7f49a0c7d --- /dev/null +++ b/qa/workunits/rbd/notify_slave.sh @@ -0,0 +1,5 @@ +#!/bin/sh -ex + +relpath=$(dirname $0)/../../../src/test/librbd +python3 $relpath/test_notify.py slave +exit 0 diff --git a/qa/workunits/rbd/permissions.sh b/qa/workunits/rbd/permissions.sh new file mode 100755 index 000000000..f8a9aaa71 --- /dev/null +++ b/qa/workunits/rbd/permissions.sh @@ -0,0 +1,269 @@ +#!/usr/bin/env bash +set -ex + +IMAGE_FEATURES="layering,exclusive-lock,object-map,fast-diff" + +clone_v2_enabled() { + image_spec=$1 + rbd info $image_spec | grep "clone-parent" +} + +create_pools() { + ceph osd pool create images 32 + rbd pool init images + ceph osd pool create volumes 32 + rbd pool init volumes +} + +delete_pools() { + (ceph osd pool delete images images --yes-i-really-really-mean-it || true) >/dev/null 2>&1 + (ceph osd pool delete volumes volumes --yes-i-really-really-mean-it || true) >/dev/null 2>&1 + +} + +recreate_pools() { + delete_pools + create_pools +} + +delete_users() { + (ceph auth del client.volumes || true) >/dev/null 2>&1 + (ceph auth del client.images || true) >/dev/null 2>&1 + + (ceph auth del client.snap_none || true) >/dev/null 2>&1 + (ceph auth del client.snap_all || true) >/dev/null 2>&1 + (ceph auth del client.snap_pool || true) >/dev/null 2>&1 + (ceph auth del client.snap_profile_all || 
true) >/dev/null 2>&1 + (ceph auth del client.snap_profile_pool || true) >/dev/null 2>&1 + + (ceph auth del client.mon_write || true) >/dev/null 2>&1 +} + +create_users() { + ceph auth get-or-create client.volumes \ + mon 'profile rbd' \ + osd 'profile rbd pool=volumes, profile rbd-read-only pool=images' \ + mgr 'profile rbd pool=volumes, profile rbd-read-only pool=images' >> $KEYRING + ceph auth get-or-create client.images mon 'profile rbd' osd 'profile rbd pool=images' >> $KEYRING + + ceph auth get-or-create client.snap_none mon 'allow r' >> $KEYRING + ceph auth get-or-create client.snap_all mon 'allow r' osd 'allow w' >> $KEYRING + ceph auth get-or-create client.snap_pool mon 'allow r' osd 'allow w pool=images' >> $KEYRING + ceph auth get-or-create client.snap_profile_all mon 'allow r' osd 'profile rbd' >> $KEYRING + ceph auth get-or-create client.snap_profile_pool mon 'allow r' osd 'profile rbd pool=images' >> $KEYRING + + ceph auth get-or-create client.mon_write mon 'allow *' >> $KEYRING +} + +expect() { + + set +e + + local expected_ret=$1 + local ret + + shift + cmd=$@ + + eval $cmd + ret=$? + + set -e + + if [[ $ret -ne $expected_ret ]]; then + echo "ERROR: running \'$cmd\': expected $expected_ret got $ret" + return 1 + fi + + return 0 +} + +test_images_access() { + rbd -k $KEYRING --id images create --image-format 2 --image-feature $IMAGE_FEATURES -s 1 images/foo + rbd -k $KEYRING --id images snap create images/foo@snap + rbd -k $KEYRING --id images snap protect images/foo@snap + rbd -k $KEYRING --id images snap unprotect images/foo@snap + rbd -k $KEYRING --id images snap protect images/foo@snap + rbd -k $KEYRING --id images export images/foo@snap - >/dev/null + expect 16 rbd -k $KEYRING --id images snap rm images/foo@snap + + rbd -k $KEYRING --id volumes clone --image-feature $IMAGE_FEATURES images/foo@snap volumes/child + + if ! 
clone_v2_enabled images/foo; then + expect 16 rbd -k $KEYRING --id images snap unprotect images/foo@snap + fi + + expect 1 rbd -k $KEYRING --id volumes snap unprotect images/foo@snap + expect 1 rbd -k $KEYRING --id images flatten volumes/child + rbd -k $KEYRING --id volumes flatten volumes/child + expect 1 rbd -k $KEYRING --id volumes snap unprotect images/foo@snap + rbd -k $KEYRING --id images snap unprotect images/foo@snap + + expect 39 rbd -k $KEYRING --id images rm images/foo + rbd -k $KEYRING --id images snap rm images/foo@snap + rbd -k $KEYRING --id images rm images/foo + rbd -k $KEYRING --id volumes rm volumes/child +} + +test_volumes_access() { + rbd -k $KEYRING --id images create --image-format 2 --image-feature $IMAGE_FEATURES -s 1 images/foo + rbd -k $KEYRING --id images snap create images/foo@snap + rbd -k $KEYRING --id images snap protect images/foo@snap + + # commands that work with read-only access + rbd -k $KEYRING --id volumes info images/foo@snap + rbd -k $KEYRING --id volumes snap ls images/foo + rbd -k $KEYRING --id volumes export images/foo - >/dev/null + rbd -k $KEYRING --id volumes cp images/foo volumes/foo_copy + rbd -k $KEYRING --id volumes rm volumes/foo_copy + rbd -k $KEYRING --id volumes children images/foo@snap + rbd -k $KEYRING --id volumes lock list images/foo + + # commands that fail with read-only access + expect 1 rbd -k $KEYRING --id volumes resize -s 2 images/foo --allow-shrink + expect 1 rbd -k $KEYRING --id volumes snap create images/foo@2 + expect 1 rbd -k $KEYRING --id volumes snap rollback images/foo@snap + expect 1 rbd -k $KEYRING --id volumes snap remove images/foo@snap + expect 1 rbd -k $KEYRING --id volumes snap purge images/foo + expect 1 rbd -k $KEYRING --id volumes snap unprotect images/foo@snap + expect 1 rbd -k $KEYRING --id volumes flatten images/foo + expect 1 rbd -k $KEYRING --id volumes lock add images/foo test + expect 1 rbd -k $KEYRING --id volumes lock remove images/foo test locker + expect 1 rbd -k $KEYRING --id volumes ls rbd + + # create clone and snapshot + rbd -k $KEYRING --id volumes clone --image-feature $IMAGE_FEATURES images/foo@snap volumes/child + rbd -k $KEYRING --id volumes snap create volumes/child@snap1 + rbd -k $KEYRING --id volumes snap protect volumes/child@snap1 + rbd -k $KEYRING --id volumes snap create volumes/child@snap2 + + # make sure original snapshot stays protected + if clone_v2_enabled images/foo; then + rbd -k $KEYRING --id volumes flatten volumes/child + rbd -k $KEYRING --id volumes snap rm volumes/child@snap2 + rbd -k $KEYRING --id volumes snap unprotect volumes/child@snap1 + else + expect 16 rbd -k $KEYRING --id images snap unprotect images/foo@snap + rbd -k $KEYRING --id volumes flatten volumes/child + expect 16 rbd -k $KEYRING --id images snap unprotect images/foo@snap + rbd -k $KEYRING --id volumes snap rm volumes/child@snap2 + expect 16 rbd -k $KEYRING --id images snap unprotect images/foo@snap + expect 2 rbd -k $KEYRING --id volumes snap rm volumes/child@snap2 + rbd -k $KEYRING --id volumes snap unprotect volumes/child@snap1 + expect 16 rbd -k $KEYRING --id images snap unprotect images/foo@snap + fi + + # clean up + rbd -k $KEYRING --id volumes snap rm volumes/child@snap1 + rbd -k $KEYRING --id images snap unprotect images/foo@snap + rbd -k $KEYRING --id images snap rm images/foo@snap + rbd -k $KEYRING --id images rm images/foo + rbd -k $KEYRING --id volumes rm volumes/child +} + +create_self_managed_snapshot() { + ID=$1 + POOL=$2 + + cat << EOF | CEPH_ARGS="-k $KEYRING" python3 +import rados + 
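+# RBD image snapshots are built on self-managed pool snapshots, so
+# creating one directly through the ioctx needs OSD write caps on the
+# pool in question; that is exactly what the snap_* test users above
+# do or do not have.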
+with rados.Rados(conffile="", rados_id="${ID}") as cluster: + ioctx = cluster.open_ioctx("${POOL}") + + snap_id = ioctx.create_self_managed_snap() + print ("Created snap id {}".format(snap_id)) +EOF +} + +remove_self_managed_snapshot() { + ID=$1 + POOL=$2 + + cat << EOF | CEPH_ARGS="-k $KEYRING" python3 +import rados + +with rados.Rados(conffile="", rados_id="mon_write") as cluster1, \ + rados.Rados(conffile="", rados_id="${ID}") as cluster2: + ioctx1 = cluster1.open_ioctx("${POOL}") + + snap_id = ioctx1.create_self_managed_snap() + print ("Created snap id {}".format(snap_id)) + + ioctx2 = cluster2.open_ioctx("${POOL}") + + ioctx2.remove_self_managed_snap(snap_id) + print ("Removed snap id {}".format(snap_id)) +EOF +} + +test_remove_self_managed_snapshots() { + # Ensure users cannot create self-managed snapshots w/o permissions + expect 1 create_self_managed_snapshot snap_none images + expect 1 create_self_managed_snapshot snap_none volumes + + create_self_managed_snapshot snap_all images + create_self_managed_snapshot snap_all volumes + + create_self_managed_snapshot snap_pool images + expect 1 create_self_managed_snapshot snap_pool volumes + + create_self_managed_snapshot snap_profile_all images + create_self_managed_snapshot snap_profile_all volumes + + create_self_managed_snapshot snap_profile_pool images + expect 1 create_self_managed_snapshot snap_profile_pool volumes + + # Ensure users cannot delete self-managed snapshots w/o permissions + expect 1 remove_self_managed_snapshot snap_none images + expect 1 remove_self_managed_snapshot snap_none volumes + + remove_self_managed_snapshot snap_all images + remove_self_managed_snapshot snap_all volumes + + remove_self_managed_snapshot snap_pool images + expect 1 remove_self_managed_snapshot snap_pool volumes + + remove_self_managed_snapshot snap_profile_all images + remove_self_managed_snapshot snap_profile_all volumes + + remove_self_managed_snapshot snap_profile_pool images + expect 1 remove_self_managed_snapshot snap_profile_pool volumes +} + +test_rbd_support() { + # read-only commands should work on both pools + ceph -k $KEYRING --id volumes rbd perf image stats volumes + ceph -k $KEYRING --id volumes rbd perf image stats images + + # read/write commands should only work on 'volumes' + rbd -k $KEYRING --id volumes create --image-format 2 --image-feature $IMAGE_FEATURES -s 1 volumes/foo + ceph -k $KEYRING --id volumes rbd task add remove volumes/foo + expect 13 ceph -k $KEYRING --id volumes rbd task add remove images/foo +} + +cleanup() { + rm -f $KEYRING +} + +KEYRING=$(mktemp) +trap cleanup EXIT ERR HUP INT QUIT + +delete_users +create_users + +recreate_pools +test_images_access + +recreate_pools +test_volumes_access + +test_remove_self_managed_snapshots + +test_rbd_support + +delete_pools +delete_users + +echo OK +exit 0 diff --git a/qa/workunits/rbd/qemu-iotests.sh b/qa/workunits/rbd/qemu-iotests.sh new file mode 100755 index 000000000..a2e9e0600 --- /dev/null +++ b/qa/workunits/rbd/qemu-iotests.sh @@ -0,0 +1,47 @@ +#!/bin/sh -ex + +# Run qemu-iotests against rbd. These are block-level tests that go +# through qemu but do not involve running a full vm. Note that these +# require the admin ceph user, as there's no way to pass the ceph user +# to qemu-iotests currently. 
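+# The case numbers below are upstream qemu-iotests ids; they are run
+# against the rbd protocol driver further down by pointing TEST_DIR at
+# the pool and invoking "./check -rbd".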
+ +testlist='001 002 003 004 005 008 009 010 011 021 025 032 033' + +git clone https://github.com/qemu/qemu.git +cd qemu + + +if grep -iqE '(bionic|focal|jammy|platform:el9)' /etc/os-release; then + git checkout v2.11.0 +elif grep -iqE '(xenial|platform:el8)' /etc/os-release; then + git checkout v2.3.0 +else + # use v2.2.0-rc3 (last released version that handles all the tests + git checkout 2528043f1f299e0e88cb026f1ca7c40bbb4e1f80 +fi + +cd tests/qemu-iotests +# qemu-iotests expects a binary called just 'qemu' to be available +if [ -x '/usr/bin/qemu-system-x86_64' ] +then + QEMU='/usr/bin/qemu-system-x86_64' +else + QEMU='/usr/libexec/qemu-kvm' +fi + +# Bionic (v2.11.0) tests expect all tools in current directory +ln -s $QEMU qemu +ln -s /usr/bin/qemu-img +ln -s /usr/bin/qemu-io +ln -s /usr/bin/qemu-nbd + +# this is normally generated by configure, but has nothing but a python +# binary definition, which we don't care about. for some reason it is +# not present on trusty. +touch common.env + +# TEST_DIR is the pool for rbd +TEST_DIR=rbd ./check -rbd $testlist + +cd ../../.. +rm -rf qemu diff --git a/qa/workunits/rbd/qemu_dynamic_features.sh b/qa/workunits/rbd/qemu_dynamic_features.sh new file mode 100755 index 000000000..70e9fbb3c --- /dev/null +++ b/qa/workunits/rbd/qemu_dynamic_features.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +set -x + +if [[ -z "${IMAGE_NAME}" ]]; then + echo image name must be provided + exit 1 +fi + +is_qemu_running() { + rbd status ${IMAGE_NAME} | grep -v "Watchers: none" +} + +wait_for_qemu() { + while ! is_qemu_running ; do + echo "*** Waiting for QEMU" + sleep 30 + done +} + +wait_for_qemu +rbd feature disable ${IMAGE_NAME} journaling +rbd feature disable ${IMAGE_NAME} object-map +rbd feature disable ${IMAGE_NAME} exclusive-lock + +while is_qemu_running ; do + echo "*** Enabling all features" + rbd feature enable ${IMAGE_NAME} exclusive-lock || break + rbd feature enable ${IMAGE_NAME} journaling || break + rbd feature enable ${IMAGE_NAME} object-map || break + if is_qemu_running ; then + sleep 60 + fi + + echo "*** Disabling all features" + rbd feature disable ${IMAGE_NAME} journaling || break + rbd feature disable ${IMAGE_NAME} object-map || break + rbd feature disable ${IMAGE_NAME} exclusive-lock || break + if is_qemu_running ; then + sleep 60 + fi +done + +if is_qemu_running ; then + echo "RBD command failed on alive QEMU" + exit 1 +fi diff --git a/qa/workunits/rbd/qemu_rebuild_object_map.sh b/qa/workunits/rbd/qemu_rebuild_object_map.sh new file mode 100755 index 000000000..2647dcdcd --- /dev/null +++ b/qa/workunits/rbd/qemu_rebuild_object_map.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -ex + +if [[ -z "${IMAGE_NAME}" ]]; then + echo image name must be provided + exit 1 +fi + +is_qemu_running() { + rbd status ${IMAGE_NAME} | grep -v "Watchers: none" +} + +wait_for_qemu() { + while ! 
is_qemu_running ; do + echo "*** Waiting for QEMU" + sleep 30 + done +} + +wait_for_qemu +rbd feature disable ${IMAGE_NAME} journaling || true +rbd feature disable ${IMAGE_NAME} fast-diff || true +rbd feature disable ${IMAGE_NAME} object-map || true +rbd feature disable ${IMAGE_NAME} exclusive-lock || true + +rbd feature enable ${IMAGE_NAME} exclusive-lock +rbd feature enable ${IMAGE_NAME} object-map + +while is_qemu_running ; do + echo "*** Rebuilding object map" + rbd object-map rebuild ${IMAGE_NAME} + + if is_qemu_running ; then + sleep 60 + fi +done + diff --git a/qa/workunits/rbd/qos.sh b/qa/workunits/rbd/qos.sh new file mode 100755 index 000000000..feb1d5144 --- /dev/null +++ b/qa/workunits/rbd/qos.sh @@ -0,0 +1,90 @@ +#!/bin/sh -ex + +POOL=rbd +IMAGE=test$$ +IMAGE_SIZE=1G +TOLERANCE_PRCNT=10 + +rbd_bench() { + local image=$1 + local type=$2 + local total=$3 + local qos_type=$4 + local qos_limit=$5 + local iops_var_name=$6 + local bps_var_name=$7 + local timeout=$8 + local timeout_cmd="" + + if [ -n "${timeout}" ]; then + timeout_cmd="timeout --preserve-status ${timeout}" + fi + + # parse `rbd bench` output for string like this: + # elapsed: 25 ops: 2560 ops/sec: 100.08 bytes/sec: 409.13 MiB + iops_bps=$(${timeout_cmd} rbd bench "${image}" \ + --io-type ${type} --io-size 4K \ + --io-total ${total} --rbd-cache=false \ + --rbd_qos_${qos_type}_limit ${qos_limit} | + awk '/elapsed:.* GiB/ {print int($6) ":" int($8) * 1024 * 1024 * 1024} + /elapsed:.* MiB/ {print int($6) ":" int($8) * 1024 * 1024} + /elapsed:.* KiB/ {print int($6) ":" int($8) * 1024} + /elapsed:.* B/ {print int($6) ":" int($8)}') + eval ${iops_var_name}=${iops_bps%:*} + eval ${bps_var_name}=${iops_bps#*:} +} + +rbd create "${POOL}/${IMAGE}" -s ${IMAGE_SIZE} +rbd bench "${POOL}/${IMAGE}" --io-type write --io-size 4M --io-total ${IMAGE_SIZE} + +rbd_bench "${POOL}/${IMAGE}" write ${IMAGE_SIZE} iops 0 iops bps 60 +iops_unlimited=$iops +bps_unlimited=$bps + +test "${iops_unlimited}" -ge 20 || exit 0 + +io_total=$((bps_unlimited * 30)) + +rbd_bench "${POOL}/${IMAGE}" write ${io_total} iops $((iops_unlimited / 2)) iops bps +test "${iops}" -le $((iops_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd_bench "${POOL}/${IMAGE}" write ${io_total} write_iops $((iops_unlimited / 2)) iops bps +test "${iops}" -le $((iops_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd_bench "${POOL}/${IMAGE}" write ${io_total} bps $((bps_unlimited / 2)) iops bps +test "${bps}" -le $((bps_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd_bench "${POOL}/${IMAGE}" write ${io_total} write_bps $((bps_unlimited / 2)) iops bps +test "${bps}" -le $((bps_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd_bench "${POOL}/${IMAGE}" read ${io_total} iops 0 iops bps +iops_unlimited=$iops +bps_unlimited=$bps + +test "${iops_unlimited}" -ge 20 || exit 0 + +io_total=$((bps_unlimited * 30)) + +rbd_bench "${POOL}/${IMAGE}" read ${io_total} iops $((iops_unlimited / 2)) iops bps +test "${iops}" -le $((iops_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd_bench "${POOL}/${IMAGE}" read ${io_total} read_iops $((iops_unlimited / 2)) iops bps +test "${iops}" -le $((iops_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd_bench "${POOL}/${IMAGE}" read ${io_total} bps $((bps_unlimited / 2)) iops bps +test "${bps}" -le $((bps_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd_bench "${POOL}/${IMAGE}" read ${io_total} read_bps $((bps_unlimited / 2)) iops bps +test "${bps}" -le $((bps_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +# test a config 
override is applied +rbd config image set "${POOL}/${IMAGE}" rbd_qos_iops_limit $((iops_unlimited / 4)) +rbd_bench "${POOL}/${IMAGE}" read ${io_total} iops $((iops_unlimited / 2)) iops bps +test "${iops}" -le $((iops_unlimited / 4 * (100 + TOLERANCE_PRCNT) / 100)) +rbd config image remove "${POOL}/${IMAGE}" rbd_qos_iops_limit +rbd_bench "${POOL}/${IMAGE}" read ${io_total} iops $((iops_unlimited / 2)) iops bps +test "${iops}" -le $((iops_unlimited / 2 * (100 + TOLERANCE_PRCNT) / 100)) + +rbd rm "${POOL}/${IMAGE}" + +echo OK diff --git a/qa/workunits/rbd/rbd-ggate.sh b/qa/workunits/rbd/rbd-ggate.sh new file mode 100755 index 000000000..1bf89da38 --- /dev/null +++ b/qa/workunits/rbd/rbd-ggate.sh @@ -0,0 +1,239 @@ +#!/bin/sh -ex + +POOL=testrbdggate$$ +NS=ns +IMAGE=test +SIZE=64 +DATA= +DEV= + +if which xmlstarlet > /dev/null 2>&1; then + XMLSTARLET=xmlstarlet +elif which xml > /dev/null 2>&1; then + XMLSTARLET=xml +else + echo "Missing xmlstarlet binary!" + exit 1 +fi + +if [ `uname -K` -ge 1200078 ] ; then + RBD_GGATE_RESIZE_SUPPORTED=1 +fi + +_sudo() +{ + local cmd + + if [ `id -u` -eq 0 ] + then + "$@" + return $? + fi + + # Look for the command in the user path. If it fails run it as is, + # supposing it is in sudo path. + cmd=`which $1 2>/dev/null` || cmd=$1 + shift + sudo -nE "${cmd}" "$@" +} + +check_geom_gate() +{ + # See if geom_date is load, or can be loaded. + # Otherwise the tests can not run + if ! kldstat -q -n geom_gate ; then + # See if we can load it + if ! _sudo kldload geom_gate ; then + echo Not able to load geom_gate + echo check /var/log/messages as to why + exit 1 + fi + fi +} + +setup() +{ + local ns x + + if [ -e CMakeCache.txt ]; then + # running under cmake build dir + + CEPH_SRC=$(readlink -f $(dirname $0)/../../../src) + CEPH_ROOT=${PWD} + CEPH_BIN=${CEPH_ROOT}/bin + + export LD_LIBRARY_PATH=${CEPH_ROOT}/lib:${LD_LIBRARY_PATH} + export PYTHONPATH=${PYTHONPATH}:${CEPH_SRC}/pybind:${CEPH_ROOT}/lib/cython_modules/lib.3 + PATH=${CEPH_BIN}:${PATH} + fi + + _sudo echo test sudo + check_geom_gate + + trap cleanup INT TERM EXIT + TEMPDIR=`mktemp -d` + DATA=${TEMPDIR}/data + dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE} + ceph osd pool create ${POOL} 32 + + rbd namespace create ${POOL}/${NS} + for ns in '' ${NS}; do + rbd --dest-pool ${POOL} --dest-namespace "${ns}" --no-progress import \ + ${DATA} ${IMAGE} + done +} + +cleanup() +{ + local ns s + + set +e + rm -Rf ${TEMPDIR} + if [ -n "${DEV}" ] + then + _sudo rbd-ggate unmap ${DEV} + fi + + ceph osd pool delete ${POOL} ${POOL} --yes-i-really-really-mean-it +} + +expect_false() +{ + if "$@"; then return 1; else return 0; fi +} + +# +# main +# + +setup + +echo exit status test +expect_false rbd-ggate +expect_false rbd-ggate INVALIDCMD +if [ `id -u` -ne 0 ] +then + expect_false rbd-ggate map ${IMAGE} +fi +expect_false _sudo rbd-ggate map INVALIDIMAGE + +echo map test using the first unused device +DEV=`_sudo rbd-ggate map ${POOL}/${IMAGE}` +rbd-ggate list | grep " ${DEV} *$" + +echo map test specifying the device +expect_false _sudo rbd-ggate --device ${DEV} map ${POOL}/${IMAGE} +dev1=${DEV} +_sudo rbd-ggate unmap ${DEV} +rbd-ggate list | expect_false grep " ${DEV} *$" +DEV= +# XXX: race possible when the device is reused by other process +DEV=`_sudo rbd-ggate --device ${dev1} map ${POOL}/${IMAGE}` +[ "${DEV}" = "${dev1}" ] +rbd-ggate list | grep " ${DEV} *$" + +echo list format test +expect_false _sudo rbd-ggate --format INVALID list +rbd-ggate --format json --pretty-format list +rbd-ggate --format xml list + +echo 
read test +[ "`dd if=${DATA} bs=1M | md5`" = "`_sudo dd if=${DEV} bs=1M | md5`" ] + +echo write test +dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE} +_sudo dd if=${DATA} of=${DEV} bs=1M +_sudo sync +[ "`dd if=${DATA} bs=1M | md5`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5`" ] + +echo trim test +provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` +used=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` +[ "${used}" -eq "${provisioned}" ] +_sudo newfs -E ${DEV} +_sudo sync +provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` +used=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` +[ "${used}" -lt "${provisioned}" ] + +echo resize test +devname=$(basename ${DEV}) +size=$(geom gate list ${devname} | awk '$1 ~ /Mediasize:/ {print $2}') +test -n "${size}" +rbd resize ${POOL}/${IMAGE} --size $((SIZE * 2))M +rbd info ${POOL}/${IMAGE} +if [ -z "$RBD_GGATE_RESIZE_SUPPORTED" ]; then + # when resizing is not supported: + # resizing the underlying image for a GEOM ggate will stop the + # ggate process servicing the device. So we can resize and test + # the disappearance of the device + rbd-ggate list | expect_false grep " ${DEV} *$" +else + rbd-ggate list | grep " ${DEV} *$" + size2=$(geom gate list ${devname} | awk '$1 ~ /Mediasize:/ {print $2}') + test -n "${size2}" + test ${size2} -eq $((size * 2)) + dd if=/dev/urandom of=${DATA} bs=1M count=$((SIZE * 2)) + _sudo dd if=${DATA} of=${DEV} bs=1M + _sudo sync + [ "`dd if=${DATA} bs=1M | md5`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5`" ] + rbd resize ${POOL}/${IMAGE} --allow-shrink --size ${SIZE}M + rbd info ${POOL}/${IMAGE} + size2=$(geom gate list ${devname} | awk '$1 ~ /Mediasize:/ {print $2}') + test -n "${size2}" + test ${size2} -eq ${size} + truncate -s ${SIZE}M ${DATA} + [ "`dd if=${DATA} bs=1M | md5`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5`" ] + _sudo rbd-ggate unmap ${DEV} +fi +DEV= + +echo read-only option test +DEV=`_sudo rbd-ggate map --read-only ${POOL}/${IMAGE}` +devname=$(basename ${DEV}) +rbd-ggate list | grep " ${DEV} *$" +access=$(geom gate list ${devname} | awk '$1 == "access:" {print $2}') +test "${access}" = "read-only" +_sudo dd if=${DEV} of=/dev/null bs=1M +expect_false _sudo dd if=${DATA} of=${DEV} bs=1M +_sudo rbd-ggate unmap ${DEV} + +echo exclusive option test +DEV=`_sudo rbd-ggate map --exclusive ${POOL}/${IMAGE}` +rbd-ggate list | grep " ${DEV} *$" +_sudo dd if=${DATA} of=${DEV} bs=1M +_sudo sync +expect_false timeout 10 \ + rbd -p ${POOL} bench ${IMAGE} --io-type=write --io-size=1024 --io-total=1024 +_sudo rbd-ggate unmap ${DEV} +DEV= +rbd bench -p ${POOL} ${IMAGE} --io-type=write --io-size=1024 --io-total=1024 + +echo unmap by image name test +DEV=`_sudo rbd-ggate map ${POOL}/${IMAGE}` +rbd-ggate list | grep " ${DEV} *$" +_sudo rbd-ggate unmap "${POOL}/${IMAGE}" +rbd-ggate list | expect_false grep " ${DEV} *$" +DEV= + +echo map/unmap snap test +rbd snap create ${POOL}/${IMAGE}@snap +DEV=`_sudo rbd-ggate map ${POOL}/${IMAGE}@snap` +rbd-ggate list | grep " ${DEV} *$" +_sudo rbd-ggate unmap "${POOL}/${IMAGE}@snap" +rbd-ggate list | expect_false grep " ${DEV} *$" +DEV= + +echo map/unmap namespace test +rbd snap create ${POOL}/${NS}/${IMAGE}@snap +DEV=`_sudo rbd-ggate map ${POOL}/${NS}/${IMAGE}@snap` +rbd-ggate list | grep " 
${DEV} *$" +_sudo rbd-ggate unmap "${POOL}/${NS}/${IMAGE}@snap" +rbd-ggate list | expect_false grep "${DEV} $" +DEV= + +echo OK diff --git a/qa/workunits/rbd/rbd-nbd.sh b/qa/workunits/rbd/rbd-nbd.sh new file mode 100755 index 000000000..bc89e9be5 --- /dev/null +++ b/qa/workunits/rbd/rbd-nbd.sh @@ -0,0 +1,500 @@ +#!/usr/bin/env bash +set -ex + +. $(dirname $0)/../../standalone/ceph-helpers.sh + +POOL=rbd +ANOTHER_POOL=new_default_pool$$ +NS=ns +IMAGE=testrbdnbd$$ +SIZE=64 +DATA= +DEV= + +_sudo() +{ + local cmd + + if [ `id -u` -eq 0 ] + then + "$@" + return $? + fi + + # Look for the command in the user path. If it fails run it as is, + # supposing it is in sudo path. + cmd=`which $1 2>/dev/null` || cmd=$1 + shift + sudo -nE "${cmd}" "$@" +} + +setup() +{ + local ns x + + if [ -e CMakeCache.txt ]; then + # running under cmake build dir + + CEPH_SRC=$(readlink -f $(dirname $0)/../../../src) + CEPH_ROOT=${PWD} + CEPH_BIN=${CEPH_ROOT}/bin + + export LD_LIBRARY_PATH=${CEPH_ROOT}/lib:${LD_LIBRARY_PATH} + export PYTHONPATH=${PYTHONPATH}:${CEPH_SRC}/pybind:${CEPH_ROOT}/lib/cython_modules/lib.3 + PATH=${CEPH_BIN}:${PATH} + fi + + _sudo echo test sudo + + trap cleanup INT TERM EXIT + TEMPDIR=`mktemp -d` + DATA=${TEMPDIR}/data + dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE} + + rbd namespace create ${POOL}/${NS} + + for ns in '' ${NS}; do + rbd --dest-pool ${POOL} --dest-namespace "${ns}" --no-progress import \ + ${DATA} ${IMAGE} + done + + # create another pool + ceph osd pool create ${ANOTHER_POOL} 8 + rbd pool init ${ANOTHER_POOL} +} + +function cleanup() +{ + local ns s + + set +e + + mount | fgrep ${TEMPDIR}/mnt && _sudo umount -f ${TEMPDIR}/mnt + + rm -Rf ${TEMPDIR} + if [ -n "${DEV}" ] + then + _sudo rbd device --device-type nbd unmap ${DEV} + fi + + for ns in '' ${NS}; do + if rbd -p ${POOL} --namespace "${ns}" status ${IMAGE} 2>/dev/null; then + for s in 0.5 1 2 4 8 16 32; do + sleep $s + rbd -p ${POOL} --namespace "${ns}" status ${IMAGE} | + grep 'Watchers: none' && break + done + rbd -p ${POOL} --namespace "${ns}" snap purge ${IMAGE} + rbd -p ${POOL} --namespace "${ns}" remove ${IMAGE} + fi + done + rbd namespace remove ${POOL}/${NS} + + # cleanup/reset default pool + rbd config global rm global rbd_default_pool + ceph osd pool delete ${ANOTHER_POOL} ${ANOTHER_POOL} --yes-i-really-really-mean-it +} + +function expect_false() +{ + if "$@"; then return 1; else return 0; fi +} + +function get_pid() +{ + local pool=$1 + local ns=$2 + + PID=$(rbd device --device-type nbd --format xml list | $XMLSTARLET sel -t -v \ + "//devices/device[pool='${pool}'][namespace='${ns}'][image='${IMAGE}'][device='${DEV}']/id") + test -n "${PID}" || return 1 + ps -p ${PID} -C rbd-nbd +} + +unmap_device() +{ + local args=$1 + local pid=$2 + + _sudo rbd device --device-type nbd unmap ${args} + rbd device --device-type nbd list | expect_false grep "^${pid}\\b" || return 1 + ps -C rbd-nbd | expect_false grep "^ *${pid}\\b" || return 1 + + # workaround possible race between unmap and following map + sleep 0.5 +} + +# +# main +# + +setup + +# exit status test +expect_false rbd-nbd +expect_false rbd-nbd INVALIDCMD +if [ `id -u` -ne 0 ] +then + expect_false rbd device --device-type nbd map ${IMAGE} +fi +expect_false _sudo rbd device --device-type nbd map INVALIDIMAGE +expect_false _sudo rbd-nbd --device INVALIDDEV map ${IMAGE} + +# list format test +expect_false rbd device --device-type nbd --format INVALID list +rbd device --device-type nbd --format json --pretty-format list +rbd device --device-type nbd --format xml 
list + +# map test using the first unused device +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +# map test specifying the device +expect_false _sudo rbd-nbd --device ${DEV} map ${POOL}/${IMAGE} +dev1=${DEV} +unmap_device ${DEV} ${PID} +DEV= +# XXX: race possible when the device is reused by other process +DEV=`_sudo rbd-nbd --device ${dev1} map ${POOL}/${IMAGE}` +[ "${DEV}" = "${dev1}" ] +rbd device --device-type nbd list | grep "${IMAGE}" +get_pid ${POOL} + +# read test +[ "`dd if=${DATA} bs=1M | md5sum`" = "`_sudo dd if=${DEV} bs=1M | md5sum`" ] + +# write test +dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE} +_sudo dd if=${DATA} of=${DEV} bs=1M oflag=direct +[ "`dd if=${DATA} bs=1M | md5sum`" = "`rbd -p ${POOL} --no-progress export ${IMAGE} - | md5sum`" ] +unmap_device ${DEV} ${PID} + +# notrim test +DEV=`_sudo rbd device --device-type nbd --options notrim map ${POOL}/${IMAGE}` +get_pid ${POOL} +provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` +used=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` +[ "${used}" -eq "${provisioned}" ] +# should fail discard as at time of mapping notrim was used +expect_false _sudo blkdiscard ${DEV} +sync +provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` +used=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` +[ "${used}" -eq "${provisioned}" ] +unmap_device ${DEV} ${PID} + +# trim test +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` +used=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` +[ "${used}" -eq "${provisioned}" ] +# should honor discard as at time of mapping trim was considered by default +_sudo blkdiscard ${DEV} +sync +provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/provisioned_size" -v .` +used=`rbd -p ${POOL} --format xml du ${IMAGE} | + $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .` +[ "${used}" -lt "${provisioned}" ] + +# resize test +devname=$(basename ${DEV}) +blocks=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions) +test -n "${blocks}" +rbd resize ${POOL}/${IMAGE} --size $((SIZE * 2))M +rbd info ${POOL}/${IMAGE} +blocks2=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions) +test -n "${blocks2}" +test ${blocks2} -eq $((blocks * 2)) +rbd resize ${POOL}/${IMAGE} --allow-shrink --size ${SIZE}M +blocks2=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions) +test -n "${blocks2}" +test ${blocks2} -eq ${blocks} + +# read-only option test +unmap_device ${DEV} ${PID} +DEV=`_sudo rbd --device-type nbd map --read-only ${POOL}/${IMAGE}` +PID=$(rbd device --device-type nbd list | awk -v pool=${POOL} -v img=${IMAGE} -v dev=${DEV} \ + '$2 == pool && $3 == img && $5 == dev {print $1}') +test -n "${PID}" +ps -p ${PID} -C rbd-nbd + +_sudo dd if=${DEV} of=/dev/null bs=1M +expect_false _sudo dd if=${DATA} of=${DEV} bs=1M oflag=direct +unmap_device ${DEV} ${PID} + +# exclusive option test +DEV=`_sudo rbd --device-type nbd map --exclusive ${POOL}/${IMAGE}` +get_pid ${POOL} + +_sudo dd if=${DATA} of=${DEV} bs=1M oflag=direct +expect_false timeout 10 \ + 
rbd bench ${IMAGE} --io-type write --io-size=1024 --io-total=1024 +unmap_device ${DEV} ${PID} +DEV= +rbd bench ${IMAGE} --io-type write --io-size=1024 --io-total=1024 + +# unmap by image name test +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +unmap_device ${IMAGE} ${PID} +DEV= + +# map/unmap snap test +rbd snap create ${POOL}/${IMAGE}@snap +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}@snap` +get_pid ${POOL} +unmap_device "${IMAGE}@snap" ${PID} +DEV= + +# map/unmap snap test with --snap-id +SNAPID=`rbd snap ls ${POOL}/${IMAGE} | awk '$2 == "snap" {print $1}'` +DEV=`_sudo rbd device --device-type nbd map --snap-id ${SNAPID} ${POOL}/${IMAGE}` +get_pid ${POOL} +unmap_device "--snap-id ${SNAPID} ${IMAGE}" ${PID} +DEV= + +# map/unmap namespace test +rbd snap create ${POOL}/${NS}/${IMAGE}@snap +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${NS}/${IMAGE}@snap` +get_pid ${POOL} ${NS} +unmap_device "${POOL}/${NS}/${IMAGE}@snap" ${PID} +DEV= + +# map/unmap namespace test with --snap-id +SNAPID=`rbd snap ls ${POOL}/${NS}/${IMAGE} | awk '$2 == "snap" {print $1}'` +DEV=`_sudo rbd device --device-type nbd map --snap-id ${SNAPID} ${POOL}/${NS}/${IMAGE}` +get_pid ${POOL} ${NS} +unmap_device "--snap-id ${SNAPID} ${POOL}/${NS}/${IMAGE}" ${PID} +DEV= + +# map/unmap namespace using options test +DEV=`_sudo rbd device --device-type nbd map --pool ${POOL} --namespace ${NS} --image ${IMAGE}` +get_pid ${POOL} ${NS} +unmap_device "--pool ${POOL} --namespace ${NS} --image ${IMAGE}" ${PID} +DEV=`_sudo rbd device --device-type nbd map --pool ${POOL} --namespace ${NS} --image ${IMAGE} --snap snap` +get_pid ${POOL} ${NS} +unmap_device "--pool ${POOL} --namespace ${NS} --image ${IMAGE} --snap snap" ${PID} +DEV= + +# unmap by image name test 2 +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +pid=$PID +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${NS}/${IMAGE}` +get_pid ${POOL} ${NS} +unmap_device ${POOL}/${NS}/${IMAGE} ${PID} +DEV= +unmap_device ${POOL}/${IMAGE} ${pid} + +# map/unmap test with just image name and expect image to come from default pool +if [ "${POOL}" = "rbd" ];then + DEV=`_sudo rbd device --device-type nbd map ${IMAGE}` + get_pid ${POOL} + unmap_device ${IMAGE} ${PID} + DEV= +fi + +# map/unmap test with just image name after changing default pool +rbd config global set global rbd_default_pool ${ANOTHER_POOL} +rbd create --size 10M ${IMAGE} +DEV=`_sudo rbd device --device-type nbd map ${IMAGE}` +get_pid ${ANOTHER_POOL} +unmap_device ${IMAGE} ${PID} +DEV= + +# reset +rbd config global rm global rbd_default_pool + +# auto unmap test +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +_sudo kill ${PID} +for i in `seq 10`; do + rbd device --device-type nbd list | expect_false grep "^${PID} *${POOL} *${IMAGE}" && break + sleep 1 +done +rbd device --device-type nbd list | expect_false grep "^${PID} *${POOL} *${IMAGE}" + +# quiesce test +QUIESCE_HOOK=${TEMPDIR}/quiesce.sh +DEV=`_sudo rbd device --device-type nbd map --quiesce --quiesce-hook ${QUIESCE_HOOK} ${POOL}/${IMAGE}` +get_pid ${POOL} + +# test it fails if the hook does not exists +test ! 
-e ${QUIESCE_HOOK} +expect_false rbd snap create ${POOL}/${IMAGE}@quiesce1 +_sudo dd if=${DATA} of=${DEV} bs=1M count=1 oflag=direct + +# test the hook is executed +touch ${QUIESCE_HOOK} +chmod +x ${QUIESCE_HOOK} +cat > ${QUIESCE_HOOK} <<EOF +#/bin/sh +echo "test the hook is executed" >&2 +echo \$1 > ${TEMPDIR}/\$2 +EOF +rbd snap create ${POOL}/${IMAGE}@quiesce1 +_sudo dd if=${DATA} of=${DEV} bs=1M count=1 oflag=direct +test "$(cat ${TEMPDIR}/quiesce)" = ${DEV} +test "$(cat ${TEMPDIR}/unquiesce)" = ${DEV} + +# test snap create fails if the hook fails +touch ${QUIESCE_HOOK} +chmod +x ${QUIESCE_HOOK} +cat > ${QUIESCE_HOOK} <<EOF +#/bin/sh +echo "test snap create fails if the hook fails" >&2 +exit 22 +EOF +expect_false rbd snap create ${POOL}/${IMAGE}@quiesce2 +_sudo dd if=${DATA} of=${DEV} bs=1M count=1 oflag=direct + +# test the hook is slow +cat > ${QUIESCE_HOOK} <<EOF +#/bin/sh +echo "test the hook is slow" >&2 +sleep 7 +EOF +rbd snap create ${POOL}/${IMAGE}@quiesce2 +_sudo dd if=${DATA} of=${DEV} bs=1M count=1 oflag=direct + +# test rbd-nbd_quiesce hook that comes with distribution +unmap_device ${DEV} ${PID} +LOG_FILE=${TEMPDIR}/rbd-nbd.log +if [ -n "${CEPH_SRC}" ]; then + QUIESCE_HOOK=${CEPH_SRC}/tools/rbd_nbd/rbd-nbd_quiesce + DEV=`_sudo rbd device --device-type nbd map --quiesce --quiesce-hook ${QUIESCE_HOOK} \ + ${POOL}/${IMAGE} --log-file=${LOG_FILE}` +else + DEV=`_sudo rbd device --device-type nbd map --quiesce ${POOL}/${IMAGE} --log-file=${LOG_FILE}` +fi +get_pid ${POOL} +_sudo mkfs ${DEV} +mkdir ${TEMPDIR}/mnt +_sudo mount ${DEV} ${TEMPDIR}/mnt +rbd snap create ${POOL}/${IMAGE}@quiesce3 +_sudo dd if=${DATA} of=${TEMPDIR}/mnt/test bs=1M count=1 oflag=direct +_sudo umount ${TEMPDIR}/mnt +unmap_device ${DEV} ${PID} +DEV= +cat ${LOG_FILE} +expect_false grep 'quiesce failed' ${LOG_FILE} + +# test detach/attach +OUT=`_sudo rbd device --device-type nbd --options try-netlink,show-cookie map ${POOL}/${IMAGE}` +read DEV COOKIE <<< "${OUT}" +get_pid ${POOL} +_sudo mount ${DEV} ${TEMPDIR}/mnt +_sudo rbd device detach ${POOL}/${IMAGE} --device-type nbd +expect_false get_pid ${POOL} +expect_false _sudo rbd device attach --device ${DEV} ${POOL}/${IMAGE} --device-type nbd +if [ -n "${COOKIE}" ]; then + _sudo rbd device attach --device ${DEV} --cookie ${COOKIE} ${POOL}/${IMAGE} --device-type nbd +else + _sudo rbd device attach --device ${DEV} ${POOL}/${IMAGE} --device-type nbd --force +fi +get_pid ${POOL} +_sudo rbd device detach ${DEV} --device-type nbd +expect_false get_pid ${POOL} +if [ -n "${COOKIE}" ]; then + _sudo rbd device attach --device ${DEV} --cookie ${COOKIE} ${POOL}/${IMAGE} --device-type nbd +else + _sudo rbd device attach --device ${DEV} ${POOL}/${IMAGE} --device-type nbd --force +fi +get_pid ${POOL} +ls ${TEMPDIR}/mnt/ +dd if=${TEMPDIR}/mnt/test of=/dev/null bs=1M count=1 +_sudo dd if=${DATA} of=${TEMPDIR}/mnt/test1 bs=1M count=1 oflag=direct +_sudo umount ${TEMPDIR}/mnt +unmap_device ${DEV} ${PID} +# if kernel supports cookies +if [ -n "${COOKIE}" ]; then + OUT=`_sudo rbd device --device-type nbd --show-cookie --cookie "abc de" --options try-netlink map ${POOL}/${IMAGE}` + read DEV ANOTHER_COOKIE <<< "${OUT}" + get_pid ${POOL} + test "${ANOTHER_COOKIE}" = "abc de" + unmap_device ${DEV} ${PID} +fi +DEV= + +# test detach/attach with --snap-id +SNAPID=`rbd snap ls ${POOL}/${IMAGE} | awk '$2 == "snap" {print $1}'` +OUT=`_sudo rbd device --device-type nbd --options try-netlink,show-cookie map --snap-id ${SNAPID} ${POOL}/${IMAGE}` +read DEV COOKIE <<< "${OUT}" +get_pid ${POOL} +_sudo 
rbd device detach ${POOL}/${IMAGE} --snap-id ${SNAPID} --device-type nbd +expect_false get_pid ${POOL} +expect_false _sudo rbd device attach --device ${DEV} --snap-id ${SNAPID} ${POOL}/${IMAGE} --device-type nbd +if [ -n "${COOKIE}" ]; then + _sudo rbd device attach --device ${DEV} --cookie ${COOKIE} --snap-id ${SNAPID} ${POOL}/${IMAGE} --device-type nbd +else + _sudo rbd device attach --device ${DEV} --snap-id ${SNAPID} ${POOL}/${IMAGE} --device-type nbd --force +fi +get_pid ${POOL} +_sudo rbd device detach ${DEV} --device-type nbd +expect_false get_pid ${POOL} +DEV= + +# test discard granularity with journaling +rbd config image set ${POOL}/${IMAGE} rbd_discard_granularity_bytes 4096 +rbd feature enable ${POOL}/${IMAGE} journaling +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +# since a discard will now be pruned to only whole blocks (0..4095, 4096..8191) +# let us test all the cases around those alignments. 512 is the smallest +# possible block blkdiscard allows us to use. Thus the test checks +# 512 before, on the alignment, 512 after. +_sudo blkdiscard --offset 0 --length $((4096-512)) ${DEV} +_sudo blkdiscard --offset 0 --length 4096 ${DEV} +_sudo blkdiscard --offset 0 --length $((4096+512)) ${DEV} +_sudo blkdiscard --offset 512 --length $((8192-1024)) ${DEV} +_sudo blkdiscard --offset 512 --length $((8192-512)) ${DEV} +_sudo blkdiscard --offset 512 --length 8192 ${DEV} +# wait for commit log to be empty, 10 seconds should be well enough +tries=0 +queue_length=`rbd journal inspect --pool ${POOL} --image ${IMAGE} | awk '/entries inspected/ {print $1}'` +while [ ${tries} -lt 10 ] && [ ${queue_length} -gt 0 ]; do + rbd journal inspect --pool ${POOL} --image ${IMAGE} --verbose + sleep 1 + queue_length=`rbd journal inspect --pool ${POOL} --image ${IMAGE} | awk '/entries inspected/ {print $1}'` + tries=$((tries+1)) +done +[ ${queue_length} -eq 0 ] +unmap_device ${DEV} ${PID} +DEV= +rbd feature disable ${POOL}/${IMAGE} journaling +rbd config image rm ${POOL}/${IMAGE} rbd_discard_granularity_bytes + +# test that disabling a feature so that the op is proxied to rbd-nbd +# (arranged here by blkdiscard before "rbd feature disable") doesn't hang +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +rbd feature enable ${POOL}/${IMAGE} journaling +_sudo blkdiscard --offset 0 --length 4096 ${DEV} +rbd feature disable ${POOL}/${IMAGE} journaling +unmap_device ${DEV} ${PID} +DEV= + +# test that rbd_op_threads setting takes effect +EXPECTED=`ceph-conf --show-config-value librados_thread_count` +DEV=`_sudo rbd device --device-type nbd map ${POOL}/${IMAGE}` +get_pid ${POOL} +ACTUAL=`ps -p ${PID} -T | grep -c io_context_pool` +[ ${ACTUAL} -eq ${EXPECTED} ] +unmap_device ${DEV} ${PID} +EXPECTED=$((EXPECTED * 3 + 1)) +DEV=`_sudo rbd device --device-type nbd --rbd-op-threads ${EXPECTED} map ${POOL}/${IMAGE}` +get_pid ${POOL} +ACTUAL=`ps -p ${PID} -T | grep -c io_context_pool` +[ ${ACTUAL} -eq ${EXPECTED} ] +unmap_device ${DEV} ${PID} +DEV= + +echo OK diff --git a/qa/workunits/rbd/rbd_groups.sh b/qa/workunits/rbd/rbd_groups.sh new file mode 100755 index 000000000..a32618484 --- /dev/null +++ b/qa/workunits/rbd/rbd_groups.sh @@ -0,0 +1,258 @@ +#!/usr/bin/env bash + +set -ex + +# +# rbd_consistency_groups.sh - test consistency groups cli commands +# + +# +# Functions +# + +create_group() +{ + local group_name=$1 + + rbd group create $group_name +} + +list_groups() +{ + rbd group list +} + +check_group_exists() +{ + local group_name=$1 + list_groups | 
grep $group_name +} + +remove_group() +{ + local group_name=$1 + + rbd group remove $group_name +} + +rename_group() +{ + local src_name=$1 + local dest_name=$2 + + rbd group rename $src_name $dest_name +} + +check_group_does_not_exist() +{ + local group_name=$1 + for v in $(list_groups); do + if [ "$v" == "$group_name" ]; then + return 1 + fi + done + return 0 +} + +create_image() +{ + local image_name=$1 + rbd create --size 10M $image_name +} + +remove_image() +{ + local image_name=$1 + rbd remove $image_name +} + +add_image_to_group() +{ + local image_name=$1 + local group_name=$2 + rbd group image add $group_name $image_name +} + +remove_image_from_group() +{ + local image_name=$1 + local group_name=$2 + rbd group image remove $group_name $image_name +} + +check_image_in_group() +{ + local image_name=$1 + local group_name=$2 + for v in $(rbd group image list $group_name); do + local vtrimmed=${v#*/} + if [ "$vtrimmed" = "$image_name" ]; then + return 0 + fi + done + return 1 +} + +check_image_not_in_group() +{ + local image_name=$1 + local group_name=$2 + for v in $(rbd group image list $group_name); do + local vtrimmed=${v#*/} + if [ "$vtrimmed" = "$image_name" ]; then + return 1 + fi + done + return 0 +} + +create_snapshot() +{ + local group_name=$1 + local snap_name=$2 + rbd group snap create $group_name@$snap_name +} + +create_snapshots() +{ + local group_name=$1 + local snap_name=$2 + local snap_count=$3 + for i in `seq 1 $snap_count`; do + rbd group snap create $group_name@$snap_name$i + done +} + +remove_snapshot() +{ + local group_name=$1 + local snap_name=$2 + rbd group snap remove $group_name@$snap_name +} + +remove_snapshots() +{ + local group_name=$1 + local snap_name=$2 + local snap_count=$3 + for i in `seq 1 $snap_count`; do + rbd group snap remove $group_name@$snap_name$i + done +} + +rename_snapshot() +{ + local group_name=$1 + local snap_name=$2 + local new_snap_name=$3 + rbd group snap rename $group_name@$snap_name $new_snap_name +} + +list_snapshots() +{ + local group_name=$1 + rbd group snap list $group_name +} + +rollback_snapshot() +{ + local group_name=$1 + local snap_name=$2 + rbd group snap rollback $group_name@$snap_name +} + +check_snapshot_in_group() +{ + local group_name=$1 + local snap_name=$2 + list_snapshots $group_name | grep $snap_name +} + +check_snapshots_count_in_group() +{ + local group_name=$1 + local snap_name=$2 + local expected_count=$3 + local actual_count + actual_count=$(list_snapshots $group_name | grep -c $snap_name) + (( actual_count == expected_count )) +} + +check_snapshot_not_in_group() +{ + local group_name=$1 + local snap_name=$2 + for v in $(list_snapshots $group_name | awk '{print $1}'); do + if [ "$v" = "$snap_name" ]; then + return 1 + fi + done + return 0 +} + +echo "TEST: create remove consistency group" +group="test_consistency_group" +new_group="test_new_consistency_group" +create_group $group +check_group_exists $group +rename_group $group $new_group +check_group_exists $new_group +remove_group $new_group +check_group_does_not_exist $new_group +echo "PASSED" + +echo "TEST: add remove images to consistency group" +image="test_image" +group="test_consistency_group" +create_image $image +create_group $group +add_image_to_group $image $group +check_image_in_group $image $group +remove_image_from_group $image $group +check_image_not_in_group $image $group +remove_group $group +remove_image $image +echo "PASSED" + +echo "TEST: create remove snapshots of consistency group" +image="test_image" +group="test_consistency_group" 
+snap="group_snap" +new_snap="new_group_snap" +sec_snap="group_snap2" +create_image $image +create_group $group +add_image_to_group $image $group +create_snapshot $group $snap +check_snapshot_in_group $group $snap +rename_snapshot $group $snap $new_snap +check_snapshot_not_in_group $group $snap +create_snapshot $group $sec_snap +check_snapshot_in_group $group $sec_snap +rollback_snapshot $group $new_snap +remove_snapshot $group $new_snap +check_snapshot_not_in_group $group $new_snap +remove_snapshot $group $sec_snap +check_snapshot_not_in_group $group $sec_snap +remove_group $group +remove_image $image +echo "PASSED" + +echo "TEST: list snapshots of consistency group" +image="test_image" +group="test_consistency_group" +snap="group_snap" +create_image $image +create_group $group +add_image_to_group $image $group +create_snapshots $group $snap 10 +check_snapshots_count_in_group $group $snap 10 +remove_snapshots $group $snap 10 +create_snapshots $group $snap 100 +check_snapshots_count_in_group $group $snap 100 +remove_snapshots $group $snap 100 +remove_group $group +remove_image $image +echo "PASSED" + +echo "OK" diff --git a/qa/workunits/rbd/rbd_mirror_bootstrap.sh b/qa/workunits/rbd/rbd_mirror_bootstrap.sh new file mode 100755 index 000000000..6ef06f2b8 --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_bootstrap.sh @@ -0,0 +1,58 @@ +#!/bin/sh -ex +# +# rbd_mirror_bootstrap.sh - test peer bootstrap create/import +# + +RBD_MIRROR_MANUAL_PEERS=1 +RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-1} +. $(dirname $0)/rbd_mirror_helpers.sh + +setup + +testlog "TEST: bootstrap cluster2 from cluster1" +# create token on cluster1 and import to cluster2 +TOKEN=${TEMPDIR}/peer-token +TOKEN_2=${TEMPDIR}/peer-token-2 +CEPH_ARGS='' rbd --cluster ${CLUSTER1} mirror pool peer bootstrap create ${POOL} > ${TOKEN} +CEPH_ARGS='' rbd --cluster ${CLUSTER1} mirror pool peer bootstrap create ${PARENT_POOL} > ${TOKEN_2} +cmp ${TOKEN} ${TOKEN_2} + +CEPH_ARGS='' rbd --cluster ${CLUSTER2} --pool ${POOL} mirror pool peer bootstrap import ${TOKEN} --direction rx-only +CEPH_ARGS='' rbd --cluster ${CLUSTER2} --pool ${PARENT_POOL} mirror pool peer bootstrap import ${TOKEN} --direction rx-tx + +start_mirrors ${CLUSTER1} +start_mirrors ${CLUSTER2} + +testlog "TEST: verify rx-only direction" +# rx-only peer is added immediately by "rbd mirror pool peer bootstrap import" +rbd --cluster ${CLUSTER2} --pool ${POOL} mirror pool info --format json | jq -e '.peers[0].direction == "rx-only"' +# tx-only peer is added asynchronously by mirror_peer_ping class method +while ! 
rbd --cluster ${CLUSTER1} --pool ${POOL} mirror pool info --format json | jq -e '.peers | length > 0'; do + sleep 1 +done +rbd --cluster ${CLUSTER1} --pool ${POOL} mirror pool info --format json | jq -e '.peers[0].direction == "tx-only"' + +create_image_and_enable_mirror ${CLUSTER1} ${POOL} image1 + +wait_for_image_replay_started ${CLUSTER2} ${POOL} image1 +write_image ${CLUSTER1} ${POOL} image1 100 +wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} image1 + +testlog "TEST: verify rx-tx direction" +# both rx-tx peers are added immediately by "rbd mirror pool peer bootstrap import" +rbd --cluster ${CLUSTER1} --pool ${PARENT_POOL} mirror pool info --format json | jq -e '.peers[0].direction == "rx-tx"' +rbd --cluster ${CLUSTER2} --pool ${PARENT_POOL} mirror pool info --format json | jq -e '.peers[0].direction == "rx-tx"' + +create_image ${CLUSTER1} ${PARENT_POOL} image1 +create_image ${CLUSTER2} ${PARENT_POOL} image2 + +enable_mirror ${CLUSTER1} ${PARENT_POOL} image1 +enable_mirror ${CLUSTER2} ${PARENT_POOL} image2 + +wait_for_image_replay_started ${CLUSTER2} ${PARENT_POOL} image1 +write_image ${CLUSTER1} ${PARENT_POOL} image1 100 +wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${PARENT_POOL} image1 + +wait_for_image_replay_started ${CLUSTER1} ${PARENT_POOL} image2 +write_image ${CLUSTER2} ${PARENT_POOL} image2 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} image2 diff --git a/qa/workunits/rbd/rbd_mirror_fsx_compare.sh b/qa/workunits/rbd/rbd_mirror_fsx_compare.sh new file mode 100755 index 000000000..0ba3c97d7 --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_fsx_compare.sh @@ -0,0 +1,38 @@ +#!/bin/sh -ex +# +# rbd_mirror_fsx_compare.sh - test rbd-mirror daemon under FSX workload +# +# The script is used to compare FSX-generated images between two clusters. +# + +. $(dirname $0)/rbd_mirror_helpers.sh + +trap 'cleanup $?' INT TERM EXIT + +setup_tempdir + +testlog "TEST: wait for all images" +image_count=$(rbd --cluster ${CLUSTER1} --pool ${POOL} ls | wc -l) +retrying_seconds=0 +sleep_seconds=10 +while [ ${retrying_seconds} -le 7200 ]; do + [ $(rbd --cluster ${CLUSTER2} --pool ${POOL} ls | wc -l) -ge ${image_count} ] && break + sleep ${sleep_seconds} + retrying_seconds=$(($retrying_seconds+${sleep_seconds})) +done + +testlog "TEST: snapshot all pool images" +snap_id=`uuidgen` +for image in $(rbd --cluster ${CLUSTER1} --pool ${POOL} ls); do + create_snapshot ${CLUSTER1} ${POOL} ${image} ${snap_id} +done + +testlog "TEST: wait for snapshots" +for image in $(rbd --cluster ${CLUSTER1} --pool ${POOL} ls); do + wait_for_snap_present ${CLUSTER2} ${POOL} ${image} ${snap_id} +done + +testlog "TEST: compare image snapshots" +for image in $(rbd --cluster ${CLUSTER1} --pool ${POOL} ls); do + compare_image_snapshots ${POOL} ${image} +done diff --git a/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh b/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh new file mode 100755 index 000000000..d988987ba --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh @@ -0,0 +1,10 @@ +#!/bin/sh -ex +# +# rbd_mirror_fsx_prepare.sh - test rbd-mirror daemon under FSX workload +# +# The script is used to compare FSX-generated images between two clusters. +# + +. 
$(dirname $0)/rbd_mirror_helpers.sh + +setup diff --git a/qa/workunits/rbd/rbd_mirror_ha.sh b/qa/workunits/rbd/rbd_mirror_ha.sh new file mode 100755 index 000000000..37739a83d --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_ha.sh @@ -0,0 +1,210 @@ +#!/bin/sh -ex +# +# rbd_mirror_ha.sh - test rbd-mirror daemons in HA mode +# + +RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-7} + +. $(dirname $0)/rbd_mirror_helpers.sh + +setup + +is_leader() +{ + local instance=$1 + local pool=$2 + + test -n "${pool}" || pool=${POOL} + + admin_daemon "${CLUSTER1}:${instance}" \ + rbd mirror status ${pool} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} | + grep '"leader": true' +} + +wait_for_leader() +{ + local s instance + + for s in 1 1 2 4 4 4 4 4 8 8 8 8 16 16 32 64; do + sleep $s + for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + is_leader ${instance} || continue + LEADER=${instance} + return 0 + done + done + + LEADER= + return 1 +} + +release_leader() +{ + local pool=$1 + local cmd="rbd mirror leader release" + + test -n "${pool}" && cmd="${cmd} ${pool} ${CLUSTER2}" + + admin_daemon "${CLUSTER1}:${LEADER}" ${cmd} +} + +wait_for_leader_released() +{ + local i + + test -n "${LEADER}" + for i in `seq 10`; do + is_leader ${LEADER} || return 0 + sleep 1 + done + + return 1 +} + +test_replay() +{ + local image + + for image; do + wait_for_image_replay_started ${CLUSTER1}:${LEADER} ${POOL} ${image} + write_image ${CLUSTER2} ${POOL} ${image} 100 + wait_for_replay_complete ${CLUSTER1}:${LEADER} ${CLUSTER2} ${POOL} \ + ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' \ + 'primary_position' \ + "${MIRROR_USER_ID_PREFIX}${LEADER} on $(hostname -s)" + if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} \ + 'down+unknown' + fi + compare_images ${POOL} ${image} + done +} + +testlog "TEST: start first daemon instance and test replay" +start_mirror ${CLUSTER1}:0 +image1=test1 +create_image ${CLUSTER2} ${POOL} ${image1} +LEADER=0 +test_replay ${image1} + +testlog "TEST: release leader and wait it is reacquired" +is_leader 0 ${POOL} +is_leader 0 ${PARENT_POOL} +release_leader ${POOL} +wait_for_leader_released +is_leader 0 ${PARENT_POOL} +wait_for_leader +release_leader +wait_for_leader_released +expect_failure "" is_leader 0 ${PARENT_POOL} +wait_for_leader + +testlog "TEST: start second daemon instance and test replay" +start_mirror ${CLUSTER1}:1 +image2=test2 +create_image ${CLUSTER2} ${POOL} ${image2} +test_replay ${image1} ${image2} + +testlog "TEST: release leader and test it is acquired by secondary" +is_leader 0 ${POOL} +is_leader 0 ${PARENT_POOL} +release_leader ${POOL} +wait_for_leader_released +wait_for_leader +test_replay ${image1} ${image2} +release_leader +wait_for_leader_released +wait_for_leader +test "${LEADER}" = 0 + +testlog "TEST: stop first daemon instance and test replay" +stop_mirror ${CLUSTER1}:0 +image3=test3 +create_image ${CLUSTER2} ${POOL} ${image3} +LEADER=1 +test_replay ${image1} ${image2} ${image3} + +testlog "TEST: start first daemon instance and test replay" +start_mirror ${CLUSTER1}:0 +image4=test4 +create_image ${CLUSTER2} ${POOL} ${image4} +test_replay ${image3} ${image4} + +testlog "TEST: crash leader and test replay" +stop_mirror ${CLUSTER1}:1 -KILL +image5=test5 +create_image ${CLUSTER2} ${POOL} ${image5} +LEADER=0 +test_replay ${image1} ${image4} ${image5} + +testlog "TEST: start crashed leader and test replay" +start_mirror ${CLUSTER1}:1 +image6=test6 +create_image ${CLUSTER2} ${POOL} ${image6} 
+test_replay ${image1} ${image6} + +testlog "TEST: start yet another daemon instance and test replay" +start_mirror ${CLUSTER1}:2 +image7=test7 +create_image ${CLUSTER2} ${POOL} ${image7} +test_replay ${image1} ${image7} + +testlog "TEST: release leader and test it is acquired by secondary" +is_leader 0 +release_leader +wait_for_leader_released +wait_for_leader +test_replay ${image1} ${image2} + +testlog "TEST: stop leader and test replay" +stop_mirror ${CLUSTER1}:${LEADER} +image8=test8 +create_image ${CLUSTER2} ${POOL} ${image8} +prev_leader=${LEADER} +wait_for_leader +test_replay ${image1} ${image8} + +testlog "TEST: start previous leader and test replay" +start_mirror ${CLUSTER1}:${prev_leader} +image9=test9 +create_image ${CLUSTER2} ${POOL} ${image9} +test_replay ${image1} ${image9} + +testlog "TEST: crash leader and test replay" +stop_mirror ${CLUSTER1}:${LEADER} -KILL +image10=test10 +create_image ${CLUSTER2} ${POOL} ${image10} +prev_leader=${LEADER} +wait_for_leader +test_replay ${image1} ${image10} + +testlog "TEST: start previous leader and test replay" +start_mirror ${CLUSTER1}:${prev_leader} +image11=test11 +create_image ${CLUSTER2} ${POOL} ${image11} +test_replay ${image1} ${image11} + +testlog "TEST: start some more daemon instances and test replay" +start_mirror ${CLUSTER1}:3 +start_mirror ${CLUSTER1}:4 +start_mirror ${CLUSTER1}:5 +start_mirror ${CLUSTER1}:6 +image13=test13 +create_image ${CLUSTER2} ${POOL} ${image13} +test_replay ${leader} ${image1} ${image13} + +testlog "TEST: release leader and test it is acquired by secondary" +release_leader +wait_for_leader_released +wait_for_leader +test_replay ${image1} ${image2} + +testlog "TEST: in loop: stop leader and test replay" +for i in 0 1 2 3 4 5; do + stop_mirror ${CLUSTER1}:${LEADER} + wait_for_leader + test_replay ${image1} +done + +stop_mirror ${CLUSTER1}:${LEADER} diff --git a/qa/workunits/rbd/rbd_mirror_helpers.sh b/qa/workunits/rbd/rbd_mirror_helpers.sh new file mode 100755 index 000000000..f4961b925 --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_helpers.sh @@ -0,0 +1,1488 @@ +#!/bin/sh +# +# rbd_mirror_helpers.sh - shared rbd-mirror daemon helper functions +# +# The scripts starts two ("local" and "remote") clusters using mstart.sh script, +# creates a temporary directory, used for cluster configs, daemon logs, admin +# socket, temporary files, and launches rbd-mirror daemon. +# +# There are several env variables useful when troubleshooting a test failure: +# +# RBD_MIRROR_NOCLEANUP - if not empty, don't run the cleanup (stop processes, +# destroy the clusters and remove the temp directory) +# on exit, so it is possible to check the test state +# after failure. +# RBD_MIRROR_TEMDIR - use this path when creating the temporary directory +# (should not exist) instead of running mktemp(1). +# RBD_MIRROR_ARGS - use this to pass additional arguments to started +# rbd-mirror daemons. +# RBD_MIRROR_VARGS - use this to pass additional arguments to vstart.sh +# when starting clusters. +# RBD_MIRROR_INSTANCES - number of daemons to start per cluster +# RBD_MIRROR_CONFIG_KEY - if not empty, use config-key for remote cluster +# secrets +# The cleanup can be done as a separate step, running the script with +# `cleanup ${RBD_MIRROR_TEMDIR}' arguments. +# +# Note, as other workunits tests, rbd_mirror_journal.sh expects to find ceph binaries +# in PATH. 
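+#
+# For example, an HA run that keeps seven rbd-mirror daemons per cluster and
+# preserves its state for later inspection could be started like this (the
+# paths and values below are illustrative only):
+#
+#   RBD_MIRROR_INSTANCES=7 RBD_MIRROR_NOCLEANUP=1 \
+#   RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \
+#       ../qa/workunits/rbd/rbd_mirror_ha.sh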
+# +# Thus a typical troubleshooting session: +# +# From Ceph src dir (CEPH_SRC_PATH), start the test in NOCLEANUP mode and with +# TEMPDIR pointing to a known location: +# +# cd $CEPH_SRC_PATH +# PATH=$CEPH_SRC_PATH:$PATH +# RBD_MIRROR_NOCLEANUP=1 RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \ +# ../qa/workunits/rbd/rbd_mirror_journal.sh +# +# After the test failure cd to TEMPDIR and check the current state: +# +# cd /tmp/tmp.rbd_mirror +# ls +# less rbd-mirror.cluster1_daemon.$pid.log +# ceph --cluster cluster1 -s +# ceph --cluster cluster1 -s +# rbd --cluster cluster2 -p mirror ls +# rbd --cluster cluster2 -p mirror journal status --image test +# ceph --admin-daemon rbd-mirror.cluster1_daemon.cluster1.$pid.asok help +# ... +# +# Also you can execute commands (functions) from the script: +# +# cd $CEPH_SRC_PATH +# export RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror +# ../qa/workunits/rbd/rbd_mirror_journal.sh status +# ../qa/workunits/rbd/rbd_mirror_journal.sh stop_mirror cluster1 +# ../qa/workunits/rbd/rbd_mirror_journal.sh start_mirror cluster2 +# ../qa/workunits/rbd/rbd_mirror_journal.sh flush cluster2 +# ... +# +# Eventually, run the cleanup: +# +# cd $CEPH_SRC_PATH +# RBD_MIRROR_TEMDIR=/tmp/tmp.rbd_mirror \ +# ../qa/workunits/rbd/rbd_mirror_journal.sh cleanup +# + +if type xmlstarlet > /dev/null 2>&1; then + XMLSTARLET=xmlstarlet +elif type xml > /dev/null 2>&1; then + XMLSTARLET=xml +else + echo "Missing xmlstarlet binary!" + exit 1 +fi + +RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-2} + +CLUSTER1=cluster1 +CLUSTER2=cluster2 +PEER_CLUSTER_SUFFIX= +POOL=mirror +PARENT_POOL=mirror_parent +NS1=ns1 +NS2=ns2 +TEMPDIR= +CEPH_ID=${CEPH_ID:-mirror} +RBD_IMAGE_FEATURES=${RBD_IMAGE_FEATURES:-layering,exclusive-lock,journaling} +MIRROR_USER_ID_PREFIX=${MIRROR_USER_ID_PREFIX:-${CEPH_ID}.} +MIRROR_POOL_MODE=${MIRROR_POOL_MODE:-pool} +MIRROR_IMAGE_MODE=${MIRROR_IMAGE_MODE:-journal} + +export CEPH_ARGS="--id ${CEPH_ID}" + +LAST_MIRROR_INSTANCE=$((${RBD_MIRROR_INSTANCES} - 1)) + +CEPH_ROOT=$(readlink -f $(dirname $0)/../../../src) +CEPH_BIN=. +CEPH_SRC=. +if [ -e CMakeCache.txt ]; then + CEPH_SRC=${CEPH_ROOT} + CEPH_ROOT=${PWD} + CEPH_BIN=./bin + + # needed for ceph CLI under cmake + export LD_LIBRARY_PATH=${CEPH_ROOT}/lib:${LD_LIBRARY_PATH} + export PYTHONPATH=${PYTHONPATH}:${CEPH_SRC}/pybind:${CEPH_ROOT}/lib/cython_modules/lib.3 +fi + +# These vars facilitate running this script in an environment with +# ceph installed from packages, like teuthology. These are not defined +# by default. +# +# RBD_MIRROR_USE_EXISTING_CLUSTER - if set, do not start and stop ceph clusters +# RBD_MIRROR_USE_RBD_MIRROR - if set, use an existing instance of rbd-mirror +# running as ceph client $CEPH_ID. If empty, +# this script will start and stop rbd-mirror + +# +# Functions +# + +# Parse a value in format cluster[:instance] and set cluster and instance vars. 
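+# A couple of illustrative calls (hypothetical values, shown only as examples):
+#
+#   set_cluster_instance "cluster1:3" c i   # -> c=cluster1, i=3
+#   set_cluster_instance "cluster1"   c i   # -> c=cluster1, i=0 (default instance)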
+set_cluster_instance() +{ + local val=$1 + local cluster_var_name=$2 + local instance_var_name=$3 + + cluster=${val%:*} + instance=${val##*:} + + if [ "${instance}" = "${val}" ]; then + # instance was not specified, use default + instance=0 + fi + + eval ${cluster_var_name}=${cluster} + eval ${instance_var_name}=${instance} +} + +daemon_asok_file() +{ + local local_cluster=$1 + local cluster=$2 + local instance + + set_cluster_instance "${local_cluster}" local_cluster instance + + echo $(ceph-conf --cluster $local_cluster --name "client.${MIRROR_USER_ID_PREFIX}${instance}" 'admin socket') +} + +daemon_pid_file() +{ + local cluster=$1 + local instance + + set_cluster_instance "${cluster}" cluster instance + + echo $(ceph-conf --cluster $cluster --name "client.${MIRROR_USER_ID_PREFIX}${instance}" 'pid file') +} + +testlog() +{ + echo $(date '+%F %T') $@ | tee -a "${TEMPDIR}/rbd-mirror.test.log" >&2 +} + +expect_failure() +{ + local expected="$1" ; shift + local out=${TEMPDIR}/expect_failure.out + + if "$@" > ${out} 2>&1 ; then + cat ${out} >&2 + return 1 + fi + + if [ -z "${expected}" ]; then + return 0 + fi + + if ! grep -q "${expected}" ${out} ; then + cat ${out} >&2 + return 1 + fi + + return 0 +} + +mkfname() +{ + echo "$@" | sed -e 's|[/ ]|_|g' +} + +create_users() +{ + local cluster=$1 + + CEPH_ARGS='' ceph --cluster "${cluster}" \ + auth get-or-create client.${CEPH_ID} \ + mon 'profile rbd' osd 'profile rbd' mgr 'profile rbd' >> \ + ${CEPH_ROOT}/run/${cluster}/keyring + for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + CEPH_ARGS='' ceph --cluster "${cluster}" \ + auth get-or-create client.${MIRROR_USER_ID_PREFIX}${instance} \ + mon 'profile rbd-mirror' osd 'profile rbd' mgr 'profile rbd' >> \ + ${CEPH_ROOT}/run/${cluster}/keyring + done +} + +setup_cluster() +{ + local cluster=$1 + + CEPH_ARGS='' ${CEPH_SRC}/mstart.sh ${cluster} -n ${RBD_MIRROR_VARGS} + + cd ${CEPH_ROOT} + rm -f ${TEMPDIR}/${cluster}.conf + ln -s $(readlink -f run/${cluster}/ceph.conf) \ + ${TEMPDIR}/${cluster}.conf + + cd ${TEMPDIR} + create_users "${cluster}" + + for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + cat<<EOF >> ${TEMPDIR}/${cluster}.conf +[client.${MIRROR_USER_ID_PREFIX}${instance}] + admin socket = ${TEMPDIR}/rbd-mirror.\$cluster-\$name.asok + pid file = ${TEMPDIR}/rbd-mirror.\$cluster-\$name.pid + log file = ${TEMPDIR}/rbd-mirror.${cluster}_daemon.${instance}.log +EOF + done +} + +peer_add() +{ + local cluster=$1 ; shift + local pool=$1 ; shift + local client_cluster=$1 ; shift + local remote_cluster="${client_cluster##*@}" + + local uuid_var_name + if [ -n "$1" ]; then + uuid_var_name=$1 ; shift + fi + + local error_code + local peer_uuid + + for s in 1 2 4 8 16 32; do + set +e + peer_uuid=$(rbd --cluster ${cluster} mirror pool peer add \ + ${pool} ${client_cluster} $@) + error_code=$? 
+ set -e + + if [ $error_code -eq 17 ]; then + # raced with a remote heartbeat ping -- remove and retry + sleep $s + peer_uuid=$(rbd mirror pool info --cluster ${cluster} --pool ${pool} --format xml | \ + xmlstarlet sel -t -v "//peers/peer[site_name='${remote_cluster}']/uuid") + + CEPH_ARGS='' rbd --cluster ${cluster} --pool ${pool} mirror pool peer remove ${peer_uuid} + else + test $error_code -eq 0 + if [ -n "$uuid_var_name" ]; then + eval ${uuid_var_name}=${peer_uuid} + fi + return 0 + fi + done + + return 1 +} + +setup_pools() +{ + local cluster=$1 + local remote_cluster=$2 + local mon_map_file + local mon_addr + local admin_key_file + local uuid + + CEPH_ARGS='' ceph --cluster ${cluster} osd pool create ${POOL} 64 64 + CEPH_ARGS='' ceph --cluster ${cluster} osd pool create ${PARENT_POOL} 64 64 + + CEPH_ARGS='' rbd --cluster ${cluster} pool init ${POOL} + CEPH_ARGS='' rbd --cluster ${cluster} pool init ${PARENT_POOL} + + if [ -n "${RBD_MIRROR_CONFIG_KEY}" ]; then + PEER_CLUSTER_SUFFIX=-DNE + fi + + CEPH_ARGS='' rbd --cluster ${cluster} mirror pool enable \ + --site-name ${cluster}${PEER_CLUSTER_SUFFIX} ${POOL} ${MIRROR_POOL_MODE} + rbd --cluster ${cluster} mirror pool enable ${PARENT_POOL} image + + rbd --cluster ${cluster} namespace create ${POOL}/${NS1} + rbd --cluster ${cluster} namespace create ${POOL}/${NS2} + + rbd --cluster ${cluster} mirror pool enable ${POOL}/${NS1} ${MIRROR_POOL_MODE} + rbd --cluster ${cluster} mirror pool enable ${POOL}/${NS2} image + + if [ -z ${RBD_MIRROR_MANUAL_PEERS} ]; then + if [ -z ${RBD_MIRROR_CONFIG_KEY} ]; then + peer_add ${cluster} ${POOL} ${remote_cluster} + peer_add ${cluster} ${PARENT_POOL} ${remote_cluster} + else + mon_map_file=${TEMPDIR}/${remote_cluster}.monmap + CEPH_ARGS='' ceph --cluster ${remote_cluster} mon getmap > ${mon_map_file} + mon_addr=$(monmaptool --print ${mon_map_file} | grep -E 'mon\.' | + head -n 1 | sed -E 's/^[0-9]+: ([^ ]+).+$/\1/' | sed -E 's/\/[0-9]+//g') + + admin_key_file=${TEMPDIR}/${remote_cluster}.client.${CEPH_ID}.key + CEPH_ARGS='' ceph --cluster ${remote_cluster} auth get-key client.${CEPH_ID} > ${admin_key_file} + + CEPH_ARGS='' peer_add ${cluster} ${POOL} \ + client.${CEPH_ID}@${remote_cluster}${PEER_CLUSTER_SUFFIX} '' \ + --remote-mon-host "${mon_addr}" --remote-key-file ${admin_key_file} + + peer_add ${cluster} ${PARENT_POOL} client.${CEPH_ID}@${remote_cluster}${PEER_CLUSTER_SUFFIX} uuid + CEPH_ARGS='' rbd --cluster ${cluster} mirror pool peer set ${PARENT_POOL} ${uuid} mon-host ${mon_addr} + CEPH_ARGS='' rbd --cluster ${cluster} mirror pool peer set ${PARENT_POOL} ${uuid} key-file ${admin_key_file} + fi + fi +} + +setup_tempdir() +{ + if [ -n "${RBD_MIRROR_TEMDIR}" ]; then + test -d "${RBD_MIRROR_TEMDIR}" || + mkdir "${RBD_MIRROR_TEMDIR}" + TEMPDIR="${RBD_MIRROR_TEMDIR}" + cd ${TEMPDIR} + else + TEMPDIR=`mktemp -d` + fi +} + +setup() +{ + local c + trap 'cleanup $?' 
INT TERM EXIT + + setup_tempdir + if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then + setup_cluster "${CLUSTER1}" + setup_cluster "${CLUSTER2}" + fi + + setup_pools "${CLUSTER1}" "${CLUSTER2}" + setup_pools "${CLUSTER2}" "${CLUSTER1}" + + if [ -n "${RBD_MIRROR_MIN_COMPAT_CLIENT}" ]; then + CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd \ + set-require-min-compat-client ${RBD_MIRROR_MIN_COMPAT_CLIENT} + CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd \ + set-require-min-compat-client ${RBD_MIRROR_MIN_COMPAT_CLIENT} + fi +} + +cleanup() +{ + local error_code=$1 + + set +e + + if [ "${error_code}" -ne 0 ]; then + status + fi + + if [ -z "${RBD_MIRROR_NOCLEANUP}" ]; then + local cluster instance + + CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it + CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it + CEPH_ARGS='' ceph --cluster ${CLUSTER1} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it + CEPH_ARGS='' ceph --cluster ${CLUSTER2} osd pool rm ${PARENT_POOL} ${PARENT_POOL} --yes-i-really-really-mean-it + + for cluster in "${CLUSTER1}" "${CLUSTER2}"; do + stop_mirrors "${cluster}" + done + + if [ -z "${RBD_MIRROR_USE_EXISTING_CLUSTER}" ]; then + cd ${CEPH_ROOT} + CEPH_ARGS='' ${CEPH_SRC}/mstop.sh ${CLUSTER1} + CEPH_ARGS='' ${CEPH_SRC}/mstop.sh ${CLUSTER2} + fi + test "${RBD_MIRROR_TEMDIR}" = "${TEMPDIR}" || rm -Rf ${TEMPDIR} + fi + + if [ "${error_code}" -eq 0 ]; then + echo "OK" + else + echo "FAIL" + fi + + exit ${error_code} +} + +start_mirror() +{ + local cluster=$1 + local instance + + set_cluster_instance "${cluster}" cluster instance + + test -n "${RBD_MIRROR_USE_RBD_MIRROR}" && return + + rbd-mirror \ + --cluster ${cluster} \ + --id ${MIRROR_USER_ID_PREFIX}${instance} \ + --rbd-mirror-delete-retry-interval=5 \ + --rbd-mirror-image-state-check-interval=5 \ + --rbd-mirror-journal-poll-age=1 \ + --rbd-mirror-pool-replayers-refresh-interval=5 \ + --debug-rbd=30 --debug-journaler=30 \ + --debug-rbd_mirror=30 \ + --daemonize=true \ + ${RBD_MIRROR_ARGS} +} + +start_mirrors() +{ + local cluster=$1 + + for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + start_mirror "${cluster}:${instance}" + done +} + +stop_mirror() +{ + local cluster=$1 + local sig=$2 + + test -n "${RBD_MIRROR_USE_RBD_MIRROR}" && return + + local pid + pid=$(cat $(daemon_pid_file "${cluster}") 2>/dev/null) || : + if [ -n "${pid}" ] + then + kill ${sig} ${pid} + for s in 1 2 4 8 16 32; do + sleep $s + ps auxww | awk -v pid=${pid} '$2 == pid {print; exit 1}' && break + done + ps auxww | awk -v pid=${pid} '$2 == pid {print; exit 1}' + fi + rm -f $(daemon_asok_file "${cluster}" "${CLUSTER1}") + rm -f $(daemon_asok_file "${cluster}" "${CLUSTER2}") + rm -f $(daemon_pid_file "${cluster}") +} + +stop_mirrors() +{ + local cluster=$1 + local sig=$2 + + for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + stop_mirror "${cluster}:${instance}" "${sig}" + done +} + +admin_daemon() +{ + local cluster=$1 ; shift + local instance + + set_cluster_instance "${cluster}" cluster instance + + local asok_file=$(daemon_asok_file "${cluster}:${instance}" "${cluster}") + test -S "${asok_file}" + + ceph --admin-daemon ${asok_file} $@ +} + +admin_daemons() +{ + local cluster_instance=$1 ; shift + local cluster="${cluster_instance%:*}" + local instance="${cluster_instance##*:}" + local loop_instance + + for s in 0 1 2 4 8 8 8 8 8 8 8 8 16 16; do + sleep ${s} + if [ "${instance}" != "${cluster_instance}" ]; then + admin_daemon 
"${cluster}:${instance}" $@ && return 0 + else + for loop_instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + admin_daemon "${cluster}:${loop_instance}" $@ && return 0 + done + fi + done + return 1 +} + +all_admin_daemons() +{ + local cluster=$1 ; shift + + for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + admin_daemon "${cluster}:${instance}" $@ + done +} + +status() +{ + local cluster daemon image_pool image_ns image + + for cluster in ${CLUSTER1} ${CLUSTER2} + do + echo "${cluster} status" + CEPH_ARGS='' ceph --cluster ${cluster} -s + CEPH_ARGS='' ceph --cluster ${cluster} service dump + CEPH_ARGS='' ceph --cluster ${cluster} service status + echo + + for image_pool in ${POOL} ${PARENT_POOL} + do + for image_ns in "" "${NS1}" "${NS2}" + do + echo "${cluster} ${image_pool} ${image_ns} images" + rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" ls -l + echo + + echo "${cluster} ${image_pool}${image_ns} mirror pool info" + rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" mirror pool info + echo + + echo "${cluster} ${image_pool}${image_ns} mirror pool status" + CEPH_ARGS='' rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" mirror pool status --verbose + echo + + for image in `rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" ls 2>/dev/null` + do + echo "image ${image} info" + rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" info ${image} + echo + echo "image ${image} journal status" + rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" journal status --image ${image} + echo + echo "image ${image} snapshots" + rbd --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" snap ls --all ${image} + echo + done + + echo "${cluster} ${image_pool} ${image_ns} rbd_mirroring omap vals" + rados --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" listomapvals rbd_mirroring + echo "${cluster} ${image_pool} ${image_ns} rbd_mirror_leader omap vals" + rados --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" listomapvals rbd_mirror_leader + echo + done + done + done + + local ret + + for cluster in "${CLUSTER1}" "${CLUSTER2}" + do + for instance in `seq 0 ${LAST_MIRROR_INSTANCE}`; do + local pid_file=$(daemon_pid_file ${cluster}:${instance}) + if [ ! -e ${pid_file} ] + then + echo "${cluster} rbd-mirror not running or unknown" \ + "(${pid_file} not exist)" + continue + fi + + local pid + pid=$(cat ${pid_file} 2>/dev/null) || : + if [ -z "${pid}" ] + then + echo "${cluster} rbd-mirror not running or unknown" \ + "(can't find pid using ${pid_file})" + ret=1 + continue + fi + + echo "${daemon} rbd-mirror process in ps output:" + if ps auxww | + awk -v pid=${pid} 'NR == 1 {print} $2 == pid {print; exit 1}' + then + echo + echo "${cluster} rbd-mirror not running" \ + "(can't find pid $pid in ps output)" + ret=1 + continue + fi + echo + + local asok_file=$(daemon_asok_file ${cluster}:${instance} ${cluster}) + if [ ! 
-S "${asok_file}" ] + then + echo "${cluster} rbd-mirror asok is unknown (${asok_file} not exits)" + ret=1 + continue + fi + + echo "${cluster} rbd-mirror status" + ceph --admin-daemon ${asok_file} rbd mirror status + echo + done + done + + return ${ret} +} + +flush() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local cmd="rbd mirror flush" + + if [ -n "${image}" ] + then + cmd="${cmd} ${pool}/${image}" + fi + + admin_daemons "${cluster}" ${cmd} +} + +test_image_replay_state() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local test_state=$4 + local status_result + local current_state=stopped + + status_result=$(admin_daemons "${cluster}" rbd mirror status ${pool}/${image} | grep -i 'state') || return 1 + echo "${status_result}" | grep -i 'Replaying' && current_state=started + test "${test_state}" = "${current_state}" +} + +wait_for_image_replay_state() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local state=$4 + local s + + # TODO: add a way to force rbd-mirror to update replayers + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do + sleep ${s} + test_image_replay_state "${cluster}" "${pool}" "${image}" "${state}" && return 0 + done + return 1 +} + +wait_for_image_replay_started() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + wait_for_image_replay_state "${cluster}" "${pool}" "${image}" started +} + +wait_for_image_replay_stopped() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + wait_for_image_replay_state "${cluster}" "${pool}" "${image}" stopped +} + +get_journal_position() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local id_regexp=$4 + + # Parse line like below, looking for the first position + # [id=, commit_position=[positions=[[object_number=1, tag_tid=3, entry_tid=9], [object_number=0, tag_tid=3, entry_tid=8], [object_number=3, tag_tid=3, entry_tid=7], [object_number=2, tag_tid=3, entry_tid=6]]]] + + local status_log=${TEMPDIR}/$(mkfname ${CLUSTER2}-${pool}-${image}.status) + rbd --cluster ${cluster} journal status --image ${pool}/${image} | + tee ${status_log} >&2 + sed -nEe 's/^.*\[id='"${id_regexp}"',.*positions=\[\[([^]]*)\],.*state=connected.*$/\1/p' \ + ${status_log} +} + +get_master_journal_position() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + get_journal_position "${cluster}" "${pool}" "${image}" '' +} + +get_mirror_journal_position() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + get_journal_position "${cluster}" "${pool}" "${image}" '..*' +} + +wait_for_journal_replay_complete() +{ + local local_cluster=$1 + local cluster=$2 + local pool=$3 + local image=$4 + local s master_pos mirror_pos last_mirror_pos + local master_tag master_entry mirror_tag mirror_entry + + while true; do + for s in 0.2 0.4 0.8 1.6 2 2 4 4 8 8 16 16 32 32; do + sleep ${s} + flush "${local_cluster}" "${pool}" "${image}" + master_pos=$(get_master_journal_position "${cluster}" "${pool}" "${image}") + mirror_pos=$(get_mirror_journal_position "${cluster}" "${pool}" "${image}") + test -n "${master_pos}" -a "${master_pos}" = "${mirror_pos}" && return 0 + test "${mirror_pos}" != "${last_mirror_pos}" && break + done + + test "${mirror_pos}" = "${last_mirror_pos}" && return 1 + last_mirror_pos="${mirror_pos}" + + # handle the case where the mirror is ahead of the master + master_tag=$(echo "${master_pos}" | grep -Eo "tag_tid=[0-9]*" | cut -d'=' -f 2) + mirror_tag=$(echo "${mirror_pos}" | grep -Eo "tag_tid=[0-9]*" | cut -d'=' -f 2) + master_entry=$(echo "${master_pos}" | grep -Eo "entry_tid=[0-9]*" | cut -d'=' -f 2) + 
mirror_entry=$(echo "${mirror_pos}" | grep -Eo "entry_tid=[0-9]*" | cut -d'=' -f 2) + test "${master_tag}" = "${mirror_tag}" -a ${master_entry} -le ${mirror_entry} && return 0 + done + return 1 +} + +mirror_image_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster "${cluster}" mirror image snapshot "${pool}/${image}" +} + +get_newest_mirror_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local log=$4 + + rbd --cluster "${cluster}" snap list --all "${pool}/${image}" --format xml | \ + xmlstarlet sel -t -c "//snapshots/snapshot[namespace/complete='true' and position()=last()]" > \ + ${log} || true +} + +wait_for_snapshot_sync_complete() +{ + local local_cluster=$1 + local cluster=$2 + local pool=$3 + local image=$4 + + local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}-${image}.status) + local local_status_log=${TEMPDIR}/$(mkfname ${local_cluster}-${pool}-${image}.status) + + mirror_image_snapshot "${cluster}" "${pool}" "${image}" + get_newest_mirror_snapshot "${cluster}" "${pool}" "${image}" "${status_log}" + local snapshot_id=$(xmlstarlet sel -t -v "//snapshot/id" < ${status_log}) + + while true; do + for s in 0.2 0.4 0.8 1.6 2 2 4 4 8 8 16 16 32 32; do + sleep ${s} + + get_newest_mirror_snapshot "${local_cluster}" "${pool}" "${image}" "${local_status_log}" + local primary_snapshot_id=$(xmlstarlet sel -t -v "//snapshot/namespace/primary_snap_id" < ${local_status_log}) + + test "${snapshot_id}" = "${primary_snapshot_id}" && return 0 + done + + return 1 + done + return 1 +} + +wait_for_replay_complete() +{ + local local_cluster=$1 + local cluster=$2 + local pool=$3 + local image=$4 + + if [ "${MIRROR_IMAGE_MODE}" = "journal" ]; then + wait_for_journal_replay_complete ${local_cluster} ${cluster} ${pool} ${image} + elif [ "${MIRROR_IMAGE_MODE}" = "snapshot" ]; then + wait_for_snapshot_sync_complete ${local_cluster} ${cluster} ${pool} ${image} + else + return 1 + fi +} + + +test_status_in_pool_dir() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local state_pattern="$4" + local description_pattern="$5" + local service_pattern="$6" + + local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}-${image}.mirror_status) + CEPH_ARGS='' rbd --cluster ${cluster} mirror image status ${pool}/${image} | + tee ${status_log} >&2 + grep "^ state: .*${state_pattern}" ${status_log} || return 1 + grep "^ description: .*${description_pattern}" ${status_log} || return 1 + + if [ -n "${service_pattern}" ]; then + grep "service: *${service_pattern}" ${status_log} || return 1 + elif echo ${state_pattern} | grep '^up+'; then + grep "service: *${MIRROR_USER_ID_PREFIX}.* on " ${status_log} || return 1 + else + grep "service: " ${status_log} && return 1 + fi + + # recheck using `mirror pool status` command to stress test it. 
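+    # (the verbose pool status must report either a matching state or a
+    # last_update newer than the one captured from the image status above)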
+ + local last_update="$(sed -nEe 's/^ last_update: *(.*) *$/\1/p' ${status_log})" + test_mirror_pool_status_verbose \ + ${cluster} ${pool} ${image} "${state_pattern}" "${last_update}" && + return 0 + + echo "'mirror pool status' test failed" >&2 + exit 1 +} + +test_mirror_pool_status_verbose() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local state_pattern="$4" + local prev_last_update="$5" + + local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}.mirror_status) + + rbd --cluster ${cluster} mirror pool status ${pool} --verbose --format xml \ + > ${status_log} + + local last_update state + last_update=$($XMLSTARLET sel -t -v \ + "//images/image[name='${image}']/last_update" < ${status_log}) + state=$($XMLSTARLET sel -t -v \ + "//images/image[name='${image}']/state" < ${status_log}) + + echo "${state}" | grep "${state_pattern}" || + test "${last_update}" '>' "${prev_last_update}" +} + +wait_for_status_in_pool_dir() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local state_pattern="$4" + local description_pattern="$5" + local service_pattern="$6" + + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do + sleep ${s} + test_status_in_pool_dir ${cluster} ${pool} ${image} "${state_pattern}" \ + "${description_pattern}" "${service_pattern}" && + return 0 + done + return 1 +} + +create_image() +{ + local cluster=$1 ; shift + local pool=$1 ; shift + local image=$1 ; shift + local size=128 + + if [ -n "$1" ]; then + size=$1 + shift + fi + + rbd --cluster ${cluster} create --size ${size} \ + --image-feature "${RBD_IMAGE_FEATURES}" $@ ${pool}/${image} +} + +create_image_and_enable_mirror() +{ + local cluster=$1 ; shift + local pool=$1 ; shift + local image=$1 ; shift + local mode=${1:-${MIRROR_IMAGE_MODE}} + if [ -n "$1" ]; then + shift + fi + + create_image ${cluster} ${pool} ${image} $@ + if [ "${MIRROR_POOL_MODE}" = "image" ] || [ "$pool" = "${PARENT_POOL}" ]; then + enable_mirror ${cluster} ${pool} ${image} ${mode} + fi +} + +enable_journaling() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster ${cluster} feature enable ${pool}/${image} journaling +} + +set_image_meta() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local key=$4 + local val=$5 + + rbd --cluster ${cluster} image-meta set ${pool}/${image} $key $val +} + +compare_image_meta() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local key=$4 + local value=$5 + + test `rbd --cluster ${cluster} image-meta get ${pool}/${image} ${key}` = "${value}" +} + +rename_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local new_name=$4 + + rbd --cluster=${cluster} rename ${pool}/${image} ${pool}/${new_name} +} + +remove_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster=${cluster} snap purge ${pool}/${image} + rbd --cluster=${cluster} rm ${pool}/${image} +} + +remove_image_retry() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + for s in 0 1 2 4 8 16 32; do + sleep ${s} + remove_image ${cluster} ${pool} ${image} && return 0 + done + return 1 +} + +trash_move() { + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster=${cluster} trash move ${pool}/${image} +} + +trash_restore() { + local cluster=$1 + local pool=$2 + local image_id=$3 + + rbd --cluster=${cluster} trash restore ${pool}/${image_id} +} + +clone_image() +{ + local cluster=$1 + local parent_pool=$2 + local parent_image=$3 + local parent_snap=$4 + local clone_pool=$5 + local clone_image=$6 + + shift 6 + + rbd --cluster ${cluster} clone \ + 
${parent_pool}/${parent_image}@${parent_snap} \ + ${clone_pool}/${clone_image} --image-feature "${RBD_IMAGE_FEATURES}" $@ +} + +clone_image_and_enable_mirror() +{ + local cluster=$1 + local parent_pool=$2 + local parent_image=$3 + local parent_snap=$4 + local clone_pool=$5 + local clone_image=$6 + shift 6 + + local mode=${1:-${MIRROR_IMAGE_MODE}} + if [ -n "$1" ]; then + shift + fi + + clone_image ${cluster} ${parent_pool} ${parent_image} ${parent_snap} ${clone_pool} ${clone_image} $@ + enable_mirror ${cluster} ${clone_pool} ${clone_image} ${mode} +} + +disconnect_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster ${cluster} journal client disconnect \ + --image ${pool}/${image} +} + +create_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + + rbd --cluster ${cluster} snap create ${pool}/${image}@${snap} +} + +remove_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + + rbd --cluster ${cluster} snap rm ${pool}/${image}@${snap} +} + +rename_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + local new_snap=$5 + + rbd --cluster ${cluster} snap rename ${pool}/${image}@${snap} \ + ${pool}/${image}@${new_snap} +} + +purge_snapshots() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster ${cluster} snap purge ${pool}/${image} +} + +protect_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + + rbd --cluster ${cluster} snap protect ${pool}/${image}@${snap} +} + +unprotect_snapshot() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + + rbd --cluster ${cluster} snap unprotect ${pool}/${image}@${snap} +} + +unprotect_snapshot_retry() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap=$4 + + for s in 0 1 2 4 8 16 32; do + sleep ${s} + unprotect_snapshot ${cluster} ${pool} ${image} ${snap} && return 0 + done + return 1 +} + +wait_for_snap_present() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap_name=$4 + local s + + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do + sleep ${s} + rbd --cluster ${cluster} info ${pool}/${image}@${snap_name} || continue + return 0 + done + return 1 +} + +test_snap_moved_to_trash() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap_name=$4 + + rbd --cluster ${cluster} snap ls ${pool}/${image} --all | + grep -F " trash (${snap_name})" +} + +wait_for_snap_moved_to_trash() +{ + local s + + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do + sleep ${s} + test_snap_moved_to_trash $@ || continue + return 0 + done + return 1 +} + +test_snap_removed_from_trash() +{ + test_snap_moved_to_trash $@ && return 1 + return 0 +} + +wait_for_snap_removed_from_trash() +{ + local s + + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16 16 16 32 32 32 32; do + sleep ${s} + test_snap_removed_from_trash $@ || continue + return 0 + done + return 1 +} + +count_mirror_snaps() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster ${cluster} snap ls ${pool}/${image} --all | + grep -c -F " mirror (" +} + +write_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local count=$4 + local size=$5 + + test -n "${size}" || size=4096 + + rbd --cluster ${cluster} bench ${pool}/${image} --io-type write \ + --io-size ${size} --io-threads 1 --io-total $((size * count)) \ + --io-pattern rand +} + +stress_write_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local duration=$(awk 'BEGIN {srand(); print int(10 * 
rand()) + 5}') + + set +e + timeout ${duration}s ceph_test_rbd_mirror_random_write \ + --cluster ${cluster} ${pool} ${image} \ + --debug-rbd=20 --debug-journaler=20 \ + 2> ${TEMPDIR}/rbd-mirror-random-write.log + error_code=$? + set -e + + if [ $error_code -eq 124 ]; then + return 0 + fi + return 1 +} + +show_diff() +{ + local file1=$1 + local file2=$2 + + xxd ${file1} > ${file1}.xxd + xxd ${file2} > ${file2}.xxd + sdiff -s ${file1}.xxd ${file2}.xxd | head -n 64 + rm -f ${file1}.xxd ${file2}.xxd +} + +compare_images() +{ + local pool=$1 + local image=$2 + local ret=0 + + local rmt_export=${TEMPDIR}/$(mkfname ${CLUSTER2}-${pool}-${image}.export) + local loc_export=${TEMPDIR}/$(mkfname ${CLUSTER1}-${pool}-${image}.export) + + rm -f ${rmt_export} ${loc_export} + rbd --cluster ${CLUSTER2} export ${pool}/${image} ${rmt_export} + rbd --cluster ${CLUSTER1} export ${pool}/${image} ${loc_export} + if ! cmp ${rmt_export} ${loc_export} + then + show_diff ${rmt_export} ${loc_export} + ret=1 + fi + rm -f ${rmt_export} ${loc_export} + return ${ret} +} + +compare_image_snapshots() +{ + local pool=$1 + local image=$2 + local ret=0 + + local rmt_export=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.export + local loc_export=${TEMPDIR}/${CLUSTER1}-${pool}-${image}.export + + for snap_name in $(rbd --cluster ${CLUSTER1} --format xml \ + snap list ${pool}/${image} | \ + $XMLSTARLET sel -t -v "//snapshot/name" | \ + grep -E -v "^\.rbd-mirror\."); do + rm -f ${rmt_export} ${loc_export} + rbd --cluster ${CLUSTER2} export ${pool}/${image}@${snap_name} ${rmt_export} + rbd --cluster ${CLUSTER1} export ${pool}/${image}@${snap_name} ${loc_export} + if ! cmp ${rmt_export} ${loc_export} + then + show_diff ${rmt_export} ${loc_export} + ret=1 + fi + done + rm -f ${rmt_export} ${loc_export} + return ${ret} +} + +demote_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster=${cluster} mirror image demote ${pool}/${image} +} + +promote_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local force=$4 + + rbd --cluster=${cluster} mirror image promote ${pool}/${image} ${force} +} + +set_pool_mirror_mode() +{ + local cluster=$1 + local pool=$2 + local mode=${3:-${MIRROR_POOL_MODE}} + + rbd --cluster=${cluster} mirror pool enable ${pool} ${mode} +} + +disable_mirror() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster=${cluster} mirror image disable ${pool}/${image} +} + +enable_mirror() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local mode=${4:-${MIRROR_IMAGE_MODE}} + + rbd --cluster=${cluster} mirror image enable ${pool}/${image} ${mode} + # Display image info including the global image id for debugging purpose + rbd --cluster=${cluster} info ${pool}/${image} +} + +test_image_present() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local test_state=$4 + local image_id=$5 + local current_state=deleted + local current_image_id + + current_image_id=$(get_image_id ${cluster} ${pool} ${image}) + test -n "${current_image_id}" && + test -z "${image_id}" -o "${image_id}" = "${current_image_id}" && + current_state=present + + test "${test_state}" = "${current_state}" +} + +wait_for_image_present() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local state=$4 + local image_id=$5 + local s + + test -n "${image_id}" || + image_id=$(get_image_id ${cluster} ${pool} ${image}) + + # TODO: add a way to force rbd-mirror to update replayers + for s in 0.1 1 2 4 8 8 8 8 8 8 8 8 16 16 32 32; do + sleep ${s} + test_image_present \ + "${cluster}" "${pool}" 
"${image}" "${state}" "${image_id}" && + return 0 + done + return 1 +} + +get_image_id() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster=${cluster} info ${pool}/${image} | + sed -ne 's/^.*block_name_prefix: rbd_data\.//p' +} + +request_resync_image() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local image_id_var_name=$4 + + eval "${image_id_var_name}='$(get_image_id ${cluster} ${pool} ${image})'" + eval 'test -n "$'${image_id_var_name}'"' + + rbd --cluster=${cluster} mirror image resync ${pool}/${image} +} + +get_image_data_pool() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster ${cluster} info ${pool}/${image} | + awk '$1 == "data_pool:" {print $2}' +} + +get_clone_format() +{ + local cluster=$1 + local pool=$2 + local image=$3 + + rbd --cluster ${cluster} info ${pool}/${image} | + awk 'BEGIN { + format = 1 + } + $1 == "parent:" { + parent = $2 + } + /op_features: .*clone-child/ { + format = 2 + } + END { + if (!parent) exit 1 + print format + }' +} + +list_omap_keys() +{ + local cluster=$1 + local pool=$2 + local obj_name=$3 + + rados --cluster ${cluster} -p ${pool} listomapkeys ${obj_name} +} + +count_omap_keys_with_filter() +{ + local cluster=$1 + local pool=$2 + local obj_name=$3 + local filter=$4 + + list_omap_keys ${cluster} ${pool} ${obj_name} | grep -c ${filter} +} + +wait_for_omap_keys() +{ + local cluster=$1 + local pool=$2 + local obj_name=$3 + local filter=$4 + + for s in 0 1 2 2 4 4 8 8 8 16 16 32; do + sleep $s + + set +e + test "$(count_omap_keys_with_filter ${cluster} ${pool} ${obj_name} ${filter})" = 0 + error_code=$? + set -e + + if [ $error_code -eq 0 ]; then + return 0 + fi + done + + return 1 +} + +wait_for_image_in_omap() +{ + local cluster=$1 + local pool=$2 + + wait_for_omap_keys ${cluster} ${pool} rbd_mirroring status_global + wait_for_omap_keys ${cluster} ${pool} rbd_mirroring image_ + wait_for_omap_keys ${cluster} ${pool} rbd_mirror_leader image_map +} + +# +# Main +# + +if [ "$#" -gt 0 ] +then + if [ -z "${RBD_MIRROR_TEMDIR}" ] + then + echo "RBD_MIRROR_TEMDIR is not set" >&2 + exit 1 + fi + + TEMPDIR="${RBD_MIRROR_TEMDIR}" + cd ${TEMPDIR} + $@ + exit $? +fi diff --git a/qa/workunits/rbd/rbd_mirror_journal.sh b/qa/workunits/rbd/rbd_mirror_journal.sh new file mode 100755 index 000000000..54f6aeec8 --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_journal.sh @@ -0,0 +1,614 @@ +#!/bin/sh -ex +# +# rbd_mirror_journal.sh - test rbd-mirror daemon in journal-based mirroring mode +# +# The scripts starts two ("local" and "remote") clusters using mstart.sh script, +# creates a temporary directory, used for cluster configs, daemon logs, admin +# socket, temporary files, and launches rbd-mirror daemon. +# + +. 
$(dirname $0)/rbd_mirror_helpers.sh + +setup + +testlog "TEST: add image and test replay" +start_mirrors ${CLUSTER1} +image=test +create_image ${CLUSTER2} ${POOL} ${image} +set_image_meta ${CLUSTER2} ${POOL} ${image} "key1" "value1" +set_image_meta ${CLUSTER2} ${POOL} ${image} "key2" "value2" +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'down+unknown' +fi +compare_images ${POOL} ${image} +compare_image_meta ${CLUSTER1} ${POOL} ${image} "key1" "value1" +compare_image_meta ${CLUSTER1} ${POOL} ${image} "key2" "value2" + +testlog "TEST: stop mirror, add image, start mirror and test replay" +stop_mirrors ${CLUSTER1} +image1=test1 +create_image ${CLUSTER2} ${POOL} ${image1} +write_image ${CLUSTER2} ${POOL} ${image1} 100 +start_mirrors ${CLUSTER1} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image1} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' 'primary_position' +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image1} 'down+unknown' +fi +compare_images ${POOL} ${image1} + +testlog "TEST: test the first image is replaying after restart" +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +compare_images ${POOL} ${image} + +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + testlog "TEST: stop/start/restart mirror via admin socket" + all_admin_daemons ${CLUSTER1} rbd mirror stop + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + all_admin_daemons ${CLUSTER1} rbd mirror start + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror restart + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror stop + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + all_admin_daemons ${CLUSTER1} rbd mirror restart + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + 
all_admin_daemons ${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror start ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + + all_admin_daemons ${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + flush ${CLUSTER1} + all_admin_daemons ${CLUSTER1} rbd mirror status +fi + +remove_image_retry ${CLUSTER2} ${POOL} ${image1} + +testlog "TEST: test image rename" +new_name="${image}_RENAMED" +rename_image ${CLUSTER2} ${POOL} ${image} ${new_name} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying' +admin_daemons ${CLUSTER1} rbd mirror status ${POOL}/${new_name} +admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${new_name} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying' +rename_image ${CLUSTER2} ${POOL} ${new_name} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +testlog "TEST: test trash move restore" +image_id=$(get_image_id ${CLUSTER2} ${POOL} ${image}) +trash_move ${CLUSTER2} ${POOL} ${image} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' +trash_restore ${CLUSTER2} ${POOL} ${image_id} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +testlog "TEST: check if removed images' OMAP are removed (with rbd-mirror on one cluster)" +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} + +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 
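+
+# Note: the failover/failback section below drives planned primary switches.
+# The demote_image/promote_image helpers defined earlier in
+# rbd_mirror_helpers.sh are thin wrappers around the rbd CLI; a minimal
+# hand-run sketch of one failover/failback cycle (using this script's own
+# variables) would be roughly:
+#
+#   rbd --cluster ${CLUSTER2} mirror image demote ${POOL}/${image}    # old primary
+#   rbd --cluster ${CLUSTER1} mirror image promote ${POOL}/${image}   # new primary
+#   rbd --cluster ${CLUSTER1} mirror image demote ${POOL}/${image}    # fail back
+#   rbd --cluster ${CLUSTER2} mirror image promote ${POOL}/${image}
+#
+# Promoting with --force while the peer is still primary is only done in the
+# force-promote and split-brain tests further down, where the resulting
+# 'split-brain' error state is expected and is cleared via an image resync.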
+ +testlog "TEST: failover and failback" +start_mirrors ${CLUSTER2} + +# demote and promote same cluster +demote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +compare_images ${POOL} ${image} + +# failover (unmodified) +demote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} + +# failback (unmodified) +demote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' +compare_images ${POOL} ${image} + +# failover +demote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} +write_image ${CLUSTER1} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying' 'primary_position' +compare_images ${POOL} ${image} + +# failback +demote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' +compare_images ${POOL} ${image} + +testlog "TEST: failover / failback loop" +for i in `seq 1 20`; do + demote_image ${CLUSTER2} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' + promote_image ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} + 
wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying' + demote_image ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' + promote_image ${CLUSTER2} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +done + +testlog "TEST: force promote" +force_promote_image=test_force_promote +create_image ${CLUSTER2} ${POOL} ${force_promote_image} +write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${force_promote_image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${force_promote_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+replaying' 'primary_position' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' +promote_image ${CLUSTER1} ${POOL} ${force_promote_image} '--force' +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${force_promote_image} +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopped' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' +write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100 +write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 +remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image} +remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image} + +testlog "TEST: cloned images" +testlog " - default" +parent_image=test_parent +parent_snap=snap +create_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} +write_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} 100 +create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +protect_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} + +clone_image=test_clone +clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} ${clone_image} +write_image ${CLUSTER2} ${POOL} ${clone_image} 100 + +enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} journal +wait_for_image_replay_started ${CLUSTER1} ${PARENT_POOL} ${parent_image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} ${parent_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${PARENT_POOL} ${parent_image} 'up+replaying' 'primary_position' +compare_images ${PARENT_POOL} ${parent_image} + +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${clone_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${clone_image} 'up+replaying' 'primary_position' +compare_images ${POOL} ${clone_image} +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image} + +testlog " - clone v1" +clone_image ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} ${clone_image}1 + +clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} \ + ${clone_image}_v1 --rbd-default-clone-format 1 +test $(get_clone_format 
${CLUSTER2} ${POOL} ${clone_image}_v1) = 1 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v1 +test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v1) = 1 +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v1 +remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}1 +unprotect_snapshot_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} + +testlog " - clone v2" +parent_snap=snap_v2 +create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} \ + ${clone_image}_v2 --rbd-default-clone-format 2 +test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v2) = 2 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v2 +test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v2) = 2 + +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +test_snap_moved_to_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v2 +wait_for_image_present ${CLUSTER1} ${POOL} ${clone_image}_v2 'deleted' +test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} + +testlog " - clone v2 non-primary" +create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +wait_for_snap_present ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +clone_image ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} \ + ${clone_image}_v2 --rbd-default-clone-format 2 +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}_v2 +wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} + +testlog "TEST: data pool" +dp_image=test_data_pool +create_image ${CLUSTER2} ${POOL} ${dp_image} 128 --data-pool ${PARENT_POOL} +data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${dp_image} +data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap1' +write_image ${CLUSTER2} ${POOL} ${dp_image} 100 +create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap2' +write_image ${CLUSTER2} ${POOL} ${dp_image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${dp_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${dp_image} 'up+replaying' 'primary_position' +compare_images ${POOL} ${dp_image}@snap1 +compare_images ${POOL} ${dp_image}@snap2 +compare_images ${POOL} ${dp_image} +remove_image_retry ${CLUSTER2} ${POOL} ${dp_image} + +testlog "TEST: disable mirroring / delete non-primary image" +image2=test2 +image3=test3 +image4=test4 +image5=test5 +for i in ${image2} ${image3} ${image4} ${image5}; do + create_image ${CLUSTER2} ${POOL} ${i} + write_image ${CLUSTER2} ${POOL} ${i} 100 + create_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' + create_snapshot ${CLUSTER2} 
${POOL} ${i} 'snap2' + if [ "${i}" = "${image4}" ] || [ "${i}" = "${image5}" ]; then + protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' + protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' + fi + write_image ${CLUSTER2} ${POOL} ${i} 100 + wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'present' + wait_for_snap_present ${CLUSTER1} ${POOL} ${i} 'snap2' +done + +set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' +for i in ${image2} ${image4}; do + disable_mirror ${CLUSTER2} ${POOL} ${i} +done + +unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap1' +unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap2' +for i in ${image3} ${image5}; do + remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' + remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' + remove_image_retry ${CLUSTER2} ${POOL} ${i} +done + +for i in ${image2} ${image3} ${image4} ${image5}; do + wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'deleted' +done + +set_pool_mirror_mode ${CLUSTER2} ${POOL} 'pool' +for i in ${image2} ${image4}; do + enable_journaling ${CLUSTER2} ${POOL} ${i} + wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'present' + wait_for_snap_present ${CLUSTER1} ${POOL} ${i} 'snap2' + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${i} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${i} + compare_images ${POOL} ${i} +done + +testlog "TEST: remove mirroring pool" +pool=pool_to_remove +for cluster in ${CLUSTER1} ${CLUSTER2}; do + CEPH_ARGS='' ceph --cluster ${cluster} osd pool create ${pool} 16 16 + CEPH_ARGS='' rbd --cluster ${cluster} pool init ${pool} + rbd --cluster ${cluster} mirror pool enable ${pool} pool +done +peer_add ${CLUSTER1} ${pool} ${CLUSTER2} +peer_add ${CLUSTER2} ${pool} ${CLUSTER1} +rdp_image=test_remove_data_pool +create_image ${CLUSTER2} ${pool} ${image} 128 +create_image ${CLUSTER2} ${POOL} ${rdp_image} 128 --data-pool ${pool} +write_image ${CLUSTER2} ${pool} ${image} 100 +write_image ${CLUSTER2} ${POOL} ${rdp_image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${pool} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${pool} ${image} 'up+replaying' 'primary_position' +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${rdp_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${rdp_image} 'up+replaying' 'primary_position' +for cluster in ${CLUSTER1} ${CLUSTER2}; do + CEPH_ARGS='' ceph --cluster ${cluster} osd pool rm ${pool} ${pool} --yes-i-really-really-mean-it +done +remove_image_retry ${CLUSTER2} ${POOL} ${rdp_image} +wait_for_image_present ${CLUSTER1} ${POOL} ${rdp_image} 'deleted' +for i in 0 1 2 4 8 8 8 8 16 16; do + sleep $i + admin_daemons "${CLUSTER2}" rbd mirror status ${pool}/${image} || break +done +admin_daemons "${CLUSTER2}" rbd mirror status ${pool}/${image} && false + +testlog "TEST: snapshot rename" +snap_name='snap_rename' +create_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_0" +for i in `seq 1 20`; do + rename_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_$(expr ${i} - 1)" "${snap_name}_${i}" +done +wait_for_snap_present ${CLUSTER1} ${POOL} ${image2} "${snap_name}_${i}" + +unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap1' +unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap2' +for i in ${image2} ${image4}; do + remove_image_retry ${CLUSTER2} ${POOL} ${i} +done + +testlog "TEST: disable mirror while daemon is stopped" +stop_mirrors ${CLUSTER1} +stop_mirrors ${CLUSTER2} +set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' +disable_mirror ${CLUSTER2} ${POOL} ${image} +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + 
test_image_present ${CLUSTER1} ${POOL} ${image} 'present' +fi +start_mirrors ${CLUSTER1} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' +set_pool_mirror_mode ${CLUSTER2} ${POOL} 'pool' +enable_journaling ${CLUSTER2} ${POOL} ${image} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +testlog "TEST: non-default namespace image mirroring" +testlog " - replay" +create_image ${CLUSTER2} ${POOL}/${NS1} ${image} +create_image ${CLUSTER2} ${POOL}/${NS2} ${image} +enable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} journal +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS1} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS2} ${image} +write_image ${CLUSTER2} ${POOL}/${NS1} ${image} 100 +write_image ${CLUSTER2} ${POOL}/${NS2} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS2} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${image} 'up+replaying' 'primary_position' +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS2} ${image} 'up+replaying' 'primary_position' +compare_images ${POOL}/${NS1} ${image} +compare_images ${POOL}/${NS2} ${image} + +testlog " - disable mirroring / delete image" +remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${image} +disable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} +wait_for_image_present ${CLUSTER1} ${POOL}/${NS1} ${image} 'deleted' +wait_for_image_present ${CLUSTER1} ${POOL}/${NS2} ${image} 'deleted' +remove_image_retry ${CLUSTER2} ${POOL}/${NS2} ${image} + +testlog " - data pool" +dp_image=test_data_pool +create_image ${CLUSTER2} ${POOL}/${NS1} ${dp_image} 128 --data-pool ${PARENT_POOL} +data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL}/${NS1} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS1} ${dp_image} +data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL}/${NS1} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +write_image ${CLUSTER2} ${POOL}/${NS1} ${dp_image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${dp_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${dp_image} 'up+replaying' 'primary_position' +compare_images ${POOL}/${NS1} ${dp_image} +remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${dp_image} + +testlog "TEST: simple image resync" +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +compare_images ${POOL} ${image} + +testlog "TEST: image resync while replayer is stopped" +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + admin_daemons ${CLUSTER1} rbd mirror stop ${POOL}/${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + request_resync_image ${CLUSTER1} ${POOL} ${image} image_id + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' + compare_images ${POOL} 
${image} +fi + +testlog "TEST: request image resync while daemon is offline" +stop_mirrors ${CLUSTER1} +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +start_mirrors ${CLUSTER1} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +compare_images ${POOL} ${image} +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +testlog "TEST: client disconnect" +image=laggy +create_image ${CLUSTER2} ${POOL} ${image} 128 --journal-object-size 64K +write_image ${CLUSTER2} ${POOL} ${image} 10 + +testlog " - replay stopped after disconnect" +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" +disconnect_image ${CLUSTER2} ${POOL} ${image} +test -z "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected' + +testlog " - replay started after resync requested" +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" +compare_images ${POOL} ${image} + +testlog " - disconnected after max_concurrent_object_sets reached" +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + admin_daemons ${CLUSTER1} rbd mirror stop ${POOL}/${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" + set_image_meta ${CLUSTER2} ${POOL} ${image} \ + conf_rbd_journal_max_concurrent_object_sets 1 + write_image ${CLUSTER2} ${POOL} ${image} 20 16384 + write_image ${CLUSTER2} ${POOL} ${image} 20 16384 + test -z "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" + set_image_meta ${CLUSTER2} ${POOL} ${image} \ + conf_rbd_journal_max_concurrent_object_sets 0 + + testlog " - replay is still stopped (disconnected) after restart" + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected' +fi + +testlog " - replay started after resync requested" +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" +compare_images ${POOL} ${image} + +testlog " - rbd_mirroring_resync_after_disconnect config option" +set_image_meta ${CLUSTER2} ${POOL} ${image} \ + conf_rbd_mirroring_resync_after_disconnect true +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +image_id=$(get_image_id ${CLUSTER1} ${POOL} ${image}) +disconnect_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 
'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +test -n "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" +compare_images ${POOL} ${image} +set_image_meta ${CLUSTER2} ${POOL} ${image} \ + conf_rbd_mirroring_resync_after_disconnect false +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +disconnect_image ${CLUSTER2} ${POOL} ${image} +test -z "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})" +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected' +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +testlog "TEST: split-brain" +image=split-brain +create_image ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +promote_image ${CLUSTER1} ${POOL} ${image} --force +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' +write_image ${CLUSTER1} ${POOL} ${image} 10 +demote_image ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'split-brain' +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position' +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +testlog "TEST: check if removed images' OMAP are removed" +start_mirrors ${CLUSTER2} +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} + +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + # teuthology will trash the daemon + testlog "TEST: no blocklists" + CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER1} osd blocklist ls 2>&1 | grep -q "listed 0 entries" + CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER2} osd blocklist ls 2>&1 | grep -q "listed 0 entries" +fi diff --git a/qa/workunits/rbd/rbd_mirror_snapshot.sh b/qa/workunits/rbd/rbd_mirror_snapshot.sh new file mode 100755 index 000000000..c70d48b09 --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_snapshot.sh @@ -0,0 +1,517 @@ +#!/bin/sh -ex +# +# rbd_mirror_snapshot.sh - test rbd-mirror daemon in snapshot-based mirroring mode +# +# The scripts starts two ("local" and "remote") clusters using mstart.sh script, +# creates a temporary directory, used for cluster configs, daemon logs, admin +# socket, temporary files, and launches rbd-mirror daemon. +# + +MIRROR_POOL_MODE=image +MIRROR_IMAGE_MODE=snapshot + +. 
$(dirname $0)/rbd_mirror_helpers.sh + +setup + +testlog "TEST: add image and test replay" +start_mirrors ${CLUSTER1} +image=test +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} +set_image_meta ${CLUSTER2} ${POOL} ${image} "key1" "value1" +set_image_meta ${CLUSTER2} ${POOL} ${image} "key2" "value2" +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'down+unknown' +fi +compare_images ${POOL} ${image} +compare_image_meta ${CLUSTER1} ${POOL} ${image} "key1" "value1" +compare_image_meta ${CLUSTER1} ${POOL} ${image} "key2" "value2" + +testlog "TEST: stop mirror, add image, start mirror and test replay" +stop_mirrors ${CLUSTER1} +image1=test1 +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image1} +write_image ${CLUSTER2} ${POOL} ${image1} 100 +start_mirrors ${CLUSTER1} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image1} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image1} 'down+unknown' +fi +compare_images ${POOL} ${image1} + +testlog "TEST: test the first image is replaying after restart" +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +compare_images ${POOL} ${image} + +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + testlog "TEST: stop/start/restart mirror via admin socket" + all_admin_daemons ${CLUSTER1} rbd mirror stop + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + all_admin_daemons ${CLUSTER1} rbd mirror start + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror restart + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror stop + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + all_admin_daemons ${CLUSTER1} rbd mirror restart + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + all_admin_daemons 
${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror start ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + + all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + + all_admin_daemons ${CLUSTER1} rbd mirror stop ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+stopped' + + all_admin_daemons ${CLUSTER1} rbd mirror restart ${POOL} ${CLUSTER2}${PEER_CLUSTER_SUFFIX} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image1} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image1} 'up+replaying' + + flush ${CLUSTER1} + all_admin_daemons ${CLUSTER1} rbd mirror status +fi + +remove_image_retry ${CLUSTER2} ${POOL} ${image1} + +testlog "TEST: test image rename" +new_name="${image}_RENAMED" +rename_image ${CLUSTER2} ${POOL} ${image} ${new_name} +mirror_image_snapshot ${CLUSTER2} ${POOL} ${new_name} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying' +admin_daemons ${CLUSTER1} rbd mirror status ${POOL}/${new_name} +admin_daemons ${CLUSTER1} rbd mirror restart ${POOL}/${new_name} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${new_name} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${new_name} 'up+replaying' +rename_image ${CLUSTER2} ${POOL} ${new_name} ${image} +mirror_image_snapshot ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +testlog "TEST: test trash move restore" +image_id=$(get_image_id ${CLUSTER2} ${POOL} ${image}) +trash_move ${CLUSTER2} ${POOL} ${image} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' +trash_restore ${CLUSTER2} ${POOL} ${image_id} +enable_mirror ${CLUSTER2} ${POOL} ${image} snapshot +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +testlog "TEST: check if removed images' OMAP are removed (with rbd-mirror on one cluster)" +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} + +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 
100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + +testlog "TEST: failover and failback" +start_mirrors ${CLUSTER2} + +# demote and promote same cluster +demote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +compare_images ${POOL} ${image} + +# failover (unmodified) +demote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} + +# failback (unmodified) +demote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' +compare_images ${POOL} ${image} + +# failover +demote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} +write_image ${CLUSTER1} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying' +compare_images ${POOL} ${image} + +# failback +demote_image ${CLUSTER1} ${POOL} ${image} +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' +promote_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' +compare_images ${POOL} ${image} + +testlog "TEST: failover / failback loop" +for i in `seq 1 20`; do + demote_image ${CLUSTER2} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' + promote_image ${CLUSTER1} ${POOL} ${image} + 
wait_for_image_replay_started ${CLUSTER2} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+replaying' + demote_image ${CLUSTER1} ${POOL} ${image} + wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+unknown' + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+unknown' + promote_image ${CLUSTER2} ${POOL} ${image} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${image} 'up+stopped' + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +done +# check that demote (or other mirror snapshots) don't pile up +test "$(count_mirror_snaps ${CLUSTER1} ${POOL} ${image})" -le 3 +test "$(count_mirror_snaps ${CLUSTER2} ${POOL} ${image})" -le 3 + +testlog "TEST: force promote" +force_promote_image=test_force_promote +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${force_promote_image} +write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image} +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${force_promote_image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${force_promote_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+replaying' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' +promote_image ${CLUSTER1} ${POOL} ${force_promote_image} '--force' +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${force_promote_image} +wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopped' +wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped' +write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100 +write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100 +remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image} +remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image} + +testlog "TEST: cloned images" +testlog " - default" +parent_image=test_parent +parent_snap=snap +create_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} +write_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} 100 +create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +protect_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} + +clone_image=test_clone +clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} ${clone_image} +write_image ${CLUSTER2} ${POOL} ${clone_image} 100 +enable_mirror ${CLUSTER2} ${POOL} ${clone_image} snapshot + +wait_for_image_replay_started ${CLUSTER1} ${PARENT_POOL} ${parent_image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} ${parent_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${PARENT_POOL} ${parent_image} 'up+replaying' +compare_images ${PARENT_POOL} ${parent_image} + +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${clone_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${clone_image} 'up+replaying' +compare_images ${POOL} ${clone_image} +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image} + +testlog " - clone v1" 
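+# Illustrative note: the v1/v2 sub-tests below pin the clone format via
+# --rbd-default-clone-format rather than relying on the cluster default. The
+# get_clone_format helper above infers the format from 'rbd info' output
+# (format 2 when the clone-child op feature is present, format 1 otherwise),
+# so each sub-test can assert that the replayed copy reports the same format.
+# A rough sketch of the v1 case, using this script's own variables:
+#
+#   rbd --cluster ${CLUSTER2} clone \
+#       ${PARENT_POOL}/${parent_image}@${parent_snap} ${POOL}/${clone_image}_v1 \
+#       --rbd-default-clone-format 1      # v1 requires a protected parent snapshot
+#
+# whereas the v2 case allows removing the parent snapshot while clones still
+# exist (it is moved to the trash, as the snap-trash checks below verify).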
+clone_image_and_enable_mirror ${CLUSTER1} ${PARENT_POOL} ${parent_image} \ + ${parent_snap} ${POOL} ${clone_image}1 + +clone_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} \ + ${parent_snap} ${POOL} ${clone_image}_v1 snapshot --rbd-default-clone-format 1 +test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v1) = 1 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v1 +test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v1) = 1 +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v1 +remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}1 +unprotect_snapshot_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} + +testlog " - clone v2" +parent_snap=snap_v2 +create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} +clone_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} \ + ${parent_snap} ${POOL} ${clone_image}_v2 snapshot --rbd-default-clone-format 2 +test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v2) = 2 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v2 +test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v2) = 2 + +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} +test_snap_moved_to_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v2 +wait_for_image_present ${CLUSTER1} ${POOL} ${clone_image}_v2 'deleted' +test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} + +testlog " - clone v2 non-primary" +create_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} +wait_for_snap_present ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +clone_image_and_enable_mirror ${CLUSTER1} ${PARENT_POOL} ${parent_image} \ + ${parent_snap} ${POOL} ${clone_image}_v2 snapshot --rbd-default-clone-format 2 +remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} +mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} +wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}_v2 +wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} +remove_image_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} + +testlog "TEST: data pool" +dp_image=test_data_pool +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${dp_image} snapshot 128 --data-pool ${PARENT_POOL} +data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${dp_image} +data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap1' +write_image ${CLUSTER2} ${POOL} ${dp_image} 100 +create_snapshot ${CLUSTER2} ${POOL} ${dp_image} 'snap2' +write_image ${CLUSTER2} ${POOL} ${dp_image} 100 +wait_for_replay_complete 
${CLUSTER1} ${CLUSTER2} ${POOL} ${dp_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${dp_image} 'up+replaying' +compare_images ${POOL} ${dp_image}@snap1 +compare_images ${POOL} ${dp_image}@snap2 +compare_images ${POOL} ${dp_image} +remove_image_retry ${CLUSTER2} ${POOL} ${dp_image} + +testlog "TEST: disable mirroring / delete non-primary image" +image2=test2 +image3=test3 +image4=test4 +image5=test5 +for i in ${image2} ${image3} ${image4} ${image5}; do + create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${i} + write_image ${CLUSTER2} ${POOL} ${i} 100 + create_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' + create_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' + if [ "${i}" = "${image4}" ] || [ "${i}" = "${image5}" ]; then + protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' + protect_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' + fi + write_image ${CLUSTER2} ${POOL} ${i} 100 + mirror_image_snapshot ${CLUSTER2} ${POOL} ${i} + wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'present' + wait_for_snap_present ${CLUSTER1} ${POOL} ${i} 'snap2' +done + +set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' +for i in ${image2} ${image4}; do + disable_mirror ${CLUSTER2} ${POOL} ${i} +done + +unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap1' +unprotect_snapshot ${CLUSTER2} ${POOL} ${image5} 'snap2' +for i in ${image3} ${image5}; do + remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap1' + remove_snapshot ${CLUSTER2} ${POOL} ${i} 'snap2' + remove_image_retry ${CLUSTER2} ${POOL} ${i} +done + +for i in ${image2} ${image3} ${image4} ${image5}; do + wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'deleted' +done + +testlog "TEST: snapshot rename" +snap_name='snap_rename' +enable_mirror ${CLUSTER2} ${POOL} ${image2} +create_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_0" +for i in `seq 1 20`; do + rename_snapshot ${CLUSTER2} ${POOL} ${image2} "${snap_name}_$(expr ${i} - 1)" "${snap_name}_${i}" +done +mirror_image_snapshot ${CLUSTER2} ${POOL} ${image2} +wait_for_snap_present ${CLUSTER1} ${POOL} ${image2} "${snap_name}_${i}" + +unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap1' +unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap2' +for i in ${image2} ${image4}; do + remove_image_retry ${CLUSTER2} ${POOL} ${i} +done + +testlog "TEST: disable mirror while daemon is stopped" +stop_mirrors ${CLUSTER1} +stop_mirrors ${CLUSTER2} +disable_mirror ${CLUSTER2} ${POOL} ${image} +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + test_image_present ${CLUSTER1} ${POOL} ${image} 'present' +fi +start_mirrors ${CLUSTER1} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' +enable_mirror ${CLUSTER2} ${POOL} ${image} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +testlog "TEST: non-default namespace image mirroring" +testlog " - replay" +create_image_and_enable_mirror ${CLUSTER2} ${POOL}/${NS1} ${image} +create_image_and_enable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS1} ${image} +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS2} ${image} +write_image ${CLUSTER2} ${POOL}/${NS1} ${image} 100 +write_image ${CLUSTER2} ${POOL}/${NS2} ${image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS2} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${image} 'up+replaying' +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS2} ${image} 'up+replaying' +compare_images 
${POOL}/${NS1} ${image} +compare_images ${POOL}/${NS2} ${image} + +testlog " - disable mirroring / delete image" +remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${image} +disable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image} +wait_for_image_present ${CLUSTER1} ${POOL}/${NS1} ${image} 'deleted' +wait_for_image_present ${CLUSTER1} ${POOL}/${NS2} ${image} 'deleted' +remove_image_retry ${CLUSTER2} ${POOL}/${NS2} ${image} + +testlog " - data pool" +dp_image=test_data_pool +create_image_and_enable_mirror ${CLUSTER2} ${POOL}/${NS1} ${dp_image} snapshot 128 --data-pool ${PARENT_POOL} +data_pool=$(get_image_data_pool ${CLUSTER2} ${POOL}/${NS1} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +wait_for_image_replay_started ${CLUSTER1} ${POOL}/${NS1} ${dp_image} +data_pool=$(get_image_data_pool ${CLUSTER1} ${POOL}/${NS1} ${dp_image}) +test "${data_pool}" = "${PARENT_POOL}" +write_image ${CLUSTER2} ${POOL}/${NS1} ${dp_image} 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${dp_image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${dp_image} 'up+replaying' +compare_images ${POOL}/${NS1} ${dp_image} +remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${dp_image} + +testlog "TEST: simple image resync" +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +compare_images ${POOL} ${image} + +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + testlog "TEST: image resync while replayer is stopped" + admin_daemons ${CLUSTER1} rbd mirror stop ${POOL}/${image} + wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} + request_resync_image ${CLUSTER1} ${POOL} ${image} image_id + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} + admin_daemons ${CLUSTER1} rbd mirror start ${POOL}/${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + compare_images ${POOL} ${image} +fi + +testlog "TEST: request image resync while daemon is offline" +stop_mirrors ${CLUSTER1} +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +start_mirrors ${CLUSTER1} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +compare_images ${POOL} ${image} +remove_image_retry ${CLUSTER2} ${POOL} ${image} + +testlog "TEST: split-brain" +image=split-brain +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +promote_image ${CLUSTER1} ${POOL} ${image} --force +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+stopped' +write_image ${CLUSTER1} ${POOL} ${image} 10 +demote_image ${CLUSTER1} ${POOL} ${image} +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'split-brain' +request_resync_image ${CLUSTER1} ${POOL} ${image} image_id +wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' +remove_image_retry 
${CLUSTER2} ${POOL} ${image} + +testlog "TEST: check if removed images' OMAP are removed" +start_mirrors ${CLUSTER2} +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} + +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + # teuthology will trash the daemon + testlog "TEST: no blocklists" + CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER1} osd blocklist ls 2>&1 | grep -q "listed 0 entries" + CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER2} osd blocklist ls 2>&1 | grep -q "listed 0 entries" +fi diff --git a/qa/workunits/rbd/rbd_mirror_stress.sh b/qa/workunits/rbd/rbd_mirror_stress.sh new file mode 100755 index 000000000..cb79aba7e --- /dev/null +++ b/qa/workunits/rbd/rbd_mirror_stress.sh @@ -0,0 +1,221 @@ +#!/bin/sh -ex +# +# rbd_mirror_stress.sh - stress test rbd-mirror daemon +# +# The following additional environment variables affect the test: +# +# RBD_MIRROR_REDUCE_WRITES - if not empty, don't run the stress bench write +# tool during the many image test +# + +IMAGE_COUNT=50 +export LOCKDEP=0 + +. $(dirname $0)/rbd_mirror_helpers.sh + +setup + +create_snap() +{ + local cluster=$1 + local pool=$2 + local image=$3 + local snap_name=$4 + + rbd --cluster ${cluster} -p ${pool} snap create ${image}@${snap_name} \ + --debug-rbd=20 --debug-journaler=20 2> ${TEMPDIR}/rbd-snap-create.log +} + +compare_image_snaps() +{ + local pool=$1 + local image=$2 + local snap_name=$3 + local ret=0 + + local rmt_export=${TEMPDIR}/${CLUSTER2}-${pool}-${image}.export + local loc_export=${TEMPDIR}/${CLUSTER1}-${pool}-${image}.export + + rm -f ${rmt_export} ${loc_export} + rbd --cluster ${CLUSTER2} -p ${pool} export ${image}@${snap_name} ${rmt_export} + rbd --cluster ${CLUSTER1} -p ${pool} export ${image}@${snap_name} ${loc_export} + if ! 
cmp ${rmt_export} ${loc_export} + then + show_diff ${rmt_export} ${loc_export} + ret=1 + fi + rm -f ${rmt_export} ${loc_export} + return ${ret} +} + +wait_for_pool_images() +{ + local cluster=$1 + local pool=$2 + local image_count=$3 + local s + local count + local last_count=0 + + while true; do + for s in `seq 1 40`; do + test $s -ne 1 && sleep 30 + count=$(rbd --cluster ${cluster} -p ${pool} mirror pool status | grep 'images: ' | cut -d' ' -f 2) + test "${count}" = "${image_count}" && return 0 + + # reset timeout if making forward progress + test $count -ne $last_count && break + done + + test $count -eq $last_count && break + last_count=$count + done + rbd --cluster ${cluster} -p ${pool} mirror pool status --verbose >&2 + return 1 +} + +wait_for_pool_healthy() +{ + local cluster=$1 + local pool=$2 + local s + local state + + for s in `seq 1 40`; do + test $s -ne 1 && sleep 30 + state=$(rbd --cluster ${cluster} -p ${pool} mirror pool status | grep 'image health:' | cut -d' ' -f 3) + test "${state}" = "ERROR" && break + test "${state}" = "OK" && return 0 + done + rbd --cluster ${cluster} -p ${pool} mirror pool status --verbose >&2 + return 1 +} + +start_mirrors ${CLUSTER1} +start_mirrors ${CLUSTER2} + +testlog "TEST: add image and test replay after client crashes" +image=test +create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} ${MIRROR_IMAGE_MODE} '512M' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + +clean_snap_name= +for i in `seq 1 10` +do + stress_write_image ${CLUSTER2} ${POOL} ${image} + + wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' + + snap_name="snap${i}" + create_snap ${CLUSTER2} ${POOL} ${image} ${snap_name} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} + wait_for_snap_present ${CLUSTER1} ${POOL} ${image} ${snap_name} + + if [ -n "${clean_snap_name}" ]; then + compare_image_snaps ${POOL} ${image} ${clean_snap_name} + fi + compare_image_snaps ${POOL} ${image} ${snap_name} + + clean_snap_name="snap${i}-clean" + create_snap ${CLUSTER2} ${POOL} ${image} ${clean_snap_name} +done + +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +wait_for_snap_present ${CLUSTER1} ${POOL} ${image} ${clean_snap_name} + +for i in `seq 1 10` +do + snap_name="snap${i}" + compare_image_snaps ${POOL} ${image} ${snap_name} + + snap_name="snap${i}-clean" + compare_image_snaps ${POOL} ${image} ${snap_name} +done + +for i in `seq 1 10` +do + snap_name="snap${i}" + remove_snapshot ${CLUSTER2} ${POOL} ${image} ${snap_name} + + snap_name="snap${i}-clean" + remove_snapshot ${CLUSTER2} ${POOL} ${image} ${snap_name} +done + +remove_image_retry ${CLUSTER2} ${POOL} ${image} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' + +testlog "TEST: create many images" +snap_name="snap" +for i in `seq 1 ${IMAGE_COUNT}` +do + image="image_${i}" + create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image} ${MIRROR_IMAGE_MODE} '128M' + if [ -n "${RBD_MIRROR_REDUCE_WRITES}" ]; then + write_image ${CLUSTER2} ${POOL} ${image} 100 + else + stress_write_image ${CLUSTER2} ${POOL} ${image} + fi +done + +wait_for_pool_images ${CLUSTER2} ${POOL} ${IMAGE_COUNT} +wait_for_pool_healthy ${CLUSTER2} ${POOL} + +wait_for_pool_images ${CLUSTER1} ${POOL} ${IMAGE_COUNT} +wait_for_pool_healthy ${CLUSTER1} ${POOL} + +testlog "TEST: compare many images" +for i in `seq 1 ${IMAGE_COUNT}` +do + image="image_${i}" + create_snap 
${CLUSTER2} ${POOL} ${image} ${snap_name} + wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} + wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} + wait_for_snap_present ${CLUSTER1} ${POOL} ${image} ${snap_name} + compare_image_snaps ${POOL} ${image} ${snap_name} +done + +testlog "TEST: delete many images" +for i in `seq 1 ${IMAGE_COUNT}` +do + image="image_${i}" + remove_snapshot ${CLUSTER2} ${POOL} ${image} ${snap_name} + remove_image_retry ${CLUSTER2} ${POOL} ${image} +done + +testlog "TEST: image deletions should propagate" +wait_for_pool_images ${CLUSTER1} ${POOL} 0 +wait_for_pool_healthy ${CLUSTER1} ${POOL} 0 +for i in `seq 1 ${IMAGE_COUNT}` +do + image="image_${i}" + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' +done + +testlog "TEST: delete images during bootstrap" +set_pool_mirror_mode ${CLUSTER1} ${POOL} 'image' +set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' + +start_mirror ${CLUSTER1} +image=test + +for i in `seq 1 10` +do + image="image_${i}" + create_image ${CLUSTER2} ${POOL} ${image} '512M' + enable_mirror ${CLUSTER2} ${POOL} ${image} + + stress_write_image ${CLUSTER2} ${POOL} ${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present' + + disable_mirror ${CLUSTER2} ${POOL} ${image} + wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' + purge_snapshots ${CLUSTER2} ${POOL} ${image} + remove_image_retry ${CLUSTER2} ${POOL} ${image} +done + +testlog "TEST: check if removed images' OMAP are removed" + +wait_for_image_in_omap ${CLUSTER1} ${POOL} +wait_for_image_in_omap ${CLUSTER2} ${POOL} diff --git a/qa/workunits/rbd/rbd_support_module_recovery.sh b/qa/workunits/rbd/rbd_support_module_recovery.sh new file mode 100755 index 000000000..e9defced2 --- /dev/null +++ b/qa/workunits/rbd/rbd_support_module_recovery.sh @@ -0,0 +1,77 @@ +#!/bin/bash +set -ex + +POOL=rbd +IMAGE_PREFIX=image +NUM_IMAGES=20 +RUN_TIME=3600 + +rbd mirror pool enable ${POOL} image +rbd mirror pool peer add ${POOL} dummy + +# Create images and schedule their mirror snapshots +for ((i = 1; i <= ${NUM_IMAGES}; i++)); do + rbd create -s 1G --image-feature exclusive-lock ${POOL}/${IMAGE_PREFIX}$i + rbd mirror image enable ${POOL}/${IMAGE_PREFIX}$i snapshot + rbd mirror snapshot schedule add -p ${POOL} --image ${IMAGE_PREFIX}$i 1m +done + +# Run fio workloads on images via kclient +# Test the recovery of the rbd_support module and its scheduler from their +# librbd client being blocklisted while a exclusive lock gets passed around +# between their librbd client and a kclient trying to take mirror snapshots +# and perform I/O on the same image. 
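+#
+# For reference, the client address that gets blocklisted further down is
+# looked up from `ceph mgr dump`; its active_clients entries look roughly like
+# this sketch (the addr and nonce values here are purely illustrative):
+#
+#   "active_clients": [
+#     {"name": "rbd_support",
+#      "addrvec": [{"type": "v2", "addr": "192.168.0.10:0", "nonce": 4129188}]}
+#   ]
+#
+# The jq pipeline below joins addr and nonce into "192.168.0.10:0/4129188",
+# which is the form passed to `ceph osd blocklist add`.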
+for ((i = 1; i <= ${NUM_IMAGES}; i++)); do + DEVS[$i]=$(sudo rbd device map ${POOL}/${IMAGE_PREFIX}$i) + fio --name=fiotest --filename=${DEVS[$i]} --rw=randrw --bs=4K --direct=1 \ + --ioengine=libaio --iodepth=2 --runtime=43200 --time_based \ + &> /dev/null & +done + +# Repeatedly blocklist rbd_support module's client ~10s after the module +# recovers from previous blocklisting +CURRENT_TIME=$(date +%s) +END_TIME=$((CURRENT_TIME + RUN_TIME)) +PREV_CLIENT_ADDR="" +CLIENT_ADDR="" +while ((CURRENT_TIME <= END_TIME)); do + if [[ -n "${CLIENT_ADDR}" ]] && + [[ "${CLIENT_ADDR}" != "${PREV_CLIENT_ADDR}" ]]; then + ceph osd blocklist add ${CLIENT_ADDR} + # Confirm rbd_support module's client is blocklisted + ceph osd blocklist ls | grep -q ${CLIENT_ADDR} + PREV_CLIENT_ADDR=${CLIENT_ADDR} + fi + sleep 10 + CLIENT_ADDR=$(ceph mgr dump | + jq .active_clients[] | + jq 'select(.name == "rbd_support")' | + jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add') + CURRENT_TIME=$(date +%s) +done + +# Confirm that rbd_support module recovered from repeated blocklisting +# Check that you can add a mirror snapshot schedule after a few retries +for ((i = 1; i <= 24; i++)); do + rbd mirror snapshot schedule add -p ${POOL} \ + --image ${IMAGE_PREFIX}1 2m && break + sleep 10 +done +rbd mirror snapshot schedule ls -p ${POOL} --image ${IMAGE_PREFIX}1 | + grep 'every 2m' +# Verify that the schedule present before client blocklisting is preserved +rbd mirror snapshot schedule ls -p ${POOL} --image ${IMAGE_PREFIX}1 | + grep 'every 1m' +rbd mirror snapshot schedule rm -p ${POOL} --image ${IMAGE_PREFIX}1 2m +for ((i = 1; i <= ${NUM_IMAGES}; i++)); do + rbd mirror snapshot schedule rm -p ${POOL} --image ${IMAGE_PREFIX}$i 1m +done + +# cleanup +killall fio || true +wait +for ((i = 1; i <= ${NUM_IMAGES}; i++)); do + sudo rbd device unmap ${DEVS[$i]} +done + +echo OK diff --git a/qa/workunits/rbd/read-flags.sh b/qa/workunits/rbd/read-flags.sh new file mode 100755 index 000000000..7d787ce67 --- /dev/null +++ b/qa/workunits/rbd/read-flags.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +set -ex + +# create a snapshot, then export it and check that setting read flags works +# by looking at --debug-ms output + +function clean_up { + rm -f test.log || true + rbd snap remove test@snap || true + rbd rm test || true +} + +function test_read_flags { + local IMAGE=$1 + local SET_BALANCED=$2 + local SET_LOCALIZED=$3 + local EXPECT_BALANCED=$4 + local EXPECT_LOCALIZED=$5 + + local EXTRA_ARGS="--log-file test.log --debug-ms 1 --no-log-to-stderr" + if [ "$SET_BALANCED" = 'y' ]; then + EXTRA_ARGS="$EXTRA_ARGS --rbd-balance-snap-reads" + elif [ "$SET_LOCALIZED" = 'y' ]; then + EXTRA_ARGS="$EXTRA_ARGS --rbd-localize-snap-reads" + fi + + rbd export $IMAGE - $EXTRA_ARGS > /dev/null + if [ "$EXPECT_BALANCED" = 'y' ]; then + grep -q balance_reads test.log + else + grep -L balance_reads test.log | grep -q test.log + fi + if [ "$EXPECT_LOCALIZED" = 'y' ]; then + grep -q localize_reads test.log + else + grep -L localize_reads test.log | grep -q test.log + fi + rm -f test.log + +} + +clean_up + +trap clean_up INT TERM EXIT + +rbd create --image-feature layering -s 10 test +rbd snap create test@snap + +# export from non snapshot with or without settings should not have flags +test_read_flags test n n n n +test_read_flags test y y n n + +# export from snapshot should have read flags in log if they are set +test_read_flags test@snap n n n n +test_read_flags test@snap y n y n +test_read_flags test@snap n y n y + +# balanced_reads happens to take 
priority over localize_reads +test_read_flags test@snap y y y n + +echo OK diff --git a/qa/workunits/rbd/simple_big.sh b/qa/workunits/rbd/simple_big.sh new file mode 100755 index 000000000..70aafda4c --- /dev/null +++ b/qa/workunits/rbd/simple_big.sh @@ -0,0 +1,12 @@ +#!/bin/sh -ex + +mb=100000 + +rbd create foo --size $mb +DEV=$(sudo rbd map foo) +dd if=/dev/zero of=$DEV bs=1M count=$mb +dd if=$DEV of=/dev/null bs=1M count=$mb +sudo rbd unmap $DEV +rbd rm foo + +echo OK diff --git a/qa/workunits/rbd/test_admin_socket.sh b/qa/workunits/rbd/test_admin_socket.sh new file mode 100755 index 000000000..6b960787b --- /dev/null +++ b/qa/workunits/rbd/test_admin_socket.sh @@ -0,0 +1,151 @@ +#!/usr/bin/env bash +set -ex + +TMPDIR=/tmp/rbd_test_admin_socket$$ +mkdir $TMPDIR +trap "rm -fr $TMPDIR" 0 + +. $(dirname $0)/../../standalone/ceph-helpers.sh + +function expect_false() +{ + set -x + if "$@"; then return 1; else return 0; fi +} + +function rbd_watch_out_file() +{ + echo ${TMPDIR}/rbd_watch_$1.out +} + +function rbd_watch_pid_file() +{ + echo ${TMPDIR}/rbd_watch_$1.pid +} + +function rbd_watch_fifo() +{ + echo ${TMPDIR}/rbd_watch_$1.fifo +} + +function rbd_watch_asok() +{ + echo ${TMPDIR}/rbd_watch_$1.asok +} + +function rbd_get_perfcounter() +{ + local image=$1 + local counter=$2 + local name + + name=$(ceph --format xml --admin-daemon $(rbd_watch_asok ${image}) \ + perf schema | $XMLSTARLET el -d3 | + grep "/librbd-.*-${image}/${counter}\$") + test -n "${name}" || return 1 + + ceph --format xml --admin-daemon $(rbd_watch_asok ${image}) perf dump | + $XMLSTARLET sel -t -m "${name}" -v . +} + +function rbd_check_perfcounter() +{ + local image=$1 + local counter=$2 + local expected_val=$3 + local val= + + val=$(rbd_get_perfcounter ${image} ${counter}) + + test "${val}" -eq "${expected_val}" +} + +function rbd_watch_start() +{ + local image=$1 + local asok=$(rbd_watch_asok ${image}) + + mkfifo $(rbd_watch_fifo ${image}) + (cat $(rbd_watch_fifo ${image}) | + rbd --admin-socket ${asok} watch ${image} \ + > $(rbd_watch_out_file ${image}) 2>&1)& + + # find pid of the started rbd watch process + local pid + for i in `seq 10`; do + pid=$(ps auxww | awk "/[r]bd --admin.* watch ${image}/ {print \$2}") + test -n "${pid}" && break + sleep 0.1 + done + test -n "${pid}" + echo ${pid} > $(rbd_watch_pid_file ${image}) + + # find watcher admin socket + test -n "${asok}" + for i in `seq 10`; do + test -S "${asok}" && break + sleep 0.1 + done + test -S "${asok}" + + # configure debug level + ceph --admin-daemon "${asok}" config set debug_rbd 20 + + # check that watcher is registered + rbd status ${image} | expect_false grep "Watchers: none" +} + +function rbd_watch_end() +{ + local image=$1 + local regexp=$2 + + # send 'enter' to watch to exit + echo > $(rbd_watch_fifo ${image}) + # just in case it is not terminated + kill $(cat $(rbd_watch_pid_file ${image})) || : + + # output rbd watch out file for easier troubleshooting + cat $(rbd_watch_out_file ${image}) + + # cleanup + rm -f $(rbd_watch_fifo ${image}) $(rbd_watch_pid_file ${image}) \ + $(rbd_watch_out_file ${image}) $(rbd_watch_asok ${image}) +} + +pool="rbd" +image=testimg$$ +ceph_admin="ceph --admin-daemon $(rbd_watch_asok ${image})" + +rbd create --size 128 ${pool}/${image} + +# check rbd cache commands are present in help output +rbd_cache_flush="rbd cache flush ${pool}/${image}" +rbd_cache_invalidate="rbd cache invalidate ${pool}/${image}" + +rbd_watch_start ${image} +${ceph_admin} help | fgrep "${rbd_cache_flush}" +${ceph_admin} help | fgrep 
"${rbd_cache_invalidate}" +rbd_watch_end ${image} + +# test rbd cache commands with disabled and enabled cache +for conf_rbd_cache in false true; do + + rbd image-meta set ${image} conf_rbd_cache ${conf_rbd_cache} + + rbd_watch_start ${image} + + rbd_check_perfcounter ${image} flush 0 + ${ceph_admin} ${rbd_cache_flush} + # 'flush' counter should increase regardless if cache is enabled + rbd_check_perfcounter ${image} flush 1 + + rbd_check_perfcounter ${image} invalidate_cache 0 + ${ceph_admin} ${rbd_cache_invalidate} + # 'invalidate_cache' counter should increase regardless if cache is enabled + rbd_check_perfcounter ${image} invalidate_cache 1 + + rbd_watch_end ${image} +done + +rbd rm ${image} diff --git a/qa/workunits/rbd/test_librbd.sh b/qa/workunits/rbd/test_librbd.sh new file mode 100755 index 000000000..447306bb4 --- /dev/null +++ b/qa/workunits/rbd/test_librbd.sh @@ -0,0 +1,9 @@ +#!/bin/sh -e + +if [ -n "${VALGRIND}" ]; then + valgrind ${VALGRIND} --suppressions=${TESTDIR}/valgrind.supp \ + --error-exitcode=1 ceph_test_librbd +else + ceph_test_librbd +fi +exit 0 diff --git a/qa/workunits/rbd/test_librbd_python.sh b/qa/workunits/rbd/test_librbd_python.sh new file mode 100755 index 000000000..a33100829 --- /dev/null +++ b/qa/workunits/rbd/test_librbd_python.sh @@ -0,0 +1,12 @@ +#!/bin/sh -ex + +relpath=$(dirname $0)/../../../src/test/pybind + +if [ -n "${VALGRIND}" ]; then + valgrind ${VALGRIND} --suppressions=${TESTDIR}/valgrind.supp \ + --errors-for-leak-kinds=definite --error-exitcode=1 \ + python3 -m pytest -v $relpath/test_rbd.py "$@" +else + python3 -m pytest -v $relpath/test_rbd.py "$@" +fi +exit 0 diff --git a/qa/workunits/rbd/test_lock_fence.sh b/qa/workunits/rbd/test_lock_fence.sh new file mode 100755 index 000000000..7cf2d21c5 --- /dev/null +++ b/qa/workunits/rbd/test_lock_fence.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# can't use -e because of background process +set -x + +IMAGE=rbdrw-image +LOCKID=rbdrw +RELPATH=$(dirname $0)/../../../src/test/librbd +RBDRW=$RELPATH/rbdrw.py + +rbd create $IMAGE --size 10 --image-format 2 --image-shared || exit 1 + +# rbdrw loops doing I/O to $IMAGE after locking with lockid $LOCKID +python3 $RBDRW $IMAGE $LOCKID & +iochild=$! + +# give client time to lock and start reading/writing +LOCKS='[]' +while [ "$LOCKS" == '[]' ] +do + LOCKS=$(rbd lock list $IMAGE --format json) + sleep 1 +done + +clientaddr=$(rbd lock list $IMAGE | tail -1 | awk '{print $NF;}') +clientid=$(rbd lock list $IMAGE | tail -1 | awk '{print $1;}') +echo "clientaddr: $clientaddr" +echo "clientid: $clientid" + +ceph osd blocklist add $clientaddr || exit 1 + +wait $iochild +rbdrw_exitcode=$? 
+if [ $rbdrw_exitcode != 108 ] +then + echo "wrong exitcode from rbdrw: $rbdrw_exitcode" + exit 1 +else + echo "rbdrw stopped with ESHUTDOWN" +fi + +set -e +ceph osd blocklist rm $clientaddr +rbd lock remove $IMAGE $LOCKID "$clientid" +# rbdrw will have exited with an existing watch, so, until #3527 is fixed, +# hang out until the watch expires +sleep 30 +rbd rm $IMAGE +echo OK diff --git a/qa/workunits/rbd/test_rbd_mirror.sh b/qa/workunits/rbd/test_rbd_mirror.sh new file mode 100755 index 000000000..e139dd7e4 --- /dev/null +++ b/qa/workunits/rbd/test_rbd_mirror.sh @@ -0,0 +1,9 @@ +#!/bin/sh -e + +if [ -n "${VALGRIND}" ]; then + valgrind ${VALGRIND} --suppressions=${TESTDIR}/valgrind.supp \ + --error-exitcode=1 ceph_test_rbd_mirror +else + ceph_test_rbd_mirror +fi +exit 0 diff --git a/qa/workunits/rbd/test_rbd_tasks.sh b/qa/workunits/rbd/test_rbd_tasks.sh new file mode 100755 index 000000000..b9663e601 --- /dev/null +++ b/qa/workunits/rbd/test_rbd_tasks.sh @@ -0,0 +1,276 @@ +#!/usr/bin/env bash +set -ex + +POOL=rbd_tasks +POOL_NS=ns1 + +setup() { + trap 'cleanup' INT TERM EXIT + + ceph osd pool create ${POOL} 128 + rbd pool init ${POOL} + rbd namespace create ${POOL}/${POOL_NS} + + TEMPDIR=`mktemp -d` +} + +cleanup() { + ceph osd pool rm ${POOL} ${POOL} --yes-i-really-really-mean-it + + rm -rf ${TEMPDIR} +} + +wait_for() { + local TEST_FN=$1 + shift 1 + local TEST_FN_ARGS=("$@") + + for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do + sleep ${s} + + ${TEST_FN} "${TEST_FN_ARGS[@]}" || continue + return 0 + done + return 1 +} + +task_exists() { + local TASK_ID=$1 + [[ -z "${TASK_ID}" ]] && exit 1 + + ceph rbd task list ${TASK_ID} || return 1 + return 0 +} + +task_dne() { + local TASK_ID=$1 + [[ -z "${TASK_ID}" ]] && exit 1 + + ceph rbd task list ${TASK_ID} || return 0 + return 1 +} + +task_in_progress() { + local TASK_ID=$1 + [[ -z "${TASK_ID}" ]] && exit 1 + + [[ $(ceph rbd task list ${TASK_ID} | jq '.in_progress') == 'true' ]] +} + +test_remove() { + echo "test_remove" + + local IMAGE=`uuidgen` + rbd create --size 1 --image-shared ${POOL}/${IMAGE} + + # MGR might require some time to discover the OSD map w/ new pool + wait_for ceph rbd task add remove ${POOL}/${IMAGE} +} + +test_flatten() { + echo "test_flatten" + + local PARENT_IMAGE=`uuidgen` + local CHILD_IMAGE=`uuidgen` + + rbd create --size 1 --image-shared ${POOL}/${PARENT_IMAGE} + rbd snap create ${POOL}/${PARENT_IMAGE}@snap + rbd clone ${POOL}/${PARENT_IMAGE}@snap ${POOL}/${POOL_NS}/${CHILD_IMAGE} --rbd-default-clone-format=2 + [[ "$(rbd info --format json ${POOL}/${POOL_NS}/${CHILD_IMAGE} | jq 'has("parent")')" == "true" ]] + + local TASK_ID=`ceph rbd task add flatten ${POOL}/${POOL_NS}/${CHILD_IMAGE} | jq --raw-output ".id"` + wait_for task_dne ${TASK_ID} + + [[ "$(rbd info --format json ${POOL}/${POOL_NS}/${CHILD_IMAGE} | jq 'has("parent")')" == "false" ]] +} + +test_trash_remove() { + echo "test_trash_remove" + + local IMAGE=`uuidgen` + rbd create --size 1 --image-shared ${POOL}/${IMAGE} + local IMAGE_ID=`rbd info --format json ${POOL}/${IMAGE} | jq --raw-output ".id"` + rbd trash mv ${POOL}/${IMAGE} + [[ -n "$(rbd trash list ${POOL})" ]] || exit 1 + + local TASK_ID=`ceph rbd task add trash remove ${POOL}/${IMAGE_ID} | jq --raw-output ".id"` + wait_for task_dne ${TASK_ID} + + [[ -z "$(rbd trash list ${POOL})" ]] || exit 1 +} + +test_migration_execute() { + echo "test_migration_execute" + + local SOURCE_IMAGE=`uuidgen` + local TARGET_IMAGE=`uuidgen` + rbd create --size 1 --image-shared ${POOL}/${SOURCE_IMAGE} + rbd migration prepare 
${POOL}/${SOURCE_IMAGE} ${POOL}/${TARGET_IMAGE} + [[ "$(rbd status --format json ${POOL}/${TARGET_IMAGE} | jq --raw-output '.migration.state')" == "prepared" ]] + + local TASK_ID=`ceph rbd task add migration execute ${POOL}/${TARGET_IMAGE} | jq --raw-output ".id"` + wait_for task_dne ${TASK_ID} + + [[ "$(rbd status --format json ${POOL}/${TARGET_IMAGE} | jq --raw-output '.migration.state')" == "executed" ]] +} + +test_migration_commit() { + echo "test_migration_commit" + + local SOURCE_IMAGE=`uuidgen` + local TARGET_IMAGE=`uuidgen` + rbd create --size 1 --image-shared ${POOL}/${SOURCE_IMAGE} + rbd migration prepare ${POOL}/${SOURCE_IMAGE} ${POOL}/${TARGET_IMAGE} + [[ "$(rbd status --format json ${POOL}/${TARGET_IMAGE} | jq --raw-output '.migration.state')" == "prepared" ]] + + local TASK_ID=`ceph rbd task add migration execute ${POOL}/${TARGET_IMAGE} | jq --raw-output ".id"` + wait_for task_dne ${TASK_ID} + + TASK_ID=`ceph rbd task add migration commit ${POOL}/${TARGET_IMAGE} | jq --raw-output ".id"` + wait_for task_dne ${TASK_ID} + + [[ "$(rbd status --format json ${POOL}/${TARGET_IMAGE} | jq 'has("migration")')" == "false" ]] + (rbd info ${POOL}/${SOURCE_IMAGE} && return 1) || true + rbd info ${POOL}/${TARGET_IMAGE} +} + +test_migration_abort() { + echo "test_migration_abort" + + local SOURCE_IMAGE=`uuidgen` + local TARGET_IMAGE=`uuidgen` + rbd create --size 1 --image-shared ${POOL}/${SOURCE_IMAGE} + rbd migration prepare ${POOL}/${SOURCE_IMAGE} ${POOL}/${TARGET_IMAGE} + [[ "$(rbd status --format json ${POOL}/${TARGET_IMAGE} | jq --raw-output '.migration.state')" == "prepared" ]] + + local TASK_ID=`ceph rbd task add migration execute ${POOL}/${TARGET_IMAGE} | jq --raw-output ".id"` + wait_for task_dne ${TASK_ID} + + TASK_ID=`ceph rbd task add migration abort ${POOL}/${TARGET_IMAGE} | jq --raw-output ".id"` + wait_for task_dne ${TASK_ID} + + [[ "$(rbd status --format json ${POOL}/${SOURCE_IMAGE} | jq 'has("migration")')" == "false" ]] + rbd info ${POOL}/${SOURCE_IMAGE} + (rbd info ${POOL}/${TARGET_IMAGE} && return 1) || true +} + +test_list() { + echo "test_list" + + local IMAGE_1=`uuidgen` + local IMAGE_2=`uuidgen` + + rbd create --size 1T --image-shared ${POOL}/${IMAGE_1} + rbd create --size 1T --image-shared ${POOL}/${IMAGE_2} + + local TASK_ID_1=`ceph rbd task add remove ${POOL}/${IMAGE_1} | jq --raw-output ".id"` + local TASK_ID_2=`ceph rbd task add remove ${POOL}/${IMAGE_2} | jq --raw-output ".id"` + + local LIST_FILE="${TEMPDIR}/list_file" + ceph rbd task list > ${LIST_FILE} + cat ${LIST_FILE} + + [[ $(jq "[.[] | .id] | contains([\"${TASK_ID_1}\", \"${TASK_ID_2}\"])" ${LIST_FILE}) == "true" ]] + + ceph rbd task cancel ${TASK_ID_1} + ceph rbd task cancel ${TASK_ID_2} +} + +test_cancel() { + echo "test_cancel" + + local IMAGE=`uuidgen` + rbd create --size 1T --image-shared ${POOL}/${IMAGE} + local TASK_ID=`ceph rbd task add remove ${POOL}/${IMAGE} | jq --raw-output ".id"` + + wait_for task_exists ${TASK_ID} + + ceph rbd task cancel ${TASK_ID} + wait_for task_dne ${TASK_ID} +} + +test_duplicate_task() { + echo "test_duplicate_task" + + local IMAGE=`uuidgen` + rbd create --size 1T --image-shared ${POOL}/${IMAGE} + local IMAGE_ID=`rbd info --format json ${POOL}/${IMAGE} | jq --raw-output ".id"` + rbd trash mv ${POOL}/${IMAGE} + + local TASK_ID_1=`ceph rbd task add trash remove ${POOL}/${IMAGE_ID} | jq --raw-output ".id"` + local TASK_ID_2=`ceph rbd task add trash remove ${POOL}/${IMAGE_ID} | jq --raw-output ".id"` + + [[ "${TASK_ID_1}" == "${TASK_ID_2}" ]] + + ceph rbd task cancel 
${TASK_ID_1} +} + +test_duplicate_name() { + echo "test_duplicate_name" + + local IMAGE=`uuidgen` + rbd create --size 1G --image-shared ${POOL}/${IMAGE} + local TASK_ID_1=`ceph rbd task add remove ${POOL}/${IMAGE} | jq --raw-output ".id"` + + wait_for task_dne ${TASK_ID_1} + + rbd create --size 1G --image-shared ${POOL}/${IMAGE} + local TASK_ID_2=`ceph rbd task add remove ${POOL}/${IMAGE} | jq --raw-output ".id"` + + [[ "${TASK_ID_1}" != "${TASK_ID_2}" ]] + wait_for task_dne ${TASK_ID_2} + + local TASK_ID_3=`ceph rbd task add remove ${POOL}/${IMAGE} | jq --raw-output ".id"` + + [[ "${TASK_ID_2}" == "${TASK_ID_3}" ]] +} + +test_progress() { + echo "test_progress" + + local IMAGE_1=`uuidgen` + local IMAGE_2=`uuidgen` + + rbd create --size 1 --image-shared ${POOL}/${IMAGE_1} + local TASK_ID_1=`ceph rbd task add remove ${POOL}/${IMAGE_1} | jq --raw-output ".id"` + + wait_for task_dne ${TASK_ID_1} + + local PROGRESS_FILE="${TEMPDIR}/progress_file" + ceph progress json > ${PROGRESS_FILE} + cat ${PROGRESS_FILE} + + [[ $(jq "[.completed | .[].id] | contains([\"${TASK_ID_1}\"])" ${PROGRESS_FILE}) == "true" ]] + + rbd create --size 1T --image-shared ${POOL}/${IMAGE_2} + local TASK_ID_2=`ceph rbd task add remove ${POOL}/${IMAGE_2} | jq --raw-output ".id"` + + wait_for task_in_progress ${TASK_ID_2} + ceph progress json > ${PROGRESS_FILE} + cat ${PROGRESS_FILE} + + [[ $(jq "[.events | .[].id] | contains([\"${TASK_ID_2}\"])" ${PROGRESS_FILE}) == "true" ]] + + ceph rbd task cancel ${TASK_ID_2} + wait_for task_dne ${TASK_ID_2} + + ceph progress json > ${PROGRESS_FILE} + cat ${PROGRESS_FILE} + + [[ $(jq "[.completed | map(select(.failed)) | .[].id] | contains([\"${TASK_ID_2}\"])" ${PROGRESS_FILE}) == "true" ]] +} + +setup +test_remove +test_flatten +test_trash_remove +test_migration_execute +test_migration_commit +test_migration_abort +test_list +test_cancel +test_duplicate_task +test_duplicate_name +test_progress + +echo OK diff --git a/qa/workunits/rbd/test_rbdmap_RBDMAPFILE.sh b/qa/workunits/rbd/test_rbdmap_RBDMAPFILE.sh new file mode 100755 index 000000000..501c69cd5 --- /dev/null +++ b/qa/workunits/rbd/test_rbdmap_RBDMAPFILE.sh @@ -0,0 +1,34 @@ +#!/bin/sh +# +# Regression test for http://tracker.ceph.com/issues/14984 +# +# When the bug is present, starting the rbdmap service causes +# a bogus log message to be emitted to the log because the RBDMAPFILE +# environment variable is not set. +# +# When the bug is not present, starting the rbdmap service will emit +# no log messages, because /etc/ceph/rbdmap does not contain any lines +# that require processing. 
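+#
+# For context, a line in /etc/ceph/rbdmap that *would* require processing looks
+# roughly like this (the pool, image and keyring values are illustrative):
+#
+#   rbd/myimage   id=admin,keyring=/etc/ceph/ceph.client.admin.keyring
+#
+# i.e. an image spec followed by map options; blank lines and comment lines are
+# skipped, so the stock file should produce no log output.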
+# +set -ex + +echo "TEST: save timestamp for use later with journalctl --since" +TIMESTAMP=$(date +%Y-%m-%d\ %H:%M:%S) + +echo "TEST: assert that rbdmap has not logged anything since boot" +journalctl -b 0 -t rbdmap | grep 'rbdmap\[[[:digit:]]' && exit 1 +journalctl -b 0 -t init-rbdmap | grep 'rbdmap\[[[:digit:]]' && exit 1 + +echo "TEST: restart the rbdmap.service" +sudo systemctl restart rbdmap.service + +echo "TEST: ensure that /usr/bin/rbdmap runs to completion" +until sudo systemctl status rbdmap.service | grep 'active (exited)' ; do + sleep 0.5 +done + +echo "TEST: assert that rbdmap has not logged anything since TIMESTAMP" +journalctl --since "$TIMESTAMP" -t rbdmap | grep 'rbdmap\[[[:digit:]]' && exit 1 +journalctl --since "$TIMESTAMP" -t init-rbdmap | grep 'rbdmap\[[[:digit:]]' && exit 1 + +exit 0 diff --git a/qa/workunits/rbd/verify_pool.sh b/qa/workunits/rbd/verify_pool.sh new file mode 100755 index 000000000..08bcca506 --- /dev/null +++ b/qa/workunits/rbd/verify_pool.sh @@ -0,0 +1,27 @@ +#!/bin/sh -ex + +POOL_NAME=rbd_test_validate_pool +PG_NUM=32 + +tear_down () { + ceph osd pool delete $POOL_NAME $POOL_NAME --yes-i-really-really-mean-it || true +} + +set_up () { + tear_down + ceph osd pool create $POOL_NAME $PG_NUM + ceph osd pool mksnap $POOL_NAME snap + rbd pool init $POOL_NAME +} + +trap tear_down EXIT HUP INT +set_up + +# creating an image in a pool-managed snapshot pool should fail +rbd create --pool $POOL_NAME --size 1 foo && exit 1 || true + +# should succeed if the pool already marked as validated +printf "overwrite validated" | rados --pool $POOL_NAME put rbd_info - +rbd create --pool $POOL_NAME --size 1 foo + +echo OK diff --git a/qa/workunits/rename/all.sh b/qa/workunits/rename/all.sh new file mode 100755 index 000000000..f97ff773f --- /dev/null +++ b/qa/workunits/rename/all.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -ex + +dir=`dirname $0` + +CEPH_TOOL='./ceph' +$CEPH_TOOL || CEPH_TOOL='ceph' + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/prepare.sh + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/pri_nul.sh +rm ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/rem_nul.sh +rm ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/pri_pri.sh +rm ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/rem_pri.sh +rm ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/rem_rem.sh +rm ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/pri_nul.sh +rm -r ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/pri_pri.sh +rm -r ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/dir_pri_pri.sh +rm -r ./?/* || true + +CEPH_ARGS=$CEPH_ARGS CEPH_TOOL=$CEPH_TOOL $dir/dir_pri_nul.sh +rm -r ./?/* || true + diff --git a/qa/workunits/rename/dir_pri_nul.sh b/qa/workunits/rename/dir_pri_nul.sh new file mode 100755 index 000000000..dd8106b63 --- /dev/null +++ b/qa/workunits/rename/dir_pri_nul.sh @@ -0,0 +1,28 @@ +#!/bin/sh -ex + +# dir: srcdn=destdn +mkdir ./a/dir1 +mv ./a/dir1 ./a/dir1.renamed + +# dir: diff +mkdir ./a/dir2 +mv ./a/dir2 ./b/dir2 + +# dir: diff, child subtree on target +mkdir -p ./a/dir3/child/foo +$CEPH_TOOL mds tell 0 export_dir /a/dir3/child 1 +sleep 5 +mv ./a/dir3 ./b/dir3 + +# dir: diff, child subtree on other +mkdir -p ./a/dir4/child/foo +$CEPH_TOOL mds tell 0 export_dir /a/dir4/child 2 +sleep 5 +mv ./a/dir4 ./b/dir4 + +# dir: witness subtree adjustment +mkdir -p ./a/dir5/1/2/3/4 +$CEPH_TOOL mds tell 0 export_dir /a/dir5/1/2/3 2 +sleep 5 +mv ./a/dir5 ./b + diff --git 
a/qa/workunits/rename/dir_pri_pri.sh b/qa/workunits/rename/dir_pri_pri.sh new file mode 100755 index 000000000..de235fcd3 --- /dev/null +++ b/qa/workunits/rename/dir_pri_pri.sh @@ -0,0 +1,11 @@ +#!/bin/sh -ex + +# dir, srcdn=destdn +mkdir ./a/dir1 +mkdir ./a/dir2 +mv -T ./a/dir1 ./a/dir2 + +# dir, different +mkdir ./a/dir3 +mkdir ./b/dir4 +mv -T ./a/dir3 ./b/dir4 diff --git a/qa/workunits/rename/plan.txt b/qa/workunits/rename/plan.txt new file mode 100644 index 000000000..b423b4140 --- /dev/null +++ b/qa/workunits/rename/plan.txt @@ -0,0 +1,111 @@ +#!/bin/sh + +# srcdn destdn targeti + +## pri auth null auth - +## pri rep null auth - +## rem auth null auth - +## rem rep null auth - + +#/ pri auth null rep - dup of pr_na +#/ pri rep null rep - +#/ rem auth null rep - dup of rr_na +#/ rem rep null rep - + + +## pri auth pri auth - +# pri rep pri auth - +## rem auth pri auth - +# rem rep pri auth - + +# pri auth pri rep - +# pri rep pri rep - +# rem auth pri rep - +# rem rep pri rep - + +## pri auth rem auth auth +# pri rep rem auth auth +## rem auth rem auth auth +# rem rep rem auth auth + +# pri auth rem rep auth +# pri rep rem rep auth +# rem auth rem rep auth +# rem rep rem rep auth + +# pri auth rem auth rep +# pri rep rem auth rep +# rem auth rem auth rep +# rem rep rem auth rep + +# pri auth rem rep rep +# pri rep rem rep rep +# rem auth rem rep rep +# rem rep rem rep rep + + +types of operations + +pri nul + srcdn=destdn + diff + +rem nul + srci=srcdn=destdn + srci=srcdn + srcdn=destdn + srci=destdn + all different + +pri pri + srcdn=destdn + different + +rem pri + srci=srcdn=destdn + srci=srcdn + srcdn=destdn + srci=destdn + all different + +pri rem + srcdn=destdn=desti + srcdn=destdn + destdn=desti + srcdn=desti + all different + +rem rem + srci=srcdn=destdn=desti + srci=srcdn=destdn + srci=srcdn=desti + srci=destdn=desti + srcdni=destdn=desti + srci=srcdn destdn=desti + srci=destdn srcdn=desti + srci=desti srcdn=destdn + srci=srcdn + srci=destdn + srci=desti + srcdn=destdn + srcdn=desti + destdn=desti + all different + + + + + + + + + +p n same +r n same +p n diff +r n diff + +p p same +r p same + +p r diff --git a/qa/workunits/rename/prepare.sh b/qa/workunits/rename/prepare.sh new file mode 100755 index 000000000..b5ba4ae58 --- /dev/null +++ b/qa/workunits/rename/prepare.sh @@ -0,0 +1,21 @@ +#!/bin/sh -ex + +$CEPH_TOOL mds tell 0 injectargs '--mds-bal-interval 0' +$CEPH_TOOL mds tell 1 injectargs '--mds-bal-interval 0' +$CEPH_TOOL mds tell 2 injectargs '--mds-bal-interval 0' +$CEPH_TOOL mds tell 3 injectargs '--mds-bal-interval 0' +#$CEPH_TOOL mds tell 4 injectargs '--mds-bal-interval 0' + +mkdir -p ./a/a +mkdir -p ./b/b +mkdir -p ./c/c +mkdir -p ./d/d + +mount_dir=`df . 
| grep -o " /.*" | grep -o "/.*"` +cur_dir=`pwd` +ceph_dir=${cur_dir##$mount_dir} +$CEPH_TOOL mds tell 0 export_dir $ceph_dir/b 1 +$CEPH_TOOL mds tell 0 export_dir $ceph_dir/c 2 +$CEPH_TOOL mds tell 0 export_dir $ceph_dir/d 3 +sleep 5 + diff --git a/qa/workunits/rename/pri_nul.sh b/qa/workunits/rename/pri_nul.sh new file mode 100755 index 000000000..c40ec1d25 --- /dev/null +++ b/qa/workunits/rename/pri_nul.sh @@ -0,0 +1,11 @@ +#!/bin/sh -ex + +# srcdn=destdn +touch ./a/file1 +mv ./a/file1 ./a/file1.renamed + +# different +touch ./a/file2 +mv ./a/file2 ./b + + diff --git a/qa/workunits/rename/pri_pri.sh b/qa/workunits/rename/pri_pri.sh new file mode 100755 index 000000000..b74985fe3 --- /dev/null +++ b/qa/workunits/rename/pri_pri.sh @@ -0,0 +1,12 @@ +#!/bin/sh -ex + +# srcdn=destdn +touch ./a/file1 +touch ./a/file2 +mv ./a/file1 ./a/file2 + +# different (srcdn != destdn) +touch ./a/file3 +touch ./b/file4 +mv ./a/file3 ./b/file4 + diff --git a/qa/workunits/rename/pri_rem.sh b/qa/workunits/rename/pri_rem.sh new file mode 100755 index 000000000..a1cd03d10 --- /dev/null +++ b/qa/workunits/rename/pri_rem.sh @@ -0,0 +1,31 @@ +#!/bin/sh -ex + +dotest() { + src=$1 + desti=$2 + destdn=$3 + n=$4 + + touch ./$src/src$n + touch ./$desti/desti$n + ln ./$desti/desti$n ./$destdn/destdn$n + + mv ./$src/src$n ./$destdn/destdn$n +} + + +# srcdn=destdn=desti +dotest 'a' 'a' 'a' 1 + +# destdn=desti +dotest 'b' 'a' 'a' 2 + +# srcdn=destdn +dotest 'a' 'b' 'a' 3 + +# srcdn=desti +dotest 'a' 'a' 'b' 4 + +# all different +dotest 'a' 'b' 'c' 5 + diff --git a/qa/workunits/rename/rem_nul.sh b/qa/workunits/rename/rem_nul.sh new file mode 100755 index 000000000..a71033108 --- /dev/null +++ b/qa/workunits/rename/rem_nul.sh @@ -0,0 +1,29 @@ +#!/bin/sh -ex + +dotest() { + srci=$1 + srcdn=$2 + dest=$3 + n=$4 + + touch ./$srci/srci$n + ln ./$srci/srci$n ./$srcdn/srcdn$n + + mv ./$srcdn/srcdn$n ./$dest/dest$n +} + +# srci=srcdn=destdn +dotest 'a' 'a' 'a' 1 + +# srcdn=destdn +dotest 'b' 'a' 'a' 2 + +# srci=destdn +dotest 'a' 'b' 'a' 3 + +# srci=srcdn +dotest 'a' 'a' 'b' 4 + +# all different +dotest 'a' 'b' 'c' 5 + diff --git a/qa/workunits/rename/rem_pri.sh b/qa/workunits/rename/rem_pri.sh new file mode 100755 index 000000000..501ac5e1a --- /dev/null +++ b/qa/workunits/rename/rem_pri.sh @@ -0,0 +1,29 @@ +#!/bin/sh -ex + +dotest() { + srci=$1 + srcdn=$2 + dest=$3 + n=$4 + + touch ./$srci/srci$n + ln ./$srci/srci$n ./$srcdn/srcdn$n + touch ./$dest/dest$n + + mv ./$srcdn/srcdn$n ./$dest/dest$n +} + +# srci=srcdn=destdn +dotest 'a' 'a' 'a' 1 + +# srcdn=destdn +dotest 'b' 'a' 'a' 2 + +# srci=destdn +dotest 'a' 'b' 'a' 3 + +# srci=srcdn +dotest 'a' 'a' 'b' 4 + +# all different +dotest 'a' 'b' 'c' 5 diff --git a/qa/workunits/rename/rem_rem.sh b/qa/workunits/rename/rem_rem.sh new file mode 100755 index 000000000..80028c517 --- /dev/null +++ b/qa/workunits/rename/rem_rem.sh @@ -0,0 +1,61 @@ +#!/bin/sh -ex + +dotest() { + srci=$1 + srcdn=$2 + desti=$3 + destdn=$4 + n=$5 + + touch ./$srci/srci$n + ln ./$srci/srci$n ./$srcdn/srcdn$n + touch ./$desti/desti$n + ln ./$desti/desti$n ./$destdn/destdn$n + + mv ./$srcdn/srcdn$n ./$destdn/destdn$n +} + +# srci=srcdn=destdn=desti +dotest 'a' 'a' 'a' 'a' 1 + +# srcdn=destdn=desti +dotest 'b' 'a' 'a' 'a' 2 + +# srci=destdn=desti +dotest 'a' 'b' 'a' 'a' 3 + +# srci=srcdn=destdn +dotest 'a' 'a' 'b' 'a' 4 + +# srci=srcdn=desti +dotest 'a' 'a' 'a' 'b' 5 + +# srci=srcdn destdn=desti +dotest 'a' 'a' 'b' 'b' 6 + +# srci=destdn srcdn=desti +dotest 'a' 'b' 'b' 'a' 7 + +# srci=desti srcdn=destdn +dotest 'a' 
'b' 'a' 'b' 8 + +# srci=srcdn +dotest 'a' 'a' 'b' 'c' 9 + +# srci=desti +dotest 'a' 'b' 'a' 'c' 10 + +# srci=destdn +dotest 'a' 'b' 'c' 'a' 11 + +# srcdn=desti +dotest 'a' 'b' 'b' 'c' 12 + +# srcdn=destdn +dotest 'a' 'b' 'c' 'b' 13 + +# destdn=desti +dotest 'a' 'b' 'c' 'c' 14 + +# all different +dotest 'a' 'b' 'c' 'd' 15 diff --git a/qa/workunits/rest/test-restful.sh b/qa/workunits/rest/test-restful.sh new file mode 100755 index 000000000..fde0d107a --- /dev/null +++ b/qa/workunits/rest/test-restful.sh @@ -0,0 +1,10 @@ +#!/bin/sh -ex + +mydir=`dirname $0` + +secret=`ceph config-key get mgr/restful/keys/admin` +url=$(ceph mgr dump|jq -r .services.restful|sed -e 's/\/$//') +echo "url $url secret $secret" +$mydir/test_mgr_rest_api.py $url $secret + +echo $0 OK diff --git a/qa/workunits/rest/test_mgr_rest_api.py b/qa/workunits/rest/test_mgr_rest_api.py new file mode 100755 index 000000000..74126ab78 --- /dev/null +++ b/qa/workunits/rest/test_mgr_rest_api.py @@ -0,0 +1,98 @@ +#! /usr/bin/env python3 + +import requests +import time +import sys +import json + +# Do not show the stupid message about verify=False. ignore exceptions bc +# this doesn't work on some distros. +try: + from requests.packages.urllib3.exceptions import InsecureRequestWarning + requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning) +except: + pass + +if len(sys.argv) < 3: + print("Usage: %s <url> <admin_key>" % sys.argv[0]) + sys.exit(1) + +addr = sys.argv[1] +auth = ('admin', sys.argv[2]) +headers = {'Content-type': 'application/json'} + +request = None + +# Create a pool and get its id +request = requests.post( + addr + '/pool?wait=yes', + data=json.dumps({'name': 'supertestfriends', 'pg_num': 128}), + headers=headers, + verify=False, + auth=auth) +print(request.text) +request = requests.get(addr + '/pool', verify=False, auth=auth) +assert(request.json()[-1]['pool_name'] == 'supertestfriends') +pool_id = request.json()[-1]['pool'] + +# get a mon name +request = requests.get(addr + '/mon', verify=False, auth=auth) +firstmon = request.json()[0]['name'] +print('first mon is %s' % firstmon) + +# get a server name +request = requests.get(addr + '/osd', verify=False, auth=auth) +aserver = request.json()[0]['server'] +print('a server is %s' % aserver) + + +screenplay = [ + ('get', '/', {}), + ('get', '/config/cluster', {}), + ('get', '/crush/rule', {}), + ('get', '/doc', {}), + ('get', '/mon', {}), + ('get', '/mon/' + firstmon, {}), + ('get', '/osd', {}), + ('get', '/osd/0', {}), + ('get', '/osd/0/command', {}), + ('get', '/pool/1', {}), + ('get', '/server', {}), + ('get', '/server/' + aserver, {}), + ('post', '/osd/0/command', {'command': 'scrub'}), + ('post', '/pool?wait=1', {'name': 'supertestfriends', 'pg_num': 128}), + ('patch', '/osd/0', {'in': False}), + ('patch', '/config/osd', {'pause': True}), + ('get', '/config/osd', {}), + ('patch', '/pool/' + str(pool_id), {'size': 2}), + ('patch', '/config/osd', {'pause': False}), + ('patch', '/osd/0', {'in': True}), + ('get', '/pool', {}), + ('delete', '/pool/' + str(pool_id) + '?wait=1', {}), + ('get', '/request?page=0', {}), + ('delete', '/request', {}), + ('get', '/request', {}), + ('patch', '/pool/1', {'pg_num': 128}), + ('patch', '/pool/1', {'pgp_num': 128}), + ('get', '/perf?daemon=.*', {}), +] + +for method, endpoint, args in screenplay: + if method == 'sleep': + time.sleep(endpoint) + continue + url = addr + endpoint + print("URL = " + url) + request = getattr(requests, method)( + url, + data=json.dumps(args) if args else None, + headers=headers, + 
verify=False, + auth=auth) + assert request is not None + print(request.text) + if request.status_code != 200 or 'error' in request.json(): + print('ERROR: %s request for URL "%s" failed' % (method, url)) + sys.exit(1) + +print('OK') diff --git a/qa/workunits/restart/test-backtraces.py b/qa/workunits/restart/test-backtraces.py new file mode 100755 index 000000000..37ddef539 --- /dev/null +++ b/qa/workunits/restart/test-backtraces.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python3 + +from __future__ import print_function + +import subprocess +import json +import os +import time +import sys + +import rados as rados +import cephfs as cephfs + +prefix='testbt' + +def get_name(b, i, j): + c = '{pre}.{pid}.{i}.{j}'.format(pre=prefix, pid=os.getpid(), i=i, j=j) + return c, b + '/' + c + +def mkdir(ceph, d): + print("mkdir {d}".format(d=d), file=sys.stderr) + ceph.mkdir(d, 0o755) + return ceph.stat(d)['st_ino'] + +def create(ceph, f): + print("creating {f}".format(f=f), file=sys.stderr) + fd = ceph.open(f, os.O_CREAT | os.O_RDWR, 0o644) + ceph.close(fd) + return ceph.stat(f)['st_ino'] + +def set_mds_config_param(ceph, param): + with open('/dev/null', 'rb') as devnull: + confarg = '' + if conf != '': + confarg = '-c {c}'.format(c=conf) + r = subprocess.call("ceph {ca} mds tell a injectargs '{p}'".format(ca=confarg, p=param), shell=True, stdout=devnull) + if r != 0: + raise Exception + + +class _TrimIndentFile(object): + def __init__(self, fp): + self.fp = fp + + def readline(self): + line = self.fp.readline() + return line.lstrip(' \t') + +def _optionxform(s): + s = s.replace('_', ' ') + s = '_'.join(s.split()) + return s + +def conf_set_kill_mds(location, killnum): + print('setting mds kill config option for {l}.{k}'.format(l=location, k=killnum), file=sys.stderr) + print("restart mds a mds_kill_{l}_at {k}".format(l=location, k=killnum)) + sys.stdout.flush() + for l in sys.stdin.readline(): + if l == 'restarted': + break + +def flush(ceph, testnum): + print('flushing {t}'.format(t=testnum), file=sys.stderr) + set_mds_config_param(ceph, '--mds_log_max_segments 1') + + for i in range(1, 500): + f = '{p}.{pid}.{t}.{i}'.format(p=prefix, pid=os.getpid(), t=testnum, i=i) + print('flushing with create {f}'.format(f=f), file=sys.stderr) + fd = ceph.open(f, os.O_CREAT | os.O_RDWR, 0o644) + ceph.close(fd) + ceph.unlink(f) + + print('flush doing shutdown', file=sys.stderr) + ceph.shutdown() + print('flush reinitializing ceph', file=sys.stderr) + ceph = cephfs.LibCephFS(conffile=conf) + print('flush doing mount', file=sys.stderr) + ceph.mount() + return ceph + +def kill_mds(ceph, location, killnum): + print('killing mds: {l}.{k}'.format(l=location, k=killnum), file=sys.stderr) + set_mds_config_param(ceph, '--mds_kill_{l}_at {k}'.format(l=location, k=killnum)) + +def wait_for_mds(ceph): + # wait for restart + while True: + confarg = '' + if conf != '': + confarg = '-c {c}'.format(c=conf) + r = subprocess.check_output("ceph {ca} mds stat".format(ca=confarg), shell=True).decode() + if r.find('a=up:active'): + break + time.sleep(1) + +def decode(value): + + tmpfile = '/tmp/{p}.{pid}'.format(p=prefix, pid=os.getpid()) + with open(tmpfile, 'w+') as f: + f.write(value) + + p = subprocess.Popen( + [ + 'ceph-dencoder', + 'import', + tmpfile, + 'type', + 'inode_backtrace_t', + 'decode', + 'dump_json', + ], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + ) + (stdout, _) = p.communicate(input=value) + p.stdin.close() + if p.returncode != 0: + raise Exception + os.remove(tmpfile) + return json.loads(stdout) + +class 
VerifyFailure(Exception): + pass + +def verify(rados_ioctx, ino, values, pool): + print('getting parent attr for ino: %lx.00000000' % ino, file=sys.stderr) + savede = None + for i in range(1, 20): + try: + savede = None + binbt = rados_ioctx.get_xattr('%lx.00000000' % ino, 'parent') + except rados.ObjectNotFound as e: + # wait for a bit to let segments get flushed out + savede = e + time.sleep(10) + if savede: + raise savede + + bt = decode(binbt) + + ind = 0 + if bt['ino'] != ino: + raise VerifyFailure('inode mismatch: {bi} != {ino}\n\tbacktrace:\n\t\t{bt}\n\tfailed verify against:\n\t\t{i}, {v}'.format( + bi=bt['ancestors'][ind]['dname'], ino=ino, bt=bt, i=ino, v=values)) + for (n, i) in values: + if bt['ancestors'][ind]['dirino'] != i: + raise VerifyFailure('ancestor dirino mismatch: {b} != {ind}\n\tbacktrace:\n\t\t{bt}\n\tfailed verify against:\n\t\t{i}, {v}'.format( + b=bt['ancestors'][ind]['dirino'], ind=i, bt=bt, i=ino, v=values)) + if bt['ancestors'][ind]['dname'] != n: + raise VerifyFailure('ancestor dname mismatch: {b} != {n}\n\tbacktrace:\n\t\t{bt}\n\tfailed verify against:\n\t\t{i}, {v}'.format( + b=bt['ancestors'][ind]['dname'], n=n, bt=bt, i=ino, v=values)) + ind += 1 + + if bt['pool'] != pool: + raise VerifyFailure('pool mismatch: {btp} != {p}\n\tbacktrace:\n\t\t{bt}\n\tfailed verify against:\n\t\t{i}, {v}'.format( + btp=bt['pool'], p=pool, bt=bt, i=ino, v=values)) + +def make_abc(ceph, rooti, i): + expected_bt = [] + c, d = get_name("/", i, 0) + expected_bt = [(c, rooti)] + expected_bt + di = mkdir(ceph, d) + c, d = get_name(d, i, 1) + expected_bt = [(c, di)] + expected_bt + di = mkdir(ceph, d) + c, f = get_name(d, i, 2) + fi = create(ceph, f) + expected_bt = [(c, di)] + expected_bt + return fi, expected_bt + +test = -1 +if len(sys.argv) > 1: + test = int(sys.argv[1]) + +conf = '' +if len(sys.argv) > 2: + conf = sys.argv[2] + +radosobj = rados.Rados(conffile=conf) +radosobj.connect() +ioctx = radosobj.open_ioctx('data') + +ceph = cephfs.LibCephFS(conffile=conf) +ceph.mount() + +rooti = ceph.stat('/')['st_ino'] + +test = -1 +if len(sys.argv) > 1: + test = int(sys.argv[1]) + +conf = '/etc/ceph/ceph.conf' +if len(sys.argv) > 2: + conf = sys.argv[2] + +# create /a/b/c +# flush +# verify + +i = 0 +if test < 0 or test == i: + print('Running test %d: basic verify' % i, file=sys.stderr) + ino, expected_bt = make_abc(ceph, rooti, i) + ceph = flush(ceph, i) + verify(ioctx, ino, expected_bt, 0) + +i += 1 + +# kill-mds-at-openc-1 +# create /a/b/c +# restart-mds +# flush +# verify + +if test < 0 or test == i: + print('Running test %d: kill openc' % i, file=sys.stderr) + print("restart mds a") + sys.stdout.flush() + kill_mds(ceph, 'openc', 1) + ino, expected_bt = make_abc(ceph, rooti, i) + ceph = flush(ceph, i) + verify(ioctx, ino, expected_bt, 0) + +i += 1 + +# kill-mds-at-openc-1 +# create /a/b/c +# restart-mds with kill-mds-at-replay-1 +# restart-mds +# flush +# verify +if test < 0 or test == i: + print('Running test %d: kill openc/replay' % i, file=sys.stderr) + # these are reversed because we want to prepare the config + conf_set_kill_mds('journal_replay', 1) + kill_mds(ceph, 'openc', 1) + print("restart mds a") + sys.stdout.flush() + ino, expected_bt = make_abc(ceph, rooti, i) + ceph = flush(ceph, i) + verify(ioctx, ino, expected_bt, 0) + +i += 1 + +ioctx.close() +radosobj.shutdown() +ceph.shutdown() + +print("done") +sys.stdout.flush() diff --git a/qa/workunits/rgw/common.py b/qa/workunits/rgw/common.py new file mode 100755 index 000000000..2c9c5d035 --- /dev/null +++ 
b/qa/workunits/rgw/common.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 + +import errno +import subprocess +import logging as log +import boto3 +import botocore.exceptions +import random +import json +from time import sleep + +log.basicConfig(format = '%(message)s', level=log.DEBUG) +log.getLogger('botocore').setLevel(log.CRITICAL) +log.getLogger('boto3').setLevel(log.CRITICAL) +log.getLogger('urllib3').setLevel(log.CRITICAL) + +def exec_cmd(cmd, wait = True, **kwargs): + check_retcode = kwargs.pop('check_retcode', True) + kwargs['shell'] = True + kwargs['stdout'] = subprocess.PIPE + proc = subprocess.Popen(cmd, **kwargs) + log.info(proc.args) + if wait: + out, _ = proc.communicate() + if check_retcode: + assert(proc.returncode == 0) + return out + return (out, proc.returncode) + return '' + +def create_user(uid, display_name, access_key, secret_key): + _, ret = exec_cmd(f'radosgw-admin user create --uid {uid} --display-name "{display_name}" --access-key {access_key} --secret {secret_key}', check_retcode=False) + assert(ret == 0 or errno.EEXIST) + +def boto_connect(access_key, secret_key, config=None): + def try_connect(portnum, ssl, proto): + endpoint = proto + '://localhost:' + portnum + conn = boto3.resource('s3', + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + use_ssl=ssl, + endpoint_url=endpoint, + verify=False, + config=config, + ) + try: + list(conn.buckets.limit(1)) # just verify we can list buckets + except botocore.exceptions.ConnectionError as e: + print(e) + raise + print('connected to', endpoint) + return conn + try: + return try_connect('80', False, 'http') + except botocore.exceptions.ConnectionError: + try: # retry on non-privileged http port + return try_connect('8000', False, 'http') + except botocore.exceptions.ConnectionError: + # retry with ssl + return try_connect('443', True, 'https') + +def put_objects(bucket, key_list): + objs = [] + for key in key_list: + o = bucket.put_object(Key=key, Body=b"some_data") + objs.append((o.key, o.version_id)) + return objs + +def create_unlinked_objects(conn, bucket, key_list): + # creates an unlinked/unlistable object for each key in key_list + + object_versions = [] + try: + exec_cmd('ceph config set client rgw_debug_inject_set_olh_err 2') + exec_cmd('ceph config set client rgw_debug_inject_olh_cancel_modification_err true') + sleep(1) + for key in key_list: + tag = str(random.randint(0, 1_000_000)) + try: + bucket.put_object(Key=key, Body=b"some_data", Metadata = { + 'tag': tag, + }) + except Exception as e: + log.debug(e) + out = exec_cmd(f'radosgw-admin bi list --bucket {bucket.name} --object {key}') + instance_entries = filter( + lambda x: x['type'] == 'instance', + json.loads(out.replace(b'\x80', b'0x80'))) + found = False + for ie in instance_entries: + instance_id = ie['entry']['instance'] + ov = conn.ObjectVersion(bucket.name, key, instance_id).head() + if ov['Metadata'] and ov['Metadata']['tag'] == tag: + object_versions.append((key, instance_id)) + found = True + break + if not found: + raise Exception(f'failed to create unlinked object for key={key}') + finally: + exec_cmd('ceph config rm client rgw_debug_inject_set_olh_err') + exec_cmd('ceph config rm client rgw_debug_inject_olh_cancel_modification_err') + return object_versions + diff --git a/qa/workunits/rgw/keystone-fake-server.py b/qa/workunits/rgw/keystone-fake-server.py new file mode 100755 index 000000000..c05ad7bfd --- /dev/null +++ b/qa/workunits/rgw/keystone-fake-server.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2022 
Binero +# +# Author: Tobias Urdin <tobias.urdin@binero.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. + +from datetime import datetime, timedelta +import logging +import json +from http.server import BaseHTTPRequestHandler, HTTPServer + + +DEFAULT_DOMAIN = { + 'id': 'default', + 'name': 'Default', +} + + +PROJECTS = { + 'admin': { + 'domain': DEFAULT_DOMAIN, + 'id': 'a6944d763bf64ee6a275f1263fae0352', + 'name': 'admin', + }, + 'deadbeef': { + 'domain': DEFAULT_DOMAIN, + 'id': 'b4221c214dd64ee6a464g2153fae3813', + 'name': 'deadbeef', + }, +} + + +USERS = { + 'admin': { + 'domain': DEFAULT_DOMAIN, + 'id': '51cc68287d524c759f47c811e6463340', + 'name': 'admin', + }, + 'deadbeef': { + 'domain': DEFAULT_DOMAIN, + 'id': '99gg485738df758349jf8d848g774392', + 'name': 'deadbeef', + }, +} + + +USERROLES = { + 'admin': [ + { + 'id': '51cc68287d524c759f47c811e6463340', + 'name': 'admin', + } + ], + 'deadbeef': [ + { + 'id': '98bd32184f854f393a72b932g5334124', + 'name': 'Member', + } + ], +} + + +TOKENS = { + 'admin-token-1': { + 'username': 'admin', + 'project': 'admin', + 'expired': False, + }, + 'user-token-1': { + 'username': 'deadbeef', + 'project': 'deadbeef', + 'expired': False, + }, + 'user-token-2': { + 'username': 'deadbeef', + 'project': 'deadbeef', + 'expired': True, + }, +} + + +def _generate_token_result(username, project, expired=False): + userdata = USERS[username] + projectdata = PROJECTS[project] + userroles = USERROLES[username] + + if expired: + then = datetime.now() - timedelta(hours=2) + issued_at = then.strftime('%Y-%m-%dT%H:%M:%SZ') + expires_at = (then + timedelta(hours=1)).strftime('%Y-%m-%dT%H:%M:%SZ') + else: + now = datetime.now() + issued_at = now.strftime('%Y-%m-%dT%H:%M:%SZ') + expires_at = (now + timedelta(seconds=10)).strftime('%Y-%m-%dT%H:%M:%SZ') + + result = { + 'token': { + 'audit_ids': ['3T2dc1CGQxyJsHdDu1xkcw'], + 'catalog': [], + 'expires_at': expires_at, + 'is_domain': False, + 'issued_at': issued_at, + 'methods': ['password'], + 'project': projectdata, + 'roles': userroles, + 'user': userdata, + } + } + + return result + + +COUNTERS = { + 'get_total': 0, + 'post_total': 0, +} + + +class HTTPRequestHandler(BaseHTTPRequestHandler): + def do_GET(self): + # This is not part of the Keystone API + if self.path == '/stats': + self._handle_stats() + return + + if str(self.path).startswith('/v3/auth/tokens'): + self._handle_get_auth() + else: + self.send_response(403) + self.end_headers() + + def do_POST(self): + if self.path == '/v3/auth/tokens': + self._handle_post_auth() + else: + self.send_response(400) + self.end_headers() + + def _get_data(self): + length = int(self.headers.get('content-length')) + data = self.rfile.read(length).decode('utf8') + return json.loads(data) + + def _set_data(self, data): + jdata = json.dumps(data) + self.wfile.write(jdata.encode('utf8')) + + def _handle_stats(self): + self.send_response(200) + self.end_headers() + self._set_data(COUNTERS) + + def _handle_get_auth(self): + logging.info('Increasing get_total counter from %d -> %d' % (COUNTERS['get_total'], COUNTERS['get_total']+1)) + 
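+        # Each validation request bumps get_total (reported by the /stats
+        # endpoint) before the Keystone-style checks below: the token to
+        # validate arrives in the X-Subject-Token header, and an expired token
+        # is only served when the request carries ?allow_expired=1, roughly
+        # mirroring the real v3 API.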
COUNTERS['get_total'] += 1 + auth_token = self.headers.get('X-Subject-Token', None) + if auth_token and auth_token in TOKENS: + tokendata = TOKENS[auth_token] + if tokendata['expired'] and 'allow_expired=1' not in self.path: + self.send_response(404) + self.end_headers() + else: + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.end_headers() + result = _generate_token_result(tokendata['username'], tokendata['project'], tokendata['expired']) + self._set_data(result) + else: + self.send_response(404) + self.end_headers() + + def _handle_post_auth(self): + logging.info('Increasing post_total counter from %d -> %d' % (COUNTERS['post_total'], COUNTERS['post_total']+1)) + COUNTERS['post_total'] += 1 + data = self._get_data() + user = data['auth']['identity']['password']['user'] + if user['name'] == 'admin' and user['password'] == 'ADMIN': + self.send_response(201) + self.send_header('Content-Type', 'application/json') + self.send_header('X-Subject-Token', 'admin-token-1') + self.end_headers() + tokendata = TOKENS['admin-token-1'] + result = _generate_token_result(tokendata['username'], tokendata['project'], tokendata['expired']) + self._set_data(result) + else: + self.send_response(401) + self.end_headers() + + +def main(): + logging.basicConfig(level=logging.DEBUG) + logging.info('Starting keystone-fake-server') + server = HTTPServer(('localhost', 5000), HTTPRequestHandler) + server.serve_forever() + + +if __name__ == '__main__': + main() diff --git a/qa/workunits/rgw/keystone-service-token.sh b/qa/workunits/rgw/keystone-service-token.sh new file mode 100755 index 000000000..fc39731ca --- /dev/null +++ b/qa/workunits/rgw/keystone-service-token.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2022 Binero +# +# Author: Tobias Urdin <tobias.urdin@binero.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +trap cleanup EXIT + +function cleanup() { + kill $KEYSTONE_FAKE_SERVER_PID + wait +} + +function run() { + $CEPH_ROOT/qa/workunits/rgw//keystone-fake-server.py & + KEYSTONE_FAKE_SERVER_PID=$! + # Give fake Keystone server some seconds to startup + sleep 5 + $CEPH_ROOT/qa/workunits/rgw/test-keystone-service-token.py +} + +main keystone-service-token "$@" diff --git a/qa/workunits/rgw/olh_noname_key b/qa/workunits/rgw/olh_noname_key new file mode 100644 index 000000000..6138c57cd --- /dev/null +++ b/qa/workunits/rgw/olh_noname_key @@ -0,0 +1 @@ +€1001_04/57/0457f727ec113e418d5b16d206b200ed068c0533554883ce811df7c932a3df68/2018_12_11/2889999/3386469/metadata.gz
\ No newline at end of file diff --git a/qa/workunits/rgw/olh_noname_val b/qa/workunits/rgw/olh_noname_val Binary files differnew file mode 100644 index 000000000..ff442e137 --- /dev/null +++ b/qa/workunits/rgw/olh_noname_val diff --git a/qa/workunits/rgw/run-bucket-check.sh b/qa/workunits/rgw/run-bucket-check.sh new file mode 100755 index 000000000..85e02db5e --- /dev/null +++ b/qa/workunits/rgw/run-bucket-check.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -ex + +# assume working ceph environment (radosgw-admin in path) and rgw on localhost:80 +# localhost::443 for ssl + +mydir=`dirname $0` + +python3 -m venv $mydir +source $mydir/bin/activate +pip install pip --upgrade +pip install boto3 + +## run test +$mydir/bin/python3 $mydir/test_rgw_bucket_check.py + +deactivate +echo OK. + diff --git a/qa/workunits/rgw/run-datacache.sh b/qa/workunits/rgw/run-datacache.sh new file mode 100755 index 000000000..5c00da1da --- /dev/null +++ b/qa/workunits/rgw/run-datacache.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -ex + +#assume working ceph environment (radosgw-admin in path) and rgw on localhost:80 +# localhost::443 for ssl + +mydir=`dirname $0` + +python3 -m venv $mydir +source $mydir/bin/activate +pip install pip --upgrade +pip install configobj + +## run test +$mydir/bin/python3 $mydir/test_rgw_datacache.py + +deactivate +echo OK. + diff --git a/qa/workunits/rgw/run-reshard.sh b/qa/workunits/rgw/run-reshard.sh new file mode 100755 index 000000000..bdab0aabb --- /dev/null +++ b/qa/workunits/rgw/run-reshard.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +set -ex + +# this test uses fault injection to abort during 'radosgw-admin bucket reshard' +# disable coredumps so teuthology won't mark a failure +ulimit -c 0 + +#assume working ceph environment (radosgw-admin in path) and rgw on localhost:80 +# localhost::443 for ssl + +mydir=`dirname $0` + +python3 -m venv $mydir +source $mydir/bin/activate +pip install pip --upgrade +pip install boto3 + +## run test +$mydir/bin/python3 $mydir/test_rgw_reshard.py + +deactivate +echo OK. + diff --git a/qa/workunits/rgw/run-s3tests.sh b/qa/workunits/rgw/run-s3tests.sh new file mode 100755 index 000000000..727bef9eb --- /dev/null +++ b/qa/workunits/rgw/run-s3tests.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +set -ex + +# run s3-tests from current directory. assume working +# ceph environment (radosgw-admin in path) and rgw on localhost:8000 +# (the vstart default). + +branch=$1 +[ -z "$1" ] && branch=master +port=$2 +[ -z "$2" ] && port=8000 # this is vstart's default + +## + +[ -z "$BUILD_DIR" ] && BUILD_DIR=build + +if [ -e CMakeCache.txt ]; then + BIN_PATH=$PWD/bin +elif [ -e $root_path/../${BUILD_DIR}/CMakeCache.txt ]; then + cd $root_path/../${BUILD_DIR} + BIN_PATH=$PWD/bin +fi +PATH=$PATH:$BIN_PATH + +dir=tmp.s3-tests.$$ + +# clone and bootstrap +mkdir $dir +cd $dir +git clone https://github.com/ceph/s3-tests +cd s3-tests +git checkout ceph-$branch +S3TEST_CONF=s3tests.conf.SAMPLE tox -- -m "not fails_on_rgw and not sse_s3 and not lifecycle_expiration and not test_of_sts and not webidentity_test" -v + +cd ../.. +rm -rf $dir + +echo OK. 
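The run-*.sh wrappers above each create a throwaway virtualenv, install boto3 (or configobj), and invoke the matching test_rgw_*.py module, which in turn builds on the helpers in qa/workunits/rgw/common.py. A minimal sketch of that flow, assuming common.py is importable, radosgw-admin is in PATH, and an RGW instance is listening on one of the ports probed by boto_connect (80, 8000, or 443); the user name and bucket name here are placeholders, not part of the suite:

    #!/usr/bin/env python3
    # Hypothetical driver sketch mirroring how the test_rgw_*.py scripts
    # use the shared helpers defined in common.py above.
    from common import boto_connect, create_user, put_objects

    ACCESS_KEY = 'XXXXXXXXXXXXXXXXXXXX'                       # placeholder credentials
    SECRET_KEY = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'

    # create_user() shells out to 'radosgw-admin user create' and tolerates
    # an already-existing user
    create_user('sketch-user', 'Sketch User', ACCESS_KEY, SECRET_KEY)

    # boto_connect() probes http://localhost:80, then :8000, then https://localhost:443
    conn = boto_connect(ACCESS_KEY, SECRET_KEY)
    bucket = conn.create_bucket(Bucket='sketch-bucket')

    # put_objects() returns (key, version_id) pairs for later verification
    print(put_objects(bucket, ['a', 'b', 'c']))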
+ diff --git a/qa/workunits/rgw/run-versioning.sh b/qa/workunits/rgw/run-versioning.sh new file mode 100755 index 000000000..df60b7b03 --- /dev/null +++ b/qa/workunits/rgw/run-versioning.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -ex + +# assume working ceph environment (radosgw-admin in path) and rgw on localhost:80 +# localhost::443 for ssl + +mydir=`dirname $0` + +python3 -m venv $mydir +source $mydir/bin/activate +pip install pip --upgrade +pip install boto3 + +## run test +$mydir/bin/python3 $mydir/test_rgw_versioning.py + +deactivate +echo OK. + diff --git a/qa/workunits/rgw/s3_bucket_quota.pl b/qa/workunits/rgw/s3_bucket_quota.pl new file mode 100755 index 000000000..7f5476ef6 --- /dev/null +++ b/qa/workunits/rgw/s3_bucket_quota.pl @@ -0,0 +1,393 @@ +#! /usr/bin/perl + +=head1 NAME + +s3_bucket_quota.pl - Script to test the rgw bucket quota functionality using s3 interface. + +=head1 SYNOPSIS + +Use: + perl s3_bucket_quota.pl [--help] + +Examples: + perl s3_bucket_quota.pl + or + perl s3_bucket_quota.pl --help + +=head1 DESCRIPTION + +This script intends to test the rgw bucket quota funcionality using s3 interface +and reports the test results + +=head1 ARGUMENTS + +s3_bucket_quota.pl takes the following arguments: + --help + (optional) Displays the usage message. + +=cut + +use Amazon::S3; +use Data::Dumper; +#use strict; +use IO::File; +use Getopt::Long; +use Digest::MD5; +use Pod::Usage(); +use FindBin; +use lib $FindBin::Bin; +use s3_utilities; +use Net::Domain qw(hostfqdn); + +my $help; + +Getopt::Long::GetOptions( + 'help' => \$help +); +Pod::Usage::pod2usage(-verbose => 1) && exit if ($help); + +#== local variables === +our $mytestfilename; +my $mytestfilename1; +my $logmsg; +my $kruft; +my $s3; +my $hostdom = $ENV{RGW_FQDN}||hostfqdn(); +my $port = $ENV{RGW_PORT}||80; +our $hostname = "$hostdom:$port"; +our $testfileloc; +my $rgw_user = "qa_user"; + +# Function that deletes the user $rgw_user and write to logfile. 
+sub delete_user +{ + my $cmd = "$radosgw_admin user rm --uid=$rgw_user"; + my $cmd_op = get_command_output($cmd); + if ($cmd_op !~ /aborting/){ + print "user $rgw_user deleted\n"; + } else { + print "user $rgw_user NOT deleted\n"; + return 1; + } + return 0; +} + +sub quota_set_max_size { + my $set_quota = `$radosgw_admin quota set --bucket=$bucketname --max-size=1048576000`; + if ($set_quota !~ /./){ + print "quota set for the bucket: $bucketname \n"; + } else { + print "quota set failed for the bucket: $bucketname \n"; + exit 1; + } + return 0; +} + +sub quota_set_max_size_zero { + run_s3($rgw_user); + my $set_quota = `$radosgw_admin quota set --bucket=$bucketname --max-size=0`; + if ($set_quota !~ /./){ + pass ("quota set for the bucket: $bucketname with max size as zero\n"); + } else { + fail ("quota set with max size 0 failed for the bucket: $bucketname \n"); + } + delete_bucket(); +} + +sub quota_set_max_objs_zero { + run_s3($rgw_user); + my $set_quota = `$radosgw_admin quota set --bucket=$bucketname --max-objects=0`; + if ($set_quota !~ /./){ + pass ("quota set for the bucket: $bucketname with max objects as zero\n"); + } else { + fail ("quota set with max objects 0 failed for the bucket: $bucketname \n"); + } + delete_bucket(); +} + +sub quota_set_neg_size { + run_s3($rgw_user); + my $set_quota = `$radosgw_admin quota set --bucket=$bucketname --max-size=-1`; + if ($set_quota !~ /./){ + pass ("quota set for the bucket: $bucketname with max size -1\n"); + } else { + fail ("quota set failed for the bucket: $bucketname with max size -1 \n"); + } + delete_bucket(); +} + +sub quota_set_neg_objs { + run_s3($rgw_user); + my $set_quota = `$radosgw_admin quota set --bucket=$bucketname --max-objects=-1`; + if ($set_quota !~ /./){ + pass ("quota set for the bucket: $bucketname max objects -1 \n"); + } else { + fail ("quota set failed for the bucket: $bucketname \n with max objects -1"); + } + delete_bucket(); +} + +sub quota_set_user_objs { + my $set_quota = `$radosgw_admin quota set --uid=$rgw_user --quota-scope=bucket`; + my $set_quota1 = `$radosgw_admin quota set --bucket=$bucketname --max-objects=1`; + if ($set_quota1 !~ /./){ + print "bucket quota max_objs set for the given user: $bucketname \n"; + } else { + print "bucket quota max_objs set failed for the given user: $bucketname \n"; + exit 1; + } + return 0; +} + +sub quota_set_user_size { + my $set_quota = `$radosgw_admin quota set --uid=$rgw_user --quota-scope=bucket`; + my $set_quota1 = `$radosgw_admin quota set --bucket=$bucketname --max-size=1048576000`; + if ($set_quota1 !~ /./){ + print "bucket quota max size set for the given user: $bucketname \n"; + } else { + print "bucket quota max size set failed for the user: $bucketname \n"; + exit 1; + } + return 0; +} + +sub quota_set_max_obj { + # set max objects + my $set_quota = `$radosgw_admin quota set --bucket=$bucketname --max-objects=1`; + if ($set_quota !~ /./){ + print "quota set for the bucket: $bucketname \n"; + } else { + print "quota set failed for the bucket: $bucketname \n"; + exit 1; + } + return 0; +} + +sub quota_enable { + my $en_quota = `$radosgw_admin quota enable --bucket=$bucketname`; + if ($en_quota !~ /./){ + print "quota enabled for the bucket: $bucketname \n"; + } else { + print "quota enable failed for the bucket: $bucketname \n"; + exit 1; + } + return 0; +} + +sub quota_disable { + my $dis_quota = `$radosgw_admin quota disable --bucket=$bucketname`; + if ($dis_quota !~ /./){ + print "quota disabled for the bucket: $bucketname \n"; + } else { + print "quota 
disable failed for the bucket: $bucketname \n"; + exit 1; + } + return 0; +} + +# upload a file to the bucket +sub upload_file { + print "adding file to bucket: $mytestfilename\n"; + ($bucket->add_key_filename( $mytestfilename, $testfileloc, + { content_type => 'text/plain', }, + ) and (print "upload file successful\n" ) and return 0 ) or (return 1); +} + +# delete the bucket +sub delete_bucket { + #($bucket->delete_key($mytestfilename1) and print "delete keys on bucket succeeded second time\n" ) or die $s3->err . "delete keys on bucket failed second time\n" . $s3->errstr; + ($bucket->delete_bucket) and (print "bucket delete succeeded \n") or die $s3->err . "delete bucket failed\n" . $s3->errstr; +} + +# set bucket quota with max_objects and verify +sub test_max_objects { + my $size = '10Mb'; + create_file($size); + run_s3($rgw_user); + quota_set_max_obj(); + quota_enable(); + my $ret_value = upload_file(); + if ($ret_value == 0){ + pass ( "Test max objects passed" ); + } else { + fail ( "Test max objects failed" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# Set bucket quota for specific user and ensure max objects set for the user is validated +sub test_max_objects_per_user{ + my $size = '10Mb'; + create_file($size); + run_s3($rgw_user); + quota_set_user_objs(); + quota_enable(); + my $ret_value = upload_file(); + if ($ret_value == 0){ + pass ( "Test max objects for the given user passed" ); + } else { + fail ( "Test max objects for the given user failed" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# set bucket quota with max_objects and try to exceed the max_objects and verify +sub test_beyond_max_objs { + my $size = "10Mb"; + create_file($size); + run_s3($rgw_user); + quota_set_max_obj(); + quota_enable(); + upload_file(); + my $ret_value = readd_file(); + if ($ret_value == 1){ + pass ( "set max objects and test beyond max objects passed" ); + } else { + fail ( "set max objects and test beyond max objects failed" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# set bucket quota for a user with max_objects and try to exceed the max_objects and verify +sub test_beyond_max_objs_user { + my $size = "10Mb"; + create_file($size); + run_s3($rgw_user); + quota_set_user_objs(); + quota_enable(); + upload_file(); + my $ret_value = readd_file(); + if ($ret_value == 1){ + pass ( "set max objects for a given user and test beyond max objects passed" ); + } else { + fail ( "set max objects for a given user and test beyond max objects failed" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# set bucket quota for max size and ensure it is validated +sub test_quota_size { + my $ret_value; + my $size = "2Gb"; + create_file($size); + run_s3($rgw_user); + quota_set_max_size(); + quota_enable(); + my $ret_value = upload_file(); + if ($ret_value == 1) { + pass ( "set max size and ensure that objects upload beyond max size is not entertained" ); + my $retdel = delete_keys($mytestfilename); + if ($retdel == 0) { + print "delete objects successful \n"; + my $size1 = "1Gb"; + create_file($size1); + my $ret_val1 = upload_file(); + if ($ret_val1 == 0) { + pass ( "set max size and ensure that the max size is in effect" ); + } else { + fail ( "set max size and ensure the max size takes effect" ); + } + } + } else { + fail ( "set max size and ensure that objects beyond max size is not allowed" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# set 
bucket quota for max size for a given user and ensure it is validated +sub test_quota_size_user { + my $ret_value; + my $size = "2Gb"; + create_file($size); + run_s3($rgw_user); + quota_set_user_size(); + quota_enable(); + my $ret_value = upload_file(); + if ($ret_value == 1) { + pass ( "set max size for a given user and ensure that objects upload beyond max size is not entertained" ); + my $retdel = delete_keys($mytestfilename); + if ($retdel == 0) { + print "delete objects successful \n"; + my $size1 = "1Gb"; + create_file($size1); + my $ret_val1 = upload_file(); + if ($ret_val1 == 0) { + pass ( "set max size for a given user and ensure that the max size is in effect" ); + } else { + fail ( "set max size for a given user and ensure the max size takes effect" ); + } + } + } else { + fail ( "set max size for a given user and ensure that objects beyond max size is not allowed" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# set bucket quota size but disable quota and verify +sub test_quota_size_disabled { + my $ret_value; + my $size = "2Gb"; + create_file($size); + run_s3($rgw_user); + quota_set_max_size(); + quota_disable(); + my $ret_value = upload_file(); + if ($ret_value == 0) { + pass ( "bucket quota size doesnt take effect when quota is disabled" ); + } else { + fail ( "bucket quota size doesnt take effect when quota is disabled" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# set bucket quota size for a given user but disable quota and verify +sub test_quota_size_disabled_user { + my $ret_value; + my $size = "2Gb"; + create_file($size); + run_s3($rgw_user); + quota_set_user_size(); + quota_disable(); + my $ret_value = upload_file(); + if ($ret_value == 0) { + pass ( "bucket quota size for a given user doesnt take effect when quota is disabled" ); + } else { + fail ( "bucket quota size for a given user doesnt take effect when quota is disabled" ); + } + delete_user(); + delete_keys($mytestfilename); + delete_bucket(); +} + +# set bucket quota for specified user and verify + +#== Main starts here=== +ceph_os_info(); +test_max_objects(); +test_max_objects_per_user(); +test_beyond_max_objs(); +test_beyond_max_objs_user(); +quota_set_max_size_zero(); +quota_set_max_objs_zero(); +quota_set_neg_objs(); +quota_set_neg_size(); +test_quota_size(); +test_quota_size_user(); +test_quota_size_disabled(); +test_quota_size_disabled_user(); + +print "OK"; diff --git a/qa/workunits/rgw/s3_multipart_upload.pl b/qa/workunits/rgw/s3_multipart_upload.pl new file mode 100755 index 000000000..ab29e6b03 --- /dev/null +++ b/qa/workunits/rgw/s3_multipart_upload.pl @@ -0,0 +1,151 @@ +#! /usr/bin/perl + +=head1 NAME + +s3_multipart_upload.pl - Script to test rgw multipart upload using s3 interface. + +=head1 SYNOPSIS + +Use: + perl s3_multipart_upload.pl [--help] + +Examples: + perl s3_multipart_upload.pl + or + perl s3_multipart_upload.pl --help + +=head1 DESCRIPTION + +This script intends to test the rgw multipart upload followed by a download +and verify checksum using s3 interface and reports test results + +=head1 ARGUMENTS + +s3_multipart_upload.pl takes the following arguments: + --help + (optional) Displays the usage message. 
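s3_bucket_quota.pl above exercises bucket quotas purely by shelling out to radosgw-admin and then uploading through Amazon::S3. The same command sequence it drives, sketched with Python's subprocess module; the bucket name is illustrative only, and radosgw-admin is assumed to be in PATH:

    #!/usr/bin/env python3
    # Sketch of the radosgw-admin calls made by the bucket quota tests above.
    import subprocess

    BUCKET = 'quota-demo-bucket'  # placeholder; the Perl tests generate bucket names

    def admin(*args):
        subprocess.run(['radosgw-admin', *args], check=True)

    # cap the bucket at one object, then enable quota enforcement
    admin('quota', 'set', f'--bucket={BUCKET}', '--max-objects=1')
    admin('quota', 'enable', f'--bucket={BUCKET}')
    # a second upload should now be rejected; the Perl test checks this via readd_file()

    # per-user bucket-scope variant used by quota_set_user_objs()
    admin('quota', 'set', '--uid=qa_user', '--quota-scope=bucket')

    # and the disabled-quota case checked by test_quota_size_disabled()
    admin('quota', 'disable', f'--bucket={BUCKET}')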
+ +=cut + +use Amazon::S3; +use Data::Dumper; +use IO::File; +use Getopt::Long; +use Digest::MD5; +use Pod::Usage(); +use FindBin; +use lib $FindBin::Bin; +use s3_utilities; +use Net::Domain qw(hostfqdn); + +my $help; + +Getopt::Long::GetOptions( + 'help' => \$help +); +Pod::Usage::pod2usage(-verbose => 1) && exit if ($help); + +#== local variables === +my $s3; +my $hostdom = $ENV{RGW_FQDN}||hostfqdn(); +my $port = $ENV{RGW_PORT}||80; +our $hostname = "$hostdom:$port"; +our $testfileloc; +our $mytestfilename; + +# upload a file to the bucket +sub upload_file { + my ($fsize, $i) = @_; + create_file($fsize, $i); + print "adding file to bucket $bucketname: $mytestfilename\n"; + ($bucket->add_key_filename( $mytestfilename, $testfileloc, + { content_type => 'text/plain', }, + ) and (print "upload file successful\n" ) and return 0 ) or (print "upload failed\n" and return 1); +} + +# delete the bucket +sub delete_bucket { + ($bucket->delete_bucket) and (print "bucket delete succeeded \n") or die $s3->err . "delete bucket failed\n" . $s3->errstr; +} + +# Function to perform multipart upload of given file size to the user bucket via s3 interface +sub multipart_upload +{ + my ($size, $parts) = @_; + # generate random user every time + my $user = rand(); + # Divide the file size in to equal parts and upload to bucket in multiple parts + my $fsize = ($size/$parts); + my $fsize1; + run_s3($user); + if ($parts == 10){ + $fsize1 = '100Mb'; + } elsif ($parts == 100){ + $fsize1 = '10Mb'; + } + foreach my $i(1..$parts){ + print "uploading file - part $i \n"; + upload_file($fsize1, $i); + } + fetch_file_from_bucket($fsize1, $parts); + compare_cksum($fsize1, $parts); + purge_data($user); +} + +# Function to download the files from bucket to verify there is no data corruption +sub fetch_file_from_bucket +{ + # fetch file from the bucket + my ($fsize, $parts) = @_; + foreach my $i(1..$parts){ + my $src_file = "$fsize.$i"; + my $dest_file = "/tmp/downloadfile.$i"; + print + "Downloading $src_file from bucket to $dest_file \n"; + $response = + $bucket->get_key_filename( $src_file, GET, + $dest_file ) + or die $s3->err . ": " . 
$s3->errstr; + } +} + +# Compare the source file with destination file and verify checksum to ensure +# the files are not corrupted +sub compare_cksum +{ + my ($fsize, $parts)=@_; + my $md5 = Digest::MD5->new; + my $flag = 0; + foreach my $i (1..$parts){ + my $src_file = "/tmp/"."$fsize".".$i"; + my $dest_file = "/tmp/downloadfile".".$i"; + open( FILE, $src_file ) + or die "Error: Could not open $src_file for MD5 checksum..."; + open( DLFILE, $dest_file ) + or die "Error: Could not open $dest_file for MD5 checksum."; + binmode(FILE); + binmode(DLFILE); + my $md5sum = $md5->addfile(*FILE)->hexdigest; + my $md5sumdl = $md5->addfile(*DLFILE)->hexdigest; + close FILE; + close DLFILE; + # compare the checksums + if ( $md5sum eq $md5sumdl ) { + $flag++; + } + } + if ($flag == $parts){ + pass("checksum verification for multipart upload passed" ); + }else{ + fail("checksum verification for multipart upload failed" ); + } +} + +#== Main starts here=== +ceph_os_info(); +check(); +# The following test runs multi part upload of file size 1Gb in 10 parts +multipart_upload('1048576000', 10); +# The following test runs multipart upload of 1 Gb file in 100 parts +multipart_upload('1048576000', 100); +print "OK"; diff --git a/qa/workunits/rgw/s3_user_quota.pl b/qa/workunits/rgw/s3_user_quota.pl new file mode 100755 index 000000000..6d5c02a9a --- /dev/null +++ b/qa/workunits/rgw/s3_user_quota.pl @@ -0,0 +1,191 @@ +#! /usr/bin/perl + +=head1 NAME + +s3_user_quota.pl - Script to test the rgw user quota functionality using s3 interface. + +=head1 SYNOPSIS + +Use: + perl s3_user_quota.pl [--help] + +Examples: + perl s3_user_quota.pl + or + perl s3_user_quota.pl --help + +=head1 DESCRIPTION + +This script intends to test the rgw user quota funcionality using s3 interface +and reports the test results + +=head1 ARGUMENTS + +s3_user_quota.pl takes the following arguments: + --help + (optional) Displays the usage message. 
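compare_cksum() above validates each downloaded part by comparing MD5 digests of the source file and the fetched copy. The equivalent check in Python, using only the standard library; the file paths follow the /tmp naming used by the Perl helpers and are illustrative:

    #!/usr/bin/env python3
    # MD5 comparison sketch mirroring compare_cksum() in s3_multipart_upload.pl.
    import hashlib

    def md5sum(path, chunk=1024 * 1024):
        h = hashlib.md5()
        with open(path, 'rb') as f:
            for block in iter(lambda: f.read(chunk), b''):
                h.update(block)
        return h.hexdigest()

    src = '/tmp/10Mb.1'           # part that was uploaded
    dst = '/tmp/downloadfile.1'   # same part fetched back from the bucket
    assert md5sum(src) == md5sum(dst), 'checksum mismatch after multipart download'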
+ +=cut + +use Amazon::S3; +use Data::Dumper; +use IO::File; +use Getopt::Long; +use Digest::MD5; +use Pod::Usage(); +use FindBin; +use lib $FindBin::Bin; +use s3_utilities; +use Net::Domain qw(hostfqdn); + +my $help; + +Getopt::Long::GetOptions( + 'help' => \$help +); +Pod::Usage::pod2usage(-verbose => 1) && exit if ($help); + +#== local variables === +our $mytestfilename; +my $mytestfilename1; +my $logmsg; +my $kruft; +my $s3; +my $hostdom = $ENV{RGW_FQDN}||hostfqdn(); +my $port = $ENV{RGW_PORT}||80; +our $hostname = "$hostdom:$port"; +our $testfileloc; +our $cnt; + +sub quota_set_max_size_per_user { + my ($maxsize, $size1,$rgw_user) = @_; + run_s3($rgw_user); + my $set_quota = `$radosgw_admin quota set --uid=$rgw_user --quota-scope=user --max-size=$maxsize`; + if (($set_quota !~ /./)&&($maxsize == 0)){ + my $ret = test_max_objs($size1, $rgw_user); + if ($ret == 1){ + pass("quota set for user: $rgw_user with max_size=$maxsize passed" ); + }else { + fail("quota set for user: $rgw_user with max_size=$maxsize failed" ); + } + } elsif (($set_quota !~ /./) && ($maxsize != 0)) { + my $ret = test_max_objs($size1, $rgw_user); + if ($ret == 0){ + pass("quota set for user: $rgw_user with max_size=$maxsize passed" ); + }else { + fail("quota set for user: $rgw_user with max_size=$maxsize failed" ); + } + } + delete_keys($mytestfilename); + purge_data($rgw_user); + return 0; +} + +sub max_size_per_user { + my ($maxsize, $size1,$rgw_user) = @_; + run_s3($rgw_user); + my $set_quota = `$radosgw_admin quota set --uid=$rgw_user --quota-scope=user --max-size=$maxsize`; + if (($set_quota !~ /./) && ($maxsize != 0)) { + my $ret = test_max_objs($size1, $rgw_user); + if ($ret == 0){ + $cnt++; + } + } + return $cnt; +} + +sub quota_set_max_obj_per_user { + # set max objects + my ($maxobjs, $size1, $rgw_user) = @_; + run_s3($rgw_user); + my $set_quota = `$radosgw_admin quota set --uid=$rgw_user --quota-scope=user --max-objects=$maxobjs`; + if (($set_quota !~ /./) && ($maxobjs == 0)){ + my $ret = test_max_objs($size1, $rgw_user); + if ($ret == 1){ + pass("quota set for user: $rgw_user with max_objects=$maxobjs passed" ); + }else { + fail("quota set for user: $rgw_user with max_objects=$maxobjs failed" ); + } + } elsif (($set_quota !~ /./) && ($maxobjs == 1)) { + my $ret = test_max_objs($size1, $rgw_user); + if ($ret == 0){ + pass("quota set for user: $rgw_user with max_objects=$maxobjs passed" ); + }else { + fail("quota set for user: $rgw_user with max_objects=$maxobjs failed" ); + } + } + delete_keys($mytestfilename); + purge_data($rgw_user); +} + +sub quota_enable_user { + my ($rgw_user) = @_; + my $en_quota = `$radosgw_admin quota enable --uid=$rgw_user --quota-scope=user`; + if ($en_quota !~ /./){ + print "quota enabled for the user $rgw_user \n"; + } else { + print "quota enable failed for the user $rgw_user \n"; + exit 1; + } + return 0; +} + +sub quota_disable_user { + my $dis_quota = `$radosgw_admin quota disable --uid=$rgw_user --quota-scope=user`; + if ($dis_quota !~ /./){ + print "quota disabled for the user $rgw_user \n"; + } else { + print "quota disable failed for the user $rgw_user \n"; + exit 1; + } + return 0; +} + +# upload a file to the bucket +sub upload_file { + print "adding file to bucket $bucketname: $mytestfilename\n"; + ($bucket->add_key_filename( $mytestfilename, $testfileloc, + { content_type => 'text/plain', }, + ) and (print "upload file successful\n" ) and return 0 ) or (return 1); +} + +# delete the bucket +sub delete_bucket { + ($bucket->delete_bucket) and (print "bucket delete 
succeeded \n") or die $s3->err . "delete bucket failed\n" . $s3->errstr; +} + +#Function to upload the given file size to bucket and verify +sub test_max_objs { + my ($size, $rgw_user) = @_; + create_file($size); + quota_enable_user($rgw_user); + my $ret_value = upload_file(); + return $ret_value; +} + +# set user quota and ensure it is validated +sub test_user_quota_max_size{ + my ($max_buckets,$size, $fsize) = @_; + my $usr = rand(); + foreach my $i (1..$max_buckets){ + my $ret_value = max_size_per_user($size, $fsize, $usr ); + } + if ($ret_value == $max_buckets){ + fail( "user quota max size for $usr failed on $max_buckets buckets" ); + } else { + pass( "user quota max size for $usr passed on $max_buckets buckets" ); + } + delete_keys($mytestfilename); + purge_data($usr); +} + +#== Main starts here=== +ceph_os_info(); +check(); +quota_set_max_obj_per_user('0', '10Mb', 'usr1'); +quota_set_max_obj_per_user('1', '10Mb', 'usr2'); +quota_set_max_size_per_user(0, '10Mb', 'usr1'); +quota_set_max_size_per_user(1048576000, '1Gb', 'usr2'); +test_user_quota_max_size(3,1048576000,'100Mb'); +test_user_quota_max_size(2,1048576000, '1Gb'); +print "OK"; diff --git a/qa/workunits/rgw/s3_utilities.pm b/qa/workunits/rgw/s3_utilities.pm new file mode 100644 index 000000000..3c3fae900 --- /dev/null +++ b/qa/workunits/rgw/s3_utilities.pm @@ -0,0 +1,233 @@ +# Common subroutines shared by the s3 testing code +my $sec; +my $min; +my $hour; +my $mon; +my $year; +my $mday; +my $wday; +my $yday; +my $isdst; +my $PASS_CNT = 0; +my $FAIL_CNT = 0; + +our $radosgw_admin = $ENV{RGW_ADMIN}||"sudo radosgw-admin"; + +# function to get the current time stamp from the test set up +sub get_timestamp { + ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); + if ($mon < 10) { $mon = "0$mon"; } + if ($hour < 10) { $hour = "0$hour"; } + if ($min < 10) { $min = "0$min"; } + if ($sec < 10) { $sec = "0$sec"; } + $year=$year+1900; + return $year . '_' . $mon . '_' . $mday . '_' . $hour . '_' . $min . '_' . $sec; +} + +# Function to check if radosgw is already running +sub get_status { + my $service = "radosgw"; + my $cmd = "pgrep $service"; + my $status = get_cmd_op($cmd); + if ($status =~ /\d+/ ){ + return 0; + } + return 1; +} + +# function to execute the command and return output +sub get_cmd_op +{ + my $cmd = shift; + my $excmd = `$cmd`; + return $excmd; +} + +#Function that executes the CLI commands and returns the output of the command +sub get_command_output { + my $cmd_output = shift; + open( FH, ">>$test_log" ); + print FH "\"$cmd_output\"\n"; + my $exec_cmd = `$cmd_output 2>&1`; + print FH "$exec_cmd\n"; + close(FH); + return $exec_cmd; +} + +# Function to get the hostname +sub get_hostname +{ + my $cmd = "hostname"; + my $get_host = get_command_output($cmd); + chomp($get_host); + return($get_host); +} + +sub pass { + my ($comment) = @_; + print "Comment required." unless length $comment; + chomp $comment; + print_border2(); + print "Test case: $TC_CNT PASSED - $comment \n"; + print_border2(); + $PASS_CNT++; +} + +sub fail { + my ($comment) = @_; + print "Comment required." unless length $comment; + chomp $comment; + print_border2(); + print "Test case: $TC_CNT FAILED - $comment \n"; + print_border2(); + $FAIL_CNT++; +} + +sub print_border2 { + print "~" x 90 . 
"\n"; +} + +# Function to create the user "qa_user" and extract the user access_key and secret_key of the user +sub get_user_info +{ + my ($rgw_user) = @_; + my $cmd = "$radosgw_admin user create --uid=$rgw_user --display-name=$rgw_user"; + my $cmd_op = get_command_output($cmd); + if ($cmd_op !~ /keys/){ + return (0,0); + } + my @get_user = (split/\n/,$cmd_op); + foreach (@get_user) { + if ($_ =~ /access_key/ ){ + $get_acc_key = $_; + } elsif ($_ =~ /secret_key/ ){ + $get_sec_key = $_; + } + } + my $access_key = $get_acc_key; + my $acc_key = (split /:/, $access_key)[1]; + $acc_key =~ s/\\//g; + $acc_key =~ s/ //g; + $acc_key =~ s/"//g; + $acc_key =~ s/,//g; + my $secret_key = $get_sec_key; + my $sec_key = (split /:/, $secret_key)[1]; + $sec_key =~ s/\\//g; + $sec_key =~ s/ //g; + $sec_key =~ s/"//g; + $sec_key =~ s/,//g; + return ($acc_key, $sec_key); +} + +# Function that deletes the given user and all associated user data +sub purge_data +{ + my ($rgw_user) = @_; + my $cmd = "$radosgw_admin user rm --uid=$rgw_user --purge-data"; + my $cmd_op = get_command_output($cmd); + if ($cmd_op !~ /./){ + print "user $rgw_user deleted\n"; + } else { + print "user $rgw_user NOT deleted\n"; + return 1; + } + return 0; +} + +# Read PRETTY_NAME from /etc/os-release +sub os_pretty_name +{ + open(FH, '<', '/etc/os-release') or die $!; + while (my $line = <FH>) { + chomp $line; + if ($line =~ /^\s*PRETTY_NAME=\"?([^"]*)\"?/) { + return $1; + } + } + close(FH); +} + + +# Function to get the Ceph and distro info +sub ceph_os_info +{ + my $ceph_v = get_command_output ( "ceph -v" ); + my @ceph_arr = split(" ",$ceph_v); + $ceph_v = "Ceph Version: $ceph_arr[2]"; + my $os_distro = os_pretty_name(); + $os_distro = "Linux Flavor:$os_distro"; + return ($ceph_v, $os_distro); +} + +# Execute the test case based on the input to the script +sub create_file { + my ($file_size, $part) = @_; + my $cnt; + $mytestfilename = "$file_size.$part"; + $testfileloc = "/tmp/".$mytestfilename; + if ($file_size == '10Mb'){ + $cnt = 1; + } elsif ($file_size == '100Mb'){ + $cnt = 10; + } elsif ($file_size == '500Mb'){ + $cnt = 50; + } elsif ($file_size == '1Gb'){ + $cnt = 100; + } elsif ($file_size == '2Gb'){ + $cnt = 200; + } + my $ret = system("dd if=/dev/zero of=$testfileloc bs=10485760 count=$cnt"); + if ($ret) { exit 1 }; + return 0; +} + +sub run_s3 +{ +# Run tests for the S3 functionality + # Modify access key and secret key to suit the user account + my ($user) = @_; + our ( $access_key, $secret_key ) = get_user_info($user); + if ( ($access_key) && ($secret_key) ) { + $s3 = Amazon::S3->new( + { + aws_access_key_id => $access_key, + aws_secret_access_key => $secret_key, + host => $hostname, + secure => 0, + retry => 1, + } + ); + } + +our $bucketname = 'buck_'.get_timestamp(); +# create a new bucket (the test bucket) +our $bucket = $s3->add_bucket( { bucket => $bucketname } ) + or die $s3->err. "bucket $bucketname create failed\n". 
$s3->errstr; + print "Bucket Created: $bucketname \n"; + return 0; +} + +# delete keys +sub delete_keys { + (($bucket->delete_key($_[0])) and return 0) or return 1; +} + +# Read the file back to bucket +sub readd_file { + system("dd if=/dev/zero of=/tmp/10MBfile1 bs=10485760 count=1"); + $mytestfilename1 = '10MBfile1'; + print "readding file to bucket: $mytestfilename1\n"; + ((($bucket->add_key_filename( $mytestfilename1, $testfileloc, + { content_type => 'text/plain', }, + )) and (print "readding file success\n") and return 0) or (return 1)); +} + +# check if rgw service is already running +sub check +{ + my $state = get_status(); + if ($state) { + exit 1; + } +} +1 diff --git a/qa/workunits/rgw/test-keystone-service-token.py b/qa/workunits/rgw/test-keystone-service-token.py new file mode 100755 index 000000000..2c7f21e93 --- /dev/null +++ b/qa/workunits/rgw/test-keystone-service-token.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2022 Binero +# +# Author: Tobias Urdin <tobias.urdin@binero.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. + +import sys +import requests +import time + + +# b4221c214dd64ee6a464g2153fae3813 is ID of deadbeef project +SWIFT_URL = 'http://localhost:8000/swift/v1/AUTH_b4221c214dd64ee6a464g2153fae3813' +KEYSTONE_URL = 'http://localhost:5000' + + +def get_stats(): + stats_url = '%s/stats' % KEYSTONE_URL + return requests.get(stats_url) + + +def test_list_containers(): + # Loop five list container requests with same token + for i in range(0, 5): + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-1'}) + if r.status_code != 204: + print('FAILED, status code is %d not 204' % r.status_code) + sys.exit(1) + + # Get stats from fake Keystone server + r = get_stats() + if r.status_code != 200: + print('FAILED, status code is %d not 200' % r.status_code) + sys.exit(1) + stats = r.json() + + # Verify admin token was cached + if stats['post_total'] != 1: + print('FAILED, post_total stat is %d not 1' % stats['post_total']) + sys.exit(1) + + # Verify user token was cached + if stats['get_total'] != 1: + print('FAILED, get_total stat is %d not 1' % stats['get_total']) + sys.exit(1) + + print('Wait for cache to be invalid') + time.sleep(11) + + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-1'}) + if r.status_code != 204: + print('FAILED, status code is %d not 204' % r.status_code) + sys.exit(1) + + # Get stats from fake Keystone server + r = get_stats() + if r.status_code != 200: + print('FAILED, status code is %d not 200' % r.status_code) + sys.exit(1) + stats = r.json() + + if stats['post_total'] != 2: + print('FAILED, post_total stat is %d not 2' % stats['post_total']) + sys.exit(1) + + if stats['get_total'] != 2: + print('FAILED, get_total stat is %d not 2' % stats['get_total']) + sys.exit(1) + + +def test_expired_token(): + # Try listing containers with an expired token + for i in range(0, 3): + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-2'}) + if r.status_code != 401: + print('FAILED, status code is %d not 401' % r.status_code) + sys.exit(1) + + # Get stats from 
fake Keystone server + r = get_stats() + if r.status_code != 200: + print('FAILED, status code is %d not 200' % r.status_code) + sys.exit(1) + stats = r.json() + + # Verify admin token was cached + if stats['post_total'] != 2: + print('FAILED, post_total stat is %d not 2' % stats['post_total']) + sys.exit(1) + + # Verify we got to fake Keystone server since expired tokens is not cached + if stats['get_total'] != 5: + print('FAILED, get_total stat is %d not 5' % stats['get_total']) + sys.exit(1) + + +def test_expired_token_with_service_token(): + # Try listing containers with an expired token but with a service token + for i in range(0, 3): + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-2', 'X-Service-Token': 'admin-token-1'}) + if r.status_code != 204: + print('FAILED, status code is %d not 204' % r.status_code) + sys.exit(1) + + # Get stats from fake Keystone server + r = get_stats() + if r.status_code != 200: + print('FAILED, status code is %d not 200' % r.status_code) + sys.exit(1) + stats = r.json() + + # Verify admin token was cached + if stats['post_total'] != 2: + print('FAILED, post_total stat is %d not 2' % stats['post_total']) + sys.exit(1) + + # Verify we got to fake Keystone server since expired tokens is not cached + if stats['get_total'] != 7: + print('FAILED, get_total stat is %d not 7' % stats['get_total']) + sys.exit(1) + + print('Wait for cache to be invalid') + time.sleep(11) + + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-2', 'X-Service-Token': 'admin-token-1'}) + if r.status_code != 204: + print('FAILED, status code is %d not 204' % r.status_code) + sys.exit(1) + + # Get stats from fake Keystone server + r = get_stats() + if r.status_code != 200: + print('FAILED, status code is %d not 200' % r.status_code) + sys.exit(1) + stats = r.json() + + if stats['post_total'] != 3: + print('FAILED, post_total stat is %d not 3' % stats['post_total']) + sys.exit(1) + + if stats['get_total'] != 9: + print('FAILED, get_total stat is %d not 9' % stats['get_total']) + sys.exit(1) + + +def test_expired_token_with_invalid_service_token(): + print('Wait for cache to be invalid') + time.sleep(11) + + # Test with a token that doesn't have allowed role as service token + for i in range(0, 3): + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-2', 'X-Service-Token': 'user-token-1'}) + if r.status_code != 401: + print('FAILED, status code is %d not 401' % r.status_code) + sys.exit(1) + + # Make sure we get user-token-1 cached + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-1'}) + if r.status_code != 204: + print('FAILED, status code is %d not 204' % r.status_code) + sys.exit(1) + + # Test that a cached token (that is invalid as service token) cannot be used as service token + for i in range(0, 3): + r = requests.get(SWIFT_URL, headers={'X-Auth-Token': 'user-token-2', 'X-Service-Token': 'user-token-1'}) + if r.status_code != 401: + print('FAILED, status code is %d not 401' % r.status_code) + sys.exit(1) + + +def main(): + test_list_containers() + test_expired_token() + test_expired_token_with_service_token() + test_expired_token_with_invalid_service_token() + + +if __name__ == '__main__': + main() diff --git a/qa/workunits/rgw/test_librgw_file.sh b/qa/workunits/rgw/test_librgw_file.sh new file mode 100755 index 000000000..1371ff711 --- /dev/null +++ b/qa/workunits/rgw/test_librgw_file.sh @@ -0,0 +1,59 @@ +#!/bin/sh -e + + +if [ -z ${AWS_ACCESS_KEY_ID} ] +then + export AWS_ACCESS_KEY_ID=`openssl rand -base64 20` + export 
AWS_SECRET_ACCESS_KEY=`openssl rand -base64 40` + + radosgw-admin user create --uid ceph-test-librgw-file \ + --access-key $AWS_ACCESS_KEY_ID \ + --secret $AWS_SECRET_ACCESS_KEY \ + --display-name "librgw test user" \ + --email librgw@example.com || echo "librgw user exists" + + # keyring override for teuthology env + KEYRING="/etc/ceph/ceph.keyring" + K="-k ${KEYRING}" +fi + +# nfsns is the main suite + +# create herarchy, and then list it +echo "phase 1.1" +ceph_test_librgw_file_nfsns ${K} --hier1 --dirs1 --create --rename --verbose + +# the older librgw_file can consume the namespace +echo "phase 1.2" +ceph_test_librgw_file_nfsns ${K} --getattr --verbose + +# and delete the hierarchy +echo "phase 1.3" +ceph_test_librgw_file_nfsns ${K} --hier1 --dirs1 --delete --verbose + +# bulk create/delete buckets +echo "phase 2.1" +ceph_test_librgw_file_cd ${K} --create --multi --verbose +echo "phase 2.2" +ceph_test_librgw_file_cd ${K} --delete --multi --verbose + +# write continuation test +echo "phase 3.1" +ceph_test_librgw_file_aw ${K} --create --large --verify +echo "phase 3.2" +ceph_test_librgw_file_aw ${K} --delete --large + +# continued readdir +echo "phase 4.1" +ceph_test_librgw_file_marker ${K} --create --marker1 --marker2 --nobjs=100 --verbose +echo "phase 4.2" +ceph_test_librgw_file_marker ${K} --delete --verbose + +# advanced i/o--but skip readv/writev for now--split delete from +# create and stat ops to avoid fault in sysobject cache +echo "phase 5.1" +ceph_test_librgw_file_gp ${K} --get --stat --put --create +echo "phase 5.2" +ceph_test_librgw_file_gp ${K} --delete + +exit 0 diff --git a/qa/workunits/rgw/test_rgw_bucket_check.py b/qa/workunits/rgw/test_rgw_bucket_check.py new file mode 100755 index 000000000..bfa6d65d6 --- /dev/null +++ b/qa/workunits/rgw/test_rgw_bucket_check.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python3 + +import logging as log +import json +import botocore +from common import exec_cmd, create_user, boto_connect, put_objects, create_unlinked_objects +from botocore.config import Config + +""" +Tests behavior of radosgw-admin bucket check commands. +""" +# The test cases in this file have been annotated for inventory. 
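test_rgw_bucket_check.py (started above) manufactures its broken bucket-index state with create_unlinked_objects() from common.py, which temporarily injects OLH errors through ceph config around each PUT. A condensed sketch of that setup and the check it enables, assuming the common.py helpers, a running vstart-style cluster with the ceph and radosgw-admin CLIs available, and the check-tester credentials defined just below:

    #!/usr/bin/env python3
    # Sketch only: reproduces the unlinked-entry setup used by the bucket
    # check tests, relying on helpers defined in common.py above.
    import json
    from common import boto_connect, create_unlinked_objects, create_user, exec_cmd, put_objects

    create_user('check-tester', 'Check Testing',
                'OJODXSLNX4LUNHQG99PA', '3l6ffld34qaymfomuh832j94738aie2x4p2o8h6n')
    conn = boto_connect('OJODXSLNX4LUNHQG99PA',
                        '3l6ffld34qaymfomuh832j94738aie2x4p2o8h6n')
    bucket = conn.create_bucket(Bucket='check-bucket')
    conn.BucketVersioning('check-bucket').enable()
    put_objects(bucket, ['c', 'd'])

    # sets rgw_debug_inject_set_olh_err / rgw_debug_inject_olh_cancel_modification_err,
    # PUTs each key, then removes the injected config again
    unlinked = create_unlinked_objects(conn, bucket, ['c', 'd', 'e'])

    # the entries are now invisible to listings but still present in the index
    out = exec_cmd('radosgw-admin bucket check unlinked --bucket check-bucket '
                   '--min-age-hours 0 --dump-keys')
    assert len(json.loads(out)) == len(unlinked)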
+# To extract the inventory (in csv format) use the command: +# +# grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' +# +# + +""" Constants """ +USER = 'check-tester' +DISPLAY_NAME = 'Check Testing' +ACCESS_KEY = 'OJODXSLNX4LUNHQG99PA' +SECRET_KEY = '3l6ffld34qaymfomuh832j94738aie2x4p2o8h6n' +BUCKET_NAME = 'check-bucket' + +def main(): + """ + execute bucket check commands + """ + create_user(USER, DISPLAY_NAME, ACCESS_KEY, SECRET_KEY) + + connection = boto_connect(ACCESS_KEY, SECRET_KEY, Config(retries = { + 'total_max_attempts': 1, + })) + + # pre-test cleanup + try: + bucket = connection.Bucket(BUCKET_NAME) + bucket.objects.all().delete() + bucket.object_versions.all().delete() + bucket.delete() + except botocore.exceptions.ClientError as e: + if not e.response['Error']['Code'] == 'NoSuchBucket': + raise + + bucket = connection.create_bucket(Bucket=BUCKET_NAME) + + null_version_keys = ['a', 'z'] + null_version_objs = put_objects(bucket, null_version_keys) + + connection.BucketVersioning(BUCKET_NAME).enable() + + ok_keys = ['a', 'b', 'c', 'd'] + unlinked_keys = ['c', 'd', 'e', 'f'] + ok_objs = put_objects(bucket, ok_keys) + + # TESTCASE 'recalculated bucket check stats are correct' + log.debug('TEST: recalculated bucket check stats are correct\n') + exec_cmd(f'radosgw-admin bucket check --fix --bucket {BUCKET_NAME}') + out = exec_cmd(f'radosgw-admin bucket stats --bucket {BUCKET_NAME}') + json_out = json.loads(out) + log.debug(json_out['usage']) + assert json_out['usage']['rgw.main']['num_objects'] == 6 + + # TESTCASE 'bucket check unlinked does not report normal entries' + log.debug('TEST: bucket check unlinked does not report normal entries\n') + out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --min-age-hours 0 --dump-keys') + json_out = json.loads(out) + assert len(json_out) == 0 + + unlinked_objs = create_unlinked_objects(connection, bucket, unlinked_keys) + + # TESTCASE 'bucket check unlinked finds unlistable entries' + log.debug('TEST: bucket check unlinked finds unlistable entries\n') + out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --min-age-hours 0 --dump-keys') + json_out = json.loads(out) + assert len(json_out) == len(unlinked_keys) + + # TESTCASE 'unlinked entries are not listable' + log.debug('TEST: unlinked entries are not listable\n') + for ov in bucket.object_versions.all(): + assert (ov.key, ov.version_id) not in unlinked_objs, f'object "{ov.key}:{ov.version_id}" was found in bucket listing' + + # TESTCASE 'GET returns 404 for unlinked entry keys that have no other versions' + log.debug('TEST: GET returns 404 for unlinked entry keys that have no other versions\n') + noent_keys = set(unlinked_keys) - set(ok_keys) + for key in noent_keys: + try: + bucket.Object(key).get() + assert False, 'GET did not return 404 for key={key} with no prior successful PUT' + except botocore.exceptions.ClientError as e: + assert e.response['ResponseMetadata']['HTTPStatusCode'] == 404 + + # TESTCASE 'bucket check unlinked fixes unlistable entries' + log.debug('TEST: bucket check unlinked fixes unlistable entries\n') + out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --fix --min-age-hours 0 --rgw-olh-pending-timeout-sec 0 --dump-keys') + json_out = json.loads(out) + assert len(json_out) == len(unlinked_keys) + for o in unlinked_objs: + try: + connection.ObjectVersion(bucket.name, o[0], o[1]).head() + assert False, f'head for unlistable object {o[0]}:{o[1]} succeeded after fix' + except 
botocore.exceptions.ClientError as e: + assert e.response['ResponseMetadata']['HTTPStatusCode'] == 404 + + # TESTCASE 'bucket check unlinked fix does not affect normal entries' + log.debug('TEST: bucket check unlinked does not affect normal entries\n') + all_listable = list(bucket.object_versions.all()) + assert len(all_listable) == len(ok_keys) + len(null_version_keys), 'some normal objects were not accounted for in object listing after unlinked fix' + for o in ok_objs: + assert o in map(lambda x: (x.key, x.version_id), all_listable), "normal object not listable after fix" + connection.ObjectVersion(bucket.name, o[0], o[1]).head() + + # TESTCASE 'bucket check unlinked does not find new unlistable entries after fix' + log.debug('TEST: bucket check unlinked does not find new unlistable entries after fix\n') + out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --min-age-hours 0 --dump-keys') + json_out = json.loads(out) + assert len(json_out) == 0 + + # for this set of keys we can produce leftover OLH object/entries by + # deleting the normal object instance since we should already have a leftover + # pending xattr on the OLH object due to the errors associated with the + # prior unlinked entries that were created for the same keys + leftover_pending_xattr_keys = set(ok_keys).intersection(unlinked_keys) + objs_to_delete = filter(lambda x: x[0] in leftover_pending_xattr_keys, ok_objs) + + for o in objs_to_delete: + connection.ObjectVersion(bucket.name, o[0], o[1]).delete() + + for key in leftover_pending_xattr_keys: + out = exec_cmd(f'radosgw-admin bi list --bucket {BUCKET_NAME} --object {key}') + idx_entries = json.loads(out.replace(b'\x80', b'0x80')) + assert len(idx_entries) > 0, 'failed to create leftover OLH entries for key {key}' + + # TESTCASE 'bucket check olh finds leftover OLH entries' + log.debug('TEST: bucket check olh finds leftover OLH entries\n') + out = exec_cmd(f'radosgw-admin bucket check olh --bucket {BUCKET_NAME} --dump-keys') + json_out = json.loads(out) + assert len(json_out) == len(leftover_pending_xattr_keys) + + # TESTCASE 'bucket check olh fixes leftover OLH entries' + log.debug('TEST: bucket check olh fixes leftover OLH entries\n') + out = exec_cmd(f'radosgw-admin bucket check olh --bucket {BUCKET_NAME} --fix --rgw-olh-pending-timeout-sec 0 --dump-keys') + json_out = json.loads(out) + assert len(json_out) == len(leftover_pending_xattr_keys) + + for key in leftover_pending_xattr_keys: + out = exec_cmd(f'radosgw-admin bi list --bucket {BUCKET_NAME} --object {key}') + idx_entries = json.loads(out.replace(b'\x80', b'0x80')) + assert len(idx_entries) == 0, 'index entries still exist for key={key} after olh fix' + + # TESTCASE 'bucket check olh does not find new leftover OLH entries after fix' + log.debug('TEST: bucket check olh does not find new leftover OLH entries after fix\n') + out = exec_cmd(f'radosgw-admin bucket check olh --bucket {BUCKET_NAME} --dump-keys') + json_out = json.loads(out) + assert len(json_out) == 0 + + # TESTCASE 'bucket check fixes do not affect null version objects' + log.debug('TEST: verify that bucket check fixes do not affect null version objects\n') + for o in null_version_objs: + connection.ObjectVersion(bucket.name, o[0], 'null').head() + + all_versions = list(map(lambda x: (x.key, x.version_id), bucket.object_versions.all())) + for key in null_version_keys: + assert (key, 'null') in all_versions + + # TESTCASE 'bucket check stats are correct in the presence of unlinked entries' + log.debug('TEST: bucket check stats 
are correct in the presence of unlinked entries\n') + bucket.object_versions.all().delete() + null_version_objs = put_objects(bucket, null_version_keys) + ok_objs = put_objects(bucket, ok_keys) + unlinked_objs = create_unlinked_objects(connection, bucket, unlinked_keys) + exec_cmd(f'radosgw-admin bucket check --fix --bucket {BUCKET_NAME}') + out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {BUCKET_NAME} --fix --min-age-hours 0 --rgw-olh-pending-timeout-sec 0 --dump-keys') + json_out = json.loads(out) + assert len(json_out) == len(unlinked_keys) + bucket.object_versions.all().delete() + out = exec_cmd(f'radosgw-admin bucket stats --bucket {BUCKET_NAME}') + json_out = json.loads(out) + log.debug(json_out['usage']) + assert json_out['usage']['rgw.main']['size'] == 0 + assert json_out['usage']['rgw.main']['num_objects'] == 0 + assert json_out['usage']['rgw.main']['size_actual'] == 0 + assert json_out['usage']['rgw.main']['size_kb'] == 0 + assert json_out['usage']['rgw.main']['size_kb_actual'] == 0 + assert json_out['usage']['rgw.main']['size_kb_utilized'] == 0 + + # Clean up + log.debug("Deleting bucket {}".format(BUCKET_NAME)) + bucket.object_versions.all().delete() + bucket.delete() + +main() +log.info("Completed bucket check tests") diff --git a/qa/workunits/rgw/test_rgw_datacache.py b/qa/workunits/rgw/test_rgw_datacache.py new file mode 100755 index 000000000..f070ec0f1 --- /dev/null +++ b/qa/workunits/rgw/test_rgw_datacache.py @@ -0,0 +1,209 @@ +#!/usr/bin/python3 + +import logging as log +from configobj import ConfigObj +import subprocess +import json +import os + +""" +Runs a test against a rgw with the data cache enabled. A client must be +set in the config for this task. This client must be the same client +that is in the config for the `rgw` task. + +In the `overrides` section `datacache` and `datacache` must be configured for +the `rgw` task and the ceph conf overrides must contain the below config +variables in the client section. + +`s3cmd` must be added as an extra_package to the install task. + +In the `workunit` task, `- rgw/run-datacache.sh` must be set for the client that +is in the config for the `rgw` task. The `RGW_DATACACHE_PATH` variable must be +set in the workunit's `env` and it must match the `datacache_path` given to the +`rgw` task in `overrides`. 
+Ex: +- install: + extra_packages: + deb: ['s3cmd'] + rpm: ['s3cmd'] +- overrides: + rgw: + datacache: true + datacache_path: /tmp/rgw_datacache + install: + extra_packages: + deb: ['s3cmd'] + rpm: ['s3cmd'] + ceph: + conf: + client: + rgw d3n l1 datacache persistent path: /tmp/rgw_datacache/ + rgw d3n l1 datacache size: 10737417240 + rgw d3n l1 local datacache enabled: true + rgw enable ops log: true +- rgw: + client.0: +- workunit: + clients: + client.0: + - rgw/run-datacache.sh + env: + RGW_DATACACHE_PATH: /tmp/rgw_datacache + cleanup: true +""" + +log.basicConfig(level=log.DEBUG) + +""" Constants """ +USER = 'rgw_datacache_user' +DISPLAY_NAME = 'DatacacheUser' +ACCESS_KEY = 'NX5QOQKC6BH2IDN8HC7A' +SECRET_KEY = 'LnEsqNNqZIpkzauboDcLXLcYaWwLQ3Kop0zAnKIn' +BUCKET_NAME = 'datacachebucket' +FILE_NAME = '7M.dat' +GET_FILE_NAME = '7M-get.dat' + +def exec_cmd(cmd): + log.debug("exec_cmd(%s)", cmd) + try: + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + out, err = proc.communicate() + if proc.returncode == 0: + log.info('command succeeded') + if out is not None: log.info(out) + return out + else: + raise Exception("error: %s \nreturncode: %s" % (err, proc.returncode)) + except Exception as e: + log.error('command failed') + log.error(e) + return False + +def get_radosgw_endpoint(): + out = exec_cmd('sudo netstat -nltp | egrep "rados|valgr"') # short for radosgw/valgrind + x = out.decode('utf8').split(" ") + port = [i for i in x if ':' in i][0].split(':')[1] + log.info('radosgw port: %s' % port) + proto = "http" + hostname = '127.0.0.1' + + if port == '443': + proto = "https" + + endpoint = hostname + + log.info("radosgw endpoint is: %s", endpoint) + return endpoint, proto + +def create_s3cmd_config(path, proto): + """ + Creates a minimal config file for s3cmd + """ + log.info("Creating s3cmd config...") + + use_https_config = "False" + log.info("proto for s3cmd config is %s", proto) + if proto == "https": + use_https_config = "True" + + s3cmd_config = ConfigObj( + indent_type='', + infile={ + 'default': + { + 'host_bucket': 'no.way.in.hell', + 'use_https': use_https_config, + }, + } + ) + + f = open(path, 'wb') + s3cmd_config.write(f) + f.close() + log.info("s3cmd config written") + +def get_cmd_output(cmd_out): + out = cmd_out.decode('utf8') + out = out.strip('\n') + return out + +def main(): + """ + execute the datacache test + """ + # setup for test + cache_dir = os.environ['RGW_DATACACHE_PATH'] + log.debug("datacache dir from config is: %s", cache_dir) + + out = exec_cmd('pwd') + pwd = get_cmd_output(out) + log.debug("pwd is: %s", pwd) + + endpoint, proto = get_radosgw_endpoint() + + # create 7M file to put + outfile = pwd + '/' + FILE_NAME + exec_cmd('dd if=/dev/urandom of=%s bs=1M count=7' % (outfile)) + + # create user + exec_cmd('radosgw-admin user create --uid %s --display-name %s --access-key %s --secret %s' + % (USER, DISPLAY_NAME, ACCESS_KEY, SECRET_KEY)) + + # create s3cmd config + s3cmd_config_path = pwd + '/s3cfg' + create_s3cmd_config(s3cmd_config_path, proto) + + # create a bucket + exec_cmd('s3cmd --access_key=%s --secret_key=%s --config=%s --no-check-hostname --host=%s mb s3://%s' + % (ACCESS_KEY, SECRET_KEY, s3cmd_config_path, endpoint, BUCKET_NAME)) + + # put an object in the bucket + exec_cmd('s3cmd --access_key=%s --secret_key=%s --config=%s --no-check-hostname --host=%s put %s s3://%s' + % (ACCESS_KEY, SECRET_KEY, s3cmd_config_path, endpoint, outfile, BUCKET_NAME)) + + # get object from bucket + get_file_path = pwd + '/' + 
GET_FILE_NAME + exec_cmd('s3cmd --access_key=%s --secret_key=%s --config=%s --no-check-hostname --host=%s get s3://%s/%s %s --force' + % (ACCESS_KEY, SECRET_KEY, s3cmd_config_path, endpoint, BUCKET_NAME, FILE_NAME, get_file_path)) + + # get info of object + out = exec_cmd('radosgw-admin object stat --bucket=%s --object=%s' % (BUCKET_NAME, FILE_NAME)) + + json_op = json.loads(out) + cached_object_name = json_op['manifest']['prefix'] + log.debug("Cached object name is: %s", cached_object_name) + + # check that the cache is enabled (does the cache directory empty) + out = exec_cmd('find %s -type f | wc -l' % (cache_dir)) + chk_cache_dir = int(get_cmd_output(out)) + log.debug("Check cache dir content: %s", chk_cache_dir) + if chk_cache_dir == 0: + log.info("NOTICE: datacache test object not found, inspect if datacache was bypassed or disabled during this check.") + return + + # list the files in the cache dir for troubleshooting + out = exec_cmd('ls -l %s' % (cache_dir)) + # get name of cached object and check if it exists in the cache + out = exec_cmd('find %s -name "*%s1"' % (cache_dir, cached_object_name)) + cached_object_path = get_cmd_output(out) + log.debug("Path of file in datacache is: %s", cached_object_path) + out = exec_cmd('basename %s' % (cached_object_path)) + basename_cmd_out = get_cmd_output(out) + log.debug("Name of file in datacache is: %s", basename_cmd_out) + + # check to see if the cached object is in Ceph + out = exec_cmd('rados ls -p default.rgw.buckets.data') + rados_ls_out = get_cmd_output(out) + log.debug("rados ls output is: %s", rados_ls_out) + + assert(basename_cmd_out in rados_ls_out) + log.debug("RGW Datacache test SUCCESS") + + # remove datacache dir + #cmd = exec_cmd('rm -rf %s' % (cache_dir)) + #log.debug("RGW Datacache dir deleted") + #^ commenting for future refrence - the work unit will continue running tests and if the cache_dir is removed + # all the writes to cache will fail with errno 2 ENOENT No such file or directory. + +main() +log.info("Completed Datacache tests") diff --git a/qa/workunits/rgw/test_rgw_gc_log.sh b/qa/workunits/rgw/test_rgw_gc_log.sh new file mode 100755 index 000000000..ab4015aef --- /dev/null +++ b/qa/workunits/rgw/test_rgw_gc_log.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_rgw_gc_log + +exit 0 diff --git a/qa/workunits/rgw/test_rgw_obj.sh b/qa/workunits/rgw/test_rgw_obj.sh new file mode 100755 index 000000000..01dd2b5ee --- /dev/null +++ b/qa/workunits/rgw/test_rgw_obj.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_rgw_obj + +exit 0 diff --git a/qa/workunits/rgw/test_rgw_orphan_list.sh b/qa/workunits/rgw/test_rgw_orphan_list.sh new file mode 100755 index 000000000..34d550cea --- /dev/null +++ b/qa/workunits/rgw/test_rgw_orphan_list.sh @@ -0,0 +1,519 @@ +#!/usr/bin/env bash + +# set -x +set -e + +# if defined, debug messages will be displayed and prepended with the string +# debug="DEBUG" + +huge_size=5100 # in megabytes +big_size=7 # in megabytes + +huge_obj=/tmp/huge_obj.temp.$$ +big_obj=/tmp/big_obj.temp.$$ +empty_obj=/tmp/empty_obj.temp.$$ + +fifo=/tmp/orphan-fifo.$$ +awscli_dir=${HOME}/awscli_temp +export PATH=${PATH}:${awscli_dir} + +rgw_host=$(hostname --fqdn) +if echo "$rgw_host" | grep -q '\.' ; then + : +else + host_domain=".front.sepia.ceph.com" + echo "WARNING: rgw hostname -- $rgw_host -- does not appear to be fully qualified; PUNTING and appending $host_domain" + rgw_host="${rgw_host}${host_domain}" +fi +rgw_port=80 + +echo "Fully Qualified Domain Name: $rgw_host" + +success() { + echo OK. 
+ exit 0 +} + +######################################################################## +# INSTALL AND CONFIGURE TOOLING + +install_awscli() { + # NB: this does verify authenticity and integrity of downloaded + # file; see + # https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2-linux.html + here="$(pwd)" + cd "$HOME" + curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" + unzip awscliv2.zip + mkdir -p $awscli_dir + ./aws/install -i $awscli_dir + cd "$here" +} + +uninstall_awscli() { + here="$(pwd)" + cd "$HOME" + rm -rf $awscli_dir ./aws awscliv2.zip + cd "$here" +} + +sudo yum -y install s3cmd +sudo yum -y install python3-setuptools +sudo yum -y install python3-pip +sudo pip3 install --upgrade setuptools +sudo pip3 install python-swiftclient + +# get ready for transition from s3cmd to awscli +if false ;then + install_awscli + aws --version + uninstall_awscli +fi + +s3config=/tmp/s3config.$$ + +# do not include the port when it is 80; the host base is used in the +# v4 signature and it needs to follow this convention for signatures +# to match +if [ "$rgw_port" -ne 80 ] ;then + s3_host_base="${rgw_host}:${rgw_port}" +else + s3_host_base="$rgw_host" +fi + +cat >${s3config} <<EOF +[default] +host_base = $s3_host_base +access_key = 0555b35654ad1656d804 +secret_key = h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q== +bucket_location = us-east-1 +check_ssl_certificate = True +check_ssl_hostname = True +default_mime_type = binary/octet-stream +delete_removed = False +dry_run = False +enable_multipart = True +encoding = UTF-8 +encrypt = False +follow_symlinks = False +force = False +guess_mime_type = True +host_bucket = anything.with.three.dots +multipart_chunk_size_mb = 15 +multipart_max_chunks = 10000 +recursive = False +recv_chunk = 65536 +send_chunk = 65536 +signature_v2 = False +socket_timeout = 300 +use_https = False +use_mime_magic = True +verbosity = WARNING +EOF + + +# set up swift authentication +export ST_AUTH=http://${rgw_host}:${rgw_port}/auth/v1.0 +export ST_USER=test:tester +export ST_KEY=testing + +create_users() { + # Create S3 user + local akey='0555b35654ad1656d804' + local skey='h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q==' + radosgw-admin user create --uid testid \ + --access-key $akey --secret $skey \ + --display-name 'M. Tester' --email tester@ceph.com + + # Create Swift user + radosgw-admin user create --subuser=test:tester \ + --display-name=Tester-Subuser --key-type=swift \ + --secret=testing --access=full +} + +myswift() { + if [ -n "$debug" ] ;then + echo "${debug}: swift --verbose --debug $@" + fi + swift --verbose --debug "$@" + local code=$? + if [ $code -ne 0 ] ;then + echo "ERROR: code = $code ; command = s3cmd --config=${s3config} --verbose --debug "$@"" + exit $code + fi +} + +mys3cmd() { + if [ -n "$debug" ] ;then + echo "${debug}: s3cmd --config=${s3config} --verbose --debug $@" + fi + s3cmd --config=${s3config} --verbose --debug "$@" + local code=$? 
+ if [ $code -ne 0 ] ;then + echo "ERROR: code = $code ; command = s3cmd --config=${s3config} --verbose --debug "$@"" + exit $code + fi +} + +mys3uploadkill() { + if [ $# -ne 5 ] ;then + echo "$0: error expecting 5 arguments" + exit 1 + fi + + local_file="$1" + remote_bkt="$2" + remote_obj="$3" + fifo="$4" + stop_part="$5" + + mkfifo $fifo + + s3cmd --config=${s3config} put $local_file \ + s3://${remote_bkt}/${remote_obj} \ + --progress \ + --multipart-chunk-size-mb=5 >$fifo & + set +e # don't allow errors to stop script + while read line ;do + echo "$line" | grep --quiet "part $stop_part " + if [ ${PIPESTATUS[1]} -eq 0 ] ;then + kill -9 $(jobs -p) + break + fi + done <$fifo + set -e + + rm -f $fifo +} + +mys3upload() { + obj=$1 + bucket=$2 + dest_obj=$3 + + mys3cmd put -q $obj s3://${bucket}/$dest_obj +} + +######################################################################## +# PREP + +create_users +dd if=/dev/urandom of=$big_obj bs=1M count=${big_size} +dd if=/dev/urandom of=$huge_obj bs=1M count=${huge_size} +touch $empty_obj + +quick_tests() { + echo TRY A SWIFT COMMAND + myswift upload swift-plain-ctr $big_obj --object-name swift-obj-test + myswift list + myswift list swift-plain-ctr + + echo TRY A RADOSGW-ADMIN COMMAND + radosgw-admin bucket list # make sure rgw is up and running +} + +######################################################################## +# S3 TESTS + +#################################### +# regular multipart test + +mys3cmd mb s3://multipart-bkt +mys3upload $huge_obj multipart-bkt multipart-obj +mys3cmd ls +mys3cmd ls s3://multipart-bkt + +#################################### +# multipart test with incomplete uploads + +bkt="incomplete-mp-bkt-1" + +mys3cmd mb s3://$bkt + +mys3uploadkill $huge_obj $bkt incomplete-mp-obj-c $fifo 20 + +# generate an incomplete multipart with more than 1,000 parts +mys3uploadkill $huge_obj $bkt incomplete-mp-obj-b $fifo 1005 + +# generate more than 1000 incomplet multiparts +for c in $(seq 1005) ;do + mys3uploadkill $huge_obj $bkt incomplete-mp-obj-c-$c $fifo 3 +done + +#################################### +# resharded bucket + +bkt=resharded-bkt-1 + +mys3cmd mb s3://$bkt + +for f in $(seq 8) ; do + dest_obj="reshard-obj-${f}" + mys3cmd put -q $big_obj s3://${bkt}/$dest_obj +done + +radosgw-admin bucket reshard --num-shards 3 --bucket=$bkt --yes-i-really-mean-it +radosgw-admin bucket reshard --num-shards 5 --bucket=$bkt --yes-i-really-mean-it + +#################################### +# versioned bucket + +if true ;then + echo "WARNING: versioned bucket test currently turned off" +else + bkt=versioned-bkt-1 + + mys3cmd mb s3://$bkt + + # bucket-enable-versioning $bkt + + for f in $(seq 3) ;do + for g in $(seq 10) ;do + dest_obj="versioned-obj-${g}" + mys3cmd put -q $big_obj s3://${bkt}/$dest_obj + done + done + + for g in $(seq 1 2 10) ;do + dest_obj="versioned-obj-${g}" + mys3cmd rm s3://${bkt}/$dest_obj + done +fi + +############################################################ +# copy small objects + +o_bkt="orig-bkt-1" +d_bkt="copy-bkt-1" +mys3cmd mb s3://$o_bkt + +for f in $(seq 4) ;do + dest_obj="orig-obj-$f" + mys3cmd put -q $big_obj s3://${o_bkt}/$dest_obj +done + +mys3cmd mb s3://$d_bkt + +mys3cmd cp s3://${o_bkt}/orig-obj-1 s3://${d_bkt}/copied-obj-1 +mys3cmd cp s3://${o_bkt}/orig-obj-3 s3://${d_bkt}/copied-obj-3 + +for f in $(seq 5 6) ;do + dest_obj="orig-obj-$f" + mys3cmd put -q $big_obj s3://${d_bkt}/$dest_obj +done + +############################################################ +# copy small objects and delete original 
+ +o_bkt="orig-bkt-2" +d_bkt="copy-bkt-2" + +mys3cmd mb s3://$o_bkt + +for f in $(seq 4) ;do + dest_obj="orig-obj-$f" + mys3cmd put -q $big_obj s3://${o_bkt}/$dest_obj +done + +mys3cmd mb s3://$d_bkt + +mys3cmd cp s3://${o_bkt}/orig-obj-1 s3://${d_bkt}/copied-obj-1 +mys3cmd cp s3://${o_bkt}/orig-obj-3 s3://${d_bkt}/copied-obj-3 + +for f in $(seq 5 6) ;do + dest_obj="orig-obj-$f" + mys3cmd put -q $big_obj s3://${d_bkt}/$dest_obj +done + +mys3cmd rb --recursive s3://${o_bkt} + +############################################################ +# copy multipart objects + +o_bkt="orig-mp-bkt-3" +d_bkt="copy-mp-bkt-3" + +mys3cmd mb s3://$o_bkt + +for f in $(seq 2) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${o_bkt}/$dest_obj +done + +mys3cmd mb s3://$d_bkt + +mys3cmd cp s3://${o_bkt}/orig-multipart-obj-1 \ + s3://${d_bkt}/copied-multipart-obj-1 + +for f in $(seq 5 5) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${d_bkt}/$dest_obj +done + + +############################################################ +# copy multipart objects and delete original + +o_bkt="orig-mp-bkt-4" +d_bkt="copy-mp-bkt-4" + +mys3cmd mb s3://$o_bkt + +for f in $(seq 2) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${o_bkt}/$dest_obj +done + +mys3cmd mb s3://$d_bkt + +mys3cmd cp s3://${o_bkt}/orig-multipart-obj-1 \ + s3://${d_bkt}/copied-multipart-obj-1 + +for f in $(seq 5 5) ;do + dest_obj="orig-multipart-obj-$f" + mys3cmd put -q $huge_obj s3://${d_bkt}/$dest_obj +done + +mys3cmd rb --recursive s3://$o_bkt + +######################################################################## +# SWIFT TESTS + +# 600MB +segment_size=629145600 + +############################################################ +# plain test + +for f in $(seq 4) ;do + myswift upload swift-plain-ctr $big_obj --object-name swift-obj-$f +done + +############################################################ +# zero-len test + +myswift upload swift-zerolen-ctr $empty_obj --object-name subdir/ +myswift upload swift-zerolen-ctr $big_obj --object-name subdir/abc1 +myswift upload swift-zerolen-ctr $empty_obj --object-name subdir/empty1 +myswift upload swift-zerolen-ctr $big_obj --object-name subdir/xyz1 + +############################################################ +# dlo test + +# upload in 300MB segments +myswift upload swift-dlo-ctr $huge_obj --object-name dlo-obj-1 \ + -S $segment_size + +############################################################ +# slo test + +# upload in 300MB segments +myswift upload swift-slo-ctr $huge_obj --object-name slo-obj-1 \ + -S $segment_size --use-slo + +############################################################ +# large object copy test + +# upload in 300MB segments +o_ctr=swift-orig-ctr +o_obj=slo-orig-obj-1 +d_ctr=swift-copy-ctr +d_obj=slo-copy-obj-1 +myswift upload $o_ctr $big_obj --object-name $o_obj + +myswift copy --destination /${d_ctr}/${d_obj} \ + $o_ctr $o_obj + +myswift delete $o_ctr $o_obj + +############################################################ +# huge dlo object copy test + +o_ctr=swift-orig-dlo-ctr-1 +o_obj=dlo-orig-dlo-obj-1 +d_ctr=swift-copy-dlo-ctr-1 +d_obj=dlo-copy-dlo-obj-1 + +myswift upload $o_ctr $huge_obj --object-name $o_obj \ + -S $segment_size + +myswift copy --destination /${d_ctr}/${d_obj} \ + $o_ctr $o_obj + +############################################################ +# huge dlo object copy and orig delete + +o_ctr=swift-orig-dlo-ctr-2 +o_obj=dlo-orig-dlo-obj-2 +d_ctr=swift-copy-dlo-ctr-2 +d_obj=dlo-copy-dlo-obj-2 + 
+myswift upload $o_ctr $huge_obj --object-name $o_obj \
+    -S $segment_size
+
+myswift copy --destination /${d_ctr}/${d_obj} \
+    $o_ctr $o_obj
+
+myswift delete $o_ctr $o_obj
+
+############################################################
+# huge slo object copy test
+
+o_ctr=swift-orig-slo-ctr-1
+o_obj=slo-orig-slo-obj-1
+d_ctr=swift-copy-slo-ctr-1
+d_obj=slo-copy-slo-obj-1
+myswift upload $o_ctr $huge_obj --object-name $o_obj \
+    -S $segment_size --use-slo
+
+myswift copy --destination /${d_ctr}/${d_obj} $o_ctr $o_obj
+
+############################################################
+# huge slo object copy test and orig delete
+
+o_ctr=swift-orig-slo-ctr-2
+o_obj=slo-orig-slo-obj-2
+d_ctr=swift-copy-slo-ctr-2
+d_obj=slo-copy-slo-obj-2
+myswift upload $o_ctr $huge_obj --object-name $o_obj \
+    -S $segment_size --use-slo
+
+myswift copy --destination /${d_ctr}/${d_obj} $o_ctr $o_obj
+
+myswift delete $o_ctr $o_obj
+
+########################################################################
+# FORCE GARBAGE COLLECTION
+
+sleep 6 # since for testing the age at which gc can happen is 5 secs
+radosgw-admin gc process --include-all
+
+
+########################################
+# DO ORPHAN LIST
+
+pool="default.rgw.buckets.data"
+
+rgw-orphan-list $pool
+
+# we only expect there to be one output file, but loop just in case
+ol_error=""
+for f in orphan-list-*.out ; do
+    if [ -s "$f" ] ;then # if file non-empty
+        ol_error="${ol_error}:$f"
+        echo "One or more orphans found in $f:"
+        cat "$f"
+    fi
+done
+
+if [ -n "$ol_error" ] ;then
+    echo "ERROR: orphans found when none expected"
+    exit 1
+fi
+
+########################################################################
+# CLEAN UP
+
+rm -f $empty_obj $big_obj $huge_obj $s3config
+
+success
diff --git a/qa/workunits/rgw/test_rgw_reshard.py b/qa/workunits/rgw/test_rgw_reshard.py
new file mode 100755
index 000000000..6326e7b17
--- /dev/null
+++ b/qa/workunits/rgw/test_rgw_reshard.py
@@ -0,0 +1,311 @@
+#!/usr/bin/python3
+
+import errno
+import time
+import logging as log
+import json
+import os
+from common import exec_cmd, boto_connect, create_user, put_objects, create_unlinked_objects
+
+"""
+RGW manual and dynamic resharding testing against a running instance
+"""
+# The test cases in this file have been annotated for inventory.
+# To extract the inventory (in csv format) use the command: +# +# grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' +# +# + +""" Constants """ +USER = 'tester' +DISPLAY_NAME = 'Testing' +ACCESS_KEY = 'NX5QOQKC6BH2IDN8HC7A' +SECRET_KEY = 'LnEsqNNqZIpkzauboDcLXLcYaWwLQ3Kop0zAnKIn' +BUCKET_NAME = 'a-bucket' +VER_BUCKET_NAME = 'myver' +INDEX_POOL = 'default.rgw.buckets.index' + +class BucketStats: + def __init__(self, bucket_name, bucket_id, num_objs=0, size_kb=0, num_shards=0): + self.bucket_name = bucket_name + self.bucket_id = bucket_id + self.num_objs = num_objs + self.size_kb = size_kb + self.num_shards = num_shards if num_shards > 0 else 1 + + def get_num_shards(self): + self.num_shards = get_bucket_num_shards(self.bucket_name, self.bucket_id) + + +def get_bucket_stats(bucket_name): + """ + function to get bucket stats + """ + cmd = exec_cmd("radosgw-admin bucket stats --bucket {}".format(bucket_name)) + json_op = json.loads(cmd) + #print(json.dumps(json_op, indent = 4, sort_keys=True)) + bucket_id = json_op['id'] + num_shards = json_op['num_shards'] + if len(json_op['usage']) > 0: + num_objects = json_op['usage']['rgw.main']['num_objects'] + size_kb = json_op['usage']['rgw.main']['size_kb'] + else: + num_objects = 0 + size_kb = 0 + log.debug(" \nBUCKET_STATS: \nbucket: {} id: {} num_objects: {} size_kb: {} num_shards: {}\n".format(bucket_name, bucket_id, + num_objects, size_kb, num_shards)) + return BucketStats(bucket_name, bucket_id, num_objects, size_kb, num_shards) + +def get_bucket_layout(bucket_name): + res = exec_cmd("radosgw-admin bucket layout --bucket {}".format(bucket_name)) + return json.loads(res) + +def get_bucket_shard0(bucket_name): + bucket_id = get_bucket_stats(bucket_name).bucket_id + index_gen = get_bucket_layout(bucket_name)['layout']['current_index']['gen'] + return '.dir.%s.%d.0' % (bucket_id, index_gen) + +def get_bucket_num_shards(bucket_name, bucket_id): + """ + function to get bucket num shards + """ + metadata = 'bucket.instance:' + bucket_name + ':' + bucket_id + cmd = exec_cmd('radosgw-admin metadata get {}'.format(metadata)) + json_op = json.loads(cmd) + num_shards = json_op['data']['bucket_info']['num_shards'] + return num_shards + +def run_bucket_reshard_cmd(bucket_name, num_shards, **kwargs): + cmd = 'radosgw-admin bucket reshard --bucket {} --num-shards {}'.format(bucket_name, num_shards) + cmd += ' --rgw-reshard-bucket-lock-duration 30' # reduce to minimum + if 'error_at' in kwargs: + cmd += ' --inject-error-at {}'.format(kwargs.pop('error_at')) + elif 'abort_at' in kwargs: + cmd += ' --inject-abort-at {}'.format(kwargs.pop('abort_at')) + if 'error_code' in kwargs: + cmd += ' --inject-error-code {}'.format(kwargs.pop('error_code')) + return exec_cmd(cmd, **kwargs) + +def test_bucket_reshard(conn, name, **fault): + # create a bucket with non-default ACLs to verify that reshard preserves them + bucket = conn.create_bucket(Bucket=name, ACL='authenticated-read') + grants = bucket.Acl().grants + + objs = [] + try: + # create objs + for i in range(0, 20): + objs += [bucket.put_object(Key='key' + str(i), Body=b"some_data")] + + old_shard_count = get_bucket_stats(name).num_shards + num_shards_expected = old_shard_count + 1 + + # try reshard with fault injection + _, ret = run_bucket_reshard_cmd(name, num_shards_expected, check_retcode=False, **fault) + + if fault.get('error_code') == errno.ECANCELED: + assert(ret == 0) # expect ECANCELED to retry and succeed + else: + assert(ret != 0 and ret != errno.EBUSY) + + # check shard count + cur_shard_count = 
get_bucket_stats(name).num_shards + assert(cur_shard_count == old_shard_count) + + # verify that the bucket is writeable by deleting an object + objs.pop().delete() + + assert grants == bucket.Acl().grants # recheck grants after cancel + + # retry reshard without fault injection. if radosgw-admin aborted, + # we'll have to retry until the reshard lock expires + while True: + _, ret = run_bucket_reshard_cmd(name, num_shards_expected, check_retcode=False) + if ret == errno.EBUSY: + log.info('waiting 30 seconds for reshard lock to expire...') + time.sleep(30) + continue + assert(ret == 0) + break + + # recheck shard count + final_shard_count = get_bucket_stats(name).num_shards + assert(final_shard_count == num_shards_expected) + + assert grants == bucket.Acl().grants # recheck grants after commit + finally: + # cleanup on resharded bucket must succeed + bucket.delete_objects(Delete={'Objects':[{'Key':o.key} for o in objs]}) + bucket.delete() + + +def main(): + """ + execute manual and dynamic resharding commands + """ + create_user(USER, DISPLAY_NAME, ACCESS_KEY, SECRET_KEY) + + connection = boto_connect(ACCESS_KEY, SECRET_KEY) + + # create a bucket + bucket = connection.create_bucket(Bucket=BUCKET_NAME) + ver_bucket = connection.create_bucket(Bucket=VER_BUCKET_NAME) + connection.BucketVersioning(VER_BUCKET_NAME).enable() + + bucket_acl = connection.BucketAcl(BUCKET_NAME).load() + ver_bucket_acl = connection.BucketAcl(VER_BUCKET_NAME).load() + + # TESTCASE 'reshard-add','reshard','add','add bucket to resharding queue','succeeds' + log.debug('TEST: reshard add\n') + + num_shards_expected = get_bucket_stats(BUCKET_NAME).num_shards + 1 + cmd = exec_cmd('radosgw-admin reshard add --bucket {} --num-shards {}'.format(BUCKET_NAME, num_shards_expected)) + cmd = exec_cmd('radosgw-admin reshard list') + json_op = json.loads(cmd) + log.debug('bucket name {}'.format(json_op[0]['bucket_name'])) + assert json_op[0]['bucket_name'] == BUCKET_NAME + assert json_op[0]['tentative_new_num_shards'] == num_shards_expected + + # TESTCASE 'reshard-process','reshard','','process bucket resharding','succeeds' + log.debug('TEST: reshard process\n') + cmd = exec_cmd('radosgw-admin reshard process') + time.sleep(5) + # check bucket shards num + bucket_stats1 = get_bucket_stats(BUCKET_NAME) + if bucket_stats1.num_shards != num_shards_expected: + log.error("Resharding failed on bucket {}. Expected number of shards are not created\n".format(BUCKET_NAME)) + + # TESTCASE 'reshard-add','reshard','add','add non empty bucket to resharding queue','succeeds' + log.debug('TEST: reshard add non empty bucket\n') + # create objs + num_objs = 8 + for i in range(0, num_objs): + connection.Object(BUCKET_NAME, ('key'+str(i))).put(Body=b"some_data") + + num_shards_expected = get_bucket_stats(BUCKET_NAME).num_shards + 1 + cmd = exec_cmd('radosgw-admin reshard add --bucket {} --num-shards {}'.format(BUCKET_NAME, num_shards_expected)) + cmd = exec_cmd('radosgw-admin reshard list') + json_op = json.loads(cmd) + assert json_op[0]['bucket_name'] == BUCKET_NAME + assert json_op[0]['tentative_new_num_shards'] == num_shards_expected + + # TESTCASE 'reshard process ,'reshard','process','reshard non empty bucket','succeeds' + log.debug('TEST: reshard process non empty bucket\n') + cmd = exec_cmd('radosgw-admin reshard process') + # check bucket shards num + bucket_stats1 = get_bucket_stats(BUCKET_NAME) + if bucket_stats1.num_shards != num_shards_expected: + log.error("Resharding failed on bucket {}. 
Expected number of shards are not created\n".format(BUCKET_NAME)) + + # TESTCASE 'manual bucket resharding','inject error','fail','check bucket accessibility', 'retry reshard' + log.debug('TEST: reshard bucket with EIO injected at set_target_layout\n') + test_bucket_reshard(connection, 'error-at-set-target-layout', error_at='set_target_layout') + log.debug('TEST: reshard bucket with ECANCELED injected at set_target_layout\n') + test_bucket_reshard(connection, 'error-at-set-target-layout', error_at='set_target_layout', error_code=errno.ECANCELED) + log.debug('TEST: reshard bucket with abort at set_target_layout\n') + test_bucket_reshard(connection, 'abort-at-set-target-layout', abort_at='set_target_layout') + + log.debug('TEST: reshard bucket with EIO injected at block_writes\n') + test_bucket_reshard(connection, 'error-at-block-writes', error_at='block_writes') + log.debug('TEST: reshard bucket with abort at block_writes\n') + test_bucket_reshard(connection, 'abort-at-block-writes', abort_at='block_writes') + + log.debug('TEST: reshard bucket with EIO injected at commit_target_layout\n') + test_bucket_reshard(connection, 'error-at-commit-target-layout', error_at='commit_target_layout') + log.debug('TEST: reshard bucket with ECANCELED injected at commit_target_layout\n') + test_bucket_reshard(connection, 'error-at-commit-target-layout', error_at='commit_target_layout', error_code=errno.ECANCELED) + log.debug('TEST: reshard bucket with abort at commit_target_layout\n') + test_bucket_reshard(connection, 'abort-at-commit-target-layout', abort_at='commit_target_layout') + + log.debug('TEST: reshard bucket with EIO injected at do_reshard\n') + test_bucket_reshard(connection, 'error-at-do-reshard', error_at='do_reshard') + log.debug('TEST: reshard bucket with abort at do_reshard\n') + test_bucket_reshard(connection, 'abort-at-do-reshard', abort_at='do_reshard') + + # TESTCASE 'versioning reshard-','bucket', reshard','versioning reshard','succeeds' + log.debug(' test: reshard versioned bucket') + num_shards_expected = get_bucket_stats(VER_BUCKET_NAME).num_shards + 1 + cmd = exec_cmd('radosgw-admin bucket reshard --bucket {} --num-shards {}'.format(VER_BUCKET_NAME, + num_shards_expected)) + # check bucket shards num + ver_bucket_stats = get_bucket_stats(VER_BUCKET_NAME) + assert ver_bucket_stats.num_shards == num_shards_expected + + # TESTCASE 'check acl' + new_bucket_acl = connection.BucketAcl(BUCKET_NAME).load() + assert new_bucket_acl == bucket_acl + new_ver_bucket_acl = connection.BucketAcl(VER_BUCKET_NAME).load() + assert new_ver_bucket_acl == ver_bucket_acl + + # TESTCASE 'check reshard removes olh entries with empty name' + log.debug(' test: reshard removes olh entries with empty name') + bucket.objects.all().delete() + + + # get name of shard 0 object, add a bogus olh entry with empty name + bucket_shard0 = get_bucket_shard0(BUCKET_NAME) + if 'CEPH_ROOT' in os.environ: + k = '%s/qa/workunits/rgw/olh_noname_key' % os.environ['CEPH_ROOT'] + v = '%s/qa/workunits/rgw/olh_noname_val' % os.environ['CEPH_ROOT'] + else: + k = 'olh_noname_key' + v = 'olh_noname_val' + exec_cmd('rados -p %s setomapval %s --omap-key-file %s < %s' % (INDEX_POOL, bucket_shard0, k, v)) + + # check that bi list has one entry with empty name + cmd = exec_cmd('radosgw-admin bi list --bucket %s' % BUCKET_NAME) + json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80 + assert len(json_op) == 1 + assert json_op[0]['entry']['key']['name'] == '' + + # reshard to prune the bogus olh + cmd = 
exec_cmd('radosgw-admin bucket reshard --bucket %s --num-shards %s --yes-i-really-mean-it' % (BUCKET_NAME, 1))
+
+    # check that the bi list has zero entries
+    cmd = exec_cmd('radosgw-admin bi list --bucket %s' % BUCKET_NAME)
+    json_op = json.loads(cmd.decode('utf-8', 'ignore')) # ignore utf-8 can't decode 0x80
+    assert len(json_op) == 0
+
+    # TESTCASE 'check that PUT succeeds during reshard'
+    log.debug(' test: PUT succeeds during reshard')
+    num_shards = get_bucket_stats(VER_BUCKET_NAME).num_shards
+    exec_cmd('''radosgw-admin --inject-delay-at=do_reshard --inject-delay-ms=5000 \
+              bucket reshard --bucket {} --num-shards {}'''
+             .format(VER_BUCKET_NAME, num_shards + 1), wait = False)
+    time.sleep(1)
+    ver_bucket.put_object(Key='put_during_reshard', Body=b"some_data")
+    log.debug('put object successful')
+
+    # TESTCASE 'check that bucket stats are correct after reshard with unlinked entries'
+    log.debug('TEST: check that bucket stats are correct after reshard with unlinked entries\n')
+    ver_bucket.object_versions.all().delete()
+    ok_keys = ['a', 'b', 'c']
+    unlinked_keys = ['x', 'y', 'z']
+    put_objects(ver_bucket, ok_keys)
+    create_unlinked_objects(connection, ver_bucket, unlinked_keys)
+    cmd = exec_cmd(f'radosgw-admin bucket reshard --bucket {VER_BUCKET_NAME} --num-shards 17 --yes-i-really-mean-it')
+    out = exec_cmd(f'radosgw-admin bucket check unlinked --bucket {VER_BUCKET_NAME} --fix --min-age-hours 0 --rgw-olh-pending-timeout-sec 0 --dump-keys')
+    json_out = json.loads(out)
+    assert len(json_out) == len(unlinked_keys)
+    ver_bucket.object_versions.all().delete()
+    out = exec_cmd(f'radosgw-admin bucket stats --bucket {VER_BUCKET_NAME}')
+    json_out = json.loads(out)
+    log.debug(json_out['usage'])
+    assert json_out['usage']['rgw.main']['size'] == 0
+    assert json_out['usage']['rgw.main']['num_objects'] == 0
+    assert json_out['usage']['rgw.main']['size_actual'] == 0
+    assert json_out['usage']['rgw.main']['size_kb'] == 0
+    assert json_out['usage']['rgw.main']['size_kb_actual'] == 0
+    assert json_out['usage']['rgw.main']['size_kb_utilized'] == 0
+
+    # Clean up
+    log.debug("Deleting bucket {}".format(BUCKET_NAME))
+    bucket.objects.all().delete()
+    bucket.delete()
+    log.debug("Deleting bucket {}".format(VER_BUCKET_NAME))
+    ver_bucket.object_versions.all().delete()
+    ver_bucket.delete()
+
+main()
+log.info("Completed resharding tests")
diff --git a/qa/workunits/rgw/test_rgw_s3_mp_reupload.py b/qa/workunits/rgw/test_rgw_s3_mp_reupload.py
new file mode 100755
index 000000000..b3cb2d5ab
--- /dev/null
+++ b/qa/workunits/rgw/test_rgw_s3_mp_reupload.py
@@ -0,0 +1,121 @@
+import boto3
+import botocore.exceptions
+import sys
+import os
+import subprocess
+
+#boto3.set_stream_logger(name='botocore')
+
+# handles three optional system arguments:
+# <bucket-name> : default is "bkt314738362229"
+# <0 or 1> : 0 -> upload aborted, 1 -> completed; default is completed
+# <0 or 1> : 0 -> non-versioned bucket, 1 -> versioned; default is non-versioned
+
+if len(sys.argv) >= 2:
+    bucket_name = sys.argv[1]
+else:
+    bucket_name = "bkt314738362229"
+print("bucket name is %s" % bucket_name)
+
+complete_mpu = True
+if len(sys.argv) >= 3:
+    complete_mpu = int(sys.argv[2]) > 0
+
+versioned_bucket = False
+if len(sys.argv) >= 4:
+    versioned_bucket = int(sys.argv[3]) > 0
+
+rgw_host = os.environ['RGW_HOST']
+access_key = os.environ['RGW_ACCESS_KEY']
+secret_key = os.environ['RGW_SECRET_KEY']
+
+try:
+    endpoint='http://%s:%d' % (rgw_host, 80)
+    client = boto3.client('s3',
+                          endpoint_url=endpoint,
+                          aws_access_key_id=access_key,
+                          aws_secret_access_key=secret_key)
+    res = client.create_bucket(Bucket=bucket_name)
+except 
botocore.exceptions.EndpointConnectionError:
+    try:
+        endpoint='https://%s:%d' % (rgw_host, 443)
+        client = boto3.client('s3',
+                              endpoint_url=endpoint,
+                              verify=False,
+                              aws_access_key_id=access_key,
+                              aws_secret_access_key=secret_key)
+        res = client.create_bucket(Bucket=bucket_name)
+    except botocore.exceptions.EndpointConnectionError:
+        endpoint='http://%s:%d' % (rgw_host, 8000)
+        client = boto3.client('s3',
+                              endpoint_url=endpoint,
+                              aws_access_key_id=access_key,
+                              aws_secret_access_key=secret_key)
+        res = client.create_bucket(Bucket=bucket_name)
+
+print("endpoint is %s" % endpoint)
+
+if versioned_bucket:
+    res = client.put_bucket_versioning(
+        Bucket=bucket_name,
+        VersioningConfiguration={
+            'MFADelete': 'Disabled',
+            'Status': 'Enabled'}
+    )
+
+key = "mpu_test4"
+nparts = 2
+ndups = 11
+do_reupload = True
+
+part_path = "/tmp/mp_part_5m"
+subprocess.run(["dd", "if=/dev/urandom", "of=" + part_path, "bs=1M", "count=5"], check=True)
+
+f = open(part_path, 'rb')
+
+res = client.create_multipart_upload(Bucket=bucket_name, Key=key)
+mpu_id = res["UploadId"]
+
+print("start UploadId=%s" % (mpu_id))
+
+parts = []
+parts2 = []
+
+for ix in range(0,nparts):
+    part_num = ix + 1
+    f.seek(0)
+    res = client.upload_part(Body=f, Bucket=bucket_name, Key=key,
+                             UploadId=mpu_id, PartNumber=part_num)
+    # save
+    etag = res['ETag']
+    part = {'ETag': etag, 'PartNumber': part_num}
+    print("phase 1 uploaded part %s" % part)
+    parts.append(part)
+
+if do_reupload:
+    # just re-upload part 1
+    part_num = 1
+    for ix in range(0,ndups):
+        f.seek(0)
+        res = client.upload_part(Body=f, Bucket=bucket_name, Key=key,
+                                 UploadId=mpu_id, PartNumber=part_num)
+        etag = res['ETag']
+        part = {'ETag': etag, 'PartNumber': part_num}
+        print ("phase 2 uploaded part %s" % part)
+
+        # save
+        etag = res['ETag']
+        part = {'ETag': etag, 'PartNumber': part_num}
+        parts2.append(part)
+
+if complete_mpu:
+    print("completing multipart upload, parts=%s" % parts)
+    res = client.complete_multipart_upload(
+        Bucket=bucket_name, Key=key, UploadId=mpu_id,
+        MultipartUpload={'Parts': parts})
+else:
+    print("aborting multipart upload, parts=%s" % parts)
+    res = client.abort_multipart_upload(
+        Bucket=bucket_name, Key=key, UploadId=mpu_id)
+
+# clean up
+subprocess.run(["rm", "-f", part_path], check=True)
diff --git a/qa/workunits/rgw/test_rgw_s3_mp_reupload.sh b/qa/workunits/rgw/test_rgw_s3_mp_reupload.sh
new file mode 100755
index 000000000..5d73fd048
--- /dev/null
+++ b/qa/workunits/rgw/test_rgw_s3_mp_reupload.sh
@@ -0,0 +1,110 @@
+#!/usr/bin/env bash
+
+# INITIALIZATION
+
+mydir=$(dirname $0)
+data_pool=default.rgw.buckets.data
+orphan_list_out=/tmp/orphan_list.$$
+radoslist_out=/tmp/radoslist.$$
+rados_ls_out=/tmp/rados_ls.$$
+diff_out=/tmp/diff.$$
+
+rgw_host="$(hostname --fqdn)"
+echo "INFO: fully qualified domain name: $rgw_host"
+
+export RGW_ACCESS_KEY="0555b35654ad1656d804"
+export RGW_SECRET_KEY="h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q=="
+export RGW_HOST="${RGW_HOST:-$rgw_host}"
+
+# random argument determines if multipart is aborted or completed 50/50
+outcome=$((RANDOM % 2))
+if [ $outcome -eq 0 ] ;then
+    echo "== TESTING *ABORTING* MULTIPART UPLOAD WITH RE-UPLOADS =="
+else
+    echo "== TESTING *COMPLETING* MULTIPART UPLOAD WITH RE-UPLOADS =="
+fi
+
+# random argument determines if the bucket is versioned or not 50/50
+versioning=$((RANDOM % 2))
+if [ $versioning -eq 0 ] ;then
+    echo "== TESTING NON-VERSIONED BUCKET =="
+else
+    echo "== TESTING VERSIONED BUCKET =="
+fi
+
+# create a randomized bucket name
+bucket="reupload-bkt-$((RANDOM % 899999 + 100000))" + + +# SET UP PYTHON VIRTUAL ENVIRONMENT + +# install boto3 +python3 -m venv $mydir +source $mydir/bin/activate +pip install pip --upgrade +pip install boto3 + + +# CREATE RGW USER IF NECESSARY + +if radosgw-admin user info --access-key $RGW_ACCESS_KEY 2>/dev/null ;then + echo INFO: user already exists +else + echo INFO: creating user + radosgw-admin user create --uid testid \ + --access-key $RGW_ACCESS_KEY \ + --secret $RGW_SECRET_KEY \ + --display-name 'M. Tester' \ + --email tester@ceph.com 2>/dev/null +fi + + +# RUN REUPLOAD TEST + +$mydir/bin/python3 ${mydir}/test_rgw_s3_mp_reupload.py $bucket $outcome $versioning + + +# ANALYZE FOR ERRORS +# (NOTE: for now we're choosing not to use the rgw-orphan-list tool) + +# force garbage collection to remove extra parts +radosgw-admin gc process --include-all 2>/dev/null + +marker=$(radosgw-admin metadata get bucket:$bucket 2>/dev/null | grep bucket_id | sed 's/.*: "\(.*\)".*/\1/') + +# determine expected rados objects +radosgw-admin bucket radoslist --bucket=$bucket 2>/dev/null | sort >$radoslist_out +echo "radosgw-admin bucket radoslist:" +cat $radoslist_out + +# determine found rados objects +rados ls -p $data_pool 2>/dev/null | grep "^$marker" | sort >$rados_ls_out +echo "rados ls:" +cat $rados_ls_out + +# compare expected and found +diff $radoslist_out $rados_ls_out >$diff_out +if [ $(cat $diff_out | wc -l) -ne 0 ] ;then + error=1 + echo "ERROR: Found differences between expected and actual rados objects for test bucket." + echo " note: indicators: '>' found but not expected; '<' expected but not found." + cat $diff_out +fi + + +# CLEAN UP + +deactivate + +rm -f $orphan_list_out $radoslist_out $rados_ls_out $diff_out + + +# PRODUCE FINAL RESULTS + +if [ -n "$error" ] ;then + echo "== FAILED ==" + exit 1 +fi + +echo "== PASSED ==" +exit 0 diff --git a/qa/workunits/rgw/test_rgw_throttle.sh b/qa/workunits/rgw/test_rgw_throttle.sh new file mode 100755 index 000000000..f637b8f08 --- /dev/null +++ b/qa/workunits/rgw/test_rgw_throttle.sh @@ -0,0 +1,5 @@ +#!/bin/sh -e + +ceph_test_rgw_throttle + +exit 0 diff --git a/qa/workunits/rgw/test_rgw_versioning.py b/qa/workunits/rgw/test_rgw_versioning.py new file mode 100755 index 000000000..fc69e138d --- /dev/null +++ b/qa/workunits/rgw/test_rgw_versioning.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 + +import logging as log +import json +import uuid +import botocore +import time +from common import exec_cmd, create_user, boto_connect +from botocore.config import Config + +""" +Tests behavior of bucket versioning. +""" +# The test cases in this file have been annotated for inventory. 
+# To extract the inventory (in csv format) use the command: +# +# grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' +# +# + +""" Constants """ +USER = 'versioning-tester' +DISPLAY_NAME = 'Versioning Testing' +ACCESS_KEY = 'LTA662PVVDTDWX6M2AB0' +SECRET_KEY = 'pvtchqajgzqx5581t6qbddbkj0bgf3a69qdkjcea' +BUCKET_NAME = 'versioning-bucket' +DATA_POOL = 'default.rgw.buckets.data' + +def main(): + """ + execute versioning tests + """ + create_user(USER, DISPLAY_NAME, ACCESS_KEY, SECRET_KEY) + + connection = boto_connect(ACCESS_KEY, SECRET_KEY, Config(retries = { + 'total_max_attempts': 1, + })) + + # pre-test cleanup + try: + bucket = connection.Bucket(BUCKET_NAME) + bucket.objects.all().delete() + bucket.object_versions.all().delete() + bucket.delete() + except botocore.exceptions.ClientError as e: + if not e.response['Error']['Code'] == 'NoSuchBucket': + raise + + bucket = connection.create_bucket(Bucket=BUCKET_NAME) + connection.BucketVersioning(BUCKET_NAME).enable() + + # reproducer for bug from https://tracker.ceph.com/issues/59663 + # TESTCASE 'verify that index entries and OLH objects are cleaned up after redundant deletes' + log.debug('TEST: verify that index entries and OLH objects are cleaned up after redundant deletes\n') + key = str(uuid.uuid4()) + resp = bucket.Object(key).delete() + assert 'DeleteMarker' in resp, 'DeleteMarker key not present in response' + assert resp['DeleteMarker'], 'DeleteMarker value not True in response' + assert 'VersionId' in resp, 'VersionId key not present in response' + version_id = resp['VersionId'] + bucket.Object(key).delete() + connection.ObjectVersion(bucket.name, key, version_id).delete() + # bucket index should now be empty + out = exec_cmd(f'radosgw-admin bi list --bucket {BUCKET_NAME}') + json_out = json.loads(out.replace(b'\x80', b'0x80')) + assert len(json_out) == 0, 'bucket index was not empty after all objects were deleted' + + (_out, ret) = exec_cmd(f'rados -p {DATA_POOL} ls | grep {key}', check_retcode=False) + assert ret != 0, 'olh object was not cleaned up' + + # TESTCASE 'verify that index entries and OLH objects are cleaned up after index linking error' + log.debug('TEST: verify that index entries and OLH objects are cleaned up after index linking error\n') + key = str(uuid.uuid4()) + try: + exec_cmd('ceph config set client rgw_debug_inject_set_olh_err 2') + time.sleep(1) + bucket.Object(key).delete() + finally: + exec_cmd('ceph config rm client rgw_debug_inject_set_olh_err') + out = exec_cmd(f'radosgw-admin bi list --bucket {BUCKET_NAME}') + json_out = json.loads(out.replace(b'\x80', b'0x80')) + assert len(json_out) == 0, 'bucket index was not empty after op failed' + (_out, ret) = exec_cmd(f'rados -p {DATA_POOL} ls | grep {key}', check_retcode=False) + assert ret != 0, 'olh object was not cleaned up' + + # TESTCASE 'verify that original null object version is intact after failed olh upgrade' + log.debug('TEST: verify that original null object version is intact after failed olh upgrade\n') + connection.BucketVersioning(BUCKET_NAME).suspend() + key = str(uuid.uuid4()) + put_resp = bucket.put_object(Key=key, Body=b"data") + connection.BucketVersioning(BUCKET_NAME).enable() + try: + exec_cmd('ceph config set client rgw_debug_inject_set_olh_err 2') + time.sleep(1) + # expected to fail due to the above error injection + bucket.put_object(Key=key, Body=b"new data") + except Exception as e: + log.debug(e) + finally: + exec_cmd('ceph config rm client rgw_debug_inject_set_olh_err') + get_resp = bucket.Object(key).get() + assert put_resp.e_tag == 
get_resp['ETag'], 'get did not return null version with correct etag' + + # Clean up + log.debug("Deleting bucket {}".format(BUCKET_NAME)) + bucket.object_versions.all().delete() + bucket.delete() + +main() +log.info("Completed bucket versioning tests") diff --git a/qa/workunits/suites/blogbench.sh b/qa/workunits/suites/blogbench.sh new file mode 100755 index 000000000..a05d8d21c --- /dev/null +++ b/qa/workunits/suites/blogbench.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -e + +echo "getting blogbench" +wget http://download.ceph.com/qa/blogbench-1.0.tar.bz2 +#cp /home/gregf/src/blogbench-1.0.tar.bz2 . +tar -xvf blogbench-1.0.tar.bz2 +cd blogbench-1.0/ +echo "making blogbench" +./configure +make +cd src +mkdir blogtest_in +echo "running blogbench" +./blogbench -d blogtest_in diff --git a/qa/workunits/suites/bonnie.sh b/qa/workunits/suites/bonnie.sh new file mode 100755 index 000000000..b60cc6a5e --- /dev/null +++ b/qa/workunits/suites/bonnie.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -e + +bonnie_bin=`which bonnie++` +[ $? -eq 1 ] && bonnie_bin=/usr/sbin/bonnie++ + +uid_flags="" +[ "`id -u`" == "0" ] && uid_flags="-u root" + +$bonnie_bin $uid_flags -n 100 diff --git a/qa/workunits/suites/cephfs_journal_tool_smoke.sh b/qa/workunits/suites/cephfs_journal_tool_smoke.sh new file mode 100755 index 000000000..3fe01ed63 --- /dev/null +++ b/qa/workunits/suites/cephfs_journal_tool_smoke.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +set -e +set -x + +export BIN="${BIN:-cephfs-journal-tool --rank=cephfs:0}" +export JOURNAL_FILE=/tmp/journal.bin +export JSON_OUTPUT=/tmp/json.tmp +export BINARY_OUTPUT=/tmp/binary.tmp + +if [ -d $BINARY_OUTPUT ] ; then + rm -rf $BINARY_OUTPUT +fi + +# Check that the import/export stuff really works as expected +# first because it's used as the reset method between +# following checks. +echo "Testing that export/import cycle preserves state" +HEADER_STATE=`$BIN header get` +EVENT_LIST=`$BIN event get list` +$BIN journal export $JOURNAL_FILE +$BIN journal import $JOURNAL_FILE +NEW_HEADER_STATE=`$BIN header get` +NEW_EVENT_LIST=`$BIN event get list` + +if [ ! "$HEADER_STATE" = "$NEW_HEADER_STATE" ] ; then + echo "Import failed to preserve header state" + echo $HEADER_STATE + echo $NEW_HEADER_STATE + exit -1 +fi + +if [ ! "$EVENT_LIST" = "$NEW_EVENT_LIST" ] ; then + echo "Import failed to preserve event state" + echo $EVENT_LIST + echo $NEW_EVENT_LIST + exit -1 +fi + +echo "Testing 'journal' commands..." + +# Simplest thing: print the vital statistics of the journal +$BIN journal inspect +$BIN header get + +# Make a copy of the journal in its original state +$BIN journal export $JOURNAL_FILE +if [ ! -s $JOURNAL_FILE ] ; then + echo "Export to $JOURNAL_FILE failed" + exit -1 +fi + +# Can we execute a journal reset? +$BIN journal reset +$BIN journal inspect +$BIN header get + +echo "Rolling back journal to original state..." +$BIN journal import $JOURNAL_FILE + +echo "Testing 'header' commands..." +$BIN header get +$BIN header set write_pos 123 +$BIN header set expire_pos 123 +$BIN header set trimmed_pos 123 + +echo "Rolling back journal to original state..." +$BIN journal import $JOURNAL_FILE + +echo "Testing 'event' commands..." +$BIN event get summary +$BIN event get --type=UPDATE --path=/ --inode=0 --frag=0x100 summary +$BIN event get json --path $JSON_OUTPUT +if [ ! -s $JSON_OUTPUT ] ; then + echo "Export to $JSON_OUTPUT failed" + exit -1 +fi +$BIN event get binary --path $BINARY_OUTPUT +if [ ! 
-s $BINARY_OUTPUT ] ; then + echo "Export to $BINARY_OUTPUT failed" + exit -1 +fi +$BIN event recover_dentries summary +$BIN event splice summary + +# Tests finish. +# Metadata objects have been modified by the 'event recover_dentries' command. +# Journal is no long consistent with respect to metadata objects (especially inotable). +# To ensure mds successfully replays its journal, we need to do journal reset. +$BIN journal reset +cephfs-table-tool all reset session + diff --git a/qa/workunits/suites/dbench-short.sh b/qa/workunits/suites/dbench-short.sh new file mode 100755 index 000000000..b0da02275 --- /dev/null +++ b/qa/workunits/suites/dbench-short.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +set -e + +dbench 1 diff --git a/qa/workunits/suites/dbench.sh b/qa/workunits/suites/dbench.sh new file mode 100755 index 000000000..32c893b45 --- /dev/null +++ b/qa/workunits/suites/dbench.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +set -e + +dbench 1 +dbench 10 diff --git a/qa/workunits/suites/ffsb.sh b/qa/workunits/suites/ffsb.sh new file mode 100755 index 000000000..bf95a05c4 --- /dev/null +++ b/qa/workunits/suites/ffsb.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +set -e + +mydir=`dirname $0` + +# try it again if the clone is slow and the second time +trap -- 'retry' EXIT +retry() { + rm -rf ffsb + # double the timeout value + timeout 3600 git clone https://git.ceph.com/ffsb.git --depth 1 +} +rm -rf ffsb +timeout 1800 git clone https://git.ceph.com/ffsb.git --depth 1 +trap - EXIT + +cd ffsb +./configure +make +cd .. +mkdir tmp +cd tmp + +for f in $mydir/*.ffsb +do + ../ffsb/ffsb $f +done +cd .. +rm -r tmp ffsb* + diff --git a/qa/workunits/suites/fio.sh b/qa/workunits/suites/fio.sh new file mode 100755 index 000000000..ee69de81c --- /dev/null +++ b/qa/workunits/suites/fio.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +set -x + +gen_fio_file() { + iter=$1 + f=$2 + cat > randio-$$-${iter}.fio <<EOF +[randio] +blocksize_range=32m:128m +blocksize_unaligned=1 +filesize=10G:20G +readwrite=randrw +runtime=300 +size=20G +filename=${f} +EOF +} + +sudo apt-get -y install fio +for i in $(seq 1 20); do + fcount=$(ls donetestfile* 2>/dev/null | wc -l) + donef="foo" + fiof="bar" + if test ${fcount} -gt 0; then + # choose random file + r=$[ ${RANDOM} % ${fcount} ] + testfiles=( $(ls donetestfile*) ) + donef=${testfiles[${r}]} + fiof=$(echo ${donef} | sed -e "s|done|fio|") + gen_fio_file $i ${fiof} + else + fiof=fiotestfile.$$.$i + donef=donetestfile.$$.$i + gen_fio_file $i ${fiof} + fi + + sudo rm -f ${donef} + sudo fio randio-$$-$i.fio + sudo ln ${fiof} ${donef} + ls -la +done diff --git a/qa/workunits/suites/fsstress.sh b/qa/workunits/suites/fsstress.sh new file mode 100755 index 000000000..e5da5b439 --- /dev/null +++ b/qa/workunits/suites/fsstress.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -ex + +mkdir -p fsstress +pushd fsstress +wget -q -O ltp-full.tgz http://download.ceph.com/qa/ltp-full-20091231.tgz +tar xzf ltp-full.tgz +pushd ltp-full-20091231/testcases/kernel/fs/fsstress +make +BIN=$(readlink -f fsstress) +popd +popd + +T=$(mktemp -d -p .) +"$BIN" -d "$T" -l 1 -n 1000 -p 10 -v +rm -rf -- "$T" diff --git a/qa/workunits/suites/fsx.sh b/qa/workunits/suites/fsx.sh new file mode 100755 index 000000000..0d5ba3a58 --- /dev/null +++ b/qa/workunits/suites/fsx.sh @@ -0,0 +1,16 @@ +#!/bin/sh -x + +set -e + +git clone https://git.ceph.com/xfstests-dev.git +cd xfstests-dev +git checkout 12973fc04fd10d4af086901e10ffa8e48866b735 +make -j4 +cd .. +cp xfstests-dev/ltp/fsx . 
+ +OPTIONS="-z" # don't use zero range calls; not supported by cephfs + +./fsx $OPTIONS 1MB -N 50000 -p 10000 -l 1048576 +./fsx $OPTIONS 10MB -N 50000 -p 10000 -l 10485760 +./fsx $OPTIONS 100MB -N 50000 -p 10000 -l 104857600 diff --git a/qa/workunits/suites/fsync-tester.sh b/qa/workunits/suites/fsync-tester.sh new file mode 100755 index 000000000..6e32786ea --- /dev/null +++ b/qa/workunits/suites/fsync-tester.sh @@ -0,0 +1,13 @@ +#!/bin/sh -x + +set -e + +# To skirt around GPL compatibility issues: +wget http://download.ceph.com/qa/fsync-tester.c +gcc -D_GNU_SOURCE fsync-tester.c -o fsync-tester + +./fsync-tester + +echo $PATH +whereis lsof +lsof diff --git a/qa/workunits/suites/iogen.sh b/qa/workunits/suites/iogen.sh new file mode 100755 index 000000000..d92b87083 --- /dev/null +++ b/qa/workunits/suites/iogen.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +set -e + +echo "getting iogen" +wget http://download.ceph.com/qa/iogen_3.1p0.tar +tar -xvzf iogen_3.1p0.tar +cd iogen_3.1p0 +echo "making iogen" +make +echo "running iogen" +./iogen -n 5 -s 2g +echo "sleep for 10 min" +sleep 600 +echo "stopping iogen" +./iogen -k + +echo "OK" diff --git a/qa/workunits/suites/iozone-sync.sh b/qa/workunits/suites/iozone-sync.sh new file mode 100755 index 000000000..a37962d30 --- /dev/null +++ b/qa/workunits/suites/iozone-sync.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +set -e + +# basic tests of O_SYNC, O_DSYNC, O_RSYNC +# test O_SYNC +iozone -c -e -s 512M -r 1M -t 1 -F osync1 -i 0 -i 1 -o +# test O_DSYNC +iozone -c -e -s 512M -r 1M -t 1 -F odsync1 -i 0 -i 1 -+D +# test O_RSYNC +iozone -c -e -s 512M -r 1M -t 1 -F orsync1 -i 0 -i 1 -+r + +# test same file with O_SYNC in one process, buffered in the other +# the sync test starts first, so the buffered test should blow +# past it and +iozone -c -e -s 512M -r 1M -t 1 -F osync2 -i 0 -i 1 -o & +sleep 1 +iozone -c -e -s 512M -r 256K -t 1 -F osync2 -i 0 +wait $! + +# test same file with O_SYNC from different threads +iozone -c -e -s 512M -r 1M -t 2 -F osync3 -i 2 -o diff --git a/qa/workunits/suites/iozone.sh b/qa/workunits/suites/iozone.sh new file mode 100755 index 000000000..7dc50cb0b --- /dev/null +++ b/qa/workunits/suites/iozone.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +set -ex + +iozone -c -e -s 1024M -r 16K -t 1 -F f1 -i 0 -i 1 +iozone -c -e -s 1024M -r 1M -t 1 -F f2 -i 0 -i 1 +iozone -c -e -s 10240M -r 1M -t 1 -F f3 -i 0 -i 1 diff --git a/qa/workunits/suites/pjd.sh b/qa/workunits/suites/pjd.sh new file mode 100755 index 000000000..bd72f77f2 --- /dev/null +++ b/qa/workunits/suites/pjd.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +set -e + +wget http://download.ceph.com/qa/pjd-fstest-20090130-RC-aclfixes.tgz +tar zxvf pjd*.tgz +cd pjd-fstest-20090130-RC +make clean +make +cd .. +mkdir tmp +cd tmp +# must be root! +sudo prove -r -v --exec 'bash -x' ../pjd*/tests +cd .. +rm -rf tmp pjd* + diff --git a/qa/workunits/suites/random_write.32.ffsb b/qa/workunits/suites/random_write.32.ffsb new file mode 100644 index 000000000..ba83e470f --- /dev/null +++ b/qa/workunits/suites/random_write.32.ffsb @@ -0,0 +1,48 @@ +# Large file random writes. +# 1024 files, 100MB per file. + +time=300 # 5 min +alignio=1 + +[filesystem0] + location=. 
+ num_files=128 + min_filesize=104857600 # 100 MB + max_filesize=104857600 + reuse=1 +[end0] + +[threadgroup0] + num_threads=32 + + write_random=1 + write_weight=1 + + write_size=5242880 # 5 MB + write_blocksize=4096 + + [stats] + enable_stats=1 + enable_range=1 + + msec_range 0.00 0.01 + msec_range 0.01 0.02 + msec_range 0.02 0.05 + msec_range 0.05 0.10 + msec_range 0.10 0.20 + msec_range 0.20 0.50 + msec_range 0.50 1.00 + msec_range 1.00 2.00 + msec_range 2.00 5.00 + msec_range 5.00 10.00 + msec_range 10.00 20.00 + msec_range 20.00 50.00 + msec_range 50.00 100.00 + msec_range 100.00 200.00 + msec_range 200.00 500.00 + msec_range 500.00 1000.00 + msec_range 1000.00 2000.00 + msec_range 2000.00 5000.00 + msec_range 5000.00 10000.00 + [end] +[end0] diff --git a/qa/workunits/test_telemetry_pacific.sh b/qa/workunits/test_telemetry_pacific.sh new file mode 100755 index 000000000..a971f5883 --- /dev/null +++ b/qa/workunits/test_telemetry_pacific.sh @@ -0,0 +1,23 @@ +#!/bin/bash -ex + +# Set up ident details for cluster +ceph config set mgr mgr/telemetry/channel_ident true +ceph config set mgr mgr/telemetry/organization 'ceph-qa' +ceph config set mgr mgr/telemetry/description 'upgrade test cluster' + +# Opt-in +ceph telemetry on --license sharing-1-0 + +# Check last_opt_revision +LAST_OPT_REVISION=$(ceph config get mgr mgr/telemetry/last_opt_revision) +if [ $LAST_OPT_REVISION -ne 3 ]; then + echo "last_opt_revision is incorrect." + exit 1 +fi + +# Check reports +ceph telemetry show +ceph telemetry show-device +ceph telemetry show-all + +echo OK diff --git a/qa/workunits/test_telemetry_pacific_x.sh b/qa/workunits/test_telemetry_pacific_x.sh new file mode 100755 index 000000000..0e4a832db --- /dev/null +++ b/qa/workunits/test_telemetry_pacific_x.sh @@ -0,0 +1,59 @@ +#!/bin/bash -ex + +# Assert that we're still opted in +LAST_OPT_REVISION=$(ceph config get mgr mgr/telemetry/last_opt_revision) +if [ $LAST_OPT_REVISION -ne 3 ]; then + echo "last_opt_revision is incorrect" + exit 1 +fi + +# Check the warning: +STATUS=$(ceph -s) +if ! [[ $STATUS == *"Telemetry requires re-opt-in"* ]] +then + echo "STATUS does not contain re-opt-in warning" + exit 1 +fi + +# Check new collections +COLLECTIONS=$(ceph telemetry collection ls) +NEW_COLLECTIONS=("perf_perf" "basic_mds_metadata" "basic_pool_usage" "basic_rook_v01" "perf_memory_metrics") +for col in ${NEW_COLLECTIONS[@]}; do + if ! [[ $COLLECTIONS == *$col* ]]; + then + echo "COLLECTIONS does not contain" "'"$col"'." 
+ exit 1 + fi +done + +# Run preview commands +ceph telemetry preview +ceph telemetry preview-device +ceph telemetry preview-all + +# Opt in to new collections +ceph telemetry on --license sharing-1-0 +ceph telemetry enable channel perf + +# Check the warning: +timeout=60 +STATUS=$(ceph -s) +until [[ $STATUS != *"Telemetry requires re-opt-in"* ]] || [ $timeout -le 0 ]; do + STATUS=$(ceph -s) + sleep 1 + timeout=$(( timeout - 1 )) +done +if [ $timeout -le 0 ]; then + echo "STATUS should not contain re-opt-in warning at this point" + exit 1 +fi + +# Run show commands +ceph telemetry show +ceph telemetry show-device +ceph telemetry show + +# Opt out +ceph telemetry off + +echo OK diff --git a/qa/workunits/test_telemetry_quincy.sh b/qa/workunits/test_telemetry_quincy.sh new file mode 100755 index 000000000..e8b07ec13 --- /dev/null +++ b/qa/workunits/test_telemetry_quincy.sh @@ -0,0 +1,44 @@ +#!/bin/bash -ex + +# Set up ident details for cluster +ceph config set mgr mgr/telemetry/channel_ident true +ceph config set mgr mgr/telemetry/organization 'ceph-qa' +ceph config set mgr mgr/telemetry/description 'upgrade test cluster' + + +#Run preview commands +ceph telemetry preview +ceph telemetry preview-device +ceph telemetry preview-all + +# Assert that new collections are available +COLLECTIONS=$(ceph telemetry collection ls) +NEW_COLLECTIONS=("perf_perf" "basic_mds_metadata" "basic_pool_usage" "basic_rook_v01" "perf_memory_metrics") +for col in ${NEW_COLLECTIONS[@]}; do + if ! [[ $COLLECTIONS == *$col* ]]; + then + echo "COLLECTIONS does not contain" "'"$col"'." + exit 1 + fi +done + +# Opt-in +ceph telemetry on --license sharing-1-0 + +# Enable perf channel +ceph telemetry enable channel perf + +# For quincy, the last_opt_revision remains at 1 since last_opt_revision +# was phased out for fresh installs of quincy. +LAST_OPT_REVISION=$(ceph config get mgr mgr/telemetry/last_opt_revision) +if [ $LAST_OPT_REVISION -ne 1 ]; then + echo "last_opt_revision is incorrect" + exit 1 +fi + +# Run show commands +ceph telemetry show +ceph telemetry show-device +ceph telemetry show-all + +echo OK diff --git a/qa/workunits/test_telemetry_quincy_x.sh b/qa/workunits/test_telemetry_quincy_x.sh new file mode 100755 index 000000000..4734132d0 --- /dev/null +++ b/qa/workunits/test_telemetry_quincy_x.sh @@ -0,0 +1,40 @@ +#!/bin/bash -ex + +# For quincy, the last_opt_revision remains at 1 since last_opt_revision +# was phased out for fresh installs of quincy. +LAST_OPT_REVISION=$(ceph config get mgr mgr/telemetry/last_opt_revision) +if [ $LAST_OPT_REVISION -ne 1 ]; then + echo "last_opt_revision is incorrect" + exit 1 +fi + +# Check the warning: +ceph -s + +COLLECTIONS=$(ceph telemetry collection ls) +NEW_COLLECTIONS=("perf_perf" "basic_mds_metadata" "basic_pool_usage" "basic_rook_v01" "perf_memory_metrics") +for col in ${NEW_COLLECTIONS[@]}; do + if ! [[ $COLLECTIONS == *$col* ]]; + then + echo "COLLECTIONS does not contain" "'"$col"'." 
+ exit 1 + fi +done + +#Run preview commands +ceph telemetry preview +ceph telemetry preview-device +ceph telemetry preview-all + +# Opt in to new collections +# Currently, no new collections between latest quincy and reef (dev) + +# Run show commands +ceph telemetry show +ceph telemetry show-device +ceph telemetry show + +# Opt out +ceph telemetry off + +echo OK diff --git a/qa/workunits/true.sh b/qa/workunits/true.sh new file mode 100755 index 000000000..296ef781c --- /dev/null +++ b/qa/workunits/true.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +true diff --git a/qa/workunits/windows/libvirt_vm/autounattend.xml b/qa/workunits/windows/libvirt_vm/autounattend.xml new file mode 100644 index 000000000..c3cdf3171 --- /dev/null +++ b/qa/workunits/windows/libvirt_vm/autounattend.xml @@ -0,0 +1,157 @@ +<?xml version="1.0" encoding="utf-8"?> +<unattend xmlns="urn:schemas-microsoft-com:unattend"> + + <settings pass="windowsPE"> + + <component name="Microsoft-Windows-International-Core-WinPE" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS" xmlns:wcm="http://schemas.microsoft.com/WMIConfig/2002/State" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <SetupUILanguage> + <UILanguage>en-US</UILanguage> + </SetupUILanguage> + <SystemLocale>en-US</SystemLocale> + <UILanguage>en-US</UILanguage> + <UserLocale>en-US</UserLocale> + </component> + + <component name="Microsoft-Windows-Setup" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS" xmlns:wcm="http://schemas.microsoft.com/WMIConfig/2002/State" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + + <DiskConfiguration> + <WillShowUI>OnError</WillShowUI> + <Disk wcm:action="add"> + <CreatePartitions> + <CreatePartition wcm:action="add"> + <Order>1</Order> + <Size>100</Size> + <Type>Primary</Type> + </CreatePartition> + <CreatePartition wcm:action="add"> + <Order>2</Order> + <Extend>true</Extend> + <Type>Primary</Type> + </CreatePartition> + </CreatePartitions> + <ModifyPartitions> + <ModifyPartition wcm:action="add"> + <Active>true</Active> + <Label>Boot</Label> + <Format>NTFS</Format> + <Order>1</Order> + <PartitionID>1</PartitionID> + </ModifyPartition> + <ModifyPartition wcm:action="add"> + <Format>NTFS</Format> + <Order>2</Order> + <PartitionID>2</PartitionID> + <Label>System</Label> + </ModifyPartition> + </ModifyPartitions> + <DiskID>0</DiskID> + <WillWipeDisk>true</WillWipeDisk> + </Disk> + </DiskConfiguration> + + <ImageInstall> + <OSImage> + <InstallTo> + <PartitionID>2</PartitionID> + <DiskID>0</DiskID> + </InstallTo> + <InstallToAvailablePartition>false</InstallToAvailablePartition> + <WillShowUI>OnError</WillShowUI> + <InstallFrom> + <MetaData wcm:action="add"> + <Key>/IMAGE/NAME</Key> + <Value>Windows Server 2019 SERVERSTANDARDCORE</Value> + </MetaData> + </InstallFrom> + </OSImage> + </ImageInstall> + + <UserData> + <!-- Product Key from http://technet.microsoft.com/en-us/library/jj612867.aspx --> + <ProductKey> + <!-- Do not uncomment the Key element if you are using trial ISOs --> + <!-- You must uncomment the Key element (and optionally insert your own key) if you are using retail or volume license ISOs --> + <!-- <Key></Key> --> + <WillShowUI>OnError</WillShowUI> + </ProductKey> + <AcceptEula>true</AcceptEula> + </UserData> + + </component> + + <component name="Microsoft-Windows-PnpCustomizationsWinPE" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS" 
xmlns:wcm="http://schemas.microsoft.com/WMIConfig/2002/State" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <DriverPaths> + <PathAndCredentials wcm:action="add" wcm:keyValue="1"> + <Path>E:\NetKVM\2k19\amd64\</Path> + </PathAndCredentials> + <PathAndCredentials wcm:action="add" wcm:keyValue="2"> + <Path>E:\viostor\2k19\amd64\</Path> + </PathAndCredentials> + <PathAndCredentials wcm:action="add" wcm:keyValue="3"> + <Path>E:\vioserial\2k19\amd64\</Path> + </PathAndCredentials> + </DriverPaths> + </component> + + </settings> + + <settings pass="oobeSystem"> + <component name="Microsoft-Windows-Shell-Setup" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS" xmlns:wcm="http://schemas.microsoft.com/WMIConfig/2002/State" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + + <VisualEffects> + <FontSmoothing>ClearType</FontSmoothing> + </VisualEffects> + + <UserAccounts> + <!-- + Password to be used only during initial provisioning. + Must be reset with final Sysprep. + --> + <AdministratorPassword> + <Value>Passw0rd</Value> + <PlainText>true</PlainText> + </AdministratorPassword> + </UserAccounts> + + <AutoLogon> + <Password> + <Value>Passw0rd</Value> + <PlainText>true</PlainText> + </Password> + <Enabled>true</Enabled> + <Username>Administrator</Username> + </AutoLogon> + + <ComputerName>*</ComputerName> + + <OOBE> + <NetworkLocation>Work</NetworkLocation> + <HideEULAPage>true</HideEULAPage> + <ProtectYourPC>3</ProtectYourPC> + <SkipMachineOOBE>true</SkipMachineOOBE> + <SkipUserOOBE>true</SkipUserOOBE> + </OOBE> + + <FirstLogonCommands> + + <SynchronousCommand wcm:action="add"> + <CommandLine>%SystemRoot%\System32\WindowsPowerShell\v1.0\powershell -NoLogo -NonInteractive -ExecutionPolicy RemoteSigned -File A:\first-logon.ps1</CommandLine> + <Order>1</Order> + </SynchronousCommand> + + </FirstLogonCommands> + + </component> + + </settings> + + <settings pass="specialize"> + + <component name="Microsoft-Windows-Shell-Setup" processorArchitecture="amd64" publicKeyToken="31bf3856ad364e35" language="neutral" versionScope="nonSxS" xmlns:wcm="http://schemas.microsoft.com/WMIConfig/2002/State" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> + <TimeZone>UTC</TimeZone> + <ComputerName>*</ComputerName> + </component> + + </settings> + +</unattend> diff --git a/qa/workunits/windows/libvirt_vm/first-logon.ps1 b/qa/workunits/windows/libvirt_vm/first-logon.ps1 new file mode 100644 index 000000000..654b836bb --- /dev/null +++ b/qa/workunits/windows/libvirt_vm/first-logon.ps1 @@ -0,0 +1,42 @@ +$ErrorActionPreference = "Stop" + +. "${PSScriptRoot}\utils.ps1" + +$VIRTIO_WIN_PATH = "E:\" + +# Install QEMU quest agent +Write-Output "Installing QEMU guest agent" +$p = Start-Process -FilePath "msiexec.exe" -ArgumentList @("/i", "${VIRTIO_WIN_PATH}\guest-agent\qemu-ga-x86_64.msi", "/qn") -NoNewWindow -PassThru -Wait +if($p.ExitCode) { + Throw "The QEMU guest agent installation failed. 
Exit code: $($p.ExitCode)" +} +Write-Output "Successfully installed QEMU guest agent" + +# Install OpenSSH server +Start-ExecuteWithRetry { + Get-WindowsCapability -Online -Name OpenSSH* | Add-WindowsCapability -Online +} + +# Start OpenSSH server +Set-Service -Name "sshd" -StartupType Automatic +Start-Service -Name "sshd" + +# Set PowerShell as default SSH shell +New-ItemProperty -PropertyType String -Force -Name DefaultShell -Path "HKLM:\SOFTWARE\OpenSSH" -Value (Get-Command powershell.exe).Source + +# Create SSH firewall rule +New-NetFirewallRule -Name "sshd" -DisplayName 'OpenSSH Server (sshd)' -Enabled True -Direction Inbound -Protocol TCP -Action Allow -LocalPort 22 + +# Authorize the SSH key +$authorizedKeysFile = Join-Path $env:ProgramData "ssh\administrators_authorized_keys" +Set-Content -Path $authorizedKeysFile -Value (Get-Content "${PSScriptRoot}\id_rsa.pub") -Encoding ascii +$acl = Get-Acl $authorizedKeysFile +$acl.SetAccessRuleProtection($true, $false) +$administratorsRule = New-Object system.security.accesscontrol.filesystemaccessrule("Administrators", "FullControl", "Allow") +$systemRule = New-Object system.security.accesscontrol.filesystemaccessrule("SYSTEM", "FullControl", "Allow") +$acl.SetAccessRule($administratorsRule) +$acl.SetAccessRule($systemRule) +$acl | Set-Acl + +# Reboot the machine to complete first logon process +Restart-Computer -Force -Confirm:$false diff --git a/qa/workunits/windows/libvirt_vm/setup.ps1 b/qa/workunits/windows/libvirt_vm/setup.ps1 new file mode 100644 index 000000000..550fb274e --- /dev/null +++ b/qa/workunits/windows/libvirt_vm/setup.ps1 @@ -0,0 +1,43 @@ +$ErrorActionPreference = "Stop" +$ProgressPreference = "SilentlyContinue" + +$PYTHON3_URL = "https://www.python.org/ftp/python/3.10.4/python-3.10.4-amd64.exe" +$FIO_URL = "https://bsdio.com/fio/releases/fio-3.27-x64.msi" +$VC_REDIST_URL = "https://aka.ms/vs/17/release/vc_redist.x64.exe" + +. 
"${PSScriptRoot}\utils.ps1" + +function Install-VCRedist { + Write-Output "Installing Visual Studio Redistributable x64" + Install-Tool -URL $VC_REDIST_URL -Params @("/quiet", "/norestart") + Write-Output "Successfully installed Visual Studio Redistributable x64" +} + +function Install-Python3 { + Write-Output "Installing Python3" + Install-Tool -URL $PYTHON3_URL -Params @("/quiet", "InstallAllUsers=1", "PrependPath=1") + Add-ToPathEnvVar -Path @("${env:ProgramFiles}\Python310\", "${env:ProgramFiles}\Python310\Scripts\") + Write-Output "Installing pip dependencies" + Start-ExecuteWithRetry { + Invoke-CommandLine "pip3.exe" "install prettytable" + } + Write-Output "Successfully installed Python3" +} + +function Install-FIO { + Write-Output "Installing FIO" + Install-Tool -URL $FIO_URL -Params @("/qn", "/l*v", "$env:TEMP\fio-install.log", "/norestart") + Write-Output "Successfully installed FIO" +} + +Install-VCRedist +Install-Python3 +Install-FIO + +# Pre-append WNBD and Ceph to PATH +Add-ToPathEnvVar -Path @( + "${env:SystemDrive}\wnbd\binaries", + "${env:SystemDrive}\ceph") + +# This will refresh the PATH for new SSH sessions +Restart-Service -Force -Name "sshd" diff --git a/qa/workunits/windows/libvirt_vm/setup.sh b/qa/workunits/windows/libvirt_vm/setup.sh new file mode 100755 index 000000000..51e91ec42 --- /dev/null +++ b/qa/workunits/windows/libvirt_vm/setup.sh @@ -0,0 +1,162 @@ +#!/usr/bin/env bash +set -ex + +WINDOWS_SERVER_2019_ISO_URL=${WINDOWS_SERVER_2019_ISO_URL:-"https://software-download.microsoft.com/download/pr/17763.737.190906-2324.rs5_release_svc_refresh_SERVER_EVAL_x64FRE_en-us_1.iso"} +VIRTIO_WIN_ISO_URL=${VIRTIO_WIN_ISO_URL:-"https://fedorapeople.org/groups/virt/virtio-win/direct-downloads/stable-virtio/virtio-win.iso"} + +DIR="$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)" + +# Use build_utils.sh from ceph-build +curl --retry-max-time 30 --retry 10 -L -o ${DIR}/build_utils.sh https://raw.githubusercontent.com/ceph/ceph-build/main/scripts/build_utils.sh +source ${DIR}/build_utils.sh + +# Helper function to restart the Windows VM +function restart_windows_vm() { + echo "Restarting Windows VM" + ssh_exec "cmd.exe /c 'shutdown.exe /r /t 0 & sc.exe stop sshd'" + SECONDS=0 + TIMEOUT=${1:-600} + while true; do + if [[ $SECONDS -gt $TIMEOUT ]]; then + echo "Timeout waiting for the VM to start" + exit 1 + fi + ssh_exec hostname || { + echo "Cannot execute SSH commands yet" + sleep 10 + continue + } + break + done + echo "Windows VM restarted" +} + +# Install libvirt with KVM +retrycmd_if_failure 5 0 5m sudo apt-get update +retrycmd_if_failure 5 0 10m sudo apt-get install -y qemu-kvm libvirt-daemon-system libvirt-clients virtinst + +# Download ISO images +echo "Downloading virtio-win ISO" +retrycmd_if_failure 5 0 30m curl -C - -L $VIRTIO_WIN_ISO_URL -o ${DIR}/virtio-win.iso +echo "Downloading Windows Server 2019 ISO" +retrycmd_if_failure 5 0 60m curl -C - -L $WINDOWS_SERVER_2019_ISO_URL -o ${DIR}/windows-server-2019.iso + +# Create virtual floppy image with the unattended instructions to install Windows Server 2019 +echo "Creating floppy image" +qemu-img create -f raw ${DIR}/floppy.img 1440k +mkfs.msdos -s 1 ${DIR}/floppy.img +mkdir ${DIR}/floppy +sudo mount ${DIR}/floppy.img ${DIR}/floppy +ssh-keygen -b 2048 -t rsa -f ${DIR}/id_rsa -q -N "" +sudo cp \ + ${DIR}/autounattend.xml \ + ${DIR}/first-logon.ps1 \ + ${DIR}/id_rsa.pub \ + ${DIR}/utils.ps1 \ + ${DIR}/setup.ps1 \ + ${DIR}/floppy/ +sudo umount ${DIR}/floppy +rmdir ${DIR}/floppy + +echo "Starting libvirt VM" +qemu-img create -f 
qcow2 ${DIR}/ceph-win-ltsc2019.qcow2 50G +VM_NAME="ceph-win-ltsc2019" +sudo virt-install \ + --name $VM_NAME \ + --os-variant win2k19 \ + --boot hd,cdrom \ + --virt-type kvm \ + --graphics spice \ + --cpu host \ + --vcpus 4 \ + --memory 4096 \ + --disk ${DIR}/floppy.img,device=floppy \ + --disk ${DIR}/ceph-win-ltsc2019.qcow2,bus=virtio \ + --disk ${DIR}/windows-server-2019.iso,device=cdrom \ + --disk ${DIR}/virtio-win.iso,device=cdrom \ + --network network=default,model=virtio \ + --controller type=virtio-serial \ + --channel unix,target_type=virtio,name=org.qemu.guest_agent.0 \ + --noautoconsol + +export SSH_USER="administrator" +export SSH_KNOWN_HOSTS_FILE="${DIR}/known_hosts" +export SSH_KEY="${DIR}/id_rsa" + +SECONDS=0 +TIMEOUT=1800 +SLEEP_SECS=30 +while true; do + if [[ $SECONDS -gt $TIMEOUT ]]; then + echo "Timeout waiting for the VM to start" + exit 1 + fi + VM_IP=$(sudo virsh domifaddr --source agent --interface Ethernet --full $VM_NAME | grep ipv4 | awk '{print $4}' | cut -d '/' -f1) || { + echo "Retrying in $SLEEP_SECS seconds" + sleep $SLEEP_SECS + continue + } + ssh-keyscan -H $VM_IP &> $SSH_KNOWN_HOSTS_FILE || { + echo "SSH is not reachable yet" + sleep $SLEEP_SECS + continue + } + SSH_ADDRESS=$VM_IP ssh_exec hostname || { + echo "Cannot execute SSH commands yet" + sleep $SLEEP_SECS + continue + } + break +done +export SSH_ADDRESS=$VM_IP + +scp_upload ${DIR}/utils.ps1 /utils.ps1 +scp_upload ${DIR}/setup.ps1 /setup.ps1 +SSH_TIMEOUT=1h ssh_exec /setup.ps1 + +cd $DIR + +# Get the helper script to download Chacra builds +retrycmd_if_failure 10 5 1m curl -L -o ./get-chacra-bin.py https://raw.githubusercontent.com/ceph/ceph-win32-tests/main/get-bin.py +chmod +x ./get-chacra-bin.py + +# Download latest WNBD build from Chacra +retrycmd_if_failure 10 0 10m ./get-chacra-bin.py --project wnbd --filename wnbd.zip +scp_upload wnbd.zip /wnbd.zip +ssh_exec tar.exe xzvf /wnbd.zip -C / + +# Install WNBD driver +ssh_exec Import-Certificate -FilePath /wnbd/driver/wnbd.cer -Cert Cert:\\LocalMachine\\Root +ssh_exec Import-Certificate -FilePath /wnbd/driver/wnbd.cer -Cert Cert:\\LocalMachine\\TrustedPublisher +ssh_exec /wnbd/binaries/wnbd-client.exe install-driver /wnbd/driver/wnbd.inf +restart_windows_vm +ssh_exec wnbd-client.exe -v + +# Download Ceph Windows build from Chacra +CEPH_REPO_FILE="/etc/apt/sources.list.d/ceph.list" +PROJECT=$(cat $CEPH_REPO_FILE | cut -d ' ' -f3 | tr '\/', ' ' | awk '{print $4}') +BRANCH=$(cat $CEPH_REPO_FILE | cut -d ' ' -f3 | tr '\/', ' ' | awk '{print $5}') +SHA1=$(cat $CEPH_REPO_FILE | cut -d ' ' -f3 | tr '\/', ' ' | awk '{print $6}') +retrycmd_if_failure 10 0 10m ./get-chacra-bin.py --project $PROJECT --branchname $BRANCH --sha1 $SHA1 --filename ceph.zip + +# Install Ceph on Windows +SSH_TIMEOUT=5m scp_upload ./ceph.zip /ceph.zip +SSH_TIMEOUT=10m ssh_exec tar.exe xzvf /ceph.zip -C / +ssh_exec "New-Service -Name ceph-rbd -BinaryPathName 'c:\ceph\rbd-wnbd.exe service'" +ssh_exec Start-Service -Name ceph-rbd +ssh_exec rbd.exe -v + +# Setup Ceph configs and directories +ssh_exec mkdir -force /etc/ceph, /var/run/ceph, /var/log/ceph +for i in $(ls /etc/ceph); do + scp_upload /etc/ceph/$i /etc/ceph/$i +done + +cat << EOF > ${DIR}/connection_info.sh +export SSH_USER="${SSH_USER}" +export SSH_KNOWN_HOSTS_FILE="${SSH_KNOWN_HOSTS_FILE}" +export SSH_KEY="${SSH_KEY}" +export SSH_ADDRESS="${SSH_ADDRESS}" +EOF + +echo "Windows Server 2019 libvirt testing VM is ready" diff --git a/qa/workunits/windows/libvirt_vm/utils.ps1 b/qa/workunits/windows/libvirt_vm/utils.ps1 new file mode 
100644 index 000000000..f29ab79f5 --- /dev/null +++ b/qa/workunits/windows/libvirt_vm/utils.ps1 @@ -0,0 +1,130 @@ +function Invoke-CommandLine { + Param( + [Parameter(Mandatory=$true)] + [String]$Command, + [String]$Arguments, + [Int[]]$AllowedExitCodes=@(0) + ) + & $Command $Arguments.Split(" ") + if($LASTEXITCODE -notin $AllowedExitCodes) { + Throw "$Command $Arguments returned a non zero exit code ${LASTEXITCODE}." + } +} + +function Start-ExecuteWithRetry { + Param( + [Parameter(Mandatory=$true)] + [ScriptBlock]$ScriptBlock, + [Int]$MaxRetryCount=10, + [Int]$RetryInterval=3, + [String]$RetryMessage, + [Array]$ArgumentList=@() + ) + $currentErrorActionPreference = $ErrorActionPreference + $ErrorActionPreference = "Continue" + $retryCount = 0 + while ($true) { + try { + $res = Invoke-Command -ScriptBlock $ScriptBlock -ArgumentList $ArgumentList + $ErrorActionPreference = $currentErrorActionPreference + return $res + } catch [System.Exception] { + $retryCount++ + if ($retryCount -gt $MaxRetryCount) { + $ErrorActionPreference = $currentErrorActionPreference + Throw $_ + } else { + $prefixMsg = "Retry(${retryCount}/${MaxRetryCount})" + if($RetryMessage) { + Write-Host "${prefixMsg} - $RetryMessage" + } elseif($_) { + Write-Host "${prefixMsg} - $($_.ToString())" + } + Start-Sleep $RetryInterval + } + } + } +} + +function Start-FileDownload { + Param( + [Parameter(Mandatory=$true)] + [String]$URL, + [Parameter(Mandatory=$true)] + [String]$Destination, + [Int]$RetryCount=10 + ) + Write-Output "Downloading $URL to $Destination" + Start-ExecuteWithRetry ` + -ScriptBlock { Invoke-CommandLine -Command "curl.exe" -Arguments "-L -s -o $Destination $URL" } ` + -MaxRetryCount $RetryCount ` + -RetryMessage "Failed to download '${URL}'. Retrying" + Write-Output "Successfully downloaded." 
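+    # Note: curl.exe is used here because it ships with Windows Server 2019, so no separate download tool has to be installed first.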
+} + +function Add-ToPathEnvVar { + Param( + [Parameter(Mandatory=$true)] + [String[]]$Path, + [Parameter(Mandatory=$false)] + [ValidateSet([System.EnvironmentVariableTarget]::User, [System.EnvironmentVariableTarget]::Machine)] + [System.EnvironmentVariableTarget]$Target=[System.EnvironmentVariableTarget]::Machine + ) + $pathEnvVar = [Environment]::GetEnvironmentVariable("PATH", $Target).Split(';') + $currentSessionPath = $env:PATH.Split(';') + foreach($p in $Path) { + if($p -notin $pathEnvVar) { + $pathEnvVar += $p + } + if($p -notin $currentSessionPath) { + $currentSessionPath += $p + } + } + $env:PATH = $currentSessionPath -join ';' + $newPathEnvVar = $pathEnvVar -join ';' + [Environment]::SetEnvironmentVariable("PATH", $newPathEnvVar, $Target) +} + +function Install-Tool { + [CmdletBinding(DefaultParameterSetName = "URL")] + Param( + [Parameter(Mandatory=$true, ParameterSetName = "URL")] + [String]$URL, + [Parameter(Mandatory=$true, ParameterSetName = "LocalPath")] + [String]$LocalPath, + [Parameter(ParameterSetName = "URL")] + [Parameter(ParameterSetName = "LocalPath")] + [String[]]$Params=@(), + [Parameter(ParameterSetName = "URL")] + [Parameter(ParameterSetName = "LocalPath")] + [Int[]]$AllowedExitCodes=@(0) + ) + PROCESS { + $installerPath = $LocalPath + if($PSCmdlet.ParameterSetName -eq "URL") { + $installerPath = Join-Path $env:TEMP $URL.Split('/')[-1] + Start-FileDownload -URL $URL -Destination $installerPath + } + Write-Output "Installing ${installerPath}" + $kwargs = @{ + "FilePath" = $installerPath + "ArgumentList" = $Params + "NoNewWindow" = $true + "PassThru" = $true + "Wait" = $true + } + if((Get-ChildItem $installerPath).Extension -eq '.msi') { + $kwargs["FilePath"] = "msiexec.exe" + $kwargs["ArgumentList"] = @("/i", $installerPath) + $Params + } + $p = Start-Process @kwargs + if($p.ExitCode -notin $AllowedExitCodes) { + Throw "Installation failed. Exit code: $($p.ExitCode)" + } + if($PSCmdlet.ParameterSetName -eq "URL") { + Start-ExecuteWithRetry ` + -ScriptBlock { Remove-Item -Force -Path $installerPath -ErrorAction Stop } ` + -RetryMessage "Failed to remove ${installerPath}. Retrying" + } + } +} diff --git a/qa/workunits/windows/run-tests.ps1 b/qa/workunits/windows/run-tests.ps1 new file mode 100644 index 000000000..6d818f426 --- /dev/null +++ b/qa/workunits/windows/run-tests.ps1 @@ -0,0 +1,29 @@ +$ProgressPreference = "SilentlyContinue" +$ErrorActionPreference = "Stop" + +$scriptLocation = [System.IO.Path]::GetDirectoryName( + $myInvocation.MyCommand.Definition) + +$testRbdWnbd = "$scriptLocation/test_rbd_wnbd.py" + +function safe_exec() { + # Powershell doesn't check the command exit code, we'll need to + # do it ourselves. Also, in case of native commands, it treats stderr + # output as an exception, which is why we'll have to capture it. + cmd /c "$args 2>&1" + if ($LASTEXITCODE) { + throw "Command failed: $args" + } +} + +safe_exec python.exe $testRbdWnbd --test-name RbdTest --iterations 100 +safe_exec python.exe $testRbdWnbd --test-name RbdFioTest --iterations 100 +safe_exec python.exe $testRbdWnbd --test-name RbdStampTest --iterations 100 + +# It can take a while to setup the partition (~10s), we'll use fewer iterations. 
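+# The Rbd*Fs* tests initialize the disk, create a partition and format a volume for every image, which is what accounts for the extra setup time.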
+safe_exec python.exe $testRbdWnbd --test-name RbdFsTest --iterations 4 +safe_exec python.exe $testRbdWnbd --test-name RbdFsFioTest --iterations 4 +safe_exec python.exe $testRbdWnbd --test-name RbdFsStampTest --iterations 4 + +safe_exec python.exe $testRbdWnbd ` + --test-name RbdResizeFioTest --image-size-mb 64 diff --git a/qa/workunits/windows/run-tests.sh b/qa/workunits/windows/run-tests.sh new file mode 100644 index 000000000..b582491c5 --- /dev/null +++ b/qa/workunits/windows/run-tests.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +set -ex + +DIR="$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)" + +source ${DIR}/libvirt_vm/build_utils.sh +source ${DIR}/libvirt_vm/connection_info.sh + +# Run the Windows tests +scp_upload ${DIR} /windows-workunits +SSH_TIMEOUT=30m ssh_exec powershell.exe -File /windows-workunits/run-tests.ps1 diff --git a/qa/workunits/windows/test_rbd_wnbd.py b/qa/workunits/windows/test_rbd_wnbd.py new file mode 100644 index 000000000..f22810e2e --- /dev/null +++ b/qa/workunits/windows/test_rbd_wnbd.py @@ -0,0 +1,919 @@ +import argparse +import collections +import functools +import json +import logging +import math +import os +import prettytable +import random +import subprocess +import time +import threading +import typing +import uuid +from concurrent import futures + +LOG = logging.getLogger() + +parser = argparse.ArgumentParser(description='rbd-wnbd tests') +parser.add_argument('--test-name', + help='The test to be run.', + default="RbdFioTest") +parser.add_argument('--iterations', + help='Total number of test iterations', + default=1, type=int) +parser.add_argument('--concurrency', + help='The number of tests to run in parallel', + default=4, type=int) +parser.add_argument('--fio-iterations', + help='Total number of benchmark iterations per disk.', + default=1, type=int) +parser.add_argument('--fio-workers', + help='Total number of fio workers per disk.', + default=1, type=int) +parser.add_argument('--fio-depth', + help='The number of concurrent asynchronous operations ' + 'executed per disk', + default=64, type=int) +parser.add_argument('--fio-verify', + help='The mechanism used to validate the written ' + 'data. Examples: crc32c, md5, sha1, null, etc. ' + 'If set to null, the written data will not be ' + 'verified.', + default='crc32c') +parser.add_argument('--bs', + help='Benchmark block size.', + default="2M") +parser.add_argument('--op', + help='Benchmark operation. ' + 'Examples: read, randwrite, rw, etc.', + default="rw") +parser.add_argument('--image-prefix', + help='The image name prefix.', + default="cephTest-") +parser.add_argument('--image-size-mb', + help='The image size in megabytes.', + default=1024, type=int) +parser.add_argument('--map-timeout', + help='Image map timeout.', + default=60, type=int) +parser.add_argument('--skip-enabling-disk', action='store_true', + help='If set, the disk will not be turned online and the ' + 'read-only flag will not be removed. Useful when ' + 'the SAN policy is set to "onlineAll".') +parser.add_argument('--verbose', action='store_true', + help='Print info messages.') +parser.add_argument('--debug', action='store_true', + help='Print debug messages.') +parser.add_argument('--stop-on-error', action='store_true', + help='Stop testing when hitting errors.') +parser.add_argument('--skip-cleanup-on-error', action='store_true', + help='Skip cleanup when hitting errors.') + + +class CephTestException(Exception): + msg_fmt = "An exception has been encountered." 
+ + def __init__(self, message: str = None, **kwargs): + self.kwargs = kwargs + if not message: + message = self.msg_fmt % kwargs + self.message = message + super(CephTestException, self).__init__(message) + + +class CommandFailed(CephTestException): + msg_fmt = ( + "Command failed: %(command)s. " + "Return code: %(returncode)s. " + "Stdout: %(stdout)s. Stderr: %(stderr)s.") + + +class CephTestTimeout(CephTestException): + msg_fmt = "Operation timeout." + + +def setup_logging(log_level: int = logging.INFO): + handler = logging.StreamHandler() + handler.setLevel(log_level) + + log_fmt = '[%(asctime)s] %(levelname)s - %(message)s' + formatter = logging.Formatter(log_fmt) + handler.setFormatter(formatter) + + LOG.addHandler(handler) + LOG.setLevel(logging.DEBUG) + + +def retry_decorator(timeout: int = 60, + retry_interval: int = 2, + silent_interval: int = 10, + additional_details: str = "", + retried_exceptions: + typing.Union[ + typing.Type[Exception], + collections.abc.Iterable[ + typing.Type[Exception]]] = Exception): + def wrapper(f: typing.Callable[..., typing.Any]): + @functools.wraps(f) + def inner(*args, **kwargs): + tstart: float = time.time() + elapsed: float = 0 + exc = None + details = additional_details or "%s failed" % f.__qualname__ + + while elapsed < timeout or not timeout: + try: + return f(*args, **kwargs) + except retried_exceptions as ex: + exc = ex + elapsed = time.time() - tstart + if elapsed > silent_interval: + level = logging.WARNING + else: + level = logging.DEBUG + LOG.log(level, + "Exception: %s. Additional details: %s. " + "Time elapsed: %d. Timeout: %d", + ex, details, elapsed, timeout) + + time.sleep(retry_interval) + elapsed = time.time() - tstart + + msg = ( + "Operation timed out. Exception: %s. Additional details: %s. " + "Time elapsed: %d. Timeout: %d.") + raise CephTestTimeout( + msg % (exc, details, elapsed, timeout)) + return inner + return wrapper + + +def execute(*args, **kwargs): + LOG.debug("Executing: %s", args) + result = subprocess.run( + args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + **kwargs) + LOG.debug("Command %s returned %d.", args, result.returncode) + if result.returncode: + exc = CommandFailed( + command=args, returncode=result.returncode, + stdout=result.stdout, stderr=result.stderr) + LOG.error(exc) + raise exc + return result + + +def ps_execute(*args, **kwargs): + # Disable PS progress bar, causes issues when invoked remotely. 
+ prefix = "$global:ProgressPreference = 'SilentlyContinue' ; " + return execute( + "powershell.exe", "-NonInteractive", + "-Command", prefix, *args, **kwargs) + + +def array_stats(array: list): + mean = sum(array) / len(array) if len(array) else 0 + variance = (sum((i - mean) ** 2 for i in array) / len(array) + if len(array) else 0) + std_dev = math.sqrt(variance) + sorted_array = sorted(array) + + return { + 'min': min(array) if len(array) else 0, + 'max': max(array) if len(array) else 0, + 'sum': sum(array) if len(array) else 0, + 'mean': mean, + 'median': sorted_array[len(array) // 2] if len(array) else 0, + 'max_90': sorted_array[int(len(array) * 0.9)] if len(array) else 0, + 'min_90': sorted_array[int(len(array) * 0.1)] if len(array) else 0, + 'variance': variance, + 'std_dev': std_dev, + 'count': len(array) + } + + +class Tracer: + data: collections.OrderedDict = collections.OrderedDict() + lock = threading.Lock() + + @classmethod + def trace(cls, func): + def wrapper(*args, **kwargs): + tstart = time.time() + exc_str = None + + # Preserve call order + with cls.lock: + if func.__qualname__ not in cls.data: + cls.data[func.__qualname__] = list() + + try: + return func(*args, **kwargs) + except Exception as exc: + exc_str = str(exc) + raise + finally: + tend = time.time() + + with cls.lock: + cls.data[func.__qualname__] += [{ + "duration": tend - tstart, + "error": exc_str, + }] + + return wrapper + + @classmethod + def get_results(cls): + stats = collections.OrderedDict() + for f in cls.data.keys(): + stats[f] = array_stats([i['duration'] for i in cls.data[f]]) + errors = [] + for i in cls.data[f]: + if i['error']: + errors.append(i['error']) + + stats[f]['errors'] = errors + return stats + + @classmethod + def print_results(cls): + r = cls.get_results() + + table = prettytable.PrettyTable(title="Duration (s)") + table.field_names = [ + "function", "min", "max", "total", + "mean", "median", "std_dev", + "max 90%", "min 90%", "count", "errors"] + table.float_format = ".4" + for f, s in r.items(): + table.add_row([f, s['min'], s['max'], s['sum'], + s['mean'], s['median'], s['std_dev'], + s['max_90'], s['min_90'], + s['count'], len(s['errors'])]) + print(table) + + +class RbdImage(object): + def __init__(self, + name: str, + size_mb: int, + is_shared: bool = True, + disk_number: int = -1, + mapped: bool = False): + self.name = name + self.size_mb = size_mb + self.is_shared = is_shared + self.disk_number = disk_number + self.mapped = mapped + self.removed = False + self.drive_letter = "" + + @classmethod + @Tracer.trace + def create(cls, + name: str, + size_mb: int = 1024, + is_shared: bool = True): + LOG.info("Creating image: %s. 
Size: %s.", name, "%sM" % size_mb) + cmd = ["rbd", "create", name, "--size", "%sM" % size_mb] + if is_shared: + cmd += ["--image-shared"] + execute(*cmd) + + return RbdImage(name, size_mb, is_shared) + + @Tracer.trace + def get_disk_number(self, + timeout: int = 60, + retry_interval: int = 2): + @retry_decorator( + retried_exceptions=CephTestException, + timeout=timeout, + retry_interval=retry_interval) + def _get_disk_number(): + LOG.info("Retrieving disk number: %s", self.name) + + result = execute("rbd-wnbd", "show", self.name, "--format=json") + disk_info = json.loads(result.stdout) + disk_number = disk_info["disk_number"] + if disk_number > 0: + LOG.debug("Image %s disk number: %d", self.name, disk_number) + return disk_number + + raise CephTestException( + f"Could not get disk number: {self.name}.") + + return _get_disk_number() + + @Tracer.trace + def _wait_for_disk(self, + timeout: int = 60, + retry_interval: int = 2): + @retry_decorator( + retried_exceptions=(FileNotFoundError, OSError), + additional_details="the mapped disk isn't available yet", + timeout=timeout, + retry_interval=retry_interval) + def wait_for_disk(): + LOG.debug("Waiting for disk to be accessible: %s %s", + self.name, self.path) + + with open(self.path, 'rb'): + pass + + return wait_for_disk() + + @property + def path(self): + return f"\\\\.\\PhysicalDrive{self.disk_number}" + + @Tracer.trace + @retry_decorator(additional_details="couldn't clear disk read-only flag") + def set_writable(self): + ps_execute( + "Set-Disk", "-Number", str(self.disk_number), + "-IsReadOnly", "$false") + + @Tracer.trace + @retry_decorator(additional_details="couldn't bring the disk online") + def set_online(self): + ps_execute( + "Set-Disk", "-Number", str(self.disk_number), + "-IsOffline", "$false") + + @Tracer.trace + def map(self, timeout: int = 60): + LOG.info("Mapping image: %s", self.name) + tstart = time.time() + + execute("rbd-wnbd", "map", self.name) + self.mapped = True + + self.disk_number = self.get_disk_number(timeout=timeout) + + elapsed = time.time() - tstart + self._wait_for_disk(timeout=timeout - elapsed) + + @Tracer.trace + def unmap(self): + if self.mapped: + LOG.info("Unmapping image: %s", self.name) + execute("rbd-wnbd", "unmap", self.name) + self.mapped = False + + @Tracer.trace + def remove(self): + if not self.removed: + LOG.info("Removing image: %s", self.name) + execute("rbd", "rm", self.name) + self.removed = True + + def cleanup(self): + try: + self.unmap() + finally: + self.remove() + + @Tracer.trace + @retry_decorator() + def _init_disk(self): + cmd = f"Get-Disk -Number {self.disk_number} | Initialize-Disk" + ps_execute(cmd) + + @Tracer.trace + @retry_decorator() + def _create_partition(self): + cmd = (f"Get-Disk -Number {self.disk_number} | " + "New-Partition -AssignDriveLetter -UseMaximumSize") + ps_execute(cmd) + + @Tracer.trace + @retry_decorator() + def _format_volume(self): + cmd = ( + f"(Get-Partition -DiskNumber {self.disk_number}" + " | ? { $_.DriveLetter }) | Format-Volume -Force -Confirm:$false") + ps_execute(cmd) + + @Tracer.trace + @retry_decorator() + def _get_drive_letter(self): + cmd = (f"(Get-Partition -DiskNumber {self.disk_number}" + " | ? { $_.DriveLetter }).DriveLetter") + result = ps_execute(cmd) + + # The PowerShell command will place a null character if no drive letter + # is available. For example, we can receive "\x00\r\n". 
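+        # The checks below reject such output with a CephTestException, which the surrounding retry decorator catches and retries.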
+ self.drive_letter = result.stdout.decode().strip() + if not self.drive_letter.isalpha() or len(self.drive_letter) != 1: + raise CephTestException( + "Invalid drive letter received: %s" % self.drive_letter) + + @Tracer.trace + def init_fs(self): + if not self.mapped: + raise CephTestException("Unable to create fs, image not mapped.") + + LOG.info("Initializing fs, image: %s.", self.name) + + self._init_disk() + self._create_partition() + self._format_volume() + self._get_drive_letter() + + @Tracer.trace + def get_fs_capacity(self): + if not self.drive_letter: + raise CephTestException("No drive letter available") + + cmd = f"(Get-Volume -DriveLetter {self.drive_letter}).Size" + result = ps_execute(cmd) + + return int(result.stdout.decode().strip()) + + @Tracer.trace + def resize(self, new_size_mb, allow_shrink=False): + LOG.info( + "Resizing image: %s. New size: %s MB, old size: %s MB", + self.name, new_size_mb, self.size_mb) + + cmd = ["rbd", "resize", self.name, + "--size", f"{new_size_mb}M", "--no-progress"] + if allow_shrink: + cmd.append("--allow-shrink") + + execute(*cmd) + + self.size_mb = new_size_mb + + @Tracer.trace + def get_disk_size(self): + """Retrieve the virtual disk size (bytes) reported by Windows.""" + cmd = f"(Get-Disk -Number {self.disk_number}).Size" + result = ps_execute(cmd) + + disk_size = result.stdout.decode().strip() + if not disk_size.isdigit(): + raise CephTestException( + "Invalid disk size received: %s" % disk_size) + + return int(disk_size) + + @Tracer.trace + @retry_decorator(timeout=30) + def wait_for_disk_resize(self): + # After resizing the rbd image, the daemon is expected to receive + # the notification, inform the WNBD driver and then trigger a disk + # rescan (IOCTL_DISK_UPDATE_PROPERTIES). This might take a few seconds, + # so we'll need to do some polling. + disk_size = self.get_disk_size() + disk_size_mb = disk_size // (1 << 20) + + if disk_size_mb != self.size_mb: + raise CephTestException( + "The disk size hasn't been updated yet. Retrieved size: " + f"{disk_size_mb}MB. Expected size: {self.size_mb}MB.") + + +class RbdTest(object): + image: RbdImage + + requires_disk_online = False + requires_disk_write = False + + def __init__(self, + image_prefix: str = "cephTest-", + image_size_mb: int = 1024, + map_timeout: int = 60, + **kwargs): + self.image_size_mb = image_size_mb + self.image_name = image_prefix + str(uuid.uuid4()) + self.map_timeout = map_timeout + self.skip_enabling_disk = kwargs.get("skip_enabling_disk") + + @Tracer.trace + def initialize(self): + self.image = RbdImage.create( + self.image_name, + self.image_size_mb) + self.image.map(timeout=self.map_timeout) + + if not self.skip_enabling_disk: + if self.requires_disk_write: + self.image.set_writable() + + if self.requires_disk_online: + self.image.set_online() + + def run(self): + pass + + def cleanup(self): + if self.image: + self.image.cleanup() + + @classmethod + def print_results(cls, + title: str = "Test results", + description: str = None): + pass + + +class RbdFsTestMixin(object): + # Windows disks must be turned online before accessing partitions. 
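+    # Write access is also required so that the partition can be created and the volume formatted.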
+ requires_disk_online = True + requires_disk_write = True + + @Tracer.trace + def initialize(self): + super(RbdFsTestMixin, self).initialize() + + self.image.init_fs() + + def get_subpath(self, *args): + drive_path = f"{self.image.drive_letter}:\\" + return os.path.join(drive_path, *args) + + +class RbdFsTest(RbdFsTestMixin, RbdTest): + pass + + +class RbdFioTest(RbdTest): + data: typing.DefaultDict[str, typing.List[typing.Dict[str, str]]] = ( + collections.defaultdict(list)) + lock = threading.Lock() + + def __init__(self, + *args, + fio_size_mb: int = None, + iterations: int = 1, + workers: int = 1, + bs: str = "2M", + iodepth: int = 64, + op: str = "rw", + verify: str = "crc32c", + **kwargs): + + super(RbdFioTest, self).__init__(*args, **kwargs) + + self.fio_size_mb = fio_size_mb or self.image_size_mb + self.iterations = iterations + self.workers = workers + self.bs = bs + self.iodepth = iodepth + self.op = op + if op not in ("read", "randread"): + self.requires_disk_write = True + self.verify = verify + + def process_result(self, raw_fio_output: str): + result = json.loads(raw_fio_output) + with self.lock: + for job in result["jobs"]: + # Fio doesn't support trim on Windows + for op in ['read', 'write']: + if op in job: + self.data[op].append({ + 'error': job['error'], + 'io_bytes': job[op]['io_bytes'], + 'bw_bytes': job[op]['bw_bytes'], + 'runtime': job[op]['runtime'] / 1000, # seconds + 'total_ios': job[op]['short_ios'], + 'short_ios': job[op]['short_ios'], + 'dropped_ios': job[op]['short_ios'], + 'clat_ns_min': job[op]['clat_ns']['min'], + 'clat_ns_max': job[op]['clat_ns']['max'], + 'clat_ns_mean': job[op]['clat_ns']['mean'], + 'clat_ns_stddev': job[op]['clat_ns']['stddev'], + 'clat_ns_10': job[op].get('clat_ns', {}) + .get('percentile', {}) + .get('10.000000', 0), + 'clat_ns_90': job[op].get('clat_ns', {}) + .get('percentile', {}) + .get('90.000000', 0) + }) + + def _get_fio_path(self): + return self.image.path + + @Tracer.trace + def _run_fio(self, fio_size_mb=None): + LOG.info("Starting FIO test.") + cmd = [ + "fio", "--thread", "--output-format=json", + "--randrepeat=%d" % self.iterations, + "--direct=1", "--name=test", + "--bs=%s" % self.bs, "--iodepth=%s" % self.iodepth, + "--size=%sM" % (fio_size_mb or self.fio_size_mb), + "--readwrite=%s" % self.op, + "--numjobs=%s" % self.workers, + "--filename=%s" % self._get_fio_path(), + ] + if self.verify: + cmd += ["--verify=%s" % self.verify] + result = execute(*cmd) + LOG.info("Completed FIO test.") + self.process_result(result.stdout) + + @Tracer.trace + def run(self): + self._run_fio() + + @classmethod + def print_results(cls, + title: str = "Benchmark results", + description: str = None): + if description: + title = "%s (%s)" % (title, description) + + for op in cls.data.keys(): + op_title = "%s op=%s" % (title, op) + + table = prettytable.PrettyTable(title=op_title) + table.field_names = ["stat", "min", "max", "mean", + "median", "std_dev", + "max 90%", "min 90%", "total"] + table.float_format = ".4" + + op_data = cls.data[op] + + s = array_stats([float(i["bw_bytes"]) / 1000_000 for i in op_data]) + table.add_row(["bandwidth (MB/s)", + s['min'], s['max'], s['mean'], + s['median'], s['std_dev'], + s['max_90'], s['min_90'], 'N/A']) + + s = array_stats([float(i["runtime"]) for i in op_data]) + table.add_row(["duration (s)", + s['min'], s['max'], s['mean'], + s['median'], s['std_dev'], + s['max_90'], s['min_90'], s['sum']]) + + s = array_stats([i["error"] for i in op_data]) + table.add_row(["errors", + s['min'], s['max'], s['mean'], 
+ s['median'], s['std_dev'], + s['max_90'], s['min_90'], s['sum']]) + + s = array_stats([i["short_ios"] for i in op_data]) + table.add_row(["incomplete IOs", + s['min'], s['max'], s['mean'], + s['median'], s['std_dev'], + s['max_90'], s['min_90'], s['sum']]) + + s = array_stats([i["dropped_ios"] for i in op_data]) + table.add_row(["dropped IOs", + s['min'], s['max'], s['mean'], + s['median'], s['std_dev'], + s['max_90'], s['min_90'], s['sum']]) + + clat_min = array_stats([i["clat_ns_min"] for i in op_data]) + clat_max = array_stats([i["clat_ns_max"] for i in op_data]) + clat_mean = array_stats([i["clat_ns_mean"] for i in op_data]) + clat_stddev = math.sqrt( + sum([float(i["clat_ns_stddev"]) ** 2 for i in op_data]) / len(op_data) + if len(op_data) else 0) + clat_10 = array_stats([i["clat_ns_10"] for i in op_data]) + clat_90 = array_stats([i["clat_ns_90"] for i in op_data]) + # For convenience, we'll convert it from ns to seconds. + table.add_row(["completion latency (s)", + clat_min['min'] / 1e+9, + clat_max['max'] / 1e+9, + clat_mean['mean'] / 1e+9, + clat_mean['median'] / 1e+9, + clat_stddev / 1e+9, + clat_10['mean'] / 1e+9, + clat_90['mean'] / 1e+9, + clat_mean['sum'] / 1e+9]) + print(table) + + +class RbdResizeFioTest(RbdFioTest): + """Image resize test. + + This test extends and then shrinks the image, performing FIO tests to + validate the resized image. + """ + + @Tracer.trace + def run(self): + self.image.resize(self.image_size_mb * 2) + self.image.wait_for_disk_resize() + + self._run_fio(fio_size_mb=self.image_size_mb * 2) + + self.image.resize(self.image_size_mb // 2, allow_shrink=True) + self.image.wait_for_disk_resize() + + self._run_fio(fio_size_mb=self.image_size_mb // 2) + + # Just like rbd-nbd, rbd-wnbd is masking out-of-bounds errors. + # For this reason, we don't have a negative test that writes + # passed the disk boundary. + + +class RbdFsFioTest(RbdFsTestMixin, RbdFioTest): + def initialize(self): + super(RbdFsFioTest, self).initialize() + + if not self.fio_size_mb or self.fio_size_mb == self.image_size_mb: + # Out of caution, we'll use up to 80% of the FS by default + self.fio_size_mb = int( + self.image.get_fs_capacity() * 0.8 / (1024 * 1024)) + + @staticmethod + def _fio_escape_path(path): + # FIO allows specifying multiple files separated by colon. + # This means that ":" has to be escaped, so + # F:\filename becomes F\:\filename. + return path.replace(":", "\\:") + + def _get_fio_path(self): + return self._fio_escape_path(self.get_subpath("test-fio")) + + +class RbdStampTest(RbdTest): + requires_disk_write = True + + _write_open_mode = "rb+" + _read_open_mode = "rb" + _expect_path_exists = True + + @staticmethod + def _rand_float(min_val: float, max_val: float): + return min_val + (random.random() * max_val - min_val) + + def _get_stamp(self): + buff = self.image_name.encode() + padding = 512 - len(buff) + buff += b'\0' * padding + return buff + + def _get_stamp_path(self): + return self.image.path + + @Tracer.trace + def _write_stamp(self): + with open(self._get_stamp_path(), self._write_open_mode) as disk: + stamp = self._get_stamp() + disk.write(stamp) + + @Tracer.trace + def _read_stamp(self): + with open(self._get_stamp_path(), self._read_open_mode) as disk: + return disk.read(len(self._get_stamp())) + + @Tracer.trace + def run(self): + if self._expect_path_exists: + # Wait up to 5 seconds and then check the disk, ensuring that + # nobody else wrote to it. 
This is particularly useful when + # running a high number of tests in parallel, ensuring that + # we aren't writing to the wrong disk. + time.sleep(self._rand_float(0, 5)) + + stamp = self._read_stamp() + assert stamp == b'\0' * len(self._get_stamp()) + + self._write_stamp() + + stamp = self._read_stamp() + assert stamp == self._get_stamp() + + +class RbdFsStampTest(RbdFsTestMixin, RbdStampTest): + _write_open_mode = "wb" + _expect_path_exists = False + + def _get_stamp_path(self): + return self.get_subpath("test-stamp") + + +class TestRunner(object): + def __init__(self, + test_cls: typing.Type[RbdTest], + test_params: dict = {}, + iterations: int = 1, + workers: int = 1, + stop_on_error: bool = False, + cleanup_on_error: bool = True): + self.test_cls = test_cls + self.test_params = test_params + self.iterations = iterations + self.workers = workers + self.executor = futures.ThreadPoolExecutor(max_workers=workers) + self.lock = threading.Lock() + self.completed = 0 + self.errors = 0 + self.stopped = False + self.stop_on_error = stop_on_error + self.cleanup_on_error = cleanup_on_error + + @Tracer.trace + def run(self): + tasks = [] + for i in range(self.iterations): + task = self.executor.submit(self.run_single_test) + tasks.append(task) + + LOG.info("Waiting for %d tests to complete.", self.iterations) + for task in tasks: + task.result() + + def run_single_test(self): + failed = False + if self.stopped: + return + + try: + test = self.test_cls(**self.test_params) + test.initialize() + test.run() + except KeyboardInterrupt: + LOG.warning("Received Ctrl-C.") + self.stopped = True + except Exception as ex: + failed = True + if self.stop_on_error: + self.stopped = True + with self.lock: + self.errors += 1 + LOG.exception( + "Test exception: %s. Total exceptions: %d", + ex, self.errors) + finally: + if not failed or self.cleanup_on_error: + try: + test.cleanup() + except KeyboardInterrupt: + LOG.warning("Received Ctrl-C.") + self.stopped = True + # Retry the cleanup + test.cleanup() + except Exception: + LOG.exception("Test cleanup failed.") + + with self.lock: + self.completed += 1 + LOG.info("Completed tests: %d. Pending: %d", + self.completed, self.iterations - self.completed) + + +TESTS: typing.Dict[str, typing.Type[RbdTest]] = { + 'RbdTest': RbdTest, + 'RbdFioTest': RbdFioTest, + 'RbdResizeFioTest': RbdResizeFioTest, + 'RbdStampTest': RbdStampTest, + # FS tests + 'RbdFsTest': RbdFsTest, + 'RbdFsFioTest': RbdFsFioTest, + 'RbdFsStampTest': RbdFsStampTest, +} + +if __name__ == '__main__': + args = parser.parse_args() + + log_level = logging.WARNING + if args.verbose: + log_level = logging.INFO + if args.debug: + log_level = logging.DEBUG + setup_logging(log_level) + + test_params = dict( + image_size_mb=args.image_size_mb, + image_prefix=args.image_prefix, + bs=args.bs, + op=args.op, + verify=args.fio_verify, + iodepth=args.fio_depth, + map_timeout=args.map_timeout, + skip_enabling_disk=args.skip_enabling_disk, + ) + + try: + test_cls = TESTS[args.test_name] + except KeyError: + raise CephTestException("Unkown test: {}".format(args.test_name)) + + runner = TestRunner( + test_cls, + test_params=test_params, + iterations=args.iterations, + workers=args.concurrency, + stop_on_error=args.stop_on_error, + cleanup_on_error=not args.skip_cleanup_on_error) + runner.run() + + Tracer.print_results() + test_cls.print_results( + description="count: %d, concurrency: %d" % + (args.iterations, args.concurrency)) + + assert runner.errors == 0, f"encountered {runner.errors} error(s)." |
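For reference, the runner above can also be driven directly from Python. The following is a minimal sketch, assuming it runs inside test_rbd_wnbd.py's namespace on a host where rbd, rbd-wnbd and fio are on PATH; the sizes and counts are illustrative and are not the values used by run-tests.ps1:

    # Run four RbdFioTest iterations on two worker threads against small images.
    runner = TestRunner(
        RbdFioTest,
        test_params=dict(image_size_mb=256, bs="1M", iodepth=32,
                         op="rw", verify="crc32c", map_timeout=60),
        iterations=4,
        workers=2)
    runner.run()

    # Print per-function timing statistics and the aggregated fio results.
    Tracer.print_results()
    RbdFioTest.print_results(description="count: 4, concurrency: 2")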