Diffstat
 176 files changed, 4752 insertions(+), 1168 deletions(-)
diff --git a/qa/cephfs/begin/3-modules.yaml b/qa/cephfs/begin/3-modules.yaml new file mode 100644 index 000000000..259473425 --- /dev/null +++ b/qa/cephfs/begin/3-modules.yaml @@ -0,0 +1,19 @@ +# Enable mgr modules now before any CephFS mounts are created by the mgr. This +# avoids the potential race of the mgr mounting CephFS and then getting failed +# over by the monitors before the monitors have a chance to note the new client +# session from the mgr beacon. In that case, the monitors will not blocklist +# that client mount automatically so the MDS will eventually do the eviction +# (and create a cluster log warning which we want to avoid). +# +# Note: ideally the mgr would gently stop mgr modules before respawning so that +# the client mounts can be unmounted but this caused issues historically with +# modules like the dashboard so an abrupt restart was chosen instead. + +mgrmodules: + sequential: + - print: "Enabling mgr modules" + # other fragments append to this + +tasks: + - sequential: + - mgrmodules diff --git a/qa/cephfs/overrides/ignorelist_health.yaml b/qa/cephfs/overrides/ignorelist_health.yaml index d8b819288..90811d6f2 100644 --- a/qa/cephfs/overrides/ignorelist_health.yaml +++ b/qa/cephfs/overrides/ignorelist_health.yaml @@ -1,13 +1,15 @@ overrides: ceph: log-ignorelist: + - FS_DEGRADED + - FS_INLINE_DATA_DEPRECATED + - FS_WITH_FAILED_MDS + - MDS_ALL_DOWN + - MDS_DAMAGE + - MDS_DEGRADED + - MDS_FAILED + - MDS_INSUFFICIENT_STANDBY + - MDS_UP_LESS_THAN_MAX + - POOL_APP_NOT_ENABLED - overall HEALTH_ - - \(FS_DEGRADED\) - - \(MDS_FAILED\) - - \(MDS_DEGRADED\) - - \(FS_WITH_FAILED_MDS\) - - \(MDS_DAMAGE\) - - \(MDS_ALL_DOWN\) - - \(MDS_UP_LESS_THAN_MAX\) - - \(FS_INLINE_DATA_DEPRECATED\) - - \(POOL_APP_NOT_ENABLED\) + - Replacing daemon diff --git a/qa/cephfs/overrides/subvol_versions/create_subvol_version_v1.yaml b/qa/cephfs/overrides/subvol_versions/create_subvol_version_v1.yaml new file mode 100644 index 000000000..120b2bf04 --- /dev/null +++ b/qa/cephfs/overrides/subvol_versions/create_subvol_version_v1.yaml @@ -0,0 +1,2 @@ +overrides: + subvolume_version: 1 diff --git a/qa/cephfs/overrides/subvol_versions/create_subvol_version_v2.yaml b/qa/cephfs/overrides/subvol_versions/create_subvol_version_v2.yaml new file mode 100644 index 000000000..c8bcf95c0 --- /dev/null +++ b/qa/cephfs/overrides/subvol_versions/create_subvol_version_v2.yaml @@ -0,0 +1,2 @@ +overrides: + subvolume_version: 2 diff --git a/qa/suites/fs/cephadm/renamevolume/1-rename.yaml b/qa/suites/fs/cephadm/renamevolume/1-rename.yaml index 7f9bc8906..e1d5b9b33 100644 --- a/qa/suites/fs/cephadm/renamevolume/1-rename.yaml +++ b/qa/suites/fs/cephadm/renamevolume/1-rename.yaml @@ -1,7 +1,11 @@ tasks: - cephadm.shell: host.a: + - ceph fs fail foo + - ceph fs set foo refuse_client_session true - ceph fs volume rename foo bar --yes-i-really-mean-it + - ceph fs set bar joinable true + - ceph fs set bar refuse_client_session false - fs.ready: timeout: 300 - cephadm.shell: diff --git a/qa/suites/fs/full/tasks/mgr-osd-full.yaml b/qa/suites/fs/full/tasks/mgr-osd-full.yaml index b4f673e39..a005f5203 100644 --- a/qa/suites/fs/full/tasks/mgr-osd-full.yaml +++ b/qa/suites/fs/full/tasks/mgr-osd-full.yaml @@ -12,7 +12,7 @@ overrides: debug mds: 20 osd: # force bluestore since it's required for ec overwrites osd objectstore: bluestore - bluestore block size: 1073741824 + bluestore block size: 2147483648 tasks: - workunit: cleanup: true diff --git a/qa/suites/fs/functional/subvol_versions/.qa 
b/qa/suites/fs/functional/subvol_versions/.qa new file mode 120000 index 000000000..fea2489fd --- /dev/null +++ b/qa/suites/fs/functional/subvol_versions/.qa @@ -0,0 +1 @@ +../.qa
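The mgrmodules stanza defined in the new qa/cephfs/begin/3-modules.yaml above is deliberately a near-empty merge target: other suite fragments declare the same mgrmodules key and their entries are appended to the sequential list, so every module is enabled before the mgr creates any CephFS mounts. A minimal sketch of such a fragment, modeled on the 3-snaps/yes.yaml change later in this diff:

    mgrmodules:
      sequential:
        - exec:
            mon.a:
              - ceph mgr module enable snap_schedule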
\ No newline at end of file diff --git a/qa/suites/fs/functional/subvol_versions/create_subvol_version_v1.yaml b/qa/suites/fs/functional/subvol_versions/create_subvol_version_v1.yaml new file mode 120000 index 000000000..09cfdb59e --- /dev/null +++ b/qa/suites/fs/functional/subvol_versions/create_subvol_version_v1.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/subvol_versions/create_subvol_version_v1.yaml
\ No newline at end of file diff --git a/qa/suites/fs/functional/subvol_versions/create_subvol_version_v2.yaml b/qa/suites/fs/functional/subvol_versions/create_subvol_version_v2.yaml new file mode 120000 index 000000000..5a4de14e7 --- /dev/null +++ b/qa/suites/fs/functional/subvol_versions/create_subvol_version_v2.yaml @@ -0,0 +1 @@ +.qa/cephfs/overrides/subvol_versions/create_subvol_version_v2.yaml
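On the renamevolume change above: ceph fs volume rename now requires the filesystem to be failed with refuse_client_session set, so no client can hold or re-establish a session mid-rename; the fragment therefore brackets the rename with those commands and undoes both settings afterwards. The same sequence, annotated (volume names as in the fragment):

    tasks:
    - cephadm.shell:
        host.a:
          # take the fs offline and refuse new client sessions before renaming
          - ceph fs fail foo
          - ceph fs set foo refuse_client_session true
          - ceph fs volume rename foo bar --yes-i-really-mean-it
          # make the renamed fs joinable again and re-admit clients
          - ceph fs set bar joinable true
          - ceph fs set bar refuse_client_session false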
\ No newline at end of file diff --git a/qa/suites/fs/functional/tasks/client-recovery.yaml b/qa/suites/fs/functional/tasks/client-recovery.yaml index e67acc3ab..7ea93a367 100644 --- a/qa/suites/fs/functional/tasks/client-recovery.yaml +++ b/qa/suites/fs/functional/tasks/client-recovery.yaml @@ -9,6 +9,9 @@ overrides: - MDS_CLIENT_LATE_RELEASE - t responding to mclientcaps - file system flag refuse_client_session is set + - Degraded data redundancy + - MDS_CLIENTS_LAGGY + - Reduced data availability tasks: - cephfs_test_runner: fail_on_skip: false diff --git a/qa/suites/fs/functional/tasks/snap-schedule.yaml b/qa/suites/fs/functional/tasks/snap-schedule.yaml index f2e62b050..26922abed 100644 --- a/qa/suites/fs/functional/tasks/snap-schedule.yaml +++ b/qa/suites/fs/functional/tasks/snap-schedule.yaml @@ -6,7 +6,7 @@ overrides: debug ms: 1 debug finisher: 20 debug client: 20 - log-whitelist: + log-ignorelist: - OSD full dropping all updates - OSD near full - pausewr flag diff --git a/qa/suites/fs/functional/tasks/snap_schedule_snapdir.yaml b/qa/suites/fs/functional/tasks/snap_schedule_snapdir.yaml index 7bbcf000f..2a175dbf1 100644 --- a/qa/suites/fs/functional/tasks/snap_schedule_snapdir.yaml +++ b/qa/suites/fs/functional/tasks/snap_schedule_snapdir.yaml @@ -6,7 +6,7 @@ overrides: debug ms: 1 debug finisher: 20 debug client: 20 - log-whitelist: + log-ignorelist: - OSD full dropping all updates - OSD near full - pausewr flag diff --git a/qa/suites/fs/mirror-ha/overrides/ignorelist_health.yaml b/qa/suites/fs/mirror-ha/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..4cb7d981d --- /dev/null +++ b/qa/suites/fs/mirror-ha/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +./.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mirror-ha/overrides/whitelist_health.yaml b/qa/suites/fs/mirror-ha/overrides/whitelist_health.yaml deleted file mode 100644 index d40fa4cb8..000000000 --- a/qa/suites/fs/mirror-ha/overrides/whitelist_health.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - log-ignorelist: - - overall HEALTH_ - - \(FS_DEGRADED\) - - \(MDS_FAILED\) - - \(MDS_DEGRADED\) - - \(FS_WITH_FAILED_MDS\) - - \(MDS_DAMAGE\) - - \(MDS_ALL_DOWN\) - - \(MDS_UP_LESS_THAN_MAX\) - - \(FS_INLINE_DATA_DEPRECATED\) - - Reduced data availability - - Degraded data redundancy diff --git a/qa/suites/fs/mirror/overrides/ignorelist_health.yaml b/qa/suites/fs/mirror/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..4cb7d981d --- /dev/null +++ b/qa/suites/fs/mirror/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +./.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/mirror/overrides/whitelist_health.yaml b/qa/suites/fs/mirror/overrides/whitelist_health.yaml deleted file mode 100644 index d40fa4cb8..000000000 --- a/qa/suites/fs/mirror/overrides/whitelist_health.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - log-ignorelist: - - overall HEALTH_ - - \(FS_DEGRADED\) - - \(MDS_FAILED\) - - \(MDS_DEGRADED\) - - \(FS_WITH_FAILED_MDS\) - - \(MDS_DAMAGE\) - - \(MDS_ALL_DOWN\) - - \(MDS_UP_LESS_THAN_MAX\) - - \(FS_INLINE_DATA_DEPRECATED\) - - Reduced data availability - - Degraded data redundancy diff --git a/qa/suites/fs/nfs/overrides/ignorelist_health.yaml b/qa/suites/fs/nfs/overrides/ignorelist_health.yaml index 8bfe4dc6f..5cb891a95 100644..120000 --- a/qa/suites/fs/nfs/overrides/ignorelist_health.yaml +++ b/qa/suites/fs/nfs/overrides/ignorelist_health.yaml @@ -1,13 +1 @@ -overrides: - ceph: - log-ignorelist: - - overall HEALTH_ - - \(FS_DEGRADED\) - - \(MDS_FAILED\) - - \(MDS_DEGRADED\) - - \(FS_WITH_FAILED_MDS\) - - \(MDS_DAMAGE\) - - \(MDS_ALL_DOWN\) - - \(MDS_UP_LESS_THAN_MAX\) - - \(FS_INLINE_DATA_DEPRECATED\) - - \(OSD_DOWN\) +.qa/cephfs/overrides/ignorelist_health.yaml
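The recurring pattern here: per-suite copies of the health ignorelist are replaced by symlinks to the shared qa/cephfs/overrides/ignorelist_health.yaml, and anything suite-specific moves into a small local fragment. Since teuthology appends list values when merging overrides, the shared and local lists combine; the mds_upgrade_sequence suite just below keeps only its OSD_DOWN entry locally:

    overrides:
      ceph:
        log-ignorelist:
          - OSD_DOWN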
\ No newline at end of file diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_upgrade.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_upgrade.yaml new file mode 100644 index 000000000..713adb962 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_upgrade.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + log-ignorelist: + - OSD_DOWN diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/quincy.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/quincy.yaml new file mode 100644 index 000000000..4a21021c0 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/quincy.yaml @@ -0,0 +1,32 @@ +meta: +- desc: | + setup ceph/quincy + +tasks: +- install: + branch: quincy + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- cephadm: + image: quay.ceph.io/ceph-ci/ceph:quincy + roleless: true + cephadm_branch: quincy + cephadm_git_url: https://github.com/ceph/ceph + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing quincy cephadm ..." +- cephadm.shell: + host.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/fs/workload/tasks/0-subvolume/no-subvolume.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/$ index e69de29bb..e69de29bb 100644 --- a/qa/suites/fs/workload/tasks/0-subvolume/no-subvolume.yaml +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/$ diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/reef.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/reef.yaml new file mode 100644 index 000000000..c53e8b55d --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/reef.yaml @@ -0,0 +1,31 @@ +meta: +- desc: | + setup ceph/reef + +tasks: +- install: + branch: reef + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- cephadm: + image: quay.ceph.io/ceph-ci/ceph:reef + roleless: true + compiled_cephadm_branch: reef + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing reef cephadm ..." +- cephadm.shell: + host.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/v18.2.0.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/v18.2.0.yaml new file mode 100644 index 000000000..98bb210d1 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/v18.2.0.yaml @@ -0,0 +1,31 @@ +meta: +- desc: | + setup ceph/v18.2.0 + +tasks: +- install: + tag: v18.2.0 + exclude_packages: + - ceph-volume +- print: "**** done install task..." 
+- cephadm: + image: quay.io/ceph/ceph:v18.2.0 + roleless: true + compiled_cephadm_branch: reef + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing v18.2.0 cephadm ..." +- cephadm.shell: + host.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/v18.2.1.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/v18.2.1.yaml new file mode 100644 index 000000000..ce45d9ea9 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/0-from/reef/v18.2.1.yaml @@ -0,0 +1,31 @@ +meta: +- desc: | + setup ceph/v18.2.1 + +tasks: +- install: + tag: v18.2.1 + exclude_packages: + - ceph-volume +- print: "**** done install task..." +- cephadm: + image: quay.io/ceph/ceph:v18.2.1 + roleless: true + compiled_cephadm_branch: reef + conf: + osd: + #set config option for which cls modules are allowed to be loaded / used + osd_class_load_list: "*" + osd_class_default_list: "*" +- print: "**** done end installing v18.2.1 cephadm ..." +- cephadm.shell: + host.a: + - ceph config set mgr mgr/cephadm/use_repo_digest true --force +- print: "**** done cephadm.shell ceph config set mgr..." +- cephadm.shell: + host.a: + - ceph orch status + - ceph orch ps + - ceph orch ls + - ceph orch host ls + - ceph orch device ls diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client/fuse.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client/fuse.yaml new file mode 100644 index 000000000..5318fd1a9 --- /dev/null +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client/fuse.yaml @@ -0,0 +1,3 @@ +tasks: +- ceph-fuse: +- print: "**** done client" diff --git a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client/kclient.yaml index 92b9dda84..92b9dda84 100644 --- a/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client.yaml +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/tasks/2-client/kclient.yaml diff --git a/qa/suites/fs/valgrind/mirror/overrides/ignorelist_health.yaml b/qa/suites/fs/valgrind/mirror/overrides/ignorelist_health.yaml new file mode 120000 index 000000000..4cb7d981d --- /dev/null +++ b/qa/suites/fs/valgrind/mirror/overrides/ignorelist_health.yaml @@ -0,0 +1 @@ +./.qa/cephfs/overrides/ignorelist_health.yaml
\ No newline at end of file diff --git a/qa/suites/fs/valgrind/mirror/overrides/whitelist_health.yaml b/qa/suites/fs/valgrind/mirror/overrides/whitelist_health.yaml deleted file mode 100644 index d40fa4cb8..000000000 --- a/qa/suites/fs/valgrind/mirror/overrides/whitelist_health.yaml +++ /dev/null @@ -1,14 +0,0 @@ -overrides: - ceph: - log-ignorelist: - - overall HEALTH_ - - \(FS_DEGRADED\) - - \(MDS_FAILED\) - - \(MDS_DEGRADED\) - - \(FS_WITH_FAILED_MDS\) - - \(MDS_DAMAGE\) - - \(MDS_ALL_DOWN\) - - \(MDS_UP_LESS_THAN_MAX\) - - \(FS_INLINE_DATA_DEPRECATED\) - - Reduced data availability - - Degraded data redundancy diff --git a/qa/suites/fs/workload/begin/3-modules.yaml b/qa/suites/fs/workload/begin/3-modules.yaml new file mode 120000 index 000000000..1eba706a5 --- /dev/null +++ b/qa/suites/fs/workload/begin/3-modules.yaml @@ -0,0 +1 @@ +.qa/cephfs/begin/3-modules.yaml
\ No newline at end of file diff --git a/qa/suites/fs/workload/ranks/1.yaml b/qa/suites/fs/workload/ranks/1.yaml index e69de29bb..f9e95daa9 100644 --- a/qa/suites/fs/workload/ranks/1.yaml +++ b/qa/suites/fs/workload/ranks/1.yaml @@ -0,0 +1,4 @@ +overrides: + ceph: + cephfs: + max_mds: 1 diff --git a/qa/suites/fs/workload/ranks/multi/balancer/automatic.yaml b/qa/suites/fs/workload/ranks/multi/balancer/automatic.yaml new file mode 100644 index 000000000..020eaa4bf --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/balancer/automatic.yaml @@ -0,0 +1,4 @@ +tasks: +- exec: + mon.a: + - ceph fs set cephfs balance_automate true diff --git a/qa/suites/fs/workload/ranks/multi/balancer/distributed.yaml.disabled b/qa/suites/fs/workload/ranks/multi/balancer/distributed.yaml.disabled new file mode 100644 index 000000000..be06d5186 --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/balancer/distributed.yaml.disabled @@ -0,0 +1,6 @@ +# distributed pins would be interesting if we had workloads on multiple clients. We do not yet. So it's disabled. +tasks: +- exec: + mon.a: + - ceph fs set cephfs balance_automate false + - ceph fs subvolumegroup pin cephfs qa distributed 1 diff --git a/qa/suites/fs/workload/ranks/multi/balancer/random.yaml b/qa/suites/fs/workload/ranks/multi/balancer/random.yaml new file mode 100644 index 000000000..977e83fc2 --- /dev/null +++ b/qa/suites/fs/workload/ranks/multi/balancer/random.yaml @@ -0,0 +1,10 @@ +overrides: + ceph: + conf: + mds: + mds_export_ephemeral_random_max: 0.10 +tasks: +- exec: + mon.a: + - ceph fs set cephfs balance_automate false + - ceph fs subvolumegroup pin cephfs qa random 0.10 diff --git a/qa/suites/fs/workload/tasks/3-snaps/yes.yaml b/qa/suites/fs/workload/tasks/3-snaps/yes.yaml index 598f7e215..69f53768d 100644 --- a/qa/suites/fs/workload/tasks/3-snaps/yes.yaml +++ b/qa/suites/fs/workload/tasks/3-snaps/yes.yaml @@ -1,3 +1,10 @@ +mgrmodules: + sequential: + - exec: + mon.a: + - ceph mgr module enable snap_schedule + - ceph config set mgr mgr/snap_schedule/allow_m_granularity true + - ceph config set mgr mgr/snap_schedule/dump_on_update true overrides: ceph: conf: @@ -12,11 +19,8 @@ overrides: tasks: - exec: mon.a: - - ceph mgr module enable snap_schedule - - ceph config set mgr mgr/snap_schedule/allow_m_granularity true - - ceph config set mgr mgr/snap_schedule/dump_on_update true - - ceph fs snap-schedule add --fs=cephfs --path=/ --snap_schedule=1M - - ceph fs snap-schedule retention add --fs=cephfs --path=/ --retention-spec-or-period=6M3h + - ceph fs snap-schedule add --fs=cephfs --path=/ --snap_schedule=1m + - ceph fs snap-schedule retention add --fs=cephfs --path=/ --retention-spec-or-period=6m3h - ceph fs snap-schedule status --fs=cephfs --path=/ - ceph fs snap-schedule list --fs=cephfs --path=/ --recursive=true - date +%s > START_TIME diff --git a/qa/suites/rbd/nbd/% b/qa/suites/krbd/mirror/% index e69de29bb..e69de29bb 100644 --- a/qa/suites/rbd/nbd/% +++ b/qa/suites/krbd/mirror/% diff --git a/qa/suites/rbd/nbd/.qa b/qa/suites/krbd/mirror/.qa index a602a0353..a602a0353 120000 --- a/qa/suites/rbd/nbd/.qa +++ b/qa/suites/krbd/mirror/.qa diff --git a/qa/suites/krbd/mirror/bluestore-bitmap.yaml b/qa/suites/krbd/mirror/bluestore-bitmap.yaml new file mode 120000 index 000000000..a59cf5175 --- /dev/null +++ b/qa/suites/krbd/mirror/bluestore-bitmap.yaml @@ -0,0 +1 @@ +.qa/objectstore/bluestore-bitmap.yaml
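In the 3-snaps/yes.yaml hunk above, the schedule specs change from 1M/6M3h to 1m/6m3h. Lowercase m is the minute granularity that the fragment's mgr/snap_schedule/allow_m_granularity setting enables; uppercase M appears to have been repurposed for months in the snap_schedule module, so the old spelling would no longer mean a snapshot every minute. The corrected commands, for reference:

    tasks:
    - exec:
        mon.a:
          - ceph fs snap-schedule add --fs=cephfs --path=/ --snap_schedule=1m
          - ceph fs snap-schedule retention add --fs=cephfs --path=/ --retention-spec-or-period=6m3h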
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/cluster/.qa b/qa/suites/krbd/mirror/clusters/.qa index a602a0353..a602a0353 120000 --- a/qa/suites/rbd/nbd/cluster/.qa +++ b/qa/suites/krbd/mirror/clusters/.qa diff --git a/qa/suites/krbd/mirror/clusters/2-node.yaml b/qa/suites/krbd/mirror/clusters/2-node.yaml new file mode 100644 index 000000000..e5036ea72 --- /dev/null +++ b/qa/suites/krbd/mirror/clusters/2-node.yaml @@ -0,0 +1,17 @@ +meta: +- desc: 2 ceph clusters with 1 mon, 1 mgr and 3 osd each +roles: +- - cluster1.mon.a + - cluster1.mgr.x + - cluster1.osd.0 + - cluster1.osd.1 + - cluster1.osd.2 +- - cluster2.mon.a + - cluster2.mgr.x + - cluster2.osd.0 + - cluster2.osd.1 + - cluster2.osd.2 + - cluster1.client.mirror + - cluster1.client.mirror.0 + - cluster2.client.mirror + - cluster2.client.mirror.0 diff --git a/qa/suites/krbd/mirror/conf.yaml b/qa/suites/krbd/mirror/conf.yaml new file mode 100644 index 000000000..eb6d72a80 --- /dev/null +++ b/qa/suites/krbd/mirror/conf.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + mon warn on pool no app: false + ms die on skipped message: false diff --git a/qa/suites/rbd/nbd/workloads/.qa b/qa/suites/krbd/mirror/install/.qa index a602a0353..a602a0353 120000 --- a/qa/suites/rbd/nbd/workloads/.qa +++ b/qa/suites/krbd/mirror/install/.qa diff --git a/qa/suites/krbd/mirror/install/ceph.yaml b/qa/suites/krbd/mirror/install/ceph.yaml new file mode 100644 index 000000000..08bb1faa6 --- /dev/null +++ b/qa/suites/krbd/mirror/install/ceph.yaml @@ -0,0 +1,14 @@ +tasks: +- install: + extra_packages: + - rbd-mirror +- ceph: + cluster: cluster1 +- ceph: + cluster: cluster2 +- rbd-mirror: + client: cluster1.client.mirror.0 + thrash: False +- rbd-mirror: + client: cluster2.client.mirror.0 + thrash: False diff --git a/qa/suites/krbd/mirror/ms_mode$/.qa b/qa/suites/krbd/mirror/ms_mode$/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/mirror/ms_mode$/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/mirror/ms_mode$/crc-rxbounce.yaml b/qa/suites/krbd/mirror/ms_mode$/crc-rxbounce.yaml new file mode 100644 index 000000000..4d27d0113 --- /dev/null +++ b/qa/suites/krbd/mirror/ms_mode$/crc-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc,rxbounce diff --git a/qa/suites/krbd/mirror/ms_mode$/crc.yaml b/qa/suites/krbd/mirror/ms_mode$/crc.yaml new file mode 100644 index 000000000..3b072578f --- /dev/null +++ b/qa/suites/krbd/mirror/ms_mode$/crc.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=crc diff --git a/qa/suites/krbd/mirror/ms_mode$/legacy-rxbounce.yaml b/qa/suites/krbd/mirror/ms_mode$/legacy-rxbounce.yaml new file mode 100644 index 000000000..244e45cbc --- /dev/null +++ b/qa/suites/krbd/mirror/ms_mode$/legacy-rxbounce.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy,rxbounce diff --git a/qa/suites/krbd/mirror/ms_mode$/legacy.yaml b/qa/suites/krbd/mirror/ms_mode$/legacy.yaml new file mode 100644 index 000000000..0048dcb0c --- /dev/null +++ b/qa/suites/krbd/mirror/ms_mode$/legacy.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=legacy diff --git a/qa/suites/krbd/mirror/ms_mode$/secure.yaml b/qa/suites/krbd/mirror/ms_mode$/secure.yaml new file mode 100644 index 000000000..a735db18d --- /dev/null +++ b/qa/suites/krbd/mirror/ms_mode$/secure.yaml @@ -0,0 +1,5 @@ +overrides: + ceph: + conf: + client: + rbd default map options: ms_mode=secure diff --git a/qa/suites/krbd/mirror/tasks/.qa b/qa/suites/krbd/mirror/tasks/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/krbd/mirror/tasks/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/krbd/mirror/tasks/compare-mirror-image-alternate-primary.yaml b/qa/suites/krbd/mirror/tasks/compare-mirror-image-alternate-primary.yaml new file mode 100644 index 000000000..42ee5a274 --- /dev/null +++ b/qa/suites/krbd/mirror/tasks/compare-mirror-image-alternate-primary.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_system_packages: + - pv +tasks: +- workunit: + clients: + cluster1.client.mirror: + - rbd/compare_mirror_image_alternate_primary.sh + env: + RBD_DEVICE_TYPE: 'krbd' + RBD_MIRROR_USE_RBD_MIRROR: '1' + timeout: 3h diff --git a/qa/suites/krbd/mirror/tasks/compare-mirror-images.yaml b/qa/suites/krbd/mirror/tasks/compare-mirror-images.yaml new file mode 100644 index 000000000..30d147de9 --- /dev/null +++ b/qa/suites/krbd/mirror/tasks/compare-mirror-images.yaml @@ -0,0 +1,14 @@ +overrides: + install: + ceph: + extra_system_packages: + - pv +tasks: +- workunit: + clients: + cluster1.client.mirror: + - rbd/compare_mirror_images.sh + env: + RBD_DEVICE_TYPE: 'krbd' + RBD_MIRROR_USE_RBD_MIRROR: '1' + timeout: 3h diff --git a/qa/suites/netsplit/ceph.yaml b/qa/suites/netsplit/ceph.yaml index ddf54b3a3..7bdb78c9e 100644 --- a/qa/suites/netsplit/ceph.yaml +++ b/qa/suites/netsplit/ceph.yaml @@ -11,7 +11,7 @@ overrides: mon osdmap full prune interval: 2 mon osdmap full prune txsize: 2 # thrashing monitors may make mgr have trouble w/ its keepalive - log-whitelist: + log-ignorelist: - overall HEALTH_ - \(MGR_DOWN\) - \(MON_DOWN\) diff --git a/qa/suites/rbd/nbd/cluster/+ b/qa/suites/orch/cephadm/no-agent-workunits/% index e69de29bb..e69de29bb 100644 --- a/qa/suites/rbd/nbd/cluster/+ +++ b/qa/suites/orch/cephadm/no-agent-workunits/% diff --git a/qa/suites/orch/cephadm/no-agent-workunits/.qa b/qa/suites/orch/cephadm/no-agent-workunits/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/orch/cephadm/no-agent-workunits/.qa @@ -0,0 +1 @@ +../.qa/
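A note on the marker files moved around in this diff: in a teuthology suite directory, a % file convolves the subdirectories into a cross product of jobs, a + file folds all sibling fragments into a single job, and a $ file (or a directory name ending in $) makes the suite builder pick exactly one sibling at random. That is why an empty file can be "renamed" into tasks/0-from/reef/$ above (each run upgrades from only one of the reef fragments), and why the new ms_mode$ directory contributes one randomly chosen messenger mode per job. Each ms_mode fragment is just a client override, e.g. the crc variant:

    overrides:
      ceph:
        conf:
          client:
            rbd default map options: ms_mode=crc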
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/no-agent-workunits/0-distro b/qa/suites/orch/cephadm/no-agent-workunits/0-distro new file mode 120000 index 000000000..4b341719d --- /dev/null +++ b/qa/suites/orch/cephadm/no-agent-workunits/0-distro @@ -0,0 +1 @@ +.qa/distros/container-hosts
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/no-agent-workunits/mon_election b/qa/suites/orch/cephadm/no-agent-workunits/mon_election new file mode 120000 index 000000000..3f331e621 --- /dev/null +++ b/qa/suites/orch/cephadm/no-agent-workunits/mon_election @@ -0,0 +1 @@ +.qa/mon_election
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/task/test_adoption.yaml b/qa/suites/orch/cephadm/no-agent-workunits/task/test_adoption.yaml index e04fc1eea..e04fc1eea 100644 --- a/qa/suites/orch/cephadm/workunits/task/test_adoption.yaml +++ b/qa/suites/orch/cephadm/no-agent-workunits/task/test_adoption.yaml diff --git a/qa/suites/orch/cephadm/no-agent-workunits/task/test_cephadm_timeout.yaml b/qa/suites/orch/cephadm/no-agent-workunits/task/test_cephadm_timeout.yaml new file mode 100644 index 000000000..24b53d029 --- /dev/null +++ b/qa/suites/orch/cephadm/no-agent-workunits/task/test_cephadm_timeout.yaml @@ -0,0 +1,13 @@ +roles: +- - host.a + - mon.a + - mgr.a + - osd.0 + - client.0 +tasks: +- install: +- cephadm: +- workunit: + clients: + client.0: + - cephadm/test_cephadm_timeout.py
\ No newline at end of file diff --git a/qa/suites/orch/cephadm/workunits/task/test_orch_cli.yaml b/qa/suites/orch/cephadm/no-agent-workunits/task/test_orch_cli.yaml index ec65fb116..ec65fb116 100644 --- a/qa/suites/orch/cephadm/workunits/task/test_orch_cli.yaml +++ b/qa/suites/orch/cephadm/no-agent-workunits/task/test_orch_cli.yaml diff --git a/qa/suites/orch/cephadm/workunits/task/test_orch_cli_mon.yaml b/qa/suites/orch/cephadm/no-agent-workunits/task/test_orch_cli_mon.yaml index 2a33dc839..2a33dc839 100644 --- a/qa/suites/orch/cephadm/workunits/task/test_orch_cli_mon.yaml +++ b/qa/suites/orch/cephadm/no-agent-workunits/task/test_orch_cli_mon.yaml diff --git a/qa/suites/orch/cephadm/workunits/task/test_extra_daemon_features.yaml b/qa/suites/orch/cephadm/workunits/task/test_extra_daemon_features.yaml new file mode 100644 index 000000000..b5e0ec98f --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_extra_daemon_features.yaml @@ -0,0 +1,74 @@ +roles: +- - host.a + - mon.a + - mgr.a + - osd.0 +- - host.b + - mon.b + - mgr.b + - osd.1 +tasks: +- install: +- cephadm: +- exec: + all-hosts: + - mkdir /etc/cephadm_testing +- cephadm.apply: + specs: + - service_type: mon + placement: + host_pattern: '*' + extra_container_args: + - "--cpus=2" + extra_entrypoint_args: + - "--debug_ms 10" + - service_type: container + service_id: foo + placement: + host_pattern: '*' + spec: + image: "quay.io/fedora/fedora:latest" + entrypoint: "bash" + extra_container_args: + - "-v" + - "/etc/cephadm_testing:/root/cephadm_testing" + extra_entrypoint_args: + - "/root/write_thing_to_file.sh" + - "-c" + - "testing_custom_containers" + - "-o" + - "/root/cephadm_testing/testing.txt" + custom_configs: + - mount_path: "/root/write_thing_to_file.sh" + content: | + while getopts "o:c:" opt; do + case ${opt} in + o ) + OUT_FILE=${OPTARG} + ;; + c ) + CONTENT=${OPTARG} + esac + done + echo $CONTENT > $OUT_FILE + sleep infinity +- cephadm.wait_for_service: + service: mon +- cephadm.wait_for_service: + service: container.foo +- exec: + host.a: + - | + set -ex + FSID=$(/home/ubuntu/cephtest/cephadm shell -- ceph fsid) + sleep 60 + # check extra container and entrypoint args written to mon unit run file + grep "\-\-cpus=2" /var/lib/ceph/$FSID/mon.*/unit.run + grep "\-\-debug_ms 10" /var/lib/ceph/$FSID/mon.*/unit.run + # check that custom container properly wrote content to file. + # This requires the custom config, extra container args, and + # entrypoint args to all be working in order for this to have + # been written. 
The container entrypoint was set up with custom_configs, + # the content and where to write to with the entrypoint args, and the mounting + # of the /etc/cephadm_testing dir with extra container args + grep "testing_custom_containers" /etc/cephadm_testing/testing.txt diff --git a/qa/suites/orch/cephadm/workunits/task/test_host_drain.yaml b/qa/suites/orch/cephadm/workunits/task/test_host_drain.yaml new file mode 100644 index 000000000..c195bc052 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_host_drain.yaml @@ -0,0 +1,72 @@ +roles: +- - host.a + - mon.a + - mgr.a + - osd.0 + - osd.1 +- - host.b + - mon.b + - mgr.b + - osd.2 + - osd.3 +- - host.c + - mon.c + - osd.4 + - osd.5 +tasks: +- install: +- cephadm: +- cephadm.shell: + host.a: + - | + set -ex + HOSTNAMES=$(ceph orch host ls --format json | jq -r '.[] | .hostname') + for host in $HOSTNAMES; do + # find the hostname for "host.c" which will have no mgr + HAS_MGRS=$(ceph orch ps --hostname ${host} --format json | jq 'any(.daemon_type == "mgr")') + if [ "$HAS_MGRS" == "false" ]; then + HOST_C="${host}" + fi + done + # One last thing to worry about before draining the host + # is that the teuthology test tends to put the explicit + # hostnames in the placement for the mon service. + # We want to make sure we can drain without providing + # --force and there is a check for the host being removed + # being listed explicitly in the placements. Therefore, + # we should remove it from the mon placement. + ceph orch ls mon --export > mon.yaml + sed /"$HOST_C"/d mon.yaml > mon_adjusted.yaml + ceph orch apply -i mon_adjusted.yaml + # now drain that host + ceph orch host drain $HOST_C --zap-osd-devices + # wait for drain to complete + HOST_C_DAEMONS=$(ceph orch ps --hostname $HOST_C) + while [ "$HOST_C_DAEMONS" != "No daemons reported" ]; do + sleep 15 + HOST_C_DAEMONS=$(ceph orch ps --hostname $HOST_C) + done + # we want to check the ability to remove the host from + # the CRUSH map, so we should first verify the host is in + # the CRUSH map. + ceph osd getcrushmap -o compiled-crushmap + crushtool -d compiled-crushmap -o crushmap.txt + CRUSH_MAP=$(cat crushmap.txt) + if ! grep -q "$HOST_C" <<< "$CRUSH_MAP"; then + printf "Expected to see $HOST_C in CRUSH map. Saw:\n\n$CRUSH_MAP" + exit 1 + fi + # If the drain was successful, we should be able to remove the + # host without force with no issues. 
If there are still daemons + # we will get a response telling us to drain the host and a + # non-zero return code + ceph orch host rm $HOST_C --rm-crush-entry + # verify we've successfully removed the host from the CRUSH map + sleep 30 + ceph osd getcrushmap -o compiled-crushmap + crushtool -d compiled-crushmap -o crushmap.txt + CRUSH_MAP=$(cat crushmap.txt) + if grep -q "$HOST_C" <<< "$CRUSH_MAP"; then + printf "Saw $HOST_C in CRUSH map after it should have been removed.\n\n$CRUSH_MAP" + exit 1 + fi diff --git a/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml b/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml index 31724f9e8..84abb702c 100644 --- a/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml +++ b/qa/suites/rados/singleton/all/ec-inconsistent-hinfo.yaml @@ -30,6 +30,7 @@ tasks: - slow request - unfound - \(POOL_APP_NOT_ENABLED\) + - enough copies available conf: osd: osd min pg log entries: 5 diff --git a/qa/suites/rados/singleton/all/mon-config.yaml b/qa/suites/rados/singleton/all/mon-config.yaml index ab1eb81b0..5e36a34a6 100644 --- a/qa/suites/rados/singleton/all/mon-config.yaml +++ b/qa/suites/rados/singleton/all/mon-config.yaml @@ -6,7 +6,7 @@ roles: - osd.0 - osd.1 - osd.2 - - client.0 + - client.rgw openstack: - volumes: # attached to each instance count: 3 @@ -18,6 +18,7 @@ tasks: - sudo ceph config set mgr mgr_pool false --force log-ignorelist: - \(POOL_APP_NOT_ENABLED\) +- rgw: [client.rgw] - workunit: clients: all: diff --git a/qa/suites/rbd/device/% b/qa/suites/rbd/device/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/device/% diff --git a/qa/suites/rbd/device/.qa b/qa/suites/rbd/device/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/device/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/base b/qa/suites/rbd/device/base index fd10a859d..fd10a859d 120000 --- a/qa/suites/rbd/nbd/base +++ b/qa/suites/rbd/device/base diff --git a/qa/suites/rbd/device/cluster/+ b/qa/suites/rbd/device/cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/rbd/device/cluster/+ diff --git a/qa/suites/rbd/device/cluster/.qa b/qa/suites/rbd/device/cluster/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/device/cluster/.qa @@ -0,0 +1 @@ +../.qa/
\ No newline at end of file diff --git a/qa/suites/rbd/nbd/cluster/fixed-3.yaml b/qa/suites/rbd/device/cluster/fixed-3.yaml index 182589152..182589152 100644 --- a/qa/suites/rbd/nbd/cluster/fixed-3.yaml +++ b/qa/suites/rbd/device/cluster/fixed-3.yaml diff --git a/qa/suites/rbd/nbd/cluster/openstack.yaml b/qa/suites/rbd/device/cluster/openstack.yaml index 48becbb83..48becbb83 120000 --- a/qa/suites/rbd/nbd/cluster/openstack.yaml +++ b/qa/suites/rbd/device/cluster/openstack.yaml diff --git a/qa/suites/rbd/nbd/conf b/qa/suites/rbd/device/conf index 4bc0fe86c..4bc0fe86c 120000 --- a/qa/suites/rbd/nbd/conf +++ b/qa/suites/rbd/device/conf diff --git a/qa/suites/rbd/nbd/msgr-failures b/qa/suites/rbd/device/msgr-failures index 03689aa44..03689aa44 120000 --- a/qa/suites/rbd/nbd/msgr-failures +++ b/qa/suites/rbd/device/msgr-failures diff --git a/qa/suites/rbd/nbd/objectstore b/qa/suites/rbd/device/objectstore index c40bd3261..c40bd3261 120000 --- a/qa/suites/rbd/nbd/objectstore +++ b/qa/suites/rbd/device/objectstore diff --git a/qa/suites/rbd/nbd/supported-random-distro$ b/qa/suites/rbd/device/supported-random-distro$ index 0862b4457..0862b4457 120000 --- a/qa/suites/rbd/nbd/supported-random-distro$ +++ b/qa/suites/rbd/device/supported-random-distro$ diff --git a/qa/suites/rbd/nbd/thrashers b/qa/suites/rbd/device/thrashers index f461dadc3..f461dadc3 120000 --- a/qa/suites/rbd/nbd/thrashers +++ b/qa/suites/rbd/device/thrashers diff --git a/qa/suites/rbd/nbd/thrashosds-health.yaml b/qa/suites/rbd/device/thrashosds-health.yaml index 9124eb1aa..9124eb1aa 120000 --- a/qa/suites/rbd/nbd/thrashosds-health.yaml +++ b/qa/suites/rbd/device/thrashosds-health.yaml diff --git a/qa/suites/rbd/device/workloads/.qa b/qa/suites/rbd/device/workloads/.qa new file mode 120000 index 000000000..a602a0353 --- /dev/null +++ b/qa/suites/rbd/device/workloads/.qa @@ -0,0 +1 @@ +../.qa/
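Context for the rbd/nbd to rbd/device rename: the rbd workunit scripts choose the attachment type from the RBD_DEVICE_TYPE environment variable, so one suite can now drive both krbd and rbd-nbd. The new krbd fragment follows just below; the renamed nbd counterpart's content is not shown in this diff, but judging by the nbd mirror workloads elsewhere in it, it presumably looks like:

    overrides:
      install:
        ceph:
          extra_packages:
            - rbd-nbd            # nbd runs additionally need the rbd-nbd package
          extra_system_packages:
            - pv
    tasks:
    - workunit:
        clients:
          all:
            - rbd/diff_continuous.sh
        env:
          RBD_DEVICE_TYPE: "nbd"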
\ No newline at end of file diff --git a/qa/suites/rbd/device/workloads/diff-continuous-krbd.yaml b/qa/suites/rbd/device/workloads/diff-continuous-krbd.yaml new file mode 100644 index 000000000..5907718d5 --- /dev/null +++ b/qa/suites/rbd/device/workloads/diff-continuous-krbd.yaml @@ -0,0 +1,12 @@ +overrides: + install: + ceph: + extra_system_packages: + - pv +tasks: +- workunit: + clients: + all: + - rbd/diff_continuous.sh + env: + RBD_DEVICE_TYPE: "krbd" diff --git a/qa/suites/rbd/nbd/workloads/rbd_nbd_diff_continuous.yaml b/qa/suites/rbd/device/workloads/diff-continuous-nbd.yaml index e0a7ebe33..e0a7ebe33 100644 --- a/qa/suites/rbd/nbd/workloads/rbd_nbd_diff_continuous.yaml +++ b/qa/suites/rbd/device/workloads/diff-continuous-nbd.yaml diff --git a/qa/suites/rbd/nbd/workloads/rbd_fsx_nbd.yaml b/qa/suites/rbd/device/workloads/rbd_fsx_nbd.yaml index b5737671f..b5737671f 100644 --- a/qa/suites/rbd/nbd/workloads/rbd_fsx_nbd.yaml +++ b/qa/suites/rbd/device/workloads/rbd_fsx_nbd.yaml diff --git a/qa/suites/rbd/nbd/workloads/rbd_nbd.yaml b/qa/suites/rbd/device/workloads/rbd_nbd.yaml index ededea024..ededea024 100644 --- a/qa/suites/rbd/nbd/workloads/rbd_nbd.yaml +++ b/qa/suites/rbd/device/workloads/rbd_nbd.yaml diff --git a/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-krbd.yaml b/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-krbd.yaml new file mode 100644 index 000000000..771400d01 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-krbd.yaml @@ -0,0 +1,13 @@ +overrides: + install: + ceph: + extra_system_packages: + - pv +tasks: +- workunit: + clients: + cluster1.client.mirror: + - rbd/compare_mirror_image_alternate_primary.sh + env: + RBD_DEVICE_TYPE: 'krbd' + timeout: 3h diff --git a/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-nbd.yaml b/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-nbd.yaml new file mode 100644 index 000000000..e87d0e8ce --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/compare-mirror-image-alternate-primary-nbd.yaml @@ -0,0 +1,15 @@ +overrides: + install: + ceph: + extra_packages: + - rbd-nbd + extra_system_packages: + - pv +tasks: +- workunit: + clients: + cluster1.client.mirror: + - rbd/compare_mirror_image_alternate_primary.sh + env: + RBD_DEVICE_TYPE: 'nbd' + timeout: 3h diff --git a/qa/suites/rbd/mirror/workloads/compare-mirror-images-krbd.yaml b/qa/suites/rbd/mirror/workloads/compare-mirror-images-krbd.yaml new file mode 100644 index 000000000..fc161987f --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/compare-mirror-images-krbd.yaml @@ -0,0 +1,13 @@ +overrides: + install: + ceph: + extra_system_packages: + - pv +tasks: +- workunit: + clients: + cluster1.client.mirror: + - rbd/compare_mirror_images.sh + env: + RBD_DEVICE_TYPE: 'krbd' + timeout: 3h diff --git a/qa/suites/rbd/mirror/workloads/compare-mirror-images-nbd.yaml b/qa/suites/rbd/mirror/workloads/compare-mirror-images-nbd.yaml new file mode 100644 index 000000000..ed02ed257 --- /dev/null +++ b/qa/suites/rbd/mirror/workloads/compare-mirror-images-nbd.yaml @@ -0,0 +1,15 @@ +overrides: + install: + ceph: + extra_packages: + - rbd-nbd + extra_system_packages: + - pv +tasks: +- workunit: + clients: + cluster1.client.mirror: + - rbd/compare_mirror_images.sh + env: + RBD_DEVICE_TYPE: 'nbd' + timeout: 3h diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/% b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ 
b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/% diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/point-to-point-upgrade.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/point-to-point-upgrade.yaml new file mode 100644 index 000000000..443b89fcf --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/point-to-point-upgrade.yaml @@ -0,0 +1,173 @@ +meta: +- desc: | + Run ceph on two nodes, using one of them as a client, + with a separate client-only node. + Use xfs beneath the osds. + install ceph/reef v18.2.1 and the v18.2.x point versions + run workload and upgrade-sequence in parallel + (every point release should be tested) + run workload and upgrade-sequence in parallel + install ceph/reef latest version + run workload and upgrade-sequence in parallel + Overall upgrade path is - reef-latest.point-1 => reef-latest.point => reef-latest +overrides: + ceph: + log-ignorelist: + - reached quota + - scrub + - osd_map_max_advance + - wrongly marked + - FS_DEGRADED + - POOL_APP_NOT_ENABLED + - CACHE_POOL_NO_HIT_SET + - POOL_FULL + - SMALLER_PG + - pool\(s\) full + - OSD_DOWN + - missing hit_sets + - CACHE_POOL_NEAR_FULL + - PG_AVAILABILITY + - PG_DEGRADED + - application not enabled + - cache pools at or near target size + - filesystem is degraded + - OBJECT_MISPLACED + ### ref: https://tracker.ceph.com/issues/40251 + #removed see ^ - failed to encode map + + fs: xfs + + conf: + global: + mon_warn_on_pool_no_app: false + mon_mds_skip_sanity: true + mon: + mon debug unsafe allow tier with nonempty snaps: true + osd: + osd map max advance: 1000 + osd_class_default_list: "*" + osd_class_load_list: "*" + client: + rgw_crypt_require_ssl: false + rgw crypt s3 kms backend: testing + rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= +roles: +- - mon.a + - mds.a + - osd.0 + - osd.1 + - osd.2 + - mgr.x +- - mon.b + - mon.c + - osd.3 + - osd.4 + - osd.5 + - client.0 +- - client.1 +openstack: +- volumes: # attached to each instance + count: 3 + size: 30 # GB +tasks: +- print: "**** done reef about to install v18.2.0 " +- install: + tag: v18.2.0 + # line below can be removed its from jewel test + #exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev', 'librgw2'] +- print: "**** done v18.2.0 install" +- ceph: + fs: xfs + add_osds_to_crush: true +- print: "**** done ceph xfs" +- sequential: + - workload +- print: "**** done workload v18.2.0" + + +####### upgrade to v18.2.1 +- install.upgrade: + #exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev'] + mon.a: + tag: v18.2.1 + mon.b: + tag: v18.2.1 +- parallel: + - workload_reef + - upgrade-sequence_reef +- print: "**** done parallel reef v18.2.1" + +#### upgrade to latest reef +- install.upgrade: + mon.a: + mon.b: +- parallel: + - workload_reef + - upgrade-sequence_reef +- print: "**** done parallel reef branch" + +####################### +workload: + sequential: + - workunit: + clients: + client.0: + - suites/blogbench.sh + +workload_reef: + full_sequential: + - workunit: + branch: reef + # tag: v18.2.1 + clients: + client.1: + - rados/test.sh + - cls + env: + CLS_RBD_GTEST_FILTER: '*:-TestClsRbd.mirror_snapshot' + - print: "**** done rados/test.sh & cls workload_reef" + - sequential: + - rgw: [client.0] + - print: "**** done rgw workload_reef" + - rbd_fsx: + clients: [client.0] + size: 134217728 + - print: "**** done rbd_fsx workload_reef" + +upgrade-sequence_reef: + sequential: + - print: "**** done 
branch: reef install.upgrade" + - ceph.restart: [mds.a] + - sleep: + duration: 60 + - ceph.restart: [osd.0] + - sleep: + duration: 30 + - ceph.restart: [osd.1] + - sleep: + duration: 30 + - ceph.restart: [osd.2] + - sleep: + duration: 30 + - ceph.restart: [osd.3] + - sleep: + duration: 30 + - ceph.restart: [osd.4] + - sleep: + duration: 30 + - ceph.restart: [osd.5] + - sleep: + duration: 60 + - ceph.restart: [mgr.x] + - sleep: + duration: 60 + - ceph.restart: [mon.a] + - sleep: + duration: 60 + - ceph.restart: [mon.b] + - sleep: + duration: 60 + - ceph.restart: [mon.c] + - sleep: + duration: 60 + - print: "**** done ceph.restart all reef branch mds/osd/mon" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/supported-all-distro/centos_8.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/supported-all-distro/centos_8.yaml new file mode 120000 index 000000000..bb4a6aaf3 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/supported-all-distro/centos_8.yaml @@ -0,0 +1 @@ +../../../../../distros/supported-all-distro/centos_8.yaml
\ No newline at end of file diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/supported-all-distro/ubuntu_latest.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/supported-all-distro/ubuntu_latest.yaml new file mode 100644 index 000000000..f20398230 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-parallel/supported-all-distro/ubuntu_latest.yaml @@ -0,0 +1,2 @@ +os_type: ubuntu +os_version: "20.04" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/% b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/% new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/% diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/+ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/+ diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/openstack.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/openstack.yaml new file mode 100644 index 000000000..5caffc353 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/openstack.yaml @@ -0,0 +1,6 @@ +openstack: + - machine: + disk: 100 # GB + - volumes: # attached to each instance + count: 4 + size: 30 # GB diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/start.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/start.yaml new file mode 100644 index 000000000..1271edd8b --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/0-cluster/start.yaml @@ -0,0 +1,33 @@ +meta: +- desc: | + Run ceph on two nodes, + with a separate client-only node. + Use xfs beneath the osds. +overrides: + ceph: + fs: xfs + log-ignorelist: + - overall HEALTH_ + - \(MON_DOWN\) + - \(MGR_DOWN\) + ### ref: https://tracker.ceph.com/issues/40251 + #removed see ^ - failed to encode map + conf: + global: + enable experimental unrecoverable data corrupting features: "*" + mon: + mon warn on osd down out interval zero: false +roles: +- - mon.a + - mon.b + - mon.c + - mgr.x + - osd.0 + - osd.1 + - osd.2 + - osd.3 +- - osd.4 + - osd.5 + - osd.6 + - osd.7 +- - client.0 diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/1-ceph-install/reef.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/1-ceph-install/reef.yaml new file mode 100644 index 000000000..0c7db6ae4 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/1-ceph-install/reef.yaml @@ -0,0 +1,21 @@ +meta: +- desc: | + install ceph/reef v18.2.0 + Overall upgrade path is - reef-latest.point -1 => reef-latest +tasks: +- install: + tag: v18.2.0 + exclude_packages: ['librados3'] + extra_packages: ['librados2'] +- print: "**** done install reef v18.2.0" +- ceph: +- exec: + osd.0: + - ceph osd require-osd-release reef + - ceph osd set-require-min-compat-client reef +- print: "**** done ceph" +overrides: + ceph: + conf: + mon: + mon warn on osd down out interval zero: false diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/1.1.short_pg_log.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/1.1.short_pg_log.yaml new file mode 100644 index 000000000..20cc101de --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/1.1.short_pg_log.yaml @@ -0,0 +1,6 @@ +overrides: + ceph: + conf: + global: + osd_min_pg_log_entries: 1 + osd_max_pg_log_entries: 2 diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/2-partial-upgrade/firsthalf.yaml 
b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/2-partial-upgrade/firsthalf.yaml new file mode 100644 index 000000000..02ba5c1bb --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/2-partial-upgrade/firsthalf.yaml @@ -0,0 +1,13 @@ +meta: +- desc: | + install upgrade ceph/-x on one node only + 1st half + restart : osd.0,1,2,3 +tasks: +- install.upgrade: + osd.0: +- print: "**** done install.upgrade osd.0" +- ceph.restart: + daemons: [mon.a,mon.b,mon.c,mgr.x,osd.0,osd.1,osd.2,osd.3] + mon-health-to-clog: false +- print: "**** done ceph.restart 1st half" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/3-thrash/default.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/3-thrash/default.yaml new file mode 100644 index 000000000..c739d8fea --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/3-thrash/default.yaml @@ -0,0 +1,27 @@ +meta: +- desc: | + randomly kill and revive osd + small chance to increase the number of pgs +overrides: + ceph: + log-ignorelist: + - but it is still running + - wrongly marked me down + - objects unfound and apparently lost + - log bound mismatch + ### ref: https://tracker.ceph.com/issues/40251 + - failed to encode map +tasks: +- parallel: + - stress-tasks +stress-tasks: +- thrashosds: + timeout: 1200 + chance_pgnum_grow: 1 + chance_pgpnum_fix: 1 + chance_thrash_cluster_full: 0 + chance_thrash_pg_upmap: 0 + chance_thrash_pg_upmap_items: 0 + disable_objectstore_tool_tests: true + chance_force_recovery: 0 +- print: "**** done thrashosds 3-thrash" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/+ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/+ diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/fsx.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/fsx.yaml new file mode 100644 index 000000000..fd4081f23 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/fsx.yaml @@ -0,0 +1,8 @@ +meta: +- desc: | + run basic fsx tests for rbd +stress-tasks: +- rbd_fsx: + clients: [client.0] + size: 134217728 +- print: "**** done rbd_fsx 4-workload" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/radosbench.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/radosbench.yaml new file mode 100644 index 000000000..c545936c0 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/radosbench.yaml @@ -0,0 +1,52 @@ +meta: +- desc: | + run randomized correctness test for rados operations + generate write load with rados bench +stress-tasks: +- full_sequential: + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 + - radosbench: + clients: [client.0] + time: 90 +- print: "**** done radosbench 4-workload" diff --git 
a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd-cls.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd-cls.yaml new file mode 100644 index 000000000..c0445533d --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd-cls.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic cls tests for rbd +stress-tasks: +- workunit: + branch: reef + clients: + client.0: + - cls/test_cls_rbd.sh + env: + CLS_RBD_GTEST_FILTER: '*:-TestClsRbd.mirror_snapshot' +- print: "**** done cls/test_cls_rbd.sh 4-workload" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd-import-export.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd-import-export.yaml new file mode 100644 index 000000000..a4bea35a4 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd-import-export.yaml @@ -0,0 +1,12 @@ +meta: +- desc: | + run basic import/export cli tests for rbd +stress-tasks: +- workunit: + branch: reef + clients: + client.0: + - rbd/import_export.sh + env: + RBD_CREATE_ARGS: --new-format +- print: "**** done rbd/import_export.sh 4-workload" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd_api.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd_api.yaml new file mode 100644 index 000000000..025616655 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/rbd_api.yaml @@ -0,0 +1,18 @@ +meta: +- desc: | + librbd C and C++ api tests +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(CACHE_POOL_NO_HIT_SET\) + - \(POOL_APP_NOT_ENABLED\) + - is full \(reached quota + - \(POOL_FULL\) +stress-tasks: +- workunit: + branch: reef + clients: + client.0: + - rbd/test_librbd.sh +- print: "**** done rbd/test_librbd.sh 4-workload" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/readwrite.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/readwrite.yaml new file mode 100644 index 000000000..456868998 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/readwrite.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool, + using only reads, writes, and deletes +stress-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_append_excl: false + op_weights: + read: 45 + write: 45 + delete: 10 +- print: "**** done rados/readwrite 4-workload" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/snaps-few-objects.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/snaps-few-objects.yaml new file mode 100644 index 000000000..ae232d867 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/4-workload/snaps-few-objects.yaml @@ -0,0 +1,18 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool with snapshot operations +stress-tasks: +- full_sequential: + - rados: + clients: [client.0] + ops: 4000 + objects: 50 + write_append_excl: false + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 +- print: "**** done rados/snaps-few-objects 4-workload" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/5-finish-upgrade.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/5-finish-upgrade.yaml new file mode 100644 index 000000000..803737c72 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/5-finish-upgrade.yaml @@ -0,0 
+1,8 @@ +tasks: +- install.upgrade: + osd.4: + client.0: +- ceph.restart: + daemons: [osd.4, osd.5, osd.6, osd.7] + wait-for-healthy: false + wait-for-osds-up: true diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/+ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/+ new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/+ diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/rbd-python.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/rbd-python.yaml new file mode 100644 index 000000000..78e68dbdb --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/rbd-python.yaml @@ -0,0 +1,10 @@ +meta: +- desc: | + librbd python api tests +tasks: +- workunit: + branch: reef + clients: + client.0: + - rbd/test_librbd_python.sh +- print: "**** done rbd/test_librbd_python.sh 7-workload" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/snaps-many-objects.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/snaps-many-objects.yaml new file mode 100644 index 000000000..805bf97c3 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/6-final-workload/snaps-many-objects.yaml @@ -0,0 +1,16 @@ +meta: +- desc: | + randomized correctness test for rados operations on a replicated pool with snapshot operations +tasks: +- rados: + clients: [client.0] + ops: 4000 + objects: 500 + write_append_excl: false + op_weights: + read: 100 + write: 100 + delete: 50 + snap_create: 50 + snap_remove: 50 + rollback: 50 diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-bitmap.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-bitmap.yaml new file mode 100644 index 000000000..b18e04bee --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-bitmap.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + bluestore allocator: bitmap + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-comp.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-comp.yaml new file mode 100644 index 000000000..b408032fd --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-comp.yaml @@ -0,0 +1,23 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + 
bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore compression mode: aggressive + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-stupid.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-stupid.yaml new file mode 100644 index 000000000..ca811f131 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/objectstore/bluestore-stupid.yaml @@ -0,0 +1,43 @@ +overrides: + thrashosds: + bdev_inject_crash: 2 + bdev_inject_crash_probability: .5 + ceph: + fs: xfs + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + bluestore allocator: stupid + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 +# this doesn't work with failures bc the log writes are not atomic across the two backends +# bluestore bluefs env mirror: true + bdev enable discard: true + bdev async discard: true + ceph-deploy: + fs: xfs + bluestore: yes + conf: + osd: + osd objectstore: bluestore + bluestore block size: 96636764160 + debug bluestore: 20 + debug bluefs: 20 + debug rocksdb: 10 + bluestore fsck on mount: true + # lower the full ratios since we can fill up a 100gb osd so quickly + mon osd full ratio: .9 + mon osd backfillfull_ratio: .85 + mon osd nearfull ratio: .8 + osd failsafe full ratio: .95 + bdev enable discard: true + bdev async discard: true + diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/supported-all-distro/ubuntu_latest.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/supported-all-distro/ubuntu_latest.yaml new file mode 100644 index 000000000..f20398230 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/supported-all-distro/ubuntu_latest.yaml @@ -0,0 +1,2 @@ +os_type: ubuntu +os_version: "20.04" diff --git a/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/thrashosds-health.yaml b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/thrashosds-health.yaml new file mode 100644 index 000000000..9903fa578 --- /dev/null +++ b/qa/suites/upgrade/reef-p2p/reef-p2p-stress-split/thrashosds-health.yaml @@ -0,0 +1,15 @@ +overrides: + ceph: + log-ignorelist: + - overall HEALTH_ + - \(OSDMAP_FLAGS\) + - \(OSD_ + - \(PG_ + - \(POOL_ + - \(CACHE_POOL_ + - \(SMALLER_PGP_NUM\) + - \(OBJECT_ + - \(SLOW_OPS\) + - \(REQUEST_SLOW\) + - \(TOO_FEW_PGS\) + - slow request diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py index 516c409e8..e24965026 100644 --- a/qa/tasks/ceph_manager.py +++ b/qa/tasks/ceph_manager.py @@ -234,6 +234,7 @@ class OSDThrasher(Thrasher): self.chance_thrash_pg_upmap_items = self.config.get('chance_thrash_pg_upmap', 1.0) self.random_eio = self.config.get('random_eio') self.chance_force_recovery = self.config.get('chance_force_recovery', 0.3) + self.chance_reset_purged_snaps_last = self.config.get('chance_reset_purged_snaps_last', 0.3) 
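A note on the chance_reset_purged_snaps_last knob added above: OSDThrasher keeps a table of (action, weight) pairs and samples one each iteration, so the default of 0.3 makes this disruption about as likely as forcing recovery. A minimal standalone sketch of that pattern, assuming only a `ceph` CLI on the PATH; pick_and_run and the hard-coded OSD ids are illustrative, not names from the Ceph tree:

    import random
    import subprocess

    def reset_purged_snaps_last(osd_ids):
        # Best effort, as in the thrasher: tell each OSD to drop its
        # purged_snaps_last marker, ignoring OSDs that reject the command.
        for osd in osd_ids:
            try:
                subprocess.run(['ceph', 'tell', f'osd.{osd}',
                                'reset_purged_snaps_last'], check=True)
            except subprocess.CalledProcessError:
                pass

    def pick_and_run(actions):
        # actions: list of (callable, weight). Sample one action with
        # probability proportional to its weight.
        total = sum(weight for _, weight in actions)
        roll = random.uniform(0, total)
        for action, weight in actions:
            if roll < weight:
                return action()
            roll -= weight

    pick_and_run([(lambda: reset_purged_snaps_last([0, 1, 2]), 0.3)])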
num_osds = self.in_osds + self.out_osds self.max_pgs = self.config.get("max_pgs_per_pool_osd", 1200) * len(num_osds) @@ -779,6 +780,19 @@ class OSDThrasher(Thrasher): else: self.cancel_force_recovery() + def reset_purged_snaps_last(self): + """ + Run reset_purged_snaps_last + """ + self.log('reset_purged_snaps_last') + for osd in self.in_osds: + try: + self.ceph_manager.raw_cluster_cmd( + 'tell', "osd.%s" % (str(osd)), + 'reset_purged_snaps_last') + except CommandFailedError: + self.log('Failed to reset_purged_snaps_last, ignoring') + def all_up(self): """ Make sure all osds are up and not out. @@ -1229,6 +1243,8 @@ class OSDThrasher(Thrasher): actions.append((self.thrash_pg_upmap_items, self.chance_thrash_pg_upmap_items,)) if self.chance_force_recovery > 0: actions.append((self.force_cancel_recovery, self.chance_force_recovery)) + if self.chance_reset_purged_snaps_last > 0: + actions.append((self.reset_purged_snaps_last, self.chance_reset_purged_snaps_last)) for key in ['heartbeat_inject_failure', 'filestore_inject_stall']: for scenario in [ @@ -1524,11 +1540,9 @@ class CephManager: self.cephadm = cephadm self.testdir = teuthology.get_testdir(self.ctx) # prefix args for ceph cmds to be executed - pre = ['adjust-ulimits', 'ceph-coverage', - f'{self.testdir}/archive/coverage'] - self.CEPH_CMD = ['sudo'] + pre + ['timeout', '120', 'ceph', - '--cluster', self.cluster] - self.RADOS_CMD = pre + ['rados', '--cluster', self.cluster] + self.pre = ['adjust-ulimits', 'ceph-coverage', + f'{self.testdir}/archive/coverage'] + self.RADOS_CMD = self.pre + ['rados', '--cluster', self.cluster] self.run_ceph_w_prefix = ['sudo', 'daemon-helper', 'kill', 'ceph', '--cluster', self.cluster] @@ -1541,6 +1555,11 @@ class CephManager: except CommandFailedError: self.log('Failed to get pg_num from pool %s, ignoring' % pool) + def get_ceph_cmd(self, **kwargs): + timeout = kwargs.pop('timeout', 120) + return ['sudo'] + self.pre + ['timeout', f'{timeout}', 'ceph', + '--cluster', self.cluster] + def ceph(self, cmd, **kwargs): """ Simple Ceph admin command wrapper around run_cluster_cmd. @@ -1584,7 +1603,7 @@ class CephManager: stdout=StringIO(), check_status=kwargs.get('check_status', True)) else: - kwargs['args'] = prefixcmd + self.CEPH_CMD + kwargs['args'] + kwargs['args'] = prefixcmd + self.get_ceph_cmd(**kwargs) + kwargs['args'] return self.controller.run(**kwargs) def raw_cluster_cmd(self, *args, **kwargs) -> str: @@ -3152,11 +3171,14 @@ class CephManager: raise self.log("quorum is size %d" % size) - def get_mon_health(self, debug=False): + def get_mon_health(self, debug=False, detail=False): """ Extract all the monitor health information. 
""" - out = self.raw_cluster_cmd('health', '--format=json') + if detail: + out = self.raw_cluster_cmd('health', 'detail', '--format=json') + else: + out = self.raw_cluster_cmd('health', '--format=json') if debug: self.log('health:\n{h}'.format(h=out)) return json.loads(out) diff --git a/qa/tasks/ceph_test_case.py b/qa/tasks/ceph_test_case.py index 3f8a152d7..649c0e53c 100644 --- a/qa/tasks/ceph_test_case.py +++ b/qa/tasks/ceph_test_case.py @@ -2,6 +2,7 @@ from typing import Optional, TYPE_CHECKING import unittest import time import logging +from io import StringIO from teuthology.exceptions import CommandFailedError @@ -13,7 +14,106 @@ log = logging.getLogger(__name__) class TestTimeoutError(RuntimeError): pass -class CephTestCase(unittest.TestCase): + +class RunCephCmd: + + def run_ceph_cmd(self, *args, **kwargs): + """ + *args and **kwargs must contain arguments that are accepted by + vstart_runner.LocalRemote._do_run() or teuhology.orchestra.run.run() + methods. + """ + if kwargs.get('args') is None and args: + if len(args) == 1: + args = args[0] + kwargs['args'] = args + return self.mon_manager.run_cluster_cmd(**kwargs) + + def get_ceph_cmd_result(self, *args, **kwargs): + """ + *args and **kwargs must contain arguments that are accepted by + vstart_runner.LocalRemote._do_run() or teuhology.orchestra.run.run() + methods. + """ + if kwargs.get('args') is None and args: + if len(args) == 1: + args = args[0] + kwargs['args'] = args + return self.run_ceph_cmd(**kwargs).exitstatus + + def get_ceph_cmd_stdout(self, *args, **kwargs): + """ + *args and **kwargs must contain arguments that are accepted by + vstart_runner.LocalRemote._do_run() or teuhology.orchestra.run.run() + methods. + """ + if kwargs.get('args') is None and args: + if len(args) == 1: + args = args[0] + kwargs['args'] = args + kwargs['stdout'] = kwargs.pop('stdout', StringIO()) + return self.run_ceph_cmd(**kwargs).stdout.getvalue() + + def assert_retval(self, proc_retval, exp_retval): + msg = (f'expected return value: {exp_retval}\n' + f'received return value: {proc_retval}\n') + assert proc_retval == exp_retval, msg + + def _verify(self, proc, exp_retval=None, exp_errmsgs=None): + if exp_retval is None and exp_errmsgs is None: + raise RuntimeError('Method didn\'t get enough parameters. Pass ' + 'return value or error message expected from ' + 'the command/process.') + + if exp_retval is not None: + self.assert_retval(proc.returncode, exp_retval) + if exp_errmsgs is None: + return + + if isinstance(exp_errmsgs, str): + exp_errmsgs = (exp_errmsgs, ) + exp_errmsgs = tuple([e.lower() for e in exp_errmsgs]) + + proc_stderr = proc.stderr.getvalue().lower() + msg = ('didn\'t find any of the expected string in stderr.\n' + f'expected string: {exp_errmsgs}\n' + f'received error message: {proc_stderr}\n' + 'note: received error message is converted to lowercase') + for e in exp_errmsgs: + if e in proc_stderr: + break + # this else is meant for the for loop above. + else: + assert False, msg + + def negtest_ceph_cmd(self, args, retval=None, errmsgs=None, **kwargs): + """ + Conduct a negative test for the given Ceph command. + + retval and errmsgs are parameters to confirm the cause of command + failure. + + *args and **kwargs must contain arguments that are accepted by + vstart_runner.LocalRemote._do_run() or teuhology.orchestra.run.run() + methods. + + NOTE: errmsgs is expected to be a tuple, but in case there's only one + error message, it can also be a string. This method will add the string + to a tuple internally. 
+ """ + kwargs['args'] = args + # execution is needed to not halt on command failure because we are + # conducting negative testing + kwargs['check_status'] = False + # stderr is needed to check for expected error messages. + kwargs['stderr'] = StringIO() + + proc = self.run_ceph_cmd(**kwargs) + self._verify(proc, retval, errmsgs) + return proc + + +class CephTestCase(unittest.TestCase, RunCephCmd): """ For test tasks that want to define a structured set of tests implemented in python. Subclass this with appropriate @@ -36,9 +136,23 @@ class CephTestCase(unittest.TestCase): # their special needs. If not met, tests will be skipped. REQUIRE_MEMSTORE = False + def _init_mon_manager(self): + # if vstart_runner.py has invoked this code + if 'Local' in str(type(self.ceph_cluster)): + from tasks.vstart_runner import LocalCephManager + self.mon_manager = LocalCephManager(ctx=self.ctx) + # else teuthology has invoked this code + else: + from tasks.ceph_manager import CephManager + self.mon_manager = CephManager(self.ceph_cluster.admin_remote, + ctx=self.ctx, logger=log.getChild('ceph_manager')) + def setUp(self): self._mon_configs_set = set() + self._init_mon_manager() + self.admin_remote = self.ceph_cluster.admin_remote + self.ceph_cluster.mon_manager.raw_cluster_cmd("log", "Starting test {0}".format(self.id())) @@ -148,12 +262,14 @@ class CephTestCase(unittest.TestCase): return ContextManager() - def wait_for_health(self, pattern, timeout): + def wait_for_health(self, pattern, timeout, check_in_detail=None): """ Wait until 'ceph health' contains messages matching the pattern + Also check if @check_in_detail matches detailed health messages + only when @pattern is a code string. """ def seen_health_warning(): - health = self.ceph_cluster.mon_manager.get_mon_health() + health = self.ceph_cluster.mon_manager.get_mon_health(debug=False, detail=bool(check_in_detail)) codes = [s for s in health['checks']] summary_strings = [s[1]['summary']['message'] for s in health['checks'].items()] if len(summary_strings) == 0: @@ -164,7 +280,16 @@ class CephTestCase(unittest.TestCase): if pattern in ss: return True if pattern in codes: - return True + if not check_in_detail: + return True + # check if the string is in detail list if asked + detail_strings = [ss['message'] for ss in \ + [s for s in health['checks'][pattern]['detail']]] + log.debug(f'detail_strings: {detail_strings}') + for ds in detail_strings: + if check_in_detail in ds: + return True + log.debug(f'detail string "{check_in_detail}" not found') log.debug("Not found expected summary strings yet ({0})".format(summary_strings)) return False diff --git a/qa/tasks/cephfs/caps_helper.py b/qa/tasks/cephfs/caps_helper.py index ac9bc4401..1ead57b71 100644 --- a/qa/tasks/cephfs/caps_helper.py +++ b/qa/tasks/cephfs/caps_helper.py @@ -160,11 +160,11 @@ class CapTester(CephFSTestCase): else: raise RuntimeError(f'perm = {perm}\nIt should be "r" or "rw".') - def conduct_pos_test_for_read_caps(self): + def conduct_pos_test_for_read_caps(self, sudo_read=False): for mount, path, data in self.test_set: log.info(f'test read perm: read file {path} and expect data ' f'"{data}"') - contents = mount.read_file(path) + contents = mount.read_file(path, sudo_read) self.assertEqual(data, contents) log.info(f'read perm was tested successfully: "{data}" was ' f'successfully read from path {path}') @@ -193,3 +193,32 @@ class CapTester(CephFSTestCase): cmdargs.pop(-1) log.info('absence of write perm was tested successfully: ' f'failed to be write data to file {path}.') + + def 
_conduct_neg_test_for_root_squash_caps(self, _cmdargs, sudo_write=False): + possible_errmsgs = ('permission denied', 'operation not permitted') + cmdargs = ['sudo'] if sudo_write else [''] + cmdargs += _cmdargs + + for mount, path, data in self.test_set: + log.info(f'test absence of {_cmdargs[0]} perm: expect failure {path}.') + + # open the file and hold it. The MDS will issue CEPH_CAP_EXCL_* + # to mount + proc = mount.open_background(path) + cmdargs.append(path) + mount.negtestcmd(args=cmdargs, retval=1, errmsgs=possible_errmsgs) + cmdargs.pop(-1) + mount._kill_background(proc) + log.info(f'absence of {_cmdargs[0]} perm was tested successfully') + + def conduct_neg_test_for_chown_caps(self, sudo_write=True): + # flip ownership to nobody. assumption: nobody's id is 65534 + cmdargs = ['chown', '-h', '65534:65534'] + self._conduct_neg_test_for_root_squash_caps(cmdargs, sudo_write) + + def conduct_neg_test_for_truncate_caps(self, sudo_write=True): + cmdargs = ['truncate', '-s', '10GB'] + self._conduct_neg_test_for_root_squash_caps(cmdargs, sudo_write) + + def conduct_pos_test_for_open_caps(self, sudo_read=True): + self.conduct_pos_test_for_read_caps(sudo_read) diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py index d2688929c..f26b598aa 100644 --- a/qa/tasks/cephfs/cephfs_test_case.py +++ b/qa/tasks/cephfs/cephfs_test_case.py @@ -3,8 +3,6 @@ import logging import os import re -from shlex import split as shlex_split - from tasks.ceph_test_case import CephTestCase from teuthology import contextutil @@ -96,22 +94,22 @@ class CephFSTestCase(CephTestCase): # In case anything is in the OSD blocklist list, clear it out. This is to avoid # the OSD map changing in the background (due to blocklist expiry) while tests run. try: - self.mds_cluster.mon_manager.run_cluster_cmd(args="osd blocklist clear") + self.run_ceph_cmd("osd blocklist clear") except CommandFailedError: # Fallback for older Ceph cluster try: - blocklist = json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd("osd", - "dump", "--format=json-pretty"))['blocklist'] + blocklist = json.loads(self.get_ceph_cmd_stdout("osd", + "dump", "--format=json-pretty"))['blocklist'] log.info(f"Removing {len(blocklist)} blocklist entries") for addr, blocklisted_at in blocklist.items(): - self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blocklist", "rm", addr) + self.run_ceph_cmd("osd", "blocklist", "rm", addr) except KeyError: # Fallback for more older Ceph clusters, who will use 'blacklist' instead. 
- blacklist = json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd("osd", - "dump", "--format=json-pretty"))['blacklist'] + blacklist = json.loads(self.get_ceph_cmd_stdout("osd", + "dump", "--format=json-pretty"))['blacklist'] log.info(f"Removing {len(blacklist)} blacklist entries") for addr, blocklisted_at in blacklist.items(): - self.mds_cluster.mon_manager.raw_cluster_cmd("osd", "blacklist", "rm", addr) + self.run_ceph_cmd("osd", "blacklist", "rm", addr) def setUp(self): super(CephFSTestCase, self).setUp() @@ -160,7 +158,7 @@ class CephFSTestCase(CephTestCase): for entry in self.auth_list(): ent_type, ent_id = entry['entity'].split(".") if ent_type == "client" and ent_id not in client_mount_ids and not (ent_id == "admin" or ent_id[:6] == 'mirror'): - self.mds_cluster.mon_manager.raw_cluster_cmd("auth", "del", entry['entity']) + self.run_ceph_cmd("auth", "del", entry['entity']) if self.REQUIRE_FILESYSTEM: self.fs = self.mds_cluster.newfs(create=True) @@ -171,11 +169,11 @@ class CephFSTestCase(CephTestCase): 'osd', f'allow rw tag cephfs data={self.fs.name}', 'mds', 'allow'] - if self.run_cluster_cmd_result(cmd) == 0: + if self.get_ceph_cmd_result(*cmd) == 0: break cmd[1] = 'add' - if self.run_cluster_cmd_result(cmd) != 0: + if self.get_ceph_cmd_result(*cmd) != 0: raise RuntimeError(f'Failed to create new client {cmd[2]}') # wait for ranks to become active @@ -188,9 +186,8 @@ class CephFSTestCase(CephTestCase): if self.REQUIRE_BACKUP_FILESYSTEM: if not self.REQUIRE_FILESYSTEM: self.skipTest("backup filesystem requires a primary filesystem as well") - self.fs.mon_manager.raw_cluster_cmd('fs', 'flag', 'set', - 'enable_multiple', 'true', - '--yes-i-really-mean-it') + self.run_ceph_cmd('fs', 'flag', 'set', 'enable_multiple', 'true', + '--yes-i-really-mean-it') self.backup_fs = self.mds_cluster.newfs(name="backup_fs") self.backup_fs.wait_for_daemons() @@ -226,9 +223,8 @@ class CephFSTestCase(CephTestCase): """ Convenience wrapper on "ceph auth ls" """ - return json.loads(self.mds_cluster.mon_manager.raw_cluster_cmd( - "auth", "ls", "--format=json-pretty" - ))['auth_dump'] + return json.loads(self.get_ceph_cmd_stdout("auth", "ls", + "--format=json-pretty"))['auth_dump'] def assert_session_count(self, expected, ls_data=None, mds_id=None): if ls_data is None: @@ -411,16 +407,6 @@ class CephFSTestCase(CephTestCase): except contextutil.MaxWhileTries as e: raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e - def run_cluster_cmd(self, cmd): - if isinstance(cmd, str): - cmd = shlex_split(cmd) - return self.fs.mon_manager.raw_cluster_cmd(*cmd) - - def run_cluster_cmd_result(self, cmd): - if isinstance(cmd, str): - cmd = shlex_split(cmd) - return self.fs.mon_manager.raw_cluster_cmd_result(*cmd) - def create_client(self, client_id, moncap=None, osdcap=None, mdscap=None): if not (moncap or osdcap or mdscap): if self.fs: @@ -438,5 +424,5 @@ class CephFSTestCase(CephTestCase): if mdscap: cmd += ['mds', mdscap] - self.run_cluster_cmd(cmd) - return self.run_cluster_cmd(f'auth get {self.client_name}') + self.run_ceph_cmd(*cmd) + return self.run_ceph_cmd(f'auth get {self.client_name}') diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py index 777ba8249..dc314efa8 100644 --- a/qa/tasks/cephfs/filesystem.py +++ b/qa/tasks/cephfs/filesystem.py @@ -17,8 +17,10 @@ from teuthology import misc from teuthology.nuke import clear_firewall from teuthology.parallel import parallel from teuthology import contextutil + from tasks.ceph_manager import write_conf 
-from tasks import ceph_manager +from tasks.ceph_manager import CephManager +from tasks.ceph_test_case import RunCephCmd log = logging.getLogger(__name__) @@ -66,16 +68,16 @@ class FSMissing(Exception): def __str__(self): return f"File system {self.ident} does not exist in the map" -class FSStatus(object): +class FSStatus(RunCephCmd): """ Operations on a snapshot of the FSMap. """ def __init__(self, mon_manager, epoch=None): - self.mon = mon_manager + self.mon_manager = mon_manager cmd = ["fs", "dump", "--format=json"] if epoch is not None: cmd.append(str(epoch)) - self.map = json.loads(self.mon.raw_cluster_cmd(*cmd)) + self.map = json.loads(self.get_ceph_cmd_stdout(*cmd)) def __str__(self): return json.dumps(self.map, indent = 2, sort_keys = True) @@ -216,7 +218,7 @@ class FSStatus(object): #all matching return False -class CephCluster(object): +class CephCluster(RunCephCmd): @property def admin_remote(self): first_mon = misc.get_first_mon(self._ctx, None) @@ -225,7 +227,8 @@ class CephCluster(object): def __init__(self, ctx) -> None: self._ctx = ctx - self.mon_manager = ceph_manager.CephManager(self.admin_remote, ctx=ctx, logger=log.getChild('ceph_manager')) + self.mon_manager = CephManager(self.admin_remote, ctx=ctx, + logger=log.getChild('ceph_manager')) def get_config(self, key, service_type=None): """ @@ -261,8 +264,14 @@ class CephCluster(object): "-Infinity": -float("inf")} return c[value] - j = json.loads(response_data.replace('inf', 'Infinity'), - parse_constant=get_nonnumeric_values) + + j = {} + try: + j = json.loads(response_data.replace('inf', 'Infinity'), + parse_constant=get_nonnumeric_values) + except json.decoder.JSONDecodeError: + raise RuntimeError(response_data) # assume it is an error message, pass it up + pretty = json.dumps(j, sort_keys=True, indent=2) log.debug(f"_json_asok output\n{pretty}") return j @@ -271,7 +280,7 @@ class CephCluster(object): return None def is_addr_blocklisted(self, addr): - blocklist = json.loads(self.mon_manager.raw_cluster_cmd( + blocklist = json.loads(self.get_ceph_cmd_stdout( "osd", "dump", "--format=json"))['blocklist'] if addr in blocklist: return True @@ -350,7 +359,7 @@ class MDSCluster(CephCluster): Inform MDSMonitor of the death of the daemon process(es). If it held a rank, that rank will be relinquished. 
""" - self._one_or_all(mds_id, lambda id_: self.mon_manager.raw_cluster_cmd("mds", "fail", id_)) + self._one_or_all(mds_id, lambda id_: self.get_ceph_cmd_stdout("mds", "fail", id_)) def mds_restart(self, mds_id=None): self._one_or_all(mds_id, lambda id_: self.mds_daemons[id_].restart()) @@ -364,7 +373,7 @@ class MDSCluster(CephCluster): """ def _fail_restart(id_): self.mds_daemons[id_].stop() - self.mon_manager.raw_cluster_cmd("mds", "fail", id_) + self.run_ceph_cmd("mds", "fail", id_) self.mds_daemons[id_].restart() self._one_or_all(mds_id, _fail_restart) @@ -468,7 +477,7 @@ class MDSCluster(CephCluster): return FSStatus(self.mon_manager).get_mds(mds_id) def is_pool_full(self, pool_name): - pools = json.loads(self.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['pools'] + pools = json.loads(self.get_ceph_cmd_stdout("osd", "dump", "--format=json-pretty"))['pools'] for pool in pools: if pool['pool_name'] == pool_name: return 'full' in pool['flags_names'].split(",") @@ -575,21 +584,21 @@ class Filesystem(MDSCluster): assert(mds_map['in'] == list(range(0, mds_map['max_mds']))) def reset(self): - self.mon_manager.raw_cluster_cmd("fs", "reset", str(self.name), '--yes-i-really-mean-it') + self.run_ceph_cmd("fs", "reset", str(self.name), '--yes-i-really-mean-it') def fail(self): - self.mon_manager.raw_cluster_cmd("fs", "fail", str(self.name)) + self.run_ceph_cmd("fs", "fail", str(self.name)) def set_flag(self, var, *args): a = map(lambda x: str(x).lower(), args) - self.mon_manager.raw_cluster_cmd("fs", "flag", "set", var, *a) + self.run_ceph_cmd("fs", "flag", "set", var, *a) def set_allow_multifs(self, yes=True): self.set_flag("enable_multiple", yes) def set_var(self, var, *args): a = map(lambda x: str(x).lower(), args) - self.mon_manager.raw_cluster_cmd("fs", "set", self.name, var, *a) + self.run_ceph_cmd("fs", "set", self.name, var, *a) def set_down(self, down=True): self.set_var("down", str(down).lower()) @@ -615,9 +624,12 @@ class Filesystem(MDSCluster): def set_refuse_client_session(self, yes): self.set_var("refuse_client_session", yes) + def set_refuse_standby_for_another_fs(self, yes): + self.set_var("refuse_standby_for_another_fs", yes) + def compat(self, *args): a = map(lambda x: str(x).lower(), args) - self.mon_manager.raw_cluster_cmd("fs", "compat", self.name, *a) + self.run_ceph_cmd("fs", "compat", self.name, *a) def add_compat(self, *args): self.compat("add_compat", *args) @@ -633,7 +645,7 @@ class Filesystem(MDSCluster): def required_client_features(self, *args, **kwargs): c = ["fs", "required_client_features", self.name, *args] - return self.mon_manager.run_cluster_cmd(args=c, **kwargs) + return self.run_ceph_cmd(args=c, **kwargs) # Since v15.1.0 the pg autoscale mode has been enabled as default, # will let the pg autoscale mode to calculate the pg_num as needed. 
@@ -662,24 +674,23 @@ class Filesystem(MDSCluster): log.debug("Creating filesystem '{0}'".format(self.name)) try: - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - self.metadata_pool_name, - '--pg_num_min', str(self.pg_num_min)) - - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - data_pool_name, str(self.pg_num), - '--pg_num_min', str(self.pg_num_min), - '--target_size_ratio', - str(self.target_size_ratio)) + self.run_ceph_cmd('osd', 'pool', 'create',self.metadata_pool_name, + '--pg_num_min', str(self.pg_num_min)) + + self.run_ceph_cmd('osd', 'pool', 'create', data_pool_name, + str(self.pg_num), + '--pg_num_min', str(self.pg_num_min), + '--target_size_ratio', + str(self.target_size_ratio)) except CommandFailedError as e: if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - self.metadata_pool_name, - str(self.pg_num_min)) + self.run_ceph_cmd('osd', 'pool', 'create', + self.metadata_pool_name, + str(self.pg_num_min)) - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - data_pool_name, str(self.pg_num), - str(self.pg_num_min)) + self.run_ceph_cmd('osd', 'pool', 'create', + data_pool_name, str(self.pg_num), + str(self.pg_num_min)) else: raise @@ -688,7 +699,7 @@ class Filesystem(MDSCluster): args.append('--recover') if metadata_overlay: args.append('--allow-dangerous-metadata-overlay') - self.mon_manager.raw_cluster_cmd(*args) + self.run_ceph_cmd(*args) if not recover: if self.ec_profile and 'disabled' not in self.ec_profile: @@ -696,23 +707,22 @@ class Filesystem(MDSCluster): log.debug("EC profile is %s", self.ec_profile) cmd = ['osd', 'erasure-code-profile', 'set', ec_data_pool_name] cmd.extend(self.ec_profile) - self.mon_manager.raw_cluster_cmd(*cmd) + self.run_ceph_cmd(*cmd) try: - self.mon_manager.raw_cluster_cmd( + self.run_ceph_cmd( 'osd', 'pool', 'create', ec_data_pool_name, 'erasure', ec_data_pool_name, '--pg_num_min', str(self.pg_num_min), '--target_size_ratio', str(self.target_size_ratio_ec)) except CommandFailedError as e: if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option - self.mon_manager.raw_cluster_cmd( + self.run_ceph_cmd( 'osd', 'pool', 'create', ec_data_pool_name, str(self.pg_num_min), 'erasure', ec_data_pool_name) else: raise - self.mon_manager.raw_cluster_cmd( - 'osd', 'pool', 'set', - ec_data_pool_name, 'allow_ec_overwrites', 'true') + self.run_ceph_cmd('osd', 'pool', 'set', ec_data_pool_name, + 'allow_ec_overwrites', 'true') self.add_data_pool(ec_data_pool_name, create=False) self.check_pool_application(ec_data_pool_name) @@ -723,7 +733,8 @@ class Filesystem(MDSCluster): # Turn off spurious standby count warnings from modifying max_mds in tests. 
try: - self.mon_manager.raw_cluster_cmd('fs', 'set', self.name, 'standby_count_wanted', '0') + self.run_ceph_cmd('fs', 'set', self.name, 'standby_count_wanted', + '0') except CommandFailedError as e: if e.exitstatus == 22: # standby_count_wanted not available prior to luminous (upgrade tests would fail otherwise) @@ -756,17 +767,29 @@ class Filesystem(MDSCluster): assert(isinstance(subvols['create'], int)) assert(subvols['create'] > 0) + self.mon_manager.raw_cluster_cmd('fs', 'subvolumegroup', 'create', self.name, 'qa') + subvol_options = self.fs_config.get('subvol_options', '') + for sv in range(0, subvols['create']): sv_name = f'sv_{sv}' - self.mon_manager.raw_cluster_cmd( - 'fs', 'subvolume', 'create', self.name, sv_name, - self.fs_config.get('subvol_options', '')) + cmd = [ + 'fs', + 'subvolume', + 'create', + self.name, + sv_name, + '--group_name', 'qa', + ] + if subvol_options: + cmd.append(subvol_options) + self.run_ceph_cmd(cmd) if self.name not in self._ctx.created_subvols: self._ctx.created_subvols[self.name] = [] - subvol_path = self.mon_manager.raw_cluster_cmd( - 'fs', 'subvolume', 'getpath', self.name, sv_name) + subvol_path = self.get_ceph_cmd_stdout( + 'fs', 'subvolume', 'getpath', self.name, + '--group_name', 'qa', sv_name) subvol_path = subvol_path.strip() self._ctx.created_subvols[self.name].append(subvol_path) else: @@ -858,7 +881,7 @@ class Filesystem(MDSCluster): """ Whether a filesystem exists in the mon's filesystem list """ - fs_list = json.loads(self.mon_manager.raw_cluster_cmd('fs', 'ls', '--format=json-pretty')) + fs_list = json.loads(self.get_ceph_cmd_stdout('fs', 'ls', '--format=json-pretty')) return self.name in [fs['name'] for fs in fs_list] def legacy_configured(self): @@ -867,7 +890,7 @@ class Filesystem(MDSCluster): the case, the caller should avoid using Filesystem.create """ try: - out_text = self.mon_manager.raw_cluster_cmd('--format=json-pretty', 'osd', 'lspools') + out_text = self.get_ceph_cmd_stdout('--format=json-pretty', 'osd', 'lspools') pools = json.loads(out_text) metadata_pool_exists = 'metadata' in [p['poolname'] for p in pools] if metadata_pool_exists: @@ -883,7 +906,7 @@ class Filesystem(MDSCluster): return metadata_pool_exists def _df(self): - return json.loads(self.mon_manager.raw_cluster_cmd("df", "--format=json-pretty")) + return json.loads(self.get_ceph_cmd_stdout("df", "--format=json-pretty")) # may raise FSMissing def get_mds_map(self, status=None): @@ -901,15 +924,15 @@ class Filesystem(MDSCluster): def add_data_pool(self, name, create=True): if create: try: - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name, - '--pg_num_min', str(self.pg_num_min)) + self.run_ceph_cmd('osd', 'pool', 'create', name, + '--pg_num_min', str(self.pg_num_min)) except CommandFailedError as e: if e.exitstatus == 22: # nautilus couldn't specify --pg_num_min option - self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name, - str(self.pg_num_min)) + self.run_ceph_cmd('osd', 'pool', 'create', name, + str(self.pg_num_min)) else: raise - self.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', self.name, name) + self.run_ceph_cmd('fs', 'add_data_pool', self.name, name) self.get_pool_names(refresh = True) for poolid, fs_name in self.data_pools.items(): if name == fs_name: @@ -962,9 +985,9 @@ class Filesystem(MDSCluster): self.data_pool_name = name def get_pool_pg_num(self, pool_name): - pgs = json.loads(self.mon_manager.raw_cluster_cmd('osd', 'pool', 'get', - pool_name, 'pg_num', - '--format=json-pretty')) + pgs = 
json.loads(self.get_ceph_cmd_stdout('osd', 'pool', 'get', + pool_name, 'pg_num', + '--format=json-pretty')) return int(pgs['pg_num']) def get_namespace_id(self): @@ -1095,13 +1118,13 @@ class Filesystem(MDSCluster): self.mds_signal(name, signal) def rank_freeze(self, yes, rank=0): - self.mon_manager.raw_cluster_cmd("mds", "freeze", "{}:{}".format(self.id, rank), str(yes).lower()) + self.run_ceph_cmd("mds", "freeze", "{}:{}".format(self.id, rank), str(yes).lower()) def rank_repaired(self, rank): - self.mon_manager.raw_cluster_cmd("mds", "repaired", "{}:{}".format(self.id, rank)) + self.run_ceph_cmd("mds", "repaired", "{}:{}".format(self.id, rank)) def rank_fail(self, rank=0): - self.mon_manager.raw_cluster_cmd("mds", "fail", "{}:{}".format(self.id, rank)) + self.run_ceph_cmd("mds", "fail", "{}:{}".format(self.id, rank)) def rank_is_running(self, rank=0, status=None): name = self.get_rank(rank=rank, status=status)['name'] @@ -1240,15 +1263,15 @@ class Filesystem(MDSCluster): if mds_id is None: return self.rank_tell(command) - return json.loads(self.mon_manager.raw_cluster_cmd("tell", f"mds.{mds_id}", *command)) + return json.loads(self.get_ceph_cmd_stdout("tell", f"mds.{mds_id}", *command)) def rank_asok(self, command, rank=0, status=None, timeout=None): info = self.get_rank(rank=rank, status=status) return self.json_asok(command, 'mds', info['name'], timeout=timeout) - def rank_tell(self, command, rank=0, status=None): + def rank_tell(self, command, rank=0, status=None, timeout=120): try: - out = self.mon_manager.raw_cluster_cmd("tell", f"mds.{self.id}:{rank}", *command) + out = self.get_ceph_cmd_stdout("tell", f"mds.{self.id}:{rank}", *command) return json.loads(out) except json.decoder.JSONDecodeError: log.error("could not decode: {}".format(out)) @@ -1648,8 +1671,8 @@ class Filesystem(MDSCluster): caps = tuple(x) client_name = 'client.' 
+ client_id - return self.mon_manager.raw_cluster_cmd('fs', 'authorize', self.name, - client_name, *caps) + return self.get_ceph_cmd_stdout('fs', 'authorize', self.name, + client_name, *caps) def grow(self, new_max_mds, status=None): oldmax = self.get_var('max_mds', status=status) @@ -1663,11 +1686,11 @@ class Filesystem(MDSCluster): self.set_max_mds(new_max_mds) return self.wait_for_daemons() - def run_scrub(self, cmd, rank=0): - return self.rank_tell(["scrub"] + cmd, rank) + def run_scrub(self, cmd, rank=0, timeout=300): + return self.rank_tell(["scrub"] + cmd, rank=rank, timeout=timeout) def get_scrub_status(self, rank=0): - return self.run_scrub(["status"], rank) + return self.run_scrub(["status"], rank=rank, timeout=300) def flush(self, rank=0): return self.rank_tell(["flush", "journal"], rank=rank) @@ -1679,7 +1702,7 @@ class Filesystem(MDSCluster): result = "no active scrubs running" with contextutil.safe_while(sleep=sleep, tries=timeout//sleep) as proceed: while proceed(): - out_json = self.rank_tell(["scrub", "status"], rank=rank) + out_json = self.rank_tell(["scrub", "status"], rank=rank, timeout=timeout) assert out_json is not None if not reverse: if result in out_json['status']: diff --git a/qa/tasks/cephfs/kernel_mount.py b/qa/tasks/cephfs/kernel_mount.py index 89f6b6639..c59f661a3 100644 --- a/qa/tasks/cephfs/kernel_mount.py +++ b/qa/tasks/cephfs/kernel_mount.py @@ -260,9 +260,10 @@ class KernelMount(CephFSMount): import json def get_id_to_dir(): - result = {} + meta_dir = "{meta_dir}" + result = dict() for dir in glob.glob("/sys/kernel/debug/ceph/*"): - if os.path.basename(dir) == DEBUGFS_META_DIR: + if os.path.basename(dir) == meta_dir: continue mds_sessions_lines = open(os.path.join(dir, "mds_sessions")).readlines() global_id = mds_sessions_lines[0].split()[1].strip('"') @@ -270,7 +271,7 @@ class KernelMount(CephFSMount): result[client_id] = global_id return result print(json.dumps(get_id_to_dir())) - """) + """.format(meta_dir=DEBUGFS_META_DIR)) output = self.client_remote.sh([ 'sudo', 'python3', '-c', pyscript @@ -342,7 +343,7 @@ echo '{fdata}' | sudo tee /sys/kernel/debug/dynamic_debug/control if self.inst is not None: return self.inst - client_gid = "client%d" % self.get_global_id() + client_gid = "client%d" % int(self.get_global_id()) self.inst = " ".join([client_gid, self._global_addr]) return self.inst diff --git a/qa/tasks/cephfs/mount.py b/qa/tasks/cephfs/mount.py index 4a8187406..bd92cadaa 100644 --- a/qa/tasks/cephfs/mount.py +++ b/qa/tasks/cephfs/mount.py @@ -195,10 +195,10 @@ class CephFSMount(object): self.fs = Filesystem(self.ctx, name=self.cephfs_name) try: - output = self.fs.mon_manager.raw_cluster_cmd(args='osd blocklist ls') + output = self.fs.get_ceph_cmd_stdout('osd blocklist ls') except CommandFailedError: # Fallback for older Ceph cluster - output = self.fs.mon_manager.raw_cluster_cmd(args='osd blacklist ls') + output = self.fs.get_ceph_cmd_stdout('osd blacklist ls') return self.addr in output @@ -740,15 +740,19 @@ class CephFSMount(object): if perms: self.run_shell(args=f'chmod {perms} {path}') - def read_file(self, path): + def read_file(self, path, sudo=False): """ Return the data from the file on given path. 
""" if path.find(self.hostfs_mntpt) == -1: path = os.path.join(self.hostfs_mntpt, path) - return self.run_shell(args=['cat', path]).\ - stdout.getvalue().strip() + args = [] + if sudo: + args.append('sudo') + args += ['cat', path] + + return self.run_shell(args=args, omit_sudo=False).stdout.getvalue().strip() def create_destroy(self): assert(self.is_mounted()) diff --git a/qa/tasks/cephfs/test_admin.py b/qa/tasks/cephfs/test_admin.py index 9890381c6..4f3100bbe 100644 --- a/qa/tasks/cephfs/test_admin.py +++ b/qa/tasks/cephfs/test_admin.py @@ -7,6 +7,7 @@ from io import StringIO from os.path import join as os_path_join from teuthology.exceptions import CommandFailedError +from teuthology.contextutil import safe_while from tasks.cephfs.cephfs_test_case import CephFSTestCase, classhook from tasks.cephfs.filesystem import FileLayout, FSMissing @@ -15,6 +16,58 @@ from tasks.cephfs.caps_helper import CapTester log = logging.getLogger(__name__) +class TestLabeledPerfCounters(CephFSTestCase): + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 1 + + def test_per_client_labeled_perf_counters(self): + """ + That the per-client labelled perf counters depict the clients + performaing IO. + """ + def get_counters_for(filesystem, client_id): + dump = self.fs.rank_tell(["counter", "dump"]) + per_client_metrics_key = f'mds_client_metrics-{filesystem}' + counters = [c["counters"] for \ + c in dump[per_client_metrics_key] if c["labels"]["client"] == client_id] + return counters[0] + + # sleep a bit so that we get updated clients... + time.sleep(10) + + # lookout for clients... + dump = self.fs.rank_tell(["counter", "dump"]) + + fs_suffix = dump["mds_client_metrics"][0]["labels"]["fs_name"] + self.assertGreaterEqual(dump["mds_client_metrics"][0]["counters"]["num_clients"], 2) + + per_client_metrics_key = f'mds_client_metrics-{fs_suffix}' + mount_a_id = f'client.{self.mount_a.get_global_id()}' + mount_b_id = f'client.{self.mount_b.get_global_id()}' + + clients = [c["labels"]["client"] for c in dump[per_client_metrics_key]] + self.assertIn(mount_a_id, clients) + self.assertIn(mount_b_id, clients) + + # write workload + self.mount_a.create_n_files("test_dir/test_file", 1000, sync=True) + with safe_while(sleep=1, tries=30, action=f'wait for counters - {mount_a_id}') as proceed: + counters_dump_a = get_counters_for(fs_suffix, mount_a_id) + while proceed(): + if counters_dump_a["total_write_ops"] > 0 and counters_dump_a["total_write_size"] > 0: + return True + + # read from the other client + for i in range(100): + self.mount_b.open_background(basename=f'test_dir/test_file_{i}', write=False) + with safe_while(sleep=1, tries=30, action=f'wait for counters - {mount_b_id}') as proceed: + counters_dump_b = get_counters_for(fs_suffix, mount_b_id) + while proceed(): + if counters_dump_b["total_read_ops"] > 0 and counters_dump_b["total_read_size"] > 0: + return True + + self.fs.teardown() + class TestAdminCommands(CephFSTestCase): """ Tests for administration command. 
@@ -24,18 +77,18 @@ class TestAdminCommands(CephFSTestCase): MDSS_REQUIRED = 1 def check_pool_application_metadata_key_value(self, pool, app, key, value): - output = self.fs.mon_manager.raw_cluster_cmd( + output = self.get_ceph_cmd_stdout( 'osd', 'pool', 'application', 'get', pool, app, key) self.assertEqual(str(output.strip()), value) def setup_ec_pools(self, n, metadata=True, overwrites=True): if metadata: - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', n+"-meta", "8") + self.run_ceph_cmd('osd', 'pool', 'create', n+"-meta", "8") cmd = ['osd', 'erasure-code-profile', 'set', n+"-profile", "m=2", "k=2", "crush-failure-domain=osd"] - self.fs.mon_manager.raw_cluster_cmd(*cmd) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', n+"-data", "8", "erasure", n+"-profile") + self.run_ceph_cmd(cmd) + self.run_ceph_cmd('osd', 'pool', 'create', n+"-data", "8", "erasure", n+"-profile") if overwrites: - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'set', n+"-data", 'allow_ec_overwrites', 'true') + self.run_ceph_cmd('osd', 'pool', 'set', n+"-data", 'allow_ec_overwrites', 'true') @classhook('_add_valid_tell') class TestValidTell(TestAdminCommands): @@ -76,13 +129,13 @@ class TestFsStatus(TestAdminCommands): That `ceph fs status` command functions. """ - s = self.fs.mon_manager.raw_cluster_cmd("fs", "status") + s = self.get_ceph_cmd_stdout("fs", "status") self.assertTrue("active" in s) - mdsmap = json.loads(self.fs.mon_manager.raw_cluster_cmd("fs", "status", "--format=json-pretty"))["mdsmap"] + mdsmap = json.loads(self.get_ceph_cmd_stdout("fs", "status", "--format=json-pretty"))["mdsmap"] self.assertEqual(mdsmap[0]["state"], "active") - mdsmap = json.loads(self.fs.mon_manager.raw_cluster_cmd("fs", "status", "--format=json"))["mdsmap"] + mdsmap = json.loads(self.get_ceph_cmd_stdout("fs", "status", "--format=json"))["mdsmap"] self.assertEqual(mdsmap[0]["state"], "active") @@ -104,7 +157,7 @@ class TestAddDataPool(TestAdminCommands): That the application metadata set on a newly added data pool is as expected. 
""" pool_name = "foo" - mon_cmd = self.fs.mon_manager.raw_cluster_cmd + mon_cmd = self.get_ceph_cmd_stdout mon_cmd('osd', 'pool', 'create', pool_name, '--pg_num_min', str(self.fs.pg_num_min)) # Check whether https://tracker.ceph.com/issues/43061 is fixed @@ -148,22 +201,22 @@ class TestAddDataPool(TestAdminCommands): first_fs = "first_fs" first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) # create second data pool, metadata pool and add with filesystem second_fs = "second_fs" second_metadata_pool = "second_metadata_pool" second_data_pool = "second_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_data_pool) + self.run_ceph_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) # try to add 'first_data_pool' with 'second_fs' # Expecting EINVAL exit status because 'first_data_pool' is already in use with 'first_fs' try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', second_fs, first_data_pool) + self.run_ceph_cmd('fs', 'add_data_pool', second_fs, first_data_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -178,23 +231,23 @@ class TestAddDataPool(TestAdminCommands): first_fs = "first_fs" first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) # create second data pool, metadata pool and add with filesystem second_fs = "second_fs" second_metadata_pool = "second_metadata_pool" second_data_pool = "second_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_data_pool) + self.run_ceph_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) # try to add 'second_metadata_pool' with 'first_fs' as a data pool # Expecting EINVAL exit status because 'second_metadata_pool' # is already in use with 'second_fs' as a metadata pool try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', first_fs, second_metadata_pool) + self.run_ceph_cmd('fs', 'add_data_pool', 
first_fs, second_metadata_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -211,23 +264,21 @@ class TestFsNew(TestAdminCommands): metapoolname, datapoolname = n+'-testmetapool', n+'-testdatapool' badname = n+'badname@#' - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - n+metapoolname) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - n+datapoolname) + self.run_ceph_cmd('osd', 'pool', 'create', n+metapoolname) + self.run_ceph_cmd('osd', 'pool', 'create', n+datapoolname) # test that fsname not with "goodchars" fails args = ['fs', 'new', badname, metapoolname, datapoolname] - proc = self.fs.mon_manager.run_cluster_cmd(args=args,stderr=StringIO(), - check_status=False) + proc = self.run_ceph_cmd(args=args, stderr=StringIO(), + check_status=False) self.assertIn('invalid chars', proc.stderr.getvalue().lower()) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'rm', metapoolname, - metapoolname, - '--yes-i-really-really-mean-it-not-faking') - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'rm', datapoolname, - datapoolname, - '--yes-i-really-really-mean-it-not-faking') + self.run_ceph_cmd('osd', 'pool', 'rm', metapoolname, + metapoolname, + '--yes-i-really-really-mean-it-not-faking') + self.run_ceph_cmd('osd', 'pool', 'rm', datapoolname, + datapoolname, + '--yes-i-really-really-mean-it-not-faking') def test_new_default_ec(self): """ @@ -239,7 +290,7 @@ class TestFsNew(TestAdminCommands): n = "test_new_default_ec" self.setup_ec_pools(n) try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data") + self.run_ceph_cmd('fs', 'new', n, n+"-meta", n+"-data") except CommandFailedError as e: if e.exitstatus == 22: pass @@ -257,7 +308,7 @@ class TestFsNew(TestAdminCommands): self.mds_cluster.delete_all_filesystems() n = "test_new_default_ec_force" self.setup_ec_pools(n) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force") + self.run_ceph_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force") def test_new_default_ec_no_overwrite(self): """ @@ -269,7 +320,7 @@ class TestFsNew(TestAdminCommands): n = "test_new_default_ec_no_overwrite" self.setup_ec_pools(n, overwrites=False) try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data") + self.run_ceph_cmd('fs', 'new', n, n+"-meta", n+"-data") except CommandFailedError as e: if e.exitstatus == 22: pass @@ -279,7 +330,7 @@ class TestFsNew(TestAdminCommands): raise RuntimeError("expected failure") # and even with --force ! 
try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force") + self.run_ceph_cmd('fs', 'new', n, n+"-meta", n+"-data", "--force") except CommandFailedError as e: if e.exitstatus == 22: pass @@ -297,7 +348,7 @@ class TestFsNew(TestAdminCommands): fs_name = "test_fs_new_pool_application" keys = ['metadata', 'data'] pool_names = [fs_name+'-'+key for key in keys] - mon_cmd = self.fs.mon_manager.raw_cluster_cmd + mon_cmd = self.get_ceph_cmd_stdout for p in pool_names: mon_cmd('osd', 'pool', 'create', p, '--pg_num_min', str(self.fs.pg_num_min)) mon_cmd('osd', 'pool', 'application', 'enable', p, 'cephfs') @@ -315,8 +366,8 @@ class TestFsNew(TestAdminCommands): keys = ['metadata', 'data'] pool_names = [fs_name+'-'+key for key in keys] for p in pool_names: - self.run_cluster_cmd(f'osd pool create {p}') - self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.run_ceph_cmd(f'osd pool create {p}') + self.run_ceph_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') self.fs.status().get_fsmap(fscid) for i in range(2): self.check_pool_application_metadata_key_value(pool_names[i], 'cephfs', keys[i], fs_name) @@ -330,9 +381,9 @@ class TestFsNew(TestAdminCommands): keys = ['metadata', 'data'] pool_names = [fs_name+'-'+key for key in keys] for p in pool_names: - self.run_cluster_cmd(f'osd pool create {p}') - self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') - self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.run_ceph_cmd(f'osd pool create {p}') + self.run_ceph_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.run_ceph_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') self.fs.status().get_fsmap(fscid) def test_fs_new_with_specific_id_fails_without_force_flag(self): @@ -344,9 +395,9 @@ class TestFsNew(TestAdminCommands): keys = ['metadata', 'data'] pool_names = [fs_name+'-'+key for key in keys] for p in pool_names: - self.run_cluster_cmd(f'osd pool create {p}') + self.run_ceph_cmd(f'osd pool create {p}') try: - self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid}') + self.run_ceph_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid}') except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on creating a file system with specific ID without --force flag") @@ -363,9 +414,9 @@ class TestFsNew(TestAdminCommands): keys = ['metadata', 'data'] pool_names = [fs_name+'-'+key for key in keys] for p in pool_names: - self.run_cluster_cmd(f'osd pool create {p}') + self.run_ceph_cmd(f'osd pool create {p}') try: - self.run_cluster_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') + self.run_ceph_cmd(f'fs new {fs_name} {pool_names[0]} {pool_names[1]} --fscid {fscid} --force') except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on creating a file system with specific ID that is already in use") @@ -381,13 +432,13 @@ class TestFsNew(TestAdminCommands): first_fs = "first_fs" first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, 
first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) second_fs = "second_fs" second_data_pool = "second_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_data_pool) # try to create new fs 'second_fs' with following configuration # metadata pool -> 'first_metadata_pool' @@ -395,7 +446,7 @@ class TestFsNew(TestAdminCommands): # Expecting EINVAL exit status because 'first_metadata_pool' # is already in use with 'first_fs' try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, first_metadata_pool, second_data_pool) + self.run_ceph_cmd('fs', 'new', second_fs, first_metadata_pool, second_data_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -410,13 +461,13 @@ class TestFsNew(TestAdminCommands): first_fs = "first_fs" first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) second_fs = "second_fs" second_metadata_pool = "second_metadata_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_metadata_pool) # try to create new fs 'second_fs' with following configuration # metadata pool -> 'second_metadata_pool' @@ -424,7 +475,7 @@ class TestFsNew(TestAdminCommands): # Expecting EINVAL exit status because 'first_data_pool' # is already in use with 'first_fs' try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, first_data_pool) + self.run_ceph_cmd('fs', 'new', second_fs, second_metadata_pool, first_data_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -439,9 +490,9 @@ class TestFsNew(TestAdminCommands): first_fs = "first_fs" first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) second_fs = "second_fs" @@ -451,7 +502,7 @@ class TestFsNew(TestAdminCommands): # Expecting EINVAL exit status because 'first_metadata_pool' and 'first_data_pool' # is already in use with 'first_fs' try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('fs', 'new', second_fs, first_metadata_pool, first_data_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -466,17 +517,17 @@ class TestFsNew(TestAdminCommands): first_fs = "first_fs" 
first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) # create second data pool, metadata pool and add with filesystem second_fs = "second_fs" second_metadata_pool = "second_metadata_pool" second_data_pool = "second_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_data_pool) + self.run_ceph_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) third_fs = "third_fs" @@ -486,7 +537,7 @@ class TestFsNew(TestAdminCommands): # Expecting EINVAL exit status because 'first_metadata_pool' and 'second_data_pool' # is already in use with 'first_fs' and 'second_fs' try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', third_fs, first_metadata_pool, second_data_pool) + self.run_ceph_cmd('fs', 'new', third_fs, first_metadata_pool, second_data_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -501,9 +552,9 @@ class TestFsNew(TestAdminCommands): first_fs = "first_fs" first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) second_fs = "second_fs" @@ -513,7 +564,7 @@ class TestFsNew(TestAdminCommands): # Expecting EINVAL exit status because 'first_data_pool' and 'first_metadata_pool' # is already in use with 'first_fs' try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, first_data_pool, first_metadata_pool) + self.run_ceph_cmd('fs', 'new', second_fs, first_data_pool, first_metadata_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -528,17 +579,17 @@ class TestFsNew(TestAdminCommands): first_fs = "first_fs" first_metadata_pool = "first_metadata_pool" first_data_pool = "first_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', first_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', first_data_pool) + self.run_ceph_cmd('fs', 'new', first_fs, first_metadata_pool, first_data_pool) # create second data pool, metadata pool and add with filesystem second_fs = "second_fs" second_metadata_pool = 
"second_metadata_pool" second_data_pool = "second_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_metadata_pool) - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', second_data_pool) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', second_data_pool) + self.run_ceph_cmd('fs', 'new', second_fs, second_metadata_pool, second_data_pool) third_fs = "third_fs" @@ -548,7 +599,7 @@ class TestFsNew(TestAdminCommands): # Expecting EINVAL exit status because 'first_data_pool' and 'second_metadata_pool' # is already in use with 'first_fs' and 'second_fs' try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', third_fs, first_data_pool, second_metadata_pool) + self.run_ceph_cmd('fs', 'new', third_fs, first_data_pool, second_metadata_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -561,20 +612,20 @@ class TestFsNew(TestAdminCommands): # create pool and initialise with rbd new_pool = "new_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_pool) + self.run_ceph_cmd('osd', 'pool', 'create', new_pool) self.ctx.cluster.run(args=['rbd', 'pool', 'init', new_pool]) new_fs = "new_fs" new_data_pool = "new_data_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_data_pool) + self.run_ceph_cmd('osd', 'pool', 'create', new_data_pool) # try to create new fs 'new_fs' with following configuration # metadata pool -> 'new_pool' (already used by rbd app) # data pool -> 'new_data_pool' # Expecting EINVAL exit status because 'new_pool' is already in use with 'rbd' app try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', new_fs, new_pool, new_data_pool) + self.run_ceph_cmd('fs', 'new', new_fs, new_pool, new_data_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -587,20 +638,20 @@ class TestFsNew(TestAdminCommands): # create pool and initialise with rbd new_pool = "new_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_pool) + self.run_ceph_cmd('osd', 'pool', 'create', new_pool) self.ctx.cluster.run(args=['rbd', 'pool', 'init', new_pool]) new_fs = "new_fs" new_metadata_pool = "new_metadata_pool" - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', new_metadata_pool) + self.run_ceph_cmd('osd', 'pool', 'create', new_metadata_pool) # try to create new fs 'new_fs' with following configuration # metadata pool -> 'new_metadata_pool' # data pool -> 'new_pool' (already used by rbd app) # Expecting EINVAL exit status because 'new_pool' is already in use with 'rbd' app try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', new_fs, new_metadata_pool, new_pool) + self.run_ceph_cmd('fs', 'new', new_fs, new_metadata_pool, new_pool) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: @@ -628,7 +679,7 @@ class TestRenameCommand(TestAdminCommands): new_fs_name = 'new_cephfs' client_id = 'test_new_cephfs' - self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + self.run_ceph_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') # authorize a cephx ID access to the renamed file system. # use the ID to write to the file system. 
@@ -649,7 +700,7 @@ class TestRenameCommand(TestAdminCommands): # cleanup self.mount_a.umount_wait() - self.run_cluster_cmd(f'auth rm client.{client_id}') + self.run_ceph_cmd(f'auth rm client.{client_id}') def test_fs_rename_idempotency(self): """ @@ -661,8 +712,8 @@ class TestRenameCommand(TestAdminCommands): orig_fs_name = self.fs.name new_fs_name = 'new_cephfs' - self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') - self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + self.run_ceph_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + self.run_ceph_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') # original file system name does not appear in `fs ls` command self.assertFalse(self.fs.exists()) @@ -681,10 +732,10 @@ class TestRenameCommand(TestAdminCommands): new_fs_name = 'new_cephfs' data_pool = self.fs.get_data_pool_name() metadata_pool = self.fs.get_metadata_pool_name() - self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + self.run_ceph_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') try: - self.run_cluster_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool}") + self.run_ceph_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool}") except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on creating a new file system with old " @@ -694,7 +745,7 @@ class TestRenameCommand(TestAdminCommands): "existing pools to fail.") try: - self.run_cluster_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool} --force") + self.run_ceph_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool} --force") except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on creating a new file system with old " @@ -704,7 +755,7 @@ class TestRenameCommand(TestAdminCommands): "existing pools, and --force flag to fail.") try: - self.run_cluster_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool} " + self.run_ceph_cmd(f"fs new {orig_fs_name} {metadata_pool} {data_pool} " "--allow-dangerous-metadata-overlay") except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EINVAL, @@ -719,7 +770,7 @@ class TestRenameCommand(TestAdminCommands): That renaming a file system without '--yes-i-really-mean-it' flag fails. """ try: - self.run_cluster_cmd(f"fs rename {self.fs.name} new_fs") + self.run_ceph_cmd(f"fs rename {self.fs.name} new_fs") except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EPERM, "invalid error code on renaming a file system without the " @@ -733,7 +784,7 @@ class TestRenameCommand(TestAdminCommands): That renaming a non-existent file system fails. 
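        The command should fail with ENOENT.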
""" try: - self.run_cluster_cmd("fs rename non_existent_fs new_fs --yes-i-really-mean-it") + self.run_ceph_cmd("fs rename non_existent_fs new_fs --yes-i-really-mean-it") except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.ENOENT, "invalid error code on renaming a non-existent fs") else: @@ -746,7 +797,7 @@ class TestRenameCommand(TestAdminCommands): self.fs2 = self.mds_cluster.newfs(name='cephfs2', create=True) try: - self.run_cluster_cmd(f"fs rename {self.fs.name} {self.fs2.name} --yes-i-really-mean-it") + self.run_ceph_cmd(f"fs rename {self.fs.name} {self.fs2.name} --yes-i-really-mean-it") except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EINVAL, "invalid error code on renaming to a fs name that is already in use") @@ -760,14 +811,14 @@ class TestRenameCommand(TestAdminCommands): orig_fs_name = self.fs.name new_fs_name = 'new_cephfs' - self.run_cluster_cmd(f'fs mirror enable {orig_fs_name}') + self.run_ceph_cmd(f'fs mirror enable {orig_fs_name}') try: - self.run_cluster_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') + self.run_ceph_cmd(f'fs rename {orig_fs_name} {new_fs_name} --yes-i-really-mean-it') except CommandFailedError as ce: self.assertEqual(ce.exitstatus, errno.EPERM, "invalid error code on renaming a mirrored file system") else: self.fail("expected renaming of a mirrored file system to fail") - self.run_cluster_cmd(f'fs mirror disable {orig_fs_name}') + self.run_ceph_cmd(f'fs mirror disable {orig_fs_name}') class TestDump(CephFSTestCase): @@ -851,13 +902,13 @@ class TestRequiredClientFeatures(CephFSTestCase): """ def is_required(index): - out = self.fs.mon_manager.raw_cluster_cmd('fs', 'get', self.fs.name, '--format=json-pretty') + out = self.get_ceph_cmd_stdout('fs', 'get', self.fs.name, '--format=json-pretty') features = json.loads(out)['mdsmap']['required_client_features'] if "feature_{0}".format(index) in features: return True; return False; - features = json.loads(self.fs.mon_manager.raw_cluster_cmd('fs', 'feature', 'ls', '--format=json-pretty')) + features = json.loads(self.get_ceph_cmd_stdout('fs', 'feature', 'ls', '--format=json-pretty')) self.assertGreater(len(features), 0); for f in features: @@ -1063,7 +1114,7 @@ class TestConfigCommands(CephFSTestCase): names = self.fs.get_rank_names() for n in names: - s = self.fs.mon_manager.raw_cluster_cmd("config", "show", "mds."+n) + s = self.get_ceph_cmd_stdout("config", "show", "mds."+n) self.assertTrue("NAME" in s) self.assertTrue("mon_host" in s) @@ -1113,17 +1164,17 @@ class TestMirroringCommands(CephFSTestCase): MDSS_REQUIRED = 1 def _enable_mirroring(self, fs_name): - self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", fs_name) + self.run_ceph_cmd("fs", "mirror", "enable", fs_name) def _disable_mirroring(self, fs_name): - self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", fs_name) + self.run_ceph_cmd("fs", "mirror", "disable", fs_name) def _add_peer(self, fs_name, peer_spec, remote_fs_name): peer_uuid = str(uuid.uuid4()) - self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "peer_add", fs_name, peer_uuid, peer_spec, remote_fs_name) + self.run_ceph_cmd("fs", "mirror", "peer_add", fs_name, peer_uuid, peer_spec, remote_fs_name) def _remove_peer(self, fs_name, peer_uuid): - self.fs.mon_manager.raw_cluster_cmd("fs", "mirror", "peer_remove", fs_name, peer_uuid) + self.run_ceph_cmd("fs", "mirror", "peer_remove", fs_name, peer_uuid) def _verify_mirroring(self, fs_name, flag_str): status = self.fs.status() @@ -1250,6 +1301,10 @@ class 
TestFsAuthorize(CephFSTestCase): self.captester.run_mds_cap_tests(PERM) def test_single_path_rootsquash(self): + if not isinstance(self.mount_a, FuseMount): + self.skipTest("only FUSE client has CEPHFS_FEATURE_MDS_AUTH_CAPS " + "needed to enforce root_squash MDS caps") + PERM = 'rw' FS_AUTH_CAPS = (('/', PERM, 'root_squash'),) self.captester = CapTester() @@ -1259,7 +1314,36 @@ class TestFsAuthorize(CephFSTestCase): # Since root_squash is set in client caps, client can read but not # write even though access level is set to "rw". self.captester.conduct_pos_test_for_read_caps() + self.captester.conduct_pos_test_for_open_caps() self.captester.conduct_neg_test_for_write_caps(sudo_write=True) + self.captester.conduct_neg_test_for_chown_caps() + self.captester.conduct_neg_test_for_truncate_caps() + + def test_single_path_rootsquash_issue_56067(self): + """ + That an FS client using root_squash MDS caps allows a non-root user to write + data to a file, and that after a client remount the non-root user can read + back the data it previously wrote. https://tracker.ceph.com/issues/56067 + """ + if not isinstance(self.mount_a, FuseMount): + self.skipTest("only FUSE client has CEPHFS_FEATURE_MDS_AUTH_CAPS " + "needed to enforce root_squash MDS caps") + + keyring = self.fs.authorize(self.client_id, ('/', 'rw', 'root_squash')) + keyring_path = self.mount_a.client_remote.mktemp(data=keyring) + self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_mntpt='/') + filedata, filename = 'some data on fs 1', 'file_on_fs1' + filepath = os_path_join(self.mount_a.hostfs_mntpt, filename) + self.mount_a.write_file(filepath, filedata) + + self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_mntpt='/') + if filepath.find(self.mount_a.hostfs_mntpt) != -1: + contents = self.mount_a.read_file(filepath) + self.assertEqual(filedata, contents) def test_single_path_authorize_on_nonalphanumeric_fsname(self): """ @@ -1271,10 +1355,10 @@ class TestFsAuthorize(CephFSTestCase): fs_name = "cephfs-_."
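        # the fs name deliberately mixes '-', '_' and '.' so that 'fs authorize' is exercised against a non-alphanumeric name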
self.fs = self.mds_cluster.newfs(name=fs_name) self.fs.wait_for_daemons() - self.run_cluster_cmd(f'auth caps client.{self.mount_a.client_id} ' - f'mon "allow r" ' - f'osd "allow rw pool={self.fs.get_data_pool_name()}" ' - f'mds allow') + self.run_ceph_cmd(f'auth caps client.{self.mount_a.client_id} ' + f'mon "allow r" ' + f'osd "allow rw pool={self.fs.get_data_pool_name()}" ' + f'mds allow') self.mount_a.remount(cephfs_name=self.fs.name) PERM = 'rw' FS_AUTH_CAPS = (('/', PERM),) @@ -1303,7 +1387,7 @@ class TestFsAuthorize(CephFSTestCase): self.run_cap_test_one_by_one(FS_AUTH_CAPS) def run_cap_test_one_by_one(self, fs_auth_caps): - keyring = self.run_cluster_cmd(f'auth get {self.client_name}') + keyring = self.run_ceph_cmd(f'auth get {self.client_name}') for i, c in enumerate(fs_auth_caps): self.assertIn(i, (0, 1)) PATH = c[0] @@ -1315,7 +1399,7 @@ class TestFsAuthorize(CephFSTestCase): def tearDown(self): self.mount_a.umount_wait() - self.run_cluster_cmd(f'auth rm {self.client_name}') + self.run_ceph_cmd(f'auth rm {self.client_name}') super(type(self), self).tearDown() @@ -1492,3 +1576,68 @@ class TestFsBalRankMask(CephFSTestCase): self.fs.set_bal_rank_mask(bal_rank_mask) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) + + +class TestPermErrMsg(CephFSTestCase): + + CLIENT_NAME = 'client.testuser' + FS1_NAME, FS2_NAME, FS3_NAME = 'abcd', 'efgh', 'ijkl' + + EXPECTED_ERRNO = 22 + EXPECTED_ERRMSG = ("Permission flags in MDS caps must start with 'r' or " + "'rw' or be '*' or 'all'") + + MONCAP = f'allow r fsname={FS1_NAME}' + OSDCAP = f'allow rw tag cephfs data={FS1_NAME}' + MDSCAPS = [ + 'allow w', + f'allow w fsname={FS1_NAME}', + + f'allow rw fsname={FS1_NAME}, allow w fsname={FS2_NAME}', + f'allow w fsname={FS1_NAME}, allow rw fsname={FS2_NAME}', + f'allow w fsname={FS1_NAME}, allow w fsname={FS2_NAME}', + + (f'allow rw fsname={FS1_NAME}, allow rw fsname={FS2_NAME}, allow ' + f'w fsname={FS3_NAME}'), + + # without space after comma + f'allow rw fsname={FS1_NAME},allow w fsname={FS2_NAME}', + + + 'allow wr', + f'allow wr fsname={FS1_NAME}', + + f'allow rw fsname={FS1_NAME}, allow wr fsname={FS2_NAME}', + f'allow wr fsname={FS1_NAME}, allow rw fsname={FS2_NAME}', + f'allow wr fsname={FS1_NAME}, allow wr fsname={FS2_NAME}', + + (f'allow rw fsname={FS1_NAME}, allow rw fsname={FS2_NAME}, allow ' + f'wr fsname={FS3_NAME}'), + + # without space after comma + f'allow rw fsname={FS1_NAME},allow wr fsname={FS2_NAME}'] + + def _negtestcmd(self, SUBCMD, MDSCAP): + return self.negtest_ceph_cmd( + args=(f'{SUBCMD} {self.CLIENT_NAME} ' + f'mon "{self.MONCAP}" osd "{self.OSDCAP}" mds "{MDSCAP}"'), + retval=self.EXPECTED_ERRNO, errmsgs=self.EXPECTED_ERRMSG) + + def test_auth_add(self): + for mdscap in self.MDSCAPS: + self._negtestcmd('auth add', mdscap) + + def test_auth_get_or_create(self): + for mdscap in self.MDSCAPS: + self._negtestcmd('auth get-or-create', mdscap) + + def test_auth_get_or_create_key(self): + for mdscap in self.MDSCAPS: + self._negtestcmd('auth get-or-create-key', mdscap) + + def test_fs_authorize(self): + for wrong_perm in ('w', 'wr'): + self.negtest_ceph_cmd( + args=(f'fs authorize {self.fs.name} {self.CLIENT_NAME} / ' + f'{wrong_perm}'), retval=self.EXPECTED_ERRNO, + errmsgs=self.EXPECTED_ERRMSG) diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py index c4215df33..b76ce4922 100644 --- a/qa/tasks/cephfs/test_client_limits.py +++ b/qa/tasks/cephfs/test_client_limits.py @@ -315,7 +315,7 @@ class 
TestClientLimits(CephFSTestCase): self.mount_a.create_n_files("testdir/file2", 5, True) # Wait for the health warnings. Assume the MDS can handle at least 10 requests per second - self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10) + self.wait_for_health("MDS_CLIENT_OLDEST_TID", max_requests // 10, check_in_detail=str(self.mount_a.client_id)) def _test_client_cache_size(self, mount_subdir): """ diff --git a/qa/tasks/cephfs/test_client_recovery.py b/qa/tasks/cephfs/test_client_recovery.py index 1bd6884a9..a01317065 100644 --- a/qa/tasks/cephfs/test_client_recovery.py +++ b/qa/tasks/cephfs/test_client_recovery.py @@ -4,6 +4,7 @@ Teuthology task for exercising CephFS client recovery """ import logging +import signal from textwrap import dedent import time import distutils.version as version @@ -12,6 +13,7 @@ import re import string import os +from teuthology import contextutil from teuthology.orchestra import run from teuthology.exceptions import CommandFailedError from tasks.cephfs.fuse_mount import FuseMount @@ -755,3 +757,117 @@ class TestClientRecovery(CephFSTestCase): self.assertEqual(len(self.fs.mds_tell(["session", "ls"])), 0) self.mount_a.umount_wait(force=True) + +class TestClientOnLaggyOSD(CephFSTestCase): + CLIENTS_REQUIRED = 2 + + def make_osd_laggy(self, osd, sleep=120): + self.mds_cluster.mon_manager.signal_osd(osd, signal.SIGSTOP) + time.sleep(sleep) + self.mds_cluster.mon_manager.signal_osd(osd, signal.SIGCONT) + + def clear_laggy_params(self, osd): + default_laggy_weight = self.config_get('mon', 'mon_osd_laggy_weight') + self.config_set('mon', 'mon_osd_laggy_weight', 1) + self.mds_cluster.mon_manager.revive_osd(osd) + self.config_set('mon', 'mon_osd_laggy_weight', default_laggy_weight) + + def get_a_random_osd(self): + osds = self.mds_cluster.mon_manager.get_osd_status() + return random.choice(osds['live']) + + def test_client_eviction_if_config_is_set(self): + """ + If any client becomes unresponsive, or its session goes idle, due to + lagginess with any OSD, and the config option + defer_client_eviction_on_laggy_osds is set to true (the default), then + make sure clients are not evicted until the OSD(s) return to normal. + """ + + self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false']) + self.config_set('mds', 'defer_client_eviction_on_laggy_osds', 'true') + self.assertEqual(self.config_get( + 'mds', 'defer_client_eviction_on_laggy_osds'), 'true') + + # make an OSD laggy + osd = self.get_a_random_osd() + self.make_osd_laggy(osd) + + try: + mount_a_gid = self.mount_a.get_global_id() + + self.mount_a.kill() +
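+            # kill() stops the client abruptly, without a clean unmount, so
+            # the MDS still holds the client's session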
+            # client session should be open, it gets stale + # only after session_timeout time. + self.assert_session_state(mount_a_gid, "open") + + # makes session stale + time.sleep(self.fs.get_var("session_timeout") * 1.5) + self.assert_session_state(mount_a_gid, "stale") + + # it takes time for laggy client entries to show up in the cluster log, + # so wait for 6 minutes to see if they become visible, then finally restart + # the client + with contextutil.safe_while(sleep=5, tries=6) as proceed: + while proceed(): + try: + with self.assert_cluster_log("1 client(s) laggy due to" + " laggy OSDs", + timeout=55): + # make sure clients weren't evicted + self.assert_session_count(2) + break + except (AssertionError, CommandFailedError) as e: + log.debug(f'{e}, retrying') + + # clear the lagginess; expect the warning to be cleared and make sure + # the client gets evicted + self.clear_laggy_params(osd) + self.wait_for_health_clear(60) + self.assert_session_count(1) + finally: + self.mount_a.kill_cleanup() + self.mount_a.mount_wait() + self.mount_a.create_destroy() + + def test_client_eviction_if_config_is_unset(self): + """ + If an OSD is laggy but the config option defer_client_eviction_on_laggy_osds + is set to false then an unresponsive client does get evicted. + """ + + self.fs.mds_asok(['config', 'set', 'mds_defer_session_stale', 'false']) + self.config_set('mds', 'defer_client_eviction_on_laggy_osds', 'false') + self.assertEqual(self.config_get( + 'mds', 'defer_client_eviction_on_laggy_osds'), 'false') + + # make an OSD laggy + osd = self.get_a_random_osd() + self.make_osd_laggy(osd) + + try: + session_timeout = self.fs.get_var("session_timeout") + mount_a_gid = self.mount_a.get_global_id() + + self.fs.mds_asok(['session', 'config', '%s' % mount_a_gid, 'timeout', '%s' % (session_timeout * 2)]) + + self.mount_a.kill() + + self.assert_session_count(2) + + time.sleep(session_timeout * 1.5) + self.assert_session_state(mount_a_gid, "open") + + time.sleep(session_timeout) + self.assert_session_count(1) + + # make sure the warning wasn't seen in the cluster log + with self.assert_cluster_log("laggy due to laggy OSDs", + timeout=120, present=False): + pass + finally: + self.mount_a.kill_cleanup() + self.mount_a.mount_wait() + self.mount_a.create_destroy() + self.clear_laggy_params(osd) diff --git a/qa/tasks/cephfs/test_damage.py b/qa/tasks/cephfs/test_damage.py index bfaa23453..a39ccaa9f 100644 --- a/qa/tasks/cephfs/test_damage.py +++ b/qa/tasks/cephfs/test_damage.py @@ -244,7 +244,7 @@ class TestDamage(CephFSTestCase): # Reset MDS state self.mount_a.umount_wait(force=True) self.fs.fail() - self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0') + self.run_ceph_cmd('mds', 'repaired', '0') # Reset RADOS pool state self.fs.radosm(['import', '-'], stdin=BytesIO(serialized)) @@ -355,8 +355,9 @@ class TestDamage(CephFSTestCase): # EIOs mean something handled by DamageTable: assert that it has # been populated damage = json.loads( - self.fs.mon_manager.raw_cluster_cmd( - 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty')) + self.get_ceph_cmd_stdout( + 'tell', f'mds.{self.fs.get_active_names()[0]}', + "damage", "ls", '--format=json-pretty')) if len(damage) == 0: results[mutation] = EIO_NO_DAMAGE @@ -416,8 +417,8 @@ class TestDamage(CephFSTestCase): # The fact that there is damage should have been recorded damage = json.loads( - self.fs.mon_manager.raw_cluster_cmd( - 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + self.get_ceph_cmd_stdout( + 'tell', f'mds.{self.fs.get_active_names()[0]}', "damage", "ls", '--format=json-pretty')) self.assertEqual(len(damage), 1) damage_id =
damage[0]['id'] @@ -466,9 +467,9 @@ class TestDamage(CephFSTestCase): self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk]) # Clean up the damagetable entry - self.fs.mon_manager.raw_cluster_cmd( - 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), - "damage", "rm", "{did}".format(did=damage_id)) + self.run_ceph_cmd( + 'tell', f'mds.{self.fs.get_active_names()[0]}', + "damage", "rm", f"{damage_id}") # Now I should be able to create a file with the same name as the # damaged guy if I want. @@ -520,14 +521,14 @@ class TestDamage(CephFSTestCase): # Check that an entry is created in the damage table damage = json.loads( - self.fs.mon_manager.raw_cluster_cmd( + self.get_ceph_cmd_stdout( 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty')) self.assertEqual(len(damage), 1) self.assertEqual(damage[0]['damage_type'], "backtrace") self.assertEqual(damage[0]['ino'], file1_ino) - self.fs.mon_manager.raw_cluster_cmd( + self.run_ceph_cmd( 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "rm", str(damage[0]['id'])) @@ -545,7 +546,7 @@ class TestDamage(CephFSTestCase): # Check that an entry is created in the damage table damage = json.loads( - self.fs.mon_manager.raw_cluster_cmd( + self.get_ceph_cmd_stdout( 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "ls", '--format=json-pretty')) self.assertEqual(len(damage), 2) @@ -560,7 +561,7 @@ class TestDamage(CephFSTestCase): self.assertEqual(damage[1]['ino'], file2_ino) for entry in damage: - self.fs.mon_manager.raw_cluster_cmd( + self.run_ceph_cmd( 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), "damage", "rm", str(entry['id'])) diff --git a/qa/tasks/cephfs/test_data_scan.py b/qa/tasks/cephfs/test_data_scan.py index 9a93bd622..f9f853247 100644 --- a/qa/tasks/cephfs/test_data_scan.py +++ b/qa/tasks/cephfs/test_data_scan.py @@ -428,7 +428,7 @@ class TestDataScan(CephFSTestCase): self.fs.data_scan(["scan_links"]) # Mark the MDS repaired - self.fs.mon_manager.raw_cluster_cmd('mds', 'repaired', '0') + self.run_ceph_cmd('mds', 'repaired', '0') # Start the MDS self.fs.mds_restart() @@ -491,10 +491,11 @@ class TestDataScan(CephFSTestCase): file_count = 100 file_names = ["%s" % n for n in range(0, file_count)] + split_size = 100 * file_count # Make sure and disable dirfrag auto merging and splitting - self.fs.set_ceph_conf('mds', 'mds bal merge size', 0) - self.fs.set_ceph_conf('mds', 'mds bal split size', 100 * file_count) + self.config_set('mds', 'mds_bal_merge_size', 0) + self.config_set('mds', 'mds_bal_split_size', split_size) # Create a directory of `file_count` files, each named after its # decimal number and containing the string of its decimal number @@ -603,7 +604,7 @@ class TestDataScan(CephFSTestCase): file_path = "mydir/myfile_{0}".format(i) ino = self.mount_a.path_to_ino(file_path) obj = "{0:x}.{1:08x}".format(ino, 0) - pgid = json.loads(self.fs.mon_manager.raw_cluster_cmd( + pgid = json.loads(self.get_ceph_cmd_stdout( "osd", "map", self.fs.get_data_pool_name(), obj, "--format=json-pretty" ))['pgid'] diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py index ddcc58ccc..ba2c3f76f 100644 --- a/qa/tasks/cephfs/test_failover.py +++ b/qa/tasks/cephfs/test_failover.py @@ -151,8 +151,39 @@ class TestClusterAffinity(CephFSTestCase): ranks = list(self.fs.get_ranks(status=status)) self.assertEqual(len(ranks), 1) self.assertIn(ranks[0]['name'], standbys) - # Note that we would expect the former active to reclaim its 
spot, but - # we're not testing that here. + + # Wait for the former active to reclaim its spot + def reclaimed(): + ranks = list(self.fs.get_ranks()) + return len(ranks) > 0 and ranks[0]['name'] not in standbys + + log.info("Waiting for former active to reclaim its spot") + self.wait_until_true(reclaimed, timeout=self.fs.beacon_timeout) + + def test_join_fs_last_resort_refused(self): + """ + That a standby with mds_join_fs set to another fs is not used if refuse_standby_for_another_fs is set. + """ + status, target = self._verify_init() + standbys = [info['name'] for info in status.get_standbys()] + for mds in standbys: + self.config_set('mds.'+mds, 'mds_join_fs', 'cephfs2') + fs2 = self.mds_cluster.newfs(name="cephfs2") + for mds in standbys: + self._change_target_state(target, mds, {'join_fscid': fs2.id}) + self.fs.set_refuse_standby_for_another_fs(True) + self.fs.rank_fail() + status = self.fs.status() + ranks = list(self.fs.get_ranks(status=status)) + self.assertTrue(len(ranks) == 0 or ranks[0]['name'] not in standbys) + + # Wait for the former active to reclaim its spot + def reclaimed(): + ranks = list(self.fs.get_ranks()) + return len(ranks) > 0 and ranks[0]['name'] not in standbys + + log.info("Waiting for former active to reclaim its spot") + self.wait_until_true(reclaimed, timeout=self.fs.beacon_timeout) def test_join_fs_steady(self): """ @@ -414,7 +445,7 @@ class TestFailover(CephFSTestCase): standbys = self.mds_cluster.get_standby_daemons() self.assertGreaterEqual(len(standbys), 1) - self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys))) + self.run_ceph_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys))) # Kill a standby and check for warning victim = standbys.pop() @@ -432,11 +463,11 @@ class TestFailover(CephFSTestCase): # Set it one greater than standbys ever seen standbys = self.mds_cluster.get_standby_daemons() self.assertGreaterEqual(len(standbys), 1) - self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1)) + self.run_ceph_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', str(len(standbys)+1)) self.wait_for_health("MDS_INSUFFICIENT_STANDBY", self.fs.beacon_timeout) # Set it to 0 - self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0') + self.run_ceph_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0') self.wait_for_health_clear(timeout=30) def test_discontinuous_mdsmap(self): @@ -685,9 +716,8 @@ class TestMultiFilesystems(CephFSTestCase): def setUp(self): super(TestMultiFilesystems, self).setUp() - self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", - "enable_multiple", "true", - "--yes-i-really-mean-it") + self.run_ceph_cmd("fs", "flag", "set", "enable_multiple", + "true", "--yes-i-really-mean-it") def _setup_two(self): fs_a = self.mds_cluster.newfs(name="alpha") @@ -701,7 +731,7 @@ class TestMultiFilesystems(CephFSTestCase): # Reconfigure client auth caps for mount in self.mounts: - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(mount.client_id), 'mds', 'allow', 'mon', 'allow r', @@ -769,7 +799,7 @@ class TestMultiFilesystems(CephFSTestCase): # Kill fs_a's active MDS, see a standby take over self.mds_cluster.mds_stop(original_a) - self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_a) + self.run_ceph_cmd("mds", "fail", original_a) self.wait_until_equal(lambda: len(fs_a.get_active_names()), 1, 
30, reject_fn=lambda v: v > 1) # Assert that it's a *different* daemon that has now appeared in the map for fs_a @@ -777,7 +807,7 @@ class TestMultiFilesystems(CephFSTestCase): # Kill fs_b's active MDS, see a standby take over self.mds_cluster.mds_stop(original_b) - self.mds_cluster.mon_manager.raw_cluster_cmd("mds", "fail", original_b) + self.run_ceph_cmd("mds", "fail", original_b) self.wait_until_equal(lambda: len(fs_b.get_active_names()), 1, 30, reject_fn=lambda v: v > 1) # Assert that it's a *different* daemon that has now appeared in the map for fs_a diff --git a/qa/tasks/cephfs/test_forward_scrub.py b/qa/tasks/cephfs/test_forward_scrub.py index f3cec881b..334a73e1c 100644 --- a/qa/tasks/cephfs/test_forward_scrub.py +++ b/qa/tasks/cephfs/test_forward_scrub.py @@ -9,6 +9,7 @@ how the functionality responds to damaged metadata. """ import logging import json +import errno from collections import namedtuple from io import BytesIO @@ -46,6 +47,9 @@ class TestForwardScrub(CephFSTestCase): return inos + def _is_MDS_damage(self): + return "MDS_DAMAGE" in self.mds_cluster.mon_manager.get_mon_health()['checks'] + def test_apply_tag(self): self.mount_a.run_shell(["mkdir", "parentdir"]) self.mount_a.run_shell(["mkdir", "parentdir/childdir"]) @@ -305,3 +309,207 @@ class TestForwardScrub(CephFSTestCase): backtrace = self.fs.read_backtrace(file_ino) self.assertEqual(['alpha', 'parent_a'], [a['dname'] for a in backtrace['ancestors']]) + + def test_health_status_after_dentry_repair(self): + """ + Test that the damage health status is cleared + after the damaged dentry is repaired + """ + # Create a file for checks + self.mount_a.run_shell(["mkdir", "subdir/"]) + + self.mount_a.run_shell(["touch", "subdir/file_undamaged"]) + self.mount_a.run_shell(["touch", "subdir/file_to_be_damaged"]) + + subdir_ino = self.mount_a.path_to_ino("subdir") + + self.mount_a.umount_wait() + for mds_name in self.fs.get_active_names(): + self.fs.mds_asok(["flush", "journal"], mds_name) + + self.fs.fail() + + # Corrupt a dentry + junk = "deadbeef" * 10 + dirfrag_obj = "{0:x}.00000000".format(subdir_ino) + self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk]) + + # Start up and try to list it + self.fs.set_joinable() + self.fs.wait_for_daemons() + + self.mount_a.mount_wait() + dentries = self.mount_a.ls("subdir/") + + # The damaged guy should have disappeared + self.assertEqual(dentries, ["file_undamaged"]) + + # I should get ENOENT if I try and read it normally, because + # the dir is considered complete + try: + self.mount_a.stat("subdir/file_to_be_damaged", wait=True) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, errno.ENOENT) + else: + raise AssertionError("Expected ENOENT") + + nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files") + self.assertEqual(nfiles, "2") + + self.mount_a.umount_wait() + + out_json = self.fs.run_scrub(["start", "/subdir", "recursive"]) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # Check that an entry for dentry damage is created in the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 1) + self.assertEqual(damage[0]['damage_type'], "dentry") + self.wait_until_true(lambda: self._is_MDS_damage(), timeout=100) + + out_json = self.fs.run_scrub(["start", "/subdir", "repair,recursive"]) + 
self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # Check that the entry is cleared from the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 0) + self.wait_until_true(lambda: not self._is_MDS_damage(), timeout=100) + + self.mount_a.mount_wait() + + # Check that the file count is now correct + nfiles = self.mount_a.getfattr("./subdir", "ceph.dir.files") + self.assertEqual(nfiles, "1") + + # Clean up the omap object + self.fs.radosm(["setomapval", dirfrag_obj, "file_to_be_damaged_head", junk]) + + def test_health_status_after_dirfrag_repair(self): + """ + Test that the damage health status is cleared + after the damaged dirfrag is repaired + """ + self.mount_a.run_shell(["mkdir", "dir"]) + self.mount_a.run_shell(["touch", "dir/file"]) + self.mount_a.run_shell(["mkdir", "testdir"]) + self.mount_a.run_shell(["ln", "dir/file", "testdir/hardlink"]) + + dir_ino = self.mount_a.path_to_ino("dir") + + # Ensure everything is written to backing store + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + + # Drop everything from the MDS cache + self.fs.fail() + + self.fs.radosm(["rm", "{0:x}.00000000".format(dir_ino)]) + + self.fs.journal_tool(['journal', 'reset'], 0) + self.fs.set_joinable() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + + # Check that touching the hardlink gives EIO + ran = self.mount_a.run_shell(["stat", "testdir/hardlink"], wait=False) + try: + ran.wait() + except CommandFailedError: + self.assertTrue("Input/output error" in ran.stderr.getvalue()) + + out_json = self.fs.run_scrub(["start", "/dir", "recursive"]) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # Check that an entry is created in the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 3) + damage_types = set() + for i in range(0, 3): + damage_types.add(damage[i]['damage_type']) + self.assertIn("dir_frag", damage_types) + self.wait_until_true(lambda: self._is_MDS_damage(), timeout=100) + + out_json = self.fs.run_scrub(["start", "/dir", "recursive,repair"]) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # Check that the entry is cleared from the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 1) + self.assertNotEqual(damage[0]['damage_type'], "dir_frag") + + self.mount_a.umount_wait() + self.fs.mds_asok(["flush", "journal"]) + self.fs.fail() + + # Run cephfs-data-scan + self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()]) + self.fs.data_scan(["scan_inodes", self.fs.get_data_pool_name()]) + self.fs.data_scan(["scan_links"]) + + self.fs.set_joinable() + self.fs.wait_for_daemons() + self.mount_a.mount_wait() + + out_json = self.fs.run_scrub(["start", "/dir", "recursive,repair"]) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 0) + 
self.wait_until_true(lambda: not self._is_MDS_damage(), timeout=100) + + def test_health_status_after_backtrace_repair(self): + """ + Test that the damage health status is cleared + after the damaged backtrace is repaired + """ + # Create a file for checks + self.mount_a.run_shell(["mkdir", "dir_test"]) + self.mount_a.run_shell(["touch", "dir_test/file"]) + file_ino = self.mount_a.path_to_ino("dir_test/file") + + # That backtrace and layout are written after initial flush + self.fs.mds_asok(["flush", "journal"]) + backtrace = self.fs.read_backtrace(file_ino) + self.assertEqual(['file', 'dir_test'], + [a['dname'] for a in backtrace['ancestors']]) + + # Corrupt the backtrace + self.fs._write_data_xattr(file_ino, "parent", + "The backtrace is corrupted") + + out_json = self.fs.run_scrub(["start", "/", "recursive"]) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # Check that an entry for backtrace damage is created in the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 1) + self.assertEqual(damage[0]['damage_type'], "backtrace") + self.wait_until_true(lambda: self._is_MDS_damage(), timeout=100) + + out_json = self.fs.run_scrub(["start", "/", "repair,recursive,force"]) + self.assertEqual(self.fs.wait_until_scrub_complete(tag=out_json["scrub_tag"]), True) + + # Check that the entry is cleared from the damage table + damage = json.loads( + self.fs.mon_manager.raw_cluster_cmd( + 'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]), + "damage", "ls", '--format=json-pretty')) + self.assertEqual(len(damage), 0) + self.wait_until_true(lambda: not self._is_MDS_damage(), timeout=100) diff --git a/qa/tasks/cephfs/test_fragment.py b/qa/tasks/cephfs/test_fragment.py index 7d35ec0df..902a53e79 100644 --- a/qa/tasks/cephfs/test_fragment.py +++ b/qa/tasks/cephfs/test_fragment.py @@ -160,14 +160,13 @@ class TestFragmentation(CephFSTestCase): target_files = branch_factor**depth * int(split_size * 1.5) create_files = target_files - files_written - self.ceph_cluster.mon_manager.raw_cluster_cmd("log", + self.run_ceph_cmd("log", "{0} Writing {1} files (depth={2})".format( self.__class__.__name__, create_files, depth )) self.mount_a.create_n_files("splitdir/file_{0}".format(depth), create_files) - self.ceph_cluster.mon_manager.raw_cluster_cmd("log", - "{0} Done".format(self.__class__.__name__)) + self.run_ceph_cmd("log","{0} Done".format(self.__class__.__name__)) files_written += create_files log.info("Now have {0} files".format(files_written)) diff --git a/qa/tasks/cephfs/test_fstop.py b/qa/tasks/cephfs/test_fstop.py index ed76eaac2..09896703d 100644 --- a/qa/tasks/cephfs/test_fstop.py +++ b/qa/tasks/cephfs/test_fstop.py @@ -20,10 +20,10 @@ class TestFSTop(CephFSTestCase): super(TestFSTop, self).tearDown() def _enable_mgr_stats_plugin(self): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "stats") + return self.get_ceph_cmd_stdout("mgr", "module", "enable", "stats") def _disable_mgr_stats_plugin(self): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "stats") + return self.get_ceph_cmd_stdout("mgr", "module", "disable", "stats") def _fstop_dump(self, *args): return self.mount_a.run_shell(['cephfs-top', @@ -66,7 +66,7 @@ class TestFSTop(CephFSTestCase): Tests 'cephfs-top --dump' output is valid """ def verify_fstop_metrics(metrics): - clients = 
metrics.get(self.fs.name, {}) + clients = metrics.get('filesystems').get(self.fs.name, {}) if str(self.mount_a.get_global_id()) in clients and \ str(self.mount_b.get_global_id()) in clients: return True @@ -93,8 +93,8 @@ class TestFSTop(CephFSTestCase): # umount mount_b, mount another filesystem on it and use --dumpfs filter self.mount_b.umount_wait() - self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", "enable_multiple", "true", - "--yes-i-really-mean-it") + self.run_ceph_cmd("fs", "flag", "set", "enable_multiple", "true", + "--yes-i-really-mean-it") # create a new filesystem fs_b = self.mds_cluster.newfs(name=newfs_name) diff --git a/qa/tasks/cephfs/test_full.py b/qa/tasks/cephfs/test_full.py index 2b3a7d5f9..90a65f069 100644 --- a/qa/tasks/cephfs/test_full.py +++ b/qa/tasks/cephfs/test_full.py @@ -61,10 +61,10 @@ class FullnessTestCase(CephFSTestCase): self.assertGreaterEqual(mount_a_initial_epoch, self.initial_osd_epoch) # Set and unset a flag to cause OSD epoch to increment - self.fs.mon_manager.raw_cluster_cmd("osd", "set", "pause") - self.fs.mon_manager.raw_cluster_cmd("osd", "unset", "pause") + self.run_ceph_cmd("osd", "set", "pause") + self.run_ceph_cmd("osd", "unset", "pause") - out = self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json").strip() + out = self.get_ceph_cmd_stdout("osd", "dump", "--format=json").strip() new_epoch = json.loads(out)['epoch'] self.assertNotEqual(self.initial_osd_epoch, new_epoch) @@ -138,7 +138,7 @@ class FullnessTestCase(CephFSTestCase): # Wait for the MDS to see the latest OSD map so that it will reliably # be applying the policy of rejecting non-deletion metadata operations # while in the full state. - osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] + osd_epoch = json.loads(self.get_ceph_cmd_stdout("osd", "dump", "--format=json-pretty"))['epoch'] self.wait_until_true( lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch, timeout=10) @@ -167,7 +167,7 @@ class FullnessTestCase(CephFSTestCase): # Wait for the MDS to see the latest OSD map so that it will reliably # be applying the free space policy - osd_epoch = json.loads(self.fs.mon_manager.raw_cluster_cmd("osd", "dump", "--format=json-pretty"))['epoch'] + osd_epoch = json.loads(self.get_ceph_cmd_stdout("osd", "dump", "--format=json-pretty"))['epoch'] self.wait_until_true( lambda: self.fs.rank_asok(['status'])['osdmap_epoch'] >= osd_epoch, timeout=10) @@ -376,8 +376,8 @@ class TestQuotaFull(FullnessTestCase): super(TestQuotaFull, self).setUp() pool_name = self.fs.get_data_pool_name() - self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", pool_name, - "max_bytes", "{0}".format(self.pool_capacity)) + self.run_ceph_cmd("osd", "pool", "set-quota", pool_name, + "max_bytes", f"{self.pool_capacity}") class TestClusterFull(FullnessTestCase): diff --git a/qa/tasks/cephfs/test_journal_repair.py b/qa/tasks/cephfs/test_journal_repair.py index c5769784d..365140fd9 100644 --- a/qa/tasks/cephfs/test_journal_repair.py +++ b/qa/tasks/cephfs/test_journal_repair.py @@ -233,8 +233,8 @@ class TestJournalRepair(CephFSTestCase): self.fs.table_tool(["0", "reset", "session"]) self.fs.journal_tool(["journal", "reset"], 0) self.fs.erase_mds_objects(1) - self.fs.mon_manager.raw_cluster_cmd('fs', 'reset', self.fs.name, - '--yes-i-really-mean-it') + self.run_ceph_cmd('fs', 'reset', self.fs.name, + '--yes-i-really-mean-it') # Bring an MDS back online, mount a client, and see that we can walk the full # filesystem tree 
again diff --git a/qa/tasks/cephfs/test_mantle.py b/qa/tasks/cephfs/test_mantle.py index 746c2ffe3..92583b502 100644 --- a/qa/tasks/cephfs/test_mantle.py +++ b/qa/tasks/cephfs/test_mantle.py @@ -22,7 +22,7 @@ class TestMantle(CephFSTestCase): self.fs.mds_asok(['config', 'set', 'debug_mds_balancer', '5'], mds_id=m) def push_balancer(self, obj, lua_code, expect): - self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', obj) + self.get_ceph_cmd_result('fs', 'set', self.fs.name, 'balancer', obj) self.fs.radosm(["put", obj, "-"], stdin=StringIO(lua_code)) with self.assert_cluster_log(failure + obj + " " + expect): log.info("run a " + obj + " balancer that expects=" + expect) @@ -31,16 +31,16 @@ class TestMantle(CephFSTestCase): self.start_mantle() expect = " : (2) No such file or directory" - ret = self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer') + ret = self.get_ceph_cmd_result('fs', 'set', self.fs.name, 'balancer') assert(ret == 22) # EINVAL - self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', " ") + self.get_ceph_cmd_result('fs', 'set', self.fs.name, 'balancer', " ") with self.assert_cluster_log(failure + " " + expect): pass def test_version_not_in_rados(self): self.start_mantle() expect = failure + "ghost.lua : (2) No such file or directory" - self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "ghost.lua") + self.get_ceph_cmd_result('fs', 'set', self.fs.name, 'balancer', "ghost.lua") with self.assert_cluster_log(expect): pass def test_balancer_invalid(self): @@ -59,7 +59,7 @@ class TestMantle(CephFSTestCase): def test_balancer_valid(self): self.start_mantle() lua_code = "BAL_LOG(0, \"test\")\nreturn {3, 4}" - self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") + self.get_ceph_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") self.fs.radosm(["put", "valid.lua", "-"], stdin=StringIO(lua_code)) with self.assert_cluster_log(success + "valid.lua"): log.info("run a valid.lua balancer") @@ -94,13 +94,13 @@ class TestMantle(CephFSTestCase): expect = " : (110) Connection timed out" # kill the OSDs so that the balancer pull from RADOS times out - osd_map = json.loads(self.fs.mon_manager.raw_cluster_cmd('osd', 'dump', '--format=json-pretty')) + osd_map = json.loads(self.get_ceph_cmd_stdout('osd', 'dump', '--format=json-pretty')) for i in range(0, len(osd_map['osds'])): - self.fs.mon_manager.raw_cluster_cmd_result('osd', 'down', str(i)) - self.fs.mon_manager.raw_cluster_cmd_result('osd', 'out', str(i)) + self.get_ceph_cmd_result('osd', 'down', str(i)) + self.get_ceph_cmd_result('osd', 'out', str(i)) # trigger a pull from RADOS - self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") + self.get_ceph_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") # make the timeout a little longer since dead OSDs spam ceph -w with self.assert_cluster_log(failure + "valid.lua" + expect, timeout=30): @@ -108,4 +108,4 @@ class TestMantle(CephFSTestCase): # cleanup for i in range(0, len(osd_map['osds'])): - self.fs.mon_manager.raw_cluster_cmd_result('osd', 'in', str(i)) + self.get_ceph_cmd_result('osd', 'in', str(i)) diff --git a/qa/tasks/cephfs/test_mds_metrics.py b/qa/tasks/cephfs/test_mds_metrics.py index ad877f622..0e824d3d2 100644 --- a/qa/tasks/cephfs/test_mds_metrics.py +++ b/qa/tasks/cephfs/test_mds_metrics.py @@ -57,13 +57,13 @@ class TestMDSMetrics(CephFSTestCase): return 
verify_metrics_cbk def _fs_perf_stats(self, *args): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", *args) + return self.get_ceph_cmd_stdout("fs", "perf", "stats", *args) def _enable_mgr_stats_plugin(self): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "stats") + return self.get_ceph_cmd_stdout("mgr", "module", "enable", "stats") def _disable_mgr_stats_plugin(self): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "stats") + return self.get_ceph_cmd_stdout("mgr", "module", "disable", "stats") def _spread_directory_on_all_ranks(self, fscid): fs_status = self.fs.status() @@ -115,7 +115,7 @@ class TestMDSMetrics(CephFSTestCase): # Reconfigure client auth caps for mount in self.mounts: - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', f"client.{mount.client_id}", 'mds', 'allow', 'mon', 'allow r', @@ -404,7 +404,7 @@ class TestMDSMetrics(CephFSTestCase): invalid_mds_rank = "1," # try, 'fs perf stat' command with invalid mds_rank try: - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", "--mds_rank", invalid_mds_rank) + self.run_ceph_cmd("fs", "perf", "stats", "--mds_rank", invalid_mds_rank) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise @@ -415,7 +415,7 @@ class TestMDSMetrics(CephFSTestCase): invalid_client_id = "abcd" # try, 'fs perf stat' command with invalid client_id try: - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", "--client_id", invalid_client_id) + self.run_ceph_cmd("fs", "perf", "stats", "--client_id", invalid_client_id) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise @@ -426,7 +426,7 @@ class TestMDSMetrics(CephFSTestCase): invalid_client_ip = "1.2.3" # try, 'fs perf stat' command with invalid client_ip try: - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "perf", "stats", "--client_ip", invalid_client_ip) + self.run_ceph_cmd("fs", "perf", "stats", "--client_ip", invalid_client_ip) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise @@ -501,8 +501,8 @@ class TestMDSMetrics(CephFSTestCase): self.mount_b.umount_wait() self.fs.delete_all_filesystems() - self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", - "enable_multiple", "true", "--yes-i-really-mean-it") + self.run_ceph_cmd("fs", "flag", "set", "enable_multiple", + "true", "--yes-i-really-mean-it") # creating filesystem fs_a = self._setup_fs(fs_name="fs1") @@ -569,8 +569,8 @@ class TestMDSMetrics(CephFSTestCase): self.mount_a.umount_wait() self.mount_b.umount_wait() - self.mds_cluster.mon_manager.raw_cluster_cmd("fs", "flag", "set", - "enable_multiple", "true", "--yes-i-really-mean-it") + self.run_ceph_cmd("fs", "flag", "set", "enable_multiple", + "true", "--yes-i-really-mean-it") # creating filesystem fs_b = self._setup_fs(fs_name="fs2") diff --git a/qa/tasks/cephfs/test_mirroring.py b/qa/tasks/cephfs/test_mirroring.py index c1a940e3f..6e57df5d0 100644 --- a/qa/tasks/cephfs/test_mirroring.py +++ b/qa/tasks/cephfs/test_mirroring.py @@ -21,6 +21,10 @@ class TestMirroring(CephFSTestCase): MODULE_NAME = "mirroring" + PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR = "cephfs_mirror" + PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS = "cephfs_mirror_mirrored_filesystems" + PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER = "cephfs_mirror_peers" + def setUp(self): super(TestMirroring, self).setUp() self.primary_fs_name = self.fs.name @@ -34,13 +38,16 @@ class TestMirroring(CephFSTestCase): 
super(TestMirroring, self).tearDown() def enable_mirroring_module(self): - self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", TestMirroring.MODULE_NAME) + self.run_ceph_cmd("mgr", "module", "enable", TestMirroring.MODULE_NAME) def disable_mirroring_module(self): - self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", TestMirroring.MODULE_NAME) + self.run_ceph_cmd("mgr", "module", "disable", TestMirroring.MODULE_NAME) def enable_mirroring(self, fs_name, fs_id): - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "enable", fs_name) + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR][0] + + self.run_ceph_cmd("fs", "snapshot", "mirror", "enable", fs_name) time.sleep(10) # verify via asok res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', @@ -48,8 +55,20 @@ class TestMirroring(CephFSTestCase): self.assertTrue(res['peers'] == {}) self.assertTrue(res['snap_dirs']['dir_count'] == 0) + # verify labelled perf counter + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + self.assertEqual(res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0]["labels"]["filesystem"], + fs_name) + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR][0] + + self.assertGreater(vafter["counters"]["mirrored_filesystems"], + vbefore["counters"]["mirrored_filesystems"]) + def disable_mirroring(self, fs_name, fs_id): - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "disable", fs_name) + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR][0] + + self.run_ceph_cmd("fs", "snapshot", "mirror", "disable", fs_name) time.sleep(10) # verify via asok try: @@ -60,6 +79,13 @@ class TestMirroring(CephFSTestCase): else: raise RuntimeError('expected admin socket to be unavailable') + # verify labelled perf counter + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR][0] + + self.assertLess(vafter["counters"]["mirrored_filesystems"], + vbefore["counters"]["mirrored_filesystems"]) + def verify_peer_added(self, fs_name, fs_id, peer_spec, remote_fs_name=None): # verify via asok res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', @@ -74,40 +100,62 @@ class TestMirroring(CephFSTestCase): else: self.assertTrue(self.fs_name == res['peers'][peer_uuid]['remote']['fs_name']) - def peer_add(self, fs_name, fs_id, peer_spec, remote_fs_name=None): + def peer_add(self, fs_name, fs_id, peer_spec, remote_fs_name=None, check_perf_counter=True): + if check_perf_counter: + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] + if remote_fs_name: - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_add", fs_name, peer_spec, remote_fs_name) + self.run_ceph_cmd("fs", "snapshot", "mirror", "peer_add", fs_name, peer_spec, remote_fs_name) else: - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_add", fs_name, peer_spec) + self.run_ceph_cmd("fs", "snapshot", "mirror", "peer_add", fs_name, peer_spec) time.sleep(10) self.verify_peer_added(fs_name, fs_id, peer_spec, remote_fs_name) + if check_perf_counter: + res = 
self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] + self.assertGreater(vafter["counters"]["mirroring_peers"], vbefore["counters"]["mirroring_peers"]) + def peer_remove(self, fs_name, fs_id, peer_spec): + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] + peer_uuid = self.get_peer_uuid(peer_spec) - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_remove", fs_name, peer_uuid) + self.run_ceph_cmd("fs", "snapshot", "mirror", "peer_remove", fs_name, peer_uuid) time.sleep(10) # verify via asok res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') self.assertTrue(res['peers'] == {} and res['snap_dirs']['dir_count'] == 0) + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] + + self.assertLess(vafter["counters"]["mirroring_peers"], vbefore["counters"]["mirroring_peers"]) + def bootstrap_peer(self, fs_name, client_name, site_name): - outj = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd( - "fs", "snapshot", "mirror", "peer_bootstrap", "create", fs_name, client_name, site_name)) + outj = json.loads(self.get_ceph_cmd_stdout( + "fs", "snapshot", "mirror", "peer_bootstrap", "create", fs_name, + client_name, site_name)) return outj['token'] def import_peer(self, fs_name, token): - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_bootstrap", "import", - fs_name, token) + self.run_ceph_cmd("fs", "snapshot", "mirror", "peer_bootstrap", + "import", fs_name, token) + + def add_directory(self, fs_name, fs_id, dir_name, check_perf_counter=True): + if check_perf_counter: + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] - def add_directory(self, fs_name, fs_id, dir_name): # get initial dir count res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') dir_count = res['snap_dirs']['dir_count'] log.debug(f'initial dir_count={dir_count}') - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", fs_name, dir_name) + self.run_ceph_cmd("fs", "snapshot", "mirror", "add", fs_name, dir_name) time.sleep(10) # verify via asok @@ -117,14 +165,21 @@ class TestMirroring(CephFSTestCase): log.debug(f'new dir_count={new_dir_count}') self.assertTrue(new_dir_count > dir_count) + if check_perf_counter: + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] + self.assertGreater(vafter["counters"]["directory_count"], vbefore["counters"]["directory_count"]) + def remove_directory(self, fs_name, fs_id, dir_name): + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] # get initial dir count res = self.mirror_daemon_command(f'mirror status for fs: {fs_name}', 'fs', 'mirror', 'status', f'{fs_name}@{fs_id}') dir_count = res['snap_dirs']['dir_count'] log.debug(f'initial dir_count={dir_count}') - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "remove", fs_name, 
dir_name) + self.run_ceph_cmd("fs", "snapshot", "mirror", "remove", fs_name, dir_name) time.sleep(10) # verify via asok @@ -134,6 +189,11 @@ class TestMirroring(CephFSTestCase): log.debug(f'new dir_count={new_dir_count}') self.assertTrue(new_dir_count < dir_count) + res = self.mirror_daemon_command(f'counter dump for fs: {fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_FS][0] + + self.assertLess(vafter["counters"]["directory_count"], vbefore["counters"]["directory_count"]) + def check_peer_status(self, fs_name, fs_id, peer_spec, dir_name, expected_snap_name, expected_snap_count): peer_uuid = self.get_peer_uuid(peer_spec) @@ -234,7 +294,7 @@ class TestMirroring(CephFSTestCase): return json.loads(res) def get_mirror_daemon_status(self): - daemon_status = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "daemon", "status")) + daemon_status = json.loads(self.get_ceph_cmd_stdout("fs", "snapshot", "mirror", "daemon", "status")) log.debug(f'daemon_status: {daemon_status}') # running a single mirror daemon is supported status = daemon_status[0] @@ -267,7 +327,7 @@ class TestMirroring(CephFSTestCase): self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) try: - self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph") + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError('invalid errno when adding a matching remote peer') @@ -281,7 +341,7 @@ class TestMirroring(CephFSTestCase): # and explicitly specifying the spec (via filesystem name) should fail too try: - self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.primary_fs_name) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.primary_fs_name, check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError('invalid errno when adding a matching remote peer') @@ -302,7 +362,7 @@ class TestMirroring(CephFSTestCase): self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) # adding the same peer should be idempotent - self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name, check_perf_counter=False) # remove peer self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph") @@ -312,7 +372,7 @@ class TestMirroring(CephFSTestCase): def test_peer_commands_with_mirroring_disabled(self): # try adding peer when mirroring is not enabled try: - self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name, check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a peer') @@ -321,7 +381,7 @@ class TestMirroring(CephFSTestCase): # try removing peer try: - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_remove", self.primary_fs_name, 'dummy-uuid') + self.run_ceph_cmd("fs", "snapshot", "mirror", "peer_remove", self.primary_fs_name, 'dummy-uuid') except 
CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError(-errno.EINVAL, 'incorrect error code when removing a peer') @@ -331,7 +391,7 @@ class TestMirroring(CephFSTestCase): def test_add_directory_with_mirroring_disabled(self): # try adding a directory when mirroring is not enabled try: - self.add_directory(self.primary_fs_name, self.primary_fs_id, "/d1") + self.add_directory(self.primary_fs_name, self.primary_fs_id, "/d1", check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a directory') @@ -343,7 +403,7 @@ class TestMirroring(CephFSTestCase): self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') try: - self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1') + self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1', check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EEXIST: raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-adding a directory') @@ -363,7 +423,7 @@ class TestMirroring(CephFSTestCase): def test_add_relative_directory_path(self): self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) try: - self.add_directory(self.primary_fs_name, self.primary_fs_id, './d1') + self.add_directory(self.primary_fs_name, self.primary_fs_id, './d1', check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a relative path dir') @@ -377,7 +437,7 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d2/d3') def check_add_command_failure(dir_path): try: - self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path) + self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path, check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EEXIST: raise RuntimeError(-errno.EINVAL, 'incorrect error code when re-adding a directory') @@ -401,7 +461,7 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d1/d2/') def check_add_command_failure(dir_path): try: - self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path) + self.add_directory(self.primary_fs_name, self.primary_fs_id, dir_path, check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError(-errno.EINVAL, 'incorrect error code when adding a directory') @@ -466,12 +526,13 @@ class TestMirroring(CephFSTestCase): def test_cephfs_mirror_stats(self): log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() @@ -485,6 +546,10 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for 
fs: {self.primary_fs_name}', 'counter', 'dump') + first = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -493,6 +558,11 @@ class TestMirroring(CephFSTestCase): "client.mirror_remote@ceph", '/d0', 'snap0', 1) self.verify_snapshot('d0', 'snap0') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + second = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(second["counters"]["snaps_synced"], first["counters"]["snaps_synced"]) + # some more IO self.mount_a.run_shell(["mkdir", "d0/d00"]) self.mount_a.run_shell(["mkdir", "d0/d01"]) @@ -508,6 +578,11 @@ class TestMirroring(CephFSTestCase): "client.mirror_remote@ceph", '/d0', 'snap1', 2) self.verify_snapshot('d0', 'snap1') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + third = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(third["counters"]["snaps_synced"], second["counters"]["snaps_synced"]) + # delete a snapshot self.mount_a.run_shell(["rmdir", "d0/.snap/snap0"]) @@ -516,6 +591,10 @@ class TestMirroring(CephFSTestCase): self.assertTrue('snap0' not in snap_list) self.check_peer_status_deleted_snap(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 1) + # check snaps_deleted + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + fourth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(fourth["counters"]["snaps_deleted"], third["counters"]["snaps_deleted"]) # rename a snapshot self.mount_a.run_shell(["mv", "d0/.snap/snap1", "d0/.snap/snap2"]) @@ -526,18 +605,23 @@ class TestMirroring(CephFSTestCase): self.assertTrue('snap2' in snap_list) self.check_peer_status_renamed_snap(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 1) + # check snaps_renamed + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + fifth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(fifth["counters"]["snaps_renamed"], fourth["counters"]["snaps_renamed"]) self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) def test_cephfs_mirror_cancel_sync(self): log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() @@ -564,16 +648,23 @@ class TestMirroring(CephFSTestCase): snap_list = self.mount_b.ls(path='d0/.snap') self.assertTrue('snap0' not in snap_list) + + # check sync_failures + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vmirror_peers = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vmirror_peers["counters"]["sync_failures"], 0) + self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) def 
test_cephfs_mirror_restart_sync_on_blocklist(self): log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() @@ -592,6 +683,10 @@ class TestMirroring(CephFSTestCase): # fetch rados address for blacklist check rados_inst = self.get_mirror_rados_addr(self.primary_fs_name, self.primary_fs_id) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -620,6 +715,10 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 'snap0', expected_snap_count=1) self.verify_snapshot('d0', 'snap0') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -628,6 +727,10 @@ class TestMirroring(CephFSTestCase): self.enable_mirroring(self.primary_fs_name, self.primary_fs_id) self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # add a non-existent directory for synchronization self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') @@ -644,6 +747,10 @@ class TestMirroring(CephFSTestCase): time.sleep(120) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', 'snap0', 1) + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) def test_cephfs_mirror_service_daemon_status(self): @@ -697,8 +804,8 @@ class TestMirroring(CephFSTestCase): self.disable_mirroring_module() # enable mirroring through mon interface -- this should result in the mirror daemon - # failing to enable mirroring due to absence of `cephfs_mirorr` index object. - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", self.primary_fs_name) + # failing to enable mirroring due to absence of `cephfs_mirror` index object. 
+ self.run_ceph_cmd("fs", "mirror", "enable", self.primary_fs_name) with safe_while(sleep=5, tries=10, action='wait for failed state') as proceed: while proceed(): @@ -713,7 +820,7 @@ class TestMirroring(CephFSTestCase): except: pass - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", self.primary_fs_name) + self.run_ceph_cmd("fs", "mirror", "disable", self.primary_fs_name) time.sleep(10) # verify via asok try: @@ -735,7 +842,7 @@ class TestMirroring(CephFSTestCase): # enable mirroring through mon interface -- this should result in the mirror daemon # failing to enable mirroring due to absence of `cephfs_mirror` index object. - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "enable", self.primary_fs_name) + self.run_ceph_cmd("fs", "mirror", "enable", self.primary_fs_name) # need safe_while since non-failed status pops up as mirroring is restarted # internally in mirror daemon. with safe_while(sleep=5, tries=20, action='wait for failed state') as proceed: @@ -766,7 +873,7 @@ class TestMirroring(CephFSTestCase): self.assertTrue(res['peers'] == {}) self.assertTrue(res['snap_dirs']['dir_count'] == 0) - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "mirror", "disable", self.primary_fs_name) + self.run_ceph_cmd("fs", "mirror", "disable", self.primary_fs_name) time.sleep(10) # verify via asok try: @@ -792,9 +899,8 @@ class TestMirroring(CephFSTestCase): # verify via peer_list interface peer_uuid = self.get_peer_uuid("client.mirror_peer_bootstrap@site-remote") - res = json.loads(self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "peer_list", self.primary_fs_name)) + res = json.loads(self.get_ceph_cmd_stdout("fs", "snapshot", "mirror", "peer_list", self.primary_fs_name)) self.assertTrue(peer_uuid in res) - self.assertTrue('mon_host' in res[peer_uuid] and res[peer_uuid]['mon_host'] != '') # remove peer self.peer_remove(self.primary_fs_name, self.primary_fs_id, "client.mirror_peer_bootstrap@site-remote") @@ -803,12 +909,13 @@ class TestMirroring(CephFSTestCase): def test_cephfs_mirror_symlink_sync(self): log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() @@ -825,6 +932,10 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -833,6 +944,10 @@ class TestMirroring(CephFSTestCase): "client.mirror_remote@ceph", '/d0', 'snap0', 1) self.verify_snapshot('d0', 'snap0') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) 
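The snaps_synced assertions here, like the mirroring_peers and directory_count checks earlier in the patch, all follow one capture-compare idiom: dump the labelled perf counters, run the operation, dump again, and assert on the delta of a single counter. The idiom factors out naturally into a context manager; the sketch below is a standalone illustration only, where the `fetch` callable and the toy counter dict are assumptions standing in for `self.mirror_daemon_command(..., 'counter', 'dump')` and its output keyed by the `PERF_COUNTER_KEY_NAME_*` constants:

```python
import contextlib
from typing import Callable, Dict

@contextlib.contextmanager
def assert_counter_increases(fetch: Callable[[], Dict[str, int]], name: str):
    """Fail unless counter `name` is strictly greater after the block runs."""
    before = fetch()[name]
    yield
    after = fetch()[name]
    assert after > before, f'{name} did not increase: {before} -> {after}'

# Toy counter source standing in for the mirror daemon's labelled counters.
counters = {'snaps_synced': 0}

with assert_counter_increases(lambda: dict(counters), 'snaps_synced'):
    counters['snaps_synced'] += 1  # stands in for taking and syncing a snapshot
```

In the patch itself the two dumps stay inline, which keeps each before/after pair visible right next to the operation under test.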
self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -844,12 +959,20 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + # take a snapshot self.mount_a.run_shell(["mkdir", "d0/d1/d2/d3/.snap/snap0"]) time.sleep(30) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap0', 1) + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) # create snapshots in parent directories self.mount_a.run_shell(["mkdir", "d0/.snap/snap_d0"]) @@ -861,12 +984,20 @@ class TestMirroring(CephFSTestCase): time.sleep(30) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0/d1/d2/d3', 'snap1', 2) + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vthird = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vthird["counters"]["snaps_synced"], vsecond["counters"]["snaps_synced"]) self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap0"]) self.mount_a.run_shell(["rmdir", "d0/d1/d2/d3/.snap/snap1"]) time.sleep(15) self.check_peer_status_deleted_snap(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0/d1/d2/d3', 2) + # check snaps_deleted + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfourth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vfourth["counters"]["snaps_deleted"], vthird["counters"]["snaps_deleted"]) self.remove_directory(self.primary_fs_name, self.primary_fs_id, '/d0/d1/d2/d3') self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -893,20 +1024,20 @@ class TestMirroring(CephFSTestCase): dir_path_p = "/d0/d1" dir_path = "/d0/d1/d2" - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path) + self.run_ceph_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path) time.sleep(10) # this uses an undocumented interface to get dirpath map state - res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path) + res_json = self.get_ceph_cmd_stdout("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path) res = json.loads(res_json) # there are no mirror daemons self.assertTrue(res['state'], 'stalled') - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "remove", self.primary_fs_name, dir_path) + self.run_ceph_cmd("fs", "snapshot", "mirror", "remove", self.primary_fs_name, dir_path) time.sleep(10) try: - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path) + self.run_ceph_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path) except 
CommandFailedError as ce: if ce.exitstatus != errno.ENOENT: raise RuntimeError('invalid errno when checking dirmap status for non-existent directory') @@ -914,11 +1045,11 @@ class TestMirroring(CephFSTestCase): raise RuntimeError('incorrect errno when checking dirmap state for non-existent directory') # adding a parent directory should be allowed - self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path_p) + self.run_ceph_cmd("fs", "snapshot", "mirror", "add", self.primary_fs_name, dir_path_p) time.sleep(10) # however, this directory path should get stalled too - res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p) + res_json = self.get_ceph_cmd_stdout("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p) res = json.loads(res_json) # there are no mirror daemons self.assertTrue(res['state'], 'stalled') @@ -930,7 +1061,7 @@ class TestMirroring(CephFSTestCase): # wait for restart mirror on blocklist time.sleep(60) - res_json = self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p) + res_json = self.get_ceph_cmd_stdout("fs", "snapshot", "mirror", "dirmap", self.primary_fs_name, dir_path_p) res = json.loads(res_json) # there are no mirror daemons self.assertTrue(res['state'], 'mapped') @@ -940,12 +1071,13 @@ class TestMirroring(CephFSTestCase): def test_cephfs_mirror_incremental_sync(self): """ Test incremental snapshot synchronization (based on mtime differences).""" log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) @@ -969,6 +1101,9 @@ class TestMirroring(CephFSTestCase): self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{repo_path}') + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] self.mount_a.run_shell(['mkdir', f'{repo_path}/.snap/snap_a']) # full copy, takes time @@ -976,6 +1111,10 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_a', 1) self.verify_snapshot(repo_path, 'snap_a') + # check snaps_synced + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) # create some diff num = random.randint(5, 20) @@ -988,6 +1127,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_b', 2) self.verify_snapshot(repo_path, 'snap_b') + res = self.mirror_daemon_command(f'counter dump for fs: 
{self.primary_fs_name}', 'counter', 'dump') + vthird = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vthird["counters"]["snaps_synced"], vsecond["counters"]["snaps_synced"]) # diff again, this time back to HEAD log.debug('resetting to HEAD') @@ -999,6 +1141,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_c', 3) self.verify_snapshot(repo_path, 'snap_c') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfourth = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vfourth["counters"]["snaps_synced"], vthird["counters"]["snaps_synced"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -1018,12 +1163,13 @@ class TestMirroring(CephFSTestCase): file_z | sym dir reg sym """ log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) @@ -1068,11 +1214,18 @@ class TestMirroring(CephFSTestCase): while turns != len(typs): snapname = f'snap_{turns}' cleanup_and_create_with_type('d0', fnames) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] self.mount_a.run_shell(['mkdir', f'd0/.snap/{snapname}']) time.sleep(30) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d0', snapname, turns+1) verify_types('d0', fnames, snapname) + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) + # next type typs.rotate(1) turns += 1 @@ -1089,12 +1242,13 @@ class TestMirroring(CephFSTestCase): """ log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() self.mount_b.mount_wait(cephfs_name=self.secondary_fs_name) @@ -1118,6 +1272,9 @@ class TestMirroring(CephFSTestCase): self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) self.add_directory(self.primary_fs_name, self.primary_fs_id, f'/{repo_path}') + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vfirst = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] self.mount_a.run_shell(['mkdir', 
f'{repo_path}/.snap/snap_a']) # full copy, takes time @@ -1125,6 +1282,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_a', 1) self.verify_snapshot(repo_path, 'snap_a') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vsecond = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vsecond["counters"]["snaps_synced"], vfirst["counters"]["snaps_synced"]) # create some diff num = random.randint(60, 100) @@ -1141,6 +1301,9 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", f'/{repo_path}', 'snap_b', 2) self.verify_snapshot(repo_path, 'snap_b') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vthird = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vthird["counters"]["snaps_synced"], vsecond["counters"]["snaps_synced"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -1151,7 +1314,7 @@ class TestMirroring(CephFSTestCase): # try adding the primary file system as a peer to secondary file # system try: - self.peer_add(self.secondary_fs_name, self.secondary_fs_id, "client.mirror_remote@ceph", self.primary_fs_name) + self.peer_add(self.secondary_fs_name, self.secondary_fs_id, "client.mirror_remote@ceph", self.primary_fs_name, check_perf_counter=False) except CommandFailedError as ce: if ce.exitstatus != errno.EINVAL: raise RuntimeError('invalid errno when adding a primary file system') @@ -1169,12 +1332,13 @@ class TestMirroring(CephFSTestCase): that all replayer threads (3 by default) in the mirror daemon are busy. 
""" log.debug('reconfigure client auth caps') - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_b.client_id), 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0}, allow rw pool={1}'.format( - self.backup_fs.get_data_pool_name(), self.backup_fs.get_data_pool_name())) + self.backup_fs.get_data_pool_name(), + self.backup_fs.get_data_pool_name())) log.debug(f'mounting filesystem {self.secondary_fs_name}') self.mount_b.umount_wait() @@ -1198,6 +1362,9 @@ class TestMirroring(CephFSTestCase): self.add_directory(self.primary_fs_name, self.primary_fs_id, '/d2') self.peer_add(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", self.secondary_fs_name) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vbefore = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] # take snapshots log.debug('taking snapshots') self.mount_a.run_shell(["mkdir", "d0/.snap/snap0"]) @@ -1259,6 +1426,10 @@ class TestMirroring(CephFSTestCase): self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/d2', 'snap0', 1) self.verify_snapshot('d2', 'snap0') + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vafter = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + self.assertGreater(vafter["counters"]["snaps_synced"], vbefore["counters"]["snaps_synced"]) + self.assertGreater(vafter["counters"]["snaps_deleted"], vbefore["counters"]["snaps_deleted"]) self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) @@ -1266,7 +1437,7 @@ class TestMirroring(CephFSTestCase): log.debug('reconfigure client auth caps') cid = self.mount_b.client_id data_pool = self.backup_fs.get_data_pool_name() - self.mds_cluster.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', f"client.{cid}", 'mds', 'allow rw', 'mon', 'allow r', @@ -1287,6 +1458,11 @@ class TestMirroring(CephFSTestCase): time.sleep(60) self.check_peer_status(self.primary_fs_name, self.primary_fs_id, "client.mirror_remote@ceph", '/l1', 'snap0', 1) + # dump perf counters + res = self.mirror_daemon_command(f'counter dump for fs: {self.primary_fs_name}', 'counter', 'dump') + vmirror_peers = res[TestMirroring.PERF_COUNTER_KEY_NAME_CEPHFS_MIRROR_PEER][0] + snaps_synced = vmirror_peers["counters"]["snaps_synced"] + self.assertEqual(snaps_synced, 1, f"Mismatch snaps_synced: {snaps_synced} vs 1") mode_local = self.mount_a.run_shell(["stat", "--format=%A", "l1"]).stdout.getvalue().strip() mode_remote = self.mount_b.run_shell(["stat", "--format=%A", "l1"]).stdout.getvalue().strip() @@ -1296,3 +1472,13 @@ class TestMirroring(CephFSTestCase): self.disable_mirroring(self.primary_fs_name, self.primary_fs_id) self.mount_a.run_shell(["rmdir", "l1/.snap/snap0"]) self.mount_a.run_shell(["rmdir", "l1"]) + + def test_get_set_mirror_dirty_snap_id(self): + """ + That get/set ceph.mirror.dirty_snap_id attribute succeeds in a remote filesystem. 
+ """ + self.mount_b.run_shell(["mkdir", "-p", "d1/d2/d3"]) + attr = str(random.randint(1, 10)) + self.mount_b.setfattr("d1/d2/d3", "ceph.mirror.dirty_snap_id", attr) + val = self.mount_b.getfattr("d1/d2/d3", "ceph.mirror.dirty_snap_id") + self.assertEqual(attr, val, f"Mismatch for ceph.mirror.dirty_snap_id value: {attr} vs {val}") diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py index 8b48dee69..72468a813 100644 --- a/qa/tasks/cephfs/test_misc.py +++ b/qa/tasks/cephfs/test_misc.py @@ -96,16 +96,15 @@ class TestMisc(CephFSTestCase): self.fs.fail() - self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name, - '--yes-i-really-mean-it') + self.run_ceph_cmd('fs', 'rm', self.fs.name, '--yes-i-really-mean-it') - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete', - self.fs.metadata_pool_name, - self.fs.metadata_pool_name, - '--yes-i-really-really-mean-it') - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - self.fs.metadata_pool_name, - '--pg_num_min', str(self.fs.pg_num_min)) + self.run_ceph_cmd('osd', 'pool', 'delete', + self.fs.metadata_pool_name, + self.fs.metadata_pool_name, + '--yes-i-really-really-mean-it') + self.run_ceph_cmd('osd', 'pool', 'create', + self.fs.metadata_pool_name, + '--pg_num_min', str(self.fs.pg_num_min)) # insert a garbage object self.fs.radosm(["put", "foo", "-"], stdin=StringIO("bar")) @@ -119,34 +118,34 @@ class TestMisc(CephFSTestCase): self.wait_until_true(lambda: get_pool_df(self.fs, self.fs.metadata_pool_name), timeout=30) try: - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, - self.fs.metadata_pool_name, - data_pool_name) + self.run_ceph_cmd('fs', 'new', self.fs.name, + self.fs.metadata_pool_name, + data_pool_name) except CommandFailedError as e: self.assertEqual(e.exitstatus, errno.EINVAL) else: raise AssertionError("Expected EINVAL") - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, - self.fs.metadata_pool_name, - data_pool_name, "--force") + self.run_ceph_cmd('fs', 'new', self.fs.name, + self.fs.metadata_pool_name, + data_pool_name, "--force") - self.fs.mon_manager.raw_cluster_cmd('fs', 'fail', self.fs.name) + self.run_ceph_cmd('fs', 'fail', self.fs.name) - self.fs.mon_manager.raw_cluster_cmd('fs', 'rm', self.fs.name, - '--yes-i-really-mean-it') + self.run_ceph_cmd('fs', 'rm', self.fs.name, + '--yes-i-really-mean-it') - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete', - self.fs.metadata_pool_name, - self.fs.metadata_pool_name, - '--yes-i-really-really-mean-it') - self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', - self.fs.metadata_pool_name, - '--pg_num_min', str(self.fs.pg_num_min)) - self.fs.mon_manager.raw_cluster_cmd('fs', 'new', self.fs.name, - self.fs.metadata_pool_name, - data_pool_name, - '--allow_dangerous_metadata_overlay') + self.run_ceph_cmd('osd', 'pool', 'delete', + self.fs.metadata_pool_name, + self.fs.metadata_pool_name, + '--yes-i-really-really-mean-it') + self.run_ceph_cmd('osd', 'pool', 'create', + self.fs.metadata_pool_name, + '--pg_num_min', str(self.fs.pg_num_min)) + self.run_ceph_cmd('fs', 'new', self.fs.name, + self.fs.metadata_pool_name, + data_pool_name, + '--allow_dangerous_metadata_overlay') def test_cap_revoke_nonresponder(self): """ @@ -199,9 +198,8 @@ class TestMisc(CephFSTestCase): pool_name = self.fs.get_data_pool_name() raw_df = self.fs.get_pool_df(pool_name) raw_avail = float(raw_df["max_avail"]) - out = self.fs.mon_manager.raw_cluster_cmd('osd', 'pool', 'get', - pool_name, 'size', - '-f', 'json-pretty') + out = 
self.get_ceph_cmd_stdout('osd', 'pool', 'get', pool_name, + 'size', '-f', 'json-pretty') _ = json.loads(out) proc = self.mount_a.run_shell(['df', '.']) @@ -210,18 +208,39 @@ class TestMisc(CephFSTestCase): fs_avail = float(fs_avail) * 1024 ratio = raw_avail / fs_avail - assert 0.9 < ratio < 1.1 + self.assertTrue(0.9 < ratio < 1.1) def test_dump_inode(self): info = self.fs.mds_asok(['dump', 'inode', '1']) - assert(info['path'] == "/") + self.assertEqual(info['path'], "/") def test_dump_inode_hexademical(self): self.mount_a.run_shell(["mkdir", "-p", "foo"]) ino = self.mount_a.path_to_ino("foo") - assert type(ino) is int + self.assertTrue(type(ino) is int) info = self.fs.mds_asok(['dump', 'inode', hex(ino)]) - assert info['path'] == "/foo" + self.assertEqual(info['path'], "/foo") + + def test_dump_dir(self): + self.mount_a.run_shell(["mkdir", "-p", "foo/bar"]) + dirs = self.fs.mds_asok(['dump', 'dir', '/foo']) + self.assertTrue(type(dirs) is list) + for dir in dirs: + self.assertEqual(dir['path'], "/foo") + self.assertFalse("dentries" in dir) + dirs = self.fs.mds_asok(['dump', 'dir', '/foo', '--dentry_dump']) + self.assertTrue(type(dirs) is list) + found_dentry = False + for dir in dirs: + self.assertEqual(dir['path'], "/foo") + self.assertTrue(type(dir['dentries']) is list) + if found_dentry: + continue + for dentry in dir['dentries']: + if dentry['path'] == "foo/bar": + found_dentry = True + break + self.assertTrue(found_dentry) def test_fs_lsflags(self): """ @@ -232,9 +251,8 @@ class TestMisc(CephFSTestCase): self.fs.set_allow_new_snaps(False) self.fs.set_allow_standby_replay(True) - lsflags = json.loads(self.fs.mon_manager.raw_cluster_cmd('fs', 'lsflags', - self.fs.name, - "--format=json-pretty")) + lsflags = json.loads(self.get_ceph_cmd_stdout( + 'fs', 'lsflags', self.fs.name, "--format=json-pretty")) self.assertEqual(lsflags["joinable"], False) self.assertEqual(lsflags["allow_snaps"], False) self.assertEqual(lsflags["allow_multimds_snaps"], True) @@ -258,30 +276,30 @@ class TestMisc(CephFSTestCase): self.mount_a.run_shell(["mkdir", os.path.join(dir_path, f"{i}_{j}")]) start = time.time() if file_sync: - self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript]) + self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript], timeout=4) else: - self.mount_a.run_shell(["sync"]) + self.mount_a.run_shell(["sync"], timeout=4) + # the real duration should be less than the rough one duration = time.time() - start - log.info(f"sync mkdir i = {i}, duration = {duration}") - self.assertLess(duration, 4) + log.info(f"sync mkdir i = {i}, rough duration = {duration}") for j in range(5): self.mount_a.run_shell(["rm", "-rf", os.path.join(dir_path, f"{i}_{j}")]) start = time.time() if file_sync: - self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript]) + self.mount_a.run_shell(['python3', '-c', sync_dir_pyscript], timeout=4) else: - self.mount_a.run_shell(["sync"]) + self.mount_a.run_shell(["sync"], timeout=4) + # the real duration should be less than the rough one duration = time.time() - start - log.info(f"sync rmdir i = {i}, duration = {duration}") - self.assertLess(duration, 4) + log.info(f"sync rmdir i = {i}, rough duration = {duration}") self.mount_a.run_shell(["rm", "-rf", dir_path]) def test_filesystem_sync_stuck_for_around_5s(self): """ - To check whether the fsync will be stuck to wait for the mdlog to be - flushed for at most 5 seconds. + To check whether the filesystem sync will be stuck to wait for the + mdlog to be flushed for at most 5 seconds. 
""" dir_path = "filesystem_sync_do_not_wait_mdlog_testdir" @@ -289,8 +307,8 @@ class TestMisc(CephFSTestCase): def test_file_sync_stuck_for_around_5s(self): """ - To check whether the filesystem sync will be stuck to wait for the - mdlog to be flushed for at most 5 seconds. + To check whether the fsync will be stuck to wait for the mdlog to + be flushed for at most 5 seconds. """ dir_path = "file_sync_do_not_wait_mdlog_testdir" @@ -404,7 +422,7 @@ class TestMisc(CephFSTestCase): self.fs.mds_asok(['config', 'set', 'debug_mds', '1/10']) self.fs.mds_asok(['config', 'set', 'mds_extraordinary_events_dump_interval', '1']) try: - mons = json.loads(self.fs.mon_manager.raw_cluster_cmd('mon', 'dump', '-f', 'json'))['mons'] + mons = json.loads(self.get_ceph_cmd_stdout('mon', 'dump', '-f', 'json'))['mons'] except: self.assertTrue(False, "Error fetching monitors") @@ -447,7 +465,7 @@ class TestMisc(CephFSTestCase): self.fs.mds_asok(['config', 'set', 'mds_heartbeat_grace', '1']) self.fs.mds_asok(['config', 'set', 'mds_extraordinary_events_dump_interval', '1']) try: - mons = json.loads(self.fs.mon_manager.raw_cluster_cmd('mon', 'dump', '-f', 'json'))['mons'] + mons = json.loads(self.get_ceph_cmd_stdout('mon', 'dump', '-f', 'json'))['mons'] except: self.assertTrue(False, "Error fetching monitors") diff --git a/qa/tasks/cephfs/test_multifs_auth.py b/qa/tasks/cephfs/test_multifs_auth.py index c9ea5f528..592a84164 100644 --- a/qa/tasks/cephfs/test_multifs_auth.py +++ b/qa/tasks/cephfs/test_multifs_auth.py @@ -26,15 +26,15 @@ class TestMultiFS(CephFSTestCase): # we might have it - the client - if the same cluster was used for a # different vstart_runner.py run. - self.run_cluster_cmd(f'auth rm {self.client_name}') + self.run_ceph_cmd(f'auth rm {self.client_name}') self.fs1 = self.fs self.fs2 = self.mds_cluster.newfs(name='cephfs2', create=True) # we'll reassign caps to client.1 so that it can operate with cephfs2 - self.run_cluster_cmd(f'auth caps client.{self.mount_b.client_id} mon ' - f'"allow r" osd "allow rw ' - f'pool={self.fs2.get_data_pool_name()}" mds allow') + self.run_ceph_cmd(f'auth caps client.{self.mount_b.client_id} mon ' + f'"allow r" osd "allow rw ' + f'pool={self.fs2.get_data_pool_name()}" mds allow') self.mount_b.remount(cephfs_name=self.fs2.name) @@ -209,54 +209,16 @@ class TestMDSCaps(TestMultiFS): class TestClientsWithoutAuth(TestMultiFS): + # c.f., src/mount/mtab.c: EX_FAIL + RETVAL_KCLIENT = 32 + # c.f., src/ceph_fuse.cc: (cpp EXIT_FAILURE). Normally the check for this + # case should be anything-except-0, but EXIT_FAILURE is 1 in most systems. + RETVAL_USER_SPACE_CLIENT = 1 def setUp(self): super(TestClientsWithoutAuth, self).setUp() - - # TODO: When MON and OSD caps for a Ceph FS are assigned to a - # client but MDS caps are not, mount.ceph prints "permission - # denied". But when MON caps are not assigned and MDS and OSD - # caps are, mount.ceph prints "no mds server or cluster laggy" - # instead of "permission denied". - # - # Before uncommenting the following line a fix would be required - # for latter case to change "no mds server is up or the cluster is - # laggy" to "permission denied". - self.kernel_errmsgs = ('permission denied', 'no mds server is up or ' - 'the cluster is laggy', 'no such file or ' - 'directory', - 'input/output error') - - # TODO: When MON and OSD caps are assigned for a Ceph FS to a - # client but MDS caps are not, ceph-fuse prints "operation not - # permitted". 
But when MON caps are not assigned and MDS and OSD - # caps are, ceph-fuse prints "no such file or directory" instead - # of "operation not permitted". - # - # Before uncommenting the following line a fix would be required - # for the latter case to change "no such file or directory" to - # "operation not permitted". - #self.assertIn('operation not permitted', retval[2].lower()) - self.fuse_errmsgs = ('operation not permitted', 'no such file or ' - 'directory') - - if 'kernel' in str(type(self.mount_a)).lower(): - self.errmsgs = self.kernel_errmsgs - elif 'fuse' in str(type(self.mount_a)).lower(): - self.errmsgs = self.fuse_errmsgs - else: - raise RuntimeError('strange, the client was neither based on ' - 'kernel nor FUSE.') - - def check_that_mount_failed_for_right_reason(self, stderr): - stderr = stderr.lower() - for errmsg in self.errmsgs: - if errmsg in stderr: - break - else: - raise AssertionError('can\'t find expected set of words in the ' - f'stderr\nself.errmsgs - {self.errmsgs}\n' - f'stderr - {stderr}') + self.retval = self.RETVAL_KCLIENT if 'kernel' in str(type(self.mount_a)).lower() \ + else self.RETVAL_USER_SPACE_CLIENT def test_mount_all_caps_absent(self): # setup part... @@ -264,16 +226,13 @@ class TestClientsWithoutAuth(TestMultiFS): keyring_path = self.mount_a.client_remote.mktemp(data=keyring) # mount the FS for which client has no auth... - retval = self.mount_a.remount(client_id=self.client_id, - client_keyring_path=keyring_path, - cephfs_name=self.fs2.name, - check_status=False) - - # tests... - self.assertIsInstance(retval, tuple) - self.assertEqual(len(retval), 3) - self.assertIsInstance(retval[0], CommandFailedError) - self.check_that_mount_failed_for_right_reason(retval[2]) + try: + self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_name=self.fs2.name, + check_status=False) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, self.retval) def test_mount_mon_and_osd_caps_present_mds_caps_absent(self): # setup part... @@ -285,13 +244,10 @@ class TestClientsWithoutAuth(TestMultiFS): keyring_path = self.mount_a.client_remote.mktemp(data=keyring) # mount the FS for which client has no auth... - retval = self.mount_a.remount(client_id=self.client_id, - client_keyring_path=keyring_path, - cephfs_name=self.fs2.name, - check_status=False) - - # tests... 
- self.assertIsInstance(retval, tuple) - self.assertEqual(len(retval), 3) - self.assertIsInstance(retval[0], CommandFailedError) - self.check_that_mount_failed_for_right_reason(retval[2]) + try: + self.mount_a.remount(client_id=self.client_id, + client_keyring_path=keyring_path, + cephfs_name=self.fs2.name, + check_status=False) + except CommandFailedError as e: + self.assertEqual(e.exitstatus, self.retval) diff --git a/qa/tasks/cephfs/test_multimds_misc.py b/qa/tasks/cephfs/test_multimds_misc.py index 2bb6257c7..e0e46fb24 100644 --- a/qa/tasks/cephfs/test_multimds_misc.py +++ b/qa/tasks/cephfs/test_multimds_misc.py @@ -116,7 +116,7 @@ class TestScrub2(CephFSTestCase): def expect_exdev(cmd, mds): try: - self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(mds), *cmd) + self.run_ceph_cmd('tell', 'mds.{0}'.format(mds), *cmd) except CommandFailedError as e: if e.exitstatus == errno.EXDEV: pass diff --git a/qa/tasks/cephfs/test_nfs.py b/qa/tasks/cephfs/test_nfs.py index 0a10709e6..2d06cbac7 100644 --- a/qa/tasks/cephfs/test_nfs.py +++ b/qa/tasks/cephfs/test_nfs.py @@ -16,16 +16,14 @@ NFS_POOL_NAME = '.nfs' # should match mgr_module.py # TODO Add test for cluster update when ganesha can be deployed on multiple ports. class TestNFS(MgrTestCase): def _cmd(self, *args): - return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + return self.get_ceph_cmd_stdout(args) def _nfs_cmd(self, *args): return self._cmd("nfs", *args) def _nfs_complete_cmd(self, cmd): - return self.mgr_cluster.mon_manager.run_cluster_cmd(args=f"nfs {cmd}", - stdout=StringIO(), - stderr=StringIO(), - check_status=False) + return self.run_ceph_cmd(args=f"nfs {cmd}", stdout=StringIO(), + stderr=StringIO(), check_status=False) def _orch_cmd(self, *args): return self._cmd("orch", *args) @@ -142,7 +140,7 @@ class TestNFS(MgrTestCase): :param cmd_args: nfs command arguments to be run ''' cmd_func() - ret = self.mgr_cluster.mon_manager.raw_cluster_cmd_result(*cmd_args) + ret = self.get_ceph_cmd_result(*cmd_args) if ret != 0: self.fail("Idempotency test failed") @@ -406,6 +404,13 @@ class TestNFS(MgrTestCase): self._cmd('fs', 'volume', 'rm', fs_name, '--yes-i-really-mean-it') self._test_delete_cluster() + def _nfs_export_apply(self, cluster, exports, raise_on_error=False): + return self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply', + cluster, '-i', '-'], + check_status=raise_on_error, + stdin=json.dumps(exports), + stdout=StringIO(), stderr=StringIO()) + def test_create_and_delete_cluster(self): ''' Test successful creation and deletion of the nfs cluster. @@ -878,3 +883,258 @@ class TestNFS(MgrTestCase): raise self.ctx.cluster.run(args=['rm', '-rf', f'{mnt_pt}/*']) self._delete_cluster_with_fs(self.fs_name, mnt_pt, preserve_mode) + + def test_nfs_export_apply_multiple_exports(self): + """ + Test multiple export creation/update with multiple + export blocks provided in the json/conf file using: + ceph nfs export apply <nfs_cluster> -i <{conf/json}_file>, and check + 1) if there are multiple failures: + -> Return EIO and the error status to the CLI (along with JSON output + containing the status of every export). + 2) if there is a single failure: + -> Return the respective errno and error status to the CLI (along with + JSON output containing the status of every export).
+ """ + + mnt_pt = self._sys_cmd(['mktemp', '-d']).decode().strip() + self._create_cluster_with_fs(self.fs_name, mnt_pt) + try: + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir1']) + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir2']) + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir3']) + self._create_export(export_id='1', + extra_cmd=['--pseudo-path', self.pseudo_path, + '--path', '/testdir1']) + self._create_export(export_id='2', + extra_cmd=['--pseudo-path', + self.pseudo_path+'2', + '--path', '/testdir2']) + exports = [ + { + "export_id": 11, # export_id change not allowed + "path": "/testdir1", + "pseudo": self.pseudo_path, + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.1", + "fs_name": self.fs_name + } + }, + { + "export_id": 2, + "path": "/testdir2", + "pseudo": self.pseudo_path+'2', + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.2", + "fs_name": "invalid_fs_name" # invalid fs + } + }, + { # no error, export creation should succeed + "export_id": 3, + "path": "/testdir3", + "pseudo": self.pseudo_path+'3', + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.3", + "fs_name": self.fs_name + } + } + ] + + # multiple failures + ret = self._nfs_export_apply(self.cluster_id, exports) + self.assertEqual(ret[0].returncode, errno.EIO) + self.assertIn("2 export blocks (at index 1, 2) failed to be " + "created/updated", ret[0].stderr.getvalue()) + + # single failure + exports[1]["fsal"]["fs_name"] = self.fs_name # correct the fs + ret = self._nfs_export_apply(self.cluster_id, exports) + self.assertEqual(ret[0].returncode, errno.EINVAL) + self.assertIn("Export ID changed, Cannot update export for " + "export block at index 1", ret[0].stderr.getvalue()) + finally: + self._delete_cluster_with_fs(self.fs_name, mnt_pt) + self.ctx.cluster.run(args=['rm', '-rf', f'{mnt_pt}']) + + def test_nfs_export_apply_single_export(self): + """ + Test that when single export creation/update fails with multiple + export blocks provided in the json/conf file using: + ceph nfs export apply <nfs_cluster> -i <{conf/json}_file>, it + returns the respective errno and error status to CLI (along with + JSON output containing status of every export). + """ + + mnt_pt = self._sys_cmd(['mktemp', '-d']).decode().strip() + self._create_cluster_with_fs(self.fs_name, mnt_pt) + try: + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir1']) + self._create_export(export_id='1', + extra_cmd=['--pseudo-path', self.pseudo_path, + '--path', '/testdir1']) + export = { + "export_id": 1, + "path": "/testdir1", + "pseudo": self.pseudo_path, + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.1", + "fs_name": "invalid_fs_name" # invalid fs + } + } + ret = self._nfs_export_apply(self.cluster_id, export) + self.assertEqual(ret[0].returncode, errno.ENOENT) + self.assertIn("filesystem invalid_fs_name not found for " + "export block at index 1", ret[0].stderr.getvalue()) + finally: + self._delete_cluster_with_fs(self.fs_name, mnt_pt) + self.ctx.cluster.run(args=['rm', '-rf', f'{mnt_pt}']) + + def test_nfs_export_apply_json_output_states(self): + """ + If export creation/update is done using: + ceph nfs export apply <nfs_cluster> -i <{conf/json}_file> then the + "status" field in the json output maybe added, updated, error or + warning. 
Test different scenarios to make sure these states are + in the json output as expected. + """ + + mnt_pt = self._sys_cmd(['mktemp', '-d']).decode().strip() + self._create_cluster_with_fs(self.fs_name, mnt_pt) + try: + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir1']) + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir2']) + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir3']) + self._create_export(export_id='1', + extra_cmd=['--pseudo-path', self.pseudo_path, + '--path', '/testdir1']) + exports = [ + { # change pseudo, state should be "updated" + "export_id": 1, + "path": "/testdir1", + "pseudo": self.pseudo_path+'1', + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.1", + "fs_name": self.fs_name + } + }, + { # a new export, state should be "added" + "export_id": 2, + "path": "/testdir2", + "pseudo": self.pseudo_path+'2', + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.2", + "fs_name": self.fs_name + } + }, + { # error in export block, state should be "error" since the + # fs_name is invalid + "export_id": 3, + "path": "/testdir3", + "pseudo": self.pseudo_path+'3', + "squash": "none", + "access_type": "RW", + "protocols": [4], + "fsal": { + "name": "CEPH", + "user_id": "nfs.test.3", + "fs_name": "invalid_fs_name" + } + } + ] + ret = self._nfs_export_apply(self.cluster_id, exports) + json_output = json.loads(ret[0].stdout.getvalue().strip()) + self.assertEqual(len(json_output), 3) + self.assertEqual(json_output[0]["state"], "updated") + self.assertEqual(json_output[1]["state"], "added") + self.assertEqual(json_output[2]["state"], "error") + finally: + self._delete_cluster_with_fs(self.fs_name, mnt_pt) + self.ctx.cluster.run(args=['rm', '-rf', f'{mnt_pt}']) + + def test_pseudo_path_in_json_response_when_updating_exports_failed(self): + """ + Test that on export update/creation failure while using + ceph nfs export apply <nfs_cluster> -i <json/conf>, the failed + exports pseudo paths are visible in the JSON response to CLI and the + return code is set to EIO. 
+ """ + mnt_pt = self._sys_cmd(['mktemp', '-d']).decode().strip() + self._create_cluster_with_fs(self.fs_name, mnt_pt) + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir1']) + self.ctx.cluster.run(args=['mkdir', f'{mnt_pt}/testdir2']) + self._create_export(export_id='1', + extra_cmd=['--pseudo-path', self.pseudo_path]) + + ret = self.ctx.cluster.run(args=['ceph', 'nfs', 'export', 'apply', + self.cluster_id, '-i', '-'], + check_status=False, + stdin=json.dumps([ + { + "export_id": 11, # change not allowed + "path": "/testdir1", + "pseudo": self.pseudo_path, + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "fs_name": self.fs_name + } + }, + { + "path": "/testdir2", + "pseudo": self.pseudo_path+'1', + "squash": "none", + "access_type": "rw", + "protocols": [4], + "fsal": { + "name": "CEPH", + "fs_name": "foo" # invalid fs + } + }]), + stdout=StringIO(), stderr=StringIO()) + + try: + # EIO since multiple exports failure (first export failed to be + # modified while the second one failed to be created) + self.assertEqual(ret[0].returncode, errno.EIO) + err_info = ret[0].stdout + if err_info: + update_details = json.loads(err_info.getvalue()) + self.assertEqual(update_details[0]["pseudo"], self.pseudo_path) + self.assertEqual(update_details[1]["pseudo"], self.pseudo_path+'1') + else: + self.fail("Could not retrieve any export update data") + + # verify second export wasn't created + exports = json.loads(self._nfs_cmd('export', 'ls', + self.cluster_id, '--detailed')) + self.assertEqual(len(exports), 1) + + finally: + self._delete_cluster_with_fs(self.fs_name, mnt_pt) + self.ctx.cluster.run(args=['rm', '-rf', f'{mnt_pt}']) diff --git a/qa/tasks/cephfs/test_pool_perm.py b/qa/tasks/cephfs/test_pool_perm.py index 9912debed..b55052b82 100644 --- a/qa/tasks/cephfs/test_pool_perm.py +++ b/qa/tasks/cephfs/test_pool_perm.py @@ -30,9 +30,9 @@ class TestPoolPerm(CephFSTestCase): client_name = "client.{0}".format(self.mount_a.client_id) # set data pool read only - self.fs.mon_manager.raw_cluster_cmd_result( - 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd', - 'allow r pool={0}'.format(self.fs.get_data_pool_name())) + self.get_ceph_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', + 'osd', 'allow r pool={0}'.format(self.fs.get_data_pool_name())) self.mount_a.umount_wait() self.mount_a.mount_wait() @@ -41,9 +41,9 @@ class TestPoolPerm(CephFSTestCase): self.mount_a.run_python(remote_script.format(path=file_path, check_read=str(False))) # set data pool write only - self.fs.mon_manager.raw_cluster_cmd_result( - 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', 'osd', - 'allow w pool={0}'.format(self.fs.get_data_pool_name())) + self.get_ceph_cmd_result( + 'auth', 'caps', client_name, 'mds', 'allow', 'mon', 'allow r', + 'osd', 'allow w pool={0}'.format(self.fs.get_data_pool_name())) self.mount_a.umount_wait() self.mount_a.mount_wait() @@ -66,7 +66,7 @@ class TestPoolPerm(CephFSTestCase): self.mount_a.run_shell(["mkdir", "layoutdir"]) # Set MDS 'rw' perms: missing 'p' means no setting pool layouts - self.fs.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', client_name, 'mds', 'allow rw', 'mon', 'allow r', 'osd', 'allow rw pool={0},allow rw pool={1}'.format( @@ -86,7 +86,7 @@ class TestPoolPerm(CephFSTestCase): self.mount_a.umount_wait() # Set MDS 'rwp' perms: should now be able to set layouts - self.fs.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 
'caps', client_name, 'mds', 'allow rwp', 'mon', 'allow r', 'osd', 'allow rw pool={0},allow rw pool={1}'.format( @@ -101,7 +101,7 @@ class TestPoolPerm(CephFSTestCase): self.mount_a.umount_wait() def tearDown(self): - self.fs.mon_manager.raw_cluster_cmd_result( + self.get_ceph_cmd_result( 'auth', 'caps', "client.{0}".format(self.mount_a.client_id), 'mds', 'allow', 'mon', 'allow r', 'osd', 'allow rw pool={0}'.format(self.fs.get_data_pool_names()[0])) diff --git a/qa/tasks/cephfs/test_quota.py b/qa/tasks/cephfs/test_quota.py index 0386672bd..b5691c838 100644 --- a/qa/tasks/cephfs/test_quota.py +++ b/qa/tasks/cephfs/test_quota.py @@ -104,3 +104,59 @@ class TestQuota(CephFSTestCase): with self.assertRaises(CommandFailedError): self.mount_b.write_n_mb("subdir_data/file", 40) + def test_human_readable_quota_values(self): + """ + test human-readable values for setting ceph.quota.max_bytes + """ + self.mount_a.run_shell(["mkdir", "subdir"]) + + self.assertEqual(self.mount_a.getfattr("./subdir", + "ceph.quota.max_bytes"), None) + + readable_values = {"10K": "10240", + "100Ki": "102400", + "10M": "10485760", + "100Mi": "104857600", + "2G": "2147483648", + "4Gi": "4294967296", + "1T": "1099511627776", + "2Ti": "2199023255552"} + for readable_value in readable_values: + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", + readable_value) + self.assertEqual(self.mount_a.getfattr( + "./subdir", "ceph.quota.max_bytes"), + readable_values.get(readable_value)) + + def test_human_readable_quota_invalid_values(self): + """ + test invalid values for ceph.quota.max_bytes + """ + + self.mount_a.run_shell(["mkdir", "subdir"]) + + invalid_values = ["10A", "1y00Ki", "af00", "G", "", " ", "-1t", "-1"] + for invalid_value in invalid_values: + with self.assertRaises(CommandFailedError): + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", + invalid_value) + + def test_disable_enable_human_readable_quota_values(self): + """ + test: + 1) disabling ceph.quota.max_bytes using byte value. + 2) enabling it again using human readable value. + 3) disabling it again but using human readable value. 
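+
+        (Note: any zero quantity clears the quota, since the human-readable
+        value parses to 0 bytes and a max_bytes of 0 means "no limit". The
+        test below relies on this for both "0" and "0M"; presumably any
+        other valid suffix with a zero count would behave the same.)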
+ """ + + self.mount_a.run_shell(["mkdir", "subdir"]) + + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", "0") + self.assertEqual(self.mount_a.getfattr("./subdir", + "ceph.quota.max_bytes"), None) + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", "1K") + self.assertEqual(self.mount_a.getfattr("./subdir", + "ceph.quota.max_bytes"), "1024") + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", "0M") + self.assertEqual(self.mount_a.getfattr("./subdir", + "ceph.quota.max_bytes"), None) diff --git a/qa/tasks/cephfs/test_recovery_fs.py b/qa/tasks/cephfs/test_recovery_fs.py index bbcdf9769..17669c0f2 100644 --- a/qa/tasks/cephfs/test_recovery_fs.py +++ b/qa/tasks/cephfs/test_recovery_fs.py @@ -27,7 +27,7 @@ class TestFSRecovery(CephFSTestCase): # recovered/intact self.fs.rm() # Recreate file system with pool and previous fscid - self.fs.mon_manager.raw_cluster_cmd( + self.run_ceph_cmd( 'fs', 'new', self.fs.name, metadata_pool, data_pool, '--recover', '--force', '--fscid', f'{self.fs.id}') self.fs.set_joinable() diff --git a/qa/tasks/cephfs/test_recovery_pool.py b/qa/tasks/cephfs/test_recovery_pool.py index 8c4e1967d..7aef28229 100644 --- a/qa/tasks/cephfs/test_recovery_pool.py +++ b/qa/tasks/cephfs/test_recovery_pool.py @@ -119,7 +119,7 @@ class TestRecoveryPool(CephFSTestCase): recovery_fs.create(recover=True, metadata_overlay=True) recovery_pool = recovery_fs.get_metadata_pool_name() - recovery_fs.mon_manager.raw_cluster_cmd('-s') + self.run_ceph_cmd('-s') # Reset the MDS map in case multiple ranks were in play: recovery procedure # only understands how to rebuild metadata under rank 0 diff --git a/qa/tasks/cephfs/test_scrub_checks.py b/qa/tasks/cephfs/test_scrub_checks.py index e41b997a6..f17a6ceb1 100644 --- a/qa/tasks/cephfs/test_scrub_checks.py +++ b/qa/tasks/cephfs/test_scrub_checks.py @@ -281,8 +281,8 @@ class TestScrubChecks(CephFSTestCase): all_damage = self.fs.rank_tell(["damage", "ls"], mds_rank) damage = [d for d in all_damage if d['ino'] == ino and d['damage_type'] == dtype] for d in damage: - self.fs.mon_manager.raw_cluster_cmd( - 'tell', 'mds.{0}'.format(self.fs.get_active_names()[mds_rank]), + self.run_ceph_cmd( + 'tell', f'mds.{self.fs.get_active_names()[mds_rank]}', "damage", "rm", str(d['id'])) return len(damage) > 0 diff --git a/qa/tasks/cephfs/test_sessionmap.py b/qa/tasks/cephfs/test_sessionmap.py index ad6fd1d60..b3b88af72 100644 --- a/qa/tasks/cephfs/test_sessionmap.py +++ b/qa/tasks/cephfs/test_sessionmap.py @@ -158,7 +158,7 @@ class TestSessionMap(CephFSTestCase): if mon_caps is None: mon_caps = "allow r" - out = self.fs.mon_manager.raw_cluster_cmd( + out = self.get_ceph_cmd_stdout( "auth", "get-or-create", "client.{name}".format(name=id_name), "mds", mds_caps, "osd", osd_caps, diff --git a/qa/tasks/cephfs/test_snap_schedules.py b/qa/tasks/cephfs/test_snap_schedules.py index 0264cac32..8bbd679ef 100644 --- a/qa/tasks/cephfs/test_snap_schedules.py +++ b/qa/tasks/cephfs/test_snap_schedules.py @@ -3,6 +3,7 @@ import json import time import errno import logging +import uuid from tasks.cephfs.cephfs_test_case import CephFSTestCase from teuthology.exceptions import CommandFailedError @@ -28,6 +29,29 @@ class TestSnapSchedulesHelper(CephFSTestCase): # this should be in sync with snap_schedule format SNAPSHOT_TS_FORMAT = '%Y-%m-%d-%H_%M_%S' + def remove_snapshots(self, dir_path, sdn): + snap_path = f'{dir_path}/{sdn}' + + snapshots = self.mount_a.ls(path=snap_path) + for snapshot in snapshots: + if snapshot.startswith("_scheduled"): + continue + 
snapshot_path = os.path.join(snap_path, snapshot) + log.debug(f'removing snapshot: {snapshot_path}') + self.mount_a.run_shell(['sudo', 'rmdir', snapshot_path]) + + def get_snap_dir_name(self): + from .fuse_mount import FuseMount + from .kernel_mount import KernelMount + + if isinstance(self.mount_a, KernelMount): + sdn = self.mount_a.client_config.get('snapdirname', '.snap') + elif isinstance(self.mount_a, FuseMount): + sdn = self.mount_a.client_config.get('client_snapdir', '.snap') + self.fs.set_ceph_conf('client', 'client snapdir', sdn) + self.mount_a.remount() + return sdn + def check_scheduled_snapshot(self, exec_time, timo): now = time.time() delta = now - exec_time @@ -36,7 +60,7 @@ class TestSnapSchedulesHelper(CephFSTestCase): self.assertTrue((delta <= timo + 5) and (delta >= timo - 5)) def _fs_cmd(self, *args): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args) + return self.get_ceph_cmd_stdout("fs", *args) def fs_snap_schedule_cmd(self, *args, **kwargs): if 'fs' in kwargs: @@ -61,10 +85,10 @@ class TestSnapSchedulesHelper(CephFSTestCase): self.volname = result[0]['name'] def _enable_snap_schedule(self): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "enable", "snap_schedule") + return self.get_ceph_cmd_stdout("mgr", "module", "enable", "snap_schedule") def _disable_snap_schedule(self): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "module", "disable", "snap_schedule") + return self.get_ceph_cmd_stdout("mgr", "module", "disable", "snap_schedule") def _allow_minute_granularity_snapshots(self): self.config_set('mgr', 'mgr/snap_schedule/allow_m_granularity', True) @@ -94,7 +118,7 @@ class TestSnapSchedulesHelper(CephFSTestCase): def _schedule_to_timeout(self, schedule): mult = schedule[-1] period = int(schedule[0:-1]) - if mult == 'M': + if mult == 'm': return period * 60 elif mult == 'h': return period * 60 * 60 @@ -102,6 +126,10 @@ class TestSnapSchedulesHelper(CephFSTestCase): return period * 60 * 60 * 24 elif mult == 'w': return period * 60 * 60 * 24 * 7 + elif mult == 'M': + return period * 60 * 60 * 24 * 30 + elif mult == 'Y': + return period * 60 * 60 * 24 * 365 else: raise RuntimeError('schedule multiplier not recognized') @@ -166,7 +194,7 @@ class TestSnapSchedulesHelper(CephFSTestCase): self.assertTrue(schedule in json_res['schedule']) for retention in retentions: self.assertTrue(retention in json_res['retention']) - + class TestSnapSchedules(TestSnapSchedulesHelper): def remove_snapshots(self, dir_path): snap_path = f'{dir_path}/.snap' @@ -224,15 +252,15 @@ class TestSnapSchedules(TestSnapSchedulesHelper): self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) # set a schedule on the dir - self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M') + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1m') exec_time = time.time() - timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1m') log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo}s...') to_wait = timo + 2 # some leeway to avoid false failures... 
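        # (the snapshot name is "scheduled-" plus a timestamp rendered with
        # SNAPSHOT_TS_FORMAT, '%Y-%m-%d-%H_%M_%S'; calc_wait_time_and_snap_name()
        # returns that timestamp as snap_sfx)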
# verify snapshot schedule - self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M']) + self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1m']) def verify_added(snaps_added): log.debug(f'snapshots added={snaps_added}') @@ -260,18 +288,18 @@ class TestSnapSchedules(TestSnapSchedulesHelper): self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) # set schedules on the dir - self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M') - self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='2M') + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1m') + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='2m') exec_time = time.time() - timo_1, snap_sfx_1 = self.calc_wait_time_and_snap_name(exec_time, '1M') + timo_1, snap_sfx_1 = self.calc_wait_time_and_snap_name(exec_time, '1m') log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_1} in ~{timo_1}s...') - timo_2, snap_sfx_2 = self.calc_wait_time_and_snap_name(exec_time, '2M') + timo_2, snap_sfx_2 = self.calc_wait_time_and_snap_name(exec_time, '2m') log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx_2} in ~{timo_2}s...') to_wait = timo_2 + 2 # use max timeout # verify snapshot schedule - self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M', '2M']) + self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1m', '2m']) def verify_added_1(snaps_added): log.debug(f'snapshots added={snaps_added}') @@ -309,16 +337,16 @@ class TestSnapSchedules(TestSnapSchedulesHelper): self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) # set a schedule on the dir - self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1M') - self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedules.TEST_DIRECTORY, retention_spec_or_period='1M') + self.fs_snap_schedule_cmd('add', path=TestSnapSchedules.TEST_DIRECTORY, snap_schedule='1m') + self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedules.TEST_DIRECTORY, retention_spec_or_period='1m') exec_time = time.time() - timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1m') log.debug(f'expecting snap {TestSnapSchedules.TEST_DIRECTORY}/.snap/scheduled-{snap_sfx} in ~{timo_1}s...') to_wait = timo_1 + 2 # some leeway to avoid false failures... 
# verify snapshot schedule - self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1M'], retentions=[{'M':1}]) + self.verify_schedule(TestSnapSchedules.TEST_DIRECTORY, ['1m'], retentions=[{'m':1}]) def verify_added(snaps_added): log.debug(f'snapshots added={snaps_added}') @@ -400,26 +428,26 @@ class TestSnapSchedules(TestSnapSchedulesHelper): for d in testdirs: self.mount_a.run_shell(['mkdir', '-p', d[1:]]) - self.fs_snap_schedule_cmd('add', path=d, snap_schedule='1M') + self.fs_snap_schedule_cmd('add', path=d, snap_schedule='1m') exec_time = time.time() - timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1m') for d in testdirs: - self.fs_snap_schedule_cmd('activate', path=d, snap_schedule='1M') + self.fs_snap_schedule_cmd('activate', path=d, snap_schedule='1m') # we wait for 10 snaps to be taken wait_time = timo_1 + 10 * 60 + 15 time.sleep(wait_time) for d in testdirs: - self.fs_snap_schedule_cmd('deactivate', path=d, snap_schedule='1M') + self.fs_snap_schedule_cmd('deactivate', path=d, snap_schedule='1m') for d in testdirs: self.verify_snap_stats(d) for d in testdirs: - self.fs_snap_schedule_cmd('remove', path=d, snap_schedule='1M') + self.fs_snap_schedule_cmd('remove', path=d, snap_schedule='1m') self.remove_snapshots(d[1:]) self.mount_a.run_shell(['rmdir', d[1:]]) @@ -428,12 +456,12 @@ class TestSnapSchedules(TestSnapSchedulesHelper): self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedules.TEST_DIRECTORY]) testdir = os.path.join("/", TestSnapSchedules.TEST_DIRECTORY, "test_restart") self.mount_a.run_shell(['mkdir', '-p', testdir[1:]]) - self.fs_snap_schedule_cmd('add', path=testdir, snap_schedule='1M') + self.fs_snap_schedule_cmd('add', path=testdir, snap_schedule='1m') exec_time = time.time() - timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1m') - self.fs_snap_schedule_cmd('activate', path=testdir, snap_schedule='1M') + self.fs_snap_schedule_cmd('activate', path=testdir, snap_schedule='1m') # we wait for 10 snaps to be taken wait_time = timo_1 + 10 * 60 + 15 @@ -448,7 +476,7 @@ class TestSnapSchedules(TestSnapSchedulesHelper): log.debug(f'restarting active mgr: {active_mgr}') self.mgr_cluster.mon_manager.revive_mgr(active_mgr) time.sleep(300) # sleep for 5 minutes - self.fs_snap_schedule_cmd('deactivate', path=testdir, snap_schedule='1M') + self.fs_snap_schedule_cmd('deactivate', path=testdir, snap_schedule='1m') new_stats = self.get_snap_stats(testdir) self.assertTrue(new_stats['fs_count'] == new_stats['db_count']) @@ -456,15 +484,15 @@ class TestSnapSchedules(TestSnapSchedulesHelper): self.assertTrue(new_stats['db_count'] > old_stats['db_count']) # cleanup - self.fs_snap_schedule_cmd('remove', path=testdir, snap_schedule='1M') + self.fs_snap_schedule_cmd('remove', path=testdir, snap_schedule='1m') self.remove_snapshots(testdir[1:]) - self.mount_a.run_shell(['rmdir', testdir[1:]]) + self.mount_a.run_shell(['rmdir', testdir[1:]]) def test_schedule_auto_deactivation_for_non_existent_path(self): """ Test that a non-existent path leads to schedule deactivation after a few retries. 
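        (The module cannot snapshot a path that does not exist; after a
        bounded number of consecutive failures it is expected to mark the
        schedule inactive, which the polling loop below waits for.)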
""" - self.fs_snap_schedule_cmd('add', path="/bad-path", snap_schedule='1M') + self.fs_snap_schedule_cmd('add', path="/bad-path", snap_schedule='1m') start_time = time.time() while time.time() - start_time < 60.0: @@ -491,15 +519,15 @@ class TestSnapSchedules(TestSnapSchedulesHelper): self.mount_a.run_shell(['mkdir', '-p', test_dir[1:]]) # set a schedule on the dir - self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1M') + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1m') self.fs_snap_schedule_cmd('retention', 'add', path=test_dir, retention_spec_or_period=f'{total_snaps}n') exec_time = time.time() - timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + timo_1, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1m') # verify snapshot schedule - self.verify_schedule(test_dir, ['1M']) + self.verify_schedule(test_dir, ['1m']) # we wait for total_snaps snaps to be taken wait_time = timo_1 + total_snaps * 60 + 15 @@ -517,45 +545,513 @@ class TestSnapSchedules(TestSnapSchedulesHelper): self.mount_a.run_shell(['rmdir', test_dir[1:]]) + def test_snap_schedule_all_periods(self): + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/minutes" + self.mount_a.run_shell(['mkdir', '-p', test_dir]) + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1m') -class TestSnapSchedulesSnapdir(TestSnapSchedulesHelper): - def remove_snapshots(self, dir_path, sdn): - snap_path = f'{dir_path}/{sdn}' + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/hourly" + self.mount_a.run_shell(['mkdir', '-p', test_dir]) + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1h') - snapshots = self.mount_a.ls(path=snap_path) - for snapshot in snapshots: - snapshot_path = os.path.join(snap_path, snapshot) - log.debug(f'removing snapshot: {snapshot_path}') - self.mount_a.run_shell(['rmdir', snapshot_path]) + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/daily" + self.mount_a.run_shell(['mkdir', '-p', test_dir]) + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1d') - def get_snap_dir_name(self): - from tasks.cephfs.fuse_mount import FuseMount - from tasks.cephfs.kernel_mount import KernelMount + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/weekly" + self.mount_a.run_shell(['mkdir', '-p', test_dir]) + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1w') - if isinstance(self.mount_a, KernelMount): - sdn = self.mount_a.client_config.get('snapdirname', '.snap') - elif isinstance(self.mount_a, FuseMount): - sdn = self.mount_a.client_config.get('client_snapdir', '.snap') - self.fs.set_ceph_conf('client', 'client snapdir', sdn) - self.mount_a.remount() - return sdn + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/monthly" + self.mount_a.run_shell(['mkdir', '-p', test_dir]) + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1M') + + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/yearly" + self.mount_a.run_shell(['mkdir', '-p', test_dir]) + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1Y') + + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/bad_period_spec" + self.mount_a.run_shell(['mkdir', '-p', test_dir]) + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1X') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1MM') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='1') + 
with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='M') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='-1m') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('add', path=test_dir, snap_schedule='') + + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/minutes" + self.mount_a.run_shell(['rmdir', test_dir]) + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/hourly" + self.mount_a.run_shell(['rmdir', test_dir]) + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/daily" + self.mount_a.run_shell(['rmdir', test_dir]) + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/weekly" + self.mount_a.run_shell(['rmdir', test_dir]) + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/monthly" + self.mount_a.run_shell(['rmdir', test_dir]) + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/yearly" + self.mount_a.run_shell(['rmdir', test_dir]) + test_dir = TestSnapSchedulesSnapdir.TEST_DIRECTORY + "/bad_period_spec" + self.mount_a.run_shell(['rmdir', test_dir]) + + +class TestSnapSchedulesSubvolAndGroupArguments(TestSnapSchedulesHelper): + def setUp(self): + super(TestSnapSchedulesSubvolAndGroupArguments, self).setUp() + self.CREATE_VERSION = int(self.mount_a.ctx['config']['overrides']['subvolume_version']) + + def _create_v1_subvolume(self, subvol_name, subvol_group=None, has_snapshot=False, subvol_type='subvolume', state='complete'): + group = subvol_group if subvol_group is not None else '_nogroup' + basepath = os.path.join("volumes", group, subvol_name) + uuid_str = str(uuid.uuid4()) + createpath = os.path.join(basepath, uuid_str) + self.mount_a.run_shell(['sudo', 'mkdir', '-p', createpath], omit_sudo=False) + self.mount_a.setfattr(createpath, 'ceph.dir.subvolume', '1', sudo=True) + + # create a v1 snapshot, to prevent auto upgrades + if has_snapshot: + snappath = os.path.join(createpath, self.get_snap_dir_name(), "fake") + self.mount_a.run_shell(['sudo', 'mkdir', '-p', snappath], omit_sudo=False) + + # add required xattrs to subvolume + default_pool = self.mount_a.getfattr(".", "ceph.dir.layout.pool") + self.mount_a.setfattr(createpath, 'ceph.dir.layout.pool', default_pool, sudo=True) + + # create a v1 .meta file + cp = "/" + createpath + meta_contents = f"[GLOBAL]\nversion = 1\ntype = {subvol_type}\npath = {cp}\nstate = {state}\n" + meta_contents += "allow_subvolume_upgrade = 0\n" # boolean + if state == 'pending': + # add a fake clone source + meta_contents = meta_contents + '[source]\nvolume = fake\nsubvolume = fake\nsnapshot = fake\n' + meta_filepath1 = os.path.join(self.mount_a.mountpoint, basepath, ".meta") + self.mount_a.client_remote.write_file(meta_filepath1, meta_contents, sudo=True) + return createpath + + def _create_subvolume(self, version, subvol_name, subvol_group=None): + if version == 1: + self._create_v1_subvolume(subvol_name, subvol_group) + elif version >= 2: + if subvol_group: + self._fs_cmd('subvolume', 'create', 'cephfs', subvol_name, '--group_name', subvol_group) + else: + self._fs_cmd('subvolume', 'create', 'cephfs', subvol_name) + else: + self.assertTrue('NoSuchSubvolumeVersion' == None) + + def _get_subvol_snapdir_path(self, version, subvol, group): + args = ['subvolume', 'getpath', 'cephfs', subvol] + if group: + args += ['--group_name', group] + + path = self.get_ceph_cmd_stdout("fs", *args).rstrip() + if version >= 2: + path += "/.." 
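+            # (layout note, an assumption of this helper: for v2 subvolumes
+            # 'getpath' resolves to the UUID-named data directory one level
+            # below the subvolume root, while snapshots live at the root,
+            # hence the trailing "/..")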
+        return path[1:]
+
+    def _verify_snap_schedule(self, version, subvol, group):
+        time.sleep(75)
+        path = self._get_subvol_snapdir_path(version, subvol, group)
+        path += "/" + self.get_snap_dir_name()
+        snaps = self.mount_a.ls(path=path)
+        log.debug(f"snaps:{snaps}")
+        count = 0
+        for snapname in snaps:
+            if snapname.startswith("scheduled-"):
+                count += 1
+        # confirm presence of snapshot dir under .snap dir
+        self.assertGreater(count, 0)
+
+    def test_snap_schedule_subvol_and_group_arguments_01(self):
+        """
+        Test subvol schedule creation succeeds for the default subvolgroup.
+        """
+        self._create_subvolume(self.CREATE_VERSION, 'sv01')
+        self.fs_snap_schedule_cmd('add', '--subvol', 'sv01', path='.', snap_schedule='1m')
+
+        self._verify_snap_schedule(self.CREATE_VERSION, 'sv01', None)
+        path = self._get_subvol_snapdir_path(self.CREATE_VERSION, 'sv01', None)
+        self.remove_snapshots(path, self.get_snap_dir_name())
+
+        self.fs_snap_schedule_cmd('remove', '--subvol', 'sv01', path='.', snap_schedule='1m')
+        self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv01')
+
+    def test_snap_schedule_subvol_and_group_arguments_02(self):
+        """
+        Test subvol schedule creation fails for a non-existent subvolgroup.
+        """
+        self._create_subvolume(self.CREATE_VERSION, 'sv02')
+        with self.assertRaises(CommandFailedError):
+            self.fs_snap_schedule_cmd('add', '--subvol', 'sv02', '--group', 'mygrp02', path='.', snap_schedule='1m')
+        self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv02')
+
+    def test_snap_schedule_subvol_and_group_arguments_03(self):
+        """
+        Test subvol schedule creation fails when the subvol exists only
+        under a non-default group and no group argument is given.
+        """
+        self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp03')
+        self._create_subvolume(self.CREATE_VERSION, 'sv03', 'mygrp03')
+
+        with self.assertRaises(CommandFailedError):
+            self.fs_snap_schedule_cmd('add', '--subvol', 'sv03', path='.', snap_schedule='1m')
+
+        self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv03', '--group_name', 'mygrp03')
+        self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp03')
+
+    def test_snap_schedule_subvol_and_group_arguments_04(self):
+        """
+        Test subvol schedule creation fails without a subvol argument.
+        """
+        self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp04')
+        self._create_subvolume(self.CREATE_VERSION, 'sv04', 'mygrp04')
+
+        with self.assertRaises(CommandFailedError):
+            self.fs_snap_schedule_cmd('add', '--group', 'mygrp04', path='.', snap_schedule='1m')
+
+        self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv04', '--group_name', 'mygrp04')
+        self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp04')
+
+    def test_snap_schedule_subvol_and_group_arguments_05(self):
+        """
+        Test subvol schedule creation succeeds for a subvol under a subvolgroup.
+        """
+        self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp05')
+        self._create_subvolume(self.CREATE_VERSION, 'sv05', 'mygrp05')
+        self.fs_snap_schedule_cmd('add', '--subvol', 'sv05', '--group', 'mygrp05', path='.', snap_schedule='1m', fs='cephfs')
+
+        self._verify_snap_schedule(self.CREATE_VERSION, 'sv05', 'mygrp05')
+        path = self._get_subvol_snapdir_path(self.CREATE_VERSION, 'sv05', 'mygrp05')
+        self.remove_snapshots(path, self.get_snap_dir_name())
+
+        self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv05', '--group_name', 'mygrp05')
+        self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp05')
+
+    def test_snap_schedule_subvol_and_group_arguments_06(self):
+        """
+        Test subvol schedule listing fails without a subvolgroup argument.
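+        (Sketch, not asserted verbatim by the test: the equivalent CLI is
+        something like "ceph fs snap-schedule list . --subvol sv06 --fs cephfs",
+        which should fail unless "--group mygrp06" is also supplied,
+        mirroring the helper calls below.)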
+ """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp06') + self._create_subvolume(self.CREATE_VERSION, 'sv06', 'mygrp06') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv06', '--group', 'mygrp06', path='.', snap_schedule='1m', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('list', '--subvol', 'sv06', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv06', '--group', 'mygrp06', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv06', '--group_name', 'mygrp06') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp06') + + def test_snap_schedule_subvol_and_group_arguments_07(self): + """ + Test subvol schedule listing fails without a subvol argument. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp07') + self._create_subvolume(self.CREATE_VERSION, 'sv07', 'mygrp07') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv07', '--group', 'mygrp07', path='.', snap_schedule='1m', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('list', '--group', 'mygrp07', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv07', '--group', 'mygrp07', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv07', '--group_name', 'mygrp07') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp07') + + def test_snap_schedule_subvol_and_group_arguments_08(self): + """ + Test subvol schedule listing succeeds with a subvol and a subvolgroup argument. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp08') + self._create_subvolume(self.CREATE_VERSION, 'sv08', 'mygrp08') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv08', '--group', 'mygrp08', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('list', '--subvol', 'sv08', '--group', 'mygrp08', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv08', '--group', 'mygrp08', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv08', '--group_name', 'mygrp08') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp08') + + def test_snap_schedule_subvol_and_group_arguments_09(self): + """ + Test subvol schedule retention add fails for a subvol without a subvolgroup. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp09') + self._create_subvolume(self.CREATE_VERSION, 'sv09', 'mygrp09') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv09', '--group', 'mygrp09', path='.', snap_schedule='1m', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv09', path='.', retention_spec_or_period='h', retention_count='5') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv09', '--group', 'mygrp09', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv09', '--group_name', 'mygrp09') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp09') + + def test_snap_schedule_subvol_and_group_arguments_10(self): + """ + Test subvol schedule retention add fails for a subvol without a subvol argument. 
+ """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp10') + self._create_subvolume(self.CREATE_VERSION, 'sv10', 'mygrp10') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv10', '--group', 'mygrp10', path='.', snap_schedule='1m', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('retention', 'add', '--group', 'mygrp10', path='.', retention_spec_or_period='h', retention_count='5') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv10', '--group', 'mygrp10', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv10', '--group_name', 'mygrp10') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp10') + + def test_snap_schedule_subvol_and_group_arguments_11(self): + """ + Test subvol schedule retention add succeeds for a subvol within a subvolgroup. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp11') + self._create_subvolume(self.CREATE_VERSION, 'sv11', 'mygrp11') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv11', '--group', 'mygrp11', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv11', '--group', 'mygrp11', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv11', '--group', 'mygrp11', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv11', '--group_name', 'mygrp11') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp11') + + def test_snap_schedule_subvol_and_group_arguments_12(self): + """ + Test subvol schedule activation fails for a subvol without a subvolgroup argument. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp12') + self._create_subvolume(self.CREATE_VERSION, 'sv12', 'mygrp12') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv12', '--group', 'mygrp12', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv12', '--group', 'mygrp12', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv12', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv12', '--group', 'mygrp12', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv12', '--group_name', 'mygrp12') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp12') + + def test_snap_schedule_subvol_and_group_arguments_13(self): + """ + Test subvol schedule activation fails for a subvol without a subvol argument. 
+ """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp13') + self._create_subvolume(self.CREATE_VERSION, 'sv13', 'mygrp13') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv13', '--group', 'mygrp13', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv13', '--group', 'mygrp13', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('activate', '--group', 'mygrp13', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv13', '--group', 'mygrp13', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv13', '--group_name', 'mygrp13') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp13') + + def test_snap_schedule_subvol_and_group_arguments_14(self): + """ + Test subvol schedule activation succeeds for a subvol within a subvolgroup. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp14') + self._create_subvolume(self.CREATE_VERSION, 'sv14', 'mygrp14') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv14', '--group', 'mygrp14', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv14', '--group', 'mygrp14', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv14', '--group', 'mygrp14', path='.', fs='cephfs') + + self._verify_snap_schedule(self.CREATE_VERSION, 'sv14', 'mygrp14') + path = self._get_subvol_snapdir_path(self.CREATE_VERSION, 'sv14', 'mygrp14') + self.remove_snapshots(path, self.get_snap_dir_name()) + + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv14', '--group', 'mygrp14', path='.', snap_schedule='1m', fs='cephfs') + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv14', '--group_name', 'mygrp14') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp14') + + def test_snap_schedule_subvol_and_group_arguments_15(self): + """ + Test subvol schedule deactivation fails for a subvol without a subvolgroup argument. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp15') + self._create_subvolume(self.CREATE_VERSION, 'sv15', 'mygrp15') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv15', '--group', 'mygrp15', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv15', '--group', 'mygrp15', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv15', '--group', 'mygrp15', path='.', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv15', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv15', '--group', 'mygrp15', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv15', '--group_name', 'mygrp15') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp15') + + def test_snap_schedule_subvol_and_group_arguments_16(self): + """ + Test subvol schedule deactivation fails for a subvol without a subvol argument. 
+ """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp16') + self._create_subvolume(self.CREATE_VERSION, 'sv16', 'mygrp16') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv16', '--group', 'mygrp16', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv16', '--group', 'mygrp16', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv16', '--group', 'mygrp16', path='.', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('deactivate', '--group', 'mygrp16', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv16', '--group', 'mygrp16', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv16', '--group_name', 'mygrp16') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp16') + + def test_snap_schedule_subvol_and_group_arguments_17(self): + """ + Test subvol schedule deactivation succeeds for a subvol within a subvolgroup. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp17') + self._create_subvolume(self.CREATE_VERSION, 'sv17', 'mygrp17') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv17', '--group', 'mygrp17', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv17', '--group', 'mygrp17', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv17', '--group', 'mygrp17', path='.', fs='cephfs') + + self._verify_snap_schedule(self.CREATE_VERSION, 'sv17', 'mygrp17') + path = self._get_subvol_snapdir_path(self.CREATE_VERSION, 'sv17', 'mygrp17') + self.remove_snapshots(path, self.get_snap_dir_name()) + + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv17', '--group', 'mygrp17', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv17', '--group', 'mygrp17', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv17', '--group_name', 'mygrp17') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp17') + + def test_snap_schedule_subvol_and_group_arguments_18(self): + """ + Test subvol schedule retention remove fails for a subvol without a subvolgroup argument. 
+ """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp18') + self._create_subvolume(self.CREATE_VERSION, 'sv18', 'mygrp18') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv18', '--group', 'mygrp18', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv18', '--group', 'mygrp18', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv18', '--group', 'mygrp18', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv18', '--group', 'mygrp18', path='.', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('retention', 'remove', '--subvol', 'sv18', path='.', retention_spec_or_period='h', retention_count='5', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv18', '--group', 'mygrp18', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv18', '--group_name', 'mygrp18') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp18') + + def test_snap_schedule_subvol_and_group_arguments_19(self): + """ + Test subvol schedule retention remove fails for a subvol without a subvol argument. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp19') + self._create_subvolume(self.CREATE_VERSION, 'sv19', 'mygrp19') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv19', '--group', 'mygrp19', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv19', '--group', 'mygrp19', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv19', '--group', 'mygrp19', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv19', '--group', 'mygrp19', path='.', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('retention', 'remove', '--group', 'mygrp19', path='.', retention_spec_or_period='h', retention_count='5', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv19', '--group', 'mygrp19', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv19', '--group_name', 'mygrp19') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp19') + + def test_snap_schedule_subvol_and_group_arguments_20(self): + """ + Test subvol schedule retention remove succeeds for a subvol within a subvolgroup. 
+ """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp20') + self._create_subvolume(self.CREATE_VERSION, 'sv20', 'mygrp20') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv20', '--group', 'mygrp20', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv20', '--group', 'mygrp20', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv20', '--group', 'mygrp20', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv20', '--group', 'mygrp20', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'remove', '--subvol', 'sv20', '--group', 'mygrp20', path='.', retention_spec_or_period='h', retention_count='5', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv20', '--group', 'mygrp20', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv20', '--group_name', 'mygrp20') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp20') + + def test_snap_schedule_subvol_and_group_arguments_21(self): + """ + Test subvol schedule remove fails for a subvol without a subvolgroup argument. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp21') + self._create_subvolume(self.CREATE_VERSION, 'sv21', 'mygrp21') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv21', '--group', 'mygrp21', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv21', '--group', 'mygrp21', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv21', '--group', 'mygrp21', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv21', '--group', 'mygrp21', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'remove', '--subvol', 'sv21', '--group', 'mygrp21', path='.', retention_spec_or_period='h', retention_count='5', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv21', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv21', '--group', 'mygrp21', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv21', '--group_name', 'mygrp21') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp21') + + def test_snap_schedule_subvol_and_group_arguments_22(self): + """ + Test subvol schedule remove fails for a subvol without a subvol argument. 
+ """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp22') + self._create_subvolume(self.CREATE_VERSION, 'sv22', 'mygrp22') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv22', '--group', 'mygrp22', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv22', '--group', 'mygrp22', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv22', '--group', 'mygrp22', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv22', '--group', 'mygrp22', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'remove', '--subvol', 'sv22', '--group', 'mygrp22', path='.', retention_spec_or_period='h', retention_count='5', fs='cephfs') + with self.assertRaises(CommandFailedError): + self.fs_snap_schedule_cmd('remove', '--group', 'mygrp22', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv22', '--group', 'mygrp22', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv22', '--group_name', 'mygrp22') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp22') + + def test_snap_schedule_subvol_and_group_arguments_23(self): + """ + Test subvol schedule remove succeeds for a subvol within a subvolgroup. + """ + self._fs_cmd('subvolumegroup', 'create', 'cephfs', 'mygrp23') + self._create_subvolume(self.CREATE_VERSION, 'sv23', 'mygrp23') + + self.fs_snap_schedule_cmd('add', '--subvol', 'sv23', '--group', 'mygrp23', path='.', snap_schedule='1m', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'add', '--subvol', 'sv23', '--group', 'mygrp23', path='.', retention_spec_or_period='h', retention_count=5, fs='cephfs') + self.fs_snap_schedule_cmd('activate', '--subvol', 'sv23', '--group', 'mygrp23', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('deactivate', '--subvol', 'sv23', '--group', 'mygrp23', path='.', fs='cephfs') + self.fs_snap_schedule_cmd('retention', 'remove', '--subvol', 'sv23', '--group', 'mygrp23', path='.', retention_spec_or_period='h', retention_count='5', fs='cephfs') + self.fs_snap_schedule_cmd('remove', '--subvol', 'sv23', '--group', 'mygrp23', path='.', snap_schedule='1m', fs='cephfs') + + self._fs_cmd('subvolume', 'rm', 'cephfs', 'sv23', '--group_name', 'mygrp23') + self._fs_cmd('subvolumegroup', 'rm', 'cephfs', 'mygrp23') + + +class TestSnapSchedulesSnapdir(TestSnapSchedulesHelper): def test_snap_dir_name(self): """Test the correctness of snap directory name""" self.mount_a.run_shell(['mkdir', '-p', TestSnapSchedulesSnapdir.TEST_DIRECTORY]) # set a schedule on the dir - self.fs_snap_schedule_cmd('add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, snap_schedule='1M') - self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, retention_spec_or_period='1M') + self.fs_snap_schedule_cmd('add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, snap_schedule='1m') + self.fs_snap_schedule_cmd('retention', 'add', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY, retention_spec_or_period='1m') exec_time = time.time() - timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1M') + timo, snap_sfx = self.calc_wait_time_and_snap_name(exec_time, '1m') sdn = self.get_snap_dir_name() log.info(f'expecting snap {TestSnapSchedulesSnapdir.TEST_DIRECTORY}/{sdn}/scheduled-{snap_sfx} in ~{timo}s...') - + # verify snapshot schedule - self.verify_schedule(TestSnapSchedulesSnapdir.TEST_DIRECTORY, ['1M'], retentions=[{'M':1}]) - 
+ self.verify_schedule(TestSnapSchedulesSnapdir.TEST_DIRECTORY, ['1m'], retentions=[{'m':1}]) + # remove snapshot schedule self.fs_snap_schedule_cmd('remove', path=TestSnapSchedulesSnapdir.TEST_DIRECTORY) diff --git a/qa/tasks/cephfs/test_snapshots.py b/qa/tasks/cephfs/test_snapshots.py index 608dcc81f..a9639a7eb 100644 --- a/qa/tasks/cephfs/test_snapshots.py +++ b/qa/tasks/cephfs/test_snapshots.py @@ -553,12 +553,12 @@ class TestMonSnapsAndFsPools(CephFSTestCase): with self.assertRaises(CommandFailedError): test_pool_name = self.fs.get_data_pool_name() base_cmd = f'osd pool mksnap {test_pool_name} snap3' - self.run_cluster_cmd(base_cmd) + self.run_ceph_cmd(base_cmd) with self.assertRaises(CommandFailedError): test_pool_name = self.fs.get_metadata_pool_name() base_cmd = f'osd pool mksnap {test_pool_name} snap4' - self.run_cluster_cmd(base_cmd) + self.run_ceph_cmd(base_cmd) def test_attaching_pools_with_snaps_to_fs_fails(self): """ @@ -566,40 +566,40 @@ class TestMonSnapsAndFsPools(CephFSTestCase): """ test_pool_name = 'snap-test-pool' base_cmd = f'osd pool create {test_pool_name}' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) self.assertEqual(ret, 0) self.fs.rados(["mksnap", "snap3"], pool=test_pool_name) base_cmd = f'fs add_data_pool {self.fs.name} {test_pool_name}' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) self.assertEqual(ret, errno.EOPNOTSUPP) # cleanup self.fs.rados(["rmsnap", "snap3"], pool=test_pool_name) base_cmd = f'osd pool delete {test_pool_name}' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) def test_using_pool_with_snap_fails_fs_creation(self): """ Test that using a pool with snaps for fs creation fails """ base_cmd = 'osd pool create test_data_pool' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) self.assertEqual(ret, 0) base_cmd = 'osd pool create test_metadata_pool' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) self.assertEqual(ret, 0) self.fs.rados(["mksnap", "snap4"], pool='test_data_pool') base_cmd = 'fs new testfs test_metadata_pool test_data_pool' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) self.assertEqual(ret, errno.EOPNOTSUPP) # cleanup self.fs.rados(["rmsnap", "snap4"], pool='test_data_pool') base_cmd = 'osd pool delete test_data_pool' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) base_cmd = 'osd pool delete test_metadata_pool' - ret = self.run_cluster_cmd_result(base_cmd) + ret = self.get_ceph_cmd_result(args=base_cmd, check_status=False) diff --git a/qa/tasks/cephfs/test_strays.py b/qa/tasks/cephfs/test_strays.py index 8bdc126e2..11701dc28 100644 --- a/qa/tasks/cephfs/test_strays.py +++ b/qa/tasks/cephfs/test_strays.py @@ -651,9 +651,8 @@ class TestStrays(CephFSTestCase): self.assertFalse(self._is_stopped(1)) # Permit the daemon to start purging again - self.fs.mon_manager.raw_cluster_cmd('tell', 'mds.{0}'.format(rank_1_id), - 'injectargs', - "--mds_max_purge_files 100") + self.run_ceph_cmd('tell', 'mds.{0}'.format(rank_1_id), + 'injectargs', "--mds_max_purge_files 100") # It should now proceed through shutdown self.fs.wait_for_daemons(timeout=120) @@ -816,7 +815,7 @@ touch pin/placeholder 
:param pool_name: Which pool (must exist) """ - out = self.fs.mon_manager.raw_cluster_cmd("df", "--format=json-pretty") + out = self.get_ceph_cmd_stdout("df", "--format=json-pretty") for p in json.loads(out)['pools']: if p['name'] == pool_name: return p['stats'] diff --git a/qa/tasks/cephfs/test_volumes.py b/qa/tasks/cephfs/test_volumes.py index 2ecfeb327..612a4ef41 100644 --- a/qa/tasks/cephfs/test_volumes.py +++ b/qa/tasks/cephfs/test_volumes.py @@ -19,11 +19,6 @@ log = logging.getLogger(__name__) class TestVolumesHelper(CephFSTestCase): """Helper class for testing FS volume, subvolume group and subvolume operations.""" - TEST_VOLUME_PREFIX = "volume" - TEST_SUBVOLUME_PREFIX="subvolume" - TEST_GROUP_PREFIX="group" - TEST_SNAPSHOT_PREFIX="snapshot" - TEST_CLONE_PREFIX="clone" TEST_FILE_NAME_PREFIX="subvolume_file" # for filling subvolume with data @@ -35,10 +30,10 @@ class TestVolumesHelper(CephFSTestCase): DEFAULT_NUMBER_OF_FILES = 1024 def _fs_cmd(self, *args): - return self.mgr_cluster.mon_manager.raw_cluster_cmd("fs", *args) + return self.get_ceph_cmd_stdout("fs", *args) def _raw_cmd(self, *args): - return self.mgr_cluster.mon_manager.raw_cluster_cmd(*args) + return self.get_ceph_cmd_stdout(args) def __check_clone_state(self, state, clone, clone_group=None, timo=120): check = 0 @@ -165,35 +160,24 @@ class TestVolumesHelper(CephFSTestCase): self._verify_clone_root(path1, path2, clone, clone_group, clone_pool) self._verify_clone_attrs(path1, path2) - def _generate_random_volume_name(self, count=1): - n = self.volume_start - volumes = [f"{TestVolumes.TEST_VOLUME_PREFIX}_{i:016}" for i in range(n, n+count)] - self.volume_start += count - return volumes[0] if count == 1 else volumes - - def _generate_random_subvolume_name(self, count=1): - n = self.subvolume_start - subvolumes = [f"{TestVolumes.TEST_SUBVOLUME_PREFIX}_{i:016}" for i in range(n, n+count)] - self.subvolume_start += count - return subvolumes[0] if count == 1 else subvolumes - - def _generate_random_group_name(self, count=1): - n = self.group_start - groups = [f"{TestVolumes.TEST_GROUP_PREFIX}_{i:016}" for i in range(n, n+count)] - self.group_start += count - return groups[0] if count == 1 else groups - - def _generate_random_snapshot_name(self, count=1): - n = self.snapshot_start - snaps = [f"{TestVolumes.TEST_SNAPSHOT_PREFIX}_{i:016}" for i in range(n, n+count)] - self.snapshot_start += count - return snaps[0] if count == 1 else snaps - - def _generate_random_clone_name(self, count=1): - n = self.clone_start - clones = [f"{TestVolumes.TEST_CLONE_PREFIX}_{i:016}" for i in range(n, n+count)] - self.clone_start += count - return clones[0] if count == 1 else clones + def _gen_name(self, name, n): + names = [f'{name}{random.randrange(0, 9999)}{i}' for i in range(n)] + return names[0] if n == 1 else names + + def _gen_vol_name(self, n=1): + return self._gen_name('vol', n) + + def _gen_subvol_name(self, n=1): + return self._gen_name('subvol', n) + + def _gen_subvol_grp_name(self, n=1): + return self._gen_name('subvol_grp', n) + + def _gen_subvol_snap_name(self, n=1): + return self._gen_name('subvol_snap', n) + + def _gen_subvol_clone_name(self, n=1): + return self._gen_name('subvol_clone', n) def _enable_multi_fs(self): self._fs_cmd("flag", "set", "enable_multiple", "true", "--yes-i-really-mean-it") @@ -202,7 +186,7 @@ class TestVolumesHelper(CephFSTestCase): result = json.loads(self._fs_cmd("volume", "ls")) if len(result) == 0: self.vol_created = True - self.volname = self._generate_random_volume_name() + self.volname = 
self._gen_vol_name() self._fs_cmd("volume", "create", self.volname) else: self.volname = result[0]['name'] @@ -393,14 +377,16 @@ class TestVolumesHelper(CephFSTestCase): """.format(authid=authid,key=key)) guest_mount.client_id = authid - guest_mount.client_remote.write_file(guest_mount.get_keyring_path(), - keyring_txt, sudo=True) + guest_keyring_path = guest_mount.client_remote.mktemp( + data=keyring_txt) # Add a guest client section to the ceph config file. self.config_set("client.{0}".format(authid), "debug client", 20) self.config_set("client.{0}".format(authid), "debug objecter", 20) self.set_conf("client.{0}".format(authid), "keyring", guest_mount.get_keyring_path()) + return guest_keyring_path + def _auth_metadata_get(self, filedata): """ Return a deserialized JSON object, or None @@ -418,11 +404,6 @@ class TestVolumesHelper(CephFSTestCase): self._enable_multi_fs() self._create_or_reuse_test_volume() self.config_set('mon', 'mon_allow_pool_delete', True) - self.volume_start = random.randint(1, (1<<20)) - self.subvolume_start = random.randint(1, (1<<20)) - self.group_start = random.randint(1, (1<<20)) - self.snapshot_start = random.randint(1, (1<<20)) - self.clone_start = random.randint(1, (1<<20)) def tearDown(self): if self.vol_created: @@ -436,7 +417,7 @@ class TestVolumes(TestVolumesHelper): """ That the volume can be created and then cleans up """ - volname = self._generate_random_volume_name() + volname = self._gen_vol_name() self._fs_cmd("volume", "create", volname) volumels = json.loads(self._fs_cmd("volume", "ls")) @@ -467,7 +448,7 @@ class TestVolumes(TestVolumesHelper): volumes = [volume['name'] for volume in vls] #create new volumes and add it to the existing list of volumes - volumenames = self._generate_random_volume_name(2) + volumenames = self._gen_vol_name(2) for volumename in volumenames: self._fs_cmd("volume", "create", volumename) volumes.extend(volumenames) @@ -562,6 +543,102 @@ class TestVolumes(TestVolumesHelper): self.assertNotIn(pool["name"], pools, "pool {0} exists after volume removal".format(pool["name"])) + def test_volume_info(self): + """ + Tests the 'fs volume info' command + """ + vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"] + group = self._gen_subvol_grp_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname)) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + self.assertEqual(vol_info["used_size"], 0, + "Size should be zero when volumes directory is empty") + + def test_volume_info_pending_subvol_deletions(self): + """ + Tests the pending_subvolume_deletions in 'fs volume info' command + """ + subvolname = self._gen_subvol_name() + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolname, "--mode=777") + # create 3K zero byte files + self._do_subvolume_io(subvolname, number_of_files=3000, file_size=0) + # Delete the subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolname) + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname)) + self.assertNotEqual(vol_info['pending_subvolume_deletions'], 0, + "pending_subvolume_deletions should be 1") + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_volume_info_without_subvolumegroup(self): + """ + Tests the 'fs volume info' command without subvolume group + """ + vol_fields = ["pools", "mon_addrs"] + # get volume 
metadata + vol_info = json.loads(self._get_volume_info(self.volname)) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + self.assertNotIn("used_size", vol_info, + "'used_size' should not be present in absence of subvolumegroup") + self.assertNotIn("pending_subvolume_deletions", vol_info, + "'pending_subvolume_deletions' should not be present in absence" + " of subvolumegroup") + + def test_volume_info_with_human_readable_flag(self): + """ + Tests the 'fs volume info --human_readable' command + """ + vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"] + group = self._gen_subvol_grp_name() + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname, "--human_readable")) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + units = [' ', 'k', 'M', 'G', 'T', 'P', 'E'] + assert vol_info["used_size"][-1] in units, "unit suffix in used_size is absent" + assert vol_info["pools"]["data"][0]["avail"][-1] in units, "unit suffix in avail data is absent" + assert vol_info["pools"]["data"][0]["used"][-1] in units, "unit suffix in used data is absent" + assert vol_info["pools"]["metadata"][0]["avail"][-1] in units, "unit suffix in avail metadata is absent" + assert vol_info["pools"]["metadata"][0]["used"][-1] in units, "unit suffix in used metadata is absent" + self.assertEqual(int(vol_info["used_size"]), 0, + "Size should be zero when volumes directory is empty") + + def test_volume_info_with_human_readable_flag_without_subvolumegroup(self): + """ + Tests the 'fs volume info --human_readable' command without subvolume group + """ + vol_fields = ["pools", "mon_addrs"] + # get volume metadata + vol_info = json.loads(self._get_volume_info(self.volname, "--human_readable")) + for md in vol_fields: + self.assertIn(md, vol_info, + f"'{md}' key not present in metadata of volume") + units = [' ', 'k', 'M', 'G', 'T', 'P', 'E'] + assert vol_info["pools"]["data"][0]["avail"][-1] in units, "unit suffix in avail data is absent" + assert vol_info["pools"]["data"][0]["used"][-1] in units, "unit suffix in used data is absent" + assert vol_info["pools"]["metadata"][0]["avail"][-1] in units, "unit suffix in avail metadata is absent" + assert vol_info["pools"]["metadata"][0]["used"][-1] in units, "unit suffix in used metadata is absent" + self.assertNotIn("used_size", vol_info, + "'used_size' should not be present in absence of subvolumegroup") + self.assertNotIn("pending_subvolume_deletions", vol_info, + "'pending_subvolume_deletions' should not be present in absence" + " of subvolumegroup") + + +class TestRenameCmd(TestVolumesHelper): + def test_volume_rename(self): """ That volume, its file system and pools, can be renamed. 
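        (The renamed volume's pools are expected to follow the new name;
        the assertions below construct f"cephfs.{newvolname}.data" and
        f"cephfs.{newvolname}.meta". The command also refuses to run
        without --yes-i-really-mean-it.)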
@@ -569,7 +646,7 @@ class TestVolumes(TestVolumesHelper):
         for m in self.mounts:
             m.umount_wait()
         oldvolname = self.volname
-        newvolname = self._generate_random_volume_name()
+        newvolname = self._gen_vol_name()
         new_data_pool, new_metadata_pool = f"cephfs.{newvolname}.data", f"cephfs.{newvolname}.meta"
         self._fs_cmd("volume", "rename", oldvolname, newvolname,
                      "--yes-i-really-mean-it")
@@ -590,7 +667,7 @@ class TestVolumes(TestVolumesHelper):
         for m in self.mounts:
             m.umount_wait()
         oldvolname = self.volname
-        newvolname = self._generate_random_volume_name()
+        newvolname = self._gen_vol_name()
         new_data_pool, new_metadata_pool = f"cephfs.{newvolname}.data", f"cephfs.{newvolname}.meta"
         self._fs_cmd("volume", "rename", oldvolname, newvolname,
                      "--yes-i-really-mean-it")
@@ -608,7 +685,8 @@ class TestVolumes(TestVolumesHelper):
         """
         That renaming volume fails without --yes-i-really-mean-it flag.
         """
-        newvolname = self._generate_random_volume_name()
+        newvolname = self._gen_vol_name()
+
         try:
             self._fs_cmd("volume", "rename", self.volname, newvolname)
         except CommandFailedError as ce:
@@ -628,7 +706,7 @@ class TestVolumes(TestVolumesHelper):
             m.umount_wait()
         self.fs.add_data_pool('another-data-pool')
         oldvolname = self.volname
-        newvolname = self._generate_random_volume_name()
+        newvolname = self._gen_vol_name()
         self.fs.get_pool_names(refresh=True)
         orig_data_pool_names = list(self.fs.data_pools.values())
         new_metadata_pool = f"cephfs.{newvolname}.meta"
@@ -650,7 +728,7 @@ class TestVolumes(TestVolumesHelper):
         Tests the 'fs volume info' command
         """
         vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"]
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         # create subvolumegroup
         self._fs_cmd("subvolumegroup", "create", self.volname, group)
         # get volume metadata
@@ -665,7 +743,7 @@ class TestVolumes(TestVolumesHelper):
         """
         Tests the pending_subvolume_deletions in 'fs volume info' command
         """
-        subvolname = self._generate_random_subvolume_name()
+        subvolname = self._gen_subvol_name()
         # create subvolume
         self._fs_cmd("subvolume", "create", self.volname, subvolname, "--mode=777")
         # create 3K zero byte files
@@ -700,7 +778,7 @@ class TestVolumes(TestVolumesHelper):
         Tests the 'fs volume info --human_readable' command
         """
         vol_fields = ["pools", "used_size", "pending_subvolume_deletions", "mon_addrs"]
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         # create subvolumegroup
         self._fs_cmd("subvolumegroup", "create", self.volname, group)
         # get volume metadata
@@ -742,7 +820,7 @@ class TestVolumes(TestVolumesHelper):
 class TestSubvolumeGroups(TestVolumesHelper):
     """Tests for FS subvolume group operations."""
     def test_default_uid_gid_subvolume_group(self):
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         expected_uid = 0
         expected_gid = 0
@@ -759,7 +837,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
         self._fs_cmd("subvolumegroup", "rm", self.volname, group)

     def test_nonexistent_subvolume_group_create(self):
-        subvolume = self._generate_random_subvolume_name()
+        subvolume = self._gen_subvol_name()
         group = "non_existent_group"

         # try, creating subvolume in a nonexistent group
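These rename hunks all drive one CLI flow; a compact sketch of what they exercise, assuming a reachable cluster and the default cephfs.<volume>.<type> pool naming that `fs volume create` uses:

import json
import subprocess

def ceph(*args):
    return subprocess.check_output(("ceph",) + args, text=True)

def rename_volume(old, new):
    # without the safety flag the command refuses, which the
    # fails-without-confirmation test above relies on
    ceph("fs", "volume", "rename", old, new, "--yes-i-really-mean-it")
    vols = [v["name"] for v in json.loads(ceph("fs", "volume", "ls"))]
    assert new in vols and old not in vols

The last hunk above also shows why the tests distinguish default from extra data pools: only pools following the default naming get renamed along with the volume.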
@@ -784,7 +862,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
             raise RuntimeError("expected the 'fs subvolumegroup rm' command to fail")

     def test_subvolume_group_create_with_auto_cleanup_on_fail(self):
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         data_pool = "invalid_pool"
         # create group with invalid data pool layout
         with self.assertRaises(CommandFailedError):
@@ -800,7 +878,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
             raise RuntimeError("expected the 'fs subvolumegroup getpath' command to fail")

     def test_subvolume_group_create_with_desired_data_pool_layout(self):
-        group1, group2 = self._generate_random_group_name(2)
+        group1, group2 = self._gen_subvol_grp_name(2)

         # create group
         self._fs_cmd("subvolumegroup", "create", self.volname, group1)
@@ -828,7 +906,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
         self._fs_cmd("subvolumegroup", "rm", self.volname, group2)

     def test_subvolume_group_create_with_desired_mode(self):
-        group1, group2 = self._generate_random_group_name(2)
+        group1, group2 = self._gen_subvol_grp_name(2)
         # default mode
         expected_mode1 = "755"
         # desired mode
@@ -862,7 +940,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
         gid = 1000

         # create subvolume group
-        subvolgroupname = self._generate_random_group_name()
+        subvolgroupname = self._gen_subvol_grp_name()
         self._fs_cmd("subvolumegroup", "create", self.volname, subvolgroupname, "--uid", str(uid), "--gid", str(gid))

         # make sure it exists
@@ -879,7 +957,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
         self._fs_cmd("subvolumegroup", "rm", self.volname, subvolgroupname)

     def test_subvolume_group_create_with_invalid_data_pool_layout(self):
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         data_pool = "invalid_pool"
         # create group with invalid data pool layout
         try:
@@ -892,7 +970,7 @@ class TestSubvolumeGroups(TestVolumesHelper):

     def test_subvolume_group_create_with_size(self):
         # create group with size -- should set quota
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000")

         # get group metadata
@@ -909,7 +987,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
                         "data_pool", "gid", "mode", "mon_addrs", "mtime", "uid"]

         # create group
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         self._fs_cmd("subvolumegroup", "create", self.volname, group)

         # get group metadata
@@ -938,7 +1016,7 @@ class TestSubvolumeGroups(TestVolumesHelper):

     def test_subvolume_group_create_idempotence(self):
         # create group
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         self._fs_cmd("subvolumegroup", "create", self.volname, group)

         # try creating w/ same subvolume group name -- should be idempotent
@@ -949,7 +1027,7 @@ class TestSubvolumeGroups(TestVolumesHelper):

     def test_subvolume_group_create_idempotence_mode(self):
         # create group
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         self._fs_cmd("subvolumegroup", "create", self.volname, group)

         # try creating w/ same subvolume group name with mode -- should set mode
@@ -969,7 +1047,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
         desired_gid = 1000

         # create group
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         self._fs_cmd("subvolumegroup", "create", self.volname, group)

         # try creating w/ same subvolume group name with uid/gid -- should set uid/gid
@@ -988,7 +1066,7 @@ class TestSubvolumeGroups(TestVolumesHelper):

     def test_subvolume_group_create_idempotence_data_pool(self):
         # create group
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         self._fs_cmd("subvolumegroup", "create", self.volname, group)

         group_path = self._get_subvolume_group_path(self.volname, group)
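The cluster of hunks above exercises `fs subvolumegroup create` variants; the helper arguments map one-to-one onto CLI flags. A hedged sketch of the same call (cluster access assumed):

import subprocess

def create_group(vol, group, pool_layout=None, mode=None,
                 uid=None, gid=None, size=None):
    cmd = ["ceph", "fs", "subvolumegroup", "create", vol, group]
    if pool_layout:
        cmd += ["--pool_layout", pool_layout]  # unknown pool -> command fails
    if mode:
        cmd += ["--mode", mode]                # default is 755, per the tests
    if uid is not None:
        cmd += ["--uid", str(uid)]
    if gid is not None:
        cmd += ["--gid", str(gid)]
    if size is not None:
        cmd += ["--size", str(size)]           # sets a quota on the group
    subprocess.check_call(cmd)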
@@ -1013,7 +1091,7 @@ class TestSubvolumeGroups(TestVolumesHelper):

     def test_subvolume_group_create_idempotence_resize(self):
         # create group
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         self._fs_cmd("subvolumegroup", "create", self.volname, group)

         # try creating w/ same subvolume name with size -- should set quota
@@ -1035,7 +1113,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
         """
         osize = self.DEFAULT_FILE_SIZE*1024*1024*100
         # create group with 100MB quota
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         self._fs_cmd("subvolumegroup", "create", self.volname, group,
                      "--size", str(osize), "--mode=777")

@@ -1044,7 +1122,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
         self.assertNotEqual(grouppath, None)

         # create subvolume under the group
-        subvolname = self._generate_random_subvolume_name()
+        subvolname = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolname,
                      "--group_name", group, "--mode=777")

@@ -1054,7 +1132,7 @@ class TestSubvolumeGroups(TestVolumesHelper):

         # Create auth_id
         authid = "client.guest1"
-        user = json.loads(self.fs.mon_manager.raw_cluster_cmd(
+        user = json.loads(self.get_ceph_cmd_stdout(
             "auth", "get-or-create", authid,
             "mds", "allow rw path=/volumes",
             "mgr", "allow rw",
@@ -1068,11 +1146,12 @@ class TestSubvolumeGroups(TestVolumesHelper):
         guest_mount.umount_wait()

         # configure credentials for guest client
-        self._configure_guest_auth(guest_mount, "guest1", user[0]["key"])
-
+        guest_keyring_path = self._configure_guest_auth(
+            guest_mount, "guest1", user[0]["key"])
         # mount the subvolume
         mount_path = os.path.join("/", subvolpath)
-        guest_mount.mount_wait(cephfs_mntpt=mount_path)
+        guest_mount.mount_wait(cephfs_mntpt=mount_path,
+                               client_keyring_path=guest_keyring_path)

         # create 99 files of 1MB
         guest_mount.run_shell_payload("mkdir -p dir1")
@@ -1119,7 +1198,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
         """
         osize = self.DEFAULT_FILE_SIZE*1024*1024*100
         # create group with 100MB quota
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         self._fs_cmd("subvolumegroup", "create", self.volname, group,
                      "--size", str(osize), "--mode=777")

@@ -1128,7 +1207,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
         self.assertNotEqual(grouppath, None)

         # create subvolume under the group
-        subvolname = self._generate_random_subvolume_name()
+        subvolname = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolname,
                      "--group_name", group, "--mode=777")

@@ -1140,7 +1219,7 @@ class TestSubvolumeGroups(TestVolumesHelper):

         # Create auth_id
         authid = "client.guest1"
-        user = json.loads(self.fs.mon_manager.raw_cluster_cmd(
+        user = json.loads(self.get_ceph_cmd_stdout(
             "auth", "get-or-create", authid,
             "mds", f"allow rw path={mount_path}",
             "mgr", "allow rw",
@@ -1154,10 +1233,11 @@ class TestSubvolumeGroups(TestVolumesHelper):
         guest_mount.umount_wait()

         # configure credentials for guest client
-        self._configure_guest_auth(guest_mount, "guest1", user[0]["key"])
-
+        guest_keyring_path = self._configure_guest_auth(
+            guest_mount, "guest1", user[0]["key"])
         # mount the subvolume
-        guest_mount.mount_wait(cephfs_mntpt=mount_path)
+        guest_mount.mount_wait(cephfs_mntpt=mount_path,
+                               client_keyring_path=guest_keyring_path)

         # create 99 files of 1MB to exceed quota
         guest_mount.run_shell_payload("mkdir -p dir1")
@@ -1200,7 +1280,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
         """
         osize = self.DEFAULT_FILE_SIZE*1024*1024*100
         # create group with 100MB quota
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         self._fs_cmd("subvolumegroup", "create", self.volname, group,
                      "--size", str(osize), "--mode=777")

@@ -1209,7 +1289,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
         self.assertNotEqual(grouppath, None)

         # create subvolume under the group
-        subvolname = self._generate_random_subvolume_name()
+        subvolname = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolname,
                      "--group_name", group, "--mode=777")

@@ -1243,9 +1323,9 @@ class TestSubvolumeGroups(TestVolumesHelper):
         """
         Tests retained snapshot subvolume removal if it's group quota is exceeded
         """
-        group = self._generate_random_group_name()
-        subvolname = self._generate_random_subvolume_name()
-        snapshot1, snapshot2 = self._generate_random_snapshot_name(2)
+        group = self._gen_subvol_grp_name()
+        subvolname = self._gen_subvol_name()
+        snapshot1, snapshot2 = self._gen_subvol_snap_name(2)

         osize = self.DEFAULT_FILE_SIZE*1024*1024*100
         # create group with 100MB quota
@@ -1301,11 +1381,11 @@ class TestSubvolumeGroups(TestVolumesHelper):
         Tests subvolume removal if it's group quota is set.
         """
         # create group with size -- should set quota
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         self._fs_cmd("subvolumegroup", "create", self.volname, group, "1000000000")

         # create subvolume under the group
-        subvolname = self._generate_random_subvolume_name()
+        subvolname = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group)

         # remove subvolume
@@ -1324,8 +1404,8 @@ class TestSubvolumeGroups(TestVolumesHelper):
         """
         Tests legacy subvolume removal if it's group quota is set.
         """
-        subvolume = self._generate_random_subvolume_name()
-        group = self._generate_random_group_name()
+        subvolume = self._gen_subvol_name()
+        group = self._gen_subvol_grp_name()

         # emulate a old-fashioned subvolume -- in a custom group
         createpath1 = os.path.join(".", "volumes", group, subvolume)
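A group or subvolume "--size" ultimately lands as a CephFS directory quota, which is what the write-until-full tests in this stretch push against. On a mounted client the effective limit can be read back through the quota vxattr, a real CephFS interface (the path argument here is hypothetical):

import os

def group_quota_bytes(mounted_group_path):
    # e.g. "<mnt>/volumes/<group>"; raises OSError when no quota is set
    return int(os.getxattr(mounted_group_path, "ceph.quota.max_bytes"))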
""" - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # emulate a v1 subvolume -- in a custom group self._create_v1_subvolume(subvolume, subvol_group=group, has_snapshot=False) @@ -1386,7 +1466,7 @@ class TestSubvolumeGroups(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024 # create group with 1MB quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize)) # make sure it exists @@ -1417,7 +1497,7 @@ class TestSubvolumeGroups(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024 # create group with 1MB quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize)) # make sure it exists @@ -1449,7 +1529,7 @@ class TestSubvolumeGroups(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024*20 # create group with 20MB quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize), "--mode=777") @@ -1458,7 +1538,7 @@ class TestSubvolumeGroups(TestVolumesHelper): self.assertNotEqual(grouppath, None) # create subvolume under the group - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group, "--mode=777") @@ -1503,7 +1583,7 @@ class TestSubvolumeGroups(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024*20 # create group with 20MB quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize), "--mode=777") @@ -1512,7 +1592,7 @@ class TestSubvolumeGroups(TestVolumesHelper): self.assertNotEqual(grouppath, None) # create subvolume under the group - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group, "--mode=777") @@ -1558,7 +1638,7 @@ class TestSubvolumeGroups(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024*100 # create group with 100MB quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize), "--mode=777") @@ -1567,7 +1647,7 @@ class TestSubvolumeGroups(TestVolumesHelper): self.assertNotEqual(grouppath, None) # create subvolume under the group - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolname, "--group_name", group, "--mode=777") @@ -1612,7 +1692,7 @@ class TestSubvolumeGroups(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024 # create group - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize)) @@ -1637,7 +1717,7 @@ class TestSubvolumeGroups(TestVolumesHelper): osize = self.DEFAULT_FILE_SIZE*1024*1024*5 # create group with 5MB quota - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group, "--size", str(osize), "--mode=777") @@ -1646,7 
@@ -1646,7 +1726,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
         self.assertNotEqual(grouppath, None)

         # create subvolume under the group
-        subvolname = self._generate_random_subvolume_name()
+        subvolname = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolname,
                      "--group_name", group, "--mode=777")

@@ -1695,7 +1775,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
         subvolumegroups = []

         #create subvolumegroups
-        subvolumegroups = self._generate_random_group_name(3)
+        subvolumegroups = self._gen_subvol_grp_name(3)
         for groupname in subvolumegroups:
             self._fs_cmd("subvolumegroup", "create", self.volname, groupname)

@@ -1713,12 +1793,12 @@ class TestSubvolumeGroups(TestVolumesHelper):
         subvolumegroups = []

         #create subvolumegroup
-        subvolumegroups = self._generate_random_group_name(3)
+        subvolumegroups = self._gen_subvol_grp_name(3)
         for groupname in subvolumegroups:
             self._fs_cmd("subvolumegroup", "create", self.volname, groupname)

         # create subvolume and remove. This creates '_deleting' directory.
-        subvolume = self._generate_random_subvolume_name()
+        subvolume = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolume)
         self._fs_cmd("subvolume", "rm", self.volname, subvolume)

@@ -1731,10 +1811,10 @@ class TestSubvolumeGroups(TestVolumesHelper):
         # tests the 'fs subvolumegroup ls' command filters internal directories
         # eg: '_deleting', '_nogroup', '_index', "_legacy"

-        subvolumegroups = self._generate_random_group_name(3)
-        subvolume = self._generate_random_subvolume_name()
-        snapshot = self._generate_random_snapshot_name()
-        clone = self._generate_random_clone_name()
+        subvolumegroups = self._gen_subvol_grp_name(3)
+        subvolume = self._gen_subvol_name()
+        snapshot = self._gen_subvol_snap_name()
+        clone = self._gen_subvol_clone_name()

         #create subvolumegroups
         for groupname in subvolumegroups:
@@ -1789,7 +1869,7 @@ class TestSubvolumeGroups(TestVolumesHelper):
         group = "pinme"
         self._fs_cmd("subvolumegroup", "create", self.volname, group)
         self._fs_cmd("subvolumegroup", "pin", self.volname, group, "distributed", "True")
-        subvolumes = self._generate_random_subvolume_name(50)
+        subvolumes = self._gen_subvol_name(50)
         for subvolume in subvolumes:
             self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group)
         self._wait_distributed_subtrees(2 * 2, status=status, rank="all")
@@ -1803,7 +1883,7 @@ class TestSubvolumeGroups(TestVolumesHelper):

     def test_subvolume_group_rm_force(self):
         # test removing non-existing subvolume group with --force
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         try:
             self._fs_cmd("subvolumegroup", "rm", self.volname, group, "--force")
         except CommandFailedError:
@@ -1812,7 +1892,7 @@ class TestSubvolumeGroups(TestVolumesHelper):

     def test_subvolume_group_exists_with_subvolumegroup_and_no_subvolume(self):
         """Test the presence of any subvolumegroup when only subvolumegroup is present"""
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         # create subvolumegroup
         self._fs_cmd("subvolumegroup", "create", self.volname, group)
         ret = self._fs_cmd("subvolumegroup", "exist", self.volname)
@@ -1832,8 +1912,8 @@ class TestSubvolumeGroups(TestVolumesHelper):
         """Test the presence of any subvolume when subvolumegroup
            and subvolume both are present"""
-        group = self._generate_random_group_name()
-        subvolume = self._generate_random_subvolume_name(2)
+        group = self._gen_subvol_grp_name()
+        subvolume = self._gen_subvol_name(2)
         # create subvolumegroup
self._fs_cmd("subvolumegroup", "create", self.volname, group) # create subvolume in group @@ -1859,7 +1939,7 @@ class TestSubvolumeGroups(TestVolumesHelper): """Test the presence of any subvolume when subvolume is present but no subvolumegroup is present""" - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) ret = self._fs_cmd("subvolumegroup", "exist", self.volname) @@ -1869,11 +1949,37 @@ class TestSubvolumeGroups(TestVolumesHelper): ret = self._fs_cmd("subvolumegroup", "exist", self.volname) self.assertEqual(ret.strip('\n'), "no subvolumegroup exists") + def test_subvolume_group_rm_when_its_not_empty(self): + group = self._gen_subvol_grp_name() + subvolume = self._gen_subvol_name() + + # create subvolumegroup + self._fs_cmd("subvolumegroup", "create", self.volname, group) + # create subvolume in group + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) + # try, remove subvolume group + try: + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + except CommandFailedError as ce: + self.assertEqual(ce.exitstatus, errno.ENOTEMPTY, "invalid error code on deleting " + "subvolumegroup when it is not empty") + else: + self.fail("expected the 'fs subvolumegroup rm' command to fail") + + # delete subvolume + self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group) + + # delete subvolumegroup + self._fs_cmd("subvolumegroup", "rm", self.volname, group) + + # verify trash dir is clean + self._wait_for_trash_empty() + class TestSubvolumes(TestVolumesHelper): """Tests for FS subvolume operations, except snapshot and snapshot clone.""" def test_async_subvolume_rm(self): - subvolumes = self._generate_random_subvolume_name(100) + subvolumes = self._gen_subvol_name(100) # create subvolumes for subvolume in subvolumes: @@ -1892,7 +1998,7 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty(timeout=300) def test_default_uid_gid_subvolume(self): - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() expected_uid = 0 expected_gid = 0 @@ -1926,7 +2032,7 @@ class TestSubvolumes(TestVolumesHelper): def test_subvolume_create_and_rm(self): # create subvolume - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) # make sure it exists @@ -1948,8 +2054,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_create_and_rm_in_group(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -1968,7 +2074,7 @@ class TestSubvolumes(TestVolumesHelper): def test_subvolume_create_idempotence(self): # create subvolume - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) # try creating w/ same subvolume name -- should be idempotent @@ -1982,7 +2088,7 @@ class TestSubvolumes(TestVolumesHelper): def test_subvolume_create_idempotence_resize(self): # create subvolume - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) # try creating w/ same subvolume name with size -- 
@@ -2003,7 +2109,7 @@ class TestSubvolumes(TestVolumesHelper):
         default_mode = "755"

         # create subvolume
-        subvolume = self._generate_random_subvolume_name()
+        subvolume = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolume)

         subvol_path = self._get_subvolume_path(self.volname, subvolume)
@@ -2027,7 +2133,7 @@ class TestSubvolumes(TestVolumesHelper):
     def test_subvolume_create_idempotence_without_passing_mode(self):
         # create subvolume
         desired_mode = "777"
-        subvolume = self._generate_random_subvolume_name()
+        subvolume = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode", desired_mode)

         subvol_path = self._get_subvolume_path(self.volname, subvolume)
@@ -2056,7 +2162,7 @@ class TestSubvolumes(TestVolumesHelper):
         """

         # create subvolume
-        subvolume = self._generate_random_subvolume_name()
+        subvolume = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolume, "--namespace-isolated")

         # get subvolume metadata
@@ -2071,7 +2177,7 @@ class TestSubvolumes(TestVolumesHelper):
         self._wait_for_trash_empty()

     def test_subvolume_create_with_auto_cleanup_on_fail(self):
-        subvolume = self._generate_random_subvolume_name()
+        subvolume = self._gen_subvol_name()
         data_pool = "invalid_pool"
         # create subvolume with invalid data pool layout fails
         with self.assertRaises(CommandFailedError):
@@ -2089,8 +2195,8 @@ class TestSubvolumes(TestVolumesHelper):
         self._wait_for_trash_empty()

     def test_subvolume_create_with_desired_data_pool_layout_in_group(self):
-        subvol1, subvol2 = self._generate_random_subvolume_name(2)
-        group = self._generate_random_group_name()
+        subvol1, subvol2 = self._gen_subvol_name(2)
+        group = self._gen_subvol_grp_name()

         # create group. this also helps set default pool layout for subvolumes
         # created within the group.
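One hunk above touches the namespace-isolation test: "--namespace-isolated" places the subvolume's data objects in a dedicated RADOS namespace, which the test reads back from the subvolume's metadata. A sketch of that read (CLI access assumed; the example value is indicative, not guaranteed):

import json
import subprocess

def subvolume_namespace(vol, subvol):
    out = subprocess.check_output(
        ["ceph", "fs", "subvolume", "info", vol, subvol], text=True)
    return json.loads(out)["pool_namespace"]  # e.g. "fsvolumens_<subvol>"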
@@ -2126,7 +2232,7 @@ class TestSubvolumes(TestVolumesHelper):
         self._wait_for_trash_empty()

     def test_subvolume_create_with_desired_mode(self):
-        subvol1 = self._generate_random_subvolume_name()
+        subvol1 = self._gen_subvol_name()

         # default mode
         default_mode = "755"
@@ -2156,9 +2262,9 @@ class TestSubvolumes(TestVolumesHelper):
         self._wait_for_trash_empty()

     def test_subvolume_create_with_desired_mode_in_group(self):
-        subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3)
+        subvol1, subvol2, subvol3 = self._gen_subvol_name(3)

-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         # default mode
         expected_mode1 = "755"
         # desired mode
@@ -2202,7 +2308,7 @@ class TestSubvolumes(TestVolumesHelper):
         gid = 1000

         # create subvolume
-        subvolname = self._generate_random_subvolume_name()
+        subvolname = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolname, "--uid", str(uid), "--gid", str(gid))

         # make sure it exists
@@ -2222,7 +2328,7 @@ class TestSubvolumes(TestVolumesHelper):
         self._wait_for_trash_empty()

     def test_subvolume_create_with_invalid_data_pool_layout(self):
-        subvolume = self._generate_random_subvolume_name()
+        subvolume = self._gen_subvol_name()
         data_pool = "invalid_pool"
         # create subvolume with invalid data pool layout
         try:
@@ -2237,7 +2343,7 @@ class TestSubvolumes(TestVolumesHelper):

     def test_subvolume_create_with_invalid_size(self):
         # create subvolume with an invalid size -1
-        subvolume = self._generate_random_subvolume_name()
+        subvolume = self._gen_subvol_name()
         try:
             self._fs_cmd("subvolume", "create", self.volname, subvolume, "--size", "-1")
         except CommandFailedError as ce:
@@ -2254,7 +2360,7 @@ class TestSubvolumes(TestVolumesHelper):
         permission denied error if option --group=_nogroup is provided.
""" - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() # try to create subvolume providing --group_name=_nogroup option try: @@ -2289,7 +2395,7 @@ class TestSubvolumes(TestVolumesHelper): """ # create subvolume - subvolname = self._generate_random_subvolume_name() + subvolname = self._gen_subvol_name() osize = self.DEFAULT_FILE_SIZE*1024*1024 self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize)) @@ -2319,7 +2425,7 @@ class TestSubvolumes(TestVolumesHelper): "type", "uid", "features", "state"] # create subvolume - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) # get subvolume metadata @@ -2367,7 +2473,7 @@ class TestSubvolumes(TestVolumesHelper): subvolumes = [] # create subvolumes - subvolumes = self._generate_random_subvolume_name(3) + subvolumes = self._gen_subvol_name(3) for subvolume in subvolumes: self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -2439,7 +2545,7 @@ class TestSubvolumes(TestVolumesHelper): """ ensure a subvolume is marked with the ceph.dir.subvolume xattr """ - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -2475,7 +2581,7 @@ class TestSubvolumes(TestVolumesHelper): self.fs.set_max_mds(2) status = self.fs.wait_for_daemons() - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) self._fs_cmd("subvolume", "pin", self.volname, subvolume, "export", "1") path = self._fs_cmd("subvolume", "getpath", self.volname, subvolume) @@ -2493,8 +2599,8 @@ class TestSubvolumes(TestVolumesHelper): ### authorize operations def test_authorize_deauthorize_legacy_subvolume(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() authid = "alice" guest_mount = self.mount_b @@ -2519,10 +2625,11 @@ class TestSubvolumes(TestVolumesHelper): self.assertIn("client.{0}".format(authid), existing_ids) # configure credentials for guest client - self._configure_guest_auth(guest_mount, authid, key) - + guest_keyring_path = self._configure_guest_auth( + guest_mount, authid, key) # mount the subvolume, and write to it - guest_mount.mount_wait(cephfs_mntpt=mount_path) + guest_mount.mount_wait(cephfs_mntpt=mount_path, + client_keyring_path=guest_keyring_path) guest_mount.write_n_mb("data.bin", 1) # authorize guest authID read access to subvolume @@ -2551,8 +2658,8 @@ class TestSubvolumes(TestVolumesHelper): self._fs_cmd("subvolumegroup", "rm", self.volname, group) def test_authorize_deauthorize_subvolume(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() authid = "alice" guest_mount = self.mount_b @@ -2575,10 +2682,11 @@ class TestSubvolumes(TestVolumesHelper): self.assertIn("client.{0}".format(authid), existing_ids) # configure credentials for guest client - self._configure_guest_auth(guest_mount, authid, key) - + guest_keyring_path = self._configure_guest_auth( + guest_mount, authid, key) # mount the subvolume, and write to it - guest_mount.mount_wait(cephfs_mntpt=mount_path) + guest_mount.mount_wait(cephfs_mntpt=mount_path, + 
@@ -2519,10 +2625,11 @@ class TestSubvolumes(TestVolumesHelper):
         self.assertIn("client.{0}".format(authid), existing_ids)

         # configure credentials for guest client
-        self._configure_guest_auth(guest_mount, authid, key)
-
+        guest_keyring_path = self._configure_guest_auth(
+            guest_mount, authid, key)
         # mount the subvolume, and write to it
-        guest_mount.mount_wait(cephfs_mntpt=mount_path)
+        guest_mount.mount_wait(cephfs_mntpt=mount_path,
+                               client_keyring_path=guest_keyring_path)
         guest_mount.write_n_mb("data.bin", 1)

         # authorize guest authID read access to subvolume
@@ -2551,8 +2658,8 @@ class TestSubvolumes(TestVolumesHelper):
         self._fs_cmd("subvolumegroup", "rm", self.volname, group)

     def test_authorize_deauthorize_subvolume(self):
-        subvolume = self._generate_random_subvolume_name()
-        group = self._generate_random_group_name()
+        subvolume = self._gen_subvol_name()
+        group = self._gen_subvol_grp_name()
         authid = "alice"

         guest_mount = self.mount_b
@@ -2575,10 +2682,11 @@ class TestSubvolumes(TestVolumesHelper):
         self.assertIn("client.{0}".format(authid), existing_ids)

         # configure credentials for guest client
-        self._configure_guest_auth(guest_mount, authid, key)
-
+        guest_keyring_path = self._configure_guest_auth(
+            guest_mount, authid, key)
         # mount the subvolume, and write to it
-        guest_mount.mount_wait(cephfs_mntpt=mount_path)
+        guest_mount.mount_wait(cephfs_mntpt=mount_path,
+                               client_keyring_path=guest_keyring_path)
         guest_mount.write_n_mb("data.bin", 1)

         # authorize guest authID read access to subvolume
@@ -2614,8 +2722,8 @@ class TestSubvolumes(TestVolumesHelper):
         subvolumes is stored as a two-way mapping between auth
         IDs and subvolumes that they're authorized to access.
         """
-        subvolume = self._generate_random_subvolume_name()
-        group = self._generate_random_group_name()
+        subvolume = self._gen_subvol_name()
+        group = self._gen_subvol_grp_name()

         guest_mount = self.mount_b
@@ -2722,8 +2830,8 @@ class TestSubvolumes(TestVolumesHelper):
         self._fs_cmd("subvolumegroup", "rm", self.volname, group)

     def test_subvolume_authorized_list(self):
-        subvolume = self._generate_random_subvolume_name()
-        group = self._generate_random_group_name()
+        subvolume = self._gen_subvol_name()
+        group = self._gen_subvol_grp_name()
         authid1 = "alice"
         authid2 = "guest1"
         authid3 = "guest2"
@@ -2765,11 +2873,11 @@ class TestSubvolumes(TestVolumesHelper):
         it's not allowed to authorize the auth-id by default.
         """

-        subvolume = self._generate_random_subvolume_name()
-        group = self._generate_random_group_name()
+        subvolume = self._gen_subvol_name()
+        group = self._gen_subvol_grp_name()

         # Create auth_id
-        self.fs.mon_manager.raw_cluster_cmd(
+        self.run_ceph_cmd(
             "auth", "get-or-create", "client.guest1",
             "mds", "allow *",
             "osd", "allow rw",
@@ -2798,7 +2906,7 @@ class TestSubvolumes(TestVolumesHelper):
             self.fail("expected the 'fs subvolume authorize' command to fail")

         # clean up
-        self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+        self.run_ceph_cmd("auth", "rm", "client.guest1")
         self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
         self._fs_cmd("subvolumegroup", "rm", self.volname, group)

@@ -2809,11 +2917,11 @@ class TestSubvolumes(TestVolumesHelper):
         allowed with option allow_existing_id.
         """

-        subvolume = self._generate_random_subvolume_name()
-        group = self._generate_random_group_name()
+        subvolume = self._gen_subvol_name()
+        group = self._gen_subvol_grp_name()

         # Create auth_id
-        self.fs.mon_manager.raw_cluster_cmd(
+        self.run_ceph_cmd(
             "auth", "get-or-create", "client.guest1",
             "mds", "allow *",
             "osd", "allow rw",
@@ -2841,7 +2949,7 @@ class TestSubvolumes(TestVolumesHelper):
         # clean up
         self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume,
                      auth_id, "--group_name", group)
-        self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+        self.run_ceph_cmd("auth", "rm", "client.guest1")
         self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
         self._fs_cmd("subvolumegroup", "rm", self.volname, group)

@@ -2852,8 +2960,8 @@ class TestSubvolumes(TestVolumesHelper):
         deauthorize. It should only remove caps associated it.
         """

-        subvolume = self._generate_random_subvolume_name()
-        group = self._generate_random_group_name()
+        subvolume = self._gen_subvol_name()
+        group = self._gen_subvol_grp_name()
         auth_id = "guest1"

         guestclient_1 = {
@@ -2875,7 +2983,7 @@ class TestSubvolumes(TestVolumesHelper):
                                     "--group_name", group).rstrip()

         # Update caps for guestclient_1 out of band
-        out = self.fs.mon_manager.raw_cluster_cmd(
+        out = self.get_ceph_cmd_stdout(
             "auth", "caps", "client.guest1",
             "mds", "allow rw path=/volumes/{0}, allow rw path={1}".format(group, subvol_path),
             "osd", "allow rw pool=cephfs_data",
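The out-of-band caps manipulation above boils down to plain `ceph auth` calls; for reference, a sketch of the read side that the assertions which follow depend on (cluster access assumed):

import json
import subprocess

def get_caps(entity):
    out = subprocess.check_output(
        ["ceph", "auth", "get", entity, "--format=json"], text=True)
    return json.loads(out)[0]["caps"]

# e.g. get_caps("client.guest1") might return something like
#   {"mds": "allow rw path=/volumes/<group>", "mon": "allow r", "mgr": "allow rw"}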
@@ -2888,7 +2996,7 @@ class TestSubvolumes(TestVolumesHelper):
         # Validate the caps of guestclient_1 after deauthorize. It should not have deleted
         # guestclient_1. The mgr and mds caps should be present which was updated out of band.
-        out = json.loads(self.fs.mon_manager.raw_cluster_cmd("auth", "get", "client.guest1", "--format=json-pretty"))
+        out = json.loads(self.get_ceph_cmd_stdout("auth", "get", "client.guest1", "--format=json-pretty"))

         self.assertEqual("client.guest1", out[0]["entity"])
         self.assertEqual("allow rw path=/volumes/{0}".format(group), out[0]["caps"]["mds"])
@@ -2896,7 +3004,7 @@ class TestSubvolumes(TestVolumesHelper):
         self.assertNotIn("osd", out[0]["caps"])

         # clean up
-        out = self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+        out = self.get_ceph_cmd_stdout("auth", "rm", "client.guest1")
         self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
         self._fs_cmd("subvolumegroup", "rm", self.volname, group)

@@ -2909,8 +3017,8 @@ class TestSubvolumes(TestVolumesHelper):

         guest_mount = self.mount_b

-        subvolume = self._generate_random_subvolume_name()
-        group = self._generate_random_group_name()
+        subvolume = self._gen_subvol_name()
+        group = self._gen_subvol_grp_name()
         auth_id = "guest1"

         guestclient_1 = {
@@ -2948,7 +3056,7 @@ class TestSubvolumes(TestVolumesHelper):
         # clean up
         self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume, auth_id, "--group_name", group)
         guest_mount.umount_wait()
-        self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+        self.run_ceph_cmd("auth", "rm", "client.guest1")
         self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--group_name", group)
         self._fs_cmd("subvolumegroup", "rm", self.volname, group)

@@ -2961,8 +3069,8 @@ class TestSubvolumes(TestVolumesHelper):

         guest_mount = self.mount_b

-        subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
-        group = self._generate_random_group_name()
+        subvolume1, subvolume2 = self._gen_subvol_name(2)
+        group = self._gen_subvol_grp_name()

         guestclient_1 = {
             "auth_id": "guest1",
@@ -3004,7 +3112,7 @@ class TestSubvolumes(TestVolumesHelper):
         # clean up
         self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, "guest1", "--group_name", group)
         guest_mount.umount_wait()
-        self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+        self.run_ceph_cmd("auth", "rm", "client.guest1")
         self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group)
         self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group)
         self._fs_cmd("subvolumegroup", "rm", self.volname, group)

@@ -3019,8 +3127,8 @@ class TestSubvolumes(TestVolumesHelper):

         guest_mount = self.mount_b

-        subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
-        group = self._generate_random_group_name()
+        subvolume1, subvolume2 = self._gen_subvol_name(2)
+        group = self._gen_subvol_grp_name()
         auth_id = "guest1"

         guestclient_1 = {
@@ -3079,7 +3187,7 @@ class TestSubvolumes(TestVolumesHelper):
         self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, auth_id, "--group_name", group)
         self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume2, auth_id, "--group_name", group)
         guest_mount.umount_wait()
-        self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+        self.run_ceph_cmd("auth", "rm", "client.guest1")
         self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group)
         self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group)
         self._fs_cmd("subvolumegroup", "rm", self.volname, group)

@@ -3094,8 +3202,8 @@ class TestSubvolumes(TestVolumesHelper):

         guest_mount = self.mount_b

-        subvolume1, subvolume2 = self._generate_random_subvolume_name(2)
-        group = self._generate_random_group_name()
+        subvolume1, subvolume2 = self._gen_subvol_name(2)
+        group = self._gen_subvol_grp_name()
         auth_id = "guest1"

         guestclient_1 = {
@@ -3151,7 +3259,7 @@ class TestSubvolumes(TestVolumesHelper):
         # clean up
         self._fs_cmd("subvolume", "deauthorize", self.volname, subvolume1, auth_id, "--group_name", group)
         guest_mount.umount_wait()
-        self.fs.mon_manager.raw_cluster_cmd("auth", "rm", "client.guest1")
+        self.run_ceph_cmd("auth", "rm", "client.guest1")
         self._fs_cmd("subvolume", "rm", self.volname, subvolume1, "--group_name", group)
         self._fs_cmd("subvolume", "rm", self.volname, subvolume2, "--group_name", group)
         self._fs_cmd("subvolumegroup", "rm", self.volname, group)

@@ -3161,8 +3269,8 @@ class TestSubvolumes(TestVolumesHelper):
         That a subvolume client can be evicted based on the auth ID
         """

-        subvolumes = self._generate_random_subvolume_name(2)
-        group = self._generate_random_group_name()
+        subvolumes = self._gen_subvol_name(2)
+        group = self._gen_subvol_grp_name()

         # create group
         self._fs_cmd("subvolumegroup", "create", self.volname, group)
@@ -3189,11 +3297,14 @@ class TestSubvolumes(TestVolumesHelper):
             mount_path = self._fs_cmd("subvolume", "getpath", self.volname, subvolumes[i],
                                       "--group_name", group).rstrip()
-            # configure credentials for guest client
-            self._configure_guest_auth(guest_mounts[i], auth_id, key)

+            # configure credentials for guest client
+            guest_keyring_path = self._configure_guest_auth(guest_mounts[i],
+                                                            auth_id, key)
             # mount the subvolume, and write to it
-            guest_mounts[i].mount_wait(cephfs_mntpt=mount_path)
+            guest_mounts[i].mount_wait(
+                cephfs_mntpt=mount_path,
+                client_keyring_path=guest_keyring_path)
             guest_mounts[i].write_n_mb("data.bin", 1)

         # Evict client, guest_mounts[0], using auth ID 'guest' and has mounted
@@ -3230,7 +3341,7 @@ class TestSubvolumes(TestVolumesHelper):
         self.fs.wait_for_daemons()
         self.config_set('mds', 'mds_export_ephemeral_random', True)

-        subvolume = self._generate_random_subvolume_name()
+        subvolume = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolume)
         self._fs_cmd("subvolume", "pin", self.volname, subvolume, "random", ".01")
         # no verification
@@ -3248,7 +3359,7 @@ class TestSubvolumes(TestVolumesHelper):
         osize = self.DEFAULT_FILE_SIZE*1024*1024

         # create subvolume
-        subvolname = self._generate_random_subvolume_name()
+        subvolname = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize))

         # make sure it exists
@@ -3281,7 +3392,7 @@ class TestSubvolumes(TestVolumesHelper):
         osize = self.DEFAULT_FILE_SIZE*1024*1024

         # create subvolume
-        subvolname = self._generate_random_subvolume_name()
+        subvolname = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize))

         # make sure it exists
@@ -3315,7 +3426,7 @@ class TestSubvolumes(TestVolumesHelper):
         osize = self.DEFAULT_FILE_SIZE*1024*1024*20

         # create subvolume
-        subvolname = self._generate_random_subvolume_name()
+        subvolname = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777")

         # make sure it exists
@@ -3362,7 +3473,7 @@ class TestSubvolumes(TestVolumesHelper):
         osize = self.DEFAULT_FILE_SIZE*1024*1024*20

         # create subvolume
-        subvolname = self._generate_random_subvolume_name()
+        subvolname = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777")

         # make sure it exists
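The eviction test above ultimately exercises the volumes plugin's evict entry point; the equivalent CLI call is sketched below (cluster access assumed):

import subprocess

def evict_subvolume_clients(vol, subvol, auth_id, group=None):
    # drops the client sessions that mounted this subvolume with the auth ID
    cmd = ["ceph", "fs", "subvolume", "evict", vol, subvol, auth_id]
    if group:
        cmd += ["--group_name", group]
    subprocess.check_call(cmd)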
@@ -3410,7 +3521,7 @@ class TestSubvolumes(TestVolumesHelper):
         osize = self.DEFAULT_FILE_SIZE*1024*1024*10

         # create subvolume of quota 10MB and make sure it exists
-        subvolname = self._generate_random_subvolume_name()
+        subvolname = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolname, "--size", str(osize), "--mode=777")
         subvolpath = self._get_subvolume_path(self.volname, subvolname)
         self.assertNotEqual(subvolpath, None)
@@ -3458,7 +3569,7 @@ class TestSubvolumes(TestVolumesHelper):
         """

         # create subvolume
-        subvolname = self._generate_random_subvolume_name()
+        subvolname = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolname,
                      "--size", str(self.DEFAULT_FILE_SIZE*1024*1024))

@@ -3485,7 +3596,7 @@ class TestSubvolumes(TestVolumesHelper):
         """

         # create subvolume
-        subvolname = self._generate_random_subvolume_name()
+        subvolname = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolname,
                      "--size", str(self.DEFAULT_FILE_SIZE*1024*1024*5), "--mode=777")

@@ -3522,7 +3633,7 @@ class TestSubvolumes(TestVolumesHelper):

     def test_subvolume_rm_force(self):
         # test removing non-existing subvolume with --force
-        subvolume = self._generate_random_subvolume_name()
+        subvolume = self._gen_subvol_name()
         try:
             self._fs_cmd("subvolume", "rm", self.volname, subvolume, "--force")
         except CommandFailedError:
@@ -3531,8 +3642,8 @@ class TestSubvolumes(TestVolumesHelper):

     def test_subvolume_exists_with_subvolumegroup_and_subvolume(self):
         """Test the presence of any subvolume by specifying the name of subvolumegroup"""
-        group = self._generate_random_group_name()
-        subvolume1 = self._generate_random_subvolume_name()
+        group = self._gen_subvol_grp_name()
+        subvolume1 = self._gen_subvol_name()
         # create subvolumegroup
         self._fs_cmd("subvolumegroup", "create", self.volname, group)
         # create subvolume in group
@@ -3550,7 +3661,7 @@ class TestSubvolumes(TestVolumesHelper):
         """Test the presence of any subvolume specifying the name
            of subvolumegroup and no subvolumes"""
-        group = self._generate_random_group_name()
+        group = self._gen_subvol_grp_name()
         # create subvolumegroup
         self._fs_cmd("subvolumegroup", "create", self.volname, group)
         ret = self._fs_cmd("subvolume", "exist", self.volname, "--group_name", group)
@@ -3562,7 +3673,7 @@ class TestSubvolumes(TestVolumesHelper):
         """Test the presence of any subvolume without specifying the name
            of subvolumegroup"""
-        subvolume1 = self._generate_random_subvolume_name()
+        subvolume1 = self._gen_subvol_name()
         # create subvolume
         self._fs_cmd("subvolume", "create", self.volname, subvolume1)
         ret = self._fs_cmd("subvolume", "exist", self.volname)
@@ -3585,7 +3696,7 @@ class TestSubvolumes(TestVolumesHelper):
         """

         # create subvolume
-        subvolname = self._generate_random_subvolume_name()
+        subvolname = self._gen_subvol_name()
         osize = self.DEFAULT_FILE_SIZE*1024*1024
         self._fs_cmd("subvolume", "create", self.volname, subvolname,
                      "--size", str(osize))
""" - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -3644,8 +3755,8 @@ class TestSubvolumes(TestVolumesHelper): def test_subvolume_user_metadata_set(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3668,8 +3779,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_set_idempotence(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3698,8 +3809,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_get(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3731,8 +3842,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_get_for_nonexisting_key(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3761,8 +3872,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_get_for_nonexisting_section(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3786,8 +3897,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_update(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3823,8 +3934,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_list(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3856,8 +3967,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_list_if_no_metadata_set(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. 
self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3885,8 +3996,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_remove(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3920,8 +4031,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_remove_for_nonexisting_key(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3950,8 +4061,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_remove_for_nonexisting_section(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -3975,8 +4086,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_remove_force(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -4010,8 +4121,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_remove_force_for_nonexisting_key(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group. 
self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -4051,8 +4162,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_set_and_get_for_legacy_subvolume(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # emulate a old-fashioned subvolume in a custom group createpath = os.path.join(".", "volumes", group, subvolname) @@ -4085,8 +4196,8 @@ class TestSubvolumes(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_user_metadata_list_and_remove_for_legacy_subvolume(self): - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # emulate a old-fashioned subvolume in a custom group createpath = os.path.join(".", "volumes", group, subvolname) @@ -4133,9 +4244,9 @@ class TestSubvolumeGroupSnapshots(TestVolumesHelper): """Tests for FS subvolume group snapshot operations.""" @unittest.skip("skipping subvolumegroup snapshot tests") def test_nonexistent_subvolume_group_snapshot_rm(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -4169,9 +4280,9 @@ class TestSubvolumeGroupSnapshots(TestVolumesHelper): @unittest.skip("skipping subvolumegroup snapshot tests") def test_subvolume_group_snapshot_create_and_rm(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -4196,9 +4307,9 @@ class TestSubvolumeGroupSnapshots(TestVolumesHelper): @unittest.skip("skipping subvolumegroup snapshot tests") def test_subvolume_group_snapshot_idempotence(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -4231,11 +4342,11 @@ class TestSubvolumeGroupSnapshots(TestVolumesHelper): snapshots = [] # create group - group = self._generate_random_group_name() + group = self._gen_subvol_grp_name() self._fs_cmd("subvolumegroup", "create", self.volname, group) # create subvolumegroup snapshots - snapshots = self._generate_random_snapshot_name(3) + snapshots = self._gen_subvol_snap_name(3) for snapshot in snapshots: self._fs_cmd("subvolumegroup", "snapshot", "create", self.volname, group, snapshot) @@ -4250,8 +4361,8 @@ class TestSubvolumeGroupSnapshots(TestVolumesHelper): @unittest.skip("skipping subvolumegroup snapshot tests") def test_subvolume_group_snapshot_rm_force(self): # test removing non-existing subvolume group snapshot with --force - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # 
         # remove snapshot
         try:
             self._fs_cmd("subvolumegroup", "snapshot", "rm", self.volname, group, snapshot, "--force")
@@ -4259,8 +4370,8 @@ class TestSubvolumeGroupSnapshots(TestVolumesHelper):
             raise RuntimeError("expected the 'fs subvolumegroup snapshot rm --force' command to succeed")

     def test_subvolume_group_snapshot_unsupported_status(self):
-        group = self._generate_random_group_name()
-        snapshot = self._generate_random_snapshot_name()
+        group = self._gen_subvol_grp_name()
+        snapshot = self._gen_subvol_snap_name()

         # create group
         self._fs_cmd("subvolumegroup", "create", self.volname, group)
@@ -4280,8 +4391,8 @@ class TestSubvolumeGroupSnapshots(TestVolumesHelper):
 class TestSubvolumeSnapshots(TestVolumesHelper):
     """Tests for FS subvolume snapshot operations."""
     def test_nonexistent_subvolume_snapshot_rm(self):
-        subvolume = self._generate_random_subvolume_name()
-        snapshot = self._generate_random_snapshot_name()
+        subvolume = self._gen_subvol_name()
+        snapshot = self._gen_subvol_snap_name()

         # create subvolume
         self._fs_cmd("subvolume", "create", self.volname, subvolume)
@@ -4308,8 +4419,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper):
         self._wait_for_trash_empty()

     def test_subvolume_snapshot_create_and_rm(self):
-        subvolume = self._generate_random_subvolume_name()
-        snapshot = self._generate_random_snapshot_name()
+        subvolume = self._gen_subvol_name()
+        snapshot = self._gen_subvol_snap_name()

         # create subvolume
         self._fs_cmd("subvolume", "create", self.volname, subvolume)
@@ -4327,8 +4438,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper):
         self._wait_for_trash_empty()

     def test_subvolume_snapshot_create_idempotence(self):
-        subvolume = self._generate_random_subvolume_name()
-        snapshot = self._generate_random_snapshot_name()
+        subvolume = self._gen_subvol_name()
+        snapshot = self._gen_subvol_snap_name()

         # create subvolume
         self._fs_cmd("subvolume", "create", self.volname, subvolume)
@@ -4356,8 +4467,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper):
         snap_md = ["created_at", "data_pool", "has_pending_clones"]

-        subvolume = self._generate_random_subvolume_name()
-        snapshot, snap_missing = self._generate_random_snapshot_name(2)
+        subvolume = self._gen_subvol_name()
+        snapshot, snap_missing = self._gen_subvol_snap_name(2)

         # create subvolume
         self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777")
@@ -4391,9 +4502,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper):
         self._wait_for_trash_empty()

     def test_subvolume_snapshot_in_group(self):
-        subvolume = self._generate_random_subvolume_name()
-        group = self._generate_random_group_name()
-        snapshot = self._generate_random_snapshot_name()
+        subvolume = self._gen_subvol_name()
+        group = self._gen_subvol_grp_name()
+        snapshot = self._gen_subvol_snap_name()

         # create group
         self._fs_cmd("subvolumegroup", "create", self.volname, group)
@@ -4422,11 +4533,11 @@ class TestSubvolumeSnapshots(TestVolumesHelper):
         snapshots = []

         # create subvolume
-        subvolume = self._generate_random_subvolume_name()
+        subvolume = self._gen_subvol_name()
         self._fs_cmd("subvolume", "create", self.volname, subvolume)

         # create subvolume snapshots
-        snapshots = self._generate_random_snapshot_name(3)
+        snapshots = self._gen_subvol_snap_name(3)
         for snapshot in snapshots:
             self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot)
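The subvolume snapshot tests revolve around three info fields (created_at, data_pool, has_pending_clones). A sketch of the calls involved (cluster access assumed):

import json
import subprocess

def snapshot_and_info(vol, subvol, snap):
    subprocess.check_call(["ceph", "fs", "subvolume", "snapshot", "create",
                           vol, subvol, snap])
    out = subprocess.check_output(["ceph", "fs", "subvolume", "snapshot", "info",
                                   vol, subvol, snap], text=True)
    # keys include created_at, data_pool and has_pending_clones
    return json.loads(out)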
= self._gen_subvol_name() + group = self._gen_subvol_grp_name() snap_count = 3 # create group @@ -4465,7 +4576,7 @@ class TestSubvolumeSnapshots(TestVolumesHelper): self._fs_cmd("subvolume", "create", self.volname, subvolume, "--group_name", group) # create subvolume snapshots - snapshots = self._generate_random_snapshot_name(snap_count) + snapshots = self._gen_subvol_snap_name(snap_count) for snapshot in snapshots: self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot, group) @@ -4500,8 +4611,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper): at ancestral level """ - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -4548,8 +4659,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper): at ancestral level """ - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -4596,9 +4707,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): fail. """ - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - group_snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + group_snapshot = self._gen_subvol_snap_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -4637,8 +4748,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ ensure retained subvolume recreate does not leave any incarnations in the subvolume and trash """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -4683,8 +4794,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ snap_md = ["created_at", "data_pool", "has_pending_clones"] - subvolume = self._generate_random_subvolume_name() - snapshot1, snapshot2 = self._generate_random_snapshot_name(2) + subvolume = self._gen_subvol_name() + snapshot1, snapshot2 = self._gen_subvol_snap_name(2) # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -4746,8 +4857,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ snap_md = ["created_at", "data_pool", "has_pending_clones"] - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -4840,7 +4951,7 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ ensure retain snapshots based delete of a subvolume with no snapshots, deletes the subvolume """ - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -4859,8 +4970,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ ensure retained subvolume recreate fails if its trash is not yet purged """ - subvolume = self._generate_random_subvolume_name() - snapshot =
self._gen_subvol_snap_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -4898,8 +5009,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_rm_with_snapshots(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -4930,9 +5041,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): Snapshot protect/unprotect commands are deprecated. This test exists to ensure that invoking the command does not cause errors, till they are removed from a subsequent release. """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -4970,8 +5081,8 @@ class TestSubvolumeSnapshots(TestVolumesHelper): def test_subvolume_snapshot_rm_force(self): # test removing non existing subvolume snapshot with --force - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() # remove snapshot try: @@ -4983,9 +5094,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Set custom metadata for subvolume snapshot. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5015,9 +5126,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Set custom metadata for subvolume snapshot (Idempotency). """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5065,9 +5176,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Get custom metadata for a specified key in subvolume snapshot metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5106,9 +5217,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Get custom metadata for subvolume snapshot if specified key not exist in metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. 
self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5144,9 +5255,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Get custom metadata for subvolume snapshot if metadata is not added for subvolume snapshot. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5177,9 +5288,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Update custom metadata for a specified key in subvolume snapshot metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5222,9 +5333,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ List custom metadata for subvolume snapshot. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5261,9 +5372,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ List custom metadata for subvolume snapshot if metadata is not added for subvolume snapshot. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5295,9 +5406,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Remove custom metadata for a specified key in subvolume snapshot metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5338,9 +5449,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Remove custom metadata for subvolume snapshot if specified key not exist in metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5376,9 +5487,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Remove custom metadata for subvolume snapshot if metadata is not added for subvolume snapshot. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. 
self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5409,9 +5520,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Forcefully remove custom metadata for a specified key in subvolume snapshot metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5452,9 +5563,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Forcefully remove custom metadata for subvolume snapshot if specified key not exist in metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5501,9 +5612,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Verify metadata removal of subvolume snapshot after snapshot removal. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5533,9 +5644,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): # try to get metadata after removing snapshot. # Expecting error ENOENT with error message of snapshot does not exist - cmd_ret = self.mgr_cluster.mon_manager.run_cluster_cmd( - args=["fs", "subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group], - check_status=False, stdout=StringIO(), stderr=StringIO()) + cmd_ret = self.run_ceph_cmd( + args=["fs", "subvolume", "snapshot", "metadata", "get", self.volname, subvolname, snapshot, key, group], check_status=False, stdout=StringIO(), + stderr=StringIO()) self.assertEqual(cmd_ret.returncode, errno.ENOENT, "Expecting ENOENT error") self.assertIn(f"snapshot '{snapshot}' does not exist", cmd_ret.stderr.getvalue(), f"Expecting message: snapshot '{snapshot}' does not exist ") @@ -5561,9 +5672,9 @@ class TestSubvolumeSnapshots(TestVolumesHelper): """ Validate cleaning of stale subvolume snapshot metadata. """ - subvolname = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() + subvolname = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() # create group. 
self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5628,9 +5739,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): "data_pool", "gid", "mode", "mon_addrs", "mtime", "path", "pool_namespace", "type", "uid"] - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -5672,8 +5783,8 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): If no clone is performed then path /volumes/_index/clone/{track_id} will not exist. """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() # create subvolume. self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -5700,10 +5811,13 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ Verify subvolume snapshot info output if no clone is in pending state. """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() clone_list = [f'clone_{i}' for i in range(3)] + # disable "capped" clones + self.config_set('mgr', 'mgr/volumes/snapshot_clone_no_wait', False) + # create subvolume. self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -5741,8 +5855,8 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): Clones are not specified for particular target_group. Hence target_group should not be in the output as we don't show _nogroup (default group) """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() clone_list = [f'clone_{i}' for i in range(3)] # create subvolume. @@ -5754,6 +5868,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): # insert delay at the beginning of snapshot clone self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 5) + # disable "capped" clones + self.config_set('mgr', 'mgr/volumes/snapshot_clone_no_wait', False) + # schedule a clones for clone in clone_list: self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) @@ -5788,11 +5905,11 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): Verify subvolume snapshot info output if clones are in pending state. Clones are not specified for target_group. """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() - group = self._generate_random_group_name() - target_group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() + group = self._gen_subvol_grp_name() + target_group = self._gen_subvol_grp_name() # create groups self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -5844,8 +5961,8 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): Orphan clones should not list under pending clones. 
orphan_clones_count should display correct count of orphan clones' """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() clone_list = [f'clone_{i}' for i in range(3)] # create subvolume. @@ -5857,6 +5974,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): # insert delay at the beginning of snapshot clone self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 15) + # disable "capped" clones + self.config_set('mgr', 'mgr/volumes/snapshot_clone_no_wait', False) + # schedule a clones for clone in clone_list: self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) @@ -5891,7 +6011,7 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self.assertEqual(res['has_pending_clones'], "no") def test_non_clone_status(self): - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -5911,9 +6031,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_clone_inherit_snapshot_namespace_and_size(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() osize = self.DEFAULT_FILE_SIZE*1024*1024*12 # create subvolume, in an isolated namespace with a specified size @@ -5955,9 +6075,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_clone_inherit_quota_attrs(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() osize = self.DEFAULT_FILE_SIZE*1024*1024*12 # create subvolume with a specified size @@ -6003,9 +6123,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_clone_in_progress_getpath(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6052,9 +6172,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_clone_in_progress_snapshot_rm(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6100,9 +6220,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_clone_in_progress_source(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = 
self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6151,9 +6271,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ retain snapshots of a cloned subvolume and check disallowed operations """ - subvolume = self._generate_random_subvolume_name() - snapshot1, snapshot2 = self._generate_random_snapshot_name(2) - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot1, snapshot2 = self._gen_subvol_snap_name(2) + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6225,9 +6345,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ clone a snapshot from a snapshot retained subvolume """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6270,9 +6390,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ clone a subvolume from recreated subvolume's latest snapshot """ - subvolume = self._generate_random_subvolume_name() - snapshot1, snapshot2 = self._generate_random_snapshot_name(2) - clone = self._generate_random_clone_name(1) + subvolume = self._gen_subvol_name() + snapshot1, snapshot2 = self._gen_subvol_snap_name(2) + clone = self._gen_subvol_clone_name(1) # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6328,8 +6448,8 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ recreate a subvolume from one of its retained snapshots """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6372,9 +6492,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ ensure retained clone recreate fails if its trash is not yet purged """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume) @@ -6426,9 +6546,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_attr_clone(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6462,9 +6582,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ ensure failure status is not shown when clone is not in failed/cancelled state """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone1 = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = 
self._gen_subvol_snap_name() + clone1 = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6528,9 +6648,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ ensure failure status is shown when clone is in failed state and validate the reason """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone1 = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1 = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6573,9 +6693,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ ensure failure status is shown when clone is cancelled during pending state and validate the reason """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone1 = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1 = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6617,9 +6737,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): """ ensure failure status is shown when clone is cancelled during in-progress state and validate the reason """ - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone1 = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1 = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6661,9 +6781,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6694,9 +6814,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_quota_exceeded(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume with 20MB quota osize = self.DEFAULT_FILE_SIZE*1024*1024*20 @@ -6738,9 +6858,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): 'complete|cancelled|failed' states. It fails with EAGAIN in any other states. 
""" - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6785,9 +6905,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_retain_suid_guid(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6827,9 +6947,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_and_reclone(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone1, clone2 = self._generate_random_clone_name(2) + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1, clone2 = self._gen_subvol_clone_name(2) # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6880,9 +7000,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_cancel_in_progress(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6931,9 +7051,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): # yeh, 1gig -- we need the clone to run for sometime FILE_SIZE_MB = 1024 - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clones = self._generate_random_clone_name(NR_CLONES) + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clones = self._gen_subvol_snap_name(NR_CLONES) # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -6944,6 +7064,11 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): # snapshot subvolume self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + # Disable the snapshot_clone_no_wait config option + self.config_set('mgr', 'mgr/volumes/snapshot_clone_no_wait', False) + threads_available = self.config_get('mgr', 'mgr/volumes/snapshot_clone_no_wait') + self.assertEqual(threads_available, 'false') + # schedule clones for clone in clones: self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) @@ -6983,10 +7108,10 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_different_groups(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() - s_group, c_group = self._generate_random_group_name(2) + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() + 
s_group, c_group = self._gen_subvol_grp_name(2) # create groups self._fs_cmd("subvolumegroup", "create", self.volname, s_group) @@ -7026,9 +7151,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_fail_with_remove(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone1, clone2 = self._generate_random_clone_name(2) + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1, clone2 = self._gen_subvol_clone_name(2) pool_capacity = 32 * 1024 * 1024 # number of files required to fill up 99% of the pool @@ -7047,8 +7172,8 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): new_pool = "new_pool" self.fs.add_data_pool(new_pool) - self.fs.mon_manager.raw_cluster_cmd("osd", "pool", "set-quota", new_pool, - "max_bytes", "{0}".format(pool_capacity // 4)) + self.run_ceph_cmd("osd", "pool", "set-quota", new_pool, + "max_bytes", f"{pool_capacity // 4}") # schedule a clone self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1, "--pool_layout", new_pool) @@ -7089,9 +7214,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_on_existing_subvolumes(self): - subvolume1, subvolume2 = self._generate_random_subvolume_name(2) - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume1, subvolume2 = self._gen_subvol_name(2) + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create subvolumes self._fs_cmd("subvolume", "create", self.volname, subvolume1, "--mode=777") @@ -7141,9 +7266,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_pool_layout(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # add data pool new_pool = "new_pool" @@ -7185,10 +7310,10 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_under_group(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() - group = self._generate_random_group_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() + group = self._gen_subvol_grp_name() # create subvolume self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") @@ -7225,9 +7350,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self._wait_for_trash_empty() def test_subvolume_snapshot_clone_with_attrs(self): - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() mode = "777" uid = "1000" @@ -7274,9 +7399,9 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): and verify clone operation. further ensure that a legacy volume is not updated to v2, but clone is. 
""" - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # emulate a old-fashioned subvolume createpath = os.path.join(".", "volumes", "_nogroup", subvolume) @@ -7367,10 +7492,10 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): self.assertEqual(max_concurrent_clones, 2) def test_subvolume_under_group_snapshot_clone(self): - subvolume = self._generate_random_subvolume_name() - group = self._generate_random_group_name() - snapshot = self._generate_random_snapshot_name() - clone = self._generate_random_clone_name() + subvolume = self._gen_subvol_name() + group = self._gen_subvol_grp_name() + snapshot = self._gen_subvol_snap_name() + clone = self._gen_subvol_clone_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) @@ -7406,6 +7531,159 @@ class TestSubvolumeSnapshotClones(TestVolumesHelper): # verify trash dir is clean self._wait_for_trash_empty() + def test_subvolume_snapshot_clone_with_no_wait_enabled(self): + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1, clone2, clone3 = self._gen_subvol_clone_name(3) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=10) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Decrease number of cloner threads + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 2) + max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(max_concurrent_clones, 2) + + # Enable the snapshot_clone_no_wait config option + self.config_set('mgr', 'mgr/volumes/snapshot_clone_no_wait', True) + threads_available = self.config_get('mgr', 'mgr/volumes/snapshot_clone_no_wait') + self.assertEqual(threads_available, 'true') + + # Insert delay of 15 seconds at the beginning of the snapshot clone + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 15) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # schedule a clone2 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone2) + + # schedule a clone3 + cmd_ret = self.mgr_cluster.mon_manager.run_cluster_cmd( + args=["fs", "subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone3], check_status=False, stdout=StringIO(), + stderr=StringIO()) + self.assertEqual(cmd_ret.returncode, errno.EAGAIN, "Expecting EAGAIN error") + + # check clone1 status + self._wait_for_clone_to_complete(clone1) + + # verify clone1 + self._verify_clone(subvolume, snapshot, clone1) + + # check clone2 status + self._wait_for_clone_to_complete(clone2) + + # verify clone2 + self._verify_clone(subvolume, snapshot, clone2) + + # schedule clone3 , it should be successful this time + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone3) + + # check clone3 status + self._wait_for_clone_to_complete(clone3) + + # verify clone3 + self._verify_clone(subvolume, snapshot, clone3) + + # set number of cloner threads to default + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 4) + max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + 
self.assertEqual(max_concurrent_clones, 4) + + # set the snapshot_clone_delay to default + self.config_set('mgr', 'mgr/volumes/snapshot_clone_delay', 0) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + self._fs_cmd("subvolume", "rm", self.volname, clone2) + self._fs_cmd("subvolume", "rm", self.volname, clone3) + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_with_no_wait_not_enabled(self): + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1, clone2, clone3 = self._gen_subvol_clone_name(3) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume, "--mode=777") + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=10) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # Disable the snapshot_clone_no_wait config option + self.config_set('mgr', 'mgr/volumes/snapshot_clone_no_wait', False) + threads_available = self.config_get('mgr', 'mgr/volumes/snapshot_clone_no_wait') + self.assertEqual(threads_available, 'false') + + # Decrease number of cloner threads + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 2) + max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(max_concurrent_clones, 2) + + # schedule a clone1 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone1) + + # schedule a clone2 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone2) + + # schedule a clone3 + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone3) + + # check clone1 status + self._wait_for_clone_to_complete(clone1) + + # verify clone1 + self._verify_clone(subvolume, snapshot, clone1) + + # check clone2 status + self._wait_for_clone_to_complete(clone2) + + # verify clone2 + self._verify_clone(subvolume, snapshot, clone2) + + # check clone3 status + self._wait_for_clone_to_complete(clone3) + + # verify clone3 + self._verify_clone(subvolume, snapshot, clone3) + + # set the snapshot_clone_no_wait config option to default + self.config_set('mgr', 'mgr/volumes/snapshot_clone_no_wait', True) + threads_available = self.config_get('mgr', 'mgr/volumes/snapshot_clone_no_wait') + self.assertEqual(threads_available, 'true') + + # set number of cloner threads to default + self.config_set('mgr', 'mgr/volumes/max_concurrent_clones', 4) + max_concurrent_clones = int(self.config_get('mgr', 'mgr/volumes/max_concurrent_clones')) + self.assertEqual(max_concurrent_clones, 4) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone1) + self._fs_cmd("subvolume", "rm", self.volname, clone2) + self._fs_cmd("subvolume", "rm", self.volname, clone3) + + # verify trash dir is clean + self._wait_for_trash_empty() + class TestMisc(TestVolumesHelper): """Miscellaneous tests related to FS volume, subvolume group, and subvolume operations.""" @@ -7417,7 +7695,7 @@ class TestMisc(TestVolumesHelper): self.assertLessEqual(len(sessions), 1) # maybe mgr is already mounted # Get the mgr to definitely mount cephfs - subvolume = 
self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) sessions = self._session_list() self.assertEqual(len(sessions), 1) @@ -7433,7 +7711,7 @@ class TestMisc(TestVolumesHelper): self.assertLessEqual(len(sessions), 1) # maybe mgr is already mounted # Get the mgr to definitely mount cephfs - subvolume = self._generate_random_subvolume_name() + subvolume = self._gen_subvol_name() self._fs_cmd("subvolume", "create", self.volname, subvolume) sessions = self._session_list() self.assertEqual(len(sessions), 1) @@ -7537,8 +7815,8 @@ class TestMisc(TestVolumesHelper): accessible. further ensure that a legacy volume is not updated to v2. """ - subvolume1, subvolume2 = self._generate_random_subvolume_name(2) - group = self._generate_random_group_name() + subvolume1, subvolume2 = self._gen_subvol_name(2) + group = self._gen_subvol_grp_name() # emulate a old-fashioned subvolume -- one in the default group and # the other in a custom group @@ -7588,9 +7866,9 @@ class TestMisc(TestVolumesHelper): "type", "uid", "features", "state"] snap_md = ["created_at", "data_pool", "has_pending_clones"] - subvolume = self._generate_random_subvolume_name() - snapshot = self._generate_random_snapshot_name() - clone1, clone2 = self._generate_random_clone_name(2) + subvolume = self._gen_subvol_name() + snapshot = self._gen_subvol_snap_name() + clone1, clone2 = self._gen_subvol_clone_name(2) mode = "777" uid = "1000" gid = "1000" @@ -7695,8 +7973,8 @@ class TestMisc(TestVolumesHelper): poor man's upgrade test -- theme continues... ensure v1 to v2 upgrades are not done automatically due to various states of v1 """ - subvolume1, subvolume2, subvolume3 = self._generate_random_subvolume_name(3) - group = self._generate_random_group_name() + subvolume1, subvolume2, subvolume3 = self._gen_subvol_name(3) + group = self._gen_subvol_grp_name() # emulate a v1 subvolume -- in the default group subvol1_path = self._create_v1_subvolume(subvolume1) @@ -7753,8 +8031,8 @@ class TestMisc(TestVolumesHelper): poor man's upgrade test -- theme continues... ensure v1 to v2 upgrades work """ - subvolume1, subvolume2 = self._generate_random_subvolume_name(2) - group = self._generate_random_group_name() + subvolume1, subvolume2 = self._gen_subvol_name(2) + group = self._gen_subvol_grp_name() # emulate a v1 subvolume -- in the default group subvol1_path = self._create_v1_subvolume(subvolume1, has_snapshot=False) @@ -7786,7 +8064,7 @@ class TestMisc(TestVolumesHelper): on legacy subvol upgrade to v1 poor man's upgrade test -- theme continues... """ - subvol1, subvol2 = self._generate_random_subvolume_name(2) + subvol1, subvol2 = self._gen_subvol_name(2) # emulate a old-fashioned subvolume in the default group createpath1 = os.path.join(".", "volumes", "_nogroup", subvol1) @@ -7822,7 +8100,7 @@ class TestMisc(TestVolumesHelper): self._fs_cmd("subvolume", "authorize", self.volname, subvol1, authid1) # Validate that the mds path added is of subvol1 and not of subvol2 - out = json.loads(self.fs.mon_manager.raw_cluster_cmd("auth", "get", "client.alice", "--format=json-pretty")) + out = json.loads(self.get_ceph_cmd_stdout("auth", "get", "client.alice", "--format=json-pretty")) self.assertEqual("client.alice", out[0]["entity"]) self.assertEqual("allow rw path={0}".format(createpath1[1:]), out[0]["caps"]["mds"]) @@ -7839,8 +8117,8 @@ class TestMisc(TestVolumesHelper): on legacy subvol upgrade to v1 poor man's upgrade test -- theme continues... 
""" - subvol = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvol = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # emulate a old-fashioned subvolume -- in a custom group createpath = os.path.join(".", "volumes", group, subvol) @@ -7882,8 +8160,8 @@ class TestMisc(TestVolumesHelper): on legacy subvol upgrade to v1 poor man's upgrade test -- theme continues... """ - subvol = self._generate_random_subvolume_name() - group = self._generate_random_group_name() + subvol = self._gen_subvol_name() + group = self._gen_subvol_grp_name() # emulate a old-fashioned subvolume -- in a custom group createpath = os.path.join(".", "volumes", group, subvol) @@ -7926,8 +8204,8 @@ class TestPerModuleFinsherThread(TestVolumesHelper): as four subvolume cmds are run """ def test_volumes_module_finisher_thread(self): - subvol1, subvol2, subvol3 = self._generate_random_subvolume_name(3) - group = self._generate_random_group_name() + subvol1, subvol2, subvol3 = self._gen_subvol_name(3) + group = self._gen_subvol_grp_name() # create group self._fs_cmd("subvolumegroup", "create", self.volname, group) diff --git a/qa/tasks/cephfs/xfstests_dev.py b/qa/tasks/cephfs/xfstests_dev.py index cbb344305..7d5233f8f 100644 --- a/qa/tasks/cephfs/xfstests_dev.py +++ b/qa/tasks/cephfs/xfstests_dev.py @@ -143,8 +143,8 @@ class XFSTestsDev(CephFSTestCase): import configparser cp = configparser.ConfigParser() - cp.read_string(self.fs.mon_manager.raw_cluster_cmd( - 'auth', 'get-or-create', 'client.admin')) + cp.read_string(self.get_ceph_cmd_stdout('auth', 'get-or-create', + 'client.admin')) return cp['client.admin']['key'] diff --git a/qa/tasks/mgr/dashboard/test_health.py b/qa/tasks/mgr/dashboard/test_health.py index b6ffade4c..0b7b7a3b4 100644 --- a/qa/tasks/mgr/dashboard/test_health.py +++ b/qa/tasks/mgr/dashboard/test_health.py @@ -29,6 +29,7 @@ class HealthTest(DashboardTestCase): 'in': JList(int), 'last_failure': int, 'max_file_size': int, + 'max_xattr_size': int, 'explicitly_allowed_features': int, 'damaged': JList(int), 'tableserver': int, @@ -57,7 +58,9 @@ class HealthTest(DashboardTestCase): 'allow_snaps': bool, 'allow_multimds_snaps': bool, 'allow_standby_replay': bool, - 'refuse_client_session': bool + 'refuse_client_session': bool, + 'refuse_standby_for_another_fs': bool, + 'balance_automate': bool, }), 'ever_allowed_features': int, 'root': int diff --git a/qa/tasks/mgr/mgr_test_case.py b/qa/tasks/mgr/mgr_test_case.py index 94a230c8d..aa5bc6e56 100644 --- a/qa/tasks/mgr/mgr_test_case.py +++ b/qa/tasks/mgr/mgr_test_case.py @@ -29,8 +29,11 @@ class MgrCluster(CephCluster): def mgr_stop(self, mgr_id): self.mgr_daemons[mgr_id].stop() - def mgr_fail(self, mgr_id): - self.mon_manager.raw_cluster_cmd("mgr", "fail", mgr_id) + def mgr_fail(self, mgr_id=None): + if mgr_id is None: + self.mon_manager.raw_cluster_cmd("mgr", "fail") + else: + self.mon_manager.raw_cluster_cmd("mgr", "fail", mgr_id) def mgr_restart(self, mgr_id): self.mgr_daemons[mgr_id].restart() @@ -77,6 +80,8 @@ class MgrTestCase(CephTestCase): for daemon in cls.mgr_cluster.mgr_daemons.values(): daemon.stop() + cls.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "set", "down", "false") + for mgr_id in cls.mgr_cluster.mgr_ids: cls.mgr_cluster.mgr_fail(mgr_id) @@ -112,7 +117,11 @@ class MgrTestCase(CephTestCase): raise SkipTest( "Only have {0} manager daemons, {1} are required".format( len(cls.mgr_cluster.mgr_ids), cls.MGRS_REQUIRED)) - + + # We expect laggy OSDs in this testing environment so turn off this 
warning. + # See https://tracker.ceph.com/issues/61907 + cls.mgr_cluster.mon_manager.raw_cluster_cmd('config', 'set', 'mds', + 'defer_client_eviction_on_laggy_osds', 'false') cls.setup_mgrs() @classmethod diff --git a/qa/tasks/mgr/test_cli.py b/qa/tasks/mgr/test_cli.py new file mode 100644 index 000000000..a43be90ea --- /dev/null +++ b/qa/tasks/mgr/test_cli.py @@ -0,0 +1,32 @@ +import logging + +from .mgr_test_case import MgrTestCase + +log = logging.getLogger(__name__) + + +class TestCLI(MgrTestCase): + MGRS_REQUIRED = 2 + + def setUp(self): + super(TestCLI, self).setUp() + self.setup_mgrs() + + def test_set_down(self): + """ + That setting the down flag prevents a standby from promoting. + """ + + with self.assert_cluster_log("Activating manager daemon", present=False): + self.mgr_cluster.mon_manager.raw_cluster_cmd('mgr', 'set', 'down', 'true') + self.wait_until_true(lambda: self.mgr_cluster.get_active_id() == "", timeout=60) + + def test_set_down_off(self): + """ + That removing the down flag allows a standby to promote. + """ + + with self.assert_cluster_log("Activating manager daemon"): + self.mgr_cluster.mon_manager.raw_cluster_cmd('mgr', 'set', 'down', 'true') + self.wait_until_true(lambda: self.mgr_cluster.get_active_id() == "", timeout=60) + self.mgr_cluster.mon_manager.raw_cluster_cmd('mgr', 'set', 'down', 'false') diff --git a/qa/tasks/mgr/test_devicehealth.py b/qa/tasks/mgr/test_devicehealth.py new file mode 100644 index 000000000..d3aa33fc0 --- /dev/null +++ b/qa/tasks/mgr/test_devicehealth.py @@ -0,0 +1,33 @@ +from io import StringIO +import logging + +from .mgr_test_case import MgrTestCase + +log = logging.getLogger(__name__) + + +class TestDeviceHealth(MgrTestCase): + MGRS_REQUIRED = 1 + + def setUp(self): + super(TestDeviceHealth, self).setUp() + self.setup_mgrs() + + def tearDown(self): + self.mgr_cluster.mon_manager.raw_cluster_cmd('mgr', 'set', 'down', 'true') + self.mgr_cluster.mon_manager.raw_cluster_cmd('config', 'set', 'mon', 'mon_allow_pool_delete', 'true') + self.mgr_cluster.mon_manager.raw_cluster_cmd('osd', 'pool', 'rm', '.mgr', '.mgr', '--yes-i-really-really-mean-it-not-faking') + self.mgr_cluster.mon_manager.raw_cluster_cmd('mgr', 'set', 'down', 'false') + + def test_legacy_upgrade_snap(self): + """ + """ + + o = "ABC_DEADB33F_FA" + self.mon_manager.do_rados(["put", o, "-"], pool=".mgr", stdin=StringIO("junk")) + self.mon_manager.do_rados(["mksnap", "foo"], pool=".mgr") + self.mon_manager.do_rados(["rm", o], pool=".mgr") + self.mgr_cluster.mgr_fail() + + with self.assert_cluster_log("Unhandled exception from module 'devicehealth' while running", present=False): + self.wait_until_true(lambda: self.mgr_cluster.get_active_id() is not None, timeout=60) diff --git a/qa/tasks/radosgw_admin.py b/qa/tasks/radosgw_admin.py index 780dae1e1..28d58715f 100644 --- a/qa/tasks/radosgw_admin.py +++ b/qa/tasks/radosgw_admin.py @@ -7,8 +7,9 @@ Rgw admin testing against a running instance # grep '^ *# TESTCASE' | sed 's/^ *# TESTCASE //' # # to run this standalone: -# python qa/tasks/radosgw_admin.py [--user=uid] --host=host --port=port -# +# 1. uncomment vstart_runner lines to run locally against a vstart cluster +# 2. 
run: +# $ python qa/tasks/radosgw_admin.py [--user=uid] --host=host --port=port import json import logging @@ -27,7 +28,7 @@ import httplib2 #import pdb -import tasks.vstart_runner +#import tasks.vstart_runner from tasks.rgw import RGWEndpoint from tasks.util.rgw import rgwadmin as tasks_util_rgw_rgwadmin from tasks.util.rgw import get_user_summary, get_user_successful_ops @@ -1107,7 +1108,7 @@ def task(ctx, config): (err, out) = rgwadmin(ctx, client, ['zonegroup', 'get'], check_status=True) from teuthology.config import config -from teuthology.orchestra import cluster +from teuthology.orchestra import cluster, remote import argparse; @@ -1124,7 +1125,9 @@ def main(): else: port = 80 - client0 = tasks.vstart_runner.LocalRemote() + client0 = remote.Remote(host) + #client0 = tasks.vstart_runner.LocalRemote() + ctx = config ctx.cluster=cluster.Cluster(remotes=[(client0, [ 'ceph.client.rgw.%s' % (port), ]),]) diff --git a/qa/tasks/vstart_runner.py b/qa/tasks/vstart_runner.py index df4886fb6..3d429c265 100644 --- a/qa/tasks/vstart_runner.py +++ b/qa/tasks/vstart_runner.py @@ -777,9 +777,11 @@ class LocalCephManager(CephManager): self.rook = False self.testdir = None self.run_ceph_w_prefix = self.run_cluster_cmd_prefix = [CEPH_CMD] - self.CEPH_CMD = [CEPH_CMD] self.RADOS_CMD = [RADOS_CMD] + def get_ceph_cmd(self, **kwargs): + return [CEPH_CMD] + def find_remote(self, daemon_type, daemon_id): """ daemon_type like 'mds', 'osd' diff --git a/qa/workunits/cephadm/test_cephadm_timeout.py b/qa/workunits/cephadm/test_cephadm_timeout.py new file mode 100755 index 000000000..67b43a2df --- /dev/null +++ b/qa/workunits/cephadm/test_cephadm_timeout.py @@ -0,0 +1,179 @@ +#!/usr/bin/python3 -s + +import time +import os +import fcntl +import subprocess +import uuid +import sys + +from typing import Optional, Any + +LOCK_DIR = '/run/cephadm' +DATA_DIR = '/var/lib/ceph' + +class _Acquire_ReturnProxy(object): + def __init__(self, lock: 'FileLock') -> None: + self.lock = lock + return None + + def __enter__(self) -> 'FileLock': + return self.lock + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + self.lock.release() + return None + +class FileLock(object): + def __init__(self, name: str, timeout: int = -1) -> None: + if not os.path.exists(LOCK_DIR): + os.mkdir(LOCK_DIR, 0o700) + self._lock_file = os.path.join(LOCK_DIR, name + '.lock') + + self._lock_file_fd: Optional[int] = None + self.timeout = timeout + self._lock_counter = 0 + return None + + @property + def is_locked(self) -> bool: + return self._lock_file_fd is not None + + def acquire(self, timeout: Optional[int] = None, poll_intervall: float = 0.05) -> _Acquire_ReturnProxy: + # Use the default timeout, if no timeout is provided. + if timeout is None: + timeout = self.timeout + + # Increment the number right at the beginning. + # We can still undo it, if something fails. + self._lock_counter += 1 + + start_time = time.time() + try: + while True: + if not self.is_locked: + self._acquire() + + if self.is_locked: + break + elif timeout >= 0 and time.time() - start_time > timeout: + raise Exception(self._lock_file) + else: + time.sleep(poll_intervall) + except Exception: + # Something did go wrong, so decrement the counter. 
+ self._lock_counter = max(0, self._lock_counter - 1) + + raise + return _Acquire_ReturnProxy(lock=self) + + def release(self, force: bool = False) -> None: + if self.is_locked: + self._lock_counter -= 1 + + if self._lock_counter == 0 or force: + self._release() + self._lock_counter = 0 + + return None + + def __enter__(self) -> 'FileLock': + self.acquire() + return self + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + self.release() + return None + + def __del__(self) -> None: + self.release(force=True) + return None + + def _acquire(self) -> None: + open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC + fd = os.open(self._lock_file, open_mode) + + try: + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + except (IOError, OSError): + os.close(fd) + else: + self._lock_file_fd = fd + return None + + def _release(self) -> None: + fd = self._lock_file_fd + self._lock_file_fd = None + fcntl.flock(fd, fcntl.LOCK_UN) # type: ignore + os.close(fd) # type: ignore + return None + +def _is_fsid(s): + try: + uuid.UUID(s) + except ValueError: + return False + return True + +def find_fsid(): + if not os.path.exists(DATA_DIR): + raise Exception(f'{DATA_DIR} does not exist. Aborting...') + + for d in os.listdir(DATA_DIR): + # assume the first thing we find that is an fsid + # is what we want. Not expecting multiple clusters + # to have been installed here. + if _is_fsid(d): + return d + raise Exception(f'No fsid dir found in {DATA_DIR}. Aborting...') + +def main(): + print('Looking for cluster fsid...') + fsid = find_fsid() + print(f'Found fsid {fsid}') + + print('Setting cephadm command timeout to 120...') + subprocess.run(['cephadm', 'shell', '--', 'ceph', 'config', 'set', + 'mgr', 'mgr/cephadm/default_cephadm_command_timeout', '120'], + check=True) + + print('Taking hold of cephadm lock for 300 seconds...') + lock = FileLock(fsid, 300) + lock.acquire() + + print('Triggering cephadm device refresh...') + subprocess.run(['cephadm', 'shell', '--', 'ceph', 'orch', 'device', 'ls', '--refresh'], + check=True) + + print('Sleeping 150 seconds to allow for timeout to occur...') + time.sleep(150) + + print('Checking ceph health detail...') + # directing stdout to res.stdout via "capture_stdout" option + # (and same for stderr) seems to have been added in python 3.7. + # Using files so this works with 3.6 as well + with open('/tmp/ceph-health-detail-stdout', 'w') as f_stdout: + with open('/tmp/ceph-health-detail-stderr', 'w') as f_stderr: + subprocess.run(['cephadm', 'shell', '--', 'ceph', 'health', 'detail'], + check=True, stdout=f_stdout, stderr=f_stderr) + + res_stdout = open('/tmp/ceph-health-detail-stdout', 'r').read() + res_stderr = open('/tmp/ceph-health-detail-stderr', 'r').read() + print(f'"cephadm shell -- ceph health detail" stdout:\n{res_stdout}') + print(f'"cephadm shell -- ceph health detail" stderr:\n{res_stderr}') + + print('Checking for correct health warning in health detail...') + if 'CEPHADM_REFRESH_FAILED' not in res_stdout: + raise Exception('No health warning caused by timeout was raised') + if 'Command "cephadm ceph-volume -- inventory" timed out' not in res_stdout: + raise Exception('Health warnings did not contain message about time out') + + print('Health warnings found successfully.
+    print('Triggering cephadm device refresh...')
+    subprocess.run(['cephadm', 'shell', '--', 'ceph', 'orch', 'device', 'ls', '--refresh'],
+                   check=True)
+
+    print('Sleeping 150 seconds to allow for timeout to occur...')
+    time.sleep(150)
+
+    print('Checking ceph health detail...')
+    # directing stdout to res.stdout via "capture_stdout" option
+    # (and same for stderr) seems to have been added in python 3.7.
+    # Using files so this works with 3.6 as well
+    with open('/tmp/ceph-health-detail-stdout', 'w') as f_stdout:
+        with open('/tmp/ceph-health-detail-stderr', 'w') as f_stderr:
+            subprocess.run(['cephadm', 'shell', '--', 'ceph', 'health', 'detail'],
+                           check=True, stdout=f_stdout, stderr=f_stderr)
+
+    res_stdout = open('/tmp/ceph-health-detail-stdout', 'r').read()
+    res_stderr = open('/tmp/ceph-health-detail-stderr', 'r').read()
+    print(f'"cephadm shell -- ceph health detail" stdout:\n{res_stdout}')
+    print(f'"cephadm shell -- ceph health detail" stderr:\n{res_stderr}')
+
+    print('Checking for correct health warning in health detail...')
+    if 'CEPHADM_REFRESH_FAILED' not in res_stdout:
+        raise Exception('No health warning caused by timeout was raised')
+    if 'Command "cephadm ceph-volume -- inventory" timed out' not in res_stdout:
+        raise Exception('Health warnings did not contain message about time out')
+
+    print('Health warnings found successfully. Exiting.')
+    return 0
+
+
+if __name__ == '__main__':
+    if os.getuid() != 0:
+        print('Trying to run myself with sudo...')
+        os.execvp('sudo', [sys.executable] + list(sys.argv))
+    main()
diff --git a/qa/workunits/fs/full/subvolume_clone.sh b/qa/workunits/fs/full/subvolume_clone.sh
index a11131215..d61e07111 100755
--- a/qa/workunits/fs/full/subvolume_clone.sh
+++ b/qa/workunits/fs/full/subvolume_clone.sh
@@ -7,8 +7,8 @@ set -ex
 # Hence the subsequent subvolume commands on the clone fail with
 # 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)' traceback.

-# The osd is of the size 1GB. The full-ratios are set so that osd is treated full
-# at around 600MB. The subvolume is created and 100MB is written.
+# The osd is of the size 2GiB. The full-ratios are set so that the osd is treated as full
+# at around 1.2GiB. The subvolume is created and 200MB is written.
 # The subvolume is snapshotted and cloned ten times. Since the clone delay is set to 15 seconds,
 # all the clones reach pending state for sure. Among the ten clones, only a few succeed and
 # the rest fail with ENOSPACE.
@@ -47,7 +47,7 @@ echo "After ratios are set"
 df -h
 ceph osd df

-for i in {1..100};do sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path_0/1MB_file-$i status=progress bs=1M count=1 conv=fdatasync;done
+for i in {1..100};do sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path_0/2MB_file-$i status=progress bs=1M count=2 conv=fdatasync;done

 # For debugging
 echo "After subvolumes are written"
@@ -60,6 +60,9 @@ ceph fs subvolume snapshot create cephfs sub_0 snap_0
 # Set clone snapshot delay
 ceph config set mgr mgr/volumes/snapshot_clone_delay 15

+# Disable the snapshot_clone_no_wait config option
+ceph config set mgr mgr/volumes/snapshot_clone_no_wait false
+
 # Schedule a few clones, some of which will fail with no space
 for i in $(eval echo {1..$NUM_CLONES});do ceph fs subvolume snapshot clone cephfs sub_0 snap_0 clone_$i;done
diff --git a/qa/workunits/fs/full/subvolume_rm.sh b/qa/workunits/fs/full/subvolume_rm.sh
index a464e30f5..2a3bf956d 100755
--- a/qa/workunits/fs/full/subvolume_rm.sh
+++ b/qa/workunits/fs/full/subvolume_rm.sh
@@ -2,8 +2,8 @@ set -ex

 # This testcase tests the scenario of the 'ceph fs subvolume rm' mgr command
-# when the osd is full. The command used to hang. The osd is of the size 1GB.
-# The subvolume is created and 500MB file is written. The full-ratios are
+# when the osd is full. The command used to hang. The osd is of the size 2GiB.
+# The subvolume is created and a 1GB file is written. The full-ratios are
 # set below 500MB such that the osd is treated as full. Now the subvolume
 # is removed. This should be successful with the introduction of FULL
 # capabilities which the mgr holds.
@@ -21,7 +21,7 @@ echo "Before write"
 df -h
 ceph osd df

-sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/500MB_file-1 status=progress bs=1M count=500
+sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/1GB_file-1 status=progress bs=1M count=1000

 ceph osd set-full-ratio 0.2
 ceph osd set-nearfull-ratio 0.16
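The doubled sizes in these tests follow the same full-ratio arithmetic as before; a quick sanity check of the numbers (a minimal sketch, values taken from the scripts above):

# with a 2GiB OSD and set-full-ratio 0.2, the OSD is treated as full at ~410MiB,
# so the 1GB write above comfortably drives it over the threshold
osd_size_mb=2048
full_ratio=0.2
awk -v s="$osd_size_mb" -v r="$full_ratio" 'BEGIN { printf "full at ~%dMiB\n", s*r }'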
diff --git a/qa/workunits/fs/full/subvolume_snapshot_rm.sh b/qa/workunits/fs/full/subvolume_snapshot_rm.sh
index f6d0add9f..8df89d3c7 100755
--- a/qa/workunits/fs/full/subvolume_snapshot_rm.sh
+++ b/qa/workunits/fs/full/subvolume_snapshot_rm.sh
@@ -7,8 +7,8 @@ set -ex
 # snapshot rm of the same snapshot fails with 'MetadataMgrException: -2 (section 'GLOBAL' does not exist)'
 # traceback.

-# The osd is of the size 1GB. The subvolume is created and 800MB file is written.
-# Then full-ratios are set below 500MB such that the osd is treated as full.
+# The osd is of the size 2GiB. The subvolume is created and a 1.6GB file is written.
+# Then full-ratios are set below 1GiB such that the osd is treated as full.
 # The subvolume snapshot is taken which succeeds as no extra space is required
 # for snapshot. Now, the removal of the snapshot fails with ENOSPACE as it
 # fails to remove the snapshot metadata set. The snapshot removal fails
@@ -31,8 +31,8 @@ echo "Before write"
 df $CEPH_MNT
 ceph osd df

-# Write 800MB file and set full ratio to around 200MB
-ignore_failure sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/800MB_file-1 status=progress bs=1M count=800 conv=fdatasync
+# Write 1.6GB file and set full ratio to around 400MB
+ignore_failure sudo dd if=/dev/urandom of=$CEPH_MNT$subvol_path/1.6GB_file-1 status=progress bs=1M count=1600 conv=fdatasync

 ceph osd set-full-ratio 0.2
 ceph osd set-nearfull-ratio 0.16
diff --git a/qa/workunits/fs/quota/quota.sh b/qa/workunits/fs/quota/quota.sh
index 1315be6d8..a2f5c459d 100755
--- a/qa/workunits/fs/quota/quota.sh
+++ b/qa/workunits/fs/quota/quota.sh
@@ -29,7 +29,7 @@ mkdir quota-test
 cd quota-test

 # bytes
-setfattr . -n ceph.quota.max_bytes -v 100000000  # 100m
+setfattr . -n ceph.quota.max_bytes -v 100M
 expect_false write_file big 1000     # 1g
 expect_false write_file second 10
 setfattr . -n ceph.quota.max_bytes -v 0
@@ -57,7 +57,7 @@ rm -rf *
 # mix
 mkdir bytes bytes/files

-setfattr bytes -n ceph.quota.max_bytes -v 10000000 #10m
+setfattr bytes -n ceph.quota.max_bytes -v 10M
 setfattr bytes/files -n ceph.quota.max_files -v 5
 dd if=/dev/zero of=bytes/files/1 bs=1M count=4
 dd if=/dev/zero of=bytes/files/2 bs=1M count=4
@@ -78,7 +78,7 @@ rm -rf *
 #mv
 mkdir files limit
 truncate files/file -s 10G
-setfattr limit -n ceph.quota.max_bytes -v 1000000 #1m
+setfattr limit -n ceph.quota.max_bytes -v 1M
 expect_false mv files limit/

@@ -88,8 +88,8 @@ rm -rf *
 #limit by ancestor

 mkdir -p ancestor/p1/p2/parent/p3
-setfattr ancestor -n ceph.quota.max_bytes -v 1000000
-setfattr ancestor/p1/p2/parent -n ceph.quota.max_bytes -v 1000000000 #1g
+setfattr ancestor -n ceph.quota.max_bytes -v 1M
+setfattr ancestor/p1/p2/parent -n ceph.quota.max_bytes -v 1G
 expect_false write_file ancestor/p1/p2/parent/p3/file1 900 #900m
 stat --printf="%n %s\n" ancestor/p1/p2/parent/p3/file1

@@ -104,6 +104,14 @@ expect_false setfattr -n ceph.quota.max_bytes -v -1 .
 expect_false setfattr -n ceph.quota.max_bytes -v -9223372036854775808 .
 expect_false setfattr -n ceph.quota.max_bytes -v -9223372036854775809 .

+setfattr -n ceph.quota.max_bytes -v 0 .
+setfattr -n ceph.quota.max_bytes -v 1Ti .
+setfattr -n ceph.quota.max_bytes -v 8388607Ti .
+expect_false setfattr -n ceph.quota.max_bytes -v 8388608Ti .
+expect_false setfattr -n ceph.quota.max_bytes -v -1Ti .
+expect_false setfattr -n ceph.quota.max_bytes -v -8388609Ti .
+expect_false setfattr -n ceph.quota.max_bytes -v -8388610Ti .
+
 setfattr -n ceph.quota.max_files -v 0 .
 setfattr -n ceph.quota.max_files -v 1 .
 setfattr -n ceph.quota.max_files -v 9223372036854775807 .
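The quota hunks above exercise IEC-style suffixes for ceph.quota.max_bytes instead of raw byte counts; a minimal usage sketch (the mount point is illustrative):

# assumes a CephFS mount at /mnt/cephfs (hypothetical path)
setfattr -n ceph.quota.max_bytes -v 100M /mnt/cephfs/dir   # equivalent to -v 104857600
getfattr -n ceph.quota.max_bytes /mnt/cephfs/dir           # reported back in bytes
setfattr -n ceph.quota.max_bytes -v 0 /mnt/cephfs/dir      # 0 clears the quota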
diff --git a/qa/workunits/kernel_untar_build.sh b/qa/workunits/kernel_untar_build.sh
index 9b60f065c..602ce04a7 100755
--- a/qa/workunits/kernel_untar_build.sh
+++ b/qa/workunits/kernel_untar_build.sh
@@ -2,11 +2,11 @@ set -e

-wget -O linux.tar.gz http://download.ceph.com/qa/linux-5.4.tar.gz
+wget -O linux.tar.xz http://download.ceph.com/qa/linux-6.5.11.tar.xz

 mkdir t
 cd t
-tar xzf ../linux.tar.gz
+tar xJf ../linux.tar.xz
 cd linux*
 make defconfig
 make -j`grep -c processor /proc/cpuinfo`
diff --git a/qa/workunits/mon/config.sh b/qa/workunits/mon/config.sh
index 1b00201ae..10cbe5630 100755
--- a/qa/workunits/mon/config.sh
+++ b/qa/workunits/mon/config.sh
@@ -98,11 +98,11 @@ ceph tell osd.0 config unset debug_asok
 ceph tell osd.0 config unset debug_asok
 ceph config rm osd.0 debug_asok

-while ceph config show osd.0 | grep debug_asok | grep mon
+while ceph config show osd.0 | grep '^debug_asok[[:space:]]' | grep mon
 do
     sleep 1
 done
-ceph config show osd.0 | grep -c debug_asok | grep 0
+ceph config show osd.0 | grep -c '^debug_asok[[:space:]]' | grep 0

 ceph config set osd.0 osd_scrub_cost 123
 while ! ceph config show osd.0 | grep osd_scrub_cost | grep mon
@@ -111,6 +111,13 @@ do
 done
 ceph config rm osd.0 osd_scrub_cost

+# RGW daemons test config set
+ceph config set client.rgw debug_rgw 22
+while ! ceph config show client.rgw | grep debug_rgw | grep 22 | grep mon
+do
+    sleep 1
+done
+
 # show-with-defaults
 ceph config show-with-defaults osd.0 | grep debug_asok
@@ -130,6 +137,21 @@ rm -f $t1 $t2

 expect_false ceph config reset
 expect_false ceph config reset -1
+
+
+# test parallel config set
+# reproducer for https://tracker.ceph.com/issues/62832
+ceph config reset 0
+for ((try = 0; try < 10; try++)); do
+    set +x
+    for ((i = 0; i < 100; i++)); do
+        # Use a config that will get "handled" by the Objecter instantiated by the ceph binary
+        ceph config set client rados_mon_op_timeout $((i+300)) &
+    done 2> /dev/null
+    set -x
+    wait
+done
+
 # we are at end of testing, so it's okay to revert everything
 ceph config reset 0
diff --git a/qa/workunits/mon/rbd_snaps_ops.sh b/qa/workunits/mon/rbd_snaps_ops.sh
index eb88565ea..0e5b16b7b 100755
--- a/qa/workunits/mon/rbd_snaps_ops.sh
+++ b/qa/workunits/mon/rbd_snaps_ops.sh
@@ -36,6 +36,7 @@ expect 'rbd --pool=test snap ls image' 0
 expect 'rbd --pool=test snap rm image@snapshot' 0

 expect 'ceph osd pool mksnap test snapshot' 22
+expect 'rados -p test mksnap snapshot' 1

 expect 'ceph osd pool delete test test --yes-i-really-really-mean-it' 0
@@ -52,6 +53,8 @@ expect 'rbd --pool test-foo snap create image@snapshot' 0
 ceph osd pool delete test-bar test-bar --yes-i-really-really-mean-it || true
 expect 'ceph osd pool create test-bar 8' 0
 expect 'ceph osd pool application enable test-bar rbd'
+# "rados cppool" without --yes-i-really-mean-it should fail
+expect 'rados cppool test-foo test-bar' 1
 expect 'rados cppool test-foo test-bar --yes-i-really-mean-it' 0
 expect 'rbd --pool test-bar snap rm image@snapshot' 95
 expect 'ceph osd pool delete test-foo test-foo --yes-i-really-really-mean-it' 0
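The rbd_snaps_ops.sh additions assert that guarded commands fail with a specific exit status; a minimal sketch of the same check outside the expect() helper (pool names illustrative):

# "rados cppool" must refuse to run without the safety flag
if rados cppool test-foo test-bar; then
    echo "cppool unexpectedly succeeded without --yes-i-really-mean-it"
    exit 1
fi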
diff --git a/qa/workunits/rbd/cli_generic.sh b/qa/workunits/rbd/cli_generic.sh
index 57279d26d..15c47074d 100755
--- a/qa/workunits/rbd/cli_generic.sh
+++ b/qa/workunits/rbd/cli_generic.sh
@@ -432,6 +432,7 @@ test_trash() {
     rbd trash mv test2
     ID=`rbd trash ls | cut -d ' ' -f 1`
     rbd info --image-id $ID | grep "rbd image 'test2'"
+    rbd children --image-id $ID | wc -l | grep 0

     rbd trash restore $ID
     rbd ls | grep test2
@@ -449,6 +450,7 @@ test_trash() {
     rbd create $RBD_CREATE_ARGS -s 1 test1
     rbd snap create test1@snap1
     rbd snap protect test1@snap1
+    rbd clone test1@snap1 clone

     rbd trash mv test1
     rbd trash ls | grep test1
@@ -459,7 +461,10 @@ test_trash() {
     ID=`rbd trash ls | cut -d ' ' -f 1`
     rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 1
     rbd snap ls --image-id $ID | grep '.*snap1.*'
+    rbd children --image-id $ID | wc -l | grep 1
+    rbd children --image-id $ID | grep 'clone'

+    rbd rm clone
     rbd snap unprotect --image-id $ID --snap snap1
     rbd snap rm --image-id $ID --snap snap1
     rbd snap ls --image-id $ID | grep -v 'SNAPID' | wc -l | grep 0
@@ -1261,7 +1266,6 @@ test_trash_purge_schedule_recovery() {
         jq 'select(.name == "rbd_support")' |
         jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
     ceph osd blocklist add $CLIENT_ADDR
-    ceph osd blocklist ls | grep $CLIENT_ADDR

     # Check that you can add a trash purge schedule after a few retries
     expect_fail rbd trash purge schedule add -p rbd3 10m
@@ -1420,7 +1424,6 @@ test_mirror_snapshot_schedule_recovery() {
         jq 'select(.name == "rbd_support")' |
         jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
     ceph osd blocklist add $CLIENT_ADDR
-    ceph osd blocklist ls | grep $CLIENT_ADDR

     # Check that you can add a mirror snapshot schedule after a few retries
     expect_fail rbd mirror snapshot schedule add -p rbd3/ns1 --image test1 2m
@@ -1529,7 +1532,6 @@ test_perf_image_iostat_recovery() {
         jq 'select(.name == "rbd_support")' |
         jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
     ceph osd blocklist add $CLIENT_ADDR
-    ceph osd blocklist ls | grep $CLIENT_ADDR

     expect_fail rbd perf image iostat --format json rbd3/ns
     sleep 10
@@ -1661,7 +1663,6 @@ test_tasks_recovery() {
         jq 'select(.name == "rbd_support")' |
         jq -r '[.addrvec[0].addr, "/", .addrvec[0].nonce|tostring] | add')
     ceph osd blocklist add $CLIENT_ADDR
-    ceph osd blocklist ls | grep $CLIENT_ADDR

     expect_fail ceph rbd task add flatten rbd2/clone1
     sleep 10
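The new test_trash() checks rely on rbd children resolving a parent by image id even while it sits in the trash; a minimal sketch of that flow (image and pool names illustrative):

rbd create --size 1G img
rbd snap create img@s
rbd snap protect img@s
rbd clone img@s child
rbd trash mv img
ID=$(rbd trash ls | awk '{print $1}')
rbd children --image-id $ID     # the clone is still listed for the trashed parent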
diff --git a/qa/workunits/rbd/compare_mirror_image_alternate_primary.sh b/qa/workunits/rbd/compare_mirror_image_alternate_primary.sh
new file mode 100755
index 000000000..78a390230
--- /dev/null
+++ b/qa/workunits/rbd/compare_mirror_image_alternate_primary.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+
+set -ex
+
+IMAGE=image-alternate-primary
+MIRROR_IMAGE_MODE=snapshot
+MIRROR_POOL_MODE=image
+MOUNT=test-alternate-primary
+RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff'
+RBD_MIRROR_INSTANCES=1
+RBD_MIRROR_MODE=snapshot
+RBD_MIRROR_USE_EXISTING_CLUSTER=1
+
+. $(dirname $0)/rbd_mirror_helpers.sh
+
+take_mirror_snapshots() {
+    local cluster=$1
+    local pool=$2
+    local image=$3
+
+    for i in {1..30}; do
+        mirror_image_snapshot $cluster $pool $image
+        sleep 3
+    done
+}
+
+slow_untar_workload() {
+    local mountpt=$1
+
+    cp linux-5.4.tar.gz $mountpt
+    # run workload that updates the data and metadata of multiple files on disk.
+    # rate limit the workload such that the mirror snapshots can be taken as the
+    # contents of the image are progressively changed by the workload.
+    local ret=0
+    timeout 5m bash -c "zcat $mountpt/linux-5.4.tar.gz \
+        | pv -L 256K | tar xf - -C $mountpt" || ret=$?
+    if ((ret != 124)); then
+        echo "Workload completed prematurely"
+        return 1
+    fi
+}
+
+setup
+
+start_mirrors ${CLUSTER1}
+start_mirrors ${CLUSTER2}
+
+# initial setup
+create_image_and_enable_mirror ${CLUSTER1} ${POOL} ${IMAGE} \
+    ${RBD_MIRROR_MODE} 10G
+
+if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
+    DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t nbd \
+              -o try-netlink ${POOL}/${IMAGE})
+elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
+    DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t krbd \
+              ${POOL}/${IMAGE})
+else
+    echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}"
+    exit 1
+fi
+sudo mkfs.ext4 ${DEV}
+mkdir ${MOUNT}
+
+wget https://download.ceph.com/qa/linux-5.4.tar.gz
+
+for i in {1..25}; do
+    # create mirror snapshots every few seconds under I/O
+    sudo mount ${DEV} ${MOUNT}
+    sudo chown $(whoami) ${MOUNT}
+    rm -rf ${MOUNT}/*
+    take_mirror_snapshots ${CLUSTER1} ${POOL} ${IMAGE} &
+    SNAP_PID=$!
+    slow_untar_workload ${MOUNT}
+    wait $SNAP_PID
+    sudo umount ${MOUNT}
+
+    # calculate hash before demotion of primary image
+    DEMOTE_MD5=$(sudo md5sum ${DEV} | awk '{print $1}')
+    sudo rbd --cluster ${CLUSTER1} device unmap -t ${RBD_DEVICE_TYPE} ${DEV}
+
+    demote_image ${CLUSTER1} ${POOL} ${IMAGE}
+    wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${IMAGE} 'up+unknown'
+    wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${IMAGE} 'up+unknown'
+    promote_image ${CLUSTER2} ${POOL} ${IMAGE}
+
+    # calculate hash after promotion of secondary image
+    if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
+        DEV=$(sudo rbd --cluster ${CLUSTER2} device map -t nbd \
+                  -o try-netlink ${POOL}/${IMAGE})
+    elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
+        DEV=$(sudo rbd --cluster ${CLUSTER2} device map -t krbd ${POOL}/${IMAGE})
+    fi
+    PROMOTE_MD5=$(sudo md5sum ${DEV} | awk '{print $1}')
+
+    if [[ "${DEMOTE_MD5}" != "${PROMOTE_MD5}" ]]; then
+        echo "Mismatch at iteration ${i}: ${DEMOTE_MD5} != ${PROMOTE_MD5}"
+        exit 1
+    fi
+
+    TEMP=${CLUSTER1}
+    CLUSTER1=${CLUSTER2}
+    CLUSTER2=${TEMP}
+done
+
+echo OK
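The demote/promote handoff in the loop above is the generic pattern for flipping the primary in snapshot-based mirroring; a condensed sketch (cluster and image names illustrative):

rbd --cluster site-a mirror image demote pool/img
# wait until both sites report the image as up+unknown before promoting,
# otherwise the promotion can race the final mirror snapshot sync
rbd --cluster site-b mirror image promote pool/img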
diff --git a/qa/workunits/rbd/compare_mirror_images.sh b/qa/workunits/rbd/compare_mirror_images.sh
new file mode 100755
index 000000000..cbaa77a71
--- /dev/null
+++ b/qa/workunits/rbd/compare_mirror_images.sh
@@ -0,0 +1,170 @@
+#!/usr/bin/env bash
+
+set -ex
+
+IMG_PREFIX=image-primary
+MIRROR_IMAGE_MODE=snapshot
+MIRROR_POOL_MODE=image
+MNTPT_PREFIX=test-primary
+RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff'
+RBD_MIRROR_INSTANCES=1
+RBD_MIRROR_MODE=snapshot
+RBD_MIRROR_USE_EXISTING_CLUSTER=1
+
+. $(dirname $0)/rbd_mirror_helpers.sh
+
+take_mirror_snapshots() {
+    local cluster=$1
+    local pool=$2
+    local image=$3
+
+    for i in {1..30}; do
+        mirror_image_snapshot $cluster $pool $image
+        sleep 3
+    done
+}
+
+slow_untar_workload() {
+    local mountpt=$1
+
+    cp linux-5.4.tar.gz $mountpt
+    # run workload that updates the data and metadata of multiple files on disk.
+    # rate limit the workload such that the mirror snapshots can be taken as the
+    # contents of the image are progressively changed by the workload.
+    local ret=0
+    timeout 5m bash -c "zcat $mountpt/linux-5.4.tar.gz \
+        | pv -L 256K | tar xf - -C $mountpt" || ret=$?
+    if ((ret != 124)); then
+        echo "Workload completed prematurely"
+        return 1
+    fi
+}
+
+wait_for_image_removal() {
+    local cluster=$1
+    local pool=$2
+    local image=$3
+
+    for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do
+        if ! rbd --cluster $cluster ls $pool | grep -wq $image; then
+            return 0
+        fi
+        sleep $s
+    done
+
+    echo "image ${pool}/${image} not removed from cluster ${cluster}"
+    return 1
+}
+
+compare_demoted_promoted_image() {
+    local dev=${DEVS[$1-1]}
+    local img=${IMG_PREFIX}$1
+    local mntpt=${MNTPT_PREFIX}$1
+    local demote_md5 promote_md5
+
+    sudo umount ${mntpt}
+
+    # calculate hash before demotion of primary image
+    demote_md5=$(sudo md5sum ${dev} | awk '{print $1}')
+    sudo rbd --cluster ${CLUSTER1} device unmap -t ${RBD_DEVICE_TYPE} \
+        ${POOL}/${img}
+
+    demote_image ${CLUSTER1} ${POOL} ${img}
+    wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${img} 'up+unknown'
+    wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${img} 'up+unknown'
+    promote_image ${CLUSTER2} ${POOL} ${img}
+
+    # calculate hash after promotion of secondary image
+    if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
+        dev=$(sudo rbd --cluster ${CLUSTER2} device map -t nbd \
+                  -o try-netlink ${POOL}/${img})
+    elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
+        dev=$(sudo rbd --cluster ${CLUSTER2} device map -t krbd ${POOL}/${img})
+    fi
+    promote_md5=$(sudo md5sum ${dev} | awk '{print $1}')
+    sudo rbd --cluster ${CLUSTER2} device unmap -t ${RBD_DEVICE_TYPE} ${dev}
+
+    if [[ "${demote_md5}" != "${promote_md5}" ]]; then
+        echo "Mismatch for image ${POOL}/${img}: ${demote_md5} != ${promote_md5}"
+        return 1
+    fi
+}
+
+setup
+
+start_mirrors ${CLUSTER1}
+start_mirrors ${CLUSTER2}
+
+wget https://download.ceph.com/qa/linux-5.4.tar.gz
+
+for i in {1..10}; do
+    DEVS=()
+    SNAP_PIDS=()
+    COMPARE_PIDS=()
+    WORKLOAD_PIDS=()
+    RET=0
+    for j in {1..10}; do
+        IMG=${IMG_PREFIX}${j}
+        MNTPT=${MNTPT_PREFIX}${j}
+        create_image_and_enable_mirror ${CLUSTER1} ${POOL} ${IMG} \
+            ${RBD_MIRROR_MODE} 10G
+        if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
+            DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t nbd \
+                      -o try-netlink ${POOL}/${IMG})
+        elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
+            DEV=$(sudo rbd --cluster ${CLUSTER1} device map -t krbd \
+                      ${POOL}/${IMG})
+        else
+            echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}"
+            exit 1
+        fi
+        DEVS+=($DEV)
+        sudo mkfs.ext4 ${DEV}
+        mkdir ${MNTPT}
+        sudo mount ${DEV} ${MNTPT}
+        sudo chown $(whoami) ${MNTPT}
+        # create mirror snapshots under I/O every few seconds
+        take_mirror_snapshots ${CLUSTER1} ${POOL} ${IMG} &
+        SNAP_PIDS+=($!)
+        slow_untar_workload ${MNTPT} &
+        WORKLOAD_PIDS+=($!)
+    done
+    for pid in ${SNAP_PIDS[@]}; do
+        wait $pid || RET=$?
+    done
+    if ((RET != 0)); then
+        echo "take_mirror_snapshots failed"
+        exit 1
+    fi
+    for pid in ${WORKLOAD_PIDS[@]}; do
+        wait $pid || RET=$?
+    done
+    if ((RET != 0)); then
+        echo "slow_untar_workload failed"
+        exit 1
+    fi
+
+    for j in {1..10}; do
+        compare_demoted_promoted_image $j &
+        COMPARE_PIDS+=($!)
+    done
+    for pid in ${COMPARE_PIDS[@]}; do
+        wait $pid || RET=$?
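+        # a failure in any compare job is latched in RET here and checked
+        # once the loop has drained every pid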
+    done
+    if ((RET != 0)); then
+        echo "compare_demoted_promoted_image failed"
+        exit 1
+    fi
+
+    for j in {1..10}; do
+        IMG=${IMG_PREFIX}${j}
+        # Allow for removal of non-primary image by checking that mirroring
+        # image status is "up+replaying"
+        wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${IMG} 'up+replaying'
+        remove_image ${CLUSTER2} ${POOL} ${IMG}
+        wait_for_image_removal ${CLUSTER1} ${POOL} ${IMG}
+        rm -rf ${MNTPT_PREFIX}${j}
+    done
+done
+
+echo OK
diff --git a/qa/workunits/rbd/rbd-nbd.sh b/qa/workunits/rbd/rbd-nbd.sh
index bc89e9be5..8e1b05b3f 100755
--- a/qa/workunits/rbd/rbd-nbd.sh
+++ b/qa/workunits/rbd/rbd-nbd.sh
@@ -202,8 +202,11 @@ provisioned=`rbd -p ${POOL} --format xml du ${IMAGE} |
 used=`rbd -p ${POOL} --format xml du ${IMAGE} |
     $XMLSTARLET sel -t -m "//stats/images/image/used_size" -v .`
 [ "${used}" -lt "${provisioned}" ]
+unmap_device ${DEV} ${PID}

 # resize test
+DEV=`_sudo rbd device -t nbd -o try-netlink map ${POOL}/${IMAGE}`
+get_pid ${POOL}
 devname=$(basename ${DEV})
 blocks=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions)
 test -n "${blocks}"
@@ -216,9 +219,9 @@ rbd resize ${POOL}/${IMAGE} --allow-shrink --size ${SIZE}M
 blocks2=$(awk -v dev=${devname} '$4 == dev {print $3}' /proc/partitions)
 test -n "${blocks2}"
 test ${blocks2} -eq ${blocks}
+unmap_device ${DEV} ${PID}

 # read-only option test
-unmap_device ${DEV} ${PID}
 DEV=`_sudo rbd --device-type nbd map --read-only ${POOL}/${IMAGE}`
 PID=$(rbd device --device-type nbd list | awk -v pool=${POOL} -v img=${IMAGE} -v dev=${DEV} \
     '$2 == pool && $3 == img && $5 == dev {print $1}')
diff --git a/qa/workunits/rbd/rbd_mirror_bootstrap.sh b/qa/workunits/rbd/rbd_mirror_bootstrap.sh
index 6ef06f2b8..f4c1070bc 100755
--- a/qa/workunits/rbd/rbd_mirror_bootstrap.sh
+++ b/qa/workunits/rbd/rbd_mirror_bootstrap.sh
@@ -1,8 +1,10 @@
-#!/bin/sh -ex
+#!/usr/bin/env bash
 #
 # rbd_mirror_bootstrap.sh - test peer bootstrap create/import
 #
+set -ex
+
 RBD_MIRROR_MANUAL_PEERS=1
 RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-1}
 . $(dirname $0)/rbd_mirror_helpers.sh
diff --git a/qa/workunits/rbd/rbd_mirror_fsx_compare.sh b/qa/workunits/rbd/rbd_mirror_fsx_compare.sh
index 0ba3c97d7..79c36546d 100755
--- a/qa/workunits/rbd/rbd_mirror_fsx_compare.sh
+++ b/qa/workunits/rbd/rbd_mirror_fsx_compare.sh
@@ -1,10 +1,12 @@
-#!/bin/sh -ex
+#!/usr/bin/env bash
 #
 # rbd_mirror_fsx_compare.sh - test rbd-mirror daemon under FSX workload
 #
 # The script is used to compare FSX-generated images between two clusters.
 #
+set -ex
+
 . $(dirname $0)/rbd_mirror_helpers.sh

 trap 'cleanup $?' INT TERM EXIT
diff --git a/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh b/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh
index d988987ba..6daadbbb4 100755
--- a/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh
+++ b/qa/workunits/rbd/rbd_mirror_fsx_prepare.sh
@@ -1,10 +1,12 @@
-#!/bin/sh -ex
+#!/usr/bin/env bash
 #
 # rbd_mirror_fsx_prepare.sh - test rbd-mirror daemon under FSX workload
 #
 # The script is used to compare FSX-generated images between two clusters.
 #
+set -ex
+
 . $(dirname $0)/rbd_mirror_helpers.sh

 setup
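The shebang changes in these scripts all follow the same rule: interpreter options on a #!/bin/sh -ex line are lost when a harness invokes the script as "bash script.sh", so the options are set in the body instead. A minimal sketch:

#!/usr/bin/env bash
# set -ex in the body survives being invoked as "bash script.sh",
# where any -ex on the shebang line would be ignored
set -ex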
diff --git a/qa/workunits/rbd/rbd_mirror_ha.sh b/qa/workunits/rbd/rbd_mirror_ha.sh
index 37739a83d..1e43712a6 100755
--- a/qa/workunits/rbd/rbd_mirror_ha.sh
+++ b/qa/workunits/rbd/rbd_mirror_ha.sh
@@ -1,8 +1,10 @@
-#!/bin/sh -ex
+#!/usr/bin/env bash
 #
 # rbd_mirror_ha.sh - test rbd-mirror daemons in HA mode
 #
+set -ex
+
 RBD_MIRROR_INSTANCES=${RBD_MIRROR_INSTANCES:-7}

 . $(dirname $0)/rbd_mirror_helpers.sh
diff --git a/qa/workunits/rbd/rbd_mirror_helpers.sh b/qa/workunits/rbd/rbd_mirror_helpers.sh
index f4961b925..b6abff96d 100755
--- a/qa/workunits/rbd/rbd_mirror_helpers.sh
+++ b/qa/workunits/rbd/rbd_mirror_helpers.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env bash
 #
 # rbd_mirror_helpers.sh - shared rbd-mirror daemon helper functions
 #
@@ -814,23 +814,23 @@ test_status_in_pool_dir()
     local description_pattern="$5"
     local service_pattern="$6"

-    local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}-${image}.mirror_status)
-    CEPH_ARGS='' rbd --cluster ${cluster} mirror image status ${pool}/${image} |
-        tee ${status_log} >&2
-    grep "^  state: .*${state_pattern}" ${status_log} || return 1
-    grep "^  description: .*${description_pattern}" ${status_log} || return 1
+    local status
+    status=$(CEPH_ARGS='' rbd --cluster ${cluster} mirror image status \
+                 ${pool}/${image})
+    grep "^  state: .*${state_pattern}" <<< "$status" || return 1
+    grep "^  description: .*${description_pattern}" <<< "$status" || return 1

     if [ -n "${service_pattern}" ]; then
-        grep "service: *${service_pattern}" ${status_log} || return 1
+        grep "service: *${service_pattern}" <<< "$status" || return 1
     elif echo ${state_pattern} | grep '^up+'; then
-        grep "service: *${MIRROR_USER_ID_PREFIX}.* on " ${status_log} || return 1
+        grep "service: *${MIRROR_USER_ID_PREFIX}.* on " <<< "$status" || return 1
     else
-        grep "service: " ${status_log} && return 1
+        grep "service: " <<< "$status" && return 1
     fi

     # recheck using `mirror pool status` command to stress test it.
-
-    local last_update="$(sed -nEe 's/^  last_update: *(.*) *$/\1/p' ${status_log})"
+    local last_update
+    last_update="$(sed -nEe 's/^  last_update: *(.*) *$/\1/p' <<< "$status")"
     test_mirror_pool_status_verbose \
         ${cluster} ${pool} ${image} "${state_pattern}" "${last_update}" &&
         return 0
@@ -847,16 +847,15 @@ test_mirror_pool_status_verbose()
     local state_pattern="$4"
     local prev_last_update="$5"

-    local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}.mirror_status)
-
-    rbd --cluster ${cluster} mirror pool status ${pool} --verbose --format xml \
-        > ${status_log}
+    local status
+    status=$(CEPH_ARGS='' rbd --cluster ${cluster} mirror pool status ${pool} \
+                 --verbose --format xml)

     local last_update state
     last_update=$($XMLSTARLET sel -t -v \
-        "//images/image[name='${image}']/last_update" < ${status_log})
+        "//images/image[name='${image}']/last_update" <<< "$status")
     state=$($XMLSTARLET sel -t -v \
-        "//images/image[name='${image}']/state" < ${status_log})
+        "//images/image[name='${image}']/state" <<< "$status")

     echo "${state}" | grep "${state_pattern}" ||
         test "${last_update}" '>' "${prev_last_update}"
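The helper refactor above swaps per-call status log files in TEMPDIR for a captured variable fed back through herestrings, so nothing is left behind to clean up; the pattern in isolation (a sketch, names illustrative):

status=$(CEPH_ARGS='' rbd mirror image status pool/img)
grep '^  state: ' <<< "$status"         # herestring feeds the captured output
grep '^  description: ' <<< "$status"   # reusable without re-running the command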
diff --git a/qa/workunits/rbd/rbd_mirror_journal.sh b/qa/workunits/rbd/rbd_mirror_journal.sh
index 54f6aeec8..20a3b87db 100755
--- a/qa/workunits/rbd/rbd_mirror_journal.sh
+++ b/qa/workunits/rbd/rbd_mirror_journal.sh
@@ -1,4 +1,4 @@
-#!/bin/sh -ex
+#!/usr/bin/env bash
 #
 # rbd_mirror_journal.sh - test rbd-mirror daemon in journal-based mirroring mode
 #
@@ -7,6 +7,8 @@
 # socket, temporary files, and launches rbd-mirror daemon.
 #

+set -ex
+
 . $(dirname $0)/rbd_mirror_helpers.sh

 setup
diff --git a/qa/workunits/rbd/rbd_mirror_snapshot.sh b/qa/workunits/rbd/rbd_mirror_snapshot.sh
index c70d48b09..17164c4d5 100755
--- a/qa/workunits/rbd/rbd_mirror_snapshot.sh
+++ b/qa/workunits/rbd/rbd_mirror_snapshot.sh
@@ -1,4 +1,4 @@
-#!/bin/sh -ex
+#!/usr/bin/env bash
 #
 # rbd_mirror_snapshot.sh - test rbd-mirror daemon in snapshot-based mirroring mode
 #
@@ -7,6 +7,8 @@
 # socket, temporary files, and launches rbd-mirror daemon.
 #

+set -ex
+
 MIRROR_POOL_MODE=image
 MIRROR_IMAGE_MODE=snapshot
diff --git a/qa/workunits/rbd/rbd_mirror_stress.sh b/qa/workunits/rbd/rbd_mirror_stress.sh
index cb79aba7e..ea39d3aae 100755
--- a/qa/workunits/rbd/rbd_mirror_stress.sh
+++ b/qa/workunits/rbd/rbd_mirror_stress.sh
@@ -1,4 +1,4 @@
-#!/bin/sh -ex
+#!/usr/bin/env bash
 #
 # rbd_mirror_stress.sh - stress test rbd-mirror daemon
 #
@@ -8,6 +8,8 @@
 # tool during the many image test
 #

+set -ex
+
 IMAGE_COUNT=50
 export LOCKDEP=0
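All of these suites source rbd_mirror_helpers.sh, which reads its knobs from the environment; a typical standalone invocation looks something like this (a sketch, not a documented interface):

RBD_MIRROR_INSTANCES=2 RBD_MIRROR_USE_EXISTING_CLUSTER=1 \
    qa/workunits/rbd/rbd_mirror_snapshot.sh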