summaryrefslogtreecommitdiffstats
path: root/test/features/bootstrap_sbd_delay.feature
diff options
context:
space:
mode:
Diffstat (limited to 'test/features/bootstrap_sbd_delay.feature')
-rw-r--r--test/features/bootstrap_sbd_delay.feature286
1 files changed, 286 insertions, 0 deletions
diff --git a/test/features/bootstrap_sbd_delay.feature b/test/features/bootstrap_sbd_delay.feature
new file mode 100644
index 0000000..8b636d1
--- /dev/null
+++ b/test/features/bootstrap_sbd_delay.feature
@@ -0,0 +1,286 @@
+@sbd
+Feature: configure sbd delay start correctly
+
+ Tag @clean means need to stop cluster service if the service is available
+
+ @clean
+ Scenario: disk-based SBD with small sbd_watchdog_timeout
+ Given Run "test -f /etc/crm/profiles.yml" OK
+ Given Yaml "default:corosync.totem.token" value is "5000"
+ Given Yaml "default:sbd.watchdog_timeout" value is "15"
+
+ Given Has disk "/dev/sda1" on "hanode1"
+ Given Cluster service is "stopped" on "hanode1"
+ When Run "crm cluster init -s /dev/sda1 -y" on "hanode1"
+ Then Cluster service is "started" on "hanode1"
+ And Service "sbd" is "started" on "hanode1"
+ And Resource "stonith-sbd" type "external/sbd" is "Started"
+ And SBD option "SBD_DELAY_START" value is "no"
+ And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+ And SBD option "msgwait" value for "/dev/sda1" is "30"
+ # calculated and set by sbd RA
+ And Cluster property "stonith-timeout" is "43"
+ And Parameter "pcmk_delay_max" not configured in "stonith-sbd"
+
+ Given Has disk "/dev/sda1" on "hanode2"
+ Given Cluster service is "stopped" on "hanode2"
+ When Run "crm cluster join -c hanode1 -y" on "hanode2"
+ Then Cluster service is "started" on "hanode2"
+ And Service "sbd" is "started" on "hanode2"
+ # SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait) # for disk-based sbd
+ And SBD option "SBD_DELAY_START" value is "71"
+ And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+ And SBD option "msgwait" value for "/dev/sda1" is "30"
+ # value_from_sbd >= 1.2 * (pcmk_delay_max + msgwait) # for disk-based sbd
+ # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus
+ And Cluster property "stonith-timeout" is "83"
+ And Parameter "pcmk_delay_max" configured in "stonith-sbd"
+
+ Given Has disk "/dev/sda1" on "hanode3"
+ Given Cluster service is "stopped" on "hanode3"
+ When Run "crm cluster join -c hanode1 -y" on "hanode3"
+ Then Cluster service is "started" on "hanode3"
+ And Service "sbd" is "started" on "hanode3"
+ # SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait) # for disk-based sbd
+ # runtime value is "41", we keep the larger one here
+ And SBD option "SBD_DELAY_START" value is "71"
+ And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+ And SBD option "msgwait" value for "/dev/sda1" is "30"
+ # value_from_sbd >= 1.2 * (pcmk_delay_max + msgwait) # for disk-based sbd
+ # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus
+ # runtime value is "71", we keep ther larger one here
+ And Cluster property "stonith-timeout" is "83"
+ And Parameter "pcmk_delay_max" not configured in "stonith-sbd"
+
+ When Run "crm cluster remove hanode3 -y" on "hanode1"
+ Then Cluster service is "stopped" on "hanode3"
+ And Service "sbd" is "stopped" on "hanode3"
+ And SBD option "SBD_DELAY_START" value is "71"
+ And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+ And SBD option "msgwait" value for "/dev/sda1" is "30"
+ And Cluster property "stonith-timeout" is "83"
+ And Parameter "pcmk_delay_max" configured in "stonith-sbd"
+
+ @clean
+ Scenario: disk-less SBD with small sbd_watchdog_timeout
+ Given Run "test -f /etc/crm/profiles.yml" OK
+ Given Yaml "default:corosync.totem.token" value is "5000"
+ Given Yaml "default:sbd.watchdog_timeout" value is "15"
+
+ Given Cluster service is "stopped" on "hanode1"
+ When Run "crm cluster init -S -y" on "hanode1"
+ Then Cluster service is "started" on "hanode1"
+ And SBD option "SBD_DELAY_START" value is "no"
+ And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+ And Cluster property "stonith-timeout" is "60"
+
+ Given Cluster service is "stopped" on "hanode2"
+ When Run "crm cluster join -c hanode1 -y" on "hanode2"
+ Then Cluster service is "started" on "hanode2"
+ # SBD_DELAY_START >= (token + consensus + 2*SBD_WATCHDOG_TIMEOUT) # for disk-less sbd
+ And SBD option "SBD_DELAY_START" value is "41"
+ And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+ # stonith-timeout >= 1.2 * max(stonith_watchdog_timeout, 2*SBD_WATCHDOG_TIMEOUT) # for disk-less sbd
+ # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus
+ And Cluster property "stonith-timeout" is "71"
+
+ Given Cluster service is "stopped" on "hanode3"
+ When Run "crm cluster join -c hanode1 -y" on "hanode3"
+ Then Cluster service is "started" on "hanode3"
+ And SBD option "SBD_DELAY_START" value is "41"
+ And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+ And Cluster property "stonith-timeout" is "71"
+
+ When Run "crm cluster remove hanode3 -y" on "hanode1"
+ Then Cluster service is "stopped" on "hanode3"
+ And SBD option "SBD_DELAY_START" value is "41"
+ And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+ And Cluster property "stonith-timeout" is "71"
+
+ @clean
+ Scenario: disk-based SBD with big sbd_watchdog_timeout
+ When Run "sed -i 's/watchdog_timeout: 15/watchdog_timeout: 60/' /etc/crm/profiles.yml" on "hanode1"
+ Given Yaml "default:corosync.totem.token" value is "5000"
+ Given Yaml "default:sbd.watchdog_timeout" value is "60"
+
+ Given Has disk "/dev/sda1" on "hanode1"
+ Given Cluster service is "stopped" on "hanode1"
+ When Run "crm cluster init -s /dev/sda1 -y" on "hanode1"
+ Then Cluster service is "started" on "hanode1"
+ And Service "sbd" is "started" on "hanode1"
+ And Resource "stonith-sbd" type "external/sbd" is "Started"
+ And SBD option "SBD_DELAY_START" value is "no"
+ And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60"
+ And SBD option "msgwait" value for "/dev/sda1" is "120"
+ # calculated and set by sbd RA
+ And Cluster property "stonith-timeout" is "172"
+ And Parameter "pcmk_delay_max" not configured in "stonith-sbd"
+
+ Given Has disk "/dev/sda1" on "hanode2"
+ Given Cluster service is "stopped" on "hanode2"
+ When Run "crm cluster join -c hanode1 -y" on "hanode2"
+ Then Cluster service is "started" on "hanode2"
+ And Service "sbd" is "started" on "hanode2"
+ # SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait) # for disk-based sbd
+ And SBD option "SBD_DELAY_START" value is "161"
+ And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60"
+ And SBD option "msgwait" value for "/dev/sda1" is "120"
+ # stonith-timeout >= 1.2 * (pcmk_delay_max + msgwait) # for disk-based sbd
+ # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus
+ And Cluster property "stonith-timeout" is "191"
+ And Parameter "pcmk_delay_max" configured in "stonith-sbd"
+ # since SBD_DELAY_START value(161s) > default systemd startup value(1min 30s)
+ And Run "test -f /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
+ # 1.2*SBD_DELAY_START
+ And Run "grep 'TimeoutSec=193' /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
+
+ Given Has disk "/dev/sda1" on "hanode3"
+ Given Cluster service is "stopped" on "hanode3"
+ When Run "crm cluster join -c hanode1 -y" on "hanode3"
+ Then Cluster service is "started" on "hanode3"
+ And Service "sbd" is "started" on "hanode3"
+ And SBD option "SBD_DELAY_START" value is "161"
+ And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60"
+ And SBD option "msgwait" value for "/dev/sda1" is "120"
+ And Cluster property "stonith-timeout" is "191"
+ And Parameter "pcmk_delay_max" not configured in "stonith-sbd"
+ And Run "test -f /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
+ And Run "grep 'TimeoutSec=193' /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
+
+ When Run "crm cluster remove hanode3 -y" on "hanode1"
+ Then Cluster service is "stopped" on "hanode3"
+ And Service "sbd" is "stopped" on "hanode3"
+ And SBD option "SBD_DELAY_START" value is "161"
+ And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60"
+ And SBD option "msgwait" value for "/dev/sda1" is "120"
+ And Cluster property "stonith-timeout" is "191"
+ And Parameter "pcmk_delay_max" configured in "stonith-sbd"
+ And Run "test -f /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
+ And Run "grep 'TimeoutSec=193' /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
+ When Run "sed -i 's/watchdog_timeout: 60/watchdog_timeout: 15/g' /etc/crm/profiles.yml" on "hanode1"
+
+ @clean
+ Scenario: Add sbd via stage on a running cluster
+ Given Run "test -f /etc/crm/profiles.yml" OK
+ Given Yaml "default:corosync.totem.token" value is "5000"
+ Given Yaml "default:sbd.watchdog_timeout" value is "15"
+
+ Given Has disk "/dev/sda1" on "hanode1"
+ Given Has disk "/dev/sda1" on "hanode2"
+ Given Cluster service is "stopped" on "hanode1"
+ Given Cluster service is "stopped" on "hanode2"
+ When Run "crm cluster init -y" on "hanode1"
+ Then Cluster service is "started" on "hanode1"
+ When Run "crm cluster join -c hanode1 -y" on "hanode2"
+ Then Cluster service is "started" on "hanode2"
+
+ When Run "crm cluster init sbd -s /dev/sda1 -y" on "hanode1"
+ Then Service "sbd" is "started" on "hanode1"
+ Then Service "sbd" is "started" on "hanode2"
+ And SBD option "SBD_DELAY_START" value is "71"
+ And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+ And SBD option "msgwait" value for "/dev/sda1" is "30"
+ And Cluster property "stonith-timeout" is "83"
+ And Parameter "pcmk_delay_max" configured in "stonith-sbd"
+
+ @clean
+ Scenario: Add disk-based sbd with qdevice
+ Given Run "test -f /etc/crm/profiles.yml" OK
+ Given Yaml "default:corosync.totem.token" value is "5000"
+ Given Yaml "default:sbd.watchdog_timeout" value is "15"
+ Given Has disk "/dev/sda1" on "hanode1"
+ Given Has disk "/dev/sda1" on "hanode2"
+ Given Cluster service is "stopped" on "hanode1"
+ Given Cluster service is "stopped" on "hanode2"
+
+ When Run "crm cluster init -s /dev/sda1 --qnetd-hostname=qnetd-node -y" on "hanode1"
+ Then Cluster service is "started" on "hanode1"
+ When Run "crm cluster join -c hanode1 -y" on "hanode2"
+ Then Cluster service is "started" on "hanode2"
+ And Service "corosync-qdevice" is "started" on "hanode1"
+ And Service "corosync-qdevice" is "started" on "hanode2"
+ And Service "sbd" is "started" on "hanode1"
+ And Service "sbd" is "started" on "hanode2"
+
+ And SBD option "SBD_DELAY_START" value is "41"
+ And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+ And SBD option "msgwait" value for "/dev/sda1" is "30"
+ And Cluster property "stonith-timeout" is "71"
+ And Parameter "pcmk_delay_max" not configured in "stonith-sbd"
+
+ @clean
+ Scenario: Add disk-less sbd with qdevice
+ Given Run "test -f /etc/crm/profiles.yml" OK
+ Given Yaml "default:corosync.totem.token" value is "5000"
+ Given Yaml "default:sbd.watchdog_timeout" value is "15"
+ Given Cluster service is "stopped" on "hanode1"
+ Given Cluster service is "stopped" on "hanode2"
+
+ When Run "crm cluster init -S --qnetd-hostname=qnetd-node -y" on "hanode1"
+ Then Cluster service is "started" on "hanode1"
+ When Run "crm cluster join -c hanode1 -y" on "hanode2"
+ Then Cluster service is "started" on "hanode2"
+ And Service "corosync-qdevice" is "started" on "hanode1"
+ And Service "corosync-qdevice" is "started" on "hanode2"
+ And Service "sbd" is "started" on "hanode1"
+ And Service "sbd" is "started" on "hanode2"
+
+ And SBD option "SBD_DELAY_START" value is "81"
+ And SBD option "SBD_WATCHDOG_TIMEOUT" value is "35"
+ And Cluster property "stonith-timeout" is "95"
+ And Cluster property "stonith-watchdog-timeout" is "-1"
+
+ @clean
+ Scenario: Add and remove qdevice from cluster with sbd running
+ Given Cluster service is "stopped" on "hanode1"
+ Given Cluster service is "stopped" on "hanode2"
+ When Run "crm cluster init -s /dev/sda1 -y" on "hanode1"
+ Then Cluster service is "started" on "hanode1"
+ When Run "crm cluster join -c hanode1 -y" on "hanode2"
+ Then Cluster service is "started" on "hanode2"
+ And Service "sbd" is "started" on "hanode1"
+ And Service "sbd" is "started" on "hanode2"
+ And Parameter "pcmk_delay_max" configured in "stonith-sbd"
+ When Run "crm cluster init qdevice --qnetd-hostname=qnetd-node -y" on "hanode1"
+ Then Service "corosync-qdevice" is "started" on "hanode1"
+ And Service "corosync-qdevice" is "started" on "hanode2"
+ And Parameter "pcmk_delay_max" not configured in "stonith-sbd"
+ When Run "crm cluster remove --qdevice -y" on "hanode1"
+ Then Service "corosync-qdevice" is "stopped" on "hanode1"
+ And Service "corosync-qdevice" is "stopped" on "hanode2"
+ And Parameter "pcmk_delay_max" configured in "stonith-sbd"
+
+ @clean
+ Scenario: Test priority-fence-delay and priority
+ Given Cluster service is "stopped" on "hanode1"
+ Given Cluster service is "stopped" on "hanode2"
+ When Run "crm cluster init -y" on "hanode1"
+ Then Cluster service is "started" on "hanode1"
+ When Run "crm cluster join -c hanode1 -y" on "hanode2"
+ Then Cluster service is "started" on "hanode2"
+ And Property "priority" in "rsc_defaults" is "1"
+ When Run "crm cluster remove hanode2 -y" on "hanode1"
+ Then Cluster service is "stopped" on "hanode2"
+ And Property "priority" in "rsc_defaults" is "0"
+ When Run "crm cluster join -c hanode1 -y" on "hanode2"
+ Then Cluster service is "started" on "hanode2"
+ And Property "priority" in "rsc_defaults" is "1"
+ When Run "crm cluster init qdevice --qnetd-hostname=qnetd-node -y" on "hanode1"
+ Then Service "corosync-qdevice" is "started" on "hanode1"
+ And Service "corosync-qdevice" is "started" on "hanode2"
+ And Property "priority" in "rsc_defaults" is "0"
+ When Run "crm cluster remove --qdevice -y" on "hanode1"
+ Then Service "corosync-qdevice" is "stopped" on "hanode1"
+ And Service "corosync-qdevice" is "stopped" on "hanode2"
+ And Property "priority" in "rsc_defaults" is "1"
+ When Run "crm cluster init sbd -s /dev/sda1 -y" on "hanode1"
+ Then Service "sbd" is "started" on "hanode1"
+ And Service "sbd" is "started" on "hanode2"
+ And Parameter "pcmk_delay_max" configured in "stonith-sbd"
+ And Cluster property "stonith-timeout" is "83"
+ And Cluster property "priority-fencing-delay" is "60"
+ When Run "crm cluster remove hanode2 -y" on "hanode1"
+ Then Cluster service is "stopped" on "hanode2"
+ And Property "priority" in "rsc_defaults" is "0"
+ And Cluster property "priority-fencing-delay" is "0"
+ And Parameter "pcmk_delay_max" not configured in "stonith-sbd"