1 files changed, 286 insertions, 0 deletions
diff --git a/test/features/bootstrap_sbd_delay.feature b/test/features/bootstrap_sbd_delay.feature
new file mode 100644
index 0000000..8b636d1
--- /dev/null
+++ b/test/features/bootstrap_sbd_delay.feature
@@ -0,0 +1,286 @@
+@sbd
+Feature: configure sbd delay start correctly
+
+  Tag @clean means need to stop cluster service if the service is available
+
+  @clean
+  Scenario: disk-based SBD with small sbd_watchdog_timeout
+    Given   Run "test -f /etc/crm/profiles.yml" OK
+    Given   Yaml "default:corosync.totem.token" value is "5000"
+    Given   Yaml "default:sbd.watchdog_timeout" value is "15"
+
+    Given   Has disk "/dev/sda1" on "hanode1"
+    Given   Cluster service is "stopped" on "hanode1"
+    When    Run "crm cluster init -s /dev/sda1 -y" on "hanode1"
+    Then    Cluster service is "started" on "hanode1"
+    And     Service "sbd" is "started" on "hanode1"
+    And     Resource "stonith-sbd" type "external/sbd" is "Started"
+    And     SBD option "SBD_DELAY_START" value is "no"
+    And     SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+    And     SBD option "msgwait" value for "/dev/sda1" is "30"
+    # calculated and set by sbd RA
+    And     Cluster property "stonith-timeout" is "43"
+    And     Parameter "pcmk_delay_max" not configured in "stonith-sbd"
+
+    Given   Has disk "/dev/sda1" on "hanode2"
+    Given   Cluster service is "stopped" on "hanode2"
+    When    Run "crm cluster join -c hanode1 -y" on "hanode2"
+    Then    Cluster service is "started" on "hanode2"
+    And     Service "sbd" is "started" on "hanode2"
+    # SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait)  # for disk-based sbd
+    And     SBD option "SBD_DELAY_START" value is "71"
+    And     SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+    And     SBD option "msgwait" value for "/dev/sda1" is "30"
+    # value_from_sbd >= 1.2 * (pcmk_delay_max + msgwait)  # for disk-based sbd
+    # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus
+    And     Cluster property "stonith-timeout" is "83"
+    And     Parameter "pcmk_delay_max" configured in "stonith-sbd"
+
+    Given   Has disk "/dev/sda1" on "hanode3"
+    Given   Cluster service is "stopped" on "hanode3"
+    When    Run "crm cluster join -c hanode1 -y" on "hanode3"
+    Then    Cluster service is "started" on "hanode3"
+    And     Service "sbd" is "started" on "hanode3"
+    # SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait)  # for disk-based sbd
+    # runtime value is "41", we keep the larger one here
+    And     SBD option "SBD_DELAY_START" value is "71"
+    And     SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+    And     SBD option "msgwait" value for "/dev/sda1" is "30"
+    # value_from_sbd >= 1.2 * (pcmk_delay_max + msgwait)  # for disk-based sbd
+    # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus
+    # runtime value is "71", we keep ther larger one here
+    And     Cluster property "stonith-timeout" is "83"
+    And     Parameter "pcmk_delay_max" not configured in "stonith-sbd"
+
+    When    Run "crm cluster remove hanode3 -y" on "hanode1"
+    Then    Cluster service is "stopped" on "hanode3"
+    And     Service "sbd" is "stopped" on "hanode3"
+    And     SBD option "SBD_DELAY_START" value is "71"
+    And     SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+    And     SBD option "msgwait" value for "/dev/sda1" is "30"
+    And     Cluster property "stonith-timeout" is "83"
+    And     Parameter "pcmk_delay_max" configured in "stonith-sbd"
+
+  @clean
+  Scenario: disk-less SBD with small sbd_watchdog_timeout
+    Given   Run "test -f /etc/crm/profiles.yml" OK
+    Given   Yaml "default:corosync.totem.token" value is "5000"
+    Given   Yaml "default:sbd.watchdog_timeout" value is "15"
+
+    Given   Cluster service is "stopped" on "hanode1"
+    When    Run "crm cluster init -S -y" on "hanode1"
+    Then    Cluster service is "started" on "hanode1"
+    And     SBD option "SBD_DELAY_START" value is "no"
+    And     SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+    And     Cluster property "stonith-timeout" is "60"
+
+    Given   Cluster service is "stopped" on "hanode2"
+    When    Run "crm cluster join -c hanode1 -y" on "hanode2"
+    Then    Cluster service is "started" on "hanode2"
+    # SBD_DELAY_START >= (token + consensus + 2*SBD_WATCHDOG_TIMEOUT) # for disk-less sbd
+    And     SBD option "SBD_DELAY_START" value is "41"
+    And     SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+    # stonith-timeout >= 1.2 * max(stonith_watchdog_timeout, 2*SBD_WATCHDOG_TIMEOUT)  # for disk-less sbd
+    # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus
+    And     Cluster property "stonith-timeout" is "71"
+
+    Given   Cluster service is "stopped" on "hanode3"
+    When    Run "crm cluster join -c hanode1 -y" on "hanode3"
+    Then    Cluster service is "started" on "hanode3"
+    And     SBD option "SBD_DELAY_START" value is "41"
+    And     SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+    And     Cluster property "stonith-timeout" is "71"
+
+    When    Run "crm cluster remove hanode3 -y" on "hanode1"
+    Then    Cluster service is "stopped" on "hanode3"
+    And     SBD option "SBD_DELAY_START" value is "41"
+    And     SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+    And     Cluster property "stonith-timeout" is "71"
+
+  @clean
+  Scenario: disk-based SBD with big sbd_watchdog_timeout
+    When    Run "sed -i 's/watchdog_timeout: 15/watchdog_timeout: 60/' /etc/crm/profiles.yml" on "hanode1"
+    Given   Yaml "default:corosync.totem.token" value is "5000"
+    Given   Yaml "default:sbd.watchdog_timeout" value is "60"
+
+    Given   Has disk "/dev/sda1" on "hanode1"
+    Given   Cluster service is "stopped" on "hanode1"
+    When    Run "crm cluster init -s /dev/sda1 -y" on "hanode1"
+    Then    Cluster service is "started" on "hanode1"
+    And     Service "sbd" is "started" on "hanode1"
+    And     Resource "stonith-sbd" type "external/sbd" is "Started"
+    And     SBD option "SBD_DELAY_START" value is "no"
+    And     SBD option "SBD_WATCHDOG_TIMEOUT" value is "60"
+    And     SBD option "msgwait" value for "/dev/sda1" is "120"
+    # calculated and set by sbd RA
+    And     Cluster property "stonith-timeout" is "172"
+    And     Parameter "pcmk_delay_max" not configured in "stonith-sbd"
+
+    Given   Has disk "/dev/sda1" on "hanode2"
+    Given   Cluster service is "stopped" on "hanode2"
+    When    Run "crm cluster join -c hanode1 -y" on "hanode2"
+    Then    Cluster service is "started" on "hanode2"
+    And     Service "sbd" is "started" on "hanode2"
+    # SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait)  # for disk-based sbd
+    And     SBD option "SBD_DELAY_START" value is "161"
+    And     SBD option "SBD_WATCHDOG_TIMEOUT" value is "60"
+    And     SBD option "msgwait" value for "/dev/sda1" is "120"
+    # stonith-timeout >= 1.2 * (pcmk_delay_max + msgwait)  # for disk-based sbd
+    # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus
+    And     Cluster property "stonith-timeout" is "191"
+    And     Parameter "pcmk_delay_max" configured in "stonith-sbd"
+    # since SBD_DELAY_START value(161s) > default systemd startup value(1min 30s)
+    And     Run "test -f /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
+    # 1.2*SBD_DELAY_START
+    And     Run "grep 'TimeoutSec=193' /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
+
+    Given   Has disk "/dev/sda1" on "hanode3"
+    Given   Cluster service is "stopped" on "hanode3"
+    When    Run "crm cluster join -c hanode1 -y" on "hanode3"
+    Then    Cluster service is "started" on "hanode3"
+    And     Service "sbd" is "started" on "hanode3"
+    And     SBD option "SBD_DELAY_START" value is "161"
+    And     SBD option "SBD_WATCHDOG_TIMEOUT" value is "60"
+    And     SBD option "msgwait" value for "/dev/sda1" is "120"
+    And     Cluster property "stonith-timeout" is "191"
+    And     Parameter "pcmk_delay_max" not configured in "stonith-sbd"
+    And     Run "test -f /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
+    And     Run "grep 'TimeoutSec=193' /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
+
+    When    Run "crm cluster remove hanode3 -y" on "hanode1"
+    Then    Cluster service is "stopped" on "hanode3"
+    And     Service "sbd" is "stopped" on "hanode3"
+    And     SBD option "SBD_DELAY_START" value is "161"
+    And     SBD option "SBD_WATCHDOG_TIMEOUT" value is "60"
+    And     SBD option "msgwait" value for "/dev/sda1" is "120"
+    And     Cluster property "stonith-timeout" is "191"
+    And     Parameter "pcmk_delay_max" configured in "stonith-sbd"
+    And     Run "test -f /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
+    And     Run "grep 'TimeoutSec=193' /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK
+    When    Run "sed -i 's/watchdog_timeout: 60/watchdog_timeout: 15/g' /etc/crm/profiles.yml" on "hanode1"
+
+  @clean
+  Scenario: Add sbd via stage on a running cluster
+    Given   Run "test -f /etc/crm/profiles.yml" OK
+    Given   Yaml "default:corosync.totem.token" value is "5000"
+    Given   Yaml "default:sbd.watchdog_timeout" value is "15"
+
+    Given   Has disk "/dev/sda1" on "hanode1"
+    Given   Has disk "/dev/sda1" on "hanode2"
+    Given   Cluster service is "stopped" on "hanode1"
+    Given   Cluster service is "stopped" on "hanode2"
+    When    Run "crm cluster init -y" on "hanode1"
+    Then    Cluster service is "started" on "hanode1"
+    When    Run "crm cluster join -c hanode1 -y" on "hanode2"
+    Then    Cluster service is "started" on "hanode2"
+
+    When    Run "crm cluster init sbd -s /dev/sda1 -y" on "hanode1"
+    Then    Service "sbd" is "started" on "hanode1"
+    Then    Service "sbd" is "started" on "hanode2"
+    And     SBD option "SBD_DELAY_START" value is "71"
+    And     SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+    And     SBD option "msgwait" value for "/dev/sda1" is "30"
+    And     Cluster property "stonith-timeout" is "83"
+    And     Parameter "pcmk_delay_max" configured in "stonith-sbd"
+
+  @clean
+  Scenario: Add disk-based sbd with qdevice
+    Given   Run "test -f /etc/crm/profiles.yml" OK
+    Given   Yaml "default:corosync.totem.token" value is "5000"
+    Given   Yaml "default:sbd.watchdog_timeout" value is "15"
+    Given   Has disk "/dev/sda1" on "hanode1"
+    Given   Has disk "/dev/sda1" on "hanode2"
+    Given   Cluster service is "stopped" on "hanode1"
+    Given   Cluster service is "stopped" on "hanode2"
+
+    When    Run "crm cluster init -s /dev/sda1 --qnetd-hostname=qnetd-node -y" on "hanode1"
+    Then    Cluster service is "started" on "hanode1"
+    When    Run "crm cluster join -c hanode1 -y" on "hanode2"
+    Then    Cluster service is "started" on "hanode2"
+    And     Service "corosync-qdevice" is "started" on "hanode1"
+    And     Service "corosync-qdevice" is "started" on "hanode2"
+    And     Service "sbd" is "started" on "hanode1"
+    And     Service "sbd" is "started" on "hanode2"
+
+    And     SBD option "SBD_DELAY_START" value is "41"
+    And     SBD option "SBD_WATCHDOG_TIMEOUT" value is "15"
+    And     SBD option "msgwait" value for "/dev/sda1" is "30"
+    And     Cluster property "stonith-timeout" is "71"
+    And     Parameter "pcmk_delay_max" not configured in "stonith-sbd"
+
+  @clean
+  Scenario: Add disk-less sbd with qdevice
+    Given   Run "test -f /etc/crm/profiles.yml" OK
+    Given   Yaml "default:corosync.totem.token" value is "5000"
+    Given   Yaml "default:sbd.watchdog_timeout" value is "15"
+    Given   Cluster service is "stopped" on "hanode1"
+    Given   Cluster service is "stopped" on "hanode2"
+
+    When    Run "crm cluster init -S --qnetd-hostname=qnetd-node -y" on "hanode1"
+    Then    Cluster service is "started" on "hanode1"
+    When    Run "crm cluster join -c hanode1 -y" on "hanode2"
+    Then    Cluster service is "started" on "hanode2"
+    And     Service "corosync-qdevice" is "started" on "hanode1"
+    And     Service "corosync-qdevice" is "started" on "hanode2"
+    And     Service "sbd" is "started" on "hanode1"
+    And     Service "sbd" is "started" on "hanode2"
+
+    And     SBD option "SBD_DELAY_START" value is "81"
+    And     SBD option "SBD_WATCHDOG_TIMEOUT" value is "35"
+    And     Cluster property "stonith-timeout" is "95"
+    And     Cluster property "stonith-watchdog-timeout" is "-1"
+
+  @clean
+  Scenario: Add and remove qdevice from cluster with sbd running
+    Given   Cluster service is "stopped" on "hanode1"
+    Given   Cluster service is "stopped" on "hanode2"
+    When    Run "crm cluster init -s /dev/sda1 -y" on "hanode1"
+    Then    Cluster service is "started" on "hanode1"
+    When    Run "crm cluster join -c hanode1 -y" on "hanode2"
+    Then    Cluster service is "started" on "hanode2"
+    And     Service "sbd" is "started" on "hanode1"
+    And     Service "sbd" is "started" on "hanode2"
+    And     Parameter "pcmk_delay_max" configured in "stonith-sbd"
+    When    Run "crm cluster init qdevice --qnetd-hostname=qnetd-node -y" on "hanode1"
+    Then    Service "corosync-qdevice" is "started" on "hanode1"
+    And     Service "corosync-qdevice" is "started" on "hanode2"
+    And     Parameter "pcmk_delay_max" not configured in "stonith-sbd"
+    When    Run "crm cluster remove --qdevice -y" on "hanode1"
+    Then    Service "corosync-qdevice" is "stopped" on "hanode1"
+    And     Service "corosync-qdevice" is "stopped" on "hanode2"
+    And     Parameter "pcmk_delay_max" configured in "stonith-sbd"
+
+  @clean
+  Scenario: Test priority-fence-delay and priority
+    Given   Cluster service is "stopped" on "hanode1"
+    Given   Cluster service is "stopped" on "hanode2"
+    When    Run "crm cluster init -y" on "hanode1"
+    Then    Cluster service is "started" on "hanode1"
+    When    Run "crm cluster join -c hanode1 -y" on "hanode2"
+    Then    Cluster service is "started" on "hanode2"
+    And     Property "priority" in "rsc_defaults" is "1"
+    When    Run "crm cluster remove hanode2 -y" on "hanode1"
+    Then    Cluster service is "stopped" on "hanode2"
+    And     Property "priority" in "rsc_defaults" is "0"
+    When    Run "crm cluster join -c hanode1 -y" on "hanode2"
+    Then    Cluster service is "started" on "hanode2"
+    And     Property "priority" in "rsc_defaults" is "1"
+    When    Run "crm cluster init qdevice --qnetd-hostname=qnetd-node -y" on "hanode1"
+    Then    Service "corosync-qdevice" is "started" on "hanode1"
+    And     Service "corosync-qdevice" is "started" on "hanode2"
+    And     Property "priority" in "rsc_defaults" is "0"
+    When    Run "crm cluster remove --qdevice -y" on "hanode1"
+    Then    Service "corosync-qdevice" is "stopped" on "hanode1"
+    And     Service "corosync-qdevice" is "stopped" on "hanode2"
+    And     Property "priority" in "rsc_defaults" is "1"
+    When    Run "crm cluster init sbd -s /dev/sda1 -y" on "hanode1"
+    Then    Service "sbd" is "started" on "hanode1"
+    And     Service "sbd" is "started" on "hanode2"
+    And     Parameter "pcmk_delay_max" configured in "stonith-sbd"
+    And     Cluster property "stonith-timeout" is "83"
+    And     Cluster property "priority-fencing-delay" is "60"
+    When    Run "crm cluster remove hanode2 -y" on "hanode1"
+    Then    Cluster service is "stopped" on "hanode2"
+    And     Property "priority" in "rsc_defaults" is "0"
+    And     Cluster property "priority-fencing-delay" is "0"
+    And     Parameter "pcmk_delay_max" not configured in "stonith-sbd"