28 files changed, 3966 insertions, 0 deletions
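The added files are behave/Gherkin functional tests that lean heavily on tags: @bootstrap, @sbd and @cluster_api group the features, @clean marks scenarios that need the cluster service stopped afterwards ("Tag @clean means need to stop cluster service if the service is available"), and @skip_non_root marks scenarios that only work for a root test runner. As an illustration only -- assuming a conventional behave environment.py with hypothetical hook bodies, not necessarily the hooks crmsh actually ships -- tag handling along these lines usually looks like this:

# environment.py -- hypothetical behave hooks sketching how tags such as
# @clean and @skip_non_root are commonly handled; the real crmsh test
# harness may implement this differently.
import os
import subprocess

def before_scenario(context, scenario):
    # Collect the scenario's own tags plus the tags inherited from its feature.
    tags = set(scenario.tags) | set(scenario.feature.tags)
    # @skip_non_root: scenario cannot run through a non-root sudoer,
    # so skip it unless the test runner is root.
    if "skip_non_root" in tags and os.geteuid() != 0:
        scenario.skip("requires a root test runner")

def after_scenario(context, scenario):
    tags = set(scenario.tags) | set(scenario.feature.tags)
    # @clean: stop the cluster service (if any) so the next scenario
    # starts from a known state, as the in-file comments describe.
    if "clean" in tags:
        subprocess.run(["crm", "cluster", "stop", "--all"], check=False)

A tag group can then be selected at run time, e.g. "behave --tags=bootstrap test/features/".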
diff --git a/test/features/bootstrap_bugs.feature b/test/features/bootstrap_bugs.feature new file mode 100644 index 0000000..e6a2d6e --- /dev/null +++ b/test/features/bootstrap_bugs.feature @@ -0,0 +1,251 @@ +@bootstrap +Feature: Regression test for bootstrap bugs + + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 hanode2 hanode3 + + @clean + Scenario: Set placement-strategy value as "default"(bsc#1129462) + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Show cluster status on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + And Show cluster status on "hanode1" + When Run "crm configure get_property placement-strategy" on "hanode1" + Then Got output "default" + + @clean + Scenario: Empty value not allowed for option(bsc#1141976) + When Try "crm -c ' '" + Then Except "ERROR: Empty value not allowed for dest "cib"" + When Try "crm cluster init --name ' '" + Then Except "ERROR: cluster.init: Empty value not allowed for dest "cluster_name"" + When Try "crm cluster join -c ' '" + Then Except "ERROR: cluster.join: Empty value not allowed for dest "cluster_node"" + When Try "crm cluster remove -c ' '" + Then Except "ERROR: cluster.remove: Empty value not allowed for dest "cluster_node"" + When Try "crm cluster geo_init -a ' '" + Then Except "ERROR: cluster.geo_init: Empty value not allowed for dest "arbitrator"" + When Try "crm cluster geo_join -c ' '" + Then Except "ERROR: cluster.geo_join: Empty value not allowed for dest "cluster_node"" + When Try "crm cluster geo_init_arbitrator -c ' '" + Then Except "ERROR: cluster.geo_init_arbitrator: Empty value not allowed for dest "cluster_node"" + + @clean + Scenario: Setup cluster with crossed network(udpu only) + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -u -i eth0 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Try "crm cluster join -c hanode1 -i eth1 -y" on "hanode2" + Then Cluster service is "stopped" on "hanode2" + And Except "Cannot see peer node "hanode1", please check the communication IP" in stderr + When Run "crm cluster join -c hanode1 -i eth0 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + + @clean + Scenario: Remove correspond nodelist in corosync.conf while remove(bsc#1165644) + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -u -i eth1 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -i eth1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + When Run "crm corosync get nodelist.node.ring0_addr" on "hanode1" + Then Expected "@hanode2.ip.0" in stdout + #And Service "hawk.service" is "started" on "hanode2" + When Run "crm cluster remove hanode2 -y" on "hanode1" + Then Online nodes are "hanode1" + And Cluster service is "stopped" on "hanode2" + # verify bsc#1175708 + #And Service "hawk.service" is "stopped" on "hanode2" + When Run "crm corosync get nodelist.node.ring0_addr" on "hanode1" + Then Expected "@hanode2.ip.0" not in stdout + + @clean + Scenario: Multi nodes join in parallel(bsc#1175976) + Given Cluster service is "stopped" on "hanode1" + And Cluster 
service is "stopped" on "hanode2" + And Cluster service is "stopped" on "hanode3" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Show cluster status on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2,hanode3" + Then Cluster service is "started" on "hanode2" + And Cluster service is "started" on "hanode3" + And Online nodes are "hanode1 hanode2 hanode3" + And Show cluster status on "hanode1" + And File "/etc/corosync/corosync.conf" was synced in cluster + + @clean + Scenario: Multi nodes join in parallel timed out(bsc#1175976) + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + And Cluster service is "stopped" on "hanode3" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Show cluster status on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + # Try to simulate the join process hanging on hanode2 or hanode2 died + # Just leave the lock directory unremoved + When Run "mkdir /run/.crmsh_lock_directory" on "hanode1" + When Try "crm cluster join -c hanode1 -y" on "hanode3" + Then Except "ERROR: cluster.join: Timed out after 120 seconds. Cannot continue since the lock directory exists at the node (hanode1:/run/.crmsh_lock_directory)" + When Run "rm -rf /run/.crmsh_lock_directory" on "hanode1" + + @clean + Scenario: Change host name in /etc/hosts as alias(bsc#1183654) + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "echo '@hanode1.ip.0 HANODE1'|sudo tee -a /etc/hosts" on "hanode1" + When Run "echo '@hanode2.ip.0 HANODE2'|sudo tee -a /etc/hosts" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c HANODE1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + When Run "crm cluster remove HANODE2 -y" on "hanode1" + Then Cluster service is "stopped" on "hanode2" + And Online nodes are "hanode1" + + @clean + Scenario: Stop service quickly(bsc#1203601) + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + When Run "crm cluster stop --all" on "hanode1" + Then Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster start --all;sudo crm cluster stop --all" on "hanode1" + Then Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "systemctl start corosync" on "hanode1" + Then Service "corosync" is "started" on "hanode1" + When Run "crm cluster stop" on "hanode1" + Then Service "corosync" is "stopped" on "hanode1" + + @clean + Scenario: Can't stop all nodes' cluster service when local node's service is down(bsc#1213889) + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + When Wait for DC + And Run "crm cluster stop" on "hanode1" + And Run "crm cluster stop --all" on "hanode1" + Then 
Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + + @skip_non_root + @clean + Scenario: crm cluster join default behavior change in ssh key handling (bsc#1210693) + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "rm -rf /home/alice/.ssh" on "hanode1" + When Run "rm -rf /home/alice/.ssh" on "hanode2" + When Run "su - alice -c "sudo crm cluster init -y"" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "su - alice -c "sudo crm cluster join -c hanode1 -y"" on "hanode2" + Then Cluster service is "started" on "hanode2" + + @skip_non_root + @clean + Scenario: Passwordless for root, not for sudoer(bsc#1209193) + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + When Run "useradd -m -s /bin/bash xin" on "hanode1" + When Run "echo "xin ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/xin" on "hanode1" + When Run "rm -f /root/.config/crm/crm.conf" on "hanode1" + When Run "useradd -m -s /bin/bash xin" on "hanode2" + When Run "echo "xin ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/xin" on "hanode2" + When Run "rm -f /root/.config/crm/crm.conf" on "hanode2" + When Run "su xin -c "sudo crm cluster run 'touch /tmp/1209193'"" on "hanode1" + And Run "test -f /tmp/1209193" on "hanode1" + And Run "test -f /tmp/1209193" on "hanode2" + + @skip_non_root + @clean + Scenario: Missing public key + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + When Run "rm -f /root/.ssh/id_rsa.pub" on "hanode1" + When Run "rm -f /root/.ssh/id_rsa.pub" on "hanode2" + When Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode1" + When Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode2" + When Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh" on "hanode1" + And Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh" on "hanode2" + And Run "usermod -s /usr/sbin/nologin hacluster" on "hanode1" + And Run "usermod -s /usr/sbin/nologin hacluster" on "hanode2" + And Run "crm status" on "hanode1" + Then Check user shell for hacluster between "hanode1 hanode2" + Then Check passwordless for hacluster between "hanode1 hanode2" + + @skip_non_root + @clean + Scenario: Skip upgrade when preconditions are not satisfied + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + When Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode1" + And Run "mv /root/.config/crm/crm.conf{,.bak}" on "hanode1" + Then Run "crm status" OK on "hanode1" + When Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode1" + And Run "mv /root/.config/crm/crm.conf{.bak,}" on "hanode1" + And Run "mv /root/.ssh{,.bak}" on "hanode1" + Then Run "crm status" OK on "hanode1" + And Run "rm -rf /root/.ssh && mv /root/.ssh{.bak,}" OK on "hanode1" + + # skip non-root as behave_agent is not able to run commands interactively with non-root sudoer + 
@skip_non_root + @clean + Scenario: Owner and permssion of file authorized_keys (bsc#1217279) + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + # in a newly created cluster + When Run "crm cluster init -y" on "hanode1" + And Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Run "stat -c '%U:%G' ~hacluster/.ssh/authorized_keys" OK on "hanode1" + And Expected "hacluster:haclient" in stdout + And Run "stat -c '%U:%G' ~hacluster/.ssh/authorized_keys" OK on "hanode2" + And Expected "hacluster:haclient" in stdout + # in an upgraded cluster in which ~hacluster/.ssh/authorized_keys exists + When Run "chown root:root ~hacluster/.ssh/authorized_keys && chmod 0600 ~hacluster/.ssh/authorized_keys" on "hanode1" + And Run "chown root:root ~hacluster/.ssh/authorized_keys && chmod 0600 ~hacluster/.ssh/authorized_keys" on "hanode2" + And Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode1" + And Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode2" + And Run "crm status" on "hanode1" + Then Run "stat -c '%U:%G' ~hacluster/.ssh/authorized_keys" OK on "hanode1" + And Expected "hacluster:haclient" in stdout + Then Run "stat -c '%U:%G' ~hacluster/.ssh/authorized_keys" OK on "hanode2" + And Expected "hacluster:haclient" in stdout + # in an upgraded cluster in which ~hacluster/.ssh/authorized_keys does not exist + When Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh/" on "hanode1" + And Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh/" on "hanode2" + And Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode1" + And Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode2" + And Run "crm status" on "hanode1" + Then Run "stat -c '%U:%G' ~hacluster/.ssh/authorized_keys" OK on "hanode1" + And Expected "hacluster:haclient" in stdout + Then Run "stat -c '%U:%G' ~hacluster/.ssh/authorized_keys" OK on "hanode2" + And Expected "hacluster:haclient" in stdout diff --git a/test/features/bootstrap_init_join_remove.feature b/test/features/bootstrap_init_join_remove.feature new file mode 100644 index 0000000..ed04525 --- /dev/null +++ b/test/features/bootstrap_init_join_remove.feature @@ -0,0 +1,205 @@ +@bootstrap +Feature: crmsh bootstrap process - init, join and remove + + Test crmsh bootstrap init/join/remove process + Need nodes: hanode1 hanode2 hanode3 + + Background: Setup a two nodes cluster + Given Nodes ["hanode1", "hanode2", "hanode3"] are cleaned up + And Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Show cluster status on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + And Show cluster status on "hanode1" + + Scenario: Init cluster service on node "hanode1", and join on node "hanode2" + + Scenario: Support --all or specific node to manage cluster and nodes + When Run "crm node standby --all" on "hanode1" + Then Node "hanode1" is standby + And Node "hanode2" is standby + When Run "crm node online --all" on "hanode1" + Then Node "hanode1" is online + And Node "hanode2" is online + When Wait for DC + When Run "crm cluster stop --all" on "hanode1" + Then Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster start --all" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Cluster service is "started" on "hanode2" + When Wait for DC + When Run 
"crm cluster stop hanode2" on "hanode1" + Then Cluster service is "stopped" on "hanode2" + When Run "crm cluster start hanode2" on "hanode1" + Then Cluster service is "started" on "hanode2" + When Run "crm cluster disable hanode2" on "hanode1" + Then Cluster service is "disabled" on "hanode2" + When Run "crm cluster enable hanode2" on "hanode1" + Then Cluster service is "enabled" on "hanode2" + When Run "crm cluster restart --all" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Cluster service is "started" on "hanode2" + + Scenario: Remove peer node "hanode2" + When Run "crm configure primitive d1 Dummy" on "hanode1" + When Run "crm configure primitive d2 Dummy" on "hanode2" + Then File "/etc/csync2/csync2.cfg" exists on "hanode2" + Then File "/etc/csync2/key_hagroup" exists on "hanode2" + Then File "/etc/corosync/authkey" exists on "hanode2" + Then File "/etc/corosync/corosync.conf" exists on "hanode2" + Then File "/etc/pacemaker/authkey" exists on "hanode2" + Then Directory "/var/lib/csync2/" not empty on "hanode2" + Then Directory "/var/lib/pacemaker/cib/" not empty on "hanode2" + Then Directory "/var/lib/pacemaker/pengine/" not empty on "hanode2" + Then Directory "/var/lib/corosync/" not empty on "hanode2" + When Run "crm cluster remove hanode2 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Cluster service is "stopped" on "hanode2" + And Online nodes are "hanode1" + And Show cluster status on "hanode1" + Then File "/etc/csync2/csync2.cfg" not exist on "hanode2" + Then File "/etc/csync2/key_hagroup" not exist on "hanode2" + Then File "/etc/corosync/authkey" not exist on "hanode2" + Then File "/etc/corosync/corosync.conf" not exist on "hanode2" + Then File "/etc/pacemaker/authkey" not exist on "hanode2" + Then Directory "/var/lib/csync2/" is empty on "hanode2" + Then Directory "/var/lib/pacemaker/cib/" is empty on "hanode2" + Then Directory "/var/lib/pacemaker/pengine/" is empty on "hanode2" + Then Directory "/var/lib/corosync/" is empty on "hanode2" + + Scenario: Remove local node "hanode1" + When Run "crm configure primitive d1 Dummy" on "hanode1" + When Run "crm configure primitive d2 Dummy" on "hanode1" + Then File "/etc/csync2/csync2.cfg" exists on "hanode1" + Then File "/etc/csync2/key_hagroup" exists on "hanode1" + Then File "/etc/corosync/authkey" exists on "hanode1" + Then File "/etc/corosync/corosync.conf" exists on "hanode1" + Then File "/etc/pacemaker/authkey" exists on "hanode1" + Then Directory "/var/lib/csync2/" not empty on "hanode1" + Then Directory "/var/lib/pacemaker/cib/" not empty on "hanode1" + Then Directory "/var/lib/pacemaker/pengine/" not empty on "hanode1" + Then Directory "/var/lib/corosync/" not empty on "hanode1" + When Run "crm cluster remove hanode1 -y --force" on "hanode1" + Then Cluster service is "stopped" on "hanode1" + And Cluster service is "started" on "hanode2" + And Show cluster status on "hanode2" + Then File "/etc/csync2/csync2.cfg" not exist on "hanode1" + Then File "/etc/csync2/key_hagroup" not exist on "hanode1" + Then File "/etc/corosync/authkey" not exist on "hanode1" + Then File "/etc/corosync/corosync.conf" not exist on "hanode1" + Then File "/etc/pacemaker/authkey" not exist on "hanode1" + Then Directory "/var/lib/csync2/" is empty on "hanode1" + Then Directory "/var/lib/pacemaker/cib/" is empty on "hanode1" + Then Directory "/var/lib/pacemaker/pengine/" is empty on "hanode1" + Then Directory "/var/lib/corosync/" is empty on "hanode1" + + Scenario: Remove peer node "hanode2" with `crm -F node 
delete` + When Run "crm configure primitive d1 Dummy" on "hanode1" + When Run "crm configure primitive d2 Dummy" on "hanode2" + Then File "/etc/csync2/csync2.cfg" exists on "hanode2" + Then File "/etc/csync2/key_hagroup" exists on "hanode2" + Then File "/etc/corosync/authkey" exists on "hanode2" + Then File "/etc/corosync/corosync.conf" exists on "hanode2" + Then File "/etc/pacemaker/authkey" exists on "hanode2" + Then Directory "/var/lib/csync2/" not empty on "hanode2" + Then Directory "/var/lib/pacemaker/cib/" not empty on "hanode2" + Then Directory "/var/lib/pacemaker/pengine/" not empty on "hanode2" + Then Directory "/var/lib/corosync/" not empty on "hanode2" + When Run "crm -F cluster remove hanode2" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Cluster service is "stopped" on "hanode2" + And Online nodes are "hanode1" + And Show cluster status on "hanode1" + Then File "/etc/csync2/csync2.cfg" not exist on "hanode2" + Then File "/etc/csync2/key_hagroup" not exist on "hanode2" + Then File "/etc/corosync/authkey" not exist on "hanode2" + Then File "/etc/corosync/corosync.conf" not exist on "hanode2" + Then File "/etc/pacemaker/authkey" not exist on "hanode2" + Then Directory "/var/lib/csync2/" is empty on "hanode2" + Then Directory "/var/lib/pacemaker/cib/" is empty on "hanode2" + Then Directory "/var/lib/pacemaker/pengine/" is empty on "hanode2" + Then Directory "/var/lib/corosync/" is empty on "hanode2" + When Run "crm cluster remove hanode1 -y --force" on "hanode1" + Then File "/etc/corosync/corosync.conf" not exist on "hanode1" + + Scenario: Remove local node "hanode1" with `crm -F node delete` + When Run "crm configure primitive d1 Dummy" on "hanode1" + When Run "crm configure primitive d2 Dummy" on "hanode1" + Then File "/etc/csync2/csync2.cfg" exists on "hanode1" + Then File "/etc/csync2/key_hagroup" exists on "hanode1" + Then File "/etc/corosync/authkey" exists on "hanode1" + Then File "/etc/corosync/corosync.conf" exists on "hanode1" + Then File "/etc/pacemaker/authkey" exists on "hanode1" + Then Directory "/var/lib/csync2/" not empty on "hanode1" + Then Directory "/var/lib/pacemaker/cib/" not empty on "hanode1" + Then Directory "/var/lib/pacemaker/pengine/" not empty on "hanode1" + Then Directory "/var/lib/corosync/" not empty on "hanode1" + When Run "crm -F node delete hanode1" on "hanode1" + Then Cluster service is "stopped" on "hanode1" + And Cluster service is "started" on "hanode2" + And Show cluster status on "hanode2" + Then File "/etc/csync2/csync2.cfg" not exist on "hanode1" + Then File "/etc/csync2/key_hagroup" not exist on "hanode1" + Then File "/etc/corosync/authkey" not exist on "hanode1" + Then File "/etc/corosync/corosync.conf" not exist on "hanode1" + Then File "/etc/pacemaker/authkey" not exist on "hanode1" + Then Directory "/var/lib/csync2/" is empty on "hanode1" + Then Directory "/var/lib/pacemaker/cib/" is empty on "hanode1" + Then Directory "/var/lib/pacemaker/pengine/" is empty on "hanode1" + Then Directory "/var/lib/corosync/" is empty on "hanode1" + + Scenario: Check hacluster's passwordless configuration on 2 nodes + Then Check user shell for hacluster between "hanode1 hanode2" + Then Check passwordless for hacluster between "hanode1 hanode2" + + Scenario: Check hacluster's passwordless configuration in old cluster, 2 nodes + When Run "crm cluster stop --all" on "hanode1" + Then Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster 
service is "started" on "hanode1" + When Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + And Check passwordless for hacluster between "hanode1 hanode2" + + Scenario: Check hacluster's passwordless configuration on 3 nodes + Given Cluster service is "stopped" on "hanode3" + When Run "crm cluster join -c hanode1 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + And Online nodes are "hanode1 hanode2 hanode3" + And Check user shell for hacluster between "hanode1 hanode2 hanode3" + And Check passwordless for hacluster between "hanode1 hanode2 hanode3" + + Scenario: Check hacluster's passwordless configuration in old cluster, 3 nodes + Given Cluster service is "stopped" on "hanode3" + When Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh" on "hanode1" + And Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh" on "hanode2" + When Run "crm cluster join -c hanode1 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + And Online nodes are "hanode1 hanode2 hanode3" + And Check passwordless for hacluster between "hanode1 hanode2 hanode3" + + Scenario: Check hacluster's user shell + Given Cluster service is "stopped" on "hanode3" + When Run "crm cluster join -c hanode1 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + And Online nodes are "hanode1 hanode2 hanode3" + When Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh" on "hanode1" + And Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh" on "hanode2" + And Run "rm -rf /var/lib/heartbeat/cores/hacluster/.ssh" on "hanode3" + And Run "usermod -s /usr/sbin/nologin hacluster" on "hanode1" + And Run "usermod -s /usr/sbin/nologin hacluster" on "hanode2" + And Run "usermod -s /usr/sbin/nologin hacluster" on "hanode3" + And Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode1" + And Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode2" + And Run "rm -f /var/lib/crmsh/upgrade_seq" on "hanode3" + And Run "crm status" on "hanode1" + Then Check user shell for hacluster between "hanode1 hanode2 hanode3" + Then Check passwordless for hacluster between "hanode1 hanode2 hanode3" diff --git a/test/features/bootstrap_options.feature b/test/features/bootstrap_options.feature new file mode 100644 index 0000000..5ccc052 --- /dev/null +++ b/test/features/bootstrap_options.feature @@ -0,0 +1,165 @@ +@bootstrap +Feature: crmsh bootstrap process - options + + Test crmsh bootstrap options: + "--node": Additional nodes to add to the created cluster + "-i": Bind to IP address on interface IF + "-M": Configure corosync with second heartbeat line + "-n": Set the name of the configured cluster + "-A": Configure IP address as an administration virtual IP + "-u": Configure corosync to communicate over unicast + "-U": Configure corosync to communicate over multicast + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 hanode2 hanode3 + + @clean + Scenario: Check help output + When Run "crm -h" on "hanode1" + Then Output is the same with expected "crm" help output + When Run "crm cluster init -h" on "hanode1" + Then Output is the same with expected "crm cluster init" help output + When Run "crm cluster join -h" on "hanode1" + Then Output is the same with expected "crm cluster join" help output + When Run "crm cluster remove -h" on "hanode1" + Then Output is the same with expected "crm cluster remove" help output + When Run 
"crm cluster geo_init -h" on "hanode1" + Then Output is the same with expected "crm cluster geo-init" help output + When Run "crm cluster geo_join -h" on "hanode1" + Then Output is the same with expected "crm cluster geo-join" help output + When Run "crm cluster geo_init_arbitrator -h" on "hanode1" + Then Output is the same with expected "crm cluster geo-init-arbitrator" help output + When Try "crm cluster init -i eth1 -i eth1 -y" + Then Except multiple lines + """ + usage: init [options] [STAGE] + crm: error: Duplicated input for '-i/--interface' option + """ + When Try "crm cluster init sbd -x -y" on "hanode1" + Then Expected "-x option or SKIP_CSYNC2_SYNC can't be used with any stage" in stderr + When Try "crm cluster init -i eth0 -i eth1 -i eth2 -y" on "hanode1" + Then Expected "Maximum number of interface is 2" in stderr + When Try "crm cluster init sbd -N hanode1 -N hanode2 -y" on "hanode1" + Then Expected "Can't use -N/--nodes option and stage(sbd) together" in stderr + + @clean + Scenario: Init whole cluster service on node "hanode1" using "--node" option + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y --node "hanode1 hanode2 hanode3"" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + And Show cluster status on "hanode1" + + @clean + Scenario: Bind specific network interface using "-i" option + Given Cluster service is "stopped" on "hanode1" + And IP "@hanode1.ip.0" is belong to "eth1" + When Run "crm cluster init -i eth1 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And IP "@hanode1.ip.0" is used by corosync on "hanode1" + And Show corosync ring status + + @clean + Scenario: Using multiple network interface using "-M" option + Given Cluster service is "stopped" on "hanode1" + And IP "@hanode1.ip.default" is belong to "eth0" + And IP "@hanode1.ip.0" is belong to "eth1" + When Run "crm cluster init -M -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And IP "@hanode1.ip.default" is used by corosync on "hanode1" + And IP "@hanode1.ip.0" is used by corosync on "hanode1" + And Show corosync ring status + And Corosync working on "unicast" mode + + @clean + Scenario: Using multiple network interface using "-i" option + Given Cluster service is "stopped" on "hanode1" + And IP "@hanode1.ip.default" is belong to "eth0" + And IP "@hanode1.ip.0" is belong to "eth1" + When Run "crm cluster init -i eth0 -i eth1 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And IP "@hanode1.ip.default" is used by corosync on "hanode1" + And IP "@hanode1.ip.0" is used by corosync on "hanode1" + And Show corosync ring status + + @clean + Scenario: Setup cluster name and virtual IP using "-A" option + Given Cluster service is "stopped" on "hanode1" + When Try "crm cluster init -A xxx -y" + Then Except "ERROR: cluster.init: 'xxx' does not appear to be an IPv4 or IPv6 address" + When Try "crm cluster init -A @hanode1.ip.0 -y" + Then Except "ERROR: cluster.init: Address already in use: @hanode1.ip.0" + When Run "crm cluster init -n hatest -A @vip.0 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Cluster name is "hatest" + And Cluster virtual IP is "@vip.0" + And Show cluster status on "hanode1" + + @clean + Scenario: Init cluster service with udpu using "-u" option + Given Cluster service is "stopped" on "hanode1" + When Run "crm cluster init -u -y -i 
eth0" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Cluster is using udpu transport mode + And IP "@hanode1.ip.default" is used by corosync on "hanode1" + And Show corosync ring status + And Corosync working on "unicast" mode + + @clean + Scenario: Init cluster service with ipv6 using "-I" option + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -I -i eth1 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And IP "@hanode1.ip6.default" is used by corosync on "hanode1" + When Run "crm cluster join -c hanode1 -i eth1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And IP "@hanode2.ip6.default" is used by corosync on "hanode2" + And Corosync working on "unicast" mode + + @clean + Scenario: Init cluster service with ipv6 unicast using "-I" and "-u" option + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -I -i eth1 -u -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And IP "@hanode1.ip6.default" is used by corosync on "hanode1" + When Run "crm cluster join -c hanode1 -i eth1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And IP "@hanode2.ip6.default" is used by corosync on "hanode2" + And Show cluster status on "hanode1" + And Corosync working on "unicast" mode + + @clean + Scenario: Init cluster service with multicast using "-U" option (bsc#1132375) + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -U -i eth1 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -i eth1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Show cluster status on "hanode1" + And Corosync working on "multicast" mode + + @clean + Scenario: Init cluster with -N option (bsc#1175863) + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -N hanode1 -N hanode2 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Cluster service is "started" on "hanode2" + + @clean + Scenario: Skip using csync2 by -x option + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y -x" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "csync2.socket" is "stopped" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "csync2.socket" is "stopped" on "hanode2" + When Run "crm cluster init csync2 -y" on "hanode1" + Then Service "csync2.socket" is "started" on "hanode1" + And Service "csync2.socket" is "started" on "hanode2" diff --git a/test/features/bootstrap_sbd_delay.feature b/test/features/bootstrap_sbd_delay.feature new file mode 100644 index 0000000..8b636d1 --- /dev/null +++ b/test/features/bootstrap_sbd_delay.feature @@ -0,0 +1,286 @@ +@sbd +Feature: configure sbd delay start correctly + + Tag @clean means need to stop cluster service if the service is available + + @clean + Scenario: disk-based SBD with small sbd_watchdog_timeout + Given Run "test -f /etc/crm/profiles.yml" OK + Given Yaml "default:corosync.totem.token" value is "5000" + Given Yaml "default:sbd.watchdog_timeout" value is "15" + + Given Has disk "/dev/sda1" on "hanode1" + Given Cluster service is 
"stopped" on "hanode1" + When Run "crm cluster init -s /dev/sda1 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + And Resource "stonith-sbd" type "external/sbd" is "Started" + And SBD option "SBD_DELAY_START" value is "no" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And SBD option "msgwait" value for "/dev/sda1" is "30" + # calculated and set by sbd RA + And Cluster property "stonith-timeout" is "43" + And Parameter "pcmk_delay_max" not configured in "stonith-sbd" + + Given Has disk "/dev/sda1" on "hanode2" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode2" + # SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait) # for disk-based sbd + And SBD option "SBD_DELAY_START" value is "71" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And SBD option "msgwait" value for "/dev/sda1" is "30" + # value_from_sbd >= 1.2 * (pcmk_delay_max + msgwait) # for disk-based sbd + # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus + And Cluster property "stonith-timeout" is "83" + And Parameter "pcmk_delay_max" configured in "stonith-sbd" + + Given Has disk "/dev/sda1" on "hanode3" + Given Cluster service is "stopped" on "hanode3" + When Run "crm cluster join -c hanode1 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + And Service "sbd" is "started" on "hanode3" + # SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait) # for disk-based sbd + # runtime value is "41", we keep the larger one here + And SBD option "SBD_DELAY_START" value is "71" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And SBD option "msgwait" value for "/dev/sda1" is "30" + # value_from_sbd >= 1.2 * (pcmk_delay_max + msgwait) # for disk-based sbd + # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus + # runtime value is "71", we keep ther larger one here + And Cluster property "stonith-timeout" is "83" + And Parameter "pcmk_delay_max" not configured in "stonith-sbd" + + When Run "crm cluster remove hanode3 -y" on "hanode1" + Then Cluster service is "stopped" on "hanode3" + And Service "sbd" is "stopped" on "hanode3" + And SBD option "SBD_DELAY_START" value is "71" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And SBD option "msgwait" value for "/dev/sda1" is "30" + And Cluster property "stonith-timeout" is "83" + And Parameter "pcmk_delay_max" configured in "stonith-sbd" + + @clean + Scenario: disk-less SBD with small sbd_watchdog_timeout + Given Run "test -f /etc/crm/profiles.yml" OK + Given Yaml "default:corosync.totem.token" value is "5000" + Given Yaml "default:sbd.watchdog_timeout" value is "15" + + Given Cluster service is "stopped" on "hanode1" + When Run "crm cluster init -S -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And SBD option "SBD_DELAY_START" value is "no" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And Cluster property "stonith-timeout" is "60" + + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + # SBD_DELAY_START >= (token + consensus + 2*SBD_WATCHDOG_TIMEOUT) # for disk-less sbd + And SBD option "SBD_DELAY_START" value is "41" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + # stonith-timeout >= 
1.2 * max(stonith_watchdog_timeout, 2*SBD_WATCHDOG_TIMEOUT) # for disk-less sbd + # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus + And Cluster property "stonith-timeout" is "71" + + Given Cluster service is "stopped" on "hanode3" + When Run "crm cluster join -c hanode1 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + And SBD option "SBD_DELAY_START" value is "41" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And Cluster property "stonith-timeout" is "71" + + When Run "crm cluster remove hanode3 -y" on "hanode1" + Then Cluster service is "stopped" on "hanode3" + And SBD option "SBD_DELAY_START" value is "41" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And Cluster property "stonith-timeout" is "71" + + @clean + Scenario: disk-based SBD with big sbd_watchdog_timeout + When Run "sed -i 's/watchdog_timeout: 15/watchdog_timeout: 60/' /etc/crm/profiles.yml" on "hanode1" + Given Yaml "default:corosync.totem.token" value is "5000" + Given Yaml "default:sbd.watchdog_timeout" value is "60" + + Given Has disk "/dev/sda1" on "hanode1" + Given Cluster service is "stopped" on "hanode1" + When Run "crm cluster init -s /dev/sda1 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + And Resource "stonith-sbd" type "external/sbd" is "Started" + And SBD option "SBD_DELAY_START" value is "no" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60" + And SBD option "msgwait" value for "/dev/sda1" is "120" + # calculated and set by sbd RA + And Cluster property "stonith-timeout" is "172" + And Parameter "pcmk_delay_max" not configured in "stonith-sbd" + + Given Has disk "/dev/sda1" on "hanode2" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode2" + # SBD_DELAY_START >= (token + consensus + pcmk_delay_max + msgwait) # for disk-based sbd + And SBD option "SBD_DELAY_START" value is "161" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60" + And SBD option "msgwait" value for "/dev/sda1" is "120" + # stonith-timeout >= 1.2 * (pcmk_delay_max + msgwait) # for disk-based sbd + # stonith_timeout >= max(value_from_sbd, constants.STONITH_TIMEOUT_DEFAULT) + token + consensus + And Cluster property "stonith-timeout" is "191" + And Parameter "pcmk_delay_max" configured in "stonith-sbd" + # since SBD_DELAY_START value(161s) > default systemd startup value(1min 30s) + And Run "test -f /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK + # 1.2*SBD_DELAY_START + And Run "grep 'TimeoutSec=193' /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK + + Given Has disk "/dev/sda1" on "hanode3" + Given Cluster service is "stopped" on "hanode3" + When Run "crm cluster join -c hanode1 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + And Service "sbd" is "started" on "hanode3" + And SBD option "SBD_DELAY_START" value is "161" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60" + And SBD option "msgwait" value for "/dev/sda1" is "120" + And Cluster property "stonith-timeout" is "191" + And Parameter "pcmk_delay_max" not configured in "stonith-sbd" + And Run "test -f /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK + And Run "grep 'TimeoutSec=193' /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK + + When Run "crm cluster remove hanode3 -y" on "hanode1" + Then Cluster service is "stopped" on 
"hanode3" + And Service "sbd" is "stopped" on "hanode3" + And SBD option "SBD_DELAY_START" value is "161" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "60" + And SBD option "msgwait" value for "/dev/sda1" is "120" + And Cluster property "stonith-timeout" is "191" + And Parameter "pcmk_delay_max" configured in "stonith-sbd" + And Run "test -f /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK + And Run "grep 'TimeoutSec=193' /etc/systemd/system/sbd.service.d/sbd_delay_start.conf" OK + When Run "sed -i 's/watchdog_timeout: 60/watchdog_timeout: 15/g' /etc/crm/profiles.yml" on "hanode1" + + @clean + Scenario: Add sbd via stage on a running cluster + Given Run "test -f /etc/crm/profiles.yml" OK + Given Yaml "default:corosync.totem.token" value is "5000" + Given Yaml "default:sbd.watchdog_timeout" value is "15" + + Given Has disk "/dev/sda1" on "hanode1" + Given Has disk "/dev/sda1" on "hanode2" + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + + When Run "crm cluster init sbd -s /dev/sda1 -y" on "hanode1" + Then Service "sbd" is "started" on "hanode1" + Then Service "sbd" is "started" on "hanode2" + And SBD option "SBD_DELAY_START" value is "71" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And SBD option "msgwait" value for "/dev/sda1" is "30" + And Cluster property "stonith-timeout" is "83" + And Parameter "pcmk_delay_max" configured in "stonith-sbd" + + @clean + Scenario: Add disk-based sbd with qdevice + Given Run "test -f /etc/crm/profiles.yml" OK + Given Yaml "default:corosync.totem.token" value is "5000" + Given Yaml "default:sbd.watchdog_timeout" value is "15" + Given Has disk "/dev/sda1" on "hanode1" + Given Has disk "/dev/sda1" on "hanode2" + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + + When Run "crm cluster init -s /dev/sda1 --qnetd-hostname=qnetd-node -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "corosync-qdevice" is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode2" + And Service "sbd" is "started" on "hanode1" + And Service "sbd" is "started" on "hanode2" + + And SBD option "SBD_DELAY_START" value is "41" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "15" + And SBD option "msgwait" value for "/dev/sda1" is "30" + And Cluster property "stonith-timeout" is "71" + And Parameter "pcmk_delay_max" not configured in "stonith-sbd" + + @clean + Scenario: Add disk-less sbd with qdevice + Given Run "test -f /etc/crm/profiles.yml" OK + Given Yaml "default:corosync.totem.token" value is "5000" + Given Yaml "default:sbd.watchdog_timeout" value is "15" + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + + When Run "crm cluster init -S --qnetd-hostname=qnetd-node -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "corosync-qdevice" is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode2" + And Service "sbd" is "started" on "hanode1" + And Service "sbd" is "started" on "hanode2" + + And SBD 
option "SBD_DELAY_START" value is "81" + And SBD option "SBD_WATCHDOG_TIMEOUT" value is "35" + And Cluster property "stonith-timeout" is "95" + And Cluster property "stonith-watchdog-timeout" is "-1" + + @clean + Scenario: Add and remove qdevice from cluster with sbd running + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -s /dev/sda1 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode1" + And Service "sbd" is "started" on "hanode2" + And Parameter "pcmk_delay_max" configured in "stonith-sbd" + When Run "crm cluster init qdevice --qnetd-hostname=qnetd-node -y" on "hanode1" + Then Service "corosync-qdevice" is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode2" + And Parameter "pcmk_delay_max" not configured in "stonith-sbd" + When Run "crm cluster remove --qdevice -y" on "hanode1" + Then Service "corosync-qdevice" is "stopped" on "hanode1" + And Service "corosync-qdevice" is "stopped" on "hanode2" + And Parameter "pcmk_delay_max" configured in "stonith-sbd" + + @clean + Scenario: Test priority-fence-delay and priority + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Property "priority" in "rsc_defaults" is "1" + When Run "crm cluster remove hanode2 -y" on "hanode1" + Then Cluster service is "stopped" on "hanode2" + And Property "priority" in "rsc_defaults" is "0" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Property "priority" in "rsc_defaults" is "1" + When Run "crm cluster init qdevice --qnetd-hostname=qnetd-node -y" on "hanode1" + Then Service "corosync-qdevice" is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode2" + And Property "priority" in "rsc_defaults" is "0" + When Run "crm cluster remove --qdevice -y" on "hanode1" + Then Service "corosync-qdevice" is "stopped" on "hanode1" + And Service "corosync-qdevice" is "stopped" on "hanode2" + And Property "priority" in "rsc_defaults" is "1" + When Run "crm cluster init sbd -s /dev/sda1 -y" on "hanode1" + Then Service "sbd" is "started" on "hanode1" + And Service "sbd" is "started" on "hanode2" + And Parameter "pcmk_delay_max" configured in "stonith-sbd" + And Cluster property "stonith-timeout" is "83" + And Cluster property "priority-fencing-delay" is "60" + When Run "crm cluster remove hanode2 -y" on "hanode1" + Then Cluster service is "stopped" on "hanode2" + And Property "priority" in "rsc_defaults" is "0" + And Cluster property "priority-fencing-delay" is "0" + And Parameter "pcmk_delay_max" not configured in "stonith-sbd" diff --git a/test/features/bootstrap_sbd_normal.feature b/test/features/bootstrap_sbd_normal.feature new file mode 100644 index 0000000..8c5d421 --- /dev/null +++ b/test/features/bootstrap_sbd_normal.feature @@ -0,0 +1,272 @@ +@sbd +Feature: crmsh bootstrap sbd management + + Tag @clean means need to stop cluster service if the service is available + + @clean + Scenario: Verify sbd device + When Try "crm cluster init -s "/dev/sda1;/dev/sda2;/dev/sda3;/dev/sda4" -y" + Then Except "ERROR: 
cluster.init: Maximum number of SBD device is 3" + When Try "crm cluster init -s "/dev/sda1;/dev/sdaxxxx" -y" + Then Except "ERROR: cluster.init: /dev/sdaxxxx doesn't look like a block device" + When Try "crm cluster init -s "/dev/sda1;/dev/sda1" -y" + Then Except multiple lines + """ + usage: init [options] [STAGE] + crm: error: Duplicated input for '-s/--sbd-device' option + """ + + @clean + Scenario: Setup sbd with init and join process(bsc#1170999) + Given Has disk "/dev/sda1" on "hanode1" + Given Cluster service is "stopped" on "hanode1" + Given Has disk "/dev/sda1" on "hanode2" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -s /dev/sda1 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + And Resource "stonith-sbd" type "external/sbd" is "Started" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode2" + + @clean + Scenario: Re-setup cluster without sbd(bsc#1166967) + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "stopped" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "stopped" on "hanode2" + And Resource "stonith:external/sbd" not configured + + @clean + Scenario: Configure diskless sbd(bsc#1181907) + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -S -y" on "hanode1" + Then Expected "Diskless SBD requires cluster with three or more nodes." in stderr + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Expected "Diskless SBD requires cluster with three or more nodes." in stderr + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode2" + When Run "crm cluster join -c hanode1 -y" on "hanode3" + Then Expected "Diskless SBD requires cluster with three or more nodes." 
not in stderr + Then Cluster service is "started" on "hanode3" + And Service "sbd" is "started" on "hanode3" + And Resource "stonith:external/sbd" not configured + + @clean + Scenario: Configure multi disks sbd + Given Has disk "/dev/sda1" on "hanode1" + Given Has disk "/dev/sda2" on "hanode1" + Given Cluster service is "stopped" on "hanode1" + Given Has disk "/dev/sda1" on "hanode2" + Given Has disk "/dev/sda2" on "hanode2" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -s /dev/sda1 -s /dev/sda2 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + And Resource "stonith-sbd" type "external/sbd" is "Started" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode2" + + @clean + Scenario: Configure sbd in several stages(bsc#1175057) + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init ssh -y" on "hanode1" + And Run "crm cluster init csync2 -y" on "hanode1" + And Run "crm cluster init corosync -y" on "hanode1" + And Run "crm cluster init sbd -s /dev/sda1 -y" on "hanode1" + And Run "crm cluster init cluster -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + When Run "crm cluster join ssh -y -c hanode1" on "hanode2" + And Run "crm cluster join csync2 -y -c hanode1" on "hanode2" + And Run "crm cluster join ssh_merge -y -c hanode1" on "hanode2" + And Run "crm cluster join cluster -y -c hanode1" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode2" + And Resource "stonith-sbd" type "external/sbd" is "Started" + + @clean + Scenario: Configure diskless sbd in several stages(bsc#1175057) + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init ssh -y" on "hanode1" + And Run "crm cluster init csync2 -y" on "hanode1" + And Run "crm cluster init corosync -y" on "hanode1" + And Run "crm cluster init sbd -S -y" on "hanode1" + And Run "crm cluster init cluster -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + When Run "crm cluster join ssh -y -c hanode1" on "hanode2" + And Run "crm cluster join csync2 -y -c hanode1" on "hanode2" + And Run "crm cluster join ssh_merge -y -c hanode1" on "hanode2" + And Run "crm cluster join cluster -y -c hanode1" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode2" + And Resource "stonith:external/sbd" not configured + + @clean + Scenario: Configure sbd on running cluster via stage(bsc#1181906) + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + When Run "crm cluster init sbd -s /dev/sda1 -y" on "hanode1" + Then Service "sbd" is "started" on "hanode1" + And Service "sbd" is "started" on "hanode2" + And Resource "stonith-sbd" type "external/sbd" is "Started" + + @clean + Scenario: Configure sbd on running cluster via stage with ra running(bsc#1181906) + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is 
"stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + When Run "crm configure primitive d Dummy op monitor interval=3s" on "hanode1" + When Run "crm cluster init sbd -s /dev/sda1 -y" on "hanode1" + Then Expected "WARNING: To start sbd.service, need to restart cluster service manually on each node" in stderr + Then Service "sbd" is "stopped" on "hanode1" + And Service "sbd" is "stopped" on "hanode2" + When Run "crm cluster restart" on "hanode1" + Then Service "sbd" is "started" on "hanode1" + When Run "crm cluster restart" on "hanode2" + Then Service "sbd" is "started" on "hanode2" + When Run "sleep 20" on "hanode1" + Then Resource "stonith-sbd" type "external/sbd" is "Started" + + @clean + Scenario: Configure sbd when no watchdog device(bsc#1154927, bsc#1178869) + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Try "lsmod |grep softdog && rmmod softdog" on "hanode1" + And Try "lsmod |grep softdog && rmmod softdog" on "hanode2" + When Run "crm cluster init -s /dev/sda1 -w softdog -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + When Try "lsmod |grep softdog" + Then Expected return code is "0" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode2" + And Resource "stonith-sbd" type "external/sbd" is "Started" + + @clean + Scenario: Setup sbd and test fence node + Given Has disk "/dev/sda1" on "hanode1" + Given Cluster service is "stopped" on "hanode1" + Given Has disk "/dev/sda1" on "hanode2" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -s /dev/sda1 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + And Resource "stonith-sbd" type "external/sbd" is "Started" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode2" + When Run "stonith_admin -H hanode2 -c" on "hanode1" + When Run "crm -F node fence hanode2" on "hanode1" + Then Expected return code is "0" + Then Node "hanode2" is UNCLEAN + Then Wait "60" seconds for "hanode2" successfully fenced + + @skip_non_root + @clean + Scenario: Setup sbd and test fence node, use hacluster to fence + Given Has disk "/dev/sda1" on "hanode1" + Given Cluster service is "stopped" on "hanode1" + Given Has disk "/dev/sda1" on "hanode2" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -s /dev/sda1 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + And Resource "stonith-sbd" type "external/sbd" is "Started" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode2" + When Run "stonith_admin -H hanode2 -c" on "hanode1" + When Run "su hacluster -c '/usr/sbin/crm -F node fence hanode2'" on "hanode1" + Then Expected return code is "0" + Then Node "hanode2" is UNCLEAN + Then Wait "60" seconds for "hanode2" successfully fenced + + @clean + Scenario: Change existing diskbased sbd cluster as diskless sbd + Given Has disk "/dev/sda1" on "hanode1" + Given Cluster service is 
"stopped" on "hanode1" + Given Has disk "/dev/sda1" on "hanode2" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -s /dev/sda1 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + And Resource "stonith-sbd" type "external/sbd" is "Started" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode2" + And Run "ps -ef|grep -v grep|grep 'watcher: /dev/sda1 '" OK + + When Run "crm -F cluster init sbd -S -y" on "hanode1" + Then Service "sbd" is "started" on "hanode1" + And Service "sbd" is "started" on "hanode2" + And Resource "stonith:external/sbd" not configured + When Try "ps -ef|grep -v grep|grep 'watcher: /dev/sda1 '" + Then Expected return code is "1" + + @clean + Scenario: Change existing diskless sbd cluster as diskbased sbd + Given Has disk "/dev/sda1" on "hanode1" + Given Cluster service is "stopped" on "hanode1" + Given Has disk "/dev/sda1" on "hanode2" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -S -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode2" + And Resource "stonith:external/sbd" not configured + + When Run "crm -F cluster init sbd -s /dev/sda1 -y" on "hanode1" + Then Service "sbd" is "started" on "hanode1" + And Service "sbd" is "started" on "hanode2" + And Resource "stonith-sbd" type "external/sbd" is "Started" + And Run "ps -ef|grep -v grep|grep 'watcher: /dev/sda1 '" OK + + @clean + Scenario: Change sbd device + Given Has disk "/dev/sda1" on "hanode1" + Given Has disk "/dev/sda2" on "hanode1" + Given Cluster service is "stopped" on "hanode1" + Given Has disk "/dev/sda1" on "hanode2" + Given Has disk "/dev/sda2" on "hanode2" + Given Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -s /dev/sda1 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Service "sbd" is "started" on "hanode2" + And Resource "stonith-sbd" type "external/sbd" is "Started" + And Run "ps -ef|grep -v grep|grep 'watcher: /dev/sda1 '" OK + + When Run "crm -F cluster init sbd -s /dev/sda2 -y" on "hanode1" + Then Service "sbd" is "started" on "hanode1" + And Service "sbd" is "started" on "hanode2" + And Resource "stonith-sbd" type "external/sbd" is "Started" + And Run "ps -ef|grep -v grep|grep 'watcher: /dev/sda2 '" OK + When Try "ps -ef|grep -v grep|grep 'watcher: /dev/sda1 '" + Then Expected return code is "1" diff --git a/test/features/cluster_api.feature b/test/features/cluster_api.feature new file mode 100644 index 0000000..b8676be --- /dev/null +++ b/test/features/cluster_api.feature @@ -0,0 +1,143 @@ +@cluster_api +Feature: Functional test to cover SAP clusterAPI + + To avoid possible regression on crmsh side when adapting SAP Applications + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 hanode2 + + Background: Setup a two nodes cluster + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Show 
cluster status on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + When Run "crm configure primitive d Dummy" on "hanode1" + And Wait "3" seconds + Then Resource "d" type "Dummy" is "Started" + And Show cluster status on "hanode1" + When Run "echo 'export PATH=$PATH:/usr/sbin/' > ~hacluster/.bashrc" on "hanode1" + When Run "echo 'export PATH=$PATH:/usr/sbin/' > ~hacluster/.bashrc" on "hanode2" + + @clean + Scenario: Start and stop resource by hacluster + When Run "su - hacluster -c 'crm resource stop d'" on "hanode1" + Then Expected return code is "0" + When Wait "3" seconds + Then Resource "d" type "Dummy" is "Stopped" + And Show cluster status on "hanode1" + When Run "su - hacluster -c 'crm resource start d'" on "hanode1" + Then Expected return code is "0" + When Wait "3" seconds + Then Resource "d" type "Dummy" is "Started" + And Show cluster status on "hanode1" + + @clean + Scenario: Resource move by hacluster + Given Resource "d" is started on "hanode1" + # move <res> <node> + When Run "su - hacluster -c 'crm resource move d hanode2'" on "hanode1" + Then Expected return code is "0" + When Run "sleep 2" on "hanode1" + Then Resource "d" is started on "hanode2" + When Run "su - hacluster -c 'crm resource clear d'" on "hanode1" + Then Expected return code is "0" + + # move <res> <node> force + When Run "su - hacluster -c 'crm resource move d hanode1'" on "hanode1" + Then Expected return code is "0" + When Run "sleep 2" on "hanode1" + Then Resource "d" is started on "hanode1" + When Run "su - hacluster -c 'crm resource clear d'" on "hanode1" + Then Expected return code is "0" + + # move <res> force + When Run "su - hacluster -c 'crm resource move d force'" on "hanode1" + Then Expected return code is "0" + When Run "sleep 2" on "hanode1" + Then Resource "d" is started on "hanode2" + When Run "su - hacluster -c 'crm resource clear d'" on "hanode1" + Then Expected return code is "0" + + # move <res> <lifetime> force + When Run "su - hacluster -c 'crm resource move d PT5M force'" on "hanode1" + Then Expected return code is "0" + When Run "sleep 2" on "hanode1" + Then Resource "d" is started on "hanode1" + When Run "su - hacluster -c 'crm resource clear d'" on "hanode1" + Then Expected return code is "0" + + # move <res> <node> <lifetime> + When Run "su - hacluster -c 'crm resource move d hanode2 PT5M'" on "hanode1" + Then Expected return code is "0" + When Run "sleep 2" on "hanode1" + Then Resource "d" is started on "hanode2" + When Run "su - hacluster -c 'crm resource clear d'" on "hanode1" + Then Expected return code is "0" + + # move <res> <node> <lifetime> force + When Run "su - hacluster -c 'crm resource move d hanode1 PT5M force'" on "hanode1" + Then Expected return code is "0" + When Run "sleep 2" on "hanode1" + Then Resource "d" is started on "hanode1" + When Run "su - hacluster -c 'crm resource clear d'" on "hanode1" + Then Expected return code is "0" + + When Try "crm resource move d hanode2 PT5M force xxx" + Then Except "ERROR: resource.move: usage: move <rsc> [<node>] [<lifetime>] [force]" + When Try "crm resource move d hanode2 PT5M forcd" + Then Except "ERROR: resource.move: usage: move <rsc> [<node>] [<lifetime>] [force]" + When Try "crm resource move d xxxx PT5M force" + Then Except "ERROR: resource.move: Not our node: xxxx" + When Try "crm resource move d" + Then Except "ERROR: resource.move: No target node: Move requires either a target node or 'force'" + + @clean + 
Scenario: Run "crm configure show" by hacluster + When Run "crm configure primitive d2 Dummy op monitor interval=10s timeout=20s on-fail=restart params fake=test meta resource-stickiness=5000" on "hanode1" + And Run "crm configure group g d2 meta resource-stickiness=3000" on "hanode1" + And Wait "3" seconds + Then Resource "d2" type "Dummy" is "Started" + And Show cluster status on "hanode1" + When Run "su - hacluster -c 'crm configure show'" on "hanode1" + Then Expected return code is "0" + And Expected multiple lines in output + """ + primitive d2 Dummy \ + params fake=test \ + meta resource-stickiness=5000 \ + op monitor interval=10s timeout=20s on-fail=restart \ + op start timeout=20s interval=0s \ + op stop timeout=20s interval=0s + group g d2 \ + meta resource-stickiness=3000 + """ + + @clean + Scenario: pacemaker ACL related operations by hacluster + When Run "su - hacluster -c 'crm configure primitive d2 Dummy'" on "hanode1" + And Wait "3" seconds + Then Resource "d2" type "Dummy" is "Started" + When Run "su - hacluster -c 'crm maintenance on'" on "hanode1" + When Run "crm_mon -1" on "hanode1" + Then Expected "Resource management is DISABLED" in stdout + When Run "su - hacluster -c 'crm maintenance off'" on "hanode1" + When Run "crm_mon -1" on "hanode1" + Then Expected "Resource management is DISABLED" not in stdout + When Run "su - hacluster -c 'crm node standby hanode2'" on "hanode1" + Then Node "hanode2" is standby + When Run "su - hacluster -c 'crm node online hanode2'" on "hanode1" + Then Node "hanode2" is online + When Run "su - hacluster -c 'crm ra providers Dummy'" on "hanode1" + Then Expected "heartbeat pacemaker" in stdout + When Run "su - hacluster -c 'crm status'" on "hanode1" + Then Expected "Online: [ hanode1 hanode2 ]" in stdout + When Run "su - hacluster -c '/usr/sbin/crm report /tmp/report'" on "hanode1" + Then No crmsh tracebacks + Then File "/tmp/report.tar.bz2" exists on "hanode1" + And Directory "hanode1" in "/tmp/report.tar.bz2" + And Directory "hanode2" in "/tmp/report.tar.bz2" + And File "pacemaker.log" in "/tmp/report.tar.bz2" + And File "corosync.conf" in "/tmp/report.tar.bz2" diff --git a/test/features/configure_bugs.feature b/test/features/configure_bugs.feature new file mode 100644 index 0000000..7b1222d --- /dev/null +++ b/test/features/configure_bugs.feature @@ -0,0 +1,38 @@ +@configure +Feature: Functional test for configure sub level + + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 hanode2 + + @clean + Scenario: Replace sensitive data by default(bsc#1163581) + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + + # mask password by default + When Run "crm node utilization hanode1 set password=qwertyui" on "hanode1" + When Try "crm configure show|grep password|grep qwertyui" + Then Expected return code is "1" + When Run "crm node utilization hanode2 set password testingpass" on "hanode1" + When Try "crm configure show|grep password|grep testingpass" + Then Expected return code is "1" + And Show crm configure + + # mask password and ip address + When Run "crm configure primitive ip2 IPaddr2 params ip=@vip.0" on "hanode1" + And Run "sed -i 's/; \[core\]/[core]/' /etc/crm/crm.conf" on "hanode1" + And Run "sed -i 's/; 
obscure_pattern = .*$/obscure_pattern = passw*|ip/g' /etc/crm/crm.conf" on "hanode1" + And Try "crm configure show|grep -E "@vip.0|qwertyui"" + Then Expected return code is "1" + And Show crm configure + + # mask password and ip address with another pattern + When Run "sed -i 's/obscure_pattern = .*$/obscure_pattern = passw* ip/g' /etc/crm/crm.conf" on "hanode1" + And Try "crm configure show|grep -E "@vip.0|qwertyui"" + Then Expected return code is "1" + And Show crm configure diff --git a/test/features/constraints_bugs.feature b/test/features/constraints_bugs.feature new file mode 100644 index 0000000..c1174d5 --- /dev/null +++ b/test/features/constraints_bugs.feature @@ -0,0 +1,24 @@ +@constraints +Feature: Verify constraints(order/colocation/location) bug + + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 hanode2 + + Background: Setup a two nodes cluster + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Show cluster status on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + And Show cluster status on "hanode1" + + @clean + Scenario: Convert score to kind for rsc_order(bsc#1122391) + When Run "crm configure primitive d1 Dummy op monitor interval=10s" on "hanode1" + And Run "crm configure primitive d2 Dummy op monitor interval=10s" on "hanode1" + And Run "crm configure order o1 100: d1 d2" on "hanode1" + When Run "crm configure show" on "hanode1" + Then Expected "order o1 Mandatory: d1 d2" in stdout diff --git a/test/features/coveragerc b/test/features/coveragerc new file mode 100644 index 0000000..cb0403e --- /dev/null +++ b/test/features/coveragerc @@ -0,0 +1,4 @@ +[run] +data_file = /.coverage +parallel = True +source_pkgs = crmsh diff --git a/test/features/crm_report_bugs.feature b/test/features/crm_report_bugs.feature new file mode 100644 index 0000000..58d158b --- /dev/null +++ b/test/features/crm_report_bugs.feature @@ -0,0 +1,164 @@ +@crm_report +Feature: crm report functional test for verifying bugs + + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 hanode2 hanode3 + + Background: Setup a two nodes cluster + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Show cluster status on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + And Show cluster status on "hanode1" + + @clean + Scenario: Verify crm report handle files contain non-utf-8 characters (bsc#1130715) + When Run "echo 'abc#$%%^' | iconv -f UTF-8 -t UTF-16 > /opt/text_non_utf8" on "hanode1" + Then This file "/opt/text_non_utf8" will trigger UnicodeDecodeError exception + When Run "crm report -E /opt/text_non_utf8 report1" on "hanode1" + Then File "text_non_utf8" in "report1.tar.bz2" + When Run "rm -f report1.tar.bz2" on "hanode1" + + @clean + Scenario: Compressed file ended before the end-of-stream marker was reached (bsc#1206606) + When Run "touch /var/log/pacemaker/pacemaker.log-20221220.xz" on "hanode1" + When Try "crm report report1" on "hanode1" + Then File "pacemaker.log" in "report1.tar.bz2" + And Expected "When 
reading file "/var/log/pacemaker/pacemaker.log-20221220.xz": Compressed file ended before the end-of-stream marker was reached" in stderr + When Run "rm -f report1.tar.bz2" on "hanode1" + + @clean + Scenario: Include archived logs(bsc#1148873) + # For syslog + When Write multi lines to file "/var/log/log1" on "hanode1" + """ + Sep 08 08:36:34 node1 log message line1 + Sep 08 08:37:01 node1 log message line2 + Sep 08 08:37:02 node1 log message line3 + """ + And Run "xz /var/log/log1" on "hanode1" + # bsc#1218491, unable to gather log files that are in the syslog format + And Run "touch -m -t 202201010000 /var/log/log1.xz" on "hanode1" + When Write multi lines to file "/var/log/log1" on "hanode1" + """ + Sep 08 09:37:02 node1 log message line4 + Sep 08 09:37:12 node1 log message line5 + """ + # bsc#1218491, unable to gather log files that are in the syslog format + And Run "touch -m -t 202201010001 /var/log/log1" on "hanode1" + And Run "crm report -f 20200901 -E /var/log/log1 report1" on "hanode1" + Then File "log1" in "report1.tar.bz2" + When Run "tar jxf report1.tar.bz2" on "hanode1" + And Run "cat report1/hanode1/log1" on "hanode1" + Then Expected multiple lines in output + """ + Sep 08 08:36:34 node1 log message line1 + Sep 08 08:37:01 node1 log message line2 + Sep 08 08:37:02 node1 log message line3 + Sep 08 09:37:02 node1 log message line4 + Sep 08 09:37:12 node1 log message line5 + """ + When Run "rm -rf report1.tar.gz report1" on "hanode1" + + # For rfc5424 + When Write multi lines to file "/var/log/log2" on "hanode1" + """ + 2022-09-08T14:24:36.003Z mymachine.example.com myapp - ID47 + 2022-09-08T14:25:15.003Z mymachine.example.com myapp - ID48 + 2022-09-08T14:26:15.003Z mymachine.example.com myapp - ID49 + """ + And Run "xz /var/log/log2" on "hanode1" + When Write multi lines to file "/var/log/log2" on "hanode1" + """ + 2022-09-08T14:27:15.003Z mymachine.example.com myapp - ID50 + 2022-09-08T14:28:15.003Z mymachine.example.com myapp - ID51 + """ + And Run "crm report -f 20200901 -E /var/log/log2 report1" on "hanode1" + Then File "log2" in "report1.tar.bz2" + When Run "tar jxf report1.tar.bz2" on "hanode1" + And Run "cat report1/hanode1/log2" on "hanode1" + Then Expected multiple lines in output + """ + 2022-09-08T14:24:36.003Z mymachine.example.com myapp - ID47 + 2022-09-08T14:25:15.003Z mymachine.example.com myapp - ID48 + 2022-09-08T14:26:15.003Z mymachine.example.com myapp - ID49 + 2022-09-08T14:27:15.003Z mymachine.example.com myapp - ID50 + 2022-09-08T14:28:15.003Z mymachine.example.com myapp - ID51 + """ + When Run "rm -rf report1.tar.gz report1" on "hanode1" + + @clean + Scenario: Collect corosync.log(bsc#1148874) + When Run "sed -i 's/\(\s*to_logfile:\s*\).*/\1no/' /etc/corosync/corosync.conf" on "hanode1" + When Run "sed -i 's/\(\s*to_logfile:\s*\).*/\1no/' /etc/corosync/corosync.conf" on "hanode2" + And Run "corosync-cfgtool -R" on "hanode1" + And Run "rm -f /var/log/cluster/corosync.log" on "hanode1" + And Run "rm -f /var/log/cluster/corosync.log" on "hanode2" + And Run "crm cluster stop --all" on "hanode1" + And Run "crm cluster start --all" on "hanode1" + And Run "sleep 15" on "hanode1" + + And Run "crm report report" on "hanode1" + And Run "tar jxf report.tar.bz2" on "hanode1" + Then File "corosync.log" not in "report.tar.bz2" + When Run "rm -rf report.tar.gz report" on "hanode1" + + When Run "sed -i 's/\(\s*to_logfile:\s*\).*/\1yes/' /etc/corosync/corosync.conf" on "hanode1" + When Run "sed -i 's/\(\s*to_logfile:\s*\).*/\1yes/' /etc/corosync/corosync.conf" on 
"hanode2" + And Run "crm cluster stop --all" on "hanode1" + And Run "crm cluster start --all" on "hanode1" + And Run "sleep 15" on "hanode1" + + And Run "crm report report" on "hanode1" + And Run "tar jxf report.tar.bz2" on "hanode1" + Then File "corosync.log" in "report.tar.bz2" + When Run "rm -rf report.tar.bz2 report" on "hanode1" + + @clean + Scenario: Replace sensitive data(bsc#1163581) + # Set sensitive data TEL and password + When Run "crm node utilization hanode1 set TEL 13356789876" on "hanode1" + When Run "crm node utilization hanode1 set password qwertyui" on "hanode1" + When Run "crm report report" on "hanode1" + When Run "tar jxf report.tar.bz2" on "hanode1" + And Try "grep -R 'qwertyui' report" + # crm report mask passw.* by default + # No password here + Then Expected return code is "1" + When Run "rm -rf report.tar.bz2 report" on "hanode1" + + # mask password and ip address by using crm.conf + When Run "crm configure primitive ip2 IPaddr2 params ip=@vip.0" on "hanode1" + And Run "sed -i 's/; \[report\]/[report]/' /etc/crm/crm.conf" on "hanode1" + And Run "sed -i 's/; sanitize_rule = .*$/sanitize_rule = passw.*|ip.*:raw/g' /etc/crm/crm.conf" on "hanode1" + And Run "crm report report" on "hanode1" + And Run "tar jxf report.tar.bz2" on "hanode1" + And Try "grep -R -E '@vip.0|qwertyui' report" + # No password here + Then Expected return code is "1" + When Run "rm -rf report.tar.bz2 report" on "hanode1" + + # Do sanitize job, also for TEL + When Run "crm report -s -p TEL report" on "hanode1" + When Run "tar jxf report.tar.bz2" on "hanode1" + And Try "grep -R 'qwertyui' report" + # No password here + Then Expected return code is "1" + When Try "grep -R '13356789876' report" + # No TEL number here + Then Expected return code is "1" + When Run "rm -rf report.tar.bz2 report" on "hanode1" + + # disable sanitize + When Run "sed -i 's/; \[report\]/[report]/' /etc/crm/crm.conf" on "hanode1" + And Run "sed -i 's/sanitize_rule = .*$/sanitize_rule = /g' /etc/crm/crm.conf" on "hanode1" + When Run "crm report report" on "hanode1" + When Run "tar jxf report.tar.bz2" on "hanode1" + And Try "grep -R 'qwertyui' report" + # found password + Then Expected return code is "0" + When Run "rm -rf report.tar.bz2 report" on "hanode1" diff --git a/test/features/crm_report_normal.feature b/test/features/crm_report_normal.feature new file mode 100644 index 0000000..00a1f2b --- /dev/null +++ b/test/features/crm_report_normal.feature @@ -0,0 +1,109 @@ +@crm_report +Feature: crm report functional test for common cases + + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 hanode2 hanode3 + + Background: Setup a two nodes cluster + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Show cluster status on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + And Show cluster status on "hanode1" + + @clean + Scenario: crm report collect trace ra log + When Run "crm configure primitive d Dummy" on "hanode1" + And Run "crm configure primitive d2 Dummy" on "hanode1" + Then Resource "d" is started on "hanode1" + And Resource "d2" is started on "hanode2" + When Run "crm resource trace d monitor" on "hanode1" + Then Expected "Trace for d:monitor is written to /var/lib/heartbeat/trace_ra/Dummy" in stdout + When Wait "10" seconds 
+ And Run "crm resource untrace d" on "hanode1" + And Run "crm resource trace d2 monitor /trace_d" on "hanode1" + Then Expected "Trace for d2:monitor is written to /trace_d/Dummy" in stdout + When Wait "10" seconds + And Run "crm resource untrace d2" on "hanode1" + And Run "crm report report" on "hanode1" + Then No crmsh tracebacks + Then Directory "trace_ra" in "report.tar.bz2" + And Directory "trace_d" in "report.tar.bz2" + When Run "rm -rf report.tar.bz2 report" on "hanode1" + + @clean + Scenario: Run history and script + When Run "crm history info" on "hanode1" + When Run "crm history refresh" on "hanode1" + When Try "crm history peinputs|grep "pengine/pe-input-0"" + Then Expected return code is "0" + When Try "crm history info|grep "Nodes: hanode1 hanode2"" + Then Expected return code is "0" + When Run "crm configure primitive d100 Dummy" on "hanode1" + When Run "crm history refresh force" on "hanode1" + When Try "crm history info|grep "Resources: d100"" + Then Expected return code is "0" + Given Cluster service is "stopped" on "hanode3" + When Run "crm cluster join -c hanode1 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + When Run "crm history refresh force" on "hanode1" + When Try "crm history info|grep "Nodes: hanode1 hanode2 hanode3"" + Then Expected return code is "0" + When Run "crm script run health" on "hanode1" + When Run "crm script run virtual-ip id=vip_x ip=@vip.0" on "hanode1" + Then Resource "vip_x" type "IPaddr2" is "Started" + + @clean + Scenario: Common tests + When Run "crm report -h" on "hanode1" + + When Try "crm report "*s"" on "hanode1" + Then Expected "*s is invalid file name" in stderr + + When Try "crm report /fsf/report" on "hanode1" + Then Expected "Directory /fsf does not exist" in stderr + + When Try "crm report -n fs" on "hanode1" + Then Expected "host "fs" is unreachable:" in stderr + + When Try "crm report -f xxxx" on "hanode1" + Then Expected "Invalid time string 'xxxx'" in stderr + + When Try "crm report -f 1d -t 2d" on "hanode1" + Then Expected "The start time must be before the finish time" in stderr + + When Run "crm -d report -S -d /tmp/report" on "hanode1" + Then Directory "/tmp/report/hanode1" created + Then Directory "/tmp/report/hanode2" not created + When Run "rm -rf /tmp/report" on "hanode1" + + When Run "crm report -vv" on "hanode1" + Then Default crm_report tar file created + When Remove default crm_report tar file + + When Run "crm report -d /tmp/report" on "hanode1" + Then Directory "/tmp/report" created + When Try "crm report -d /tmp/report" on "hanode1" + Then Expected "Destination directory /tmp/report exists, please cleanup or use -Z option" in stderr + When Run "crm report -d -Z /tmp/report" on "hanode1" + Then Directory "/tmp/report" created + + When Run "mv /etc/corosync/corosync.conf /etc/corosync/corosync.bak" on "hanode1" + When Try "crm report" on "hanode1" + Then Expected "File /etc/corosync/corosync.conf does not exist" in stderr + When Run "mv /etc/corosync/corosync.bak /etc/corosync/corosync.conf" on "hanode1" + + When Run "mv /var/lib/pacemaker/pengine /var/lib/pacemaker/pengine_bak" on "hanode1" + When Try "crm report" on "hanode1" + Then Expected "Cannot find PE directory" in stderr + When Run "mv /var/lib/pacemaker/pengine_bak /var/lib/pacemaker/pengine" on "hanode1" + + When Run "crm cluster stop --all" on "hanode1" + When Run "rm -f /var/lib/pacemaker/cib/cib*" on "hanode1" + When Run "rm -f /var/lib/pacemaker/cib/cib*" on "hanode2" + When Try "crm report" on "hanode1" + Then Expected "Could not 
figure out a list of nodes; is this a cluster node" in stderr diff --git a/test/features/environment.py b/test/features/environment.py new file mode 100644 index 0000000..61d2ac2 --- /dev/null +++ b/test/features/environment.py @@ -0,0 +1,53 @@ +import logging +import re +import subprocess +import time + +import crmsh.userdir +import crmsh.utils +from crmsh.sh import ShellUtils + + +def get_online_nodes(): + _, out, _ = ShellUtils().get_stdout_stderr('sudo crm_node -l') + if out: + return re.findall(r'[0-9]+ (.*) member', out) + else: + return None + + +def resource_cleanup(): + subprocess.run( + ['sudo', 'crm', 'resource', 'cleanup'], + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + +def before_step(context, step): + context.logger = logging.getLogger("Step:{}".format(step.name)) + + +def before_tag(context, tag): + # tag @clean means need to stop cluster service + if tag == "clean": + time.sleep(3) + online_nodes = get_online_nodes() + if online_nodes: + resource_cleanup() + while True: + time.sleep(1) + rc, stdout, _ = ShellUtils().get_stdout_stderr('sudo crmadmin -D -t 1') + if rc == 0 and stdout.startswith('Designated'): + break + subprocess.call( + ['sudo', 'crm', 'cluster', 'stop', '--all'], + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + if tag == "skip_non_root": + sudoer = crmsh.userdir.get_sudoer() + if sudoer or crmsh.userdir.getuser() != 'root': + context.scenario.skip() diff --git a/test/features/geo_setup.feature b/test/features/geo_setup.feature new file mode 100644 index 0000000..b26b04e --- /dev/null +++ b/test/features/geo_setup.feature @@ -0,0 +1,29 @@ +@geo +Feature: geo cluster + + Test geo cluster setup using bootstrap + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 hanode2 hanode3 + + @clean + Scenario: GEO cluster setup + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "crm cluster init -y -n cluster1" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm configure primitive vip IPaddr2 params ip=@vip.0" on "hanode1" + + When Run "crm cluster init -y -n cluster2" on "hanode2" + Then Cluster service is "started" on "hanode2" + When Run "crm configure primitive vip IPaddr2 params ip=@vip.1" on "hanode2" + + When Run "crm cluster geo_init -y --clusters "cluster1=@vip.0 cluster2=@vip.1" --tickets tickets-geo --arbitrator hanode3" on "hanode1" + When Run "crm cluster geo_join -y --cluster-node hanode1 --clusters "cluster1=@vip.0 cluster2=@vip.1"" on "hanode2" + + Given Service "booth@booth" is "stopped" on "hanode3" + When Run "crm cluster geo_init_arbitrator -y --cluster-node hanode1" on "hanode3" + Then Service "booth@booth" is "started" on "hanode3" + When Run "crm resource start g-booth" on "hanode1" + Then Show cluster status on "hanode1" + When Run "crm resource start g-booth" on "hanode2" + Then Show cluster status on "hanode2" diff --git a/test/features/healthcheck.feature b/test/features/healthcheck.feature new file mode 100644 index 0000000..da7f78a --- /dev/null +++ b/test/features/healthcheck.feature @@ -0,0 +1,37 @@ +@healthcheck +Feature: healthcheck detect and fix problems in a crmsh deployment + + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 hanode2 hanode3 + + Background: Setup a two nodes cluster + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on 
"hanode2" + And Cluster service is "stopped" on "hanode3" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Show cluster status on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + And Show cluster status on "hanode1" + + @clean + Scenario: a new node joins when directory ~hacluster/.ssh is removed from cluster + When Run "rm -rf ~hacluster/.ssh" on "hanode1" + And Run "rm -rf ~hacluster/.ssh" on "hanode2" + And Run "crm cluster join -c hanode1 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + # FIXME: new join implement does not trigger a exception any longer, and the auto fix is not applied + # And File "~hacluster/.ssh/id_rsa" exists on "hanode1" + # And File "~hacluster/.ssh/id_rsa" exists on "hanode2" + # And File "~hacluster/.ssh/id_rsa" exists on "hanode3" + + # skip non-root as behave_agent is not able to run commands interactively with non-root sudoer + @skip_non_root + @clean + Scenario: An upgrade_seq file in ~hacluster/crmsh/ will be migrated to /var/lib/crmsh (bsc#1213050) + When Run "mv /var/lib/crmsh ~hacluster/" on "hanode1" + Then File "~hacluster/crmsh/upgrade_seq" exists on "hanode1" + When Run "crm cluster status" on "hanode1" + Then File "/var/lib/crmsh/upgrade_seq" exists on "hanode1" diff --git a/test/features/ocfs2.feature b/test/features/ocfs2.feature new file mode 100644 index 0000000..29b4b1a --- /dev/null +++ b/test/features/ocfs2.feature @@ -0,0 +1,61 @@ +@ocfs2 +Feature: OCFS2 configuration/verify using bootstrap + +@clean +Scenario: Configure ocfs2 along with init process + Given Has disk "/dev/sda1" on "hanode1" + And Has disk "/dev/sda2" on "hanode1" + When Run "crm cluster init -s /dev/sda1 -o /dev/sda2 -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + And Resource "stonith-sbd" type "external/sbd" is "Started" + And Resource "ocfs2-dlm" type "pacemaker:controld" is "Started" + And Resource "ocfs2-clusterfs" type "heartbeat:Filesystem" is "Started" + +@clean +Scenario: Configure cluster lvm2 + ocfs2 with init process + Given Has disk "/dev/sda1" on "hanode1" + And Has disk "/dev/sda2" on "hanode1" + And Has disk "/dev/sda3" on "hanode1" + When Run "crm cluster init -s /dev/sda1 -o /dev/sda2 -o /dev/sda3 -C -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "sbd" is "started" on "hanode1" + And Resource "stonith-sbd" type "external/sbd" is "Started" + And Resource "ocfs2-dlm" type "pacemaker:controld" is "Started" + And Resource "ocfs2-lvmlockd" type "heartbeat:lvmlockd" is "Started" + And Resource "ocfs2-lvmactivate" type "heartbeat:LVM-activate" is "Started" + And Resource "ocfs2-clusterfs" type "heartbeat:Filesystem" is "Started" + +@clean +Scenario: Add ocfs2 alone on a running cluster + Given Has disk "/dev/sda1" on "hanode1" + And Has disk "/dev/sda2" on "hanode1" + And Has disk "/dev/sda1" on "hanode2" + And Has disk "/dev/sda2" on "hanode2" + When Run "crm cluster init -s /dev/sda1 -y" on "hanode1" + And Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Online nodes are "hanode1 hanode2" + And Service "sbd" is "started" on "hanode1" + And Service "sbd" is "started" on "hanode2" + And Resource "stonith-sbd" type "external/sbd" is "Started" + When Run "crm cluster init ocfs2 -o /dev/sda2 -y" on "hanode1" + Then Resource "ocfs2-dlm" type "pacemaker:controld" is "Started" + And 
Resource "ocfs2-clusterfs" type "heartbeat:Filesystem" is "Started" + +@clean +Scenario: Add cluster lvm2 + ocfs2 on a running cluster + Given Has disk "/dev/sda1" on "hanode1" + And Has disk "/dev/sda2" on "hanode1" + And Has disk "/dev/sda1" on "hanode2" + And Has disk "/dev/sda2" on "hanode2" + When Run "crm cluster init -s /dev/sda1 -y" on "hanode1" + And Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Online nodes are "hanode1 hanode2" + And Service "sbd" is "started" on "hanode1" + And Service "sbd" is "started" on "hanode2" + And Resource "stonith-sbd" type "external/sbd" is "Started" + When Run "crm cluster init ocfs2 -o /dev/sda2 -C -y" on "hanode1" + Then Resource "ocfs2-dlm" type "pacemaker:controld" is "Started" + And Resource "ocfs2-lvmlockd" type "heartbeat:lvmlockd" is "Started" + And Resource "ocfs2-lvmactivate" type "heartbeat:LVM-activate" is "Started" + And Resource "ocfs2-clusterfs" type "heartbeat:Filesystem" is "Started" diff --git a/test/features/qdevice_options.feature b/test/features/qdevice_options.feature new file mode 100644 index 0000000..e0277a7 --- /dev/null +++ b/test/features/qdevice_options.feature @@ -0,0 +1,50 @@ +@qdevice +Feature: corosync qdevice/qnetd options + + Test corosync qdevice/qnetd options: + "--qdevice-algo": QNetd decision ALGORITHM(ffsplit/lms, default:ffsplit) + "--qdevice-ti-breaker": QNetd TIE_BREAKER(lowest/highest/valid_node_id, default:lowest) + "--qdevice-tls": Whether using TLS on QDevice/QNetd(on/off/required, default:on) + "--qdevice-heuristics": COMMAND to run with absolute path. For multiple commands, use ";" to separate + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 hanode2 qnetd-node + + @clean + Scenario: Use "--qdevice-algo" to change qnetd decision algorithm to "lms" + Given Cluster service is "stopped" on "hanode1" + And Service "corosync-qdevice" is "stopped" on "hanode1" + When Run "crm cluster init --qnetd-hostname=qnetd-node --qdevice-algo=lms -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + And Service "corosync-qdevice" is "started" on "hanode2" + And Show corosync qdevice configuration + + @clean + Scenario: Use "--qdevice-tie-breaker" to change qnetd tie_breaker to "highest" + Given Cluster service is "stopped" on "hanode1" + And Service "corosync-qdevice" is "stopped" on "hanode1" + When Run "crm cluster init --qnetd-hostname=qnetd-node --qdevice-tie-breaker=highest -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode1" + And Show corosync qdevice configuration + + @clean + Scenario: Use "--qdevice-tls" to turn off TLS certification + Given Cluster service is "stopped" on "hanode1" + And Service "corosync-qdevice" is "stopped" on "hanode1" + When Run "crm cluster init --qnetd-hostname=qnetd-node --qdevice-tls=off -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode1" + And Show corosync qdevice configuration + + @clean + Scenario: Use "--qdevice-heuristics" to configure heuristics + Given Cluster service is "stopped" on "hanode1" + And Service "corosync-qdevice" is "stopped" on "hanode1" + When Run "crm cluster init --qnetd-hostname=qnetd-node --qdevice-heuristics='/usr/bin/test -f 
/tmp/file_exists;/usr/bin/which pacemaker' -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode1" + And Show corosync qdevice configuration diff --git a/test/features/qdevice_setup_remove.feature b/test/features/qdevice_setup_remove.feature new file mode 100644 index 0000000..df7af3d --- /dev/null +++ b/test/features/qdevice_setup_remove.feature @@ -0,0 +1,173 @@ +@qdevice +Feature: corosync qdevice/qnetd setup/remove process + + Test corosync qdevice/qnetd setup/remove process + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 hanode2 hanode3 hanode4 qnetd-node + + Background: Cluster and qdevice service are stopped + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + And Service "corosync-qdevice" is "stopped" on "hanode1" + And Service "corosync-qdevice" is "stopped" on "hanode2" + + @clean + Scenario: Setup qdevice/qnetd during init/join process + When Run "crm cluster init --qnetd-hostname=qnetd-node -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + # for bsc#1181415 + Then Expected "Restarting cluster service" in stdout + And Service "corosync-qdevice" is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + And Service "corosync-qdevice" is "started" on "hanode2" + And Service "corosync-qnetd" is "started" on "qnetd-node" + And Show status from qnetd + And Show corosync qdevice configuration + And Show qdevice status + + @clean + Scenario: Setup qdevice/qnetd on running cluster + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "corosync-qdevice" is "stopped" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + And Service "corosync-qdevice" is "stopped" on "hanode2" + When Write multi lines to file "/etc/corosync/corosync.conf" on "hanode1" + """ + # This is a test for bsc#1166684 + + """ + When Write multi lines to file "/etc/corosync/corosync.conf" on "hanode2" + """ + # This is a test for bsc#1166684 + + """ + When Run "crm cluster init qdevice --qnetd-hostname=qnetd-node -y" on "hanode1" + # for bsc#1181415 + Then Expected "Starting corosync-qdevice.service in cluster" in stdout + Then Service "corosync-qdevice" is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode2" + And Service "corosync-qnetd" is "started" on "qnetd-node" + And Show status from qnetd + And Show corosync qdevice configuration + + @clean + Scenario: Remove qdevice from a two nodes cluster + When Run "crm cluster init --qnetd-hostname=qnetd-node -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + And Service "corosync-qdevice" is "started" on "hanode2" + And Show corosync qdevice configuration + When Run "crm cluster remove --qdevice -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Cluster service is "started" on "hanode2" + And Service "corosync-qdevice" is "stopped" on "hanode1" + And Service "corosync-qdevice" is "stopped" on "hanode2" + And Show corosync qdevice configuration + + 
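The Given/When/Then lines throughout these feature files are resolved by Python step definitions that behave loads from a steps/ directory next to the features (the environment.py hooks added earlier in this patch show the suite runs under behave and already use crmsh.sh.ShellUtils). As a rough sketch only, this is how a recurring step such as 'Service "corosync-qdevice" is "started" on "hanode2"' could be implemented; the run_on() helper, its plain-ssh transport, and the function name are assumptions for illustration rather than the project's actual step code, and only ShellUtils is taken from crmsh itself.

    # Hypothetical step definition for: Service "<name>" is "<state>" on "<node>".
    # Assumption: the node is reachable over passwordless ssh; the real suite
    # dispatches remote commands through its own agent layer instead.
    from behave import step
    from crmsh.sh import ShellUtils

    def run_on(node, cmd):
        # Run a command on a remote node and return its exit code (assumed helper).
        rc, _, _ = ShellUtils().get_stdout_stderr("ssh {} '{}'".format(node, cmd))
        return rc

    @step('Service "{service}" is "{state}" on "{node}"')
    def check_service_state(context, service, state, node):
        # 'systemctl is-active' exits 0 only when the unit is currently active.
        rc = run_on(node, "systemctl is-active {}".format(service))
        assert (rc == 0) == (state == "started"), \
            "{} on {} expected to be {}".format(service, node, state)

With a definition like this under the features' steps/ directory, behave maps every 'Service "..." is "..." on "..."' line in these scenarios to one function and fails the scenario whenever the asserted state disagrees with what systemd reports on that node.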
@clean + Scenario: Setup qdevice on multi nodes + When Run "crm cluster init --qnetd-hostname=qnetd-node -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + And Service "corosync-qdevice" is "started" on "hanode2" + And Expected votes will be "3" + When Run "crm cluster join -c hanode1 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + And Online nodes are "hanode1 hanode2 hanode3" + And Service "corosync-qdevice" is "started" on "hanode3" + And Expected votes will be "4" + When Run "crm cluster join -c hanode1 -y" on "hanode4" + Then Cluster service is "started" on "hanode4" + And Online nodes are "hanode1 hanode2 hanode3 hanode4" + And Service "corosync-qdevice" is "started" on "hanode4" + And Expected votes will be "5" + And Show corosync qdevice configuration + And Show status from qnetd + + @clean + Scenario: Setup qdevice on multi nodes existing cluster + When Run "crm cluster init -u -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + When Run "crm cluster join -c hanode1 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + And Online nodes are "hanode1 hanode2 hanode3" + When Run "crm cluster join -c hanode1 -y" on "hanode4" + Then Cluster service is "started" on "hanode4" + And Online nodes are "hanode1 hanode2 hanode3 hanode4" + And Expected votes will be "4" + When Run "crm cluster init qdevice --qnetd-hostname=qnetd-node -y" on "hanode1" + Then Show corosync qdevice configuration + And Expected votes will be "5" + And Service "corosync-qdevice" is "started" on "hanode4" + And Service "corosync-qdevice" is "started" on "hanode3" + And Service "corosync-qdevice" is "started" on "hanode2" + And Service "corosync-qdevice" is "started" on "hanode1" + And Show status from qnetd + + @clean + Scenario: Setup qdevice using IPv6 + When Run "crm cluster init -u -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + When Run "crm cluster init qdevice --qnetd-hostname @qnetd-node.ip6.0 -y" on "hanode1" + Then Show corosync qdevice configuration + And Service "corosync-qdevice" is "started" on "hanode2" + And Service "corosync-qdevice" is "started" on "hanode1" + And Show status from qnetd + + @skip_non_root + @clean + Scenario: Passwordless for root, not for sudoer (bsc#1209193) + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + When Run "useradd -m -s /bin/bash xin" on "hanode1" + When Run "echo "xin ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/xin" on "hanode1" + When Run "rm -f /root/.config/crm/crm.conf" on "hanode1" + When Run "useradd -m -s /bin/bash xin" on "hanode2" + When Run "echo "xin ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/xin" on "hanode2" + When Run "rm -f /root/.config/crm/crm.conf" on "hanode2" + When Run "su xin -c "sudo crm cluster init qdevice --qnetd-hostname=qnetd-node -y"" on "hanode1" + Then Service "corosync-qdevice" is "started" on "hanode1" + And 
Service "corosync-qdevice" is "started" on "hanode2" + + @skip_non_root + @clean + Scenario: Missing crm/crm.conf (bsc#1209193) + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + When Run "rm -f /root/.config/crm/crm.conf" on "hanode1" + When Run "rm -f /root/.config/crm/crm.conf" on "hanode2" + When Run "crm cluster init qdevice --qnetd-hostname=qnetd-node -y" on "hanode1" + Then Service "corosync-qdevice" is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode2" + + @clean + Scenario: One qnetd for multi cluster, add in parallel + When Run "crm cluster init -n cluster1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + When Run "crm cluster init -n cluster2 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + When Run "crm cluster init qdevice --qnetd-hostname qnetd-node -y" on "hanode2,hanode3" + Then Service "corosync-qdevice" is "started" on "hanode2" + And Service "corosync-qdevice" is "started" on "hanode3" + And Service "corosync-qnetd" is "started" on "qnetd-node" diff --git a/test/features/qdevice_usercase.feature b/test/features/qdevice_usercase.feature new file mode 100644 index 0000000..c35d2cb --- /dev/null +++ b/test/features/qdevice_usercase.feature @@ -0,0 +1,87 @@ +@qdevice +Feature: Verify usercase master survive when split-brain + + Steps to setup a two-nodes cluster with heuristics qdevice, + started with a promotable clone resource, and make sure master side always with quorum: + 1. Setup a two-nodes cluster + 2. Generate script to check whether this node is master + 3. Add a promotable clone resource + 4. Setup qdevice with heuristics + 5. Use iptables command to simulate split-brain + 6. 
Check whether hanode1 has quorum, while hanode2 doesn't + + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 hanode2 qnetd-node + + Background: Cluster and qdevice service are stopped + Given Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + And Service "corosync-qdevice" is "stopped" on "hanode1" + And Service "corosync-qdevice" is "stopped" on "hanode2" + + @clean + Scenario: Setup qdevice with heuristics + When Run "crm cluster init -y --qnetd-hostname=qnetd-node --qdevice-heuristics="/usr/bin/test -f /tmp/heuristics.txt" --qdevice-heuristics-mode="on"" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + And Online nodes are "hanode1 hanode2" + And Service "corosync-qdevice" is "started" on "hanode2" + And Service "corosync-qnetd" is "started" on "qnetd-node" + And Show corosync qdevice configuration + When Run "crm corosync status qnetd" on "hanode1" + Then Expected regrex "Heuristics:\s+Fail" in stdout + When Run "touch /tmp/heuristics.txt" on "hanode1" + When Run "sleep 30" on "hanode1" + When Run "crm corosync status qnetd" on "hanode1" + Then Expected regrex "Heuristics:\s+Pass" in stdout + + @clean + Scenario: Master survive when split-brain + # Setup a two-nodes cluster + When Run "crm cluster init -y -i eth0" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y -i eth0" on "hanode2" + Then Cluster service is "started" on "hanode2" + + # Generate script to check whether this node is master + When Write multi lines to file "/etc/corosync/qdevice/check_master.sh" on "hanode1" + """ + #!/usr/bin/sh + crm_resource --locate -r promotable-1 2>&1 | grep -E "Master|Promoted" | grep `crm_node -n` >/dev/null 2>&1 + """ + And Run "chmod +x /etc/corosync/qdevice/check_master.sh" on "hanode1" + When Write multi lines to file "/etc/corosync/qdevice/check_master.sh" on "hanode2" + """ + #!/usr/bin/sh + crm_resource --locate -r promotable-1 2>&1 | grep -E "Master|Promoted" | grep `crm_node -n` >/dev/null 2>&1 + """ + And Run "chmod +x /etc/corosync/qdevice/check_master.sh" on "hanode2" + # Add a promotable clone resource and make sure hanode1 is master + And Run "crm configure primitive stateful-1 ocf:pacemaker:Stateful op monitor role=Promoted interval=10s op monitor role=Unpromoted interval=5s" on "hanode1" + And Run "crm configure clone promotable-1 stateful-1 meta promotable=true" on "hanode1" + And Run "sleep 5" on "hanode1" + Then Show cluster status on "hanode1" + + # Setup qdevice with heuristics + When Run "crm cluster init qdevice --qnetd-hostname=qnetd-node --qdevice-heuristics=/etc/corosync/qdevice/check_master.sh -y" on "hanode1" + Then Service "corosync-qdevice" is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode2" + When Run "sleep 5" on "hanode1" + Then Show status from qnetd + When Run "corosync-quorumtool -s" on "hanode1" + Then Expected "Quorate: Yes" in stdout + # Use iptables command to simulate split-brain + When Run "iptables -I INPUT -s @hanode2.ip.default -j DROP; sudo iptables -I OUTPUT -d @hanode2.ip.default -j DROP" on "hanode1" + And Run "iptables -I INPUT -s @hanode1.ip.default -j DROP; sudo iptables -I OUTPUT -d @hanode1.ip.default -j DROP" on "hanode2" + # Check whether hanode1 has quorum, while hanode2 doesn't + And Run 
"sleep 20" on "hanode1" + When Run "crm corosync status quorum" on "hanode1" + Then Expected "Quorate: Yes" in stdout + When Run "crm corosync status quorum" on "hanode2" + Then Expected "Quorate: No" in stdout + And Show cluster status on "hanode1" + And Show cluster status on "hanode2" + When Try "crm corosync status fs" on "hanode1" + Then Expected "Wrong type "fs" to query status" in stderr diff --git a/test/features/qdevice_validate.feature b/test/features/qdevice_validate.feature new file mode 100644 index 0000000..5403a52 --- /dev/null +++ b/test/features/qdevice_validate.feature @@ -0,0 +1,161 @@ +@qdevice +Feature: corosync qdevice/qnetd options validate + + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 hanode2 hanode3 qnetd-node node-without-ssh + + @clean + Scenario: Option "--qnetd-hostname" use the same node + When Try "crm cluster init --qnetd-hostname=hanode1" + Then Except "ERROR: cluster.init: host for qnetd must be a remote one" + + @clean + Scenario: Option "--qnetd-hostname" use hanode1's IP + When Try "crm cluster init --qnetd-hostname=@hanode1.ip.0" + Then Except "ERROR: cluster.init: host for qnetd must be a remote one" + + @clean + Scenario: Option "--qnetd-hostname" use unknown hostname + When Try "crm cluster init --qnetd-hostname=error-node" + Then Except "ERROR: cluster.init: host "error-node" is unreachable" + + @clean + Scenario: Service ssh on qnetd node not available + When Run "systemctl stop sshd.service" on "node-without-ssh" + When Try "crm cluster init --qnetd-hostname=node-without-ssh" + Then Except "ERROR: cluster.init: ssh service on "node-without-ssh" not available" + + @clean + Scenario: Option "--qdevice-port" set wrong port + When Try "crm cluster init --qnetd-hostname=qnetd-node --qdevice-port=1" + Then Except "ERROR: cluster.init: invalid qdevice port range(1024 - 65535)" + + @clean + Scenario: Option "--qdevice-tie-breaker" set wrong value + When Try "crm cluster init --qnetd-hostname=qnetd-node --qdevice-tie-breaker=wrongtiebreaker" + Then Except "ERROR: cluster.init: invalid qdevice tie_breaker(lowest/highest/valid_node_id)" + + @clean + Scenario: Option "--qdevice-heuristics" set wrong value + When Try "crm cluster init --qnetd-hostname=qnetd-node --qdevice-heuristics='ls /opt'" + Then Except "ERROR: cluster.init: commands for heuristics should be absolute path" + When Try "crm cluster init --qnetd-hostname=qnetd-node --qdevice-heuristics='/bin/not_exist_cmd /opt'" + Then Except "ERROR: cluster.init: command /bin/not_exist_cmd not exist" + + @clean + Scenario: Option "--qnetd-hostname" is required by other qdevice options + When Try "crm cluster init --qdevice-port=1234" + Then Except multiple lines + """ + usage: init [options] [STAGE] + crm: error: Option --qnetd-hostname is required if want to configure qdevice + """ + + @clean + Scenario: Option --qdevice-heuristics is required if want to configure heuristics mode + When Try "crm cluster init --qnetd-hostname=qnetd-node --qdevice-heuristics-mode="on"" + Then Except multiple lines + """ + usage: init [options] [STAGE] + crm: error: Option --qdevice-heuristics is required if want to configure heuristics mode + """ + + @clean + Scenario: Node for qnetd not installed corosync-qnetd + Given Cluster service is "stopped" on "hanode2" + When Try "crm cluster init --qnetd-hostname=hanode2 -y" + Then Except multiple lines + """ + ERROR: cluster.init: Package "corosync-qnetd" not installed on hanode2! 
+ Cluster service already successfully started on this node except qdevice service. + If you still want to use qdevice, install "corosync-qnetd" on hanode2. + Then run command "crm cluster init" with "qdevice" stage, like: + crm cluster init qdevice qdevice_related_options + That command will setup qdevice separately. + """ + And Cluster service is "started" on "hanode1" + + @clean + Scenario: Raise error when adding qdevice stage with the same cluster name + Given Cluster service is "stopped" on "hanode2" + Given Cluster service is "stopped" on "hanode3" + When Run "crm cluster init -n cluster1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + When Run "crm cluster init -n cluster1 -y" on "hanode3" + Then Cluster service is "started" on "hanode3" + When Try "crm cluster init qdevice --qnetd-hostname=qnetd-node -y" on "hanode2,hanode3" + Then Except "ERROR: cluster.init: Duplicated cluster name "cluster1"!" + When Run "crm cluster stop" on "hanode2" + When Run "crm cluster stop" on "hanode3" + + @clean + Scenario: Raise error when the same cluster name already exists on qnetd + Given Cluster service is "stopped" on "hanode1" + Given Cluster service is "stopped" on "hanode2" + When Try "crm cluster init -n cluster1 --qnetd-hostname=qnetd-node -y" on "hanode2" + When Try "crm cluster init -n cluster1 --qnetd-hostname=qnetd-node -y" + Then Except multiple lines + """ + ERROR: cluster.init: This cluster's name "cluster1" already exists on qnetd server! + Cluster service already successfully started on this node except qdevice service. + If you still want to use qdevice, consider to use the different cluster-name property. + Then run command "crm cluster init" with "qdevice" stage, like: + crm cluster init qdevice qdevice_related_options + That command will setup qdevice separately. 
+ """ + And Cluster service is "started" on "hanode1" + And Cluster service is "started" on "hanode2" + + @clean + Scenario: Run qdevice stage on inactive cluster node + Given Cluster service is "stopped" on "hanode1" + When Try "crm cluster init qdevice --qnetd-hostname=qnetd-node" + Then Except "ERROR: cluster.init: Cluster is inactive - can't run qdevice stage" + + @clean + Scenario: Run qdevice stage but miss "--qnetd-hostname" option + Given Cluster service is "stopped" on "hanode1" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Try "crm cluster init qdevice -y" + Then Except multiple lines + """ + usage: init [options] [STAGE] + crm: error: Option --qnetd-hostname is required if want to configure qdevice + """ + + @clean + Scenario: Setup qdevice on a single node cluster with RA running(bsc#1181415) + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "corosync-qdevice" is "stopped" on "hanode1" + When Run "crm configure primitive d Dummy op monitor interval=3s" on "hanode1" + When Run "crm cluster init qdevice --qnetd-hostname=qnetd-node -y" on "hanode1" + Then Expected "WARNING: To use qdevice service, need to restart cluster service manually on each node" in stderr + And Service "corosync-qdevice" is "stopped" on "hanode1" + When Run "crm cluster restart" on "hanode1" + Then Service "corosync-qdevice" is "started" on "hanode1" + + @clean + Scenario: Remove qdevice from a single node cluster(bsc#1181415) + When Run "crm cluster init --qnetd-hostname=qnetd-node -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode1" + When Run "crm cluster remove --qdevice -y" on "hanode1" + Then Expected "Restarting cluster service" in stdout + Then Cluster service is "started" on "hanode1" + And Service "corosync-qdevice" is "stopped" on "hanode1" + + @clean + Scenario: Remove qdevice from a single node cluster which has RA running(bsc#1181415) + When Run "crm cluster init --qnetd-hostname=qnetd-node -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode1" + When Run "crm configure primitive d Dummy op monitor interval=3s" on "hanode1" + When Run "crm cluster remove --qdevice -y" on "hanode1" + Then Expected "WARNING: To remove qdevice service, need to restart cluster service manually on each node" in stderr + Then Cluster service is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode1" + When Run "crm cluster restart" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Service "corosync-qdevice" is "stopped" on "hanode1" diff --git a/test/features/resource_failcount.feature b/test/features/resource_failcount.feature new file mode 100644 index 0000000..69f402a --- /dev/null +++ b/test/features/resource_failcount.feature @@ -0,0 +1,61 @@ +@resource +Feature: Use "crm resource failcount" to manage failcounts + + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 + + Background: Setup one node cluster and configure a Dummy resource + Given Cluster service is "stopped" on "hanode1" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm configure primitive d Dummy op monitor interval=3s" on "hanode1" + Then Resource "d" type "Dummy" is "Started" + + @clean + Scenario: Validation, input the wrong parameters + When Try 
"crm resource failcount d showss hanode1" + Then Except "ERROR: resource.failcount: showss is not valid command(should be one of ['set', 'delete', 'show'])" + When Try "crm resource failcount d set hanode11 0" + Then Except "ERROR: resource.failcount: Node hanode11 not in this cluster" + + @clean + Scenario: Set the failcount to 0 + When Run "rm -f /run/resource-agents/Dummy-d.state" on "hanode1" + And Wait "5" seconds + Then Resource "d" failcount on "hanode1" is "1" + When Run "crm resource failcount d set hanode1 0" on "hanode1" + Then Resource "d" failcount on "hanode1" is "0" + + @clean + Scenario: Set multiple failcounts to 0 + When Run "sed -i -e '/rm \${OCF_RESKEY_state}/a\' -e "else\nreturn \$OCF_ERR_GENERIC" /usr/lib/ocf/resource.d/heartbeat/Dummy" on "hanode1" + And Run "rm -f /run/resource-agents/Dummy-d.state" on "hanode1" + And Wait "5" seconds + Then Resource "d" failcount on "hanode1" is "INFINITY" + """ + now have two failcount entries, one is monitor, another is stop + """ + When Run "crm resource failcount d set hanode1 0" on "hanode1" + """ + set all failcounts to 0 + """ + Then Resource "d" failcount on "hanode1" is "0" + When Run "crm resource cleanup" on "hanode1" + And Wait "5" seconds + And Run "rm -f /run/resource-agents/Dummy-d.state" on "hanode1" + And Wait "5" seconds + Then Resource "d" failcount on "hanode1" is "INFINITY" + """ + now have two failcount entries, one is monitor, another is stop + """ + When Run "crm resource failcount d set hanode1 0 stop" on "hanode1" + """ + set stop failcounts to 0 + """ + Then Resource "d" failcount on "hanode1" is "1" + When Run "crm resource failcount d set hanode1 0 monitor" on "hanode1" + """ + set monitor failcounts to 0 + """ + Then Resource "d" failcount on "hanode1" is "0" + diff --git a/test/features/resource_set.feature b/test/features/resource_set.feature new file mode 100644 index 0000000..a6726d7 --- /dev/null +++ b/test/features/resource_set.feature @@ -0,0 +1,154 @@ +@resource +Feature: Use "crm configure set" to update attributes and operations + + Tag @clean means need to stop cluster service if the service is available + Need nodes: hanode1 hanode2 + + Background: Setup cluster and configure some resources + Given Cluster service is "stopped" on "hanode1" + When Run "crm cluster init -y" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm cluster join -c hanode1 -y" on "hanode2" + Then Cluster service is "started" on "hanode2" + When Run "crm configure primitive d Dummy op monitor interval=3s" on "hanode1" + Then Resource "d" type "Dummy" is "Started" + When Run "crm configure primitive vip IPaddr2 params ip=@vip.0 op monitor interval=3s" on "hanode1" + Then Resource "vip" type "IPaddr2" is "Started" + And Cluster virtual IP is "@vip.0" + When Run "crm configure primitive s ocf:pacemaker:Stateful op monitor role=Promoted interval=3s op monitor role=Unpromoted interval=5s" on "hanode1" + Then Resource "s" type "Stateful" is "Started" + + @clean + Scenario: Validation, input the wrong parameters + When Try "crm configure set path" + Then Except "ERROR: configure.set: Expected (path value), takes exactly 2 arguments (1 given)" + When Try "crm configure set xxxx value" + Then Except "ERROR: configure.set: Invalid path: "xxxx"; Valid path: "id.[op_type.][interval.]name"" + When Try "crm configure set xxxx.name value" + Then Except "ERROR: configure.set: Object xxxx not found" + When Try "crm configure set d.name value" + Then Except "ERROR: configure.set: Attribute not found: d.name" + 
When Try "crm configure set d.monitor.100.timeout 10" + Then Except "ERROR: configure.set: Operation "monitor" interval "100" not found for resource d" + When Try "crm configure set s.monitor.interval 20" + Then Except "ERROR: configure.set: Should specify interval of monitor" + + @clean + Scenario: Using configure.set to update resource parameters and operation values + When Run "crm configure set vip.ip @vip.0" on "hanode1" + Then Cluster virtual IP is "@vip.0" + When Run "crm configure set d.monitor.on-fail ignore" on "hanode1" + And Run "crm configure show d" on "hanode1" + Then Expected "on-fail=ignore" in stdout + When Run "crm configure set s.monitor.5s.interval 20s" on "hanode1" + And Run "crm configure show s" on "hanode1" + Then Expected "interval=20s" in stdout + When Run "crm configure set op-options.timeout 101" on "hanode1" + And Run "crm configure show op-options" on "hanode1" + Then Expected "timeout=101" in stdout + + @clean + Scenario: Parse node and lifetime correctly (bsc#1192618) + Given Resource "d" is started on "hanode1" + # move <res> <node> + When Run "crm resource move d hanode2" on "hanode1" + When Run "sleep 2" on "hanode1" + Then Resource "d" is started on "hanode2" + When Run "crm resource clear d" on "hanode1" + + # move <res> <node> force + When Run "crm resource move d hanode1" on "hanode1" + When Run "sleep 2" on "hanode1" + Then Resource "d" is started on "hanode1" + When Run "crm resource clear d" on "hanode1" + + # move <res> force + When Run "crm resource move d force" on "hanode1" + When Run "sleep 2" on "hanode1" + Then Resource "d" is started on "hanode2" + When Run "crm resource clear d" on "hanode1" + + # move <res> <lifetime> force + When Run "crm resource move d PT5M force" on "hanode1" + When Run "sleep 2" on "hanode1" + Then Resource "d" is started on "hanode1" + When Run "crm resource clear d" on "hanode1" + + # move <res> <node> <lifetime> + When Run "crm resource move d hanode2 PT5M" on "hanode1" + When Run "sleep 2" on "hanode1" + Then Resource "d" is started on "hanode2" + When Run "crm resource clear d" on "hanode1" + + # move <res> <node> <lifetime> force + When Run "crm resource move d hanode1 PT5M force" on "hanode1" + When Run "sleep 2" on "hanode1" + Then Resource "d" is started on "hanode1" + When Run "crm resource clear d" on "hanode1" + + When Try "crm resource move d hanode2 PT5M force xxx" + Then Except "ERROR: resource.move: usage: move <rsc> [<node>] [<lifetime>] [force]" + When Try "crm resource move d hanode2 PT5M forcd" + Then Except "ERROR: resource.move: usage: move <rsc> [<node>] [<lifetime>] [force]" + When Try "crm resource move d xxxx PT5M force" + Then Except "ERROR: resource.move: Not our node: xxxx" + When Try "crm resource move d" + Then Except "ERROR: resource.move: No target node: Move requires either a target node or 'force'" + + @clean + Scenario: promote and demote promotable clone resource (bsc#1194125) + When Run "crm configure primitive s2 ocf:pacemaker:Stateful op monitor role=Promoted interval=3s op monitor role=Unpromoted interval=5s" on "hanode1" + And Run "crm configure clone p2 s2 meta promotable=true" on "hanode1" + And Run "crm resource demote p2" on "hanode1" + Then Run "sleep 2;! 
crm_resource --locate -r p2|grep -E 'Master|Promoted'" OK + When Run "crm resource promote p2" on "hanode2" + Then Run "sleep 2;crm_resource --locate -r p2|grep -E 'Master|Promoted'" OK + + @clean + Scenario: operation warning + When Run "crm configure primitive id=d2 Dummy op start interval=5s" on "hanode1" + Then Expected "WARNING: d2: Specified interval for start is 5s, it must be 0" in stderr + When Run "crm configure primitive id=d3 Dummy op monitor interval=0" on "hanode1" + Then Expected "WARNING: d3: interval in monitor should be larger than 0, advised is 10s" in stderr + When Run "crm configure primitive s2 ocf:pacemaker:Stateful op monitor role=Promoted interval=3s op monitor role=Unpromoted interval=3s" on "hanode1" + Then Expected "WARNING: s2: interval in monitor must be unique, advised is 11s" in stderr + When Run "crm configure primitive id=d4 Dummy op start timeout=10s" on "hanode1" + Then Expected "WARNING: d4: specified timeout 10s for start is smaller than the advised 20s" in stderr + + @clean + Scenario: trace ra with specific directory + When Run "crm resource trace d monitor" on "hanode1" + Then Expected "Trace for d:monitor is written to /var/lib/heartbeat/trace_ra/Dummy" in stdout + When Wait "10" seconds + Then Run "bash -c 'ls /var/lib/heartbeat/trace_ra/Dummy/d.monitor.*'" OK + When Run "crm resource untrace d" on "hanode1" + Then Expected "Stop tracing d" in stdout + When Run "crm resource trace d monitor /trace_log_d" on "hanode1" + Then Expected "Trace for d:monitor is written to /trace_log_d/Dummy" in stdout + When Wait "10" seconds + Then Run "bash -c 'ls /trace_log_d/Dummy/d.monitor.*'" OK + When Run "crm resource untrace d" on "hanode1" + Then Expected "Stop tracing d" in stdout + + @clean + Scenario: Add promotable=true and interleave=true automatically (bsc#1205522) + When Run "crm configure primitive s2 ocf:pacemaker:Stateful" on "hanode1" + And Run "crm configure clone p2 s2" on "hanode1" + Then Run "sleep 2;crm configure show|grep -A1 'clone p2 s2'|grep 'promotable=true interleave=true'" OK + When Run "crm configure primitive s3 ocf:pacemaker:Stateful" on "hanode1" + And Run "crm configure clone p3 s3 meta promotable=false" on "hanode1" + Then Run "sleep 2;crm configure show|grep -A1 'clone p3 s3'|grep 'promotable=false interleave=true'" OK + When Run "crm configure primitive d2 Dummy" on "hanode1" + And Run "crm configure clone p4 d2" on "hanode1" + Then Run "sleep 2;crm configure show|grep -A1 'clone p4 d2'|grep 'interleave=true'" OK + + @clean + Scenario: Run rsctest + When Run "crm resource stop d vip" on "hanode1" + When Run "crm configure rsctest d vip" on "hanode1" + Then Expected multiple lines in output + """ + INFO: Probing resources + INFO: Testing on hanode1: d vip + INFO: Testing on hanode2: d vip + """ diff --git a/test/features/ssh_agent.feature b/test/features/ssh_agent.feature new file mode 100644 index 0000000..5c632dd --- /dev/null +++ b/test/features/ssh_agent.feature @@ -0,0 +1,86 @@ +# vim: sw=2 sts=2 +Feature: ssh-agent support + + Test ssh-agent support for crmsh + Need nodes: hanode1 hanode2 hanode3 qnetd-node + + Scenario: Errors are reported when ssh-agent is not available + When Try "crm cluster init --use-ssh-agent -y" on "hanode1" + Then Expected "Environment variable SSH_AUTH_SOCK does not exist." in stderr + When Try "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1" + Then Expected "Environment variable SSH_AUTH_SOCK does not exist." 
not in stderr + + Scenario: Errors are reported when there are no keys in ssh-agent + Given ssh-agent is started at "/tmp/ssh-auth-sock" on nodes ["hanode1", "hanode2", "hanode3"] + When Try "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1" + Then Expected "ssh-add" in stderr + + Scenario: Skip creating ssh key pairs with --use-ssh-agent + Given Run "mkdir ~/ssh_disabled" OK on "hanode1,hanode2,hanode3" + And Run "mv ~/.ssh/id_* ~/ssh_disabled" OK on "hanode1,hanode2,hanode3" + When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock ssh-add ~/ssh_disabled/id_rsa" on "hanode1,hanode2,hanode3" + And Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1" + And Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster join --use-ssh-agent -y -c hanode1" on "hanode2" + And Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster join --use-ssh-agent -y -c hanode1" on "hanode3" + Then Cluster service is "started" on "hanode1" + And Online nodes are "hanode1 hanode2 hanode3" + # check the number of keys in authorized_keys + And Run "test x1 == x$(awk 'END {print NR}' ~/.ssh/authorized_keys)" OK + And Run "test x3 == x$(sudo awk 'END {print NR}' ~hacluster/.ssh/authorized_keys)" OK + + Scenario: Skip creating ssh key pairs with --use-ssh-agent and use -N + Given Run "crm cluster stop" OK on "hanode1,hanode2,hanode3" + When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init --use-ssh-agent -y -N hanode2 -N hanode3" on "hanode1" + Then Cluster service is "started" on "hanode3" + And Online nodes are "hanode1 hanode2 hanode3" + And Run "test x1 == x$(awk 'END {print NR}' ~/.ssh/authorized_keys)" OK on "hanode3" + And Run "test x3 == x$(sudo awk 'END {print NR}' ~hacluster/.ssh/authorized_keys)" OK on "hanode3" + + Scenario: crm report + Then Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm report /tmp/report1" OK on "hanode1" + Then Directory "hanode2" in "/tmp/report1.tar.bz2" + Then Directory "hanode3" in "/tmp/report1.tar.bz2" + + Scenario: Use qnetd + Given Run "crm cluster stop" OK on "hanode1,hanode2,hanode3" + When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1" + And Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init qdevice --use-ssh-agent -y --qnetd-hostname qnetd-node" on "hanode1" + And Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster join --use-ssh-agent -y -c hanode1" on "hanode2" + Then Cluster service is "started" on "hanode1" + And Online nodes are "hanode1 hanode2" + And Service "corosync-qdevice" is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode2" + And Service "corosync-qnetd" is "started" on "qnetd-node" + + Scenario: Use qnetd with -N + Given Run "crm cluster stop" OK on "hanode1,hanode2" + When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init --use-ssh-agent -y -N hanode2 --qnetd-hostname qnetd-node" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Online nodes are "hanode1 hanode2" + And Service "corosync-qdevice" is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode2" + And Service "corosync-qnetd" is "started" on "qnetd-node" + + Scenario: GEO cluster setup with ssh-agent + Given Run "crm cluster stop" OK on "hanode1,hanode2" + And Run "systemctl disable --now booth@booth" OK on "hanode1,hanode2,hanode3" + And Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init -y -n cluster1 --use-ssh-agent" on "hanode1" + Then 
Cluster service is "started" on "hanode1" + When Run "crm configure primitive vip IPaddr2 params ip=@vip.0" on "hanode1" + + When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init -y -n cluster2 --use-ssh-agent" on "hanode2" + Then Cluster service is "started" on "hanode2" + When Run "crm configure primitive vip IPaddr2 params ip=@vip.1" on "hanode2" + + When Run "crm cluster geo_init -y --clusters "cluster1=@vip.0 cluster2=@vip.1" --tickets tickets-geo --arbitrator hanode3" on "hanode1" + When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster geo_join -y --use-ssh-agent --cluster-node hanode1 --clusters "cluster1=@vip.0 cluster2=@vip.1"" on "hanode2" + + Given Service "booth@booth" is "stopped" on "hanode3" + When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster geo_init_arbitrator -y --use-ssh-agent --cluster-node hanode1" on "hanode3" + Then Service "booth@booth" is "started" on "hanode3" + When Run "crm resource start g-booth" on "hanode1" + Then Show cluster status on "hanode1" + When Run "crm resource start g-booth" on "hanode2" + Then Show cluster status on "hanode2" diff --git a/test/features/steps/__init__.py b/test/features/steps/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/features/steps/__init__.py diff --git a/test/features/steps/behave_agent.py b/test/features/steps/behave_agent.py new file mode 100755 index 0000000..eafeedd --- /dev/null +++ b/test/features/steps/behave_agent.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +# behave_agent.py - a simple agent to execute command +# NO AUTHENTICATIONS. It should only be used in behave test. +import io +import os +import pwd +import socket +import struct +import subprocess +import typing + + +MSG_EOF = 0 +MSG_USER = 1 +MSG_CMD = 2 +MSG_OUT = 4 +MSG_ERR = 5 +MSG_RC = 6 + + +class Message: + @staticmethod + def write(output, type: int, data: bytes): + output.write(struct.pack('!ii', type, len(data))) + output.write(data) + + @staticmethod + def read(input): + buf = input.read(8) + type, length = struct.unpack('!ii', buf) + if length > 0: + buf = input.read(length) + else: + buf = b'' + return type, buf + + +class SocketIO(io.RawIOBase): + def __init__(self, s: socket.socket): + self._socket = s + + def readable(self) -> bool: + return True + + def writable(self) -> bool: + return True + + def read(self, __size: int = -1) -> bytes: + return self._socket.recv(__size) + + def readinto(self, __buffer) -> int: + return self._socket.recv_into(__buffer) + + def readall(self) -> bytes: + raise NotImplementedError + + def write(self, __b) -> int: + return self._socket.send(__b) + + +def call(host: str, port: int, cmdline: str, user: typing.Optional[str] = None): + family, type, proto, _, sockaddr = socket.getaddrinfo(host, port, type=socket.SOCK_STREAM)[0] + with socket.socket(family, type, proto) as s: + s.connect(sockaddr) + sout = io.BufferedWriter(SocketIO(s), 4096) + Message.write(sout, MSG_USER, user.encode('utf-8') if user else _getuser().encode('utf-8')) + Message.write(sout, MSG_CMD, cmdline.encode('utf-8')) + Message.write(sout, MSG_EOF, b'') + sout.flush() + s.shutdown(socket.SHUT_WR) + rc = None + stdout = [] + stderr = [] + sin = io.BufferedReader(SocketIO(s), 4096) + while True: + type, buf = Message.read(sin) + if type == MSG_OUT: + stdout.append(buf) + elif type == MSG_ERR: + stderr.append(buf) + elif type == MSG_RC: + rc, = struct.unpack('!i', buf) + elif type == MSG_EOF: + assert rc is not None + return rc, b''.join(stdout), b''.join(stderr) + else: + raise ValueError(f"Unknown 
message type: {type}") + + +def serve(stdin, stdout, stderr): + # This is an xinetd-style service. + assert os.geteuid() == 0 + user = None + cmd = None + sin = io.BufferedReader(stdin) + while True: + type, buf = Message.read(sin) + if type == MSG_USER: + user = buf.decode('utf-8') + elif type == MSG_CMD: + cmd = buf.decode('utf-8') + elif type == MSG_EOF: + assert user is not None + assert cmd is not None + break + else: + raise ValueError(f"Unknown message type: {type}") + if user == 'root': + args = ['/bin/sh'] + else: + args = ['/bin/su', '-', user, '-c', '/bin/sh'] + result = subprocess.run( + args, + input=cmd.encode('utf-8'), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + sout = io.BufferedWriter(stdout) + Message.write(sout, MSG_RC, struct.pack('!i', result.returncode)) + Message.write(sout, MSG_OUT, result.stdout) + Message.write(sout, MSG_ERR, result.stderr) + Message.write(sout, MSG_EOF, b'') + stdout.flush() + + +def _getuser(): + return pwd.getpwuid(os.geteuid()).pw_name + + +if __name__ == '__main__': + with open(0, 'rb') as stdin, \ + open(1, 'wb') as stdout, \ + open(2, 'wb') as stderr: + serve(stdin, stdout, stderr) diff --git a/test/features/steps/const.py b/test/features/steps/const.py new file mode 100644 index 0000000..3ec8845 --- /dev/null +++ b/test/features/steps/const.py @@ -0,0 +1,353 @@ +CRM_H_OUTPUT = '''usage: crm [-h|--help] [OPTIONS] [SUBCOMMAND ARGS...] +or crm help SUBCOMMAND + +For a list of available subcommands, use crm help. + +Use crm without arguments for an interactive session. +Call a subcommand directly for a "single-shot" use. +Call crm with a level name as argument to start an interactive +session from that level. + +See the crm(8) man page or call crm help for more details. + +positional arguments: + SUBCOMMAND + +optional arguments: + -h, --help show this help message and exit + --version show program's version number and exit + -f FILE, --file FILE Load commands from the given file. If a dash (-) is + used in place of a file name, crm will read commands + from the shell standard input (stdin). + -c CIB, --cib CIB Start the session using the given shadow CIB file. + Equivalent to `cib use <CIB>`. + -D OUTPUT_TYPE, --display OUTPUT_TYPE + Choose one of the output options: plain, color-always, + color, or uppercase. The default is color if the + terminal emulation supports colors, else plain. + -F, --force Make crm proceed with applying changes where it would + normally ask the user to confirm before proceeding. + This option is mainly useful in scripts, and should be + used with care. + -n, --no Automatically answer no when prompted + -w, --wait Make crm wait for the cluster transition to finish + (for the changes to take effect) after each processed + line. + -H DIR|FILE|SESSION, --history DIR|FILE|SESSION + A directory or file containing a cluster report to + load into history, or the name of a previously saved + history session. + -d, --debug Print verbose debugging information. + -R, --regression-tests + Enables extra verbose trace logging used by the + regression tests. Logs all external calls made by + crmsh. + --scriptdir DIR Extra directory where crm looks for cluster scripts, + or a list of directories separated by semi-colons + (e.g. /dir1;/dir2;etc.). + -X PROFILE Collect profiling data and save in PROFILE. + -o OPTION=VALUE, --opt OPTION=VALUE + Set crmsh option temporarily. 
If the options are saved + using+options save+ then the value passed here will + also be saved.Multiple options can be set by using + +-o+ multiple times.''' + + +CRM_CLUSTER_INIT_H_OUTPUT = '''Initializes a new HA cluster + +usage: init [options] [STAGE] + +Initialize a cluster from scratch. This command configures +a complete cluster, and can also add additional cluster +nodes to the initial one-node cluster using the --nodes +option. + +optional arguments: + -h, --help Show this help message + -q, --quiet Be quiet (don't describe what's happening, just do it) + -y, --yes Answer "yes" to all prompts (use with caution, this is + destructive, especially those storage related + configurations and stages.) + -n NAME, --name NAME Set the name of the configured cluster. + -N [USER@]HOST, --node [USER@]HOST + The member node of the cluster. Note: the current node + is always get initialized during bootstrap in the + beginning. + -S, --enable-sbd Enable SBD even if no SBD device is configured + (diskless mode) + -w WATCHDOG, --watchdog WATCHDOG + Use the given watchdog device or driver name + -x, --skip-csync2-sync + Skip csync2 initialization (an experimental option) + --no-overwrite-sshkey + Avoid "/root/.ssh/id_rsa" overwrite if "-y" option is + used (False by default; Deprecated) + --use-ssh-agent Use an existing key from ssh-agent instead of creating + new key pairs + +Network configuration: + Options for configuring the network and messaging layer. + + -i IF, --interface IF + Bind to IP address on interface IF. Use -i second time + for second interface + -u, --unicast Configure corosync to communicate over unicast(udpu). + This is the default transport type + -U, --multicast Configure corosync to communicate over multicast. + Default is unicast + -A IP, --admin-ip IP Configure IP address as an administration virtual IP + -M, --multi-heartbeats + Configure corosync with second heartbeat line + -I, --ipv6 Configure corosync use IPv6 + +QDevice configuration: + QDevice participates in quorum decisions. With the assistance of + a third-party arbitrator Qnetd, it provides votes so that a cluster + is able to sustain more node failures than standard quorum rules + allow. It is recommended for clusters with an even number of nodes + and highly recommended for 2 node clusters. + + Options for configuring QDevice and QNetd. + + --qnetd-hostname [USER@]HOST + User and host of the QNetd server. The host can be + specified in either hostname or IP address. + --qdevice-port PORT TCP PORT of QNetd server (default:5403) + --qdevice-algo ALGORITHM + QNetd decision ALGORITHM (ffsplit/lms, + default:ffsplit) + --qdevice-tie-breaker TIE_BREAKER + QNetd TIE_BREAKER (lowest/highest/valid_node_id, + default:lowest) + --qdevice-tls TLS Whether using TLS on QDevice/QNetd (on/off/required, + default:on) + --qdevice-heuristics COMMAND + COMMAND to run with absolute path. For multiple + commands, use ";" to separate (details about + heuristics can see man 8 corosync-qdevice) + --qdevice-heuristics-mode MODE + MODE of operation of heuristics (on/sync/off, + default:sync) + +Storage configuration: + Options for configuring shared storage. 
+ + -s DEVICE, --sbd-device DEVICE + Block device to use for SBD fencing, use ";" as + separator or -s multiple times for multi path (up to 3 + devices) + -o DEVICE, --ocfs2-device DEVICE + Block device to use for OCFS2; When using Cluster LVM2 + to manage the shared storage, user can specify one or + multiple raw disks, use ";" as separator or -o + multiple times for multi path (must specify -C option) + NOTE: this is a Technical Preview + -C, --cluster-lvm2 Use Cluster LVM2 (only valid together with -o option) + NOTE: this is a Technical Preview + -m MOUNT, --mount-point MOUNT + Mount point for OCFS2 device (default is + /srv/clusterfs, only valid together with -o option) + NOTE: this is a Technical Preview + +Stage can be one of: + ssh Create SSH keys for passwordless SSH between cluster nodes + csync2 Configure csync2 + corosync Configure corosync + sbd Configure SBD (requires -s <dev>) + cluster Bring the cluster online + ocfs2 Configure OCFS2 (requires -o <dev>) NOTE: this is a Technical Preview + vgfs Create volume group and filesystem (ocfs2 template only, + requires -o <dev>) NOTE: this stage is an alias of ocfs2 stage + admin Create administration virtual IP (optional) + qdevice Configure qdevice and qnetd + +Note: + - If stage is not specified, the script will run through each stage + in sequence, with prompts for required information. + +Examples: + # Setup the cluster on the current node + crm cluster init -y + + # Setup the cluster with multiple nodes + (NOTE: the current node will be part of the cluster even not listed in the -N option as below) + crm cluster init -N node1 -N node2 -N node3 -y + + # Setup the cluster on the current node, with two network interfaces + crm cluster init -i eth1 -i eth2 -y + + # Setup the cluster on the current node, with disk-based SBD + crm cluster init -s <share disk> -y + + # Setup the cluster on the current node, with diskless SBD + crm cluster init -S -y + + # Setup the cluster on the current node, with QDevice + crm cluster init --qnetd-hostname <qnetd addr> -y + + # Setup the cluster on the current node, with SBD+OCFS2 + crm cluster init -s <share disk1> -o <share disk2> -y + + # Setup the cluster on the current node, with SBD+OCFS2+Cluster LVM + crm cluster init -s <share disk1> -o <share disk2> -o <share disk3> -C -y + + # Add SBD on a running cluster + crm cluster init sbd -s <share disk> -y + + # Replace SBD device on a running cluster which already configured SBD + crm -F cluster init sbd -s <share disk> -y + + # Add diskless SBD on a running cluster + crm cluster init sbd -S -y + + # Add QDevice on a running cluster + crm cluster init qdevice --qnetd-hostname <qnetd addr> -y + + # Add OCFS2+Cluster LVM on a running cluster + crm cluster init ocfs2 -o <share disk1> -o <share disk2> -C -y''' + + +CRM_CLUSTER_JOIN_H_OUTPUT = '''Join existing cluster + +usage: join [options] [STAGE] + +Join the current node to an existing cluster. The +current node cannot be a member of a cluster already. +Pass any node in the existing cluster as the argument +to the -c option. + +optional arguments: + -h, --help Show this help message + -q, --quiet Be quiet (don't describe what's happening, just do it) + -y, --yes Answer "yes" to all prompts (use with caution) + -w WATCHDOG, --watchdog WATCHDOG + Use the given watchdog device + --use-ssh-agent Use an existing key from ssh-agent instead of creating + new key pairs + +Network configuration: + Options for configuring the network and messaging layer. 
+ + -c [USER@]HOST, --cluster-node [USER@]HOST + User and host to login to an existing cluster node. + The host can be specified with either a hostname or an + IP. + -i IF, --interface IF + Bind to IP address on interface IF. Use -i second time + for second interface + +Stage can be one of: + ssh Obtain SSH keys from existing cluster node (requires -c <host>) + csync2 Configure csync2 (requires -c <host>) + ssh_merge Merge root's SSH known_hosts across all nodes (csync2 must + already be configured). + cluster Start the cluster on this node + +If stage is not specified, each stage will be invoked in sequence. + +Examples: + # Join with a cluster node + crm cluster join -c <node> -y + + # Join with a cluster node, with the same network interface used by that node + crm cluster join -c <node> -i eth1 -i eth2 -y''' + + +CRM_CLUSTER_REMOVE_H_OUTPUT = '''Remove node(s) from the cluster + +usage: remove [options] [<node> ...] + +Remove one or more nodes from the cluster. + +This command can remove the last node in the cluster, +thus effectively removing the whole cluster. To remove +the last node, pass --force argument to crm or set +the config.core.force option. + +optional arguments: + -h, --help Show this help message + -q, --quiet Be quiet (don't describe what's happening, just do it) + -y, --yes Answer "yes" to all prompts (use with caution) + -c HOST, --cluster-node HOST + IP address or hostname of cluster node which will be + deleted + -F, --force Remove current node + --qdevice Remove QDevice configuration and service from cluster''' + + +CRM_CLUSTER_GEO_INIT_H_OUTPUT = '''Configure cluster as geo cluster + +usage: geo-init [options] + +Create a new geo cluster with the current cluster as the +first member. Pass the complete geo cluster topology as +arguments to this command, and then use geo-join and +geo-init-arbitrator to add the remaining members to +the geo cluster. + +optional arguments: + -h, --help Show this help message + -q, --quiet Be quiet (don't describe what's happening, just do it) + -y, --yes Answer "yes" to all prompts (use with caution) + -a [USER@]HOST, --arbitrator [USER@]HOST + Geo cluster arbitrator + -s DESC, --clusters DESC + Geo cluster description (see details below) + -t LIST, --tickets LIST + Tickets to create (space-separated) + +Cluster Description + + This is a map of cluster names to IP addresses. + Each IP address will be configured as a virtual IP + representing that cluster in the geo cluster + configuration. + + Example with two clusters named paris and amsterdam: + + --clusters "paris=192.168.10.10 amsterdam=192.168.10.11" + + Name clusters using the --name parameter to + crm bootstrap init.''' + + +CRM_CLUSTER_GEO_JOIN_H_OUTPUT = '''Join cluster to existing geo cluster + +usage: geo-join [options] + +This command should be run from one of the nodes in a cluster +which is currently not a member of a geo cluster. The geo +cluster configuration will be fetched from the provided node, +and the cluster will be added to the geo cluster. + +Note that each cluster in a geo cluster needs to have a unique +name set. The cluster name can be set using the --name argument +to init, or by configuring corosync with the cluster name in +an existing cluster. 
+ +optional arguments: + -h, --help Show this help message + -q, --quiet Be quiet (don't describe what's happening, just do it) + -y, --yes Answer "yes" to all prompts (use with caution) + -c [USER@]HOST, --cluster-node [USER@]HOST + An already-configured geo cluster or arbitrator + -s DESC, --clusters DESC + Geo cluster description (see geo-init for details)''' + + +CRM_CLUSTER_GEO_INIT_ARBIT_H_OUTPUT = '''Initialize node as geo cluster arbitrator + +usage: geo-init-arbitrator [options] + +Configure the current node as a geo arbitrator. The command +requires an existing geo cluster or geo arbitrator from which +to get the geo cluster configuration. + +optional arguments: + -h, --help Show this help message + -q, --quiet Be quiet (don't describe what's happening, just do it) + -y, --yes Answer "yes" to all prompts (use with caution) + -c [USER@]HOST, --cluster-node [USER@]HOST + An already-configured geo cluster + --use-ssh-agent Use an existing key from ssh-agent instead of creating + new key pairs''' diff --git a/test/features/steps/step_implementation.py b/test/features/steps/step_implementation.py new file mode 100644 index 0000000..74f0cc8 --- /dev/null +++ b/test/features/steps/step_implementation.py @@ -0,0 +1,575 @@ +import re +import time +import os +import datetime +import yaml + +import behave +from behave import given, when, then +import behave_agent +from crmsh import corosync, sbd, userdir, bootstrap +from crmsh import utils as crmutils +from crmsh.sh import ShellUtils +from utils import check_cluster_state, check_service_state, online, run_command, me, \ + run_command_local_or_remote, file_in_archive, \ + assert_eq, is_unclean, assert_in +import const + + +def _parse_str(text): + return text[1:-1].encode('utf-8').decode('unicode_escape') +_parse_str.pattern='".*"' + + +behave.use_step_matcher("cfparse") +behave.register_type(str=_parse_str) + + +@when('Write multi lines to file "{f}" on "{addr}"') +def step_impl(context, f, addr): + data_list = context.text.split('\n') + for line in data_list: + echo_option = " -n" if line == data_list[-1] else "" + cmd = "echo{} \"{}\"|sudo tee -a {}".format(echo_option, line, f) + if addr != me(): + sudoer = userdir.get_sudoer() + user = f"{sudoer}@" if sudoer else "" + cmd = f"ssh {user}{addr} '{cmd}'" + run_command(context, cmd) + + +@given('Cluster service is "{state}" on "{addr}"') +def step_impl(context, state, addr): + assert check_cluster_state(context, state, addr) is True + + +@given('Nodes [{nodes:str+}] are cleaned up') +def step_impl(context, nodes): + run_command(context, 'crm resource cleanup || true') + for node in nodes: + # wait for ssh service + for _ in range(10): + rc, _, _ = ShellUtils().get_stdout_stderr('ssh {} true'.format(node)) + if rc == 0: + break + time.sleep(1) + run_command_local_or_remote(context, "crm cluster stop {} || true".format(node), node) + assert check_cluster_state(context, 'stopped', node) is True + + +@given('Service "{name}" is "{state}" on "{addr}"') +def step_impl(context, name, state, addr): + assert check_service_state(context, name, state, addr) is True + + +@given('Has disk "{disk}" on "{addr}"') +def step_impl(context, disk, addr): + _, out, _ = run_command_local_or_remote(context, "fdisk -l", addr) + assert re.search(r'{} '.format(disk), out) is not None + + +@given('Online nodes are "{nodelist}"') +def step_impl(context, nodelist): + assert online(context, nodelist) is True + + +@given('Run "{cmd}" OK') +def step_impl(context, cmd): + rc, _, _ = run_command(context, cmd) + assert rc == 
0 + + +@then('Run "{cmd}" OK') +def step_impl(context, cmd): + rc, _, _ = run_command(context, cmd) + assert rc == 0 + + +@when('Run "{cmd}" OK') +def step_impl(context, cmd): + rc, _, _ = run_command(context, cmd) + assert rc == 0 + + +@given('IP "{addr}" is belong to "{iface}"') +def step_impl(context, addr, iface): + cmd = 'ip address show dev {}'.format(iface) + res = re.search(r' {}/'.format(addr), run_command(context, cmd)[1]) + assert bool(res) is True + + +@given('Run "{cmd}" OK on "{addr}"') +def step_impl(context, cmd, addr): + _, out, _ = run_command_local_or_remote(context, cmd, addr, True) + +@when('Run "{cmd}" on "{addr}"') +def step_impl(context, cmd, addr): + _, out, _ = run_command_local_or_remote(context, cmd, addr) + + +@then('Run "{cmd}" OK on "{addr}"') +def step_impl(context, cmd, addr): + _, out, _ = run_command_local_or_remote(context, cmd, addr) + + +@then('Print stdout') +def step_impl(context): + context.logger.info("\n{}".format(context.stdout)) + + +@then('Print stderr') +def step_impl(context): + context.logger.info("\n{}".format(context.stderr)) + + +@then('No crmsh tracebacks') +def step_impl(context): + if "Traceback (most recent call last):" in context.stderr and \ + re.search('File "/usr/lib/python.*/crmsh/', context.stderr): + context.logger.info("\n{}".format(context.stderr)) + context.failed = True + + +@when('Try "{cmd}" on "{addr}"') +def step_impl(context, cmd, addr): + run_command_local_or_remote(context, cmd, addr, exit_on_fail=False) + + +@when('Try "{cmd}"') +def step_impl(context, cmd): + _, out, _ = run_command(context, cmd, exit_on_fail=False) + + +@when('Wait "{second}" seconds') +def step_impl(context, second): + time.sleep(int(second)) + + +@then('Got output "{msg}"') +def step_impl(context, msg): + assert context.stdout == msg + context.stdout = None + + +@then('Expected multiple lines') +def step_impl(context): + assert context.stdout == context.text + context.stdout = None + + +@then('Expected "{msg}" in stdout') +def step_impl(context, msg): + assert_in(msg, context.stdout) + context.stdout = None + + +@then('Expected "{msg}" in stderr') +def step_impl(context, msg): + assert_in(msg, context.stderr) + context.stderr = None + + +@then('Expected regrex "{reg_str}" in stdout') +def step_impl(context, reg_str): + res = re.search(reg_str, context.stdout) + assert res is not None + context.stdout = None + + +@then('Expected return code is "{num}"') +def step_impl(context, num): + assert context.return_code == int(num) + + +@then('Expected "{msg}" not in stdout') +def step_impl(context, msg): + assert msg not in context.stdout + context.stdout = None + + +@then('Expected "{msg}" not in stderr') +def step_impl(context, msg): + assert context.stderr is None or msg not in context.stderr + context.stderr = None + + +@then('Except "{msg}"') +def step_impl(context, msg): + assert_in(msg, context.stderr) + context.stderr = None + + +@then('Except multiple lines') +def step_impl(context): + assert_in(context.text, context.stderr) + context.stderr = None + + +@then('Expected multiple lines in output') +def step_impl(context): + assert_in(context.text, context.stdout) + context.stdout = None + + +@then('Except "{msg}" in stderr') +def step_impl(context, msg): + assert_in(msg, context.stderr) + context.stderr = None + + +@then('Cluster service is "{state}" on "{addr}"') +def step_impl(context, state, addr): + assert check_cluster_state(context, state, addr) is True + + +@then('Service "{name}" is "{state}" on "{addr}"') +def step_impl(context, name, 
state, addr): + assert check_service_state(context, name, state, addr) is True + + +@then('Online nodes are "{nodelist}"') +def step_impl(context, nodelist): + assert online(context, nodelist) is True + + +@then('Node "{node}" is standby') +def step_impl(context, node): + assert crmutils.is_standby(node) is True + + +@then('Node "{node}" is online') +def step_impl(context, node): + assert crmutils.is_standby(node) is False + + +@then('IP "{addr}" is used by corosync on "{node}"') +def step_impl(context, addr, node): + _, out, _ = run_command_local_or_remote(context, 'corosync-cfgtool -s', node) + res = re.search(r' {}\n'.format(addr), out) + assert bool(res) is True + + +@then('Cluster name is "{name}"') +def step_impl(context, name): + _, out, _ = run_command(context, 'corosync-cmapctl -b totem.cluster_name') + assert out.split()[-1] == name + + +@then('Cluster virtual IP is "{addr}"') +def step_impl(context, addr): + _, out, _ = run_command(context, 'crm configure show|grep -A1 IPaddr2') + res = re.search(r' ip={}'.format(addr), out) + assert bool(res) is True + + +@then('Cluster is using udpu transport mode') +def step_impl(context): + assert corosync.get_value('totem.transport') == 'udpu' + + +@then('Show cluster status on "{addr}"') +def step_impl(context, addr): + _, out, _ = run_command_local_or_remote(context, 'crm_mon -1', addr) + if out: + context.logger.info("\n{}".format(out)) + + +@then('Show corosync ring status') +def step_impl(context): + _, out, _ = run_command(context, 'crm corosync status ring') + if out: + context.logger.info("\n{}".format(out)) + + +@then('Show crm configure') +def step_impl(context): + _, out, _ = run_command(context, 'crm configure show') + if out: + context.logger.info("\n{}".format(out)) + + +@then('Show status from qnetd') +def step_impl(context): + _, out, _ = run_command(context, 'crm corosync status qnetd') + if out: + context.logger.info("\n{}".format(out)) + + +@then('Show qdevice status') +def step_impl(context): + _, out, _ = run_command(context, 'crm corosync status qdevice') + if out: + context.logger.info("\n{}".format(out)) + + +@then('Show corosync qdevice configuration') +def step_impl(context): + _, out, _ = run_command(context, "sed -n -e '/quorum/,/^}/ p' /etc/corosync/corosync.conf") + if out: + context.logger.info("\n{}".format(out)) + + +@then('Resource "{res}" type "{res_type}" is "{state}"') +def step_impl(context, res, res_type, state): + try_count = 0 + result = None + while try_count < 20: + time.sleep(1) + _, out, _ = run_command(context, "crm_mon -1rR") + if out: + result = re.search(r'\s{}\s+.*:+{}\):\s+{} '.format(res, res_type, state), out) + if not result: + try_count += 1 + else: + break + assert result is not None + + +@then('Resource "{res}" failcount on "{node}" is "{number}"') +def step_impl(context, res, node, number): + cmd = "crm resource failcount {} show {}".format(res, node) + _, out, _ = run_command(context, cmd) + if out: + result = re.search(r'name=fail-count-{} value={}'.format(res, number), out) + assert result is not None + + +@then('Resource "{res_type}" not configured') +def step_impl(context, res_type): + _, out, _ = run_command(context, "crm configure show") + result = re.search(r' {} '.format(res_type), out) + assert result is None + + +@then('Output is the same with expected "{cmd}" help output') +def step_impl(context, cmd): + cmd_help = {} + cmd_help["crm"] = const.CRM_H_OUTPUT + cmd_help["crm_cluster_init"] = const.CRM_CLUSTER_INIT_H_OUTPUT + cmd_help["crm_cluster_join"] = 
const.CRM_CLUSTER_JOIN_H_OUTPUT + cmd_help["crm_cluster_remove"] = const.CRM_CLUSTER_REMOVE_H_OUTPUT + cmd_help["crm_cluster_geo-init"] = const.CRM_CLUSTER_GEO_INIT_H_OUTPUT + cmd_help["crm_cluster_geo-join"] = const.CRM_CLUSTER_GEO_JOIN_H_OUTPUT + cmd_help["crm_cluster_geo-init-arbitrator"] = const.CRM_CLUSTER_GEO_INIT_ARBIT_H_OUTPUT + key = '_'.join(cmd.split()) + assert_eq(cmd_help[key], context.stdout) + + +@then('Corosync working on "{transport_type}" mode') +def step_impl(context, transport_type): + if transport_type == "multicast": + assert corosync.get_value("totem.transport") is None + if transport_type == "unicast": + assert_eq("udpu", corosync.get_value("totem.transport")) + + +@then('Expected votes will be "{votes}"') +def step_impl(context, votes): + assert_eq(int(votes), int(corosync.get_value("quorum.expected_votes"))) + + +@then('Directory "{directory}" created') +def step_impl(context, directory): + assert os.path.isdir(directory) is True + + +@then('Directory "{directory}" not created') +def step_impl(context, directory): + assert os.path.isdir(directory) is False + + +@then('Default crm_report tar file created') +def step_impl(context): + default_file_name = 'crm_report-{}.tar.bz2'.format(datetime.datetime.now().strftime("%a-%d-%b-%Y")) + assert os.path.exists(default_file_name) is True + + +@when('Remove default crm_report tar file') +def step_impl(context): + default_file_name = 'crm_report-{}.tar.bz2'.format(datetime.datetime.now().strftime("%a-%d-%b-%Y")) + os.remove(default_file_name) + + +@then('File "{f}" in "{archive}"') +def step_impl(context, f, archive): + assert file_in_archive(f, archive) is True + + +@then('Directory "{f}" in "{archive}"') +def step_impl(context, f, archive): + assert file_in_archive(f, archive) is True + + +@then('File "{f}" not in "{archive}"') +def step_impl(context, f, archive): + assert file_in_archive(f, archive) is False + + +@then('File "{f}" was synced in cluster') +def step_impl(context, f): + cmd = "crm cluster diff {}".format(f) + rc, out, _ = run_command(context, cmd) + assert_eq("", out) + + +@given('Resource "{res_id}" is started on "{node}"') +def step_impl(context, res_id, node): + rc, out, err = ShellUtils().get_stdout_stderr("crm_mon -1") + assert re.search(r'\*\s+{}\s+.*Started\s+{}'.format(res_id, node), out) is not None + + +@then('Resource "{res_id}" is started on "{node}"') +def step_impl(context, res_id, node): + rc, out, err = ShellUtils().get_stdout_stderr("crm_mon -1") + assert re.search(r'\*\s+{}\s+.*Started\s+{}'.format(res_id, node), out) is not None + + +@then('SBD option "{key}" value is "{value}"') +def step_impl(context, key, value): + res = sbd.SBDManager.get_sbd_value_from_config(key) + assert_eq(value, res) + + +@then('SBD option "{key}" value for "{dev}" is "{value}"') +def step_impl(context, key, dev, value): + res = sbd.SBDTimeout.get_sbd_msgwait(dev) + assert_eq(int(value), res) + + +@then('Cluster property "{key}" is "{value}"') +def step_impl(context, key, value): + res = crmutils.get_property(key) + assert res is not None + assert_eq(value, str(res)) + + +@then('Property "{key}" in "{type}" is "{value}"') +def step_impl(context, key, type, value): + res = crmutils.get_property(key, type) + assert res is not None + assert_eq(value, str(res)) + + +@then('Parameter "{param_name}" not configured in "{res_id}"') +def step_impl(context, param_name, res_id): + _, out, _ = run_command(context, "crm configure show {}".format(res_id)) + result = re.search("params {}=".format(param_name), out) + assert 
result is None + + +@then('Parameter "{param_name}" configured in "{res_id}"') +def step_impl(context, param_name, res_id): + _, out, _ = run_command(context, "crm configure show {}".format(res_id)) + result = re.search("params {}=".format(param_name), out) + assert result is not None + + +@given('Yaml "{path}" value is "{value}"') +def step_impl(context, path, value): + yaml_file = "/etc/crm/profiles.yml" + with open(yaml_file) as f: + data = yaml.load(f, Loader=yaml.SafeLoader) + sec_name, key = path.split(':') + assert_eq(str(value), str(data[sec_name][key])) + + +@when('Wait for DC') +def step_impl(context): + while True: + time.sleep(1) + if crmutils.get_dc(): + break + + +@then('File "{path}" exists on "{node}"') +def step_impl(context, path, node): + rc, _, stderr = behave_agent.call(node, 1122, 'test -f {}'.format(path), user='root') + assert rc == 0 + + +@then('File "{path}" not exist on "{node}"') +def step_impl(context, path, node): + cmd = '[ ! -f {} ]'.format(path) + rc, _, stderr = behave_agent.call(node, 1122, cmd, user='root') + assert rc == 0 + + +@then('Directory "{path}" is empty on "{node}"') +def step_impl(context, path, node): + cmd = '[ ! "$(ls -A {})" ]'.format(path) + rc, _, stderr = behave_agent.call(node, 1122, cmd, user='root') + assert rc == 0 + + +@then('Directory "{path}" not empty on "{node}"') +def step_impl(context, path, node): + cmd = '[ "$(ls -A {})" ]'.format(path) + rc, _, stderr = behave_agent.call(node, 1122, cmd, user='root') + assert rc == 0 + + +@then('Node "{node}" is UNCLEAN') +def step_impl(context, node): + assert is_unclean(node) is True + + +@then('Wait "{count}" seconds for "{node}" successfully fenced') +def step_impl(context, count, node): + index = 0 + while index <= int(count): + rc, out, _ = ShellUtils().get_stdout_stderr("stonith_admin -h {}".format(node)) + if "Node {} last fenced at:".format(node) in out: + return True + time.sleep(1) + index += 1 + return False + +@then('Check passwordless for hacluster between "{nodelist}"') +def step_impl(context, nodelist): + if userdir.getuser() != 'root' or userdir.get_sudoer(): + return True + failed = False + nodes = nodelist.split() + for i in range(0, len(nodes)): + for j in range(i + 1, len(nodes)): + rc, _, _ = behave_agent.call( + nodes[i], 1122, + f'ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 {nodes[j]} true', + user='hacluster', + ) + if rc != 0: + failed = True + context.logger.error(f"There is no passwordless configured from {nodes[i]} to {nodes[j]} under 'hacluster'") + assert not failed + + +@then('Check user shell for hacluster between "{nodelist}"') +def step_impl(context, nodelist): + if userdir.getuser() != 'root' or userdir.get_sudoer(): + return True + for node in nodelist.split(): + if node == me(): + assert bootstrap.is_nologin('hacluster') is False + else: + assert bootstrap.is_nologin('hacluster', node) is False + + +@given('ssh-agent is started at "{path}" on nodes [{nodes:str+}]') +def step_impl(context, path, nodes): + user = userdir.get_sudoer() + if not user: + user = userdir.getuser() + for node in nodes: + rc, _, _ = behave_agent.call(node, 1122, f"systemd-run --uid '{user}' -u ssh-agent /usr/bin/ssh-agent -D -a '{path}'", user='root') + assert 0 == rc + + +@then('This file "{target_file}" will trigger UnicodeDecodeError exception') +def step_impl(context, target_file): + try: + with open(target_file, "r", encoding="utf-8") as file: + content = file.read() + except UnicodeDecodeError as e: + return True + else: + return False diff --git 
a/test/features/steps/utils.py b/test/features/steps/utils.py new file mode 100644 index 0000000..675c2c4 --- /dev/null +++ b/test/features/steps/utils.py @@ -0,0 +1,177 @@ +import concurrent.futures +import difflib +import tarfile +import glob +import re +import socket +from crmsh import utils, userdir +from crmsh.sh import ShellUtils +import behave_agent + + +COLOR_MODE = r'\x1b\[[0-9]+m' + + +def get_file_type(file_path): + rc, out, _ = ShellUtils().get_stdout_stderr("file {}".format(file_path)) + if re.search(r'{}: bzip2'.format(file_path), out): + return "bzip2" + if re.search(r'{}: directory'.format(file_path), out): + return "directory" + + +def get_all_files(archive_path): + archive_type = get_file_type(archive_path) + if archive_type == "bzip2": + with tarfile.open(archive_path) as tar: + return tar.getnames() + if archive_type == "directory": + all_files = glob.glob("{}/*".format(archive_path)) + glob.glob("{}/*/*".format(archive_path)) + return all_files + + +def file_in_archive(f, archive_path): + for item in get_all_files(archive_path): + if re.search(r'/{}$'.format(f), item): + return True + return False + + +def me(): + return socket.gethostname() + + +def _wrap_cmd_non_root(cmd): + """ + When running command under sudoer, or the current user is not root, + wrap crm cluster join command with '<user>@', and for the -N option, too + """ + sudoer = userdir.get_sudoer() + current_user = userdir.getuser() + if sudoer: + user = sudoer + elif current_user != 'root': + user = current_user + else: + return cmd + if re.search('cluster (:?join|geo_join|geo_init_arbitrator)', cmd) and "@" not in cmd: + cmd = re.sub(r'''((?:-c|-N|--qnetd-hostname|--cluster-node|--arbitrator)(?:\s+|=)['"]?)(\S{2,}['"]?)''', f'\\1{user}@\\2', cmd) + elif "cluster init" in cmd and ("-N" in cmd or "--qnetd-hostname" in cmd) and "@" not in cmd: + cmd = re.sub(r'''((?:-c|-N|--qnetd-hostname|--cluster-node)(?:\s+|=)['"]?)(\S{2,}['"]?)''', f'\\1{user}@\\2', cmd) + elif "cluster init" in cmd and "--node" in cmd and "@" not in cmd: + search_patt = r"--node [\'\"](.*)[\'\"]" + res = re.search(search_patt, cmd) + if res: + node_str = ' '.join([f"{user}@{n}" for n in res.group(1).split()]) + cmd = re.sub(search_patt, f"--node '{node_str}'", cmd) + return cmd + + +def run_command(context, cmd, exit_on_fail=True): + cmd = _wrap_cmd_non_root(cmd) + rc, out, err = ShellUtils().get_stdout_stderr(cmd) + context.return_code = rc + if out: + out = re.sub(COLOR_MODE, '', out) + context.stdout = out + if err: + err = re.sub(COLOR_MODE, '', err) + context.stderr = err + if rc != 0 and exit_on_fail: + if out: + context.logger.info("\n{}\n".format(out)) + context.logger.error("\n{}\n".format(err)) + context.failed = True + return rc, out, err + + +def run_command_local_or_remote(context, cmd, addr, exit_on_fail=True): + if addr == me(): + return run_command(context, cmd, exit_on_fail) + cmd = _wrap_cmd_non_root(cmd) + sudoer = userdir.get_sudoer() + if sudoer is None: + user = None + else: + user = sudoer + cmd = f'sudo {cmd}' + hosts = addr.split(',') + with concurrent.futures.ThreadPoolExecutor(max_workers=len(hosts)) as executor: + results = list(executor.map(lambda x: (x, behave_agent.call(x, 1122, cmd, user=user)), hosts)) + out = utils.to_ascii(results[0][1][1]) + err = utils.to_ascii(results[0][1][2]) + context.stdout = out + context.stderr = err + context.return_code = 0 + for host, (rc, stdout, stderr) in results: + if rc != 0: + err = re.sub(COLOR_MODE, '', utils.to_ascii(stderr)) + context.stderr = err + if 
exit_on_fail: + import os + context.logger.error("Failed to run %s on %s@%s :%s", cmd, os.geteuid(), host, err) + raise ValueError("{}".format(err)) + else: + return + return 0, out, err + + +def check_service_state(context, service_name, state, addr): + if state not in ["started", "stopped", "enabled", "disabled"]: + context.logger.error("\nService state should be \"started/stopped/enabled/disabled\"\n") + context.failed = True + if state in {'enabled', 'disabled'}: + rc, _, _ = behave_agent.call(addr, 1122, f'systemctl is-enabled {service_name}', 'root') + return (state == 'enabled') == (rc == 0) + elif state in {'started', 'stopped'}: + rc, _, _ = behave_agent.call(addr, 1122, f'systemctl is-active {service_name}', 'root') + return (state == 'started') == (rc == 0) + else: + context.logger.error("\nService state should be \"started/stopped/enabled/disabled\"\n") + raise ValueError("Service state should be \"started/stopped/enabled/disabled\"") + + +def check_cluster_state(context, state, addr): + return check_service_state(context, 'pacemaker.service', state, addr) + + +def is_unclean(node): + rc, out, err = ShellUtils().get_stdout_stderr("crm_mon -1") + return "{}: UNCLEAN".format(node) in out + + +def online(context, nodelist): + rc = True + _, out = ShellUtils().get_stdout("sudo crm_node -l") + for node in nodelist.split(): + node_info = "{} member".format(node) + if not node_info in out: + rc = False + context.logger.error("\nNode \"{}\" not online\n".format(node)) + return rc + +def assert_eq(expected, actual): + if expected != actual: + msg = "\033[32m" "Expected" "\033[31m" " != Actual" "\033[0m" "\n" \ + "\033[32m" "Expected:" "\033[0m" " {}\n" \ + "\033[31m" "Actual:" "\033[0m" " {}".format(expected, actual) + if isinstance(expected, str) and '\n' in expected: + try: + diff = '\n'.join(difflib.unified_diff( + expected.splitlines(), + actual.splitlines(), + fromfile="expected", + tofile="actual", + lineterm="", + )) + msg = "{}\n" "\033[31m" "Diff:" "\033[0m" "\n{}".format(msg, diff) + except Exception: + pass + raise AssertionError(msg) + +def assert_in(expected, actual): + if expected not in actual: + msg = "\033[32m" "Expected" "\033[31m" " not in Actual" "\033[0m" "\n" \ + "\033[32m" "Expected:" "\033[0m" " {}\n" \ + "\033[31m" "Actual:" "\033[0m" " {}".format(expected, actual) + raise AssertionError(msg) diff --git a/test/features/user_access.feature b/test/features/user_access.feature new file mode 100644 index 0000000..180dd3f --- /dev/null +++ b/test/features/user_access.feature @@ -0,0 +1,114 @@ +@user +Feature: Functional test for user access + + Need nodes: hanode1 + + Scenario: User in haclient group + Given Cluster service is "stopped" on "hanode1" + When Run "useradd -m -s /bin/bash -N -g 90 xin1" on "hanode1" + When Run "echo 'export PATH=$PATH:/usr/sbin/' >> ~xin1/.bashrc" on "hanode1" + When Try "su - xin1 -c 'crm cluster init -y'" + Then Except multiple lines + """ + ERROR: Please run this command starting with "sudo". + Currently, this command needs to use sudo to escalate itself as root. + Please consider to add "xin1" as sudoer. 
For example: + sudo bash -c 'echo "xin1 ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/xin1' + """ + When Run "echo "xin1 ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/xin1" on "hanode1" + When Try "su - xin1 -c 'crm cluster init -y'" + Then Except multiple lines + """ + ERROR: Please run this command starting with "sudo" + """ + When Run "su - xin1 -c 'sudo crm cluster init -y'" on "hanode1" + Then Cluster service is "started" on "hanode1" + + When Run "su - xin1 -c 'crm node standby hanode1'" on "hanode1" + Then Node "hanode1" is standby + + @clean + Scenario: User in sudoer + Given Cluster service is "stopped" on "hanode1" + When Run "useradd -m -s /bin/bash xin3" on "hanode1" + When Run "echo 'export PATH=$PATH:/usr/sbin/' >> ~xin3/.bashrc" on "hanode1" + And Run "echo "xin3 ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/xin3" on "hanode1" + When Try "su - xin3 -c 'crm cluster init -y'" + Then Except multiple lines + """ + WARNING: Failed to open log file: [Errno 13] Permission denied: '/var/log/crmsh/crmsh.log' + ERROR: Please run this command starting with "sudo" + """ + When Run "su - xin3 -c 'sudo crm cluster init -y'" on "hanode1" + Then Cluster service is "started" on "hanode1" + + When Try "su - xin3 -c 'crm node standby hanode1'" + Then Except multiple lines + """ + WARNING: Failed to open log file: [Errno 13] Permission denied: '/var/log/crmsh/crmsh.log' + ERROR: Please run this command starting with "sudo" + """ + When Run "su - xin3 -c 'sudo crm node standby hanode1'" on "hanode1" + Then Node "hanode1" is standby + + @clean + Scenario: Normal user access + Given Cluster service is "stopped" on "hanode1" + When Run "useradd -m -s /bin/bash user1" on "hanode1" + When Run "echo 'export PATH=$PATH:/usr/sbin/' >> ~user1/.bashrc" on "hanode1" + When Try "su - user1 -c 'crm cluster init -y'" + Then Except multiple lines + """ + WARNING: Failed to open log file: [Errno 13] Permission denied: '/var/log/crmsh/crmsh.log' + ERROR: Please run this command starting with "sudo". + Currently, this command needs to use sudo to escalate itself as root. + Please consider to add "user1" as sudoer. For example: + sudo bash -c 'echo "user1 ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/user1' + """ + When Run "echo "user1 ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/user1" on "hanode1" + When Try "su - user1 -c 'crm cluster init -y'" + Then Except multiple lines + """ + WARNING: Failed to open log file: [Errno 13] Permission denied: '/var/log/crmsh/crmsh.log' + ERROR: Please run this command starting with "sudo" + """ + When Run "su - user1 -c 'sudo crm cluster init -y'" on "hanode1" + Then Cluster service is "started" on "hanode1" + + When Run "useradd -m -s /bin/bash user2" on "hanode1" + When Run "echo 'export PATH=$PATH:/usr/sbin/' >> ~user2/.bashrc" on "hanode1" + When Try "su - user2 -c 'crm node standby hanode1'" + Then Except multiple lines + """ + WARNING: Failed to open log file: [Errno 13] Permission denied: '/var/log/crmsh/crmsh.log' + ERROR: This command needs higher privilege. + Option 1) Please consider to add "user2" as sudoer. For example: + sudo bash -c 'echo "user2 ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/user2' + Option 2) Add "user2" to the haclient group. 
For example: + sudo usermod -g haclient user2 + """ + When Run "usermod -g haclient user2" on "hanode1" + When Run "su - user2 -c 'crm node standby hanode1'" on "hanode1" + Then Node "hanode1" is standby + + When Run "useradd -m -s /bin/bash user3" on "hanode1" + When Run "echo 'export PATH=$PATH:/usr/sbin/' >> ~user3/.bashrc" on "hanode1" + When Try "su - user3 -c 'crm node online hanode1'" + Then Except multiple lines + """ + WARNING: Failed to open log file: [Errno 13] Permission denied: '/var/log/crmsh/crmsh.log' + ERROR: This command needs higher privilege. + Option 1) Please consider to add "user3" as sudoer. For example: + sudo bash -c 'echo "user3 ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/user3' + Option 2) Add "user3" to the haclient group. For example: + sudo usermod -g haclient user3 + """ + When Run "echo "user3 ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/user3" on "hanode1" + When Try "su - user3 -c 'crm node online hanode1'" + Then Except multiple lines + """ + WARNING: Failed to open log file: [Errno 13] Permission denied: '/var/log/crmsh/crmsh.log' + ERROR: Please run this command starting with "sudo" + """ + When Run "su - user3 -c 'sudo crm node online hanode1'" on "hanode1" + Then Node "hanode1" is online |
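Note on the wire format used by test/features/steps/behave_agent.py above: each message is an 8-byte header packed with struct format '!ii' (message type and payload length, both big-endian 32-bit integers) followed by the raw payload bytes; a client sends MSG_USER, MSG_CMD and MSG_EOF frames, and the agent replies with MSG_RC, MSG_OUT, MSG_ERR and MSG_EOF. The standalone sketch below is not part of the patch; the helper names write_frame and read_frame are invented for illustration, and it simply round-trips a few frames through an in-memory buffer to show the framing.

import io
import struct

# Message type constants mirroring behave_agent.py
# (full set in the patch: EOF=0, USER=1, CMD=2, OUT=4, ERR=5, RC=6)
MSG_EOF, MSG_USER, MSG_CMD = 0, 1, 2


def write_frame(out, msg_type: int, data: bytes) -> None:
    # 8-byte header: type and payload length as big-endian 32-bit ints, then the payload
    out.write(struct.pack('!ii', msg_type, len(data)))
    out.write(data)


def read_frame(inp):
    msg_type, length = struct.unpack('!ii', inp.read(8))
    return msg_type, inp.read(length) if length > 0 else b''


# Round-trip a typical client request through an in-memory buffer
buf = io.BytesIO()
write_frame(buf, MSG_USER, b'root')
write_frame(buf, MSG_CMD, b'crm_mon -1')
write_frame(buf, MSG_EOF, b'')
buf.seek(0)

while True:
    msg_type, payload = read_frame(buf)
    print(msg_type, payload)
    if msg_type == MSG_EOF:
        break

Over TCP the same frames are written to port 1122 by behave_agent.call(), which is how run_command_local_or_remote() and the service-state checks in utils.py reach the other test nodes.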