diff options
Diffstat (limited to '')
-rw-r--r-- | doc/stonith.xml.in | 315 | ||||
-rw-r--r-- | doc/stonith/Makefile.am | 37 | ||||
-rw-r--r-- | doc/stonith/README.bladehpi | 101 | ||||
-rw-r--r-- | doc/stonith/README.cyclades | 61 | ||||
-rw-r--r-- | doc/stonith/README.drac3 | 18 | ||||
-rw-r--r-- | doc/stonith/README.dracmc | 87 | ||||
-rw-r--r-- | doc/stonith/README.external | 90 | ||||
-rw-r--r-- | doc/stonith/README.ibmrsa | 9 | ||||
-rw-r--r-- | doc/stonith/README.ibmrsa-telnet | 55 | ||||
-rw-r--r-- | doc/stonith/README.ipmilan | 131 | ||||
-rw-r--r-- | doc/stonith/README.ippower9258 | 68 | ||||
-rw-r--r-- | doc/stonith/README.meatware | 26 | ||||
-rw-r--r-- | doc/stonith/README.rackpdu | 21 | ||||
-rw-r--r-- | doc/stonith/README.rcd_serial | 186 | ||||
-rw-r--r-- | doc/stonith/README.riloe | 36 | ||||
-rw-r--r-- | doc/stonith/README.vacm | 40 | ||||
-rw-r--r-- | doc/stonith/README.vcenter | 90 | ||||
-rw-r--r-- | doc/stonith/README.wti_mpc | 85 | ||||
-rw-r--r-- | doc/stonith/README_kdumpcheck.txt | 151 |
19 files changed, 1607 insertions, 0 deletions
diff --git a/doc/stonith.xml.in b/doc/stonith.xml.in new file mode 100644 index 0000000..575c339 --- /dev/null +++ b/doc/stonith.xml.in @@ -0,0 +1,315 @@ +<?xml version="1.0"?> +<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN" "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"> +<refentry id="re-stonith"> + <refentryinfo> + <date>December 7, 2009</date> + <productname>@PACKAGE_NAME@</productname> + <productnumber>@VERSION@</productnumber> + <authorgroup> + <author> + <firstname>Alan</firstname> + <surname>Robertson</surname> + <contrib>stonith</contrib> + <email>alanr@unix.sh</email> + </author> + <author> + <firstname>Simon</firstname> + <surname>Horman</surname> + <contrib>man page</contrib> + <email>horms@vergenet.net</email> + </author> + <author> + <firstname>Florian</firstname> + <surname>Haas</surname> + <contrib>man page</contrib> + <email>florian.haas@linbit.com</email> + </author> + </authorgroup> + </refentryinfo> + <refmeta> + <refentrytitle>stonith</refentrytitle> + <manvolnum>8</manvolnum> + <refmiscinfo class="manual">System administration utilities</refmiscinfo> + </refmeta> + <refnamediv> + <refname>stonith</refname> + <refpurpose>extensible interface for remotely powering down a node + in the cluster</refpurpose> + </refnamediv> + <refsynopsisdiv> + <cmdsynopsis> + <command>stonith</command> + <arg choice="plain"><option>-h</option></arg> + </cmdsynopsis> + <cmdsynopsis> + <command>stonith</command> + <arg choice="opt"><option>-s</option></arg> + <arg choice="opt"><option>-h</option></arg> + <arg choice="plain"><option>-L</option></arg> + </cmdsynopsis> + <cmdsynopsis> + <command>stonith</command> + <arg choice="opt"><option>-s</option></arg> + <arg choice="opt"><option>-h</option></arg> + <arg choice="plain"><option>-t</option> <replaceable>stonith-device-type</replaceable></arg> + <arg choice="plain"><option>-n</option></arg> + </cmdsynopsis> + <cmdsynopsis> + <command>stonith</command> + <arg 
choice="opt"><option>-s</option></arg> + <arg choice="opt"><option>-h</option></arg> + <arg choice="plain"><option>-t</option> <replaceable>stonith-device-type</replaceable></arg> + <group choice="req" rep="norepeat"> + <group choice="plain" rep="repeat"> + <arg choice="plain"><replaceable>name</replaceable>=<replaceable>value</replaceable></arg> + </group> + <arg choice="plain"><option>-p</option> <replaceable>stonith-device-parameters</replaceable></arg> + <arg choice="plain"><option>-F</option> <replaceable>stonith-device-parameters-file</replaceable></arg> + </group> + <arg choice="opt"><option>-c</option> <replaceable>count</replaceable></arg> + <arg choice="opt"><option>-l</option></arg> + <arg choice="opt"><option>-S</option></arg> + </cmdsynopsis> + <cmdsynopsis> + <command>stonith</command> + <arg choice="opt"><option>-s</option></arg> + <arg choice="opt"><option>-h</option></arg> + <arg choice="plain"><option>-t</option> <replaceable>stonith-device-type</replaceable></arg> + <group choice="req" rep="norepeat"> + <group choice="plain" rep="repeat"> + <arg choice="plain"><replaceable>name</replaceable>=<replaceable>value</replaceable></arg> + </group> + <arg choice="plain"><option>-p</option> <replaceable>stonith-device-parameters</replaceable></arg> + <arg choice="plain"><option>-F</option> <replaceable>stonith-device-parameters-file</replaceable></arg> + </group> + <arg choice="opt"><option>-c</option> <replaceable>count</replaceable></arg> + <arg choice="opt"><option>-T</option> + <group choice="req"> + <arg choice="plain">reset</arg> + <arg choice="plain">on</arg> + <arg choice="plain">off</arg> + </group> + </arg> + <arg><replaceable>nodename</replaceable></arg> + </cmdsynopsis> + </refsynopsisdiv> + <refsection id="rs-stonith-description"> + <title>Description</title> + <para>The STONITH module provides an extensible interface for + remotely powering down a node in the cluster (STONITH = Shoot The + Other Node In The Head). 
The idea is quite simple: when the + software running on one machine wants to make sure another machine + in the cluster is not using a resource, pull the plug on the other + machine. It's simple and reliable, albeit admittedly + brutal.</para> + </refsection> + <refsection id="rs-stonith-options"> + <title>Options</title> + <para>The following options are supported:</para> + <variablelist> + <varlistentry> + <term> + <option>-c</option> <replaceable>count</replaceable> + </term> + <listitem> + <para>Perform any actions identified by the + <option>-l</option>, <option>-S</option> and + <option>-T</option> options <replaceable>count</replaceable> + times.</para> + </listitem> + </varlistentry> + <varlistentry> + <term> + <option>-F</option> <replaceable>stonith-device-parameters-file</replaceable> + </term> + <listitem> + <para>Path of file specifying parameters for a stonith + device. To determine the syntax of the parameters file for a + given device type run:</para> + <screen><computeroutput># </computeroutput><userinput>stonith -t stonith-device-type -n</userinput></screen> + <para>All of the listed parameters need to appear in order + on a single line in the parameters file and be delimited by + whitespace.</para> + </listitem> + </varlistentry> + <varlistentry> + <term> + <option>-h</option> + </term> + <listitem> + <para>Display detailed information about a stonith device + including description, configuration information, parameters + and any other related information. 
When specified without a + stonith-device-type, detailed information on all stonith + devices is displayed.</para> + <para>If you don't yet own a stonith device and want to know + more about the ones we support, this information is likely + to be helpful.</para> + </listitem> + </varlistentry> + <varlistentry> + <term> + <option>-L</option> + </term> + <listitem> + <para>List the valid stonith device types, suitable for + passing as an argument to the <option>-t</option> + option.</para> + </listitem> + </varlistentry> + <varlistentry> + <term> + <option>-l</option> + </term> + <listitem> + <para>List the hosts controlled by the stonith device.</para> + </listitem> + </varlistentry> + <varlistentry> + <term> + <option>-n</option> + </term> + <listitem> + <para>Output the parameter names of the stonith device.</para> + </listitem> + </varlistentry> + <varlistentry> + <term> + <replaceable>name</replaceable>=<replaceable>value</replaceable> + </term> + <listitem> + <para>Parameter, in the form of a name/value pair, to pass + directly to the stonith device. To determine the syntax of + the parameters for a given device type run:</para> + <screen><computeroutput># </computeroutput><userinput>stonith -t stonith-device-type -n</userinput></screen> + <para>All of the listed parameter names need to be passed + with their corresponding values.</para> + </listitem> + </varlistentry> + <varlistentry> + <term> + <option>-p</option> <replaceable>stonith-device-parameters</replaceable> + </term> + <listitem> + <para>Parameters to pass directly to the stonith device. 
To + determine the syntax of the parameters for a given device + type run:</para> + <screen><computeroutput># </computeroutput><userinput>stonith -t stonith-device-type -n</userinput></screen> + <para>All of the listed parameter names need to appear in + order and be delimited by whitespace.</para> + </listitem> + </varlistentry> + <varlistentry> + <term> + <option>-S</option> + </term> + <listitem> + <para>Show the status of the stonith device.</para> + </listitem> + </varlistentry> + <varlistentry> + <term> + <option>-s</option> + </term> + <listitem> + <para>Silent operation. Suppress logging of error messages to standard error.</para> + </listitem> + </varlistentry> + <varlistentry> + <term> + <option>-T</option> <replaceable>action</replaceable> + </term> + <listitem> + <para>The stonith action to perform on the node identified + by nodename. Chosen from <token>reset</token>, + <token>on</token>, and <token>off</token>.</para> + <note> + <para>If a nodename is specified without the + <option>-T</option> option, the stonith action defaults to + <token>reset</token>.</para> + </note> + </listitem> + </varlistentry> + <varlistentry> + <term> + <option>-t</option> <replaceable>stonith-device-type</replaceable> + </term> + <listitem> + <para>The type of the stonith device to be used to effect + stonith. A list of supported devices for an installation may + be obtained using the <option>-L</option> option.</para> + </listitem> + </varlistentry> + <varlistentry> + <term> + <option>-v</option> + </term> + <listitem> + <para>Ignored.</para> + </listitem> + </varlistentry> + </variablelist> + </refsection> + <refsection id="rs-stonith-examples"> + <title>Examples</title> + <para>To determine which stonith devices are available on your installation, use the <option>-L</option> option:</para> + <screen><computeroutput># </computeroutput><userinput>stonith -L</userinput></screen> + <para>All of the supported devices will be displayed one per line. 
+ Choose one from this list that is best for your environment - + let's use <code>wti_nps</code> for the rest of this example. To get detailed + information about this device, use the <option>-h</option> option:</para> + <screen><computeroutput># </computeroutput><userinput>stonith -t wti_nps -h</userinput></screen> + <para>Included in the output is the list of valid parameter names + for <code>wti_nps</code>. To get <emphasis>just</emphasis> the + list of valid parameter names, use the <option>-n</option> option + instead:</para> + <screen><computeroutput># </computeroutput><userinput>stonith -t wti_nps -n</userinput></screen> + <para>All of the required parameter names will be displayed one + per line. For <code>wti_nps</code> the output is:</para> + <screen><computeroutput>ipaddr</computeroutput> +<computeroutput>password</computeroutput></screen> + <para>There are three ways to pass these parameters to the device. + The first (and preferred) way is by passing name/value pairs on + the <command>stonith</command> command line:</para> + <screen><computeroutput># </computeroutput><userinput>stonith -t wti_nps ipaddr=my-dev-ip password=my-dev-pw ...</userinput></screen> + <para>The second way, which is maintained only for backward + compatibility with legacy clusters, is passing the values + <emphasis>in order</emphasis> on the <command>stonith</command> + command line with the <option>-p</option> option:</para> + <screen><computeroutput># </computeroutput><userinput>stonith -t wti_nps -p "my-dev-ip my-dev-pw" ...</userinput></screen> + <para>The third way, which is also maintained only for backward + compatibility with legacy clusters, is placing the values <emphasis>in order</emphasis> + on a single line in a config file:</para> + <programlisting>my-dev-ip my-dev-pw</programlisting> + <para>... 
and passing the name of the file on the stonith command + line with the <option>-F</option> option:</para> + <screen><computeroutput># </computeroutput><userinput>stonith -t wti_nps -F ~/my-wtinps-config ...</userinput></screen> + <para>To make sure you have the configuration set up correctly and + that the device is available for stonith operations, use the + <option>-S</option> option:</para> + <screen><computeroutput># </computeroutput><userinput>stonith -t wti_nps ipaddr=my-dev-ip password=my-dev-pw -S</userinput></screen> + <para>If all is well at this point, you should see something similar to:</para> + <screen><computeroutput>stonith: wti_nps device OK.</computeroutput></screen> + <para>If you don't, some debugging may be necessary to determine + if the config info is correct, the device is powered on, etc. The + <option>-d</option> option can come in handy here - you can add it + to any <command>stonith</command> command to cause it to generate + debug output.</para> + <para>To get the list of hosts controlled by the device, use the + <option>-l</option> option:</para> + <screen><computeroutput># </computeroutput><userinput>stonith -t wti_nps ipaddr=my-dev-ip password=my-dev-pw -l</userinput></screen> + <para>All of the hosts controlled by the device will be displayed one per line. 
For <code>wti_nps</code> the output could be:</para> + <screen><computeroutput>node1</computeroutput> + <computeroutput>node2</computeroutput> + <computeroutput>node3</computeroutput></screen> + <para>To power off one of these hosts, use the <option>-T</option> option: + <screen><computeroutput># </computeroutput><userinput>stonith -t wti_nps ipaddr=my-dev-ip password=my-dev-pw -T off <replaceable>node</replaceable></userinput></screen></para> + </refsection> + <refsection id="rs-stonith-seealso"> + <title>See also</title> + <para> + <citerefentry><refentrytitle>heartbeat</refentrytitle><manvolnum>8</manvolnum></citerefentry>, + <citerefentry><refentrytitle>meatclient</refentrytitle><manvolnum>8</manvolnum></citerefentry> + </para> + </refsection> +</refentry> diff --git a/doc/stonith/Makefile.am b/doc/stonith/Makefile.am new file mode 100644 index 0000000..4c9b76f --- /dev/null +++ b/doc/stonith/Makefile.am @@ -0,0 +1,37 @@ +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+# +MAINTAINERCLEANFILES = Makefile.in + +stdocdir = $(docdir)/stonith + +stdoc_DATA = README.bladehpi \ + README.cyclades \ + README.drac3 \ + README.dracmc \ + README.external \ + README.ibmrsa \ + README.ibmrsa-telnet \ + README.meatware \ + README.rackpdu \ + README.rcd_serial \ + README.riloe \ + README.vacm \ + README.wti_mpc \ + README_kdumpcheck.txt \ + README.vcenter + +if IPMILAN_BUILD +stdoc_DATA += README.ipmilan +endif diff --git a/doc/stonith/README.bladehpi b/doc/stonith/README.bladehpi new file mode 100644 index 0000000..3119ef7 --- /dev/null +++ b/doc/stonith/README.bladehpi @@ -0,0 +1,101 @@ + +STONITH module for IBM BladeCenter via OpenHPI +---------------------------------------------- + +Requirements: + Linux-HA bladehpi STONITH plugin requires OpenHPI 2.6+ + OpenHPI requires Net-SNMP 5.0+ + OpenHPI requires BladeCenter Management Module 1.08+ + +This STONITH module talks to IBM BladeCenters via SNMP through use of +the OpenHPI BladeCenter plugin (snmp_bc). For more information about +installing OpenHPI, setting up the BladeCenter SNMP agent, etc. please +visit http://www.openhpi.org/. Once OpenHPI is installed properly, +the STONITH plugin will automatically be built the next time Linux-HA +is built. + +Use the OpenHPI configuration file (i.e. /etc/openhpi/openhpi.conf) +to configure the BladeCenters of interest to STONITH. For example, +the following excerpt: + + plugin libsnmp_bc + + handler libsnmp_bc { + entity_root = "{SYSTEM_CHASSIS,1}" # Required + host = "9.254.253.252" # Required + community = "community" # Version 1 Required. + version = "3" # Required. SNMP protocol version (1|3) + security_name = "userid" # Version 3 Required. + passphrase = "userpass" # Version 3. Required if security_level is authNoPriv or authPriv. + auth_type = "MD5" # Version 3. Passphrase encoding (MD5|SHA) + security_level = "authNoPriv" # Version 3. 
(noAuthNoPriv|authNoPriv|authPriv) + } + +defines how to access the BladeCenter at 9.254.253.252 using SNMPV3 +with an ID/password of userid/userpass. The entity_root must be +passed to the STONITH bladehpi plugin as its single required parameter. +For example, to query the list of blades present in the BladeCenter +configured above, run: + + stonith -t bladehpi -p "{SYSTEM_CHASSIS,1}" -l + +which is the same as: + + stonith -t bladehpi "entity_root={SYSTEM_CHASSIS,1}" -l + +Use the BladeCenter Management Module web interface to set the Blade +Information to match "uname -n" for each blade in the cluster. For +example, with the BladeCenter configured above use a browser to access +http://9.254.253.252, login with userid/userpass, and then go to +Blade Tasks -> Configuration -> Blade Information, enter the proper +names, and select Save. Be aware that heartbeat must be restarted +before these changes take effect or, if using the OpenHPI daemon, +the daemon must be restarted. + +More than one BladeCenter can be placed in the OpenHPI configuration +file by using different numbers with the entity_root. For example, + + plugin libsnmp_bc + + handler libsnmp_bc { + entity_root = "{SYSTEM_CHASSIS,1}" # Required + host = "9.254.253.252" # Required + : + } + handler libsnmp_bc { + entity_root = "{SYSTEM_CHASSIS,2}" # Required + host = "9.254.253.251" # Required + : + } + +There is an optional parameter, soft_reset, that is true|1 if bladehpi +should use soft reset (power cycle) to reset nodes or false|0 if it +should use hard reset (power off, wait, power on); the default is +false. 
As an example, to override the default value the above stonith +command would become: + + stonith -t bladehpi -p "{SYSTEM_CHASSIS,1} true" -l + +which is the same as: + + stonith -t bladehpi "entity_root={SYSTEM_CHASSIS,1} soft_reset=true" -l + +The difference between the two is that a soft reset is much quicker +but may return before the node has been reset because bladehpi relies +on BladeCenter firmware to cycle the node's power, while a hard reset +is slower but guaranteed not to return until the node is dead because +bladehpi powers off the node, waits until it is off, then powers it +on again. + +NOTE: Set the OPENHPI_CONF environment variable to contain the +fully-qualified path of the OpenHPI configuration file, for example: + + export OPENHPI_CONF=/etc/openhpi/openhpi.conf + +NOTE: If OpenHPI is not configured with --disable-daemon before being +built and installed, make sure that the OpenHPI daemon is running +before using the bladehpi plugin. + +NOTE: If debugging of the environment is needed, configure OpenHPI +with --enable-debuggable and rebuild/reinstall, export +OPENHPI_DEBUG=YES, and run stonith commands with the -d option. diff --git a/doc/stonith/README.cyclades b/doc/stonith/README.cyclades new file mode 100644 index 0000000..3ccf9db --- /dev/null +++ b/doc/stonith/README.cyclades @@ -0,0 +1,61 @@ +STONITH module for Cyclades AlterPath PM +---------------------------------------- + +This STONITH module talks to Cyclades AlterPath PM series of power managers +via TS, ACS or KVM equipment. + +Access to the frontend device (TS, ACS or KVM) is done via root user with +passwordless ssh. + +For that, it is necessary to create a public/private key pair with _empty_ +passphrase on _each_ machine which is part of the cluster. + +Small HOWTO follows: + +# ssh-keygen -t rsa +Generating public/private rsa key pair. +Enter file in which to save the key (/root/.ssh/id_rsa): +Created directory '/home/root/.ssh'. 
+Enter passphrase (empty for no passphrase): +Enter same passphrase again: +Your identification has been saved in /root/.ssh/id_rsa. +Your public key has been saved in /root/.ssh/id_rsa.pub. +The key fingerprint is: +dc:e0:71:55:fd:2a:b0:19:d6:3c:48:e5:45:db:b4:be root@hostname.network + +Next step is to append the public key (/root/.ssh/id_rsa.pub) +to the authorized_keys file on the TS/ACS/KVM box. The authorized +keys file location is set at the SSH daemon configuration file. +The default location is /etc/ssh/authorized_keys, so: + +[root@clusterhost]# scp /root/.ssh/id_rsa.pub root@alterpath:/tmp + +login to the TS/ACS/KVM box normally and append the public key. + +# ssh root@alterpath +Password: .... + +[root@CAS root]# cat /tmp/id_rsa.pub >> /etc/ssh/authorized_keys + +The following entries must be present on /etc/ssh/sshd_config for the +passwordless scheme to work properly: + +RSAAuthentication yes +PubkeyAuthentication yes +AuthorizedKeysFile /etc/ssh/authorized_keys + +Next step is to test if the configuration has been done successfully: + +[root@clusterhost root]# ssh root@alterpath +[root@CAS root]# + +If it logins automatically without asking for a password, then everything +has been done correctly! + +Note that such configuration procedure (including generation of the key pair) +has to be done for each machine in the cluster which intends to use the +AlterPath PM as a STONITH device. + +------ +Any questions please contact Cyclades support at <support@cyclades.com> +or <marcelo.tosatti@cyclades.com> diff --git a/doc/stonith/README.drac3 b/doc/stonith/README.drac3 new file mode 100644 index 0000000..e3c071b --- /dev/null +++ b/doc/stonith/README.drac3 @@ -0,0 +1,18 @@ +Stonith module for Dell DRACIII remote access card +-------------------------------------------------- + +This module uses the Dell DRACIII PCI card as a stonith device. +It sends the XML commands over HTTPS to the DRACIII web server. 
+ +The card firmware must be version 2.0 at least, with support for SSL based +service and many bug fixes over 1.x versions. + +This module uses libcurl, libxml2 (gnome xml libs) and libssl. + +Any hints, bug reports, improvements, etc. will be appreciated. + +--- +Roberto Moreda <moreda@alfa21.com> http://www.alfa21.com +Alfa21 A Coruña (Spain) +UNIX, Linux & TCP/IP Services - High Availability Solutions + diff --git a/doc/stonith/README.dracmc b/doc/stonith/README.dracmc new file mode 100644 index 0000000..761f5ad --- /dev/null +++ b/doc/stonith/README.dracmc @@ -0,0 +1,87 @@ +dracmc-telnet - External stonith plugin for HAv2 (http://linux-ha.org/wiki) + Connects to Dell Drac/MC Blade Enclosure via a Cyclades + terminal server with telnet and switches power of named + blade servers appropriately. + +Description: + Dell offers the Drac/MC in their blade enclosures. The +Drac/MC can be accessed in different ways. One way to interface to it +is to connect the blade enclosure's Drac/MC serial port to a Cyclades +terminal server. You can then access the Drac/MC via telnet via the +Cyclades. Once logged in, you can use 'help' to show all available +commands. With the 'serveraction' command, you can control both +hard and soft resets as well as power to a particular blade. The +blades are named 'Server-X', where 'X' is a number which corresponds +to the blade number in the enclosure. This plugin allows using the +Drac/MC with stonith. It uses python's standard 'telnetlib' library +to log in and issue commands. The code is very similar to the original +ibmrsa-telnet plugin released by Andreas and was quite easy to +modify for this application. + One complication is that the Cyclades only allows one active +connection. If someone or something has a connection active, the +terminal server closes the new attempted connection. 
Since this +situation can be common, for example if trying to stonith two blades +or when the plugin is started by multiple cluster nodes, there is a +built in retry mechanism for login. On 10 retries, the code gives up +and throws. + When running this resource, it is best to not run it as a clone, +rather as a normal, single-instance resource. Make sure to create a +location constraint that excludes the node that is to be fenced. + +Required parameters: + nodename: The name of the server you want to touch on your network + cyclades_ip: The IP address of the cyclades terminal server + cyclades_port: The port for telnet to access on the cyclades (i.e. 7032) + servername: The DRAC/MC server name of the blade (i.e. Server-7) + username: The login user name for the DRAC/MC + password: The login password for the DRAC/MC + +Example configuration + +These are examples: you should adjust parameters, scores and +timeout values to fit your environment. + +crm shell: + + primitive fence_node1 stonith:external/dracmc-telnet \ + nodename=node1 cyclades_ip=10.0.0.1 cyclades_port=7001 \ + servername=Server-1 username=USERID password=PASSWORD \ + op monitor interval="200m" timeout="60s" + location loc-fence_node1 fence_node1 -inf: node1 + +XML: + +<?xml version="1.0" ?> +<cib> + <configuration> + <resources> + <primitive id="r_stonith-node01" class="stonith" type="external/dracmc-telnet" provider="heartbeat" resource_stickiness="0"> + <operations> + <op name="monitor" interval="200m" timeout="60s" prereq="nothing" id="r_stonith-node01-mon"/> + <op name="start" timeout="180" id="r_stonith-node01-start"/> + <op name="stop" timeout="180" id="r_stonith-node01-stop"/> + </operations> + <instance_attributes id="r_stonith-node01"> + <attributes> + <nvpair id="r_stonith-node01-nodename" name="nodename" value="node01"/> + <nvpair id="r_stonith-node01-cyclades_ip" name="cyclades_ip" value="192.168.0.1"/> + <nvpair id="r_stonith-node01-cyclades_port" name="cyclades_port" value="7032"/> + 
<nvpair id="r_stonith-node01-servername" name="servername" value="Server-7"/> + <nvpair id="r_stonith-node01-username" name="username" value="USERID"/> + <nvpair id="r_stonith-node01-password" name="password" value="PASSWORD"/> + <nvpair id="r_stonith-node01-type" name="type" value="dellblade"/> + </attributes> + </instance_attributes> + </primitive> + </resources> + <constraints> + <rsc_location id="r_stonith-node01_prefer_node02" rsc="r_stonith-node01"> + <rule id="r_stonith-node01_prefer_node02_rule" score="50"> + <expression attribute="#uname" id="r_stonith-node01_prefer_node02_expr" operation="eq" value="node02"/> + </rule> + </rsc_location> + </constraints> + + </configuration> +</cib> + diff --git a/doc/stonith/README.external b/doc/stonith/README.external new file mode 100644 index 0000000..a70ccde --- /dev/null +++ b/doc/stonith/README.external @@ -0,0 +1,90 @@ +EXTERNAL module for Linux-HA STONITH + + +This stonith plugin runs an external command written in your favorite +language to shutdown the given host. The external command should return +a zero exit status after a successful shutdown, or non-zero exit status +for a shutdown failure. Failures notifications will be sent to syslog. + +To create your own external plugin, write a script that supports the +following actions: + + reset + on (optional) + off (optional) + gethosts + status + getconfignames + getinfo-devid + getinfo-devname + getinfo-devdescr + getinfo-devurl + getinfo-xml + +and place it in the /usr/lib/stonith/plugins/external directory - the +script must be a regular executable file that is NOT writable by group +or others in order to be recognized as an external plugin. If the +action requires information to be returned, such as the list of hosts +or config names or any of the getinfo calls, simply write the +information to stdout. When complete, return zero to indicate the +action succeeded or non-zero to indicate the action failed. 
You can +use the ssh (sh) and riloe (python) scripts already in that directory +as working examples. + +To make sure that your external plugin is recognized, run "stonith -L" +and look for its name in the output, something along the lines of: + + external/yourplugin + +To configure the plugin on an R1 (legacy) cluster, add a line similar +to the following to /etc/ha.d/ha.cf: + + stonith external/yourplugin /etc/ha.d/yourplugin.cfg + +where /etc/ha.d/yourplugin.cfg contains a single line with all of your +plugin's parameters: + + parm1-value parm2-value ... + +Another way to configure the plugin on a legacy cluster is to add a line +similar to the following to /etc/ha.d/ha.cf instead: + + stonith_host * external/yourplugin parm1-value parm2-value ... + +where all of your plugin's parameters are placed at the end of the line. + +Please note that all parameters come in to the plugin in name/value +(environment variable) form, but in R1 configurations, they appear as a +list of parameters. They are ordered in the config file or on the +stonith_host line according to the ordering specified in the output of +the getconfignames operation. + +To configure the plugin on an R2 cluster, place lines similar to the +following into the <resources> section of your CIB, which is contained +in /var/lib/heartbeat/crm/cib.xml: + + <clone id="DoFencing"> + <instance_attributes> + <nvpair name="clone_max" value="2"/> + <nvpair name="clone_node_max" value="1"/> + </instance_attributes> + <primitive id="child_DoFencing" class="stonith" type="external/yourplugin" provider="heartbeat"> + <operations> + <op name="monitor" interval="5s" timeout="20s" requires="nothing"/> + <op name="start" timeout="20s" requires="nothing"/> + </operations> + <instance_attributes> + <nvpair name="parm1-name" value="parm1-value"/> + <nvpair name="parm2-name" value="parm2-value"/> + <!-- ... 
--> + </instance_attributes> + </primitive> + </clone> + +Whatever <nvpair> parameters specified in the <attributes> section of +the CIB are passed to the script as environment variables. For the +example above, the parameters are passed as parm1-name=parm1-value, +parm2-name=parm2-value and so on. + +Additional information can be found at +http://linux-ha.org/wiki/ExternalStonithPlugins. diff --git a/doc/stonith/README.ibmrsa b/doc/stonith/README.ibmrsa new file mode 100644 index 0000000..b34031b --- /dev/null +++ b/doc/stonith/README.ibmrsa @@ -0,0 +1,9 @@ +See + +ftp://ftp.software.ibm.com/systems/support/system_x_pdf/d3basmst.pdf +ftp://ftp.software.ibm.com/systems/support/system_x_pdf/88p9248.pdf +http://www.redbooks.ibm.com/abstracts/sg246495.html + +for documentation about IBM management processors and the +IBMmpcli utility. + diff --git a/doc/stonith/README.ibmrsa-telnet b/doc/stonith/README.ibmrsa-telnet new file mode 100644 index 0000000..109bdd9 --- /dev/null +++ b/doc/stonith/README.ibmrsa-telnet @@ -0,0 +1,55 @@ +ibmrsa-telnet - External stonith plugin for HAv2 (http://linux-ha.org/wiki) + Connects to IBM RSA Board via telnet and switches power + of server appropriately. + +Description: + + IBM offers Remote Supervisor Adapters II for several + servers. These RSA boards can be accessed in different ways. + One of that is via telnet. Once logged in you can use 'help' to + show all available commands. With 'power' you can reset, power on and + off the controlled server. This command is used in combination + with python's standard library 'telnetlib' to do it automatically. + +Code snippet for cib + + It's useful to give a location preference so that the stonith agent + is run on the/an other node. This is not necessary as one node can kill + itself via RSA Board. But: If this node becomes crazy my experiences + showed that the node is not able to shoot itself anymore properly. 
+ + You have to adjust parameters, scores and timeout values to fit your + HA environment. + +<?xml version="1.0" ?> +<cib> + <configuration> + <resources> + <primitive id="r_stonith-node01" class="stonith" type="external/ibmrsa" provider="heartbeat" resource_stickiness="0"> + <operations> + <op name="monitor" interval="60" timeout="300" prereq="nothing" id="r_stonith-node01-mon"/> + <op name="start" timeout="180" id="r_stonith-node01-start"/> + <op name="stop" timeout="180" id="r_stonith-node01-stop"/> + </operations> + <instance_attributes id="r_stonith-node01"> + <attributes> + <nvpair id="r_stonith-node01-nodename" name="nodename" value="node01"/> + <nvpair id="r_stonith-node01-ipaddr" name="ipaddr" value="192.168.0.1"/> + <nvpair id="r_stonith-node01-userid" name="userid" value="userid"/> + <nvpair id="r_stonith-node01-passwd" name="passwd" value="password"/> + <nvpair id="r_stonith-node01-type" name="type" value="ibm"/> + </attributes> + </instance_attributes> + </primitive> + </resources> + <constraints> + <rsc_location id="r_stonith-node01_not_on_node01" rsc="r_stonith-node01"> + <rule id="r_stonith-node01_not_on_node01_rule" score="-INFINITY"> + <expression attribute="#uname" id="r_stonith-node01_not_on_node01_expr" operation="eq" value="node01"/> + </rule> + </rsc_location> + </constraints> + + </configuration> +</cib> + diff --git a/doc/stonith/README.ipmilan b/doc/stonith/README.ipmilan new file mode 100644 index 0000000..eef86cf --- /dev/null +++ b/doc/stonith/README.ipmilan @@ -0,0 +1,131 @@ + IPMILAN STONITH Module + Copyright (c) 2003 Intel Corp. + yixiong.zou@intel.com + +1. Intro + +IPMILAN STONITH module works by sending a node an IPMI message, in particular, +a 'chassis control' command. Currently the message is sent over the LAN. + +2. Hardware Requirement + +In order to use this module, the node has to be IPMI v1.5 compliant and +also supports IPMI over LAN. For example, the Intel Langley platform. 
+ +Note: IPMI over LAN is an optional feature defined by IPMI v1.5 spec. +So even if a system is IPMI compliant/compatible, it might still not +support IPMI over LAN. If you are sure this is your case and you still +want to try this plugin, read section 6, IPMI v1.5 without IPMI over +LAN Support. + +3. Software Requirement + +This module needs OpenIPMI (http://openipmi.sf.net) to compile. +Version 1.4.x or 2.0.x is supported. + +4. Hardware Configuration + +How to configure the node so it accepts IPMI lan packets is beyond the +scope of this document. Consult your product manual for this. + +5. STONITH Configuration + +Each node in the cluster has to be configured individually. So normally there +would be at least two entries, unless you want to use a different STONITH +device for the other nodes in the cluster. ;) + +The configuration file syntax looks like this: + + <node1> <ip> <port> <auth> <priv> <user> <pass> <reset_method> + <node2> <ip> <port> <auth> <priv> <user> <pass> <reset_method> + ... + + node: the hostname. + + ip: the IP address of the node. If a node has more than one IP addresses, + this is the IP address of the interface which accepts IPMI messages. :) + + port: the port number to send the IPMI message to. The default is 623. + But it could be different or even configurable. + + auth: the authorization type of the IPMI session. Valid choices are + "none", "straight", "md2", and "md5". + + priv: the privilege level of the user. Valid choices are "operator" + or "admin". These are the privilege levels required to run the + 'chassis control' command. + + user: the username. use "" if it is empty. Cannot exceed 16 characters. + + pass: the password. use "" if it is empty. Cannot exceed 16 characters. + + reset_method: (optional) which IPMI chassis control to send + to reset the host. Possible values are power_cycle (default) + and hard_reset. + +Each line is white-space delimited and lines begins with '#' are ignored. + +6. 
IPMI v1.5 without IPMI over LAN Support + +If your computer has a BMC but no LAN support, you might +still be able to use this module. + + 0) Make sure OpenIPMI is installed. OpenIPMI 1.0.3 should work. + + 1) Create a /etc/ipmi_lan.conf file. + + Here's a sample of what this file should look like + + addr 172.16.1.249 999 + PEF_alerting on + per_msg_auth off + priv_limit admin + allowed_auths_admin none md2 md5 + user 20 on "" "" admin 5 md2 md5 none + + If you do not understand what each line means, do a man on ipmilan. + + 2) run ipmilan as root. + + 3) Try sending yourself an IPMI packet over the network using ipmicmd to see + if it works. + + ipmicmd -k "0f 00 06 01" lan 172.16.1.249 999 none admin "" "" + + The result should be something like: + + Connection 0 to the BMC is up0f 07 00 01 00 01 80 01 19 01 8f 77 00 00 4b 02 + + 4) Configure your system so every time it boots up, the ipmi device + drivers are all loaded and ipmilan is run. This is all OS dependent + so I can't tell you what to do. + + The major drawback of this is that you will not be able to power it up + once it's powered down, which for a real IPMI, you could. + + +7. Bugs + +Some IPMI devices do not return 0x0, success, to the host that issued the reset +command. A timeout, 0xc3, could be returned instead. So I am counting that +also as a "successful reset". + +Note: This behavior is not fully IPMI v1.5 compliant. Based on the IPMI v1.5 +spec, the IPMI device should return the appropriate return code. And it is +even allowed to return the appropriate return code before performing the +action. + + +8. TODO + +1) Right now the timeout on each host is hard coded to be 10 seconds. It would + be nice to be able to set this value for each individual host. + +2) A better way of detecting the success of the reset operation would be good. A + lot of times the host which carried out the reset does not return a success. 
+ +3) The os_handler should be contributed back to the OpenIPMI project so that + we do not need to maintain it here. It does not make sense for every little + app like this to write its own os_handler. A generic one like in this + program should be sufficient. + diff --git a/doc/stonith/README.ippower9258 b/doc/stonith/README.ippower9258 new file mode 100644 index 0000000..6873efd --- /dev/null +++ b/doc/stonith/README.ippower9258 @@ -0,0 +1,68 @@ +IP Power 9258 as external stonith device. +========================================= + +Device Information +================== + + Warning: + ======== + + Aviosys provides different types and versions of IP Power 9258. + The device is currently available with four or eight power outlets. + This script was tested with firmware version: V1.55 2009/12/22 + + Especially "IP Power 9258 HP" uses a different http command interface. + ====================================================================== + + Resources for device documentation: + + Manufacturer URL: http://www.aviosys.com/ippower9258.htm + Manual URL: http://www.aviosys.com/manual.htm + Manual current version URL: + http://www.aviosys.com/images/9258_manual_20081104.pdf + +The documentation of the http command interface defines three +supported commands: + + GetPower - useful for testing status of the device and of each port + SetPower - used to control status of each power outlet + SetSchedule+Power - useless for stonith + +Common documented structure of these three commands is + + http://username:password@a.b.c.d/Set.cmd?CMD=command[+param=value...] + param is one or more of P60 to P67 and value is 0 or 1 + expected response for GetPower is of the format + <html>P60=1,P61=0,P62=1,P63=1,P64=0,P65=0,P66=0,P67=0</html> + SetPower does respond with the same format but restricts the list + to the modified ports. + P60 to P67 represent the status of the power outlet 1 to 8: 0 <=> + power off; 1 <=> power on. 
+ +IP Power 9258 allows port names (pw1Name to pw8Name) to be assigned to each +port. These names can be used with the web interface (web form with +post-method). + +Script specific notes +===================== + +There is no documented http command to retrieve port names via the +http command interface. We try to get the hostlist via the web +interface. + +This script assumes a one to one mapping between names of hostlist and +port attributes of power outlet: + + 1st hostname in hostlist connected to 1st power outlet with port + status P60 and port name pw1Name. + ... + 8th hostname in hostlist connected to 8th power outlet with port + status P67 and port name pw8Name. + +If the hostlist parameter is not defined, then all assigned outlets +are inserted into the hostlist. Unused outlets should have empty +names. The node names obviously have to match the corresponding outlet +names. A reserved hostname is "*not-defined*". This is a +script-internal placeholder for unused outlets. It does not appear in +the hostlist. + diff --git a/doc/stonith/README.meatware b/doc/stonith/README.meatware new file mode 100644 index 0000000..0b9b15d --- /dev/null +++ b/doc/stonith/README.meatware @@ -0,0 +1,26 @@ + +MEATWARE Module for Linux-HA STONITH + +ABOUT: + + This is a port of the "meatware" stonith method found in the GFS + distribution (see http://globalfilesystem.org/) to the Linux-HA + project. It notifies operators if a node needs to be reset and + waits for confirmation. + +USAGE: + + The module can be used like any other stonith module. It will + syslog a message at CRIT level if it needs an operator to power-cycle + a node on its behalf. + To confirm that a manual reset has been done, execute + + "meatclient -c <host>". + + If you abort the confirmation, the module will report that the reset + has failed. 
+ +AUTHOR: + + Gregor Binder <gbinder@sysfive.com> + diff --git a/doc/stonith/README.rackpdu b/doc/stonith/README.rackpdu new file mode 100644 index 0000000..69a0f44 --- /dev/null +++ b/doc/stonith/README.rackpdu @@ -0,0 +1,21 @@ +APC Rack PDU + +The product information pages: + +http://www.apcc.com/products/family/index.cfm?id=70 + +The User's Guide: + +http://www.apcmedia.com/salestools/ASTE-6Z6KAV_R1_EN.pdf + +Apparently, an existing http or telnet session will make the +plugin fail. + +In case your nodes are equipped with multiple power supplies, the +PDU supports synchronous operation on multiple outlets on up to +four Switched Rack PDUs. See the User's Guide for more +information on how to setup outlet groups. + +NB: There has been a report by one user that in case a link +between two PDUs in the chain is broken, the PDU returns success +even though it failed. This needs to be verified. diff --git a/doc/stonith/README.rcd_serial b/doc/stonith/README.rcd_serial new file mode 100644 index 0000000..8b4abb4 --- /dev/null +++ b/doc/stonith/README.rcd_serial @@ -0,0 +1,186 @@ +rcd_serial - RC Delayed Serial +------------------------------ + +This stonith plugin uses one (or both) of the control lines of a serial +device (on the stonith host) to reboot another host (the stonith'ed host) +by closing its reset switch. A simple idea with one major problem - any +glitch which occurs on the serial line of the stonith host can potentially +cause a reset of the stonith'ed host. Such "glitches" can occur when the +stonith host is powered up or reset, during BIOS detection of the serial +ports, when the kernel loads up the serial port driver, etc. + +To fix this, you need to introduce a delay between the assertion of the +control signal on the serial port and the closing of the reset switch. +Then any glitches will be dissipated. 
When you really want to do the +business, you hold the control signal high for a "long time" rather than +just tickling it "glitch-fashion" by, e.g., using the rcd_serial plugin. + +As the name of the plugin suggests, one way to achieve the required delay is +to use a simple RC circuit and an npn transistor: + + + . . + RTS . . ----------- +5V + or ---------- . | + DTR . | . Rl reset + . | T1 . | |\logic + . Rt | ------RWL--------| -------> + . | b| /c . |/ + . |---Rb---|/ . + . | |\ . (m/b wiring typical + . C | \e . only - YMMV!) + . | | . + . | | . + SG ---------------------------RWG----------- 0V + . . + . . stonith'ed host + stonith host --->.<----- RC circuit ----->.<---- RWL = reset wire live + (serial port) . . RWG = reset wire ground + + +The characteristic delay (in seconds) is given by the product of Rt (in ohms) +and C (in Farads). Suitable values for the 4 components of the RC circuit +above are: + +Rt = 20k +C = 47uF +Rb = 360k +T1 = BC108 + +which gives a delay of 20 x 10e3 x 47 x 10e-6 = 0.94s. In practice the +actual delay achieved will depend on the pull-up load resistor Rl if Rl is +small: for Rl greater than 3k there is no significant dependence but lower +than this and the delay will increase - to about 1.4s at 1k and 1.9s at 0.5k. + +This circuit will work but it is a bit dangerous for the following reasons: + +1) If by mistake you open the serial port with minicom (or virtually any +other piece of software) you will cause a stonith reset ;-(. This is +because opening the port will by default cause the assertion of both DTR +and RTS, and a program like minicom will hold them high thenceforth (unless +and until a receive buffer overflow pulls RTS down). + +2) Some motherboards have the property that when held in the reset state, +all serial outputs are driven high. 
Thus, if you have the circuit above +attached to a serial port on such a motherboard, if you were to press the +(manual) reset switch and hold it in for more than a second or so, you will +cause a stonith reset of the attached system ;-(. + +This problem can be solved by adding a second npn transistor to act as a +shorting switch across the capacitor, driven by the other serial output: + + + . . + . . ----------- +5V + RTS ----------------- . | + . | . Rl reset + . | T1 . | |\logic + . Rt | ------RWL--------| -------> + . | b| /c . |/ + . T2 --|---Rb---|/ . + . | / | |\ . (m/b wiring typical + . b| /c | | \e . only - YMMV!) + DTR ------Rb--|/ C | . + . |\ | | . + . | \e | | . + . | | | . + SG ----------------------------------RWG------------- 0V + . . + . . stonith'ed host +stonith->.<--------- RC circuit ------->.<---- RWL = reset wire live + host . . RWG = reset wire ground + + +Now when RTS goes high it can only charge up C and cause a reset if DTR is +simultaneously kept low - if DTR goes high, T2 will switch on and discharge +the capacitor. Only a very unusual piece of software e.g. the rcd_serial +plugin, is going to achieve this (rather bizarre) combination of signals +(the "meaning" of which is something along the lines of "you are clear to +send but I'm not ready"!). T2 can be another BC108 and with Rb the same. + +RS232 signal levels are typically +-8V to +-12V so a 16V rating or greater +for the capacitor is sufficient BUT NOTE that a _polarised_ electrolytic should +not be used because the voltage switches around as the capacitor charges. +Nitai make a range of non-polar aluminium electrolytic capacitors. A 16V 47uF +radial capacitor measures 6mm diameter by 11mm long and along with the 3 +resistors (1/8W are fine) and the transistors, the whole circuit can be built +in the back of a DB9 serial "plug" so that all that emerges from the plug are +the 2 reset wires to go to the stonith'ed host's m/b reset pins. 
+ +NOTE that with these circuits the reset wires are now POLARISED and hence +they are labelled RWG and RWL above. You cannot connect to the reset pins +either way round as you can when connecting a manual reset switch! You'll +soon enough know if you've got it the wrong way round because your machine +will be in permanent reset state ;-( + + +How to find out if your motherboard can be reset by these circuits +------------------------------------------------------------------ + +You can either build it first and then suck it and see, or, you need a +multimeter. The 0V rail of your system is available in either +of the 2 black wires in the middle of a spare power connector (one of +those horrible 4-way plugs which you push with difficulty into the back +of hard disks, etc. Curse IBM for ever specifying such a monstrosity!). +Likewise, the +5V rail is the red wire. (The yellow one is +12V, ignore +this.) + +First, with the system powered down and the meter set to read ohms: + + check that one of the reset pins is connected to 0V - this then + is the RWG pin; + + check that the other pin (RWL) has a high resistance wrt 0V + (probably > 2M) and has a small resistance wrt to +5V - between + 0.5k and 10k (or higher, doesn't really matter) will be fine. + +Second, with the system powered up and the meter set to read Volts: + + check that RWG is indeed that i.e. there should be 0V between it + and the 0V rail; + + check that RWL is around +5V wrt the 0V rail. + +If all this checks out, you are _probably_ OK. However, I've got one +system which checks out fine but actually won't work. The reason is that +when you short the reset pins, the actual current drain is much higher than +one would expect. Why, I don't know, but there is a final test you can do +to detect this kind of system. + +With the system powered up and the meter set to read milliamps: + + short the reset pins with the meter i.e. 
reset the system, and + note how much current is actually drained when the system is in + the reset state. + +Mostly you will find that the reset current is 1mA or less and this is +fine. On the system I mention above, it is 80mA! If the current is +greater than 20mA or so, you have probably had it with the simple circuits +above, although reducing the base bias resistor will get you a bit further. +Otherwise, you have to use an analog switch (like the 4066 - I had to use 4 +of these in parallel to reset my 80mA system) which is tedious because then +you need a +5V supply rail to the circuit so you can no longer just build it +in the back of a serial plug. Mail me if you want the details. + +With the circuit built and the rcd_serial plugin compiled, you can use: + +stonith -t rcd_serial -p "testhost /dev/ttyS0 rts XXX" testhost + +to test it. XXX is the duration in millisecs so just keep increasing this +until you get a reset - but wait a few secs between each attempt because +the capacitor takes time to discharge. Once you've found the minimum value +required to cause a reset, add say 200ms for safety and use this value +henceforth. + +Finally, of course, all the usual disclaimers apply. If you follow my +advice and destroy your system, sorry. But it's highly unlikely: serial +port outputs are internally protected against short circuits, and reset pins +are designed to be short circuited! The only circumstance in which I can +see a possibility of damaging something by incorrect wiring would be if the +2 systems concerned were not at the same earth potential. Provided both +systems are plugged into the same mains system (i.e. are not miles apart +and connected only by a very long reset wire ;-) this shouldn't arise. 
+ +John Sutton +john@scl.co.uk +October 2002 diff --git a/doc/stonith/README.riloe b/doc/stonith/README.riloe new file mode 100644 index 0000000..4befe95 --- /dev/null +++ b/doc/stonith/README.riloe @@ -0,0 +1,36 @@ +Note for iLO 3 users + +This plugin doesn't support the iLO version 3. Please use ipmilan +or external/ipmi, iLO3 should support IPMI. + +Alain St-Denis wrote the riloe plugin. Here is short usage: + +primitive st0 stonith:external/riloe \ + hostlist=target-node \ + ilo_hostname=ilo-ip-address \ + ilo_user=admin ilo_password=secret ilo_protocol=2.0 + +The following additional parameters are available: + +ilo_can_reset: + Set to "1" if the ilo is capable of rebooting the host. + Defaults to '0'. + +ilo_protocol: + Defaults to 1.2. Set to the protocol version ilo supports. + +ilo_powerdown_method: + "button" or "power", the former simulates pressing the + button, the latter pulling the power plug. Defaults to + "power". The "button" method is easier on the host, but + requires ACPI. "power" should be more reliable, but not to + be used excessively for testing. + +ilo_proxyhost (string): Proxy hostname + proxy hostname if required to access ILO board + +ilo_proxyport (string, [3128]): Proxy port + proxy port if required to access ILO board + parameter will be ignored if proxy hostname is not set + + diff --git a/doc/stonith/README.vacm b/doc/stonith/README.vacm new file mode 100644 index 0000000..c9083ee --- /dev/null +++ b/doc/stonith/README.vacm @@ -0,0 +1,40 @@ +20 December 2000 + +I (rather poorly) integrated this contributed stonith driver into the +linux-ha-stonith release. There is a problem that needs to be +resolved by autoconf in that the driver will not compile unless +libvacmclient is installed on the system. + +For now, what I've done is included a line in stonith/Makefile that you can +uncomment if you want to compile the vacm stonith module. 
Look in the +Makefile in this directory for the following lines and do like it says + + +# If you want the VA Linux Cluster stonith module installed, +# uncomment the following line. You must have the vacmclient library +#VACM_STONITH = vacm_stonith.so + +Please direct questions about the operation of the stonith module to +Mike Tilstra (see the announcement to the linux-ha-dev mailing list +attached below.) + + +-Eric. +eric.ayers@compgen.com + +------------------------------------------------------------------------------ + +From: Mike Tilstra <conrad@sistina.com> +Sender: linux-ha-dev-admin@lists.tummy.com +To: linux-ha-dev@lists.tummy.com +Subject: [Linux-ha-dev] stonith module for VACM +Date: Tue, 19 Dec 2000 16:41:38 -0600 + +This was in need for some testing I'm doing, so I hacked this up quick. It +works for me, but I'm willing to bet there's atleast one bug in it. + +Figured others might like it. + +... +-- +Mike Tilstra conrad@sistina.com
\ No newline at end of file diff --git a/doc/stonith/README.vcenter b/doc/stonith/README.vcenter new file mode 100644 index 0000000..e6cc9a5 --- /dev/null +++ b/doc/stonith/README.vcenter @@ -0,0 +1,90 @@ +VMware vCenter/ESX STONITH Module +================================= + +1. Intro +-------- + +VMware vCenter/ESX STONITH Module is intended to provide STONITH support to +clusters in VMware Virtual Infrastructures. It is able to deal with virtual +machines running on physically different HostSystems (e.g. ESX/ESXi) by using +VMware vSphere Web Services SDK http://www.vmware.com/support/developer/vc-sdk/ +and connecting directly on each HostSystem or through a VMware vCenter: in this +last case the module locates the specified virtual machine in the Virtual +Infrastructure and performs actions required by cluster policies. + +2. Software requirements +------------------------ + +VMware vSphere CLI, which includes both CLI tools and Perl SDK +http://www.vmware.com/support/developer/vcli/ . The plugin has been tested with +version 4.1 http://www.vmware.com/download/download.do?downloadGroup=VCLI41 + + +3. vCenter/ESX authentication settings +-------------------------------------- + +Create the credentials file with credstore_admin.pl: + +/usr/lib/vmware-vcli/apps/general/credstore_admin.pl \ + -s 10.1.1.1 -u myuser -p mypass + +This should create $HOME/.vmware/credstore/vicredentials.xml +Copy it to a system folder, e.g. /etc + +cp -p $HOME/.vmware/credstore/vicredentials.xml /etc + + +4. 
Testing +---------- + +The plugin can be invoked directly to perform a very first connection test +(replace all the provided sample values): + +VI_SERVER=10.1.1.1 \ + VI_CREDSTORE=/etc/vicredentials.xml \ + HOSTLIST="hostname1=vmname1;hostname2=vmname2" \ + RESETPOWERON=0 \ + /usr/lib/stonith/plugins/external/vcenter gethosts + +If everything works correctly you should get: + +hostname1 +hostname2 + +When invoked in this way, the plugin connects to VI_SERVER, authenticates with +credentials stored in VI_CREDSTORE and tries to retrieve the list of virtual +machines (case insensitive) matching vmname1 and vmname2 (and any other listed). +When finished, it reports the list back by mapping virtual machine names to +hostnames as provided in HOSTLIST. If you see the full list of hostnames as a +result, then everything is going well. If otherwise you are having a partial or +empty list, you have to check parameters. + +You can even test "reset", "off" and "on" commands, to test (carefully!) the +full chain. E.g. + +VI_SERVER=10.1.1.1 \ + VI_CREDSTORE=/etc/vicredentials.xml \ + HOSTLIST="hostname1=vmname1;hostname2=vmname2" \ + RESETPOWERON=0 \ + /usr/lib/stonith/plugins/external/vcenter reset hostname2 + +In the above examples the referring infrastructure is a vCenter with several +ESXi nodes. Server IP and credentials are referred to vCenter. + +5. 
CRM configuration +-------------------- + +The following is a sample procedure to setup STONITH for an HA 2-node cluster +(replace all the provided sample values): + +crm configure primitive vfencing stonith::external/vcenter params \ + VI_SERVER="10.1.1.1" VI_CREDSTORE="/etc/vicredentials.xml" \ + HOSTLIST="hostname1=vmname1;hostname2=vmname2" RESETPOWERON="0" \ + op monitor interval="60s" + +crm configure clone Fencing vfencing + +crm configure property stonith-enabled="true" + + + diff --git a/doc/stonith/README.wti_mpc b/doc/stonith/README.wti_mpc new file mode 100644 index 0000000..050953d --- /dev/null +++ b/doc/stonith/README.wti_mpc @@ -0,0 +1,85 @@ +STONITH module for WTI MPC +-------------------------- + + +****Introduction. + +wti_mpc module uses snmp for controlling the MPC power distribution unit. It has +been tested with MPC-8H and MPC-18H and should be compatible with the whole +MPC series: + * MPC-20* + * MPC-16* + * MPC-18* + * MPC-8* + +****Unit configuration. + +wti_mpc STONITH modules uses SNMP v1, therefore it should be configured on the +device side. To do so, you should login to device, go to "Network +configuration" (/N), select "SNMP access" (25) and turn it on (enable/1). At the +SNMP access screen set "Version" (2) to "V1/V2 Only", set "Read only" (3) to +"No and set any "Community" (10) you want. You may also set other options as +you need. You may check your setup by issuing the following command: + + snmpwalk -v1 -c <community> <host> .1.3.6.1.2.1.1.1.0 + +and result should be something like this: + + SNMPv2-MIB::sysDescr.0 = STRING: Linux 85.195.135.236 2.4.18_mvl30-cllf #1991 Sun Mar 16 14:39:29 PST 2008 ppc + + +****Plugin configuration. + + Plugin declares the following configuration variables: + + *ipaddr - ip address or hostname of a MPC unit. + *port - ip port, should be 161, as MPC listens for incoming SNMP + packets on that port. It is made for future use actually. 
+ *community - Community that you've specified in the previous step. + *mib_version - Should be 3 for MPC devices with firmware version 1.62 + and later. 1 is for firmware version 1.44 and below. + 2 is unused right now; if you have a device with MIB V2, + feel free to contact me and I'll add it. + +****MIB version issue + + WTI has changed the OIDs used by MPC devices several times. I own two +types of the devices: + *With firmware v 1.44 which is compatible with MIB version 1 + *With firmware v 1.62 which is compatible with MIB version 3 + +I suppose a MIB v2 exists, but I cannot find it and I'm not able +to test it. +Anyway, this plugin supports both V1 and V3 versions, and the correct version +is selected by the "mib-version" configuration parameter. Default value is "1", +so if you do not specify this parameter or assign an unsupported value to it, +it will fall back to mib version 1. + +****Outlets and groups + + MPC devices force unique names for the outlets. This is a big problem +for the STONITH plugin, because it uses node unames as outlet names, so in case +you have a node with several power plugs, you would have to set the node uname +as the name of all the plugs. The MPC device simply doesn't allow this. + So, this plugin works with GROUPS instead of PLUGS. You may give +any unique names to your physical outlets on the MPC, but you MUST create +a plug group, name it using the node's uname and include the plugs corresponding to +that particular node in this group. This should be done even for a node with +a single power supply. An example: + + Let's pretend you have a node "atest", with two power cords, connected +to plugs A1 and B1. You have to create a group ("Plug grouping parameters" (/G) +-> Add Plug Group to directory (2)), name it "atest" ("Plug Group Name" (1)) and +assign plugs A1 and B1 to that group ("Plug access" (2)). 
Now save your +configuration and try to retrieve host list: + + stonith -t wti_mpc ipaddr=<host> port=161 community=<community> mib-version=<version> -l + +result should be: + + atest + + +------------------ +(C) Denis Chapligin <chollya@satgate.net>, SatGate, 2009 + diff --git a/doc/stonith/README_kdumpcheck.txt b/doc/stonith/README_kdumpcheck.txt new file mode 100644 index 0000000..cc8787c --- /dev/null +++ b/doc/stonith/README_kdumpcheck.txt @@ -0,0 +1,151 @@ + Kdump check STONITH plugin "kdumpcheck" +1. Introduction + This plugin's purpose is to avoid STONITH for a node which is doing kdump. + It confirms whether the node is doing kdump or not when STONITH reset or + off operation is executed. + If the target node is doing kdump, this plugin considers that STONITH + succeeded. If not, it considers that STONITH failed. + + NOTE: This plugin has no ability to shutdown or startup a node. + So it has to be used with other STONITH plugin. + Then, when this plugin failed, the next plugin which can kill a node + is executed. + NOTE: This plugin works only on Linux. + +2. The way to check + When STONITH reset or off is executed, kdumpcheck connects to the target + node, and checks the size of /proc/vmcore. + It judges that the target node is _not_ doing kdump when the size of + /proc/vmcore on the node is zero, or the file doesn't exist. + Then kdumpcheck returns "STONITH failed" to stonithd, and the next plugin + is executed. + +3. Expanding mkdumprd + This plugin requires non-root user and ssh connection even on 2nd kernel. + So, you need to apply mkdumprd_for_kdumpcheck.patch to /sbin/mkdumprd. + This patch is tested with mkdumprd version 5.0.39. + The patch adds the following functions: + i) Start udevd with specified .rules files. + ii) Bring the specified network interface up. + iii) Start sshd. + iv) Add the specified user to the 2nd kernel. + The user is to check whether the node is doing kdump or not. + v) Execute sync command after dumping. 
+ + NOTE: extensions i) to iv) are only for the case that filesystem partition + is specified as the location where the vmcore should be dumped. + +4. Parameters + kdumpcheck's parameters are the following. + hostlist : The list of hosts that the STONITH device controls. + delimiter is "," or " ". + indispensable setting. (default:none) + identity_file: a full-path of the private key file for the user + who checks doing kdump. + (default: $HOME/.ssh/id_rsa, $HOME/.ssh/id_dsa and + $HOME/.ssh/identity) + + NOTE: To execute this plugin first, set the highest priority to this plugin + in all STONITH resources. + +5. How to Use + To use this tool, do the following steps at all nodes in the cluster. + 1) Add a user to check doing kdump. + ex.) + # useradd kdumpchecker + # passwd kdumpchecker + 2) Allow passwordless login from the node which will do STONITH to all + target nodes for the user added at step 1). + ex.) + $ cd + $ mkdir .ssh + $ chmod 700 .ssh + $ cd .ssh + $ ssh-keygen (generate authentication keys with empty passphrase) + $ scp id_rsa.pub kdumpchecker@target_node:"~/.ssh/." + $ ssh kdumpchecker@target_node + $ cd ~/.ssh + $ cat id_rsa.pub >> authorized_keys + $ chmod 600 authorized_keys + $ rm id_rsa.pub + 3) Limit the command that the user can execute. + Describe the following commands in a line at the head of the user's + public key in target node's authorized_keys file. + [command="test -s /proc/vmcore"] + And describe some options (like no-pty, no-port-forwarding and so on) + according to your security policy. + ex.) + $ vi ~/.ssh/authorized_keys + command="test -s /proc/vmcore",no-port-forwarding,no-X11-forwarding, + no-agent-forwarding,no-pty ssh-rsa AAA..snip..== kdumpchecker@node1 + 4) Add settings in /etc/kdump.conf. + network_device : network interface name to check doing kdump. + indispensable setting. (default: none) + kdump_check_user : user name to check doing kdump. + specify non-root user. 
+ (default: "kdumpchecker") + udev_rules : .rules files' names. + specify if you use udev for mapping devices. + specified files have to be in /etc/udev/rules.d/. + you can specify two or more files. + delimiter is "," or " ". (default: none) + ex.) + # vi /etc/kdump.conf + ext3 /dev/sda1 + network_device eth0 + kdump_check_user kdumpchecker + udev_rules 10-if.rules + 5) Apply the patch to /sbin/mkdumprd. + # cd /sbin + # patch -p 1 < mkdumprd_for_kdumpcheck.patch + 6) Restart kdump service. + # service kdump restart + 7) Describe cib.xml to set STONITH plugin. + (See "4. Parameters" and "6. Appendix") + +6. Appendix + A sample cib.xml. + <clone id="clnStonith"> + <instance_attributes id="instance_attributes.id238245a"> + <nvpair id="clone0_clone_max" name="clone_max" value="2"/> + <nvpair id="clone0_clone_node_max" name="clone_node_max" value="1"/> + </instance_attributes> + <group id="grpStonith"> + <instance_attributes id="instance_attributes.id2382455"/> + <primitive id="grpStonith-kdumpcheck" class="stonith" type="external/kd + umpcheck"> + <instance_attributes id="instance_attributes.id238240a"> + <nvpair id="nvpair.id238240b" name="hostlist" value="node1,node2"/> + <nvpair id="nvpair.id238240c" name="priority" value="1"/> + <nvpair id="nvpair.id2382408b" name="stonith-timeout" value="30s"/> + </instance_attributes> + <operations> + <op id="grpStonith-kdumpcheck-start" name="start" interval="0" tim + eout="300" on-fail="restart"/> + <op id="grpStonith-kdumpcheck-monitor" name="monitor" interval="10" + timeout="60" on-fail="restart"/> + <op id="grpStonith-kdumpcheck-stop" name="stop" interval="0" timeou + t="300" on-fail="block"/> + </operations> + <meta_attributes id="primitive-grpStonith-kdump-check.meta"/> + </primitive> + <primitive id="grpStonith-ssh" class="stonith" type="external/ssh"> + <instance_attributes id="instance_attributes.id2382402a"> + <nvpair id="nvpair.id2382408a" name="hostlist" value="node1,node2"/ + > + <nvpair id="nvpair.id238066b" 
name="priority" value="2"/> + <nvpair id="nvpair.id2382408c" name="stonith-timeout" value="60s"/> + </instance_attributes> + <operations> + <op id="grpStonith-ssh-start" name="start" interval="0" timeout="30 + 0" on-fail="restart"/> + <op id="grpStonith-ssh-monitor" name="monitor" interval="10" timeou + t="60" on-fail="restart"/> + <op id="grpStonith-ssh-stop" name="stop" interval="0" timeout="300" + on-fail="block"/> + </operations> + <meta_attributes id="primitive-grpStonith-ssh.meta"/> + </primitive> + </group> + </clone> + |