diff options
Diffstat (limited to 'heartbeat/Xen')
-rwxr-xr-x | heartbeat/Xen | 653 |
1 files changed, 653 insertions, 0 deletions
diff --git a/heartbeat/Xen b/heartbeat/Xen new file mode 100755 index 0000000..1ef20d7 --- /dev/null +++ b/heartbeat/Xen @@ -0,0 +1,653 @@ +#!/bin/sh +# +# +# Support: users@clusterlabs.org +# License: GNU General Public License (GPL) +# +# Resource Agent for the Xen Hypervisor. +# Manages Xen virtual machine instances by +# mapping cluster resource start and stop, +# to Xen create and shutdown, respectively. +# +# usage: $0 {start|stop|status|monitor|meta-data} +# +# OCF parameters are as below: +# OCF_RESKEY_xmfile +# Absolute path to the Xen control file, +# for this virtual machine. +# OCF_RESKEY_allow_mem_management +# Change memory usage on start/stop/migration +# of virtual machine +# OCF_RESKEY_reserved_Dom0_memory +# minimum memory reserved for domain 0 +# OCF_RESKEY_monitor_scripts +# scripts to monitor services within the +# virtual domain + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Parameter defaults + +OCF_RESKEY_xmfile_default="/etc/xen/vm/MyDomU" +OCF_RESKEY_name_default="" +OCF_RESKEY_shutdown_timeout_default="" +OCF_RESKEY_shutdown_acpi_default="0" +OCF_RESKEY_allow_mem_management_default="0" +OCF_RESKEY_node_ip_attribute_default="" +OCF_RESKEY_reserved_Dom0_memory_default="512" +OCF_RESKEY_autoset_utilization_cpu_default="false" +OCF_RESKEY_autoset_utilization_hv_memory_default="false" +OCF_RESKEY_monitor_scripts_default="" + +: ${OCF_RESKEY_xmfile=${OCF_RESKEY_xmfile_default}} +: ${OCF_RESKEY_name=${OCF_RESKEY_name_default}} +: ${OCF_RESKEY_shutdown_timeout=${OCF_RESKEY_shutdown_timeout_default}} +: ${OCF_RESKEY_shutdown_acpi=${OCF_RESKEY_shutdown_acpi_default}} +: ${OCF_RESKEY_allow_mem_management=${OCF_RESKEY_allow_mem_management_default}} +: ${OCF_RESKEY_node_ip_attribute=${OCF_RESKEY_node_ip_attribute_default}} +: ${OCF_RESKEY_reserved_Dom0_memory=${OCF_RESKEY_reserved_Dom0_memory_default}} +: ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}} +: ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}} +: ${OCF_RESKEY_monitor_scripts=${OCF_RESKEY_monitor_scripts_default}} + +####################################################################### + +usage() { + cat <<-END + usage: $0 {start|stop|status|monitor|meta-data|validate-all} +END +} + + +# prefer xl +xentool=$(which xl 2> /dev/null || which xm) + +meta_data() { + cat <<END +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> +<resource-agent name="Xen" version="1.0"> +<version>1.0</version> + +<longdesc lang="en"> +Resource Agent for the Xen Hypervisor. +Manages Xen virtual machine instances by mapping cluster resource +start and stop, to Xen create and shutdown, respectively. + +A note on names + +We will try to extract the name from the config file (the xmfile +attribute). If you use a simple assignment statement, then you +should be fine. Otherwise, if there's some python acrobacy +involved such as dynamically assigning names depending on other +variables, and we will try to detect this, then please set the +name attribute. You should also do that if there is any chance of +a pathological situation where a config file might be missing, +for example if it resides on a shared storage. If all fails, we +finally fall back to the instance id to preserve backward +compatibility. + +Para-virtualized guests can also be migrated by enabling the +meta_attribute allow-migrate. + +</longdesc> +<shortdesc lang="en">Manages Xen unprivileged domains (DomUs)</shortdesc> + +<parameters> + +<parameter name="xmfile" unique="0" required="1"> +<longdesc lang="en"> +Absolute path to the Xen control file, +for this virtual machine. +</longdesc> +<shortdesc lang="en">Xen control file</shortdesc> +<content type="string" default="${OCF_RESKEY_xmfile_default}" /> +</parameter> +<parameter name="name" unique="0" required="0"> +<longdesc lang="en"> +Name of the virtual machine. +</longdesc> +<shortdesc lang="en">Xen DomU name</shortdesc> +<content type="string" default="${OCF_RESKEY_name_default}" /> +</parameter> +<parameter name="shutdown_timeout"> +<longdesc lang="en"> +The Xen agent will first try an orderly shutdown using xl shutdown. +Should this not succeed within this timeout, the agent will escalate to +xl destroy, forcibly killing the node. + +If this is not set, it will default to two-third of the stop action +timeout. + +Setting this value to 0 forces an immediate destroy. + +</longdesc> +<shortdesc lang="en">Shutdown escalation timeout</shortdesc> +<content type="string" default="${OCF_RESKEY_shutdown_timeout_default}" /> +</parameter> +<parameter name="shutdown_acpi" unique="0" required="0"> +<longdesc lang="en"> +Handle shutdown by simulating an ACPI power button event. +Enable this to allow graceful shutdown for HVM domains +without installed PV drivers. +</longdesc> +<shortdesc lang="en">Simulate power button event on shutdown</shortdesc> +<content type="boolean" default="${OCF_RESKEY_shutdown_acpi_default}" /> +</parameter> +<parameter name="allow_mem_management" unique="0" required="0"> +<longdesc lang="en"> +This parameter enables dynamic adjustment of memory for start +and stop actions used for Dom0 and the DomUs. The default is +to not adjust memory dynamically. +</longdesc> +<shortdesc lang="en">Use dynamic memory management</shortdesc> +<content type="boolean" default="${OCF_RESKEY_allow_mem_management_default}" /> +</parameter> + +<parameter name="node_ip_attribute"> +<longdesc lang="en"> +In case of a live migration, the system will default to using the IP +address associated with the hostname via DNS or /etc/hosts. + +This parameter allows you to specify a node attribute that will be +queried instead for the target node, overriding the IP address. This +allows you to use a dedicated network for live migration traffic to a +specific node. + +Warning: make very sure the IP address does point to the right node. Or +else the live migration will end up somewhere else, greatly confusing +the cluster and causing havoc. +</longdesc> +<shortdesc lang="en">Node attribute containing target IP address</shortdesc> +<content type="string" default="${OCF_RESKEY_node_ip_attribute_default}" /> +</parameter> + +<parameter name="reserved_Dom0_memory" unique="0" required="0"> +<longdesc lang="en"> +In case memory management is used, this parameter +defines the minimum amount of memory to be reserved +for the dom0. The default minimum memory is 512MB. +</longdesc> +<shortdesc lang="en">Minimum Dom0 memory</shortdesc> +<content type="string" default="${OCF_RESKEY_reserved_Dom0_memory_default}" /> +</parameter> + +<parameter name="autoset_utilization_cpu" unique="0" required="0"> +<longdesc lang="en"> +If set true, the agent will detect the number of domain's vCPUs from Xen, and put it +into the CPU utilization of the resource when the monitor is executed. +Before enabling make sure node utilization is also set (using NodeUtilization +agent or manually) or the resource might not be able to start anywhere. +</longdesc> +<shortdesc lang="en">Enable auto-setting the CPU utilization of the resource</shortdesc> +<content type="boolean" default="${OCF_RESKEY_autoset_utilization_cpu_default}" /> +</parameter> + +<parameter name="autoset_utilization_hv_memory" unique="0" required="0"> +<longdesc lang="en"> +If set true, the agent will detect the number of memory from Xen, and put it +into the hv_memory utilization of the resource when the monitor is executed. +Before enabling make sure node utilization is also set (using NodeUtilization +agent or manually) or the resource might not be able to start anywhere. +</longdesc> +<shortdesc lang="en">Enable auto-setting the hv_memory utilization of the resource</shortdesc> +<content type="boolean" default="${OCF_RESKEY_autoset_utilization_hv_memory_default}" /> +</parameter> + +<parameter name="monitor_scripts" unique="0" required="0"> +<longdesc lang="en"> +To additionally monitor services within the unprivileged domain, +add this parameter with a list of scripts to monitor. +</longdesc> +<shortdesc lang="en">list of space separated monitor scripts</shortdesc> +<content type="string" default="${OCF_RESKEY_monitor_scripts_default}" /> +</parameter> + +</parameters> + +<actions> +<action name="start" timeout="60s" /> +<action name="stop" timeout="40s" /> +<action name="migrate_from" timeout="120s" /> +<action name="migrate_to" timeout="120s" /> +<action name="monitor" depth="0" timeout="30s" interval="10s" /> +<action name="meta-data" timeout="5s" /> +<action name="validate-all" timeout="5s" /> +</actions> +</resource-agent> +END +} + +Xen_Status() { + if expr "x$xentool" : "x.*xl" >/dev/null; then + $xentool list $1 >/dev/null 2>&1 + if [ $? -ne 0 ]; then + return $OCF_NOT_RUNNING + else + return $OCF_SUCCESS + fi + fi + if have_binary xen-list; then + xen-list $1 2>/dev/null | grep -qs "State.*[-r][-b][-p]--" 2>/dev/null + if [ $? -ne 0 ]; then + return $OCF_NOT_RUNNING + else + return $OCF_SUCCESS + fi + fi + STATUS=`$xentool list --long $1 2>/dev/null | grep status 2>/dev/null` + if [ "X${STATUS}" != "X" ]; then + # we have Xen 3.0.4 or higher + STATUS_NOSPACES=`echo "$STATUS" | awk '{ print $1,$2}'` + if [ "$STATUS_NOSPACES" = "(status 2)" -o "$STATUS_NOSPACES" = "(status 1)" ]; then + return $OCF_SUCCESS + else + return $OCF_NOT_RUNNING + fi + else + # we have Xen 3.0.3 or lower + STATUS=`$xentool list --long $1 2>/dev/null | grep state 2>/dev/null` + echo "${STATUS}" | grep -qs "[-r][-b][-p]---" + if [ $? -ne 0 ]; then + return $OCF_NOT_RUNNING + else + return $OCF_SUCCESS + fi + fi +} + +# If the guest is rebooting, it may completely disappear from the +# list of defined guests, thus xl/xen-list would return with not +# running; apparently, this period lasts only for a second or +# two +# If a status returns not running, then test status +# again for 5 times (perhaps it'll show up) +Xen_Status_with_Retry() { + local rc cnt=5 + + Xen_Status $1 + rc=$? + while [ $rc -eq $OCF_NOT_RUNNING -a $cnt -gt 0 ]; do + case "$__OCF_ACTION" in + stop) + ocf_log debug "domain $1 reported as not running, waiting $cnt seconds ..." + ;; + monitor) + ocf_log warn "domain $1 reported as not running, but it is expected to be running! Retrying for $cnt seconds ..." + ;; + *) : not reachable + ;; + esac + sleep 1 + Xen_Status $1 + rc=$? + cnt=$((cnt-1)) + done + return $rc +} + +set_util_attr() { + local attr=$1 val=$2 + local cval outp + + cval=$(crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>/dev/null) + if [ $? -ne 0 ] && [ -z "$cval" ]; then + if crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>&1 | grep -q "not connected"; then + ocf_log debug "Unable to get utilization attribute $attr: cib is not available" + return + fi + fi + + if [ "$cval" != "$val" ]; then + outp=$(crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $attr -v $val 2>&1) || \ + ocf_log warn "Unable to set utilization attribute $attr: $outp" + fi +} + +Xen_Update_Utilization() { + local dom_status dom_cpu dom_mem + + dom_status=$($xentool list ${DOMAIN_NAME} | awk 'NR==2 {print $4, $3}') + + if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu"; then + dom_cpu=${dom_status% *} + test -n "$dom_cpu" && set_util_attr cpu $dom_cpu + fi + + if ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory"; then + dom_mem=${dom_status#* } + test -n "$dom_mem" && set_util_attr hv_memory "$dom_mem" + fi +} + +Xen_Adjust_Memory() { + if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then + CNTNEW=$1 + RUNNING=`Xen_List_running` + RUNCNT=`Xen_Count_running` + MAXMEM=`Xen_Total_Memory` + if [ ${RUNCNT} -eq 0 -a ${CNTNEW} -eq 0 ]; then + RUNCNT=1 + fi + #NEWMEM=`echo "(${MAXMEM}-${OCF_RESKEY_reserved_Dom0_memory})/(${RUNCNT}+${CNTNEW})"|bc` + NEWMEM=$(( (${MAXMEM} - ${OCF_RESKEY_reserved_Dom0_memory}) / (${RUNCNT} + ${CNTNEW} ) )) + # do not rely on ballooning add dom0_mem=512 instead to force memory for dom0 + #$xentool mem-set Domain-0 ${OCF_RESKEY_reserved_Dom0_memory} + for DOM in ${RUNNING}; do + $xentool mem-set ${DOM} ${NEWMEM} + done + ocf_log info "Adjusted memory to: $NEWMEM, for the following $RUNCNT domains: $RUNNING" + fi +} + +Xen_List_all() { + $xentool list | grep -v -e "Name" -e "Domain-0" | awk '{print $1}' +} +Xen_List_running() { + ALL_DOMS=`Xen_List_all` + for DOM in ${ALL_DOMS}; do + if Xen_Status $DOM; then + echo "${DOM} " + fi + done +} +Xen_Count_running() { + Xen_List_running | wc -w +} + +Xen_Monitor() { + if ocf_is_probe; then + Xen_Status ${DOMAIN_NAME} + else + Xen_Status_with_Retry ${DOMAIN_NAME} + fi + if [ $? -eq ${OCF_NOT_RUNNING} ]; then + ocf_is_probe || + ocf_log err "Xen domain $DOMAIN_NAME stopped" + return ${OCF_NOT_RUNNING} + fi + if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu" || \ + ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory" + then + Xen_Update_Utilization + fi + if [ "X${OCF_RESKEY_monitor_scripts}" = "X" ]; then + return ${OCF_SUCCESS} + fi + for SCRIPT in ${OCF_RESKEY_monitor_scripts}; do + $SCRIPT + if [ $? -ne 0 ]; then + return ${OCF_ERR_GENERIC} + fi + done + return ${OCF_SUCCESS} +} + +Xen_Total_Memory() { + $xentool info | grep "^total_memory" | awk '{print $3}' +} + +Xen_Start() { + if Xen_Status ${DOMAIN_NAME}; then + ocf_log info "Xen domain $DOMAIN_NAME already running." + return $OCF_SUCCESS + fi + + if [ ! -f "${OCF_RESKEY_xmfile}" ]; then + ocf_log err "Config file ${OCF_RESKEY_xmfile} for $DOMAIN_NAME does not exist." + return $OCF_ERR_INSTALLED + fi + + if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then + Xen_Adjust_Memory 1 + ocf_log info "New memory for virtual domains: ${NEWMEM}" + sed -i -e "/^memory=/ s/^memory=.*/memory=${NEWMEM}/" ${OCF_RESKEY_xmfile} + $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} + fi + + # the latest xl management tool is squeamish about some + # characters in a name (the vm name is xen-f): + # /etc/xen/vm/xen-f:15: config parsing error near `xen': + # syntax error, unexpected IDENT, expecting STRING or NUMBER + # or '[' + # /etc/xen/vm/xen-f:15: config parsing error near `-f': lexical error + # + # the older xm management tool cannot digest quotes (see + # https://developerbugs.linuxfoundation.org/show_bug.cgi?id=2671) + # + # hence the following + if expr "x$xentool" : "x.*xl" >/dev/null; then + $xentool create ${OCF_RESKEY_xmfile} name=\"$DOMAIN_NAME\" + else + $xentool create ${OCF_RESKEY_xmfile} name="$DOMAIN_NAME" + fi + rc=$? + + if [ $rc -ne 0 ]; then + return $OCF_ERR_GENERIC + else + if ocf_is_true "${OCF_RESKEY_allow_mem_management}"; then + $xentool mem-set ${DOMAIN_NAME} ${NEWMEM} + fi + fi + while sleep 1; do + Xen_Monitor && return $OCF_SUCCESS + done +} + +xen_domain_stop() { + local dom=$1 + local timeout + + if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then + timeout=$OCF_RESKEY_shutdown_timeout + elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then + # Allow 2/3 of the action timeout for the orderly shutdown + # (The origin unit is ms, hence the conversion) + timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) + else + timeout=60 + fi + + if [ "$timeout" -gt 0 ]; then + ocf_log info "Xen domain $dom will be stopped (timeout: ${timeout}s)" + if ocf_is_true "${OCF_RESKEY_shutdown_acpi}"; then + $xentool trigger $dom power + else + $xentool shutdown $dom + fi + + while Xen_Status $dom && [ "$timeout" -gt 0 ]; do + ocf_log debug "$dom still not stopped. Waiting..." + timeout=$((timeout-1)) + sleep 1 + done + fi + + if [ "$timeout" -eq 0 ]; then + while Xen_Status $dom; do + ocf_log warn "Xen domain $dom will be destroyed!" + $xenkill $dom + sleep 1 + done + # Note: This does not give up. stop isn't allowed to to fail. + # If $xentool destroy fails, stop will eventually timeout. + # This is the correct behaviour. + fi + + ocf_log info "Xen domain $dom stopped." +} + +Xen_Stop() { + local vm + if Xen_Status_with_Retry ${DOMAIN_NAME}; then + vm=${DOMAIN_NAME} + elif Xen_Status migrating-${DOMAIN_NAME}; then + ocf_log info "Xen domain $DOMAIN_NAME is migrating" + vm="migrating-${DOMAIN_NAME}" + else + ocf_log info "Xen domain $DOMAIN_NAME already stopped." + fi + + if [ "$vm" ]; then + xen_domain_stop $vm + else + # It is supposed to be gone, but there have been situations where + # $xentool list / xen-list showed it as stopped but it was still + # instantiated. Nuke it once more to make sure: + $xenkill ${DOMAIN_NAME} + fi + + Xen_Adjust_Memory 0 + return $OCF_SUCCESS +} + +Xen_Migrate_To() { + target_node="$OCF_RESKEY_CRM_meta_migrate_target" + target_attr="$OCF_RESKEY_node_ip_attribute" + target_addr="$target_node" + + if Xen_Status ${DOMAIN_NAME}; then + ocf_log info "$DOMAIN_NAME: Starting $xentool migrate to $target_node" + + if [ -n "$target_attr" ]; then + nodevalue=`crm_attribute --type nodes --node $target_node -n $target_attr -G -q` + if [ -n "${nodevalue}" -a "${nodevalue}" != "(null)" ]; then + target_addr="$nodevalue" + ocf_log info "$DOMAIN_NAME: $target_node is using address $target_addr" + fi + fi + + if expr "x$xentool" : "x.*xm" >/dev/null; then + $xentool migrate --live $DOMAIN_NAME $target_addr + else + $xentool migrate $DOMAIN_NAME $target_addr + fi + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "$DOMAIN_NAME: $xentool migrate to $target_node failed: $rc" + return $OCF_ERR_GENERIC + else + Xen_Adjust_Memory 0 + ocf_log info "$DOMAIN_NAME: $xentool migrate to $target_node succeeded." + return $OCF_SUCCESS + fi + else + ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!" + return $OCF_ERR_GENERIC + fi +} + +Xen_Migrate_From() { + if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then + # Allow 2/3 of the action timeout for status to stabilize + # (The origin unit is ms, hence the conversion) + timeout=$((OCF_RESKEY_CRM_meta_timeout/1500)) + else + timeout=10 # should be plenty + fi + + while ! Xen_Status ${DOMAIN_NAME} && [ $timeout -gt 0 ]; do + ocf_log debug "$DOMAIN_NAME: Not yet active locally, waiting (timeout: ${timeout}s)" + timeout=$((timeout-1)) + sleep 1 + done + + if Xen_Status ${DOMAIN_NAME}; then + Xen_Adjust_Memory 0 + ocf_log info "$DOMAIN_NAME: Active locally, migration successful" + return $OCF_SUCCESS + else + ocf_log err "$DOMAIN_NAME: Not active locally, migration failed!" + return $OCF_ERR_GENERIC + fi +} + +Xen_Validate_All() { + return $OCF_SUCCESS +} + +if [ $# -ne 1 ]; then + usage + exit $OCF_ERR_ARGS +fi + +case $1 in + meta-data) + meta_data + exit $OCF_SUCCESS + ;; + usage) + usage + exit $OCF_SUCCESS + ;; +esac + +# the name business: +# +# 1. use the name attribute, or +# 2. find the name in the config file (if it exists) and use that +# unless it contains funny characters such as '%' or space, or +# 3. use the OCF_RESOURCE_INSTANCE + +if [ x"${OCF_RESKEY_name}" != x ]; then + DOMAIN_NAME="${OCF_RESKEY_name}" +else + if [ -f "${OCF_RESKEY_xmfile}" ]; then + DOMAIN_NAME=`awk '$1~/^name(=|$)/{print}' ${OCF_RESKEY_xmfile} | sed 's/.*=[[:space:]]*//' | tr -d "[\"']"` + if echo "$DOMAIN_NAME" | grep -qs '[%[:space:]]'; then + DOMAIN_NAME="" + fi + fi + DOMAIN_NAME=${DOMAIN_NAME:-${OCF_RESOURCE_INSTANCE}} +fi + +for binary in sed awk; do + check_binary $binary +done + +if have_binary xen-destroy ; then + xenkill="xen-destroy" +else + xenkill="$xentool destroy" +fi + +if [ -n "$OCF_RESKEY_shutdown_timeout" ]; then + ocf_is_decimal "$OCF_RESKEY_shutdown_timeout" || { + ocf_log err "shutdown_timeout must be a number" + exit $OCF_ERR_CONFIGURED + } +fi + +case $1 in + start) + Xen_Start + ;; + stop) + Xen_Stop + ;; + migrate_to) + Xen_Migrate_To + ;; + migrate_from) + Xen_Migrate_From + ;; + monitor) + Xen_Monitor + ;; + status) + Xen_Status ${DOMAIN_NAME} + ;; + validate-all) + Xen_Validate_All + ;; + *) + usage + exit $OCF_ERR_UNIMPLEMENTED + ;; +esac +exit $? |