summaryrefslogtreecommitdiffstats
path: root/heartbeat/rkt
diff options
context:
space:
mode:
Diffstat (limited to 'heartbeat/rkt')
-rwxr-xr-xheartbeat/rkt475
1 files changed, 475 insertions, 0 deletions
diff --git a/heartbeat/rkt b/heartbeat/rkt
new file mode 100755
index 0000000..724986f
--- /dev/null
+++ b/heartbeat/rkt
@@ -0,0 +1,475 @@
+#!/bin/sh
+#
+# The rkt HA resource agent creates and launches a container based off
+# a supplied image. Containers managed by this agent are both created
+# and removed upon the agent's start and stop actions.
+#
+# Copyright (c) 2017 Valentin Vidic <Valentin.Vidic@CARNet.hr>
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+#######################################################################
+
+meta_data()
+{
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="rkt" version="1.0">
+<version>1.0</version>
+
+<longdesc lang="en">
+The rkt HA resource agent creates and launches a container
+based off a supplied image. Containers managed by this agent
+are both created and removed upon the agent's start and stop actions.
+</longdesc>
+<shortdesc lang="en">rkt container resource agent.</shortdesc>
+
+<parameters>
+<parameter name="image" required="1" unique="0">
+<longdesc lang="en">
+The image to base this container off of.
+</longdesc>
+<shortdesc lang="en">image</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="name" required="0" unique="0">
+<longdesc lang="en">
+The name to give the created container. By default this will
+be that resource's instance name.
+</longdesc>
+<shortdesc lang="en">container name</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="allow_pull" unique="0">
+<longdesc lang="en">
+Allow the image to be pulled from the configured registry when
+the image does not exist locally. NOTE, this can drastically increase
+the time required to start the container if the image repository is
+pulled over the network.
+</longdesc>
+<shortdesc lang="en">Allow pulling non-local images</shortdesc>
+<content type="boolean"/>
+</parameter>
+
+<parameter name="run_opts" required="0" unique="0">
+<longdesc lang="en">
+Add options to be appended to the 'rkt run' command which is used
+when creating the container during the start action. This option allows
+users to do things such as setting a custom entry point and injecting
+environment variables into the newly created container.
+
+NOTE: Do not explicitly specify the --name argument in the run_opts. This
+agent will set --name using either the resource's instance or the name
+provided in the 'name' argument of this agent.
+
+</longdesc>
+<shortdesc lang="en">run options</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="run_cmd" required="0" unique="0">
+<longdesc lang="en">
+Specify a command to launch within the container once
+it has initialized.
+</longdesc>
+<shortdesc lang="en">run command</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="mount_points" required="0" unique="0">
+<longdesc lang="en">
+A comma separated list of directories that the container is expecting to use.
+The agent will ensure they exist by running 'mkdir -p'
+</longdesc>
+<shortdesc lang="en">Required mount points</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="monitor_cmd" required="0" unique="0">
+<longdesc lang="en">
+Specify the full path of a command to launch within the container to check
+the health of the container. This command must return 0 to indicate that
+the container is healthy. A non-zero return code will indicate that the
+container has failed and should be recovered.
+
+Note: Using this method for monitoring processes inside a container
+is not recommended, as rkt tries to track processes running
+inside the container and does not deal well with many short-lived
+processes being spawned. Ensure that your container monitors its
+own processes and terminates on fatal error rather than invoking
+a command from the outside.
+</longdesc>
+<shortdesc lang="en">monitor command</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="force_kill" required="0" unique="0">
+<longdesc lang="en">
+Kill a container immediately rather than waiting for it to gracefully
+shutdown
+</longdesc>
+<shortdesc lang="en">force kill</shortdesc>
+<content type="boolean"/>
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="90s" />
+<action name="stop" timeout="90s" />
+<action name="monitor" timeout="30s" interval="30s" depth="0" />
+<action name="meta-data" timeout="5s" />
+<action name="validate-all" timeout="30s" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+REQUIRE_IMAGE_PULL=0
+
+rkt_usage()
+{
+ cat <<END
+usage: $0 {start|stop|monitor|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+
+monitor_cmd_exec()
+{
+ local rc=$OCF_SUCCESS
+ local out
+ local uuid
+
+ if [ -z "$OCF_RESKEY_monitor_cmd" ]; then
+ return $rc
+ fi
+
+ uuid=$(container_uuid)
+ out=$(rkt enter $uuid $OCF_RESKEY_monitor_cmd 2>&1)
+ rc=$?
+
+ if [ $rc -eq 127 ]; then
+ ocf_log err "monitor cmd failed (rc=$rc), output: $out"
+ ocf_exit_reason "monitor_cmd, ${OCF_RESKEY_monitor_cmd}, not found within container."
+ # there is no recovering from this, exit immediately
+ exit $OCF_ERR_ARGS
+ elif [ $rc -ne 0 ]; then
+ ocf_exit_reason "monitor cmd failed (rc=$rc), output: $out"
+ rc=$OCF_ERR_GENERIC
+ else
+ ocf_log debug "monitor cmd passed: exit code = $rc"
+ fi
+
+ return $rc
+}
+
+container_exists()
+{
+ rkt list --no-legend | awk -v C=${CONTAINER} '$2 == C {exit 0} ENDFILE {exit 1}'
+}
+
+container_uuid()
+{
+ rkt list --no-legend --full | awk -v C=${CONTAINER} '$2 == C {print $1; exit}'
+}
+
+container_state()
+{
+ rkt list --no-legend | awk -v C=${CONTAINER} '$2 == C {print $4; exit}'
+}
+
+remove_container()
+{
+ local uuid
+
+ container_exists
+ if [ $? -ne 0 ]; then
+ # don't attempt to remove a container that doesn't exist
+ return 0
+ fi
+
+ uuid=$(container_uuid)
+ ocf_log notice "Cleaning up inactive container, ${CONTAINER}."
+ ocf_run rkt rm $uuid
+}
+
+rkt_simple_status()
+{
+ local val
+
+ container_exists
+ if [ $? -ne 0 ]; then
+ return $OCF_NOT_RUNNING
+ fi
+
+ # retrieve the 'STATE' attribute for the container
+ val=$(container_state)
+ if [ "$val" = "running" ]; then
+ # container exists and is running
+ return $OCF_SUCCESS
+ fi
+
+ ocf_log debug "container ${CONTAINER} state is $val"
+ return $OCF_NOT_RUNNING
+}
+
+rkt_monitor()
+{
+ local rc=0
+
+ rkt_simple_status
+ rc=$?
+
+ if [ $rc -ne 0 ]; then
+ return $rc
+ fi
+
+ monitor_cmd_exec
+}
+
+rkt_create_mounts() {
+ oldIFS="$IFS"
+ IFS=","
+ for directory in $OCF_RESKEY_mount_points; do
+ mkdir -p "$directory"
+ done
+ IFS="$oldIFS"
+}
+
+rkt_start()
+{
+ rkt_create_mounts
+ local run_opts="--name=${CONTAINER}"
+
+ # check to see if the container has already started
+ rkt_simple_status
+ if [ $? -eq $OCF_SUCCESS ]; then
+ return $OCF_SUCCESS
+ fi
+
+ if [ -n "$OCF_RESKEY_run_cmd" ]; then
+ run_opts="$run_opts --exec=$OCF_RESKEY_run_cmd"
+ fi
+
+ if [ -n "$OCF_RESKEY_run_opts" ]; then
+ run_opts="$run_opts $OCF_RESKEY_run_opts"
+ fi
+
+ if [ $REQUIRE_IMAGE_PULL -eq 1 ]; then
+ ocf_log notice "Beginning pull of image, ${OCF_RESKEY_image}"
+ rkt fetch "${OCF_RESKEY_image}"
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "failed to pull image ${OCF_RESKEY_image}"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
+ # make sure any previous container matching our container name is cleaned up first.
+ # we already know at this point it wouldn't be running
+ remove_container
+ ocf_log info "Starting container, ${CONTAINER}."
+ ocf_run systemd-run --slice=machine rkt run $OCF_RESKEY_image $run_opts
+
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to launch container"
+ return $OCF_ERR_GENERIC
+ fi
+
+ while ! container_exists || [ "$(container_state)" = "preparing" ] ; do
+ ocf_log debug "waiting for container to start"
+ sleep 1
+ done
+
+ # wait for monitor to pass before declaring that the container is started
+ while true; do
+ rkt_simple_status
+ if [ $? -ne $OCF_SUCCESS ]; then
+ ocf_exit_reason "Newly created container exited after start"
+ return $OCF_ERR_GENERIC
+ fi
+
+ monitor_cmd_exec
+ if [ $? -eq $OCF_SUCCESS ]; then
+ ocf_log notice "Container ${CONTAINER} started successfully as $(container_uuid)"
+ return $OCF_SUCCESS
+ fi
+
+ ocf_exit_reason "waiting on monitor_cmd to pass after start"
+ sleep 1
+ done
+}
+
+rkt_stop()
+{
+ local timeout=60
+ local uuid
+
+ rkt_simple_status
+ if [ $? -eq $OCF_NOT_RUNNING ]; then
+ remove_container
+ return $OCF_SUCCESS
+ fi
+
+ if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
+ timeout=$(( $OCF_RESKEY_CRM_meta_timeout/1000 - 10 ))
+ if [ $timeout -lt 10 ]; then
+ timeout=10
+ fi
+ fi
+
+ uuid=$(container_uuid)
+ if ocf_is_true "$OCF_RESKEY_force_kill"; then
+ ocf_log info "Killing container, ${CONTAINER}."
+ ocf_run rkt stop --force $uuid
+ else
+ ocf_log info "Stopping container, ${CONTAINER}."
+ ocf_run rkt stop $uuid
+ fi
+
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
+ return $OCF_ERR_GENERIC
+ fi
+
+ while [ $timeout -gt 0 ]; do
+ rkt_simple_status
+ if [ $? -eq $OCF_NOT_RUNNING ]; then
+ break
+ fi
+
+ ocf_log debug "waiting for container to stop"
+ timeout=$(( $timeout - 1 ))
+ sleep 1
+ done
+
+ rkt_simple_status
+ if [ $? -eq $OCF_SUCCESS ]; then
+ ocf_exit_reason "Failed to stop container, ${CONTAINER}."
+ return $OCF_ERR_GENERIC
+ fi
+
+ remove_container
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to remove stopped container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
+ return $OCF_ERR_GENERIC
+ fi
+
+ return $OCF_SUCCESS
+}
+
+image_exists()
+{
+ rkt image list --no-legend | awk -v I=${OCF_RESKEY_image} '$2 == I {exit 0} ENDFILE {exit 1}'
+ if [ $? -eq 0 ]; then
+ # image found
+ return 0
+ fi
+
+ if ocf_is_true "$OCF_RESKEY_allow_pull"; then
+ REQUIRE_IMAGE_PULL=1
+ ocf_log notice "Image (${OCF_RESKEY_image}) does not exist locally but will be pulled during start"
+ return 0
+ fi
+
+ # image not found.
+ return 1
+}
+
+rkt_validate()
+{
+ check_binary rkt
+ check_binary systemd-run
+
+ if [ -z "$OCF_RESKEY_image" ]; then
+ ocf_exit_reason "'image' option is required"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ if echo ${CONTAINER} | grep -q '[^a-z0-9-]'; then
+ ocf_exit_reason "'name' must contain only lower case alphanumeric characters and -"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ image_exists
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found."
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ return $OCF_SUCCESS
+}
+
+# TODO :
+# When a user starts plural clones in a node in globally-unique, a user cannot appoint plural name parameters.
+
+if ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then
+ if [ -n "$OCF_RESKEY_name" ]; then
+ if [ -n "$OCF_RESKEY_CRM_meta_clone_node_max" ] && [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ]
+ then
+ ocf_exit_reason "Cannot make plural clones from the same name parameter."
+ exit $OCF_ERR_CONFIGURED
+ fi
+ if [ -n "$OCF_RESKEY_CRM_meta_master_node_max" ] && [ "$OCF_RESKEY_CRM_meta_master_node_max" -ne 1 ]
+ then
+ ocf_exit_reason "Cannot make plural master from the same name parameter."
+ exit $OCF_ERR_CONFIGURED
+ fi
+ fi
+ : ${OCF_RESKEY_name=`echo ${OCF_RESOURCE_INSTANCE} | tr ':' '-'`}
+else
+ : ${OCF_RESKEY_name=${OCF_RESOURCE_INSTANCE}}
+fi
+
+CONTAINER=$OCF_RESKEY_name
+
+case $__OCF_ACTION in
+meta-data) meta_data
+ exit $OCF_SUCCESS;;
+start)
+ rkt_validate
+ rkt_start;;
+stop) rkt_stop;;
+monitor) rkt_monitor;;
+validate-all) rkt_validate;;
+usage|help) rkt_usage
+ exit $OCF_SUCCESS
+ ;;
+*) rkt_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc