diff options
Diffstat (limited to 'src/claim/netdata-claim.sh.in')
-rwxr-xr-x | src/claim/netdata-claim.sh.in | 451 |
1 files changed, 451 insertions, 0 deletions
diff --git a/src/claim/netdata-claim.sh.in b/src/claim/netdata-claim.sh.in new file mode 100755 index 000000000..f4fa382b6 --- /dev/null +++ b/src/claim/netdata-claim.sh.in @@ -0,0 +1,451 @@ +#!/usr/bin/env bash +# netdata +# real-time performance and health monitoring, done right! +# (C) 2023 Netdata Inc. +# SPDX-License-Identifier: GPL-3.0-or-later + +# Exit code: 0 - Success +# Exit code: 1 - Unknown argument +# Exit code: 2 - Problems with claiming working directory +# Exit code: 3 - Missing dependencies +# Exit code: 4 - Failure to connect to endpoint +# Exit code: 5 - The CLI didn't work +# Exit code: 6 - Wrong user +# Exit code: 7 - Unknown HTTP error message +# +# OK: Agent claimed successfully +# HTTP Status code: 204 +# Exit code: 0 +# +# Unknown HTTP error message +# HTTP Status code: 422 +# Exit code: 7 +ERROR_KEYS[7]="None" +ERROR_MESSAGES[7]="Unknown HTTP error message" + +# Error: The agent id is invalid; it does not fulfill the constraints +# HTTP Status code: 422 +# Exit code: 8 +ERROR_KEYS[8]="ErrInvalidNodeID" +ERROR_MESSAGES[8]="invalid node id" + +# Error: The agent hostname is invalid; it does not fulfill the constraints +# HTTP Status code: 422 +# Exit code: 9 +ERROR_KEYS[9]="ErrInvalidNodeName" +ERROR_MESSAGES[9]="invalid node name" + +# Error: At least one of the given rooms ids is invalid; it does not fulfill the constraints +# HTTP Status code: 422 +# Exit code: 10 +ERROR_KEYS[10]="ErrInvalidRoomID" +ERROR_MESSAGES[10]="invalid room id" + +# Error: Invalid public key; the public key is empty or not present +# HTTP Status code: 422 +# Exit code: 11 +ERROR_KEYS[11]="ErrInvalidPublicKey" +ERROR_MESSAGES[11]="invalid public key" +# +# Error: Expired, missing or invalid token +# HTTP Status code: 403 +# Exit code: 12 +ERROR_KEYS[12]="ErrForbidden" +ERROR_MESSAGES[12]="token expired/token not found/invalid token" + +# Error: Duplicate agent id; an agent with the same id is already registered in the cloud +# HTTP Status code: 409 +# Exit code: 13 +ERROR_KEYS[13]="ErrAlreadyClaimed" +ERROR_MESSAGES[13]="already claimed" + +# Error: The node claiming process is still in progress. +# HTTP Status code: 102 +# Exit code: 14 +ERROR_KEYS[14]="ErrProcessingClaim" +ERROR_MESSAGES[14]="processing claiming" + +# Error: Internal server error. Any other unexpected error (DB problems, etc.) +# HTTP Status code: 500 +# Exit code: 15 +ERROR_KEYS[15]="ErrInternalServerError" +ERROR_MESSAGES[15]="Internal Server Error" + +# Error: There was a timeout processing the claim. +# HTTP Status code: 504 +# Exit code: 16 +ERROR_KEYS[16]="ErrGatewayTimeout" +ERROR_MESSAGES[16]="Gateway Timeout" + +# Error: The service cannot handle the claiming request at this time. +# HTTP Status code: 503 +# Exit code: 17 +ERROR_KEYS[17]="ErrServiceUnavailable" +ERROR_MESSAGES[17]="Service Unavailable" + +# Exit code: 18 - Agent unique id is not generated yet. + +NETDATA_RUNNING=1 + +get_config_value() { + conf_file="${1}" + section="${2}" + key_name="${3}" + if [ "${NETDATA_RUNNING}" -eq 1 ]; then + config_result=$(@sbindir_POST@/netdatacli 2>/dev/null read-config "$conf_file|$section|$key_name"; exit $?) + result="$?" + if [ "${result}" -ne 0 ]; then + echo >&2 "Unable to communicate with Netdata daemon, querying config from disk instead." + NETDATA_RUNNING=0 + fi + fi + if [ "${NETDATA_RUNNING}" -eq 0 ]; then + config_result=$(@sbindir_POST@/netdata 2>/dev/null -W get2 "$conf_file" "$section" "$key_name" unknown_default) + fi + echo "$config_result" +} +if command -v curl >/dev/null 2>&1 ; then + URLTOOL="curl" +elif command -v wget >/dev/null 2>&1 ; then + URLTOOL="wget" +else + echo >&2 "I need curl or wget to proceed, but neither is available on this system." + exit 3 +fi +if ! command -v openssl >/dev/null 2>&1 ; then + echo >&2 "I need openssl to proceed, but it is not available on this system." + exit 3 +fi + +# shellcheck disable=SC2050 +if [ "@enable_cloud_POST@" = "no" ]; then + echo >&2 "This agent was built with --disable-cloud and cannot be claimed" + exit 3 +fi +# shellcheck disable=SC2050 +if [ "@enable_aclk_POST@" != "yes" ]; then + echo >&2 "This agent was built without the dependencies for Cloud and cannot be claimed" + exit 3 +fi + +# ----------------------------------------------------------------------------- +# defaults to allow running this script by hand + +[ -z "${NETDATA_VARLIB_DIR}" ] && NETDATA_VARLIB_DIR="@varlibdir_POST@" +MACHINE_GUID_FILE="@registrydir_POST@/netdata.public.unique.id" +CLAIMING_DIR="${NETDATA_VARLIB_DIR}/cloud.d" +TOKEN="unknown" +URL_BASE=$(get_config_value cloud global "cloud base url") +[ -z "$URL_BASE" ] && URL_BASE="https://app.netdata.cloud" # Cover post-install with --dont-start +ID="unknown" +ROOMS="" +[ -z "$HOSTNAME" ] && HOSTNAME=$(hostname) +CLOUD_CERTIFICATE_FILE="${CLAIMING_DIR}/cloud_fullchain.pem" +VERBOSE=0 +INSECURE=0 +RELOAD=1 +NETDATA_USER=$(get_config_value netdata global "run as user") +[ -z "$EUID" ] && EUID="$(id -u)" + + +gen_id() { + local id + + if command -v uuidgen > /dev/null 2>&1; then + id="$(uuidgen | tr '[:upper:]' '[:lower:]')" + elif [ -r /proc/sys/kernel/random/uuid ]; then + id="$(cat /proc/sys/kernel/random/uuid)" + else + echo >&2 "Unable to generate machine ID." + exit 18 + fi + + if [ "${id}" = "8a795b0c-2311-11e6-8563-000c295076a6" ] || [ "${id}" = "4aed1458-1c3e-11e6-a53f-000c290fc8f5" ]; then + gen_id + else + echo "${id}" + fi +} + +# get the MACHINE_GUID by default +if [ -r "${MACHINE_GUID_FILE}" ]; then + ID="$(cat "${MACHINE_GUID_FILE}")" + MGUID=$ID +elif [ -f "${MACHINE_GUID_FILE}" ]; then + echo >&2 "netdata.public.unique.id is not readable. Please make sure you have rights to read it (Filename: ${MACHINE_GUID_FILE})." + exit 18 +else + if mkdir -p "${MACHINE_GUID_FILE%/*}" && echo -n "$(gen_id)" > "${MACHINE_GUID_FILE}"; then + ID="$(cat "${MACHINE_GUID_FILE}")" + MGUID=$ID + else + echo >&2 "Failed to write new machine GUID. Please make sure you have rights to write to ${MACHINE_GUID_FILE}." + exit 18 + fi +fi + +# get token from file +if [ -r "${CLAIMING_DIR}/token" ]; then + TOKEN="$(cat "${CLAIMING_DIR}/token")" +fi + +# get rooms from file +if [ -r "${CLAIMING_DIR}/rooms" ]; then + ROOMS="$(cat "${CLAIMING_DIR}/rooms")" +fi + +variable_to_set= +for arg in "$@" +do + if [ -z "$variable_to_set" ]; then + case $arg in + --claim-token) variable_to_set="TOKEN" ;; + --claim-rooms) variable_to_set="ROOMS" ;; + --claim-url) variable_to_set="URL_BASE" ;; + -token=*) TOKEN=${arg:7} ;; + -url=*) [ -n "${arg:5}" ] && URL_BASE=${arg:5} ;; + -id=*) ID=$(echo "${arg:4}" | tr '[:upper:]' '[:lower:]');; + -rooms=*) ROOMS=${arg:7} ;; + -hostname=*) HOSTNAME=${arg:10} ;; + -verbose) VERBOSE=1 ;; + -insecure) INSECURE=1 ;; + -proxy=*) PROXY=${arg:7} ;; + -noproxy) NOPROXY=yes ;; + -noreload) RELOAD=0 ;; + -user=*) NETDATA_USER=${arg:6} ;; + -daemon-not-running) NETDATA_RUNNING=0 ;; + *) echo >&2 "Unknown argument ${arg}" + exit 1 ;; + esac + else + case "$variable_to_set" in + TOKEN) TOKEN="$arg" ;; + ROOMS) ROOMS="$arg" ;; + URL_BASE) URL_BASE="$arg" ;; + esac + variable_to_set= + fi + shift 1 +done + +if [ "$EUID" != "0" ] && [ "$(whoami)" != "$NETDATA_USER" ]; then + echo >&2 "This script must be run by the $NETDATA_USER user account" + exit 6 +fi + +# if curl not installed give warning SOCKS can't be used +if [[ "${URLTOOL}" != "curl" && "${PROXY:0:5}" = socks ]] ; then + echo >&2 "wget doesn't support SOCKS. Please install curl or disable SOCKS proxy." + exit 1 +fi + +echo >&2 "Token: ****************" +echo >&2 "Base URL: $URL_BASE" +echo >&2 "Id: $ID" +echo >&2 "Rooms: $ROOMS" +echo >&2 "Hostname: $HOSTNAME" +echo >&2 "Proxy: $PROXY" +echo >&2 "Netdata user: $NETDATA_USER" + +# create the claiming directory for this user +if [ ! -d "${CLAIMING_DIR}" ] ; then + mkdir -p "${CLAIMING_DIR}" && chmod 0770 "${CLAIMING_DIR}" +# shellcheck disable=SC2181 + if [ $? -ne 0 ] ; then + echo >&2 "Failed to create claiming working directory ${CLAIMING_DIR}" + exit 2 + fi +fi +if [ ! -w "${CLAIMING_DIR}" ] ; then + echo >&2 "No write permission in claiming working directory ${CLAIMING_DIR}" + exit 2 +fi + +if [ ! -f "${CLAIMING_DIR}/private.pem" ] ; then + echo >&2 "Generating private/public key for the first time." + if ! openssl genrsa -out "${CLAIMING_DIR}/private.pem" 2048 ; then + echo >&2 "Failed to generate private/public key pair." + exit 2 + fi +fi +if [ ! -f "${CLAIMING_DIR}/public.pem" ] ; then + echo >&2 "Extracting public key from private key." + if ! openssl rsa -in "${CLAIMING_DIR}/private.pem" -outform PEM -pubout -out "${CLAIMING_DIR}/public.pem" ; then + echo >&2 "Failed to extract public key." + exit 2 + fi +fi + +TARGET_URL="${URL_BASE%/}/api/v1/spaces/nodes/${ID}" +# shellcheck disable=SC2002 +KEY=$(cat "${CLAIMING_DIR}/public.pem" | tr '\n' '!' | sed -e 's/!/\\n/g') +# shellcheck disable=SC2001 +[ -n "$ROOMS" ] && ROOMS=\"$(echo "$ROOMS" | sed s'/,/", "/g')\" + +cat > "${CLAIMING_DIR}/tmpin.txt" <<EMBED_JSON +{ + "node": { + "id": "$ID", + "hostname": "$HOSTNAME" + }, + "token": "$TOKEN", + "rooms" : [ $ROOMS ], + "publicKey" : "$KEY", + "mGUID" : "$MGUID" +} +EMBED_JSON + +if [ "${VERBOSE}" == 1 ] ; then + echo "Request to server:" + cat "${CLAIMING_DIR}/tmpin.txt" +fi + + +if [ "${URLTOOL}" = "curl" ] ; then + URLCOMMAND="curl --connect-timeout 30 --retry 0 -s -i -X PUT -d \"@${CLAIMING_DIR}/tmpin.txt\"" + if [ "${NOPROXY}" = "yes" ] ; then + URLCOMMAND="${URLCOMMAND} -x \"\"" + elif [ -n "${PROXY}" ] ; then + URLCOMMAND="${URLCOMMAND} -x \"${PROXY}\"" + fi +else + URLCOMMAND="wget -T 15 -O - -q --server-response --content-on-error=on --method=PUT \ + --body-file=\"${CLAIMING_DIR}/tmpin.txt\"" + if [ "${NOPROXY}" = "yes" ] ; then + URLCOMMAND="${URLCOMMAND} --no-proxy" + elif [ "${PROXY:0:4}" = http ] ; then + URLCOMMAND="export http_proxy=${PROXY}; ${URLCOMMAND}" + fi +fi + +if [ "${INSECURE}" == 1 ] ; then + if [ "${URLTOOL}" = "curl" ] ; then + URLCOMMAND="${URLCOMMAND} --insecure" + else + URLCOMMAND="${URLCOMMAND} --no-check-certificate" + fi +fi + +if [ -r "${CLOUD_CERTIFICATE_FILE}" ] ; then + if [ "${URLTOOL}" = "curl" ] ; then + URLCOMMAND="${URLCOMMAND} --cacert \"${CLOUD_CERTIFICATE_FILE}\"" + else + URLCOMMAND="${URLCOMMAND} --ca-certificate \"${CLOUD_CERTIFICATE_FILE}\"" + fi +fi + +if [ "${VERBOSE}" == 1 ]; then + echo "${URLCOMMAND} \"${TARGET_URL}\"" +fi + +attempt_contact () { + if [ "${URLTOOL}" = "curl" ] ; then + eval "${URLCOMMAND} \"${TARGET_URL}\"" >"${CLAIMING_DIR}/tmpout.txt" + else + eval "${URLCOMMAND} \"${TARGET_URL}\"" >"${CLAIMING_DIR}/tmpout.txt" 2>&1 + fi + URLCOMMAND_EXIT_CODE=$? + if [ "${URLTOOL}" = "wget" ] && [ "${URLCOMMAND_EXIT_CODE}" -eq 8 ] ; then + # We consider the server issuing an error response a successful attempt at communicating + URLCOMMAND_EXIT_CODE=0 + fi + + # Check if URLCOMMAND connected and received reply + if [ "${URLCOMMAND_EXIT_CODE}" -ne 0 ] ; then + echo >&2 "Failed to connect to ${URL_BASE}, return code ${URLCOMMAND_EXIT_CODE}" + rm -f "${CLAIMING_DIR}/tmpout.txt" + return 4 + fi + + if [ "${VERBOSE}" == 1 ] ; then + echo "Response from server:" + cat "${CLAIMING_DIR}/tmpout.txt" + fi + + return 0 +} + +for i in {1..3} +do + if attempt_contact ; then + echo "Connection attempt $i successful" + break + fi + echo "Connection attempt $i failed. Retry in ${i}s." + if [ "$i" -eq 5 ] ; then + rm -f "${CLAIMING_DIR}/tmpin.txt" + exit 4 + fi + sleep "$i" +done + +rm -f "${CLAIMING_DIR}/tmpin.txt" + +ERROR_KEY=$(grep "\"errorMsgKey\":" "${CLAIMING_DIR}/tmpout.txt" | awk -F "errorMsgKey\":\"" '{print $2}' | awk -F "\"" '{print $1}') +case ${ERROR_KEY} in + "ErrInvalidNodeID") EXIT_CODE=8 ;; + "ErrInvalidNodeName") EXIT_CODE=9 ;; + "ErrInvalidRoomID") EXIT_CODE=10 ;; + "ErrInvalidPublicKey") EXIT_CODE=11 ;; + "ErrForbidden") EXIT_CODE=12 ;; + "ErrAlreadyClaimed") EXIT_CODE=13 ;; + "ErrProcessingClaim") EXIT_CODE=14 ;; + "ErrInternalServerError") EXIT_CODE=15 ;; + "ErrGatewayTimeout") EXIT_CODE=16 ;; + "ErrServiceUnavailable") EXIT_CODE=17 ;; + *) EXIT_CODE=7 ;; +esac + +HTTP_STATUS_CODE=$(grep "HTTP" "${CLAIMING_DIR}/tmpout.txt" | tail -1 | awk -F " " '{print $2}') +if [ "${HTTP_STATUS_CODE}" = "204" ] ; then + EXIT_CODE=0 +fi + +if [ "${HTTP_STATUS_CODE}" = "204" ] || [ "${ERROR_KEY}" = "ErrAlreadyClaimed" ] ; then + rm -f "${CLAIMING_DIR}/tmpout.txt" + if [ "${HTTP_STATUS_CODE}" = "204" ] ; then + echo -n "${ID}" >"${CLAIMING_DIR}/claimed_id" || (echo >&2 "Claiming failed"; set -e; exit 2) + fi + rm -f "${CLAIMING_DIR}/token" || (echo >&2 "Claiming failed"; set -e; exit 2) + + # Rewrite the cloud.conf on the disk + cat > "$CLAIMING_DIR/cloud.conf" <<HERE_DOC +[global] + enabled = yes + cloud base url = $URL_BASE +${PROXY:+ proxy = $PROXY} +HERE_DOC + if [ "$EUID" == "0" ]; then + chown -R "${NETDATA_USER}:${NETDATA_USER}" "${CLAIMING_DIR}" || (echo >&2 "Claiming failed"; set -e; exit 2) + fi + if [ "${RELOAD}" == "0" ] ; then + exit $EXIT_CODE + fi + + # Update cloud.conf in the agent memory + @sbindir_POST@/netdatacli write-config 'cloud|global|enabled|yes' && \ + @sbindir_POST@/netdatacli write-config "cloud|global|cloud base url|$URL_BASE" && \ + @sbindir_POST@/netdatacli reload-claiming-state && \ + if [ "${HTTP_STATUS_CODE}" = "204" ] ; then + echo >&2 "Node was successfully claimed." + else + echo >&2 "The agent cloud base url is set to the url provided." + echo >&2 "The cloud may have different credentials already registered for this agent ID and it cannot be reclaimed under different credentials for security reasons. If you are unable to connect use -id=\$(uuidgen) to overwrite this agent ID with a fresh value if the original credentials cannot be restored." + echo >&2 "Failed to claim node with the following error message:\"${ERROR_MESSAGES[$EXIT_CODE]}\"" + fi && exit $EXIT_CODE + + if [ "${ERROR_KEY}" = "ErrAlreadyClaimed" ] ; then + echo >&2 "The cloud may have different credentials already registered for this agent ID and it cannot be reclaimed under different credentials for security reasons. If you are unable to connect use -id=\$(uuidgen) to overwrite this agent ID with a fresh value if the original credentials cannot be restored." + echo >&2 "Failed to claim node with the following error message:\"${ERROR_MESSAGES[$EXIT_CODE]}\"" + exit $EXIT_CODE + fi + echo >&2 "The claim was successful but the agent could not be notified ($?)- it requires a restart to connect to the cloud." + [ "$NETDATA_RUNNING" -eq 0 ] && exit 0 || exit 5 +fi + +echo >&2 "Failed to claim node with the following error message:\"${ERROR_MESSAGES[$EXIT_CODE]}\"" +if [ "${VERBOSE}" == 1 ]; then + echo >&2 "Error key was:\"${ERROR_KEYS[$EXIT_CODE]}\"" +fi +rm -f "${CLAIMING_DIR}/tmpout.txt" +exit $EXIT_CODE |