#!/usr/bin/env bash
# netdata
# real-time performance and health monitoring, done right!
# (C) 2017 Costa Tsaousis <costa@tsaousis.gr>
# SPDX-License-Identifier: GPL-3.0-or-later

# Exit code: 0 - Success
# Exit code: 1 - Unknown argument
# Exit code: 2 - Problems with claiming working directory
# Exit code: 3 - Missing dependencies
# Exit code: 4 - Failure to connect to endpoint
# Exit code: 5 - The CLI didn't work
# Exit code: 6 - Wrong user
# Exit code: 7 - Unknown HTTP error message
#
# OK: Agent claimed successfully
# HTTP Status code: 204
# Exit code: 0
#
# Unknown HTTP error message
# HTTP Status code: 422
# Exit code: 7
# Exit codes 7-17 correspond to the "errorMsgKey" values the cloud may return.
# Both tables below are indexed by exit code: ERROR_KEYS holds the key as it
# appears in the reply, ERROR_MESSAGES the text reported to the user.
#
# Exit  HTTP  Meaning
#    7   422  Unknown HTTP error message
#    8   422  The agent id is invalid; it does not fulfill the constraints
#    9   422  The agent hostname is invalid; it does not fulfill the constraints
#   10   422  At least one of the given rooms ids is invalid; it does not fulfill the constraints
#   11   422  Invalid public key; the public key is empty or not present
#   12   403  Expired, missing or invalid token
#   13   409  Duplicate agent id; an agent with the same id is already registered in the cloud
#   14   102  The node claiming process is still in progress.
#   15   500  Internal server error. Any other unexpected error (DB problems, etc.)
#   16   504  There was a timeout processing the claim.
#   17   503  The service cannot handle the claiming request at this time.
ERROR_KEYS=(
    [7]="None"
    [8]="ErrInvalidNodeID"
    [9]="ErrInvalidNodeName"
    [10]="ErrInvalidRoomID"
    [11]="ErrInvalidPublicKey"
    [12]="ErrForbidden"
    [13]="ErrAlreadyClaimed"
    [14]="ErrProcessingClaim"
    [15]="ErrInternalServerError"
    [16]="ErrGatewayTimeout"
    [17]="ErrServiceUnavailable"
)

ERROR_MESSAGES=(
    [7]="Unknown HTTP error message"
    [8]="invalid node id"
    [9]="invalid node name"
    [10]="invalid room id"
    [11]="invalid public key"
    [12]="token expired/token not found/invalid token"
    [13]="already claimed"
    [14]="processing claiming"
    [15]="Internal Server Error"
    [16]="Gateway Timeout"
    [17]="Service Unavailable"
)

# Exit code: 18 - Agent unique id is not generated yet.

# 1 while the daemon is assumed to answer netdatacli, 0 once it is known (or
# declared with -daemon-not-running) not to.
NETDATA_RUNNING=1

#######################################
# Print the value of one configuration option.
#
# The running daemon is asked first through netdatacli. If that fails,
# NETDATA_RUNNING is set to 0 and the netdata binary is queried directly
# with `-W get2`.
#
# Globals:
#   NETDATA_RUNNING (read, written). Note that when this function is called
#   inside a command substitution the update stays in that subshell.
# Arguments:
#   $1 - configuration file name as understood by the agent (e.g. "cloud")
#   $2 - section name
#   $3 - option name
# Outputs:
#   The value on stdout (an empty line when neither source produced one);
#   a notice on stderr when falling back to the on-disk query.
#######################################
get_config_value() {
    local conf_file="${1}"
    local section="${2}"
    local key_name="${3}"
    local config_result=""
    local result

    if [ "${NETDATA_RUNNING}" -eq 1 ]; then
        # The status of the assignment is the status of netdatacli itself.
        config_result=$(@sbindir_POST@/netdatacli 2>/dev/null read-config "$conf_file|$section|$key_name")
        result="$?"
        if [ "${result}" -ne 0 ]; then
            echo >&2 "Unable to communicate with Netdata daemon, querying config from disk instead."
            NETDATA_RUNNING=0
        fi
    fi
    # Deliberately a second test rather than an else: it also catches the case
    # where the query above has just failed.
    if [ "${NETDATA_RUNNING}" -eq 0 ]; then
        # NOTE(review): "unknown_default" is presumably the fallback printed
        # when the option is absent - confirm against `netdata -W get2`.
        config_result=$(@sbindir_POST@/netdata 2>/dev/null -W get2 "$conf_file" "$section" "$key_name" unknown_default)
    fi
    echo "$config_result"
}
# Pick the HTTP client: curl is preferred, wget is the fallback.
URLTOOL=""
for url_tool_candidate in curl wget ; do
        if command -v "${url_tool_candidate}" >/dev/null 2>&1 ; then
                URLTOOL="${url_tool_candidate}"
                break
        fi
done
if [ -z "${URLTOOL}" ] ; then
        echo >&2 "I need curl or wget to proceed, but neither is available on this system."
        exit 3
fi

# openssl is required as well (it generates the key pair further down).
command -v openssl >/dev/null 2>&1 || {
        echo >&2 "I need openssl to proceed, but it is not available on this system."
        exit 3
}

# The two @..._POST@ tokens below are placeholders, presumably replaced with
# the build configuration when this template is installed.

# Refuse to run when cloud support was explicitly disabled.
# shellcheck disable=SC2050
if [ "@enable_cloud_POST@" = "no" ]; then
    echo >&2 "This agent was built with --disable-cloud and cannot be claimed"
    exit 3
fi

# Refuse to run unless the cloud link (ACLK) was built in.
# shellcheck disable=SC2050
if [ "@enable_aclk_POST@" != "yes" ]; then
    echo >&2 "This agent was built without the dependencies for Cloud and cannot be claimed"
    exit 3
fi

# -----------------------------------------------------------------------------
# defaults to allow running this script by hand

# Filesystem locations. NETDATA_VARLIB_DIR may be supplied by the caller's
# environment; everything else is derived from it or from the build prefix.
NETDATA_VARLIB_DIR="${NETDATA_VARLIB_DIR:-@varlibdir_POST@}"
MACHINE_GUID_FILE="@registrydir_POST@/netdata.public.unique.id"
CLAIMING_DIR="${NETDATA_VARLIB_DIR}/cloud.d"
CLOUD_CERTIFICATE_FILE="${CLAIMING_DIR}/cloud_fullchain.pem"

# Claim parameters; files in CLAIMING_DIR and command line options may
# override them later.
TOKEN="unknown"
ID="unknown"
ROOMS=""
if [ -z "$HOSTNAME" ]; then
    HOSTNAME=$(hostname)
fi

# Behaviour switches (see -verbose, -insecure and -noreload).
VERBOSE=0
INSECURE=0
RELOAD=1

# Values taken from the agent configuration.
URL_BASE=$(get_config_value cloud global "cloud base url")
URL_BASE="${URL_BASE:-https://api.netdata.cloud}"  # Cover post-install with --dont-start
NETDATA_USER=$(get_config_value netdata global "run as user")

# Only relevant for shells that do not provide EUID themselves.
if [ -z "$EUID" ]; then
    EUID="$(id -u)"
fi


#######################################
# Print a freshly generated UUID on stdout.
#
# uuidgen is used when available (its output is lower-cased); otherwise the
# value is read from /proc/sys/kernel/random/uuid. Two specific IDs are never
# returned: when one of them comes up, another one is generated.
# NOTE(review): the two rejected values look like well-known duplicated
# machine GUIDs - confirm their origin.
#
# Exits the current (sub)shell with status 18 when no UUID source exists.
#######################################
gen_id() {
    local candidate

    while : ; do
        if command -v uuidgen > /dev/null 2>&1; then
            candidate="$(uuidgen | tr '[:upper:]' '[:lower:]')"
        elif [ -r /proc/sys/kernel/random/uuid ]; then
            candidate="$(cat /proc/sys/kernel/random/uuid)"
        else
            echo >&2 "Unable to generate machine ID."
            exit 18
        fi

        case "${candidate}" in
            8a795b0c-2311-11e6-8563-000c295076a6 | 4aed1458-1c3e-11e6-a53f-000c290fc8f5)
                # Rejected value: go round again.
                ;;
            *)
                break
                ;;
        esac
    done

    echo "${candidate}"
}

# get the MACHINE_GUID by default
# ID (the node id sent to the cloud) starts out equal to the machine GUID and
# may later be replaced with -id=; MGUID always keeps the machine GUID.
if [ -r "${MACHINE_GUID_FILE}" ]; then
        ID="$(cat "${MACHINE_GUID_FILE}")"
        MGUID=$ID
elif [ -f "${MACHINE_GUID_FILE}" ]; then
        echo >&2 "netdata.public.unique.id is not readable. Please make sure you have rights to read it (Filename: ${MACHINE_GUID_FILE})."
        exit 18
else
        # The file does not exist yet: create it with a new GUID.
        # gen_id runs in a command substitution, so an `exit` inside it only
        # ends that subshell. Its status is therefore checked here, before
        # anything is written; gen_id has already reported the reason.
        NEW_MACHINE_GUID="$(gen_id)" || exit 18
        if [ -z "${NEW_MACHINE_GUID}" ]; then
                # The generator ran but produced nothing; never store an empty GUID.
                echo >&2 "Unable to generate machine ID."
                exit 18
        fi

        if mkdir -p "${MACHINE_GUID_FILE%/*}" && printf '%s' "${NEW_MACHINE_GUID}" > "${MACHINE_GUID_FILE}"; then
                ID="${NEW_MACHINE_GUID}"
                MGUID=$ID
        else
                echo >&2 "Failed to write new machine GUID. Please make sure you have rights to write to ${MACHINE_GUID_FILE}."
                exit 18
        fi
fi

# get token from file
if [ -r "${CLAIMING_DIR}/token" ]; then
        TOKEN="$(cat "${CLAIMING_DIR}/token")"
fi

# get rooms from file
if [ -r "${CLAIMING_DIR}/rooms" ]; then
        ROOMS="$(cat "${CLAIMING_DIR}/rooms")"
fi

# Command line options. Every option is a single word; values are attached
# with '='. Anything unrecognised aborts with exit code 1.
while [ "$#" -gt 0 ]
do
        arg="$1"
        case "${arg}" in
                -token=*)
                        TOKEN="${arg#-token=}"
                        ;;
                -url=*)
                        # An empty value keeps the configured base URL.
                        if [ -n "${arg#-url=}" ]; then
                                URL_BASE="${arg#-url=}"
                        fi
                        ;;
                -id=*)
                        # Node ids are always used in lower case.
                        ID="$(echo "${arg#-id=}" | tr '[:upper:]' '[:lower:]')"
                        ;;
                -rooms=*)
                        ROOMS="${arg#-rooms=}"
                        ;;
                -hostname=*)
                        HOSTNAME="${arg#-hostname=}"
                        ;;
                -verbose)
                        VERBOSE=1
                        ;;
                -insecure)
                        INSECURE=1
                        ;;
                -proxy=*)
                        PROXY="${arg#-proxy=}"
                        ;;
                -noproxy)
                        NOPROXY=yes
                        ;;
                -noreload)
                        RELOAD=0
                        ;;
                -user=*)
                        NETDATA_USER="${arg#-user=}"
                        ;;
                -daemon-not-running)
                        NETDATA_RUNNING=0
                        ;;
                *)
                        echo >&2 "Unknown argument ${arg}"
                        exit 1
                        ;;
        esac
        shift
done

# Only root or the account the agent runs as may claim.
if [ "$EUID" != "0" ]; then
    if [ "$(whoami)" != "$NETDATA_USER" ]; then
        echo >&2 "This script must be run by the $NETDATA_USER user account"
        exit 6
    fi
fi

# A SOCKS proxy can only be honoured by curl.
if [ "${URLTOOL}" != "curl" ] ; then
        case "${PROXY}" in
                socks*)
                        echo >&2 "wget doesn't support SOCKS. Please install curl or disable SOCKS proxy."
                        exit 1
                        ;;
        esac
fi

# Summary of the effective settings; the token itself is never shown here.
printf >&2 '%s\n' \
        "Token: ****************" \
        "Base URL: $URL_BASE" \
        "Id: $ID" \
        "Rooms: $ROOMS" \
        "Hostname: $HOSTNAME" \
        "Proxy: $PROXY" \
        "Netdata user: $NETDATA_USER"

# Make sure the claiming working directory exists (mode 0770 when it is
# created here) and that this user may write to it.
if [ ! -d "${CLAIMING_DIR}" ] ; then
        if ! { mkdir -p "${CLAIMING_DIR}" && chmod 0770 "${CLAIMING_DIR}" ; } ; then
                echo >&2 "Failed to create claiming working directory ${CLAIMING_DIR}"
                exit 2
        fi
fi
[ -w "${CLAIMING_DIR}" ] || {
        echo >&2 "No write permission in claiming working directory ${CLAIMING_DIR}"
        exit 2
}

# The RSA key pair is created once and reused by later runs.
private_key_file="${CLAIMING_DIR}/private.pem"
public_key_file="${CLAIMING_DIR}/public.pem"

if [ ! -f "${private_key_file}" ] ; then
        echo >&2 "Generating private/public key for the first time."
        openssl genrsa -out "${private_key_file}" 2048 || {
                echo >&2 "Failed to generate private/public key pair."
                exit 2
        }
fi
if [ ! -f "${public_key_file}" ] ; then
        echo >&2 "Extracting public key from private key."
        openssl rsa -in "${private_key_file}" -outform PEM -pubout -out "${public_key_file}" || {
                echo >&2 "Failed to extract public key."
                exit 2
        }
fi

# Endpoint for this node; a trailing slash on the base URL is dropped first.
TARGET_URL="${URL_BASE%/}/api/v1/spaces/nodes/${ID}"

# The PEM file is flattened to one line: every newline becomes the two
# characters '\' 'n' so that the key fits inside a JSON string.
KEY=$(tr '\n' '!' < "${CLAIMING_DIR}/public.pem" | sed -e 's/!/\\n/g')

# Turn "a,b,c" into "a", "b", "c" (quotes included) for the JSON array.
if [ -n "$ROOMS" ]; then
    # shellcheck disable=SC2001
    ROOMS="\"$(echo "$ROOMS" | sed 's/,/", "/g')\""
fi

# Request body. Values are inserted verbatim, without JSON escaping.
cat > "${CLAIMING_DIR}/tmpin.txt" <<REQUEST_BODY
{
    "node": {
        "id": "$ID",
        "hostname": "$HOSTNAME"
    },
    "token": "$TOKEN",
    "rooms" : [ $ROOMS ],
    "publicKey" : "$KEY",
    "mGUID" : "$MGUID"
}
REQUEST_BODY

if [ "${VERBOSE}" = 1 ] ; then
    echo "Request to server:"
    cat "${CLAIMING_DIR}/tmpin.txt"
fi


# The request command is kept as an argv array and executed directly, never
# as a string handed to eval. Proxy URLs, file paths and the target URL thus
# reach the client byte for byte, even when they contain characters such as
# '$', '`', '"' or '\' that the shell would otherwise re-interpret.
if [ "${URLTOOL}" = "curl" ] ; then
        URLCOMMAND=(curl --connect-timeout 30 --retry 0 -s -i -X PUT -d "@${CLAIMING_DIR}/tmpin.txt")
        if [ "${NOPROXY}" = "yes" ] ; then
                # An empty proxy overrides any proxy set through the environment.
                URLCOMMAND+=(-x "")
        elif [ -n "${PROXY}" ] ; then
                URLCOMMAND+=(-x "${PROXY}")
        fi
else
        URLCOMMAND=(wget -T 15 -O - -q --server-response --content-on-error=on --method=PUT
                "--body-file=${CLAIMING_DIR}/tmpin.txt")
        if [ "${NOPROXY}" = "yes" ] ; then
                URLCOMMAND+=(--no-proxy)
        elif [ "${PROXY:0:4}" = http ] ; then
                # wget is pointed at the proxy through the environment.
                export http_proxy="${PROXY}"
        fi
fi

# -insecure: skip verification of the server certificate.
if [ "${INSECURE}" == 1 ] ; then
    if [ "${URLTOOL}" = "curl" ] ; then
        URLCOMMAND+=(--insecure)
    else
        URLCOMMAND+=(--no-check-certificate)
    fi
fi

# Use the certificate bundle stored in the claiming directory when present.
if [ -r "${CLOUD_CERTIFICATE_FILE}" ] ; then
        if [ "${URLTOOL}" = "curl" ] ; then
                URLCOMMAND+=(--cacert "${CLOUD_CERTIFICATE_FILE}")
        else
                URLCOMMAND+=(--ca-certificate "${CLOUD_CERTIFICATE_FILE}")
        fi
fi

if [ "${VERBOSE}" == 1 ]; then
    # %q re-quotes each word so the printed line can be pasted into a shell.
    printf '%q ' "${URLCOMMAND[@]}" "${TARGET_URL}"
    printf '\n'
fi

#######################################
# Send the claim request once and store the reply.
#
# Globals:
#   URLTOOL, URLCOMMAND, TARGET_URL, CLAIMING_DIR, URL_BASE, VERBOSE (read)
#   URLCOMMAND_EXIT_CODE (written): status of the client, with wget's 8
#   mapped to 0
# Outputs:
#   The reply (headers and body) in ${CLAIMING_DIR}/tmpout.txt; the file is
#   removed again when the client failed. With VERBOSE=1 the reply is also
#   printed on stdout.
# Returns:
#   0 when a reply was received, 4 otherwise.
#######################################
attempt_contact () {
        if [ "${URLTOOL}" = "curl" ] ; then
                "${URLCOMMAND[@]}" "${TARGET_URL}" >"${CLAIMING_DIR}/tmpout.txt"
                URLCOMMAND_EXIT_CODE=$?
        else
                # stderr is captured too, so that what wget prints for
                # --server-response ends up in the same file as the body.
                "${URLCOMMAND[@]}" "${TARGET_URL}" >"${CLAIMING_DIR}/tmpout.txt" 2>&1
                URLCOMMAND_EXIT_CODE=$?
                if [ "${URLCOMMAND_EXIT_CODE}" -eq 8 ] ; then
                        # We consider the server issuing an error response a successful attempt at communicating
                        URLCOMMAND_EXIT_CODE=0
                fi
        fi

        # Check if URLCOMMAND connected and received reply
        if [ "${URLCOMMAND_EXIT_CODE}" -ne 0 ] ; then
                echo >&2 "Failed to connect to ${URL_BASE}, return code ${URLCOMMAND_EXIT_CODE}"
                rm -f "${CLAIMING_DIR}/tmpout.txt"
                return 4
        fi

        if [ "${VERBOSE}" == 1 ] ; then
            echo "Response from server:"
            cat "${CLAIMING_DIR}/tmpout.txt"
        fi

        return 0
}

# Contact the cloud up to MAX_CONTACT_ATTEMPTS times, waiting 1s, 2s, ...
# between attempts. When the last attempt fails as well, the request file is
# removed and the script ends with exit code 4 (failure to connect to
# endpoint) instead of going on to parse a reply that was never received.
MAX_CONTACT_ATTEMPTS=3
for (( i = 1; i <= MAX_CONTACT_ATTEMPTS; i++ ))
do
        if attempt_contact ; then
                echo "Connection attempt $i successful"
                break
        fi
        if [ "$i" -ge "${MAX_CONTACT_ATTEMPTS}" ] ; then
                # No retry follows, so none is announced.
                echo "Connection attempt $i failed."
                rm -f "${CLAIMING_DIR}/tmpin.txt"
                exit 4
        fi
        echo "Connection attempt $i failed. Retry in ${i}s."
        sleep "$i"
done

# The request body is not needed any more.
rm -f "${CLAIMING_DIR}/tmpin.txt"

# Extract the text that follows  "errorMsgKey":"  up to the next double quote
# from every line of the reply that carries that field.
ERROR_KEY=$(awk -F 'errorMsgKey":"' '/"errorMsgKey":/ { split($2, rest, "\""); print rest[1] }' "${CLAIMING_DIR}/tmpout.txt")

# Translate the key into its exit code through the ERROR_KEYS table; any key
# that is not listed there (or no key at all) yields 7.
EXIT_CODE=7
for error_code in "${!ERROR_KEYS[@]}" ; do
        if [ "${ERROR_KEYS[$error_code]}" = "${ERROR_KEY}" ] ; then
                EXIT_CODE="${error_code}"
                break
        fi
done

# The status code is the second word of the last line containing "HTTP".
HTTP_STATUS_CODE=$(awk '/HTTP/ { status = $2 } END { print status }' "${CLAIMING_DIR}/tmpout.txt")
case "${HTTP_STATUS_CODE}" in
        204) EXIT_CODE=0 ;;
esac

# Reply 204 (claimed) or ErrAlreadyClaimed: store the result on disk and,
# unless -noreload was given, tell the running agent about it. Every path
# through this branch ends the script.
if [ "${HTTP_STATUS_CODE}" = "204" ] || [ "${ERROR_KEY}" = "ErrAlreadyClaimed" ] ; then
        rm -f "${CLAIMING_DIR}/tmpout.txt"
        # The failure handlers below use { ...; } rather than ( ... ): an exit
        # inside parentheses would only leave a subshell and the script would
        # carry on as if nothing had happened.
        if [ "${HTTP_STATUS_CODE}" = "204" ] ; then
                echo -n "${ID}" >"${CLAIMING_DIR}/claimed_id" || { echo >&2 "Claiming failed"; exit 2; }
        fi
        rm -f "${CLAIMING_DIR}/token" || { echo >&2 "Claiming failed"; exit 2; }

        # Rewrite the cloud.conf on the disk
        cat > "$CLAIMING_DIR/cloud.conf" <<HERE_DOC
[global]
  enabled = yes
  cloud base url = $URL_BASE
HERE_DOC
        if [ "$EUID" == "0" ]; then
            # Running as root: hand the directory over to the agent's account.
            chown -R "${NETDATA_USER}:${NETDATA_USER}" "${CLAIMING_DIR}" || { echo >&2 "Claiming failed"; exit 2; }
        fi
        if [ "${RELOAD}" == "0" ] ; then
            exit $EXIT_CODE
        fi

        if [ -z "${PROXY}" ]; then
           PROXYMSG=""
        else
           PROXYMSG="You have attempted to claim this node through a proxy - please update your the proxy setting in your netdata.conf to ${PROXY}. "
        fi
        # Update cloud.conf in the agent memory
        @sbindir_POST@/netdatacli write-config 'cloud|global|enabled|yes' && \
        @sbindir_POST@/netdatacli write-config "cloud|global|cloud base url|$URL_BASE" && \
        @sbindir_POST@/netdatacli reload-claiming-state
        # Status of the first netdatacli call that failed, or 0 if all three
        # succeeded; saved now because later commands overwrite $?.
        NOTIFY_STATUS=$?

        if [ "${NOTIFY_STATUS}" -eq 0 ] ; then
                if [ "${HTTP_STATUS_CODE}" = "204" ] ; then
                        echo >&2 "${PROXYMSG}Node was successfully claimed."
                else
                        echo >&2 "The agent cloud base url is set to the url provided."
                        echo >&2 "The cloud may have different credentials already registered for this agent ID and it cannot be reclaimed under different credentials for security reasons. If you are unable to connect use -id=\$(uuidgen) to overwrite this agent ID with a fresh value if the original credentials cannot be restored."
                        echo >&2 "${PROXYMSG}Failed to claim node with the following error message:\"${ERROR_MESSAGES[$EXIT_CODE]}\""
                fi
                exit $EXIT_CODE
        fi

        # From here on the agent could not be notified.
        if [ "${ERROR_KEY}" = "ErrAlreadyClaimed" ] ; then
                echo >&2 "The cloud may have different credentials already registered for this agent ID and it cannot be reclaimed under different credentials for security reasons. If you are unable to connect use -id=\$(uuidgen) to overwrite this agent ID with a fresh value if the original credentials cannot be restored."
                echo >&2 "${PROXYMSG}Failed to claim node with the following error message:\"${ERROR_MESSAGES[$EXIT_CODE]}\""
                exit $EXIT_CODE
        fi
        echo >&2 "${PROXYMSG}The claim was successful but the agent could not be notified (${NOTIFY_STATUS})- it requires a restart to connect to the cloud."
        # A daemon that is known not to be running cannot be notified, so that
        # case still counts as success.
        if [ "$NETDATA_RUNNING" -eq 0 ] ; then
                exit 0
        fi
        exit 5
fi

# Reached only when the reply was neither 204 nor ErrAlreadyClaimed: report
# the reason, discard the stored reply and leave with the mapped exit code.
printf >&2 '%s\n' "Failed to claim node with the following error message:\"${ERROR_MESSAGES[$EXIT_CODE]}\""
[ "${VERBOSE}" = 1 ] && printf >&2 '%s\n' "Error key was:\"${ERROR_KEYS[$EXIT_CODE]}\""
rm -f "${CLAIMING_DIR}/tmpout.txt"
exit $EXIT_CODE