#!/bin/bash

# Prints the help text for this script: what is validated, how the work is
# parallelised, where it can be run from, and example invocations.
# Every line of the text is emitted through log.
function usage {
    local script_name
    script_name="$(basename "${0}")"

    local -a help_lines=(
        "In the updates subdirectory of the directory this script is in,"
        "there are a bunch of config files. You should call this script,"
        "passing the names of one or more of those files as parameters"
        "to this script."
        ""
        "This will validate that the update.xml files all exist for the"
        "given config file, and that they report the correct file sizes"
        "for the associated mar files, and that the associated mar files"
        "are available on the update servers."
        ""
        "This script will spawn multiple curl processes to query the"
        "snippets (update.xml file downloads) and the download urls in"
        "parallel. The number of parallel curl processes can be managed"
        "with the -p MAX_PROCS option."
        ""
        "Only the first three bytes of the mar files are downloaded"
        "using curl -r 0-2 option to save time. GET requests are issued"
        "rather than HEAD requests, since Akamai (one of our CDN"
        "partners) caches GET and HEAD requests separately - therefore"
        "they can be out-of-sync, and it is important that we validate"
        "that the GET requests return the expected results."
        ""
        "Please note this script can run on linux and OS X. It has not"
        "been tested on Windows, but may also work. It can be run"
        "locally, and does not require access to the mozilla vpn or"
        "any other special network, since the update servers are"
        "available over the internet. However, it does require an"
        "up-to-date checkout of the tools repository, as the updates/"
        "subfolder changes over time, and reflects the currently"
        "available updates. It makes no changes to the update servers"
        "so there is no harm in running it. It simply generates a"
        "report. However, please try to avoid hammering the update"
        "servers aggressively, e.g. with thousands of parallel"
        "processes. For example, feel free to run the examples below,"
        "first making sure that your source code checkout is up-to-"
        "date on your own machine, to get the latest configs in the"
        "updates/ subdirectory."
        ""
        "Usage:"
        "    ${script_name} [-p MAX_PROCS] config1 [config2 config3 config4 ...]"
        "    ${script_name} -h"
        ""
        "Examples:"
        "    1. ${script_name} -p 128 mozBeta-thunderbird-linux.cfg mozBeta-thunderbird-linux64.cfg"
        "    2. ${script_name} mozBeta-thunderbird-linux64.cfg"
    )

    local help_line
    for help_line in "${help_lines[@]}"
    do
        log "${help_line}"
    done
}

# Writes one message to standard output, prefixed with the output of
# date, a colon and two spaces.
# Arguments: $1 - the message text (may be empty or omitted)
function log {
    local message="${1}"
    local timestamp
    timestamp="$(date)"
    printf '%s:  %s\n' "${timestamp}" "${message}"
}

# Subprocesses don't log in real time, due to synchronisation
# issues which can cause log entries to overwrite each other.
# Therefore this function outputs the log entries that were written to
# temporary files (named log.*) in "${TMPDIR}", and then deletes them.
#
# Globals:   TMPDIR (read) - directory holding the buffered log files
# Outputs:   contents of every log.* file, oldest modification time first
function flush_logs {
    local log_name
    # ls -rt is kept because the entries must be replayed oldest first.
    # IFS= and -r make read return each file name exactly as listed, so
    # backslashes or surrounding blanks in a name cannot prevent the file
    # from being printed and removed.
    ls -1rt "${TMPDIR}" | grep '^log\.' | while IFS= read -r log_name
    do
        cat "${TMPDIR}/${log_name}"
        rm "${TMPDIR}/${log_name}"
    done
}

# This function takes an update.xml url as an argument
# and then logs a list of config files and their line
# numbers, that led to this update.xml url being tested.
#
# Globals:   update_xml_urls (read) - path of the file listing, per line,
#            "<update url> <patch types> <cfg file> <cfg line number>"
# Arguments: $1 - the update.xml url to look up
# Outputs:   one log line per matching entry, quoting the config line
function show_cfg_file_entries {
    local update_xml_url="${1}"
    local match_line_no entry cfg_file cfg_line_no
    # -x makes grep compare the whole url field; a plain -F match would also
    # report entries whose url merely contains the requested url.
    cut -f1 -d' ' "${update_xml_urls}" | grep -Fxn -- "${update_xml_url}" | sed 's/:.*//' | while read -r match_line_no
    do
        # fetch the matching line once, then pick the fields out of it
        entry="$(sed -n -e "${match_line_no}p" "${update_xml_urls}")"
        cfg_file="$(printf '%s\n' "${entry}" | cut -f3 -d' ')"
        cfg_line_no="$(printf '%s\n' "${entry}" | cut -f4 -d' ')"
        log "        ${cfg_file} line ${cfg_line_no}: $(sed -n -e "${cfg_line_no}p" "${cfg_file}")"
    done
}

# Given a mar url, logs every update.xml url that referenced it (taken
# from the update_xml_to_mar.* files under "${TMPDIR}"), together with
# the patch type and mar size recorded there, and then the config file
# entries that caused each of those update.xml urls to be requested -
# giving the full trail of why the mar url was tested.
#
# Globals:   TMPDIR (read)
# Arguments: $1 - the mar url to trace back
function show_update_xml_entries {
    local mar_url="${1}"
    local mapping_file size_in_xml source_url kind_of_patch redirected_url
    grep -Frl "${mar_url}" "${TMPDIR}" | grep '/update_xml_to_mar\.' | while read mapping_file
    do
        # fields: <mar url> <mar size> <update.xml url> <patch type> <redirect url>
        size_in_xml="$(cut -f2 -d' ' "${mapping_file}")"
        source_url="$(cut -f3 -d' ' "${mapping_file}")"
        kind_of_patch="$(cut -f4 -d' ' "${mapping_file}")"
        redirected_url="$(cut -f5 -d' ' "${mapping_file}")"

        log "        ${source_url}"
        if [ -n "${redirected_url}" ]
        then
            log "            which redirected to: ${redirected_url}"
        fi
        log "            This contained an entry for:"
        log "                patch type: ${kind_of_patch}"
        log "                mar size: ${size_in_xml}"
        log "                mar url: ${mar_url}"
        log "            The update.xml url above was retrieved because of the following cfg file entries:"
        # indent the cfg entries a further eight spaces
        show_cfg_file_entries "${source_url}" | sed 's/        /                /'
    done
}

# Record exactly how the script was invoked: the script path followed by
# every argument, each wrapped in single quotes, on one timestamped line.
printf '%s:  Command called:' "$(date)"
printf " '%s'" "${0}" "${@}"
printf '\n'
log "From directory: '$(pwd)'"
log ''
log "Parsing arguments..."

# Defaults. Max procs lowered in bug 894368 to try to avoid spurious failures
MAX_PROCS=48
BAD_ARG=0
BAD_FILE=0

# -p MAX_PROCS overrides the parallelism, -h prints the help text and
# stops; anything else getopts reports is remembered as a bad argument.
while getopts p:h OPT
do
    if [ "${OPT}" == 'p' ]
    then
        MAX_PROCS="${OPTARG}"
    elif [ "${OPT}" == 'h' ]
    then
        usage
        exit
    else
        BAD_ARG=1
    fi
done

# drop the parsed options so that only the config file names remain
shift "$((OPTIND - 1))"

# invalid option specified
if [ "${BAD_ARG}" == 1 ]
then
    exit 66
fi

# At least one config file must remain after option parsing.
log "Checking one or more config files have been specified..."
if [ $# -lt 1 ]
then
    usage
    log "ERROR: You must specify one or more config files"
    exit 64
fi

# MAX_PROCS is handed to xargs -P later on, so it has to be a plain positive
# whole number. A pattern match is used rather than arithmetic evaluation:
# evaluating the value would accept expressions, negative numbers and
# variable names (e.g. "1+1", "-5", "OPTIND"), none of which xargs accepts.
# Zero is still rejected, as it was before.
log "Checking whether MAX_PROCS is a number..."
if ! [[ "${MAX_PROCS}" =~ ^0*[1-9][0-9]*$ ]]
then
    usage
    log "ERROR: MAX_PROCS must be a number (-p option); you specified '${MAX_PROCS}' - this is not a number."
    exit 65
fi

# config files are in updates subdirectory below this script
if ! cd "$(dirname "${0}")/updates" 2>/dev/null
then
    log "ERROR: Cannot cd into '$(dirname "${0}")/updates' from '$(pwd)'"
    exit 68
fi

# Build the list of config files to process. Arguments starting with
# "http" are downloaded into a temporary file; anything else must be an
# existing file (relative to the current directory). Problems are noted in
# BAD_FILE so that every argument is reported before the script stops.
log "Checking specified config files (and downloading them if necessary):"
log ''
configs=()
for file in "${@}"
do
    if [[ ${file} == http* ]]
    then
        log "  Downloading config file '${file}'"
        cfg=$(mktemp)
        if curl -fL "${file}" > "${cfg}"
        then
            log "  * '${file}' ok, downloaded to '${cfg}'"
            # quoted so the path is stored as a single array element
            configs+=("${cfg}")
        else
            log "Error downloading config file '${file}'"
            BAD_FILE=1
        fi
    elif [ -f "${file}" ]
    then
        log "  * '${file}' ok"
        # quoted: an unquoted expansion would split names containing
        # whitespace and expand glob characters in them
        configs+=("${file}")
    else
        log "  * '${file}' missing"
        BAD_FILE=1
    fi
done
log ''

# invalid config specified
if [ "${BAD_FILE}" == 1 ]
then
    log "ERROR: Unable to download config file(s) or config files are missing from repo - see above"
    exit 67
fi

# Tell the user that validation passed and that output will go quiet for a
# while, then note the start time (seconds since the epoch).
for banner_line in \
    "All checks completed successfully." \
    '' \
    "Starting stopwatch..." \
    '' \
    "Please be aware output will now be buffered up, and only displayed after completion." \
    "Therefore do not be alarmed if you see no output for several minutes." \
    "See https://bugzilla.mozilla.org/show_bug.cgi?id=863602#c5 for details." \
    ''
do
    log "${banner_line}"
done

START_TIME=$(date +%s)

# Create a temporary directory for all temp files, that can easily be
# deleted afterwards. See https://bugzilla.mozilla.org/show_bug.cgi?id=863602
# to understand why we write everything in distinct temporary files rather
# than writing to standard error/standard out or files shared across
# processes.
# Need to unset TMPDIR first since it affects mktemp behaviour on next line
unset TMPDIR
# Assignment and export are separate so that a mktemp failure is detected;
# "export VAR=$(cmd)" would report the status of export, not of mktemp.
if ! TMPDIR="$(mktemp -d -t final_verification.XXXXXXXXXX)" || [ ! -d "${TMPDIR}" ]
then
    log "ERROR: Unable to create a temporary working directory"
    exit 69
fi
export TMPDIR

# this temporary file will list all update urls that need to be checked, in this format:
# <update url> <comma separated list of patch types> <cfg file that requests it> <line number of config file>
# e.g.
# https://aus4.mozilla.org/update/3/Firefox/18.0/20130104154748/Linux_x86_64-gcc3/zh-TW/releasetest/default/default/default/update.xml?force=1 complete moz20-firefox-linux64-major.cfg 3
# https://aus4.mozilla.org/update/3/Firefox/18.0/20130104154748/Linux_x86_64-gcc3/zu/releasetest/default/default/default/update.xml?force=1 complete moz20-firefox-linux64.cfg 7
# https://aus4.mozilla.org/update/3/Firefox/19.0/20130215130331/Linux_x86_64-gcc3/ach/releasetest/default/default/default/update.xml?force=1 complete,partial moz20-firefox-linux64-major.cfg 11
# https://aus4.mozilla.org/update/3/Firefox/19.0/20130215130331/Linux_x86_64-gcc3/af/releasetest/default/default/default/update.xml?force=1 complete,partial moz20-firefox-linux64.cfg 17
if ! update_xml_urls="$(mktemp -t update_xml_urls.XXXXXXXXXX)"
then
    log "ERROR: Unable to create a temporary file for the list of update.xml urls"
    rm -rf "${TMPDIR}"
    exit 69
fi

####################################################################################
# And now a summary of all temp files that will get generated during this process...
#
# 1) mktemp -t failure.XXXXXXXXXX
#
# Each failure will generate a one line temp file, which is a space separated
# output of the error code, and the instance data for the failure.
# e.g.
#
# PATCH_TYPE_MISSING https://aus4.mozilla.org/update/3/Firefox/4.0b12/20110222205441/Linux_x86-gcc3/dummy-locale/releasetest/update.xml?force=1 complete https://aus4.mozilla.org/update/3/Firefox/4.0b12/20110222205441/Linux_x86-gcc3/dummy-locale/releasetest/default/default/default/update.xml?force=1
#
# 2) mktemp -t update_xml_to_mar.XXXXXXXXXX
#
# For each mar url referenced in an update.xml file, a temp file will be created to store the
# association between update.xml url and mar url. This is later used (e.g. in function
# show_update_xml_entries) to trace back the update.xml url(s) that led to a mar url being
# tested. It is also used to keep a full list of mar urls to test.
# e.g.
#
# <mar url> <mar size> <update.xml url> <patch type> <update.xml redirection url, if HTTP 302 returned>
#
# 3) mktemp -t log.XXXXXXXXXX
#
# For each log message logged by a subprocess, we will create a temp log file with the
# contents of the log message, since we cannot safely output the log message from the subprocess
# and guarantee that it will be correctly output. By buffering log output in individual log files
# we guarantee that log messages will not interfere with each other. We then flush them when all
# forked subprocesses have completed.
#
# 4) mktemp -t mar_headers.XXXXXXXXXX
#
# We keep a copy of the mar url http headers retrieved in one file per mar url.
#
# 5) mktemp -t update.xml.headers.XXXXXXXXXX
#
# We keep a copy of the update.xml http headers retrieved in one file per update.xml url.
#
# 6) mktemp -t update.xml.XXXXXXXXXX
#
# We keep a copy of each update.xml file retrieved in individual files.
####################################################################################


# generate full list of update.xml urls, followed by patch types,
# as defined in the specified config files - and write into "${update_xml_urls}" file
# Each output line has the form:
#   <update url> <comma separated patch types> <cfg file> <cfg line number>
aus_server="https://aus5.mozilla.org"
for cfg_file in "${configs[@]}"
do
    line_no=0
    # read -r keeps backslashes literal, so one physical config line is always
    # one iteration and line_no stays in step with the file; the extra
    # [ -n ... ] test processes a final line that lacks a trailing newline.
    sed -e 's/localtest/cdntest/' "${cfg_file}" | while read -r config_line || [ -n "${config_line}" ]
    do
        line_no=$((line_no + 1))
        # to avoid contamination between iterations, reset variables
        # each loop in case they are not declared
        # aus_server is not "cleared" each iteration - to be consistent with previous behaviour of old
        # final-verification.sh script - might be worth reviewing if we really want this behaviour
        release="" product="" platform="" build_id="" locales="" channel="" from="" patch_types="complete"
        # NOTE(review): eval runs the config line as shell code. Config files
        # may have been downloaded from a url given on the command line, so
        # only point this script at config sources you trust.
        eval "${config_line}"
        # locales is deliberately unquoted: it is a space separated list
        for locale in ${locales}
        do
            printf '%s %s %s %s\n' "${aus_server}/update/3/$product/$release/$build_id/$platform/$locale/$channel/default/default/default/update.xml?force=1" "${patch_types// /,}" "${cfg_file}" "${line_no}"
        done
    done
done > "${update_xml_urls}"

# Stage 1: hand every distinct "<update url> <patch types>" pair to
# ../get-update-xml.sh, running up to MAX_PROCS of them in parallel.
# Buffered log output is flushed whether or not the stage succeeded.
if ! cut -f1-2 -d' ' "${update_xml_urls}" | sort -u | xargs -n2 "-P${MAX_PROCS}" ../get-update-xml.sh
then
    flush_logs
    log "Error generating update requests"
    exit 70
fi

flush_logs

# Stage 2: hand every distinct "<mar url> <mar size>" pair recorded in the
# update_xml_to_mar.* files to ../test-mar-url.sh, again in parallel.
if ! find "${TMPDIR}" -name 'update_xml_to_mar.*' -type f -exec cat {} + | cut -f1-2 -d' ' | sort -u | xargs -n2 "-P${MAX_PROCS}" ../test-mar-url.sh
then
    flush_logs
    log "Error HEADing mar urls"
    exit 71
fi

flush_logs

log ''
log 'Stopping stopwatch...'
STOP_TIME=$(date +%s)

# Totals for the summary; spaces are removed from the wc output so that
# the values are bare numbers.
number_of_failures="$(find "${TMPDIR}" -name 'failure.*' -type f | wc -l | tr -d ' ')"
number_of_update_xml_urls="$(cut -f1 -d' ' "${update_xml_urls}" | sort -u | wc -l | tr -d ' ')"
number_of_mar_urls="$(find "${TMPDIR}" -name "update_xml_to_mar.*" -exec cat {} + | cut -f1 -d' ' | sort -u | wc -l | tr -d ' ')"

# Prints the contents of the file named by $1 laid out like log output:
# the first carriage return on each line is removed and every line is
# prefixed with the current date plus indentation. The '$a\' expression is
# a GNU sed idiom that adds a final newline when the file lacks one.
function log_file_contents {
    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${1}"
}

# Report the outcome. With no failures a short success message is logged;
# otherwise every failure.* file in "${TMPDIR}" is read (oldest first) and
# a detailed section is logged for each failure line it contains. Each line
# is "<failure code> <field> <field> ...", where the meaning of the fields
# depends on the failure code. exit_code is set for the end of the script.
if [ "${number_of_failures}" -eq 0 ]
then
    log ''
    log "All tests passed successfully."
    log ''
    exit_code=0
else
    log ''
    log '===================================='
    if [ "${number_of_failures}" -gt 1 ]
    then
        log "${number_of_failures} FAILURES"
    else
        log '1 FAILURE'
    fi
    # running failure number; only used inside the pipeline below
    failure=0
    ls -1tr "${TMPDIR}" | grep '^failure\.' | while IFS= read -r failure_file
    do
        # entry7 receives the remainder of the line, however many fields are left
        while read -r failure_code entry1 entry2 entry3 entry4 entry5 entry6 entry7
        do
            log '===================================='
            log ''
            case "${failure_code}" in

                UPDATE_XML_UNAVAILABLE)
                    update_xml_url="${entry1}"
                    update_xml="${entry2}"
                    update_xml_headers="${entry3}"
                    update_xml_debug="${entry4}"
                    update_xml_curl_exit_code="${entry5}"
                    log "FAILURE $((++failure)): Update xml file not available"
                    log ""
                    log "    Download url: ${update_xml_url}"
                    log "    Curl returned exit code: ${update_xml_curl_exit_code}"
                    log ""
                    log "    The HTTP headers were:"
                    log_file_contents "${update_xml_headers}"
                    log ""
                    log "    The full curl debug output was:"
                    log_file_contents "${update_xml_debug}"
                    log ""
                    log "    The returned update.xml file was:"
                    log_file_contents "${update_xml}"
                    log ""
                    log "    This url was tested because of the following cfg file entries:"
                    show_cfg_file_entries "${update_xml_url}"
                    log ""
                    ;;

                UPDATE_XML_REDIRECT_FAILED)
                    update_xml_url="${entry1}"
                    update_xml_actual_url="${entry2}"
                    update_xml="${entry3}"
                    update_xml_headers="${entry4}"
                    update_xml_debug="${entry5}"
                    update_xml_curl_exit_code="${entry6}"
                    log "FAILURE $((++failure)): Update xml file not available at *redirected* location"
                    log ""
                    log "    Download url: ${update_xml_url}"
                    log "    Redirected to: ${update_xml_actual_url}"
                    log "    It could not be downloaded from this url."
                    log "    Curl returned exit code: ${update_xml_curl_exit_code}"
                    log ""
                    log "    The HTTP headers were:"
                    log_file_contents "${update_xml_headers}"
                    log ""
                    log "    The full curl debug output was:"
                    log_file_contents "${update_xml_debug}"
                    log ""
                    log "    The returned update.xml file was:"
                    log_file_contents "${update_xml}"
                    log ""
                    log "    This url was tested because of the following cfg file entries:"
                    show_cfg_file_entries "${update_xml_url}"
                    log ""
                    ;;

                PATCH_TYPE_MISSING)
                    update_xml_url="${entry1}"
                    patch_type="${entry2}"
                    update_xml="${entry3}"
                    update_xml_headers="${entry4}"
                    update_xml_debug="${entry5}"
                    update_xml_actual_url="${entry6}"
                    log "FAILURE $((++failure)): Patch type '${patch_type}' not present in the downloaded update.xml file."
                    log ""
                    log "    Update xml file downloaded from: ${update_xml_url}"
                    [ -n "${update_xml_actual_url}" ] && log "    This redirected to the download url: ${update_xml_actual_url}"
                    log "    Curl returned exit code: 0 (success)"
                    log ""
                    log "    The HTTP headers were:"
                    log_file_contents "${update_xml_headers}"
                    log ""
                    log "    The full curl debug output was:"
                    log_file_contents "${update_xml_debug}"
                    log ""
                    log "    The returned update.xml file was:"
                    log_file_contents "${update_xml}"
                    log ""
                    log "    This url and patch type combination was tested due to the following cfg file entries:"
                    show_cfg_file_entries "${update_xml_url}"
                    log ""
                    ;;

                NO_MAR_FILE)
                    mar_url="${entry1}"
                    mar_headers_file="${entry2}"
                    mar_headers_debug_file="${entry3}"
                    mar_file_curl_exit_code="${entry4}"
                    mar_actual_url="${entry5}"
                    log "FAILURE $((++failure)): Could not retrieve mar file"
                    log ""
                    log "    Mar file url: ${mar_url}"
                    [ -n "${mar_actual_url}" ] && log "    This redirected to: ${mar_actual_url}"
                    log "    The mar file could not be downloaded from this location."
                    log "    Curl returned exit code: ${mar_file_curl_exit_code}"
                    log ""
                    log "    The HTTP headers were:"
                    log_file_contents "${mar_headers_file}"
                    log ""
                    log "    The full curl debug output was:"
                    log_file_contents "${mar_headers_debug_file}"
                    log ""
                    log "    The mar download was tested because it was referenced in the following update xml file(s):"
                    show_update_xml_entries "${mar_url}"
                    log ""
                    ;;

                MAR_FILE_WRONG_SIZE)
                    mar_url="${entry1}"
                    mar_required_size="${entry2}"
                    mar_actual_size="${entry3}"
                    mar_headers_file="${entry4}"
                    mar_headers_debug_file="${entry5}"
                    mar_file_curl_exit_code="${entry6}"
                    mar_actual_url="${entry7}"
                    log "FAILURE $((++failure)): Mar file is wrong size"
                    log ""
                    log "    Mar file url: ${mar_url}"
                    [ -n "${mar_actual_url}" ] && log "    This redirected to: ${mar_actual_url}"
                    log "    The http header of the mar file url says that the mar file is ${mar_actual_size} bytes."
                    log "    One or more of the following update.xml file(s) says that the file should be ${mar_required_size} bytes."
                    log ""
                    log "    These are the update xml file(s) that referenced this mar:"
                    show_update_xml_entries "${mar_url}"
                    log ""
                    log "    Curl returned exit code: ${mar_file_curl_exit_code}"
                    log ""
                    log "    The HTTP headers were:"
                    log_file_contents "${mar_headers_file}"
                    log ""
                    log "    The full curl debug output was:"
                    log_file_contents "${mar_headers_debug_file}"
                    log ""
                    ;;

                BAD_HTTP_RESPONSE_CODE_FOR_MAR)
                    mar_url="${entry1}"
                    mar_headers_file="${entry2}"
                    mar_headers_debug_file="${entry3}"
                    mar_file_curl_exit_code="${entry4}"
                    mar_actual_url="${entry5}"
                    # the last HTTP status line in the saved headers
                    http_response_code="$(sed -e "s/$(printf '\r')//" -n -e '/^HTTP\//p' "${mar_headers_file}" | tail -1)"
                    log "FAILURE $((++failure)): '${http_response_code}' for mar file"
                    log ""
                    log "    Mar file url: ${mar_url}"
                    [ -n "${mar_actual_url}" ] && log "    This redirected to: ${mar_actual_url}"
                    log ""
                    log "    These are the update xml file(s) that referenced this mar:"
                    show_update_xml_entries "${mar_url}"
                    log ""
                    log "    Curl returned exit code: ${mar_file_curl_exit_code}"
                    log ""
                    log "    The HTTP headers were:"
                    log_file_contents "${mar_headers_file}"
                    log ""
                    log "    The full curl debug output was:"
                    log_file_contents "${mar_headers_debug_file}"
                    log ""
                    ;;

                *)
                    log "ERROR: Unknown failure code - '${failure_code}'"
                    log "ERROR: This is a serious bug in this script."
                    log "ERROR: Only known failure codes are: UPDATE_XML_UNAVAILABLE, UPDATE_XML_REDIRECT_FAILED, PATCH_TYPE_MISSING, NO_MAR_FILE, MAR_FILE_WRONG_SIZE, BAD_HTTP_RESPONSE_CODE_FOR_MAR"
                    log ""
                    # entry7 is included so that none of the recorded data is dropped
                    log "FAILURE $((++failure)): Data from failure is: ${entry1} ${entry2} ${entry3} ${entry4} ${entry5} ${entry6} ${entry7}"
                    log ""
                    ;;

            esac
        done < "${TMPDIR}/${failure_file}"
    done
    exit_code=1
fi


# Final summary: number of config arguments, urls checked, failures found,
# the parallelism limit and the elapsed time between the two stopwatch
# readings. Afterwards the temporary directory is removed and the script
# exits with the status chosen by the failure report.
for summary_line in \
    '' \
    '====================================' \
    'KEY STATS' \
    '====================================' \
    '' \
    "Config files scanned:                       $#" \
    "Update xml files downloaded and parsed:     ${number_of_update_xml_urls}" \
    "Unique mar urls found:                      ${number_of_mar_urls}" \
    "Failures:                                   ${number_of_failures}" \
    "Parallel processes used (maximum limit):    ${MAX_PROCS}" \
    "Execution time:                             $((STOP_TIME-START_TIME)) seconds" \
    ''
do
    log "${summary_line}"
done

rm -rf "${TMPDIR}"
exit ${exit_code}