1 files changed, 519 insertions, 0 deletions
diff --git a/tools/update-verify/release/final-verification.sh b/tools/update-verify/release/final-verification.sh
new file mode 100755
index 0000000000..879c64697f
--- /dev/null
+++ b/tools/update-verify/release/final-verification.sh
@@ -0,0 +1,519 @@
+#!/bin/bash
+
+# Print the built-in help text. Every line goes through log, so each one is
+# written to stdout with its own timestamp prefix.
+function usage {
+    local script_name help_line
+    script_name="$(basename "${0}")"
+
+    # The heredoc delimiter is left unquoted on purpose, so that
+    # ${script_name} is expanded in the usage and example lines.
+    while IFS= read -r help_line
+    do
+        log "${help_line}"
+    done <<EOF
+In the updates subdirectory of the directory this script is in,
+there are a bunch of config files. You should call this script,
+passing the names of one or more of those files as parameters
+to this script.
+
+This will validate that the update.xml files all exist for the
+given config file, and that they report the correct file sizes
+for the associated mar files, and that the associated mar files
+are available on the update servers.
+
+This script will spawn multiple curl processes to query the
+snippets (update.xml file downloads) and the download urls in
+parallel. The number of parallel curl processes can be managed
+with the -p MAX_PROCS option.
+
+Only the first three bytes of the mar files are downloaded
+using curl -r 0-2 option to save time. GET requests are issued
+rather than HEAD requests, since Akamai (one of our CDN
+partners) caches GET and HEAD requests separately - therefore
+they can be out-of-sync, and it is important that we validate
+that the GET requests return the expected results.
+
+Please note this script can run on linux and OS X. It has not
+been tested on Windows, but may also work. It can be run
+locally, and does not require access to the mozilla vpn or
+any other special network, since the update servers are
+available over the internet. However, it does require an
+up-to-date checkout of the tools repository, as the updates/
+subfolder changes over time, and reflects the currently
+available updates. It makes no changes to the update servers
+so there is no harm in running it. It simply generates a
+report. However, please try to avoid hammering the update
+servers aggressively, e.g. with thousands of parallel
+processes. For example, feel free to run the examples below,
+first making sure that your source code checkout is up-to-
+date on your own machine, to get the latest configs in the
+updates/ subdirectory.
+
+Usage:
+    ${script_name} [-p MAX_PROCS] config1 [config2 config3 config4 ...]
+    ${script_name} -h
+
+Examples:
+    1. ${script_name} -p 128 mozBeta-thunderbird-linux.cfg mozBeta-thunderbird-linux64.cfg
+    2. ${script_name} mozBeta-thunderbird-linux64.cfg
+EOF
+}
+
+function log {
+    echo "$(date):  ${1}"
+}
+
+# Subprocesses do not log in real time, because concurrent writers can
+# overwrite each other's log entries. Instead their log entries are written
+# to temporary files on disk. This function prints every file in ${TMPDIR}
+# whose name starts with "log." (least recently modified first) and deletes
+# each file once it has been printed.
+function flush_logs {
+    local pending_log
+    while read pending_log
+    do
+        cat "${TMPDIR}/${pending_log}"
+        rm "${TMPDIR}/${pending_log}"
+    done < <(ls -1rt "${TMPDIR}" | grep '^log\.')
+}
+
+# Given an update.xml url, log every config file entry that caused the url to
+# be tested: the config file name, the line number within that file, and the
+# text of that line.
+#   $1 - the update.xml url to look up in the "${update_xml_urls}" file
+function show_cfg_file_entries {
+    local update_xml_url="${1}"
+    local entry_no url_entry origin_cfg origin_line_no
+    # grep -n prefixes each match with "<line number>:"; only the number is kept
+    cut -f1 -d' ' "${update_xml_urls}" | grep -Fn "${update_xml_url}" | cut -f1 -d: | while read entry_no
+    do
+        # fetch the matching record once, then pick out the cfg file name
+        # (field 3) and the line number inside that cfg file (field 4)
+        url_entry="$(sed -n -e "${entry_no}p" "${update_xml_urls}")"
+        origin_cfg="$(cut -f3 -d' ' <<< "${url_entry}")"
+        origin_line_no="$(cut -f4 -d' ' <<< "${url_entry}")"
+        log "        ${origin_cfg} line ${origin_line_no}: $(sed -n -e "${origin_line_no}p" "${origin_cfg}")"
+    done
+}
+
+# Given a mar url, log which update.xml urls referenced it, and which config
+# file entries led to those update.xml urls being retrieved - giving a full
+# trace of why the mar url was tested.
+#   $1 - the mar url to search for in the update_xml_to_mar.* files under
+#        ${TMPDIR}
+function show_update_xml_entries {
+    local mar_url="${1}"
+    local mapping_file expected_size source_url kind redirect_url
+    grep -Frl "${mar_url}" "${TMPDIR}" | grep '/update_xml_to_mar\.' | while read mapping_file
+    do
+        # fields used: 2 = mar size, 3 = update.xml url, 4 = patch type,
+        # 5 = url the update.xml request was redirected to (may be empty)
+        expected_size="$(cut -f2 -d' ' "${mapping_file}")"
+        source_url="$(cut -f3 -d' ' "${mapping_file}")"
+        kind="$(cut -f4 -d' ' "${mapping_file}")"
+        redirect_url="$(cut -f5 -d' ' "${mapping_file}")"
+        log "        ${source_url}"
+        if [ -n "${redirect_url}" ]
+        then
+            log "            which redirected to: ${redirect_url}"
+        fi
+        log "            This contained an entry for:"
+        log "                patch type: ${kind}"
+        log "                mar size: ${expected_size}"
+        log "                mar url: ${mar_url}"
+        log "            The update.xml url above was retrieved because of the following cfg file entries:"
+        # widen the first run of eight spaces to sixteen so the nested
+        # entries line up under the heading above
+        show_cfg_file_entries "${source_url}" | sed 's/        /                /'
+    done
+}
+
+# Record exactly how the script was invoked (${0} followed by every argument,
+# each wrapped in single quotes, all on one line) and the directory it was
+# invoked from.
+printf '%s:  Command called:' "$(date)"
+printf " '%s'" "${0}" "${@}"
+printf '\n'
+log "From directory: '$(pwd)'"
+log ''
+log "Parsing arguments..."
+
+# Max procs lowered in bug 894368 to try to avoid spurious failures
+MAX_PROCS=48
+BAD_ARG=0
+BAD_FILE=0
+# -p MAX_PROCS overrides the default above; -h prints the help text and exits
+while getopts p:h OPT
+do
+    case "${OPT}" in
+        p) MAX_PROCS="${OPTARG}";;
+        h) usage
+           exit;;
+        *) BAD_ARG=1;;
+    esac
+done
+# drop the parsed options so that only config file names remain in "$@"
+shift "$((OPTIND - 1))"
+
+# invalid option specified
+if [ "${BAD_ARG}" == 1 ]
+then
+    exit 66
+fi
+
+log "Checking one or more config files have been specified..."
+if [ $# -lt 1 ]
+then
+    usage
+    log "ERROR: You must specify one or more config files"
+    exit 64
+fi
+
+log "Checking whether MAX_PROCS is a number..."
+# Only plain decimal digits are accepted. The value is not handed to shell
+# arithmetic until it is known to be digits only, because arithmetic evaluation
+# would also accept expressions and negative values, and would evaluate
+# whatever text was supplied on the command line.
+# 10# forces base 10, so a value with leading zeros is not read as octal.
+# Zero is rejected, as it was before.
+if ! [[ "${MAX_PROCS}" =~ ^[0-9]+$ ]] || (( 10#${MAX_PROCS} < 1 ))
+then
+    usage
+    log "ERROR: MAX_PROCS must be a positive number (-p option); you specified '${MAX_PROCS}' - this is not a positive number."
+    exit 65
+fi
+
+# config files are in updates subdirectory below this script
+if ! cd "$(dirname "${0}")/updates" 2>/dev/null
+then
+    log "ERROR: Cannot cd into '$(dirname "${0}")/updates' from '$(pwd)'"
+    exit 68
+fi
+
+log "Checking specified config files (and downloading them if necessary):"
+log ''
+configs=()
+for file in "${@}"
+do
+    if [[ ${file} == http* ]]
+    then
+        log "  Downloading config file '${file}'"
+        cfg=$(mktemp)
+        curl -fL --retry 5 --compressed "${file}" > "$cfg"
+        if [ "$?" != 0 ]; then
+            log "Error downloading config file '${file}'"
+            BAD_FILE=1
+        else
+            log "  * '${file}' ok, downloaded to '${cfg}'"
+            configs+=($cfg)
+        fi
+    elif [ -f "${file}" ]
+    then
+        log "  * '${file}' ok"
+        configs+=(${file})
+    else
+        log "  * '${file}' missing"
+        BAD_FILE=1
+    fi
+done
+log ''
+
+# invalid config specified
+if [ "${BAD_FILE}" == 1 ]
+then
+    log "ERROR: Unable to download config file(s) or config files are missing from repo - see above"
+    exit 67
+fi
+
+log "All checks completed successfully."
+log ''
+log "Starting stopwatch..."
+log ''
+log "Please be aware output will now be buffered up, and only displayed after completion."
+log "Therefore do not be alarmed if you see no output for several minutes."
+log "See https://bugzilla.mozilla.org/show_bug.cgi?id=863602#c5 for details."
+log ''
+
+START_TIME="$(date +%s)"
+
+# Create a temporary directory for all temp files, that can easily be
+# deleted afterwards. See https://bugzilla.mozilla.org/show_bug.cgi?id=863602
+# to understand why we write everything in distinct temporary files rather
+# than writing to standard error/standard out or files shared across
+# processes.
+# Need to unset TMPDIR first since it affects mktemp behaviour on next line
+unset TMPDIR
+# Assign first and export afterwards: "export VAR=$(cmd)" reports the exit
+# status of export, which would hide a mktemp failure.
+if ! TMPDIR="$(mktemp -d -t final_verification.XXXXXXXXXX)"
+then
+    log "ERROR: Unable to create a temporary directory"
+    exit 69
+fi
+# must be exported before the next mktemp call
+export TMPDIR
+
+# this temporary file will list all update urls that need to be checked, in this format:
+# <update url> <comma separated list of patch types> <cfg file that requests it> <line number of config file>
+# e.g.
+# https://aus4.mozilla.org/update/3/Firefox/18.0/20130104154748/Linux_x86_64-gcc3/zh-TW/releasetest/default/default/default/update.xml?force=1 complete moz20-firefox-linux64-major.cfg 3
+# https://aus4.mozilla.org/update/3/Firefox/18.0/20130104154748/Linux_x86_64-gcc3/zu/releasetest/default/default/default/update.xml?force=1 complete moz20-firefox-linux64.cfg 7
+# https://aus4.mozilla.org/update/3/Firefox/19.0/20130215130331/Linux_x86_64-gcc3/ach/releasetest/default/default/default/update.xml?force=1 complete,partial moz20-firefox-linux64-major.cfg 11
+# https://aus4.mozilla.org/update/3/Firefox/19.0/20130215130331/Linux_x86_64-gcc3/af/releasetest/default/default/default/update.xml?force=1 complete,partial moz20-firefox-linux64.cfg 17
+if ! update_xml_urls="$(mktemp -t update_xml_urls.XXXXXXXXXX)"
+then
+    log "ERROR: Unable to create a temporary file for the list of update.xml urls"
+    rm -rf "${TMPDIR}"
+    exit 69
+fi
+
+####################################################################################
+# And now a summary of all temp files that will get generated during this process...
+#
+# 1) mktemp -t failure.XXXXXXXXXX
+#
+# Each failure will generate a one line temp file, which is a space separated
+# output of the error code, and the instance data for the failure.
+# e.g.
+#
+# PATCH_TYPE_MISSING https://aus4.mozilla.org/update/3/Firefox/4.0b12/20110222205441/Linux_x86-gcc3/dummy-locale/releasetest/update.xml?force=1 complete https://aus4.mozilla.org/update/3/Firefox/4.0b12/20110222205441/Linux_x86-gcc3/dummy-locale/releasetest/default/default/default/update.xml?force=1
+#
+# 2) mktemp -t update_xml_to_mar.XXXXXXXXXX
+#
+# For each mar url referenced in an update.xml file, a temp file will be created to store the
+# association between update.xml url and mar url. This is later used (e.g. in function
+# show_update_xml_entries) to trace back the update.xml url(s) that led to a mar url being
+# tested. It is also used to keep a full list of mar urls to test.
+# e.g.
+#
+# <mar url> <mar size> <update.xml url> <patch type> <update.xml redirection url, if HTTP 302 returned>
+#
+# 3) mktemp -t log.XXXXXXXXXX
+#
+# For each log message logged by a subprocess, we will create a temp log file with the
+# contents of the log message, since we cannot safely output the log message from the subprocess
+# and guarantee that it will be correctly output. By buffering log output in individual log files
+# we guarantee that log messages will not interfere with each other. We then flush them when all
+# forked subprocesses have completed.
+#
+# 4) mktemp -t mar_headers.XXXXXXXXXX
+#
+# We keep a copy of the mar url http headers retrieved in one file per mar url.
+#
+# 5) mktemp -t update.xml.headers.XXXXXXXXXX
+#
+# We keep a copy of the update.xml http headers retrieved in one file per update.xml url.
+#
+# 6) mktemp -t update.xml.XXXXXXXXXX
+#
+# We keep a copy of each update.xml file retrieved in individual files.
+####################################################################################
+
+
+# generate full list of update.xml urls, followed by patch types,
+# as defined in the specified config files - and write into "${update_xml_urls}" file
+# Each output line also records the config file and the line number within it
+# that produced the url.
+aus_server="https://aus5.mozilla.org"
+for cfg_file in "${configs[@]}"
+do
+    line_no=0
+    # The loop is the right-hand side of a pipe and therefore runs in a subshell:
+    # variables assigned by the eval below do not persist beyond this config file.
+    # read -r keeps backslashes intact, and the "|| [ -n ... ]" clause makes sure a
+    # final line without a trailing newline is still processed.
+    sed -e 's/localtest/cdntest/' "${cfg_file}" | while IFS= read -r config_line || [ -n "${config_line}" ]
+    do
+        line_no=$((line_no + 1))
+        # to avoid contamination between iterations, reset variables
+        # each loop in case they are not declared
+        # aus_server is not "cleared" each iteration - to be consistent with previous behaviour of old
+        # final-verification.sh script - might be worth reviewing if we really want this behaviour
+        release="" product="" platform="" build_id="" locales="" channel="" from="" patch_types="complete"
+        # NOTE(review): eval executes the config line as shell code. Config files can
+        # be downloaded from a url given on the command line, so only trusted config
+        # sources should be used.
+        eval "${config_line}"
+        # ${locales} is intentionally unquoted: it holds a whitespace separated list
+        for locale in ${locales}
+        do
+            echo "${aus_server}/update/3/$product/$release/$build_id/$platform/$locale/$channel/default/default/default/update.xml?force=1" "${patch_types// /,}" "${cfg_file}" "${line_no}"
+        done
+    done
+done > "${update_xml_urls}"
+
+# download update.xml files and grab the mar urls from downloaded file for each patch type required
+# Every distinct "<update.xml url> <patch types>" pair is handed to
+# ../get-update-xml.sh as two arguments, with up to MAX_PROCS invocations
+# running in parallel. The pipeline status tested here is that of xargs.
+if ! cut -f1-2 -d' ' "${update_xml_urls}" | sort -u | xargs -n2 "-P${MAX_PROCS}" ../get-update-xml.sh
+then
+    flush_logs
+    log "Error generating update requests"
+    exit 70
+fi
+
+flush_logs
+
+# download http header for each mar url
+# Every distinct "<mar url> <mar size>" pair recorded in the update_xml_to_mar.*
+# files is handed to ../test-mar-url.sh in the same way.
+if ! find "${TMPDIR}" -name 'update_xml_to_mar.*' -type f | xargs cat | cut -f1-2 -d' ' | sort -u | xargs -n2 "-P${MAX_PROCS}" ../test-mar-url.sh
+then
+    flush_logs
+    log "Error HEADing mar urls"
+    exit 71
+fi
+
+flush_logs
+
+log ''
+log 'Stopping stopwatch...'
+STOP_TIME="$(date +%s)"
+
+# Gather the figures for the final report. Any spaces in the wc output are
+# removed so that only the digits remain.
+# number of failure.* files left behind in the temp directory
+number_of_failures="$(find "${TMPDIR}" -name 'failure.*' -type f | wc -l | tr -d ' ')"
+# number of distinct update.xml urls generated from the config files
+number_of_update_xml_urls="$(cut -f1 -d' ' "${update_xml_urls}" | sort -u | wc -l | tr -d ' ')"
+# number of distinct mar urls recorded in the update_xml_to_mar.* files
+number_of_mar_urls="$(find "${TMPDIR}" -name "update_xml_to_mar.*" | xargs cat | cut -f1 -d' ' | sort -u | wc -l | tr -d ' ')"
+
+if [ "${number_of_failures}" -eq 0 ]
+then
+    log
+    log "All tests passed successfully."
+    log
+    exit_code=0
+else
+    log ''
+    log '===================================='
+    [ "${number_of_failures}" -gt 1 ] && log "${number_of_failures} FAILURES" || log '1 FAILURE'
+    failure=0
+    ls -1tr "${TMPDIR}" | grep '^failure\.' | while read failure_file
+    do
+        while read failure_code entry1 entry2 entry3 entry4 entry5 entry6 entry7
+        do
+            log '===================================='
+            log ''
+            case "${failure_code}" in
+
+                UPDATE_XML_UNAVAILABLE)
+                    update_xml_url="${entry1}"
+                    update_xml="${entry2}"
+                    update_xml_headers="${entry3}"
+                    update_xml_debug="${entry4}"
+                    update_xml_curl_exit_code="${entry5}"
+                    log "FAILURE $((++failure)): Update xml file not available"
+                    log ""
+                    log "    Download url: ${update_xml_url}"
+                    log "    Curl returned exit code: ${update_xml_curl_exit_code}"
+                    log ""
+                    log "    The HTTP headers were:"
+                    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${update_xml_headers}"
+                    log ""
+                    log "    The full curl debug output was:"
+                    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${update_xml_debug}"
+                    log ""
+                    log "    The returned update.xml file was:"
+                    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${update_xml}"
+                    log ""
+                    log "    This url was tested because of the following cfg file entries:"
+                    show_cfg_file_entries "${update_xml_url}"
+                    log ""
+
+                    ;;
+
+                UPDATE_XML_REDIRECT_FAILED)
+                    update_xml_url="${entry1}"
+                    update_xml_actual_url="${entry2}"
+                    update_xml="${entry3}"
+                    update_xml_headers="${entry4}"
+                    update_xml_debug="${entry5}"
+                    update_xml_curl_exit_code="${entry6}"
+                    log "FAILURE $((++failure)): Update xml file not available at *redirected* location"
+                    log ""
+                    log "    Download url: ${update_xml_url}"
+                    log "    Redirected to: ${update_xml_actual_url}"
+                    log "    It could not be downloaded from this url."
+                    log "    Curl returned exit code: ${update_xml_curl_exit_code}"
+                    log ""
+                    log "    The HTTP headers were:"
+                    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${update_xml_headers}"
+                    log ""
+                    log "    The full curl debug output was:"
+                    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${update_xml_debug}"
+                    log ""
+                    log "    The returned update.xml file was:"
+                    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${update_xml}"
+                    log ""
+                    log "    This url was tested because of the following cfg file entries:"
+                    show_cfg_file_entries "${update_xml_url}"
+                    log ""
+                    ;;
+
+                PATCH_TYPE_MISSING)
+                    update_xml_url="${entry1}"
+                    patch_type="${entry2}"
+                    update_xml="${entry3}"
+                    update_xml_headers="${entry4}"
+                    update_xml_debug="${entry5}"
+                    update_xml_actual_url="${entry6}"
+                    log "FAILURE $((++failure)): Patch type '${patch_type}' not present in the downloaded update.xml file."
+                    log ""
+                    log "    Update xml file downloaded from: ${update_xml_url}"
+                    [ -n "${update_xml_actual_url}" ] && log "    This redirected to the download url: ${update_xml_actual_url}"
+                    log "    Curl returned exit code: 0 (success)"
+                    log ""
+                    log "    The HTTP headers were:"
+                    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${update_xml_headers}"
+                    log ""
+                    log "    The full curl debug output was:"
+                    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${update_xml_debug}"
+                    log ""
+                    log "    The returned update.xml file was:"
+                    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${update_xml}"
+                    log ""
+                    log "    This url and patch type combination was tested due to the following cfg file entries:"
+                    show_cfg_file_entries "${update_xml_url}"
+                    log ""
+                    ;;
+
+                NO_MAR_FILE)
+                    mar_url="${entry1}"
+                    mar_headers_file="${entry2}"
+                    mar_headers_debug_file="${entry3}"
+                    mar_file_curl_exit_code="${entry4}"
+                    mar_actual_url="${entry5}"
+                    log "FAILURE $((++failure)): Could not retrieve mar file"
+                    log ""
+                    log "    Mar file url: ${mar_url}"
+                    [ -n "${mar_actual_url}" ] && log "    This redirected to: ${mar_actual_url}"
+                    log "    The mar file could not be downloaded from this location."
+                    log "    Curl returned exit code: ${mar_file_curl_exit_code}"
+                    log ""
+                    log "    The HTTP headers were:"
+                    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${mar_headers_file}"
+                    log ""
+                    log "    The full curl debug output was:"
+                    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${mar_headers_debug_file}"
+                    log ""
+                    log "    The mar download was tested because it was referenced in the following update xml file(s):"
+                    show_update_xml_entries "${mar_url}"
+                    log ""
+                    ;;
+
+                MAR_FILE_WRONG_SIZE)
+                    mar_url="${entry1}"
+                    mar_required_size="${entry2}"
+                    mar_actual_size="${entry3}"
+                    mar_headers_file="${entry4}"
+                    mar_headers_debug_file="${entry5}"
+                    mar_file_curl_exit_code="${entry6}"
+                    mar_actual_url="${entry7}"
+                    log "FAILURE $((++failure)): Mar file is wrong size"
+                    log ""
+                    log "    Mar file url: ${mar_url}"
+                    [ -n "${mar_actual_url}" ] && log "    This redirected to: ${mar_actual_url}"
+                    log "    The http header of the mar file url says that the mar file is ${mar_actual_size} bytes."
+                    log "    One or more of the following update.xml file(s) says that the file should be ${mar_required_size} bytes."
+                    log ""
+                    log "    These are the update xml file(s) that referenced this mar:"
+                    show_update_xml_entries "${mar_url}"
+                    log ""
+                    log "    Curl returned exit code: ${mar_file_curl_exit_code}"
+                    log ""
+                    log "    The HTTP headers were:"
+                    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${mar_headers_file}"
+                    log ""
+                    log "    The full curl debug output was:"
+                    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${mar_headers_debug_file}"
+                    log ""
+                    ;;
+
+                BAD_HTTP_RESPONSE_CODE_FOR_MAR)
+                    mar_url="${entry1}"
+                    mar_headers_file="${entry2}"
+                    mar_headers_debug_file="${entry3}"
+                    mar_file_curl_exit_code="${entry4}"
+                    mar_actual_url="${entry5}"
+                    http_response_code="$(sed -e "s/$(printf '\r')//" -n -e '/^HTTP\//p' "${mar_headers_file}" | tail -1)"
+                    log "FAILURE $((++failure)): '${http_response_code}' for mar file"
+                    log ""
+                    log "    Mar file url: ${mar_url}"
+                    [ -n "${mar_actual_url}" ] && log "    This redirected to: ${mar_actual_url}"
+                    log ""
+                    log "    These are the update xml file(s) that referenced this mar:"
+                    show_update_xml_entries "${mar_url}"
+                    log ""
+                    log "    Curl returned exit code: ${mar_file_curl_exit_code}"
+                    log ""
+                    log "    The HTTP headers were:"
+                    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${mar_headers_file}"
+                    log ""
+                    log "    The full curl debug output was:"
+                    sed -e "s/$(printf '\r')//" -e "s/^/$(date):          /" -e '$a\' "${mar_headers_debug_file}"
+                    log ""
+                    ;;
+
+                *)
+                    log "ERROR: Unknown failure code - '${failure_code}'"
+                    log "ERROR: This is a serious bug in this script."
+                    log "ERROR: Only known failure codes are: UPDATE_XML_UNAVAILABLE, UPDATE_XML_REDIRECT_FAILED, PATCH_TYPE_MISSING, NO_MAR_FILE, MAR_FILE_WRONG_SIZE, BAD_HTTP_RESPONSE_CODE_FOR_MAR"
+                    log ""
+                    log "FAILURE $((++failure)): Data from failure is: ${entry1} ${entry2} ${entry3} ${entry4} ${entry5} ${entry6}"
+                    log ""
+                    ;;
+
+            esac
+        done < "${TMPDIR}/${failure_file}"
+    done
+    exit_code=1
+fi
+
+
+log ''
+log '===================================='
+log 'KEY STATS'
+log '===================================='
+log ''
+log "Config files scanned:                       ${#@}"
+log "Update xml files downloaded and parsed:     ${number_of_update_xml_urls}"
+log "Unique mar urls found:                      ${number_of_mar_urls}"
+log "Failures:                                   ${number_of_failures}"
+log "Parallel processes used (maximum limit):    ${MAX_PROCS}"
+log "Execution time:                             $((STOP_TIME-START_TIME)) seconds"
+log ''
+
+rm -rf "${TMPDIR}"
+exit ${exit_code}