diff options
Diffstat (limited to '')
-rwxr-xr-x | tools/update-verify/release/final-verification.sh | 519 |
1 files changed, 519 insertions, 0 deletions
#!/bin/bash
#
# tools/update-verify/release/final-verification.sh
#
# Validates that the update.xml files described by one or more update-verify
# config files exist, that the mar files they reference are reachable, and
# that the mar file sizes match what the update.xml files report.
# Run with -h for the full description.
#
# Exit codes:
#   0   all tests passed
#   1   one or more failures were reported
#   64  no config file was specified
#   65  MAX_PROCS (-p) is not a positive integer
#   66  invalid command line option
#   67  a config file is missing or could not be downloaded
#   68  cannot cd into the updates/ subdirectory next to this script
#   69  the temporary working directory could not be created
#   70  the ../get-update-xml.sh run (via xargs) failed
#   71  the ../test-mar-url.sh run (via xargs) failed

# Logs the help text, one timestamped line at a time.
usage() {
  log "In the updates subdirectory of the directory this script is in,"
  log "there are a bunch of config files. You should call this script,"
  log "passing the names of one or more of those files as parameters"
  log "to this script."
  log ""
  log "This will validate that the update.xml files all exist for the"
  log "given config file, and that they report the correct file sizes"
  log "for the associated mar files, and that the associated mar files"
  log "are available on the update servers."
  log ""
  log "This script will spawn multiple curl processes to query the"
  log "snippets (update.xml file downloads) and the download urls in"
  log "parallel. The number of parallel curl processes can be managed"
  log "with the -p MAX_PROCS option."
  log ""
  log "Only the first three bytes of the mar files are downloaded"
  log "using curl -r 0-2 option to save time. GET requests are issued"
  log "rather than HEAD requests, since Akamai (one of our CDN"
  log "partners) caches GET and HEAD requests separately - therefore"
  log "they can be out-of-sync, and it is important that we validate"
  log "that the GET requests return the expected results."
  log ""
  log "Please note this script can run on linux and OS X. It has not"
  log "been tested on Windows, but may also work. It can be run"
  log "locally, and does not require access to the mozilla vpn or"
  log "any other special network, since the update servers are"
  log "available over the internet. However, it does require an"
  log "up-to-date checkout of the tools repository, as the updates/"
  log "subfolder changes over time, and reflects the currently"
  log "available updates. It makes no changes to the update servers"
  log "so there is no harm in running it. It simply generates a"
  log "report. However, please try to avoid hammering the update"
  log "servers aggressively, e.g. with thousands of parallel"
  log "processes. For example, feel free to run the examples below,"
  log "first making sure that your source code checkout is up-to-"
  log "date on your own machine, to get the latest configs in the"
  log "updates/ subdirectory."
  log ""
  log "Usage:"
  log " $(basename "${0}") [-p MAX_PROCS] config1 [config2 config3 config4 ...]"
  log " $(basename "${0}") -h"
  log ""
  log "Examples:"
  log " 1. $(basename "${0}") -p 128 mozBeta-thunderbird-linux.cfg mozBeta-thunderbird-linux64.cfg"
  log " 2. $(basename "${0}") mozBeta-thunderbird-linux64.cfg"
}

# Writes a single message to stdout, prefixed with the current date.
# Arguments: $1 - the message (may be empty or omitted)
log() {
  printf '%s\n' "$(date): ${1}"
}

# subprocesses don't log in real time, due to synchronisation
# issues which can cause log entries to overwrite each other.
# therefore this function outputs log entries written to
# temporary files on disk, and then deletes them.
flush_logs() {
  local log_file
  # ls -rt is used on purpose: the entries must be replayed oldest first,
  # an ordering that a plain glob cannot provide.
  ls -1rt "${TMPDIR}" | grep '^log\.' | while read -r log_file; do
    cat "${TMPDIR}/${log_file}"
    rm "${TMPDIR}/${log_file}"
  done
}

# this function takes an update.xml url as an argument
# and then logs a list of config files and their line
# numbers, that led to this update.xml url being tested
# Globals:   update_xml_urls (read) - file listing every url to check
# Arguments: $1 - the update.xml url to look up
show_cfg_file_entries() {
  local update_xml_url="${1}"
  local match_line_no cfg_file cfg_line_no
  cut -f1 -d' ' "${update_xml_urls}" \
    | grep -Fn "${update_xml_url}" \
    | sed 's/:.*//' \
    | while read -r match_line_no; do
      # fields 3 and 4 of the matching line are the cfg file and its line number
      cfg_file="$(sed -n -e "${match_line_no}p" "${update_xml_urls}" | cut -f3 -d' ')"
      cfg_line_no="$(sed -n -e "${match_line_no}p" "${update_xml_urls}" | cut -f4 -d' ')"
      log " ${cfg_file} line ${cfg_line_no}: $(sed -n -e "${cfg_line_no}p" "${cfg_file}")"
    done
}

# this function takes a mar url as an argument and then
# logs information about which update.xml urls referenced
# this mar url, and which config files referenced those
# mar urls - so you have a full understanding of why this
# mar url was ever tested
# Globals:   TMPDIR (read) - directory holding the update_xml_to_mar.* files
# Arguments: $1 - the mar url to look up
show_update_xml_entries() {
  local mar_url="${1}"
  local update_xml_to_mar mar_size update_xml_url patch_type update_xml_actual_url
  grep -Frl "${mar_url}" "${TMPDIR}" \
    | grep '/update_xml_to_mar\.' \
    | while read -r update_xml_to_mar; do
      # file format: <mar url> <mar size> <update.xml url> <patch type> <redirect url>
      mar_size="$(cut -f2 -d' ' "${update_xml_to_mar}")"
      update_xml_url="$(cut -f3 -d' ' "${update_xml_to_mar}")"
      patch_type="$(cut -f4 -d' ' "${update_xml_to_mar}")"
      update_xml_actual_url="$(cut -f5 -d' ' "${update_xml_to_mar}")"
      log " ${update_xml_url}"
      if [[ -n "${update_xml_actual_url}" ]]; then
        log " which redirected to: ${update_xml_actual_url}"
      fi
      log " This contained an entry for:"
      log " patch type: ${patch_type}"
      log " mar size: ${mar_size}"
      log " mar url: ${mar_url}"
      log " The update.xml url above was retrieved because of the following cfg file entries:"
      # NOTE(review): as written this substitution replaces a space with a
      # space; presumably it was meant to indent the nested entries - confirm
      # against the upstream file.
      show_cfg_file_entries "${update_xml_url}" | sed 's/ / /'
    done
}

# Prints a file with the first carriage return of each line removed and every
# line prefixed with the current date, so that it lines up with log() output.
# The final '$a\' expression makes sure the output ends with a newline.
# Arguments: $1 - path of the file to print
log_file_contents() {
  sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${1}"
}

# Logs the saved HTTP headers and the curl debug output of one request.
# Arguments: $1 - path of the headers file, $2 - path of the curl debug file
log_curl_evidence() {
  local headers_file="${1}"
  local debug_file="${2}"
  log " The HTTP headers were:"
  log_file_contents "${headers_file}"
  log ""
  log " The full curl debug output was:"
  log_file_contents "${debug_file}"
  log ""
}

# Removes the temporary working directory; installed as the EXIT trap so that
# it also runs on the early error exits.
cleanup() {
  if [[ -n "${TMPDIR}" && -d "${TMPDIR}" ]]; then
    rm -rf -- "${TMPDIR}"
  fi
}

# log the exact command line, including the script name itself
printf '%s' "$(date): Command called:"
printf " '%s'" "${0}" "${@}"
printf '\n'
log "From directory: '$(pwd)'"
log ''
log "Parsing arguments..."

# Max procs lowered in bug 894368 to try to avoid spurious failures
MAX_PROCS=48
BAD_ARG=0
BAD_FILE=0
while getopts p:h OPT; do
  case "${OPT}" in
    p) MAX_PROCS="${OPTARG}" ;;
    h)
      usage
      exit
      ;;
    *) BAD_ARG=1 ;;
  esac
done
shift "$((OPTIND - 1))"

# invalid option specified
if [[ "${BAD_ARG}" == 1 ]]; then
  exit 66
fi

log "Checking one or more config files have been specified..."
if [[ $# -lt 1 ]]; then
  usage
  log "ERROR: You must specify one or more config files"
  exit 64
fi

log "Checking whether MAX_PROCS is a number..."
# The value is matched as text rather than evaluated arithmetically, so that
# expressions, variable names and negative numbers are rejected before they
# reach xargs -P. Zero is rejected too.
positive_integer_re='^0*[1-9][0-9]*$'
if ! [[ "${MAX_PROCS}" =~ ${positive_integer_re} ]]; then
  usage
  log "ERROR: MAX_PROCS must be a number (-p option); you specified '${MAX_PROCS}' - this is not a number."
  exit 65
fi

# config files are in updates subdirectory below this script
if ! cd "$(dirname "${0}")/updates" 2>/dev/null; then
  log "ERROR: Cannot cd into '$(dirname "${0}")/updates' from '$(pwd)'"
  exit 68
fi

log "Checking specified config files (and downloading them if necessary):"
log ''
configs=()
for file in "${@}"; do
  if [[ ${file} == http* ]]; then
    log " Downloading config file '${file}'"
    cfg=$(mktemp)
    if ! curl -fL --retry 5 --compressed "${file}" > "${cfg}"; then
      log "Error downloading config file '${file}'"
      BAD_FILE=1
    else
      log " * '${file}' ok, downloaded to '${cfg}'"
      configs+=("${cfg}")
    fi
  elif [[ -f "${file}" ]]; then
    log " * '${file}' ok"
    configs+=("${file}")
  else
    log " * '${file}' missing"
    BAD_FILE=1
  fi
done
log ''

# invalid config specified
if [[ "${BAD_FILE}" == 1 ]]; then
  log "ERROR: Unable to download config file(s) or config files are missing from repo - see above"
  exit 67
fi

log "All checks completed successfully."
log ''
log "Starting stopwatch..."
log ''
log "Please be aware output will now be buffered up, and only displayed after completion."
log "Therefore do not be alarmed if you see no output for several minutes."
log "See https://bugzilla.mozilla.org/show_bug.cgi?id=863602#c5 for details."
log ''

START_TIME="$(date +%s)"

# Create a temporary directory for all temp files, that can easily be
# deleted afterwards. See https://bugzilla.mozilla.org/show_bug.cgi?id=863602
# to understand why we write everything in distinct temporary files rather
# than writing to standard error/standard out or files shared across
# processes.
# Need to unset TMPDIR first since it affects mktemp behaviour on next line
unset TMPDIR
if ! TMPDIR="$(mktemp -d -t final_verification.XXXXXXXXXX)"; then
  log "ERROR: Unable to create a temporary directory"
  exit 69
fi
# exported so that the child scripts create their temp files in this directory
export TMPDIR
trap cleanup EXIT

# this temporary file will list all update urls that need to be checked, in this format:
# <update url> <comma separated list of patch types> <cfg file that requests it> <line number of config file>
# e.g.
# https://aus4.mozilla.org/update/3/Firefox/18.0/20130104154748/Linux_x86_64-gcc3/zh-TW/releasetest/default/default/default/update.xml?force=1 complete moz20-firefox-linux64-major.cfg 3
# https://aus4.mozilla.org/update/3/Firefox/18.0/20130104154748/Linux_x86_64-gcc3/zu/releasetest/default/default/default/update.xml?force=1 complete moz20-firefox-linux64.cfg 7
# https://aus4.mozilla.org/update/3/Firefox/19.0/20130215130331/Linux_x86_64-gcc3/ach/releasetest/default/default/default/update.xml?force=1 complete,partial moz20-firefox-linux64-major.cfg 11
# https://aus4.mozilla.org/update/3/Firefox/19.0/20130215130331/Linux_x86_64-gcc3/af/releasetest/default/default/default/update.xml?force=1 complete,partial moz20-firefox-linux64.cfg 17
update_xml_urls="$(mktemp -t update_xml_urls.XXXXXXXXXX)"

####################################################################################
# And now a summary of all temp files that will get generated during this process...
#
# 1) mktemp -t failure.XXXXXXXXXX
#
# Each failure will generate a one line temp file, which is a space separated
# output of the error code, and the instance data for the failure.
# e.g.
#
# PATCH_TYPE_MISSING https://aus4.mozilla.org/update/3/Firefox/4.0b12/20110222205441/Linux_x86-gcc3/dummy-locale/releasetest/update.xml?force=1 complete https://aus4.mozilla.org/update/3/Firefox/4.0b12/20110222205441/Linux_x86-gcc3/dummy-locale/releasetest/default/default/default/update.xml?force=1
#
# 2) mktemp -t update_xml_to_mar.XXXXXXXXXX
#
# For each mar url referenced in an update.xml file, a temp file will be created to store the
# association between update.xml url and mar url. This is later used (e.g. in function
# show_update_xml_entries) to trace back the update.xml url(s) that led to a mar url being
# tested. It is also used to keep a full list of mar urls to test.
# e.g.
#
# <mar url> <mar size> <update.xml url> <patch type> <update.xml redirection url, if HTTP 302 returned>
#
# 3) mktemp -t log.XXXXXXXXXX
#
# For each log message logged by a subprocess, we will create a temp log file with the
# contents of the log message, since we cannot safely output the log message from the subprocess
# and guarantee that it will be correctly output. By buffering log output in individual log files
# we guarantee that log messages will not interfere with each other. We then flush them when all
# forked subprocesses have completed.
#
# 4) mktemp -t mar_headers.XXXXXXXXXX
#
# We keep a copy of the mar url http headers retrieved in one file per mar url.
#
# 5) mktemp -t update.xml.headers.XXXXXXXXXX
#
# We keep a copy of the update.xml http headers retrieved in one file per update.xml url.
#
# 6) mktemp -t update.xml.XXXXXXXXXX
#
# We keep a copy of each update.xml file retrieved in individual files.
####################################################################################

# generate full list of update.xml urls, followed by patch types,
# as defined in the specified config files - and write into "${update_xml_urls}" file
aus_server="https://aus5.mozilla.org"
for cfg_file in "${configs[@]}"; do
  line_no=0
  # read -r keeps one iteration per physical line, so line_no always matches
  # the line number in the config file
  sed -e 's/localtest/cdntest/' "${cfg_file}" | while read -r config_line; do
    line_no=$((line_no + 1))
    # to avoid contamination between iterations, reset variables
    # each loop in case they are not declared
    # aus_server is not "cleared" each iteration - to be consistent with previous behaviour of old
    # final-verification.sh script - might be worth reviewing if we really want this behaviour
    release="" product="" platform="" build_id="" locales="" channel="" from="" patch_types="complete"
    # WARNING: each config line is executed as shell code. Only pass config
    # files (or config urls) that you trust.
    eval "${config_line}"
    # locales is a space separated list, so word splitting is intended here
    for locale in ${locales}; do
      printf '%s %s %s %s\n' \
        "${aus_server}/update/3/$product/$release/$build_id/$platform/$locale/$channel/default/default/default/update.xml?force=1" \
        "${patch_types// /,}" \
        "${cfg_file}" \
        "${line_no}"
    done
  done
done > "${update_xml_urls}"

# download update.xml files and grab the mar urls from downloaded file for each patch type required
# (the status tested is that of xargs, the last command of the pipeline)
if ! cut -f1-2 -d' ' "${update_xml_urls}" \
  | sort -u \
  | xargs -n2 "-P${MAX_PROCS}" ../get-update-xml.sh; then
  flush_logs
  log "Error generating update requests"
  exit 70
fi

flush_logs

# download http header for each mar url
if ! find "${TMPDIR}" -name 'update_xml_to_mar.*' -type f -exec cat {} + \
  | cut -f1-2 -d' ' \
  | sort -u \
  | xargs -n2 "-P${MAX_PROCS}" ../test-mar-url.sh; then
  flush_logs
  log "Error HEADing mar urls"
  exit 71
fi

flush_logs

log ''
log 'Stopping stopwatch...'
STOP_TIME="$(date +%s)"

# sed strips the padding that some wc implementations put around the count
number_of_failures="$(find "${TMPDIR}" -name 'failure.*' -type f | wc -l | sed 's/ //g')"
number_of_update_xml_urls="$(cut -f1 -d' ' "${update_xml_urls}" | sort -u | wc -l | sed 's/ //g')"
number_of_mar_urls="$(find "${TMPDIR}" -name "update_xml_to_mar.*" -exec cat {} + | cut -f1 -d' ' | sort -u | wc -l | sed 's/ //g')"

if [[ "${number_of_failures}" -eq 0 ]]; then
  log ''
  log "All tests passed successfully."
  log ''
  exit_code=0
else
  log ''
  log '===================================='
  if [[ "${number_of_failures}" -gt 1 ]]; then
    log "${number_of_failures} FAILURES"
  else
    log '1 FAILURE'
  fi
  # the counter lives in the pipeline's subshell, which is fine because it is
  # only used inside the loop below
  failure=0
  # report the failures oldest first (hence ls -tr rather than a glob)
  ls -1tr "${TMPDIR}" | grep '^failure\.' | while read -r failure_file; do
    # entry7 receives the remainder of the line, however many fields are left
    while read -r failure_code entry1 entry2 entry3 entry4 entry5 entry6 entry7; do
      log '===================================='
      log ''
      case "${failure_code}" in

        UPDATE_XML_UNAVAILABLE)
          update_xml_url="${entry1}"
          update_xml="${entry2}"
          update_xml_headers="${entry3}"
          update_xml_debug="${entry4}"
          update_xml_curl_exit_code="${entry5}"
          log "FAILURE $((++failure)): Update xml file not available"
          log ""
          log " Download url: ${update_xml_url}"
          log " Curl returned exit code: ${update_xml_curl_exit_code}"
          log ""
          log_curl_evidence "${update_xml_headers}" "${update_xml_debug}"
          log " The returned update.xml file was:"
          log_file_contents "${update_xml}"
          log ""
          log " This url was tested because of the following cfg file entries:"
          show_cfg_file_entries "${update_xml_url}"
          log ""
          ;;

        UPDATE_XML_REDIRECT_FAILED)
          update_xml_url="${entry1}"
          update_xml_actual_url="${entry2}"
          update_xml="${entry3}"
          update_xml_headers="${entry4}"
          update_xml_debug="${entry5}"
          update_xml_curl_exit_code="${entry6}"
          log "FAILURE $((++failure)): Update xml file not available at *redirected* location"
          log ""
          log " Download url: ${update_xml_url}"
          log " Redirected to: ${update_xml_actual_url}"
          log " It could not be downloaded from this url."
          log " Curl returned exit code: ${update_xml_curl_exit_code}"
          log ""
          log_curl_evidence "${update_xml_headers}" "${update_xml_debug}"
          log " The returned update.xml file was:"
          log_file_contents "${update_xml}"
          log ""
          log " This url was tested because of the following cfg file entries:"
          show_cfg_file_entries "${update_xml_url}"
          log ""
          ;;

        PATCH_TYPE_MISSING)
          update_xml_url="${entry1}"
          patch_type="${entry2}"
          update_xml="${entry3}"
          update_xml_headers="${entry4}"
          update_xml_debug="${entry5}"
          update_xml_actual_url="${entry6}"
          log "FAILURE $((++failure)): Patch type '${patch_type}' not present in the downloaded update.xml file."
          log ""
          log " Update xml file downloaded from: ${update_xml_url}"
          if [[ -n "${update_xml_actual_url}" ]]; then
            log " This redirected to the download url: ${update_xml_actual_url}"
          fi
          log " Curl returned exit code: 0 (success)"
          log ""
          log_curl_evidence "${update_xml_headers}" "${update_xml_debug}"
          log " The returned update.xml file was:"
          log_file_contents "${update_xml}"
          log ""
          log " This url and patch type combination was tested due to the following cfg file entries:"
          show_cfg_file_entries "${update_xml_url}"
          log ""
          ;;

        NO_MAR_FILE)
          mar_url="${entry1}"
          mar_headers_file="${entry2}"
          mar_headers_debug_file="${entry3}"
          mar_file_curl_exit_code="${entry4}"
          mar_actual_url="${entry5}"
          log "FAILURE $((++failure)): Could not retrieve mar file"
          log ""
          log " Mar file url: ${mar_url}"
          if [[ -n "${mar_actual_url}" ]]; then
            log " This redirected to: ${mar_actual_url}"
          fi
          log " The mar file could not be downloaded from this location."
          log " Curl returned exit code: ${mar_file_curl_exit_code}"
          log ""
          log_curl_evidence "${mar_headers_file}" "${mar_headers_debug_file}"
          log " The mar download was tested because it was referenced in the following update xml file(s):"
          show_update_xml_entries "${mar_url}"
          log ""
          ;;

        MAR_FILE_WRONG_SIZE)
          mar_url="${entry1}"
          mar_required_size="${entry2}"
          mar_actual_size="${entry3}"
          mar_headers_file="${entry4}"
          mar_headers_debug_file="${entry5}"
          mar_file_curl_exit_code="${entry6}"
          mar_actual_url="${entry7}"
          log "FAILURE $((++failure)): Mar file is wrong size"
          log ""
          log " Mar file url: ${mar_url}"
          if [[ -n "${mar_actual_url}" ]]; then
            log " This redirected to: ${mar_actual_url}"
          fi
          log " The http header of the mar file url says that the mar file is ${mar_actual_size} bytes."
          log " One or more of the following update.xml file(s) says that the file should be ${mar_required_size} bytes."
          log ""
          log " These are the update xml file(s) that referenced this mar:"
          show_update_xml_entries "${mar_url}"
          log ""
          log " Curl returned exit code: ${mar_file_curl_exit_code}"
          log ""
          log_curl_evidence "${mar_headers_file}" "${mar_headers_debug_file}"
          ;;

        BAD_HTTP_RESPONSE_CODE_FOR_MAR)
          mar_url="${entry1}"
          mar_headers_file="${entry2}"
          mar_headers_debug_file="${entry3}"
          mar_file_curl_exit_code="${entry4}"
          mar_actual_url="${entry5}"
          # the last HTTP status line in the headers file is the one reported
          http_response_code="$(sed -e "s/$(printf '\r')//" -n -e '/^HTTP\//p' "${mar_headers_file}" | tail -1)"
          log "FAILURE $((++failure)): '${http_response_code}' for mar file"
          log ""
          log " Mar file url: ${mar_url}"
          if [[ -n "${mar_actual_url}" ]]; then
            log " This redirected to: ${mar_actual_url}"
          fi
          log ""
          log " These are the update xml file(s) that referenced this mar:"
          show_update_xml_entries "${mar_url}"
          log ""
          log " Curl returned exit code: ${mar_file_curl_exit_code}"
          log ""
          log_curl_evidence "${mar_headers_file}" "${mar_headers_debug_file}"
          ;;

        *)
          log "ERROR: Unknown failure code - '${failure_code}'"
          log "ERROR: This is a serious bug in this script."
          log "ERROR: Only known failure codes are: UPDATE_XML_UNAVAILABLE, UPDATE_XML_REDIRECT_FAILED, PATCH_TYPE_MISSING, NO_MAR_FILE, MAR_FILE_WRONG_SIZE, BAD_HTTP_RESPONSE_CODE_FOR_MAR"
          log ""
          # entry7 is included so that no part of the failure record is dropped
          log "FAILURE $((++failure)): Data from failure is: ${entry1} ${entry2} ${entry3} ${entry4} ${entry5} ${entry6} ${entry7}"
          log ""
          ;;

      esac
    done < "${TMPDIR}/${failure_file}"
  done
  exit_code=1
fi

log ''
log '===================================='
log 'KEY STATS'
log '===================================='
log ''
log "Config files scanned: ${#}"
log "Update xml files downloaded and parsed: ${number_of_update_xml_urls}"
log "Unique mar urls found: ${number_of_mar_urls}"
log "Failures: ${number_of_failures}"
log "Parallel processes used (maximum limit): ${MAX_PROCS}"
log "Execution time: $((STOP_TIME - START_TIME)) seconds"
log ''

# the temporary directory is removed by the EXIT trap
exit "${exit_code}"