#!/bin/bash function usage { log "In the updates subdirectory of the directory this script is in," log "there are a bunch of config files. You should call this script," log "passing the names of one or more of those files as parameters" log "to this script." log "" log "This will validate that the update.xml files all exist for the" log "given config file, and that they report the correct file sizes" log "for the associated mar files, and that the associated mar files" log "are available on the update servers." log "" log "This script will spawn multiple curl processes to query the" log "snippets (update.xml file downloads) and the download urls in" log "parallel. The number of parallel curl processes can be managed" log "with the -p MAX_PROCS option." log "" log "Only the first three bytes of the mar files are downloaded" log "using curl -r 0-2 option to save time. GET requests are issued" log "rather than HEAD requests, since Akamai (one of our CDN" log "partners) caches GET and HEAD requests separately - therefore" log "they can be out-of-sync, and it is important that we validate" log "that the GET requests return the expected results." log "" log "Please note this script can run on linux and OS X. It has not" log "been tested on Windows, but may also work. It can be run" log "locally, and does not require access to the mozilla vpn or" log "any other special network, since the update servers are" log "available over the internet. However, it does require an" log "up-to-date checkout of the tools repository, as the updates/" log "subfolder changes over time, and reflects the currently" log "available updates. It makes no changes to the update servers" log "so there is no harm in running it. It simply generates a" log "report. However, please try to avoid hammering the update" log "servers aggressively, e.g. with thousands of parallel" log "processes. For example, feel free to run the examples below," log "first making sure that your source code checkout is up-to-" log "date on your own machine, to get the latest configs in the" log "updates/ subdirectory." log "" log "Usage:" log " $(basename "${0}") [-p MAX_PROCS] config1 [config2 config3 config4 ...]" log " $(basename "${0}") -h" log "" log "Examples:" log " 1. $(basename "${0}") -p 128 mozBeta-thunderbird-linux.cfg mozBeta-thunderbird-linux64.cfg" log " 2. $(basename "${0}") mozBeta-thunderbird-linux64.cfg" } function log { echo "$(date): ${1}" } # subprocesses don't log in real time, due to synchronisation # issues which can cause log entries to overwrite each other. # therefore this function outputs log entries written to # temporary files on disk, and then deletes them. function flush_logs { ls -1rt "${TMPDIR}" | grep '^log\.' | while read LOG do cat "${TMPDIR}/${LOG}" rm "${TMPDIR}/${LOG}" done } # this function takes an update.xml url as an argument # and then logs a list of config files and their line # numbers, that led to this update.xml url being tested function show_cfg_file_entries { local update_xml_url="${1}" cat "${update_xml_urls}" | cut -f1 -d' ' | grep -Fn "${update_xml_url}" | sed 's/:.*//' | while read match_line_no do cfg_file="$(sed -n -e "${match_line_no}p" "${update_xml_urls}" | cut -f3 -d' ')" cfg_line_no="$(sed -n -e "${match_line_no}p" "${update_xml_urls}" | cut -f4 -d' ')" log " ${cfg_file} line ${cfg_line_no}: $(sed -n -e "${cfg_line_no}p" "${cfg_file}")" done } # this function takes a mar url as an argument and then # logs information about which update.xml urls referenced # this mar url, and which config files referenced those # mar urls - so you have a full understanding of why this # mar url was ever tested function show_update_xml_entries { local mar_url="${1}" grep -Frl "${mar_url}" "${TMPDIR}" | grep '/update_xml_to_mar\.' | while read update_xml_to_mar do mar_size="$(cat "${update_xml_to_mar}" | cut -f2 -d' ')" update_xml_url="$(cat "${update_xml_to_mar}" | cut -f3 -d' ')" patch_type="$(cat "${update_xml_to_mar}" | cut -f4 -d' ')" update_xml_actual_url="$(cat "${update_xml_to_mar}" | cut -f5 -d' ')" log " ${update_xml_url}" [ -n "${update_xml_actual_url}" ] && log " which redirected to: ${update_xml_actual_url}" log " This contained an entry for:" log " patch type: ${patch_type}" log " mar size: ${mar_size}" log " mar url: ${mar_url}" log " The update.xml url above was retrieved because of the following cfg file entries:" show_cfg_file_entries "${update_xml_url}" | sed 's/ / /' done } echo -n "$(date): Command called:" for ((INDEX=0; INDEX<=$#; INDEX+=1)) do echo -n " '${!INDEX}'" done echo '' log "From directory: '$(pwd)'" log '' log "Parsing arguments..." # Max procs lowered in bug 894368 to try to avoid spurious failures MAX_PROCS=48 BAD_ARG=0 BAD_FILE=0 while getopts p:h OPT do case "${OPT}" in p) MAX_PROCS="${OPTARG}";; h) usage exit;; *) BAD_ARG=1;; esac done shift "$((OPTIND - 1))" # invalid option specified [ "${BAD_ARG}" == 1 ] && exit 66 log "Checking one or more config files have been specified..." if [ $# -lt 1 ] then usage log "ERROR: You must specify one or more config files" exit 64 fi log "Checking whether MAX_PROCS is a number..." if ! let x=MAX_PROCS 2>/dev/null then usage log "ERROR: MAX_PROCS must be a number (-p option); you specified '${MAX_PROCS}' - this is not a number." exit 65 fi # config files are in updates subdirectory below this script if ! cd "$(dirname "${0}")/updates" 2>/dev/null then log "ERROR: Cannot cd into '$(dirname "${0}")/updates' from '$(pwd)'" exit 68 fi log "Checking specified config files (and downloading them if necessary):" log '' configs=() for file in "${@}" do if [[ ${file} == http* ]] then log " Downloading config file '${file}'" cfg=$(mktemp) curl -fL --retry 5 --compressed "${file}" > "$cfg" if [ "$?" != 0 ]; then log "Error downloading config file '${file}'" BAD_FILE=1 else log " * '${file}' ok, downloaded to '${cfg}'" configs+=($cfg) fi elif [ -f "${file}" ] then log " * '${file}' ok" configs+=(${file}) else log " * '${file}' missing" BAD_FILE=1 fi done log '' # invalid config specified if [ "${BAD_FILE}" == 1 ] then log "ERROR: Unable to download config file(s) or config files are missing from repo - see above" exit 67 fi log "All checks completed successfully." log '' log "Starting stopwatch..." log '' log "Please be aware output will now be buffered up, and only displayed after completion." log "Therefore do not be alarmed if you see no output for several minutes." log "See https://bugzilla.mozilla.org/show_bug.cgi?id=863602#c5 for details". log '' START_TIME="$(date +%s)" # Create a temporary directory for all temp files, that can easily be # deleted afterwards. See https://bugzilla.mozilla.org/show_bug.cgi?id=863602 # to understand why we write everything in distinct temporary files rather # than writing to standard error/standard out or files shared across # processes. # Need to unset TMPDIR first since it affects mktemp behaviour on next line unset TMPDIR export TMPDIR="$(mktemp -d -t final_verification.XXXXXXXXXX)" # this temporary file will list all update urls that need to be checked, in this format: # # e.g. # https://aus4.mozilla.org/update/3/Firefox/18.0/20130104154748/Linux_x86_64-gcc3/zh-TW/releasetest/default/default/default/update.xml?force=1 complete moz20-firefox-linux64-major.cfg 3 # https://aus4.mozilla.org/update/3/Firefox/18.0/20130104154748/Linux_x86_64-gcc3/zu/releasetest/default/default/default/update.xml?force=1 complete moz20-firefox-linux64.cfg 7 # https://aus4.mozilla.org/update/3/Firefox/19.0/20130215130331/Linux_x86_64-gcc3/ach/releasetest/default/default/default/update.xml?force=1 complete,partial moz20-firefox-linux64-major.cfg 11 # https://aus4.mozilla.org/update/3/Firefox/19.0/20130215130331/Linux_x86_64-gcc3/af/releasetest/default/default/default/update.xml?force=1 complete,partial moz20-firefox-linux64.cfg 17 update_xml_urls="$(mktemp -t update_xml_urls.XXXXXXXXXX)" #################################################################################### # And now a summary of all temp files that will get generated during this process... # # 1) mktemp -t failure.XXXXXXXXXX # # Each failure will generate a one line temp file, which is a space separated # output of the error code, and the instance data for the failure. # e.g. # # PATCH_TYPE_MISSING https://aus4.mozilla.org/update/3/Firefox/4.0b12/20110222205441/Linux_x86-gcc3/dummy-locale/releasetest/update.xml?force=1 complete https://aus4.mozilla.org/update/3/Firefox/4.0b12/20110222205441/Linux_x86-gcc3/dummy-locale/releasetest/default/default/default/update.xml?force=1 # # 2) mktemp -t update_xml_to_mar.XXXXXXXXXX # # For each mar url referenced in an update.xml file, a temp file will be created to store the # association between update.xml url and mar url. This is later used (e.g. in function # show_update_xml_entries) to trace back the update.xml url(s) that led to a mar url being # tested. It is also used to keep a full list of mar urls to test. # e.g. # # # # 3) mktemp -t log.XXXXXXXXXX # # For each log message logged by a subprocesses, we will create a temp log file with the # contents of the log message, since we cannot safely output the log message from the subprocess # and guarantee that it will be correctly output. By buffering log output in individual log files # we guarantee that log messages will not interfere with each other. We then flush them when all # forked subprocesses have completed. # # 4) mktemp -t mar_headers.XXXXXXXXXX # # We keep a copy of the mar url http headers retrieved in one file per mar url. # # 5) mktemp -t update.xml.headers.XXXXXXXXXX # # We keep a copy of the update.xml http headers retrieved in one file per update.xml url. # # 6) mktemp -t update.xml.XXXXXXXXXX # # We keep a copy of each update.xml file retrieved in individual files. #################################################################################### # generate full list of update.xml urls, followed by patch types, # as defined in the specified config files - and write into "${update_xml_urls}" file aus_server="https://aus5.mozilla.org" for cfg_file in "${configs[@]}" do line_no=0 sed -e 's/localtest/cdntest/' "${cfg_file}" | while read config_line do let line_no++ # to avoid contamination between iterations, reset variables # each loop in case they are not declared # aus_server is not "cleared" each iteration - to be consistent with previous behaviour of old # final-verification.sh script - might be worth reviewing if we really want this behaviour release="" product="" platform="" build_id="" locales="" channel="" from="" patch_types="complete" eval "${config_line}" for locale in ${locales} do echo "${aus_server}/update/3/$product/$release/$build_id/$platform/$locale/$channel/default/default/default/update.xml?force=1" "${patch_types// /,}" "${cfg_file}" "${line_no}" done done done > "${update_xml_urls}" # download update.xml files and grab the mar urls from downloaded file for each patch type required cat "${update_xml_urls}" | cut -f1-2 -d' ' | sort -u | xargs -n2 "-P${MAX_PROCS}" ../get-update-xml.sh if [ "$?" != 0 ]; then flush_logs log "Error generating update requests" exit 70 fi flush_logs # download http header for each mar url find "${TMPDIR}" -name 'update_xml_to_mar.*' -type f | xargs cat | cut -f1-2 -d' ' | sort -u | xargs -n2 "-P${MAX_PROCS}" ../test-mar-url.sh if [ "$?" != 0 ]; then flush_logs log "Error HEADing mar urls" exit 71 fi flush_logs log '' log 'Stopping stopwatch...' STOP_TIME="$(date +%s)" number_of_failures="$(find "${TMPDIR}" -name 'failure.*' -type f | wc -l | sed 's/ //g')" number_of_update_xml_urls="$(cat "${update_xml_urls}" | cut -f1 -d' ' | sort -u | wc -l | sed 's/ //g')" number_of_mar_urls="$(find "${TMPDIR}" -name "update_xml_to_mar.*" | xargs cat | cut -f1 -d' ' | sort -u | wc -l | sed 's/ //g')" if [ "${number_of_failures}" -eq 0 ] then log log "All tests passed successfully." log exit_code=0 else log '' log '====================================' [ "${number_of_failures}" -gt 1 ] && log "${number_of_failures} FAILURES" || log '1 FAILURE' failure=0 ls -1tr "${TMPDIR}" | grep '^failure\.' | while read failure_file do while read failure_code entry1 entry2 entry3 entry4 entry5 entry6 entry7 do log '====================================' log '' case "${failure_code}" in UPDATE_XML_UNAVAILABLE) update_xml_url="${entry1}" update_xml="${entry2}" update_xml_headers="${entry3}" update_xml_debug="${entry4}" update_xml_curl_exit_code="${entry5}" log "FAILURE $((++failure)): Update xml file not available" log "" log " Download url: ${update_xml_url}" log " Curl returned exit code: ${update_xml_curl_exit_code}" log "" log " The HTTP headers were:" sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${update_xml_headers}" log "" log " The full curl debug output was:" sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${update_xml_debug}" log "" log " The returned update.xml file was:" sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${update_xml}" log "" log " This url was tested because of the following cfg file entries:" show_cfg_file_entries "${update_xml_url}" log "" ;; UPDATE_XML_REDIRECT_FAILED) update_xml_url="${entry1}" update_xml_actual_url="${entry2}" update_xml="${entry3}" update_xml_headers="${entry4}" update_xml_debug="${entry5}" update_xml_curl_exit_code="${entry6}" log "FAILURE $((++failure)): Update xml file not available at *redirected* location" log "" log " Download url: ${update_xml_url}" log " Redirected to: ${update_xml_actual_url}" log " It could not be downloaded from this url." log " Curl returned exit code: ${update_xml_curl_exit_code}" log "" log " The HTTP headers were:" sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${update_xml_headers}" log "" log " The full curl debug output was:" sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${update_xml_debug}" log "" log " The returned update.xml file was:" sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${update_xml}" log "" log " This url was tested because of the following cfg file entries:" show_cfg_file_entries "${update_xml_url}" log "" ;; PATCH_TYPE_MISSING) update_xml_url="${entry1}" patch_type="${entry2}" update_xml="${entry3}" update_xml_headers="${entry4}" update_xml_debug="${entry5}" update_xml_actual_url="${entry6}" log "FAILURE $((++failure)): Patch type '${patch_type}' not present in the downloaded update.xml file." log "" log " Update xml file downloaded from: ${update_xml_url}" [ -n "${update_xml_actual_url}" ] && log " This redirected to the download url: ${update_xml_actual_url}" log " Curl returned exit code: 0 (success)" log "" log " The HTTP headers were:" sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${update_xml_headers}" log "" log " The full curl debug output was:" sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${update_xml_debug}" log "" log " The returned update.xml file was:" sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${update_xml}" log "" log " This url and patch type combination was tested due to the following cfg file entries:" show_cfg_file_entries "${update_xml_url}" log "" ;; NO_MAR_FILE) mar_url="${entry1}" mar_headers_file="${entry2}" mar_headers_debug_file="${entry3}" mar_file_curl_exit_code="${entry4}" mar_actual_url="${entry5}" log "FAILURE $((++failure)): Could not retrieve mar file" log "" log " Mar file url: ${mar_url}" [ -n "${mar_actual_url}" ] && log " This redirected to: ${mar_actual_url}" log " The mar file could not be downloaded from this location." log " Curl returned exit code: ${mar_file_curl_exit_code}" log "" log " The HTTP headers were:" sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${mar_headers_file}" log "" log " The full curl debug output was:" sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${mar_headers_debug_file}" log "" log " The mar download was tested because it was referenced in the following update xml file(s):" show_update_xml_entries "${mar_url}" log "" ;; MAR_FILE_WRONG_SIZE) mar_url="${entry1}" mar_required_size="${entry2}" mar_actual_size="${entry3}" mar_headers_file="${entry4}" mar_headers_debug_file="${entry5}" mar_file_curl_exit_code="${entry6}" mar_actual_url="${entry7}" log "FAILURE $((++failure)): Mar file is wrong size" log "" log " Mar file url: ${mar_url}" [ -n "${mar_actual_url}" ] && log " This redirected to: ${mar_actual_url}" log " The http header of the mar file url says that the mar file is ${mar_actual_size} bytes." log " One or more of the following update.xml file(s) says that the file should be ${mar_required_size} bytes." log "" log " These are the update xml file(s) that referenced this mar:" show_update_xml_entries "${mar_url}" log "" log " Curl returned exit code: ${mar_file_curl_exit_code}" log "" log " The HTTP headers were:" sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${mar_headers_file}" log "" log " The full curl debug output was:" sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${mar_headers_debug_file}" log "" ;; BAD_HTTP_RESPONSE_CODE_FOR_MAR) mar_url="${entry1}" mar_headers_file="${entry2}" mar_headers_debug_file="${entry3}" mar_file_curl_exit_code="${entry4}" mar_actual_url="${entry5}" http_response_code="$(sed -e "s/$(printf '\r')//" -n -e '/^HTTP\//p' "${mar_headers_file}" | tail -1)" log "FAILURE $((++failure)): '${http_response_code}' for mar file" log "" log " Mar file url: ${mar_url}" [ -n "${mar_actual_url}" ] && log " This redirected to: ${mar_actual_url}" log "" log " These are the update xml file(s) that referenced this mar:" show_update_xml_entries "${mar_url}" log "" log " Curl returned exit code: ${mar_file_curl_exit_code}" log "" log " The HTTP headers were:" sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${mar_headers_file}" log "" log " The full curl debug output was:" sed -e "s/$(printf '\r')//" -e "s/^/$(date): /" -e '$a\' "${mar_headers_debug_file}" log "" ;; *) log "ERROR: Unknown failure code - '${failure_code}'" log "ERROR: This is a serious bug in this script." log "ERROR: Only known failure codes are: UPDATE_XML_UNAVAILABLE, UPDATE_XML_REDIRECT_FAILED, PATCH_TYPE_MISSING, NO_MAR_FILE, MAR_FILE_WRONG_SIZE, BAD_HTTP_RESPONSE_CODE_FOR_MAR" log "" log "FAILURE $((++failure)): Data from failure is: ${entry1} ${entry2} ${entry3} ${entry4} ${entry5} ${entry6}" log "" ;; esac done < "${TMPDIR}/${failure_file}" done exit_code=1 fi log '' log '====================================' log 'KEY STATS' log '====================================' log '' log "Config files scanned: ${#@}" log "Update xml files downloaded and parsed: ${number_of_update_xml_urls}" log "Unique mar urls found: ${number_of_mar_urls}" log "Failures: ${number_of_failures}" log "Parallel processes used (maximum limit): ${MAX_PROCS}" log "Execution time: $((STOP_TIME-START_TIME)) seconds" log '' rm -rf "${TMPDIR}" exit ${exit_code}