diff options
Diffstat (limited to '')
-rwxr-xr-x | src/rgw/rgw-gap-list | 456 | ||||
-rwxr-xr-x | src/rgw/rgw-gap-list-comparator | 119 |
2 files changed, 575 insertions, 0 deletions
diff --git a/src/rgw/rgw-gap-list b/src/rgw/rgw-gap-list new file mode 100755 index 000000000..5018cedd7 --- /dev/null +++ b/src/rgw/rgw-gap-list @@ -0,0 +1,456 @@ +#!/usr/bin/env bash + +# Last revision 2023-01-13 + +# NOTE: This script based based on rgw-orphan-list but doing the +# reverse calculation. + +# NOTE: The awk included in this script replaces the 'ceph-diff-sorted' +# utility but duplicates its functionality. This was done to minimize +# the number of times the massive data set must be iterated to complete +# the task. + +# IMPORTANT: Affects order produced by 'sort'. +export LC_ALL=C + +trap "exit 1" TERM +TOP_PID=$$ + +out_dir="$PWD" +timestamp=$(date -u +%Y%m%d%H%M) +lspools_err="${out_dir}/lspools-${timestamp}.error" +rados_out="${out_dir}/rados-${timestamp}.intermediate" +rados_err="${out_dir}/rados-${timestamp}.error" +rgwadmin_out="${out_dir}/radosgw-admin-${timestamp}.intermediate" +rgwadmin_err="${out_dir}/radosgw-admin-${timestamp}.error" +gap_out="${out_dir}/gap-list-${timestamp}.gap" + + +# field separator +# contains ascii 0xFE, designed to be a character that won't appear +# in normal output, can only be a single character due to use in the +# sort command +fs=$(echo -e "\xFE") + +log() { + echo $(date +%F\ %T) $(hostname -s) "$1" +} + +# +# checkReturn RETURNCODE MESSAGE TERMINATE +# RETURNCODE - ( usually $? ) of previous command +# MESSAGE - Message to print on non-zero return code +# TERMINATE - non-empty == terminate the script on non-zero return code +# +checkReturn() { + if [ $1 -ne 0 ]; then + error_addon="" + if [ ! -z "$3" ]; then + error_addon="; Terminating" + fi + log "ERROR: ${2} failed: returned ${1}${error_addon}" + if [ ! -z "$3" ]; then + >&2 echo + >&2 echo '***' + >&2 echo '*** WARNING: The results are incomplete. Do not use! ***' + >&2 echo '***' + kill -s TERM $TOP_PID + fi + fi +} + +prompt_pool() { + # note: all prompts go to stderr so stdout contains just the result + rados lspools >"$temp_file" 2>"$lspools_err" + checkReturn $? "Listing pools" 1 + + >&2 echo "" + >&2 echo "Available pools:" + >&2 sed 's/^/ /' "$temp_file" # list pools and indent + >&2 echo "" + >&2 echo "Which Rados Gateway Data pool do you want to search for gaps? " + >&2 echo "" + >&2 echo "NOTE: If your installation has multiple bucket data pools using " + >&2 echo " bucket placement policies, please enter a space separated " + >&2 echo " list of bucket data pools to enumerate." + >&2 echo "" + local mypool + read mypool + echo $mypool +} + +radosgw_radoslist() { + log "Running 'radosgw-admin bucket radoslist'." + rm -f "$rgwadmin_flag" &> /dev/null + radosgw-admin bucket radoslist --rgw-obj-fs="$fs" >"$rgwadmin_out" 2>"$rgwadmin_err" + RETVAL=$? + if [ "$RETVAL" -ne 0 ] ;then + touch "$rgwadmin_flag" + fi + checkReturn $RETVAL "radosgw-admin radoslist" 1 + log "Completed 'radosgw-admin bucket radoslist'." + + log "Sorting 'radosgw-admin bucket radoslist' output." + sort -T ${temp_prefix} --field-separator="$fs" -k1,1 -u "$rgwadmin_out" > "$rgwadmin_temp" + checkReturn $? "Sorting 'radosgw-admin bucket radoslist' output" 1 + log "Completed sorting 'radosgw-admin bucket radoslist'." + + log "Moving 'radosgw-admin bucket radoslist' output." + mv -f "$rgwadmin_temp" "$rgwadmin_out" + checkReturn $? "Moving 'radosgw-admin bucket radoslist' output" 1 + log "Completed moving 'radosgw-admin bucket radoslist' output." +} + +rados_ls() { + log "Starting 'rados ls' function." + rm -f "$rados_flag" &> /dev/null + rm -f "$rados_out" &> /dev/null + local mypool + for mypool in $pool; do + log "Running 'rados ls' on pool ${mypool}." + rados ls --pool="$mypool" >>"$rados_out" 2>"$rados_err" + RETVAL=$? + if [ "$RETVAL" -ne 0 ] ;then + touch "$rados_flag" + fi + checkReturn $RETVAL "'rados ls' on pool ${mypool}" 1 + log "Completed 'rados ls' on pool ${mypool}." + done + if [ ! -e "$rados_flag" ]; then + log "Sorting 'rados ls' output(s)." + sort -T ${temp_prefix} -u "$rados_out" >"$rados_temp" + checkReturn $? "Sorting 'rados ls' output(s)" 1 + + log "Moving sorted output(s)." + mv -f "$rados_temp" "$rados_out" + checkReturn $? "Moving temp file to output file" 1 + log "Sorting 'rados ls' output(s) complete." + fi +} + +usage() { + >&2 cat << EOF + +WARNING WARNING WARNING WARNING WARNING WARNING WARNING +WARNING: +WARNING: Command option format has changed. Please check closely. +WARNING: +WARNING WARNING WARNING WARNING WARNING WARNING WARNING + +Usage: $0 [-m] [-p <pool>] [-t <temp_dir>] + +Where: + -m Optionally, run the two listings in multiple threads. + --See NOTE below-- + + -p <pool> The RGW bucket data pool name, if omitted, pool name + will be prompted for during execution. + Multiple pools can be supplied as a space separated + double quoted list. + + -t <temp_dir> Optionally, set the directory to use for temp space. + This may be required if /tmp is low on space. + +NOTE: This tool is currently considered to be EXPERIMENTAL. + +NOTE: False positives are possible. False positives would likely + appear as objects that were never deleted and are fully + intact. All results should therefore be verified. + +NOTE: Multithread listing may increase performance but may also increase + the risk of false positives when the cluster is undergoing + modifications during the listing processes. In addition to the + above, false positives might also include objects that were + intentionally deleted. + +EOF + exit 1 +} + +multithread=0 +error=0 +temp_prefix="/tmp" +while getopts ":mp:t:" o; do + case "${o}" in + m) + multithread=1 + ;; + p) + pool=${OPTARG} + ;; + t) + if [ -d ${OPTARG} ]; then + temp_prefix=${OPTARG} + else + echo + echo "ERROR: Temporary directory does not exist: ${OPTARG}" + error=1 + fi + ;; + *) + echo + echo "ERROR: Unrecognized argument: ${o}" + error=1 + ;; + esac +done +shift $((OPTIND-1)) + +temp_file=${temp_prefix}/gap-tmp.$$ +rados_temp=${temp_prefix}/rados-tmp.$$ +rgwadmin_temp=${temp_prefix}/radosgw-admin-tmp.$$ +rados_flag=${temp_prefix}/rados-flag.$$ +rgwadmin_flag=${temp_prefix}/radosgw-admin-flag.$$ +incremental_grep_awk="${temp_prefix}/ig-${$}.awk" + +if [ $error -gt 0 ]; then + usage +fi + +if [ -z "$pool" ]; then + pool="$(prompt_pool)" +fi + +error=0 +rados ${CEPH_ARGS} lspools > ${temp_file} +checkReturn $? "rados lspools" 1 +for mypool in $pool; do + if [ $(grep -c "^${mypool}$" "${temp_file}") -eq 0 ]; then + echo + echo "ERROR: Supplied pool does not exist: ${mypool}" + error=1 + fi +done + +if [ $error -gt 0 ]; then + exit 1 +fi + +log "Pool is \"$pool\"." +log "Note: output files produced will be tagged with the current timestamp -- ${timestamp}." + +if [ $multithread -eq 1 ] ;then + startsecs=$(date +%s) + log "Starting multithread tasks..." + rados_ls & + radosgw_radoslist & + jobs &> /dev/null # without this, the myjobs count always equals 1 (confused) + myjobs=$(jobs | wc -l) + while [ $myjobs -gt 0 ]; do + # provide minutely status update + if [ $(( ($(date +%s)-$startsecs) % 60 )) -eq 0 ]; then + echo + deltasecs=$(( $(date +%s)-$startsecs )) + log "Waiting for listing tasks to complete. Running ${myjobs} tasks for ${deltasecs} seconds." + fi + sleep 1 + echo -n . + if [ -e "$rgw_admin_flag" ]; then + exit 1 + fi + if [ -e "$rados_flag" ]; then + exit 2 + fi + jobs &> /dev/null # without this, the myjobs count always equals 1 (confused) + myjobs=$(jobs | wc -l) + done + echo +else + rados_ls + radosgw_radoslist +fi + +if [ -e "$rgw_admin_flag" ]; then + exit 1 +fi + +if [ -e "$rados_flag" ]; then + exit 2 +fi + +for myfile in $rados_out $rgwadmin_out; do + if [ ! -s "${myfile}" ]; then + log "ERROR: Empty file detected: ${myfile}" + log "ERROR: RESULTS ARE INCOMPLETE - DO NOT USE" + exit 1 + fi +done + +# Create an awk script in a file for parsing the two command outoputs. +log "Creating awk script for comparing outputs: ${incremental_grep_awk}" + +cat <<"EOF" >$incremental_grep_awk +# This awk script is used by rgw-gap-list and will sequence through +# each line in $rados_out and $rgwadmin_out exactly once. +# +# During this iteration: +# * The 1st column of $rgwadmin_out is compared to the line of +# $rados_out. +# * If they are equal, the next line of $rados_out is read in and the +# next line of $rgwadmin_out is provided via normal awk iteration. +# * If a value appears in $rgwadmin_out, but not $rados_out, this +# indicates a possible deleted tail object and the accompanying +# bucket / user object name is output, assuming it had not been +# previously identified. +# - A map of outputed bucket / user object is maintained in memory +# * If a value appears in $rados_out, but not in $rgwadmin_out, the +# $rados_out file is iterated until the $rados_out line is equal +# or > (alphabetically) the value from the $rgwadmin_out file. + +function usage() { + print "Example Usage:">>"/dev/stderr" + print " # limit $fs to single char that will not appear in either output">>"/dev/stderr" + print " # The below is Octal 376, or Hex 0xFE">>"/dev/stderr" + print "">>"/dev/stderr" + print " $ fs=$(echo -e \"\\0376\") ">>"/dev/stderr" + print " $ rados ls -p default.rgw.buckets.data > rados_out.txt">>"/dev/stderr" + print " $ radosgw-admin bucket radoslist --rgw-obj-fs=\"$fs\" \\">>"/dev/stderr" + print " | sort --field-separator=\"$fs\" -k 1,1 > rgwadmin_out.txt">>"/dev/stderr" + print " ">>"/dev/stderr" + print " $ awk -F \"$fs\" \\">>"/dev/stderr" + print " -v filetwo=rados_out.txt \\">>"/dev/stderr" + print " -v map_out=MappedOutput.txt \\">>"/dev/stderr" + print " -f ig_awk \\">>"/dev/stderr" + print " rgwadmin_out.txt">>"/dev/stderr" + print "">>"/dev/stderr" + print " Result will be provided in the 'MappedOutput.txt' file in this">>"/dev/stderr" + print " example. If you'd prefer the output to be sorted, you can run">>"/dev/stderr" + print " $ sort MappedOutput.txt > SortedMappedOutput.txt">>"/dev/stderr" + print "">>"/dev/stderr" + print "">>"/dev/stderr" + exit 1 +} + +function get_date_time() { + dtstr="date +%F\\ %T" + dtstr | getline mydt + close(dtstr) + return mydt +} + +function status_out() { + printf("%s % 17d\t% 17d\t% 12d\n",get_date_time(),f1_count,f2_count,lineoutCount)>>"/dev/stderr" +} + +function advance_f2() { + if ((getline f2line<filetwo) <= 0) { + f2_eof=1 + } else { + f2_count++ + bcount=split(f2line,b,FS) + } +} + +function test_lines() { + if ($1==b[1]) { + advance_f2() + return 0 + } else if ($1<b[1]) { + line_out() + return 1 + } else { + return 2 + } +} + +function findnul(myfield) { + for(i=1;i<=split(myfield,a,"");i++) { + if(ord[a[i]]==0) { + return 1 + } + } + return 0 +} + +function line_out() { + if(findnul($1)) { + # If the RADOS object name has a NUL character, skip output + return + } + # Note: Intentionally using $2 and $NF below + # Use of $NF eliminates risk of exhausting input field count + if ($2" "$NF!=lastline) { + # Only output a given bucket/Obj combination once + printf("Bucket: \"%s\" Object: \"%s\"\n", $2, $NF)>>map_out + lastline=$2" "$NF + lineoutCount++ + } +} + +BEGIN { + if(filetwo==""||map_out=="") { + print "">>"/dev/stderr" + print "">>"/dev/stderr" + print "Missing parameter." + print "">>"/dev/stderr" + print "">>"/dev/stderr" + usage() + } + status_delta=100000 + f1_count=0 + f2_count=0 + advance_f2() + printf("%s File 1 Line Count\tFile 2 Line Count\tPotentially Impacted Objects\n",get_date_time())>>"/dev/stderr" + for(n=0;n<256;n++) { + ord[sprintf("%c",n)]=n + } +} + +{ + f1_count++ + if(f2_eof==0) { + if(test_lines()==2) { + while ($1>b[1]) { + advance_f2() + } + test_lines() + } + } else { + # If EOF hit, dump all remaining lines since they're missing + # from filetwo + line_out() + } + if((f1_count % status_delta)==0) { + status_out() + } +} + +END { + if(f1_count>0) { + status_out() + } +} + +EOF + + +log "Begin identifying potentially impacted user object names." + +echo -n > "$temp_file" # Ensure the file is empty +awk -F "$fs" -v filetwo=$rados_out -v map_out=$temp_file -f $incremental_grep_awk $rgwadmin_out +checkReturn $? "Identifying potentially impacted user object names" 1 + +log "Begin sorting results." +sort -T ${temp_prefix} "$temp_file" > "$gap_out" +checkReturn $? "sorting results" 1 +rm -f "$temp_file" + +found=$(wc -l < "$gap_out") +mydate=$(date +%F\ %T) + +log "Done." + +cat << EOF + +Found $found *possible* gaps. +The results can be found in "${gap_out}". + +Intermediate files: "${rados_out}" and "${rgwadmin_out}". + +*** +*** WARNING: This is EXPERIMENTAL code and the results should be used +*** with CAUTION and VERIFIED. Not everything listed is an +*** actual gap. EXPECT false positives. Every result +*** produced should be verified. +*** +EOF diff --git a/src/rgw/rgw-gap-list-comparator b/src/rgw/rgw-gap-list-comparator new file mode 100755 index 000000000..c377fdaf8 --- /dev/null +++ b/src/rgw/rgw-gap-list-comparator @@ -0,0 +1,119 @@ +#!/usr/bin/awk -f + +# +# Version 1 +# +# This awk script takes two, similarly sorted lists and outputs +# only the lines which exist in both lists. The script takes +# three inputs: +# +# ./rgw-gap-list-comparator \ +# -v filetwo=gap-list-B.txt \ +# -v matchout=matched_lines.txt \ +# gap-list-A.txt +# + +function usage() { + print "">>"/dev/stderr" + print "">>"/dev/stderr" + print "The idea behind the script is to eliminate false positive hits">>"/dev/stderr" + print "from the rgw-gap-list tool which are due to upload timing of new">>"/dev/stderr" + print "objects during the tool's execution. To use the tool properly,">>"/dev/stderr" + print "the following process should be followed:">>"/dev/stderr" + print "">>"/dev/stderr" + print "">>"/dev/stderr" + print " 1: Run the 'rgw-gap-list' tool twice">>"/dev/stderr" + print "">>"/dev/stderr" + print " 2: Sort the resulting map files:">>"/dev/stderr" + print " $ export LC_ALL=C">>"/dev/stderr" + print " $ sort gap-list-A.gap > gap-list-A.sorted.gap">>"/dev/stderr" + print " $ sort gap-list-B.gap > gap-list.B.sorted.gap">>"/dev/stderr" + print " -- Where the A / B in the gap-list file names are the date/time associated with each of the respective 'rgw-gap-list' outputs">>"/dev/stderr" + print "">>"/dev/stderr" + print " 3: Run the 'same_lines_only.awk' script over the two files:">>"/dev/stderr" + print " $ rm matched_lines.txt">>"/dev/stderr" + print " $ ./rgw-gap-list-comparator -v filetwo=gap-list-B.sorted.gap -v matchout=matched_lines.txt gap-list-A.sorted.gap">>"/dev/stderr" + print " -- Where the A / B in the gap-list file names are the date/time associated with each of the respective 'rgw-gap-list' outputs">>"/dev/stderr" + print "">>"/dev/stderr" + print " The resulting 'matched_lines.txt' will be a high confidence list of impacted objects with little to no false positives.">>"/dev/stderr" + print "">>"/dev/stderr" + print "">>"/dev/stderr" + exit 1 +} + +function advance_f2() { + if ((getline f2line<filetwo) <= 0) { + f2_eof=1 + } else { + f2_count++ + } +} + +function test_lines() { + if($0==f2line) { + print $0>>matchout + lineoutcount++ + advance_f2() + return 0 + } else if ($0>f2line) { + return 2 + } else { + return 1 + } +} + +function status_out() { + printf("%s % 17d\t% 17d\t% 12d\n",get_date_time(),f1_count,f2_count,lineoutcount)>>"/dev/stderr" +} + +function get_date_time() { + dtstr="date +%F\\ %T" + dtstr | getline mydt + close(dtstr) + return mydt +} + +BEGIN { + if(filetwo==""||matchout=="") { + print "">>"/dev/stderr" + print "">>"/dev/stderr" + print "Missing parameter." + print "">>"/dev/stderr" + print "">>"/dev/stderr" + usage() + } + + f1_count=0 + f2_count=0 + lineoutcount=0 + f2_eof=0 + statusevery=100000 + advance_f2() + printf("%s File 1 Line Count\tFile 2 Line Count\tPotentially Impacted Objects\n",get_date_time())>>"/dev/stderr" + status_out() +} + + +{ + f1_count++ + if(f2_eof==0) { + if(test_lines()==2) { + while($0>f2line && f2_eof==0) { + advance_f2() + } + test_lines() + } + } else { + exit 0 + } + if ((f1_count % statusevery)==0) { + status_out() + } +} + +END { + if(f1_count>0) { + status_out() + } +} + |