#!/usr/bin/env bash

# Last revision 2023-01-13

# Purpose: list RGW objects whose RADOS tail objects appear to be missing
# ("gaps"). It compares the output of 'radosgw-admin bucket radoslist'
# (what RGW expects to exist) with 'rados ls' on the data pool(s) (what
# actually exists) and reports the bucket / object names whose expected
# RADOS objects were not found.

# NOTE: This script is based on rgw-orphan-list but does the reverse
# calculation.

# NOTE: The awk included in this script replaces the 'ceph-diff-sorted'
# utility but duplicates its functionality. This was done to minimize
# the number of times the massive data set must be iterated to complete
# the task.

# IMPORTANT: Affects order produced by 'sort' (and awk string comparison).
export LC_ALL=C

# checkReturn (possibly running in a background subshell) signals the
# top-level shell with TERM to abort the whole run.
trap "exit 1" TERM
TOP_PID=$$

out_dir="$PWD"

timestamp=$(date -u +%Y%m%d%H%M)
lspools_err="${out_dir}/lspools-${timestamp}.error"
rados_out="${out_dir}/rados-${timestamp}.intermediate"
rados_err="${out_dir}/rados-${timestamp}.error"
rgwadmin_out="${out_dir}/radosgw-admin-${timestamp}.intermediate"
rgwadmin_err="${out_dir}/radosgw-admin-${timestamp}.error"
gap_out="${out_dir}/gap-list-${timestamp}.gap"

# field separator
# contains ascii 0xFE, designed to be a character that won't appear
# in normal output, can only be a single character due to use in the
# sort command
fs=$(printf '\376')

#
# log MESSAGE
#   Print MESSAGE to stdout prefixed with a local timestamp and the
#   short hostname.
#
log() {
  echo "$(date +%F\ %T) $(hostname -s) $1"
}

#
# checkReturn RETURNCODE MESSAGE TERMINATE
#   RETURNCODE - ( usually $? ) of previous command
#   MESSAGE    - Message to print on non-zero return code
#   TERMINATE  - non-empty == terminate the script on non-zero return code
#
checkReturn() {
  if [ "$1" -ne 0 ]; then
    local error_addon=""
    if [ -n "$3" ]; then
      error_addon="; Terminating"
    fi
    log "ERROR: ${2} failed: returned ${1}${error_addon}"
    if [ -n "$3" ]; then
      >&2 echo
      >&2 echo '***'
      >&2 echo '*** WARNING: The results are incomplete. Do not use! ***'
      >&2 echo '***'
      # Signal the top-level shell rather than calling exit: this function
      # may be running inside a background or command-substitution subshell.
      kill -s TERM "$TOP_PID"
    fi
  fi
}

#
# prompt_pool
#   Show the available pools and read the pool name(s) to examine from
#   stdin. Only the answer is written to stdout; everything else goes to
#   stderr so the caller can capture the result with $(prompt_pool).
#
prompt_pool() {
  rados lspools >"$temp_file" 2>"$lspools_err"
  # stdout is captured by the caller, so keep the error log off of it
  checkReturn $? "Listing pools" 1 >&2

  >&2 echo ""
  >&2 echo "Available pools:"
  >&2 sed 's/^/    /' "$temp_file" # list pools and indent
  >&2 echo ""
  >&2 echo "Which Rados Gateway Data pool do you want to search for gaps?"
  >&2 echo ""
  >&2 echo "NOTE: If your installation has multiple bucket data pools using "
  >&2 echo "      bucket placement policies, please enter a space separated "
  >&2 echo "      list of bucket data pools to enumerate."
  >&2 echo ""

  local mypool
  read -r mypool
  printf '%s\n' "$mypool"
}

#
# radosgw_radoslist
#   Run 'radosgw-admin bucket radoslist' into $rgwadmin_out and sort it
#   (unique on the first $fs-separated field). On failure of the listing
#   the $rgwadmin_flag file is created and the script is terminated.
#
radosgw_radoslist() {
  local retval

  log "Running 'radosgw-admin bucket radoslist'."
  rm -f "$rgwadmin_flag" &> /dev/null
  radosgw-admin bucket radoslist --rgw-obj-fs="$fs" >"$rgwadmin_out" 2>"$rgwadmin_err"
  retval=$?
  if [ "$retval" -ne 0 ]; then
    touch "$rgwadmin_flag"
  fi
  checkReturn "$retval" "radosgw-admin radoslist" 1
  log "Completed 'radosgw-admin bucket radoslist'."

  log "Sorting 'radosgw-admin bucket radoslist' output."
  sort -T "${temp_prefix}" --field-separator="$fs" -k1,1 -u "$rgwadmin_out" >"$rgwadmin_temp"
  checkReturn $? "Sorting 'radosgw-admin bucket radoslist' output" 1
  log "Completed sorting 'radosgw-admin bucket radoslist'."

  log "Moving 'radosgw-admin bucket radoslist' output."
  mv -f "$rgwadmin_temp" "$rgwadmin_out"
  checkReturn $? "Moving 'radosgw-admin bucket radoslist' output" 1
  log "Completed moving 'radosgw-admin bucket radoslist' output."
}

#
# rados_ls
#   Run 'rados ls' on every pool in $pool, appending to $rados_out, then
#   sort the combined listing uniquely. On failure of a listing the
#   $rados_flag file is created and the script is terminated.
#
rados_ls() {
  local mypool retval

  log "Starting 'rados ls' function."
  rm -f "$rados_flag" &> /dev/null
  rm -f "$rados_out" &> /dev/null
  # Truncate once and append below so stderr output from an earlier pool
  # is not overwritten by a later one.
  : >"$rados_err"

  # $pool is deliberately unquoted: it is a space separated list of pools.
  for mypool in $pool; do
    log "Running 'rados ls' on pool ${mypool}."
    rados ls --pool="$mypool" >>"$rados_out" 2>>"$rados_err"
    retval=$?
    if [ "$retval" -ne 0 ]; then
      touch "$rados_flag"
    fi
    checkReturn "$retval" "'rados ls' on pool ${mypool}" 1
    log "Completed 'rados ls' on pool ${mypool}."
  done

  if [ ! -e "$rados_flag" ]; then
    log "Sorting 'rados ls' output(s)."
    sort -T "${temp_prefix}" -u "$rados_out" >"$rados_temp"
    checkReturn $? "Sorting 'rados ls' output(s)" 1
    log "Moving sorted output(s)."
    mv -f "$rados_temp" "$rados_out"
    checkReturn $? "Moving temp file to output file" 1
    log "Sorting 'rados ls' output(s) complete."
  fi
}

#
# usage
#   Print the help text to stderr and exit with status 1.
#
usage() {
  >&2 cat << EOF

WARNING WARNING WARNING WARNING WARNING WARNING WARNING
WARNING:
WARNING: Command option format has changed. Please check closely.
WARNING:
WARNING WARNING WARNING WARNING WARNING WARNING WARNING

Usage: $0 [-m] [-p <pool>] [-t <temp_dir>]

Where:
  -m            Optionally, run the two listings in multiple threads.
                --See NOTE below--
  -p <pool>     The RGW bucket data pool name, if omitted, pool name will be
                prompted for during execution.
                Multiple pools can be supplied as a space separated
                double quoted list.
  -t <temp_dir> Optionally, set the directory to use for temp space.
                This may be required if /tmp is low on space.

NOTE: This tool is currently considered to be EXPERIMENTAL.

NOTE: False positives are possible. False positives would likely
      appear as objects that were never deleted and are fully
      intact. All results should therefore be verified.

NOTE: Multithread listing may increase performance but may also increase
      the risk of false positives when the cluster is undergoing
      modifications during the listing processes. In addition to the
      above, false positives might also include objects that were
      intentionally deleted.

EOF
  exit 1
}

#
# cleanup
#   EXIT handler: remove the private work directory and everything in it.
#
cleanup() {
  if [ -n "${work_dir:-}" ] && [ -d "$work_dir" ]; then
    rm -rf -- "$work_dir"
  fi
}

multithread=0
error=0
temp_prefix="/tmp"
# The leading ':' selects silent error reporting: an unknown option yields
# '?' and a missing argument yields ':', with the option letter in OPTARG.
while getopts ":mp:t:" o; do
  case "${o}" in
    m)
      multithread=1
      ;;
    p)
      pool=${OPTARG}
      ;;
    t)
      if [ -d "${OPTARG}" ]; then
        temp_prefix=${OPTARG}
      else
        echo
        echo "ERROR: Temporary directory does not exist: ${OPTARG}"
        error=1
      fi
      ;;
    :)
      echo
      echo "ERROR: Option -${OPTARG} requires an argument"
      error=1
      ;;
    *)
      echo
      echo "ERROR: Unrecognized argument: -${OPTARG}"
      error=1
      ;;
  esac
done
shift $((OPTIND-1))

if [ "$error" -gt 0 ]; then
  usage
fi

# Keep all scratch files in a private, unpredictably named directory
# below the temp prefix and remove it again on exit.
work_dir=$(mktemp -d "${temp_prefix}/rgw-gap-list.XXXXXX")
if [ $? -ne 0 ] || [ ! -d "$work_dir" ]; then
  >&2 echo "ERROR: Unable to create a work directory under ${temp_prefix}"
  exit 1
fi
trap cleanup EXIT

temp_file="${work_dir}/gap-tmp"
rados_temp="${work_dir}/rados-tmp"
rgwadmin_temp="${work_dir}/radosgw-admin-tmp"
rados_flag="${work_dir}/rados-flag"
rgwadmin_flag="${work_dir}/radosgw-admin-flag"
incremental_grep_awk="${work_dir}/ig.awk"

if [ -z "$pool" ]; then
  pool="$(prompt_pool)"
fi

error=0
# CEPH_ARGS is deliberately unquoted so that it splits into separate options.
# shellcheck disable=SC2086
rados ${CEPH_ARGS} lspools >"${temp_file}"
checkReturn $? "rados lspools" 1

pool_count=0
for mypool in $pool; do
  pool_count=$((pool_count + 1))
  # Fixed-string, whole-line match: pool names routinely contain '.',
  # which must not be treated as a regex wildcard.
  if ! grep -Fxq -- "${mypool}" "${temp_file}"; then
    echo
    echo "ERROR: Supplied pool does not exist: ${mypool}"
    error=1
  fi
done

if [ "$pool_count" -eq 0 ]; then
  echo
  echo "ERROR: No pool name supplied."
  error=1
fi

if [ "$error" -gt 0 ]; then
  exit 1
fi

log "Pool is \"$pool\"."
log "Note: output files produced will be tagged with the current timestamp -- ${timestamp}."

if [ "$multithread" -eq 1 ]; then
  startsecs=$(date +%s)
  log "Starting multithread tasks..."
  rados_ls &
  radosgw_radoslist &
  jobs &> /dev/null # without this, the myjobs count always equals 1 (confused)
  myjobs=$(jobs | wc -l)
  while [ "$myjobs" -gt 0 ]; do
    # provide minutely status update
    if [ $(( ($(date +%s) - startsecs) % 60 )) -eq 0 ]; then
      echo
      deltasecs=$(( $(date +%s) - startsecs ))
      log "Waiting for listing tasks to complete. Running ${myjobs} tasks for ${deltasecs} seconds."
    fi
    sleep 1
    echo -n .
    if [ -e "$rgwadmin_flag" ]; then
      exit 1
    fi
    if [ -e "$rados_flag" ]; then
      exit 2
    fi
    jobs &> /dev/null # without this, the myjobs count always equals 1 (confused)
    myjobs=$(jobs | wc -l)
  done
  echo
else
  rados_ls
  radosgw_radoslist
fi

if [ -e "$rgwadmin_flag" ]; then
  exit 1
fi

if [ -e "$rados_flag" ]; then
  exit 2
fi

for myfile in "$rados_out" "$rgwadmin_out"; do
  if [ ! -s "${myfile}" ]; then
    log "ERROR: Empty file detected: ${myfile}"
    log "ERROR: RESULTS ARE INCOMPLETE - DO NOT USE"
    exit 1
  fi
done

# Create an awk script in a file for parsing the two command outputs.
log "Creating awk script for comparing outputs: ${incremental_grep_awk}"

cat <<'EOF' >"$incremental_grep_awk"
# This awk script is used by rgw-gap-list and will sequence through
# each line in $rados_out and $rgwadmin_out exactly once.
#
# During this iteration:
#  * The 1st column of $rgwadmin_out is compared to the line of
#    $rados_out.
#  * If they are equal, the next line of $rados_out is read in and the
#    next line of $rgwadmin_out is provided via normal awk iteration.
#  * If a value appears in $rgwadmin_out, but not $rados_out, this
#    indicates a possible deleted tail object and the accompanying
#    bucket / user object name is output, assuming it had not been
#    previously identified.
#    - The most recently output bucket / user object is remembered so
#      consecutive duplicates are suppressed
#  * If a value appears in $rados_out, but not in $rgwadmin_out, the
#    $rados_out file is iterated until the $rados_out line is equal
#    or > (alphabetically) the value from the $rgwadmin_out file.
#
# All comparisons are forced to string comparisons (by appending "") so
# that names which look numeric are ordered the same way 'sort' ordered
# the input files.
#
# NOTE(review): advance_f2(), test_lines() and the head of line_out()
# were rebuilt from the way the main rule uses them (b[1], f2_eof,
# f2_count, return value 2, lastline, lineoutCount) -- confirm against
# the upstream script.

function usage() {
  print "Example Usage:">>"/dev/stderr"
  print "   # limit $fs to single char that will not appear in either output">>"/dev/stderr"
  print "   # The below is Octal 376, or Hex 0xFE">>"/dev/stderr"
  print "">>"/dev/stderr"
  print "   $ fs=$(echo -e \"\\0376\") ">>"/dev/stderr"
  print "   $ rados ls -p default.rgw.buckets.data > rados_out.txt">>"/dev/stderr"
  print "   $ radosgw-admin bucket radoslist --rgw-obj-fs=\"$fs\" \\">>"/dev/stderr"
  print "       | sort --field-separator=\"$fs\" -k 1,1 > rgwadmin_out.txt">>"/dev/stderr"
  print "   ">>"/dev/stderr"
  print "   $ awk -F \"$fs\" \\">>"/dev/stderr"
  print "       -v filetwo=rados_out.txt \\">>"/dev/stderr"
  print "       -v map_out=MappedOutput.txt \\">>"/dev/stderr"
  print "       -f ig_awk \\">>"/dev/stderr"
  print "       rgwadmin_out.txt">>"/dev/stderr"
  print "">>"/dev/stderr"
  print "   Result will be provided in the 'MappedOutput.txt' file in this">>"/dev/stderr"
  print "   example. If you'd prefer the output to be sorted, you can run">>"/dev/stderr"
  print "   $ sort MappedOutput.txt > SortedMappedOutput.txt">>"/dev/stderr"
  print "">>"/dev/stderr"
  print "">>"/dev/stderr"
  exit 1
}

# Return the current local date and time as reported by date(1).
function get_date_time() {
  dtstr="date +%F\\ %T"
  dtstr | getline mydt
  close(dtstr)
  return mydt
}

# Write one progress line (lines read from each file, objects output).
function status_out() {
  printf("%s % 17d\t% 17d\t% 12d\n",get_date_time(),f1_count,f2_count,lineoutCount)>>"/dev/stderr"
}

# Read the next line of filetwo into b[]; set f2_eof once it is exhausted
# (or cannot be read).
function advance_f2() {
  if ((getline f2line<filetwo) <= 0) {
    f2_eof=1
  } else {
    f2_count++
    bcount=split(f2line,b,FS)
  }
}

# Compare the current input line's first field with the current filetwo
# line:
#   0 - equal; filetwo has been advanced
#   1 - input sorts before filetwo (missing from filetwo); line was output
#   2 - input sorts after filetwo; the caller must advance filetwo
function test_lines() {
  if(($1"")==(b[1]"")) {
    advance_f2()
    return 0
  } else if (($1"")<(b[1]"")) {
    line_out()
    return 1
  } else {
    return 2
  }
}

# Output the bucket / user object for the current input line unless it
# is identical to the one output last.
function line_out() {
  # $NF is the last field of the line; the output uses the 2nd and the
  # last field of the radoslist record.
  if(length(lastline)==0 || lastline!=$2" "$NF) {
    print $2" "$NF>>map_out
    lastline=$2" "$NF
    lineoutCount++
  }
}

BEGIN {
  if(filetwo==""||map_out=="") {
    print "">>"/dev/stderr"
    print "">>"/dev/stderr"
    print "Missing parameter.">>"/dev/stderr"
    print "">>"/dev/stderr"
    print "">>"/dev/stderr"
    usage()
  }
  status_delta=100000
  f1_count=0
  f2_count=0
  advance_f2()
  printf("%s File 1 Line Count\tFile 2 Line Count\tPotentially Impacted Objects\n",get_date_time())>>"/dev/stderr"
  for(n=0;n<256;n++) {
    ord[sprintf("%c",n)]=n
  }
}

{
  f1_count++

  if(f2_eof==0) {
    if(test_lines()==2) {
      # Skip filetwo forward until it catches up with this line. The
      # f2_eof test is required: once filetwo is exhausted b[1] no longer
      # changes and the comparison alone would never become false.
      while (f2_eof==0 && ($1"")>(b[1]"")) {
        advance_f2()
      }
      if(f2_eof==0) {
        test_lines()
      } else {
        # filetwo ran out before reaching this line, so it is missing
        line_out()
      }
    }
  } else {
    # If EOF hit, dump all remaining lines since they're missing
    # from filetwo
    line_out()
  }

  if((f1_count % status_delta)==0) {
    status_out()
  }
}

END {
  if(f1_count>0) {
    status_out()
  }
}
EOF

log "Begin identifying potentially impacted user object names."

echo -n > "$temp_file" # Ensure the file is empty
awk -F "$fs" -v filetwo="$rados_out" -v map_out="$temp_file" -f "$incremental_grep_awk" "$rgwadmin_out"
checkReturn $? "Identifying potentially impacted user object names" 1

log "Begin sorting results."
sort -T "${temp_prefix}" "$temp_file" > "$gap_out"
checkReturn $? "sorting results" 1
rm -f "$temp_file"

found=$(wc -l < "$gap_out")

log "Done."

cat << EOF

Found $found *possible* gaps.
The results can be found in "${gap_out}".
    Intermediate files: "${rados_out}" and "${rgwadmin_out}".
***
*** WARNING: This is EXPERIMENTAL code and the results should be used
***          with CAUTION and VERIFIED. Not everything listed is an
***          actual gap. EXPECT false positives. Every result
***          produced should be verified.
***
EOF