2 files changed, 575 insertions, 0 deletions
diff --git a/src/rgw/rgw-gap-list b/src/rgw/rgw-gap-list
new file mode 100755
index 000000000..5018cedd7
--- /dev/null
+++ b/src/rgw/rgw-gap-list
@@ -0,0 +1,456 @@
+#!/usr/bin/env bash
+
+# Last revision 2023-01-13
+
+# NOTE: This script is based on rgw-orphan-list but does the
+# reverse calculation.
+
+# NOTE: The awk included in this script replaces the 'ceph-diff-sorted'
+# utility but duplicates its functionality.  This was done to minimize
+# the number of times the massive data set must be iterated to complete
+# the task.
+
+# IMPORTANT: Affects order produced by 'sort' (forces C-locale collation).
+export LC_ALL=C
+
+trap "exit 1" TERM  # checkReturn aborts the script by sending TERM to TOP_PID
+TOP_PID=$$  # PID of the top-level shell; signal target used by checkReturn
+
+out_dir="$PWD"  # every output and error file below is created in the current directory
+timestamp=$(date -u +%Y%m%d%H%M)  # UTC, minute resolution; embedded in each file name
+lspools_err="${out_dir}/lspools-${timestamp}.error"
+rados_out="${out_dir}/rados-${timestamp}.intermediate"
+rados_err="${out_dir}/rados-${timestamp}.error"
+rgwadmin_out="${out_dir}/radosgw-admin-${timestamp}.intermediate"
+rgwadmin_err="${out_dir}/radosgw-admin-${timestamp}.error"
+gap_out="${out_dir}/gap-list-${timestamp}.gap"
+
+
+# Field separator handed to radosgw-admin (--rgw-obj-fs), sort and awk.
+# It is the single byte 0xFE, chosen as a character that won't appear
+# in normal output; it can only be a single character due to its use
+# in the sort command.
+fs=$(echo -e "\xFE")
+
+log() {  # log MESSAGE: print "<date> <time> <short hostname> MESSAGE" on stdout
+  echo $(date +%F\ %T) $(hostname -s) "$1"
+}
+
+#
+# checkReturn RETURNCODE MESSAGE TERMINATE
+#  RETURNCODE - exit status to test ( usually $? of the previous command )
+#  MESSAGE    - name of the step; logged as "ERROR: MESSAGE failed: ..."
+#  TERMINATE  - non-empty == terminate the script on non-zero return code
+#
+checkReturn() {
+  if [ $1 -ne 0 ]; then
+    error_addon=""  # suffix for the log line; filled in only when terminating
+    if [ ! -z "$3" ]; then
+      error_addon="; Terminating"
+    fi
+    log "ERROR: ${2} failed: returned ${1}${error_addon}"
+    if [ ! -z "$3" ]; then
+      >&2 echo
+      >&2 echo '***'
+      >&2 echo '*** WARNING: The results are incomplete. Do not use! ***'
+      >&2 echo '***'
+      kill -s TERM $TOP_PID  # the TERM trap set at the top of the script then runs 'exit 1'
+    fi
+  fi
+}
+
+prompt_pool() {  # list the pools, prompt on stderr, print the user's reply on stdout
+  # note: all prompts go to stderr so stdout contains just the result
+  rados lspools >"$temp_file" 2>"$lspools_err"
+  checkReturn $? "Listing pools" 1
+
+  >&2 echo ""
+  >&2 echo "Available pools:"
+  >&2 sed 's/^/    /' "$temp_file" # list pools and indent
+  >&2 echo ""
+  >&2 echo "Which Rados Gateway Data pool do you want to search for gaps? "
+  >&2 echo ""
+  >&2 echo "NOTE: If your installation has multiple bucket data pools using "
+  >&2 echo "      bucket placement policies, please enter a space separated "
+  >&2 echo "      list of bucket data pools to enumerate."
+  >&2 echo ""
+  local mypool
+  read -r mypool  # -r: take the reply literally, without backslash processing
+  printf '%s\n' "$mypool"  # quoted: a reply such as '*' or '-n' is passed through unchanged
+}
+
+radosgw_radoslist() {  # write the sorted, de-duplicated bucket radoslist to $rgwadmin_out
+  log "Running 'radosgw-admin bucket radoslist'."
+  rm -f "$rgwadmin_flag" &> /dev/null
+  radosgw-admin bucket radoslist --rgw-obj-fs="$fs" >"$rgwadmin_out" 2>"$rgwadmin_err"
+  RETVAL=$?
+  if [ "$RETVAL" -ne 0 ] ;then
+    touch "$rgwadmin_flag"  # failure marker checked by the caller
+    checkReturn $RETVAL "radosgw-admin radoslist" 1
+    return $RETVAL  # reached when run as a background job: do not sort partial output
+  fi
+  log "Completed 'radosgw-admin bucket radoslist'."
+
+  log "Sorting 'radosgw-admin bucket radoslist' output."
+  sort -T "${temp_prefix}" --field-separator="$fs" -k1,1 -u "$rgwadmin_out" > "$rgwadmin_temp"
+  checkReturn $? "Sorting 'radosgw-admin bucket radoslist' output" 1
+  log "Completed sorting 'radosgw-admin bucket radoslist'."
+  log "Moving 'radosgw-admin bucket radoslist' output."
+  mv -f "$rgwadmin_temp" "$rgwadmin_out"
+  checkReturn $? "Moving 'radosgw-admin bucket radoslist' output" 1
+  log "Completed moving 'radosgw-admin bucket radoslist' output."
+}
+
+rados_ls() {  # write the sorted, de-duplicated 'rados ls' of every pool in $pool to $rados_out
+  log "Starting 'rados ls' function."
+  rm -f "$rados_flag" "$rados_out" "$rados_err" &> /dev/null
+  local mypool
+  for mypool in $pool; do  # $pool is deliberately unquoted: space separated list
+    log "Running 'rados ls' on pool ${mypool}."
+    rados ls --pool="$mypool" >>"$rados_out" 2>>"$rados_err"  # append: keep stderr of every pool
+    RETVAL=$?
+    if [ "$RETVAL" -ne 0 ] ;then
+      touch "$rados_flag"  # failure marker checked by the caller
+      checkReturn $RETVAL "'rados ls' on pool ${mypool}" 1
+      return $RETVAL  # reached when run as a background job: do not list further pools
+    fi
+    log "Completed 'rados ls' on pool ${mypool}."
+  done
+  if [ ! -e "$rados_flag" ]; then
+    log "Sorting 'rados ls' output(s)."
+    sort -T "${temp_prefix}" -u "$rados_out" >"$rados_temp"
+    checkReturn $? "Sorting 'rados ls' output(s)" 1
+
+    log "Moving sorted output(s)."
+    mv -f "$rados_temp" "$rados_out"
+    checkReturn $? "Moving temp file to output file" 1
+    log "Sorting 'rados ls' output(s) complete."
+  fi
+}
+
+usage() {  # print the help text on stderr and exit with status 1
+  >&2 cat << EOF
+
+WARNING   WARNING   WARNING   WARNING   WARNING   WARNING   WARNING
+WARNING: 
+WARNING: Command option format has changed.  Please check closely.
+WARNING: 
+WARNING   WARNING   WARNING   WARNING   WARNING   WARNING   WARNING
+
+Usage: $0 [-m] [-p <pool>] [-t <temp_dir>]
+
+Where:
+  -m               Optionally, run the two listings in multiple threads.
+                   --See NOTE below--
+
+  -p <pool>        The RGW bucket data pool name, if omitted, pool name
+                   will be prompted for during execution.
+                   Multiple pools can be supplied as a space separated
+                   double quoted list.
+
+  -t <temp_dir>    Optionally, set the directory to use for temp space.
+                   This may be required if /tmp is low on space.
+
+NOTE: This tool is currently considered to be EXPERIMENTAL.
+
+NOTE: False positives are possible. False positives would likely
+      appear as objects that were never deleted and are fully
+      intact. All results should therefore be verified.
+
+NOTE: Multithread listing may increase performance but may also increase
+      the risk of false positives when the cluster is undergoing
+      modifications during the listing processes. In addition to the
+      above, false positives might also include objects that were
+      intentionally deleted.
+
+EOF
+  exit 1
+}
+
+multithread=0
+error=0
+temp_prefix="/tmp"
+while getopts ":mp:t:" o; do  # leading ':' = silent mode; errors are reported by the '*' arm
+  case "${o}" in
+    m)
+      multithread=1
+    ;;
+    p)
+      pool=${OPTARG}
+    ;;
+    t)
+      if [ -d "${OPTARG}" ]; then
+        temp_prefix=${OPTARG}
+      else
+        echo
+        echo "ERROR: Temporary directory does not exist: ${OPTARG}"
+        error=1
+      fi
+    ;;
+    *)
+      echo
+      echo "ERROR: Unrecognized or incomplete argument: -${OPTARG}"  # here ${o} is only '?' or ':'
+      error=1
+    ;;
+  esac
+done
+shift $((OPTIND-1))
+
+temp_file=${temp_prefix}/gap-tmp.$$
+rados_temp=${temp_prefix}/rados-tmp.$$
+rgwadmin_temp=${temp_prefix}/radosgw-admin-tmp.$$
+rados_flag=${temp_prefix}/rados-flag.$$
+rgwadmin_flag=${temp_prefix}/radosgw-admin-flag.$$
+incremental_grep_awk="${temp_prefix}/ig-${$}.awk"
+
+if [ $error -gt 0 ]; then
+  usage
+fi
+
+if [ -z "$pool" ]; then
+  pool="$(prompt_pool)"
+fi
+
+error=0
+rados ${CEPH_ARGS} lspools > "${temp_file}"
+checkReturn $? "rados lspools" 1
+for mypool in $pool; do
+  if ! grep -Fxq -- "${mypool}" "${temp_file}"; then  # literal whole-line match; '.' is not a wildcard
+      echo
+      echo "ERROR: Supplied pool does not exist: ${mypool}"
+      error=1
+  fi
+done
+
+if [ $error -gt 0 ]; then
+  exit 1
+fi
+
+log "Pool is \"$pool\"."
+log "Note: output files produced will be tagged with the current timestamp -- ${timestamp}."
+
+if [ $multithread -eq 1 ] ;then
+  startsecs=$(date +%s)
+  log "Starting multithread tasks..."
+  rados_ls &
+  radosgw_radoslist &
+  jobs &> /dev/null  # without this, the myjobs count always equals 1 (confused)
+  myjobs=$(jobs | wc -l)
+  while [ $myjobs -gt 0 ]; do
+    # provide minutely status update
+    if [ $(( ($(date +%s)-$startsecs) % 60 )) -eq 0 ]; then
+      echo
+      deltasecs=$(( $(date +%s)-$startsecs ))
+      log "Waiting for listing tasks to complete. Running ${myjobs} tasks for ${deltasecs} seconds."
+    fi
+    sleep 1
+    echo -n .
+    if [ -e "$rgwadmin_flag" ]; then  # created by radosgw_radoslist when the listing fails
+      exit 1
+    fi
+    if [ -e "$rados_flag" ]; then  # created by rados_ls when a pool listing fails
+      exit 2
+    fi
+    jobs &> /dev/null  # without this, the myjobs count always equals 1 (confused)
+    myjobs=$(jobs | wc -l)
+  done
+  echo
+else
+  rados_ls
+  radosgw_radoslist
+fi
+
+if [ -e "$rgwadmin_flag" ]; then  # same variable the listing function touches
+  exit 1
+fi
+
+if [ -e "$rados_flag" ]; then
+  exit 2
+fi
+
+for myfile in "$rados_out" "$rgwadmin_out"; do
+  if [ ! -s "${myfile}" ]; then
+    log "ERROR: Empty file detected: ${myfile}"
+    log "ERROR: RESULTS ARE INCOMPLETE - DO NOT USE"
+    exit 1
+  fi
+done
+
+# Create an awk script in a file for parsing the two command outputs.
+log "Creating awk script for comparing outputs: ${incremental_grep_awk}"
+
+cat <<"EOF" >"$incremental_grep_awk"
+# This awk script is used by rgw-gap-list and will sequence through
+# each line in $rados_out and $rgwadmin_out exactly once.
+#
+# During this iteration:
+#  * The 1st column of $rgwadmin_out is compared to the line of
+#    $rados_out.
+#  * If they are equal, the next line of $rados_out is read in and the
+#    next line of $rgwadmin_out is provided via normal awk iteration.
+#  * If a value appears in $rgwadmin_out, but not $rados_out, this 
+#    indicates a possible deleted tail object and the accompanying
+#    bucket / user object name is output, assuming it had not been
+#    previously identified.
+#    - The most recently output bucket / user object is kept in memory
+#  * If a value appears in $rados_out, but not in $rgwadmin_out, the
+#    $rados_out file is iterated until the $rados_out line is equal
+#    or > (alphabetically) the value from the $rgwadmin_out file.
+
+function usage() {  # print a worked example on stderr and exit with status 1
+  print "Example Usage:">>"/dev/stderr"
+  print "   # limit $fs to single char that will not appear in either output">>"/dev/stderr"
+  print "   # The below is Octal 376, or Hex 0xFE">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print "   $ fs=$(echo -e \"\\0376\")  ">>"/dev/stderr"
+  print "   $ rados ls -p default.rgw.buckets.data > rados_out.txt">>"/dev/stderr"
+  print "   $ radosgw-admin bucket radoslist --rgw-obj-fs=\"$fs\" \\">>"/dev/stderr"
+  print "       | sort --field-separator=\"$fs\" -k 1,1 > rgwadmin_out.txt">>"/dev/stderr"
+  print " ">>"/dev/stderr"
+  print "   $ awk -F \"$fs\" \\">>"/dev/stderr"
+  print "         -v filetwo=rados_out.txt \\">>"/dev/stderr"
+  print "         -v map_out=MappedOutput.txt \\">>"/dev/stderr"
+  print "         -f ig_awk \\">>"/dev/stderr"
+  print "         rgwadmin_out.txt">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print "   Result will be provided in the 'MappedOutput.txt' file in this">>"/dev/stderr"
+  print "   example.  If you'd prefer the output to be sorted, you can run">>"/dev/stderr"
+  print "   $ sort MappedOutput.txt > SortedMappedOutput.txt">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  exit 1
+}
+
+function get_date_time() {  # current time as printed by: date +%F\ %T
+  # close() after reading so that date(1) is run afresh on every call
+  datecmd="date +%F\\ %T"
+  datecmd | getline mydt; close(datecmd)
+  return mydt
+}
+
+function status_out() {  # progress line on stderr: time, file 1 count, file 2 count, objects output
+  printf "%s % 17d\t% 17d\t% 12d\n", get_date_time(), f1_count, f2_count, lineoutCount >> "/dev/stderr"
+}
+
+function advance_f2() {  # read the next filetwo line into f2line and split it into b[]
+  if ((getline f2line<filetwo) > 0) {
+    f2_count++
+    bcount=split(f2line,b,FS)
+  } else {
+    f2_eof=1  # EOF or read error; b[] keeps its previous contents
+  }
+}
+
+function test_lines() {  # 0: $1 equals b[1]; 1: $1 sorts first (reported); 2: b[1] sorts first
+  if ($1==b[1]) {
+    advance_f2()
+    return 0
+  }
+  if ($1<b[1]) {
+    line_out()
+    return 1
+  }
+  return 2
+}
+
+function findnul(myfield) {  # 1 if any character of myfield maps to 0 in ord[], else 0
+  # Split into single characters once, outside the loop condition.
+  nchars=split(myfield,a,"")
+  for(i=1;i<=nchars;i++) {
+    if(ord[a[i]]==0) return 1
+  }
+  return 0
+}
+
+function line_out() {  # append the bucket / object of the current record to map_out
+  if(findnul($1)) {
+    # If the RADOS object name has a NUL character, skip output
+    return
+  }
+  # Note: Intentionally using $2 and $NF below
+  # Use of $NF eliminates risk of exhausting input field count
+  if ($2" "$NF!=lastline) {
+    # Only output a given bucket/Obj combination once
+    printf("Bucket: \"%s\"  Object: \"%s\"\n", $2, $NF)>>map_out
+    lastline=$2" "$NF  # only the previous pair is kept, so only adjacent repeats are suppressed
+    lineoutCount++
+  }
+}
+
+BEGIN {  # validate the -v parameters, prime filetwo and build the ord[] table
+  if(filetwo==""||map_out=="") {
+     print "">>"/dev/stderr"
+     print "">>"/dev/stderr"
+     print "Missing parameter.">>"/dev/stderr"
+     print "">>"/dev/stderr"
+     print "">>"/dev/stderr"
+     usage()
+  }
+  status_delta=100000  # a progress line is printed every status_delta input records
+  f1_count=0
+  f2_count=0
+  advance_f2()  # load the first line of filetwo before any record is compared
+  printf("%s File 1 Line Count\tFile 2 Line Count\tPotentially Impacted Objects\n",get_date_time())>>"/dev/stderr"
+  for(n=0;n<256;n++) {  # ord[] maps a one-character string to its code; read by findnul()
+    ord[sprintf("%c",n)]=n
+  }
+}
+
+{
+  f1_count++
+  if(f2_eof==0) {
+    if(test_lines()==2) {
+      # Skip filetwo names sorting before $1; stop at EOF or this never ends.
+      while ($1>b[1] && f2_eof==0) advance_f2()
+      # At EOF nothing in filetwo can match $1 any more, so report it.
+      if(f2_eof==0) { test_lines() } else { line_out() }
+    }
+  } else {
+    # If EOF hit, dump all remaining lines since they're missing
+    # from filetwo
+    line_out()
+  }
+  if((f1_count % status_delta)==0) {
+    status_out()
+  }
+}
+
+END {
+  # Finish with a final progress line once at least one record was read.
+  if(f1_count>0)
+    status_out()
+}
+
+EOF
+
+
+log "Begin identifying potentially impacted user object names."
+
+echo -n > "$temp_file" # Ensure the file is empty
+# Paths are quoted: out_dir is $PWD and temp_prefix may be user supplied.
+awk -F "$fs" -v filetwo="$rados_out" -v map_out="$temp_file" -f "$incremental_grep_awk" "$rgwadmin_out"
+checkReturn $? "Identifying potentially impacted user object names" 1
+
+log "Begin sorting results."
+sort -T "${temp_prefix}" "$temp_file" > "$gap_out"
+checkReturn $? "sorting results" 1
+rm -f "$temp_file"
+
+found=$(wc -l < "$gap_out")
+
+log "Done."
+
+cat << EOF
+
+Found $found *possible* gaps.
+The results can be found in "${gap_out}".
+
+Intermediate files: "${rados_out}" and "${rgwadmin_out}".
+
+***
+*** WARNING: This is EXPERIMENTAL code and the results should be used
+***          with CAUTION and VERIFIED. Not everything listed is an
+***          actual gap. EXPECT false positives. Every result
+***          produced should be verified.
+***
+EOF
diff --git a/src/rgw/rgw-gap-list-comparator b/src/rgw/rgw-gap-list-comparator
new file mode 100755
index 000000000..c377fdaf8
--- /dev/null
+++ b/src/rgw/rgw-gap-list-comparator
@@ -0,0 +1,119 @@
+#!/usr/bin/awk -f
+
+#
+# Version 1
+#
+# This awk script takes two, similarly sorted lists and outputs
+# only the lines which exist in both lists.  The script takes
+# three inputs:
+#
+# ./rgw-gap-list-comparator \
+#     -v filetwo=gap-list-B.txt \
+#     -v matchout=matched_lines.txt \
+#     gap-list-A.txt
+#
+
+function usage() {  # explain the intended two-run workflow on stderr and exit with status 1
+  print "">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print "The idea behind the script is to eliminate false positive hits">>"/dev/stderr"
+  print "from the rgw-gap-list tool which are due to upload timing of new">>"/dev/stderr"
+  print "objects during the tool's execution.  To use the tool properly,">>"/dev/stderr"
+  print "the following process should be followed:">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print " 1: Run the 'rgw-gap-list' tool twice">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print " 2: Sort the resulting map files:">>"/dev/stderr"
+  print "   $ export LC_ALL=C">>"/dev/stderr"
+  print "   $ sort gap-list-A.gap > gap-list-A.sorted.gap">>"/dev/stderr"
+  print "   $ sort gap-list-B.gap > gap-list-B.sorted.gap">>"/dev/stderr"
+  print "   -- Where the A / B in the gap-list file names are the date/time associated with each of the respective 'rgw-gap-list' outputs">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print " 3: Run the 'rgw-gap-list-comparator' script over the two files:">>"/dev/stderr"
+  print "   $ rm matched_lines.txt">>"/dev/stderr"
+  print "   $ ./rgw-gap-list-comparator -v filetwo=gap-list-B.sorted.gap -v matchout=matched_lines.txt gap-list-A.sorted.gap">>"/dev/stderr"
+  print "   -- Where the A / B in the gap-list file names are the date/time associated with each of the respective 'rgw-gap-list' outputs">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print " The resulting 'matched_lines.txt' will be a high confidence list of impacted objects with little to no false positives.">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  print "">>"/dev/stderr"
+  exit 1
+}
+
+function advance_f2() {  # read the next line of filetwo into f2line
+  if ((getline f2line<filetwo) > 0) {
+    f2_count++
+  } else {
+    f2_eof=1  # EOF or read error; f2line keeps its previous value
+  }
+}
+
+function test_lines() {  # 0: lines equal (written to matchout); 2: $0 sorts after f2line; 1: otherwise
+  if($0==f2line) {
+    print $0>>matchout
+    lineoutcount++
+    advance_f2()
+    return 0
+  }
+  if ($0>f2line) {
+    return 2
+  }
+  return 1
+}
+
+function status_out() {  # progress line on stderr: time, file 1 count, file 2 count, matched lines
+  printf "%s % 17d\t% 17d\t% 12d\n", get_date_time(), f1_count, f2_count, lineoutcount >> "/dev/stderr"
+}
+
+function get_date_time() {  # current time as printed by: date +%F\ %T
+  # close() after reading so that date(1) is run afresh on every call
+  datecmd="date +%F\\ %T"
+  datecmd | getline mydt; close(datecmd)
+  return mydt
+}
+
+BEGIN {  # validate the -v parameters, reset the counters and load the first filetwo line
+  if(filetwo==""||matchout=="") {
+     print "">>"/dev/stderr"
+     print "">>"/dev/stderr"
+     print "Missing parameter.">>"/dev/stderr"
+     print "">>"/dev/stderr"
+     print "">>"/dev/stderr"
+     usage()
+  }
+
+  f1_count=0
+  f2_count=0
+  lineoutcount=0
+  f2_eof=0
+  statusevery=100000  # a progress line is printed every statusevery input records
+  advance_f2()
+  printf("%s File 1 Line Count\tFile 2 Line Count\tPotentially Impacted Objects\n",get_date_time())>>"/dev/stderr"
+  status_out()
+}
+
+
+{
+  f1_count++
+  # Once filetwo is exhausted no further line can match, so stop reading.
+  if(f2_eof!=0) {
+    exit 0
+  }
+  if(test_lines()==2) {
+    while($0>f2line && f2_eof==0) {
+      advance_f2()
+    }
+    test_lines()
+  }
+  if ((f1_count % statusevery)==0) {
+    status_out()
+  }
+}
+
+END {
+  # Finish with a final progress line once at least one record was read.
+  if(f1_count>0)
+    status_out()
+}
+