diff options
Diffstat (limited to 'src/rgw/rgw-orphan-list')
-rwxr-xr-x | src/rgw/rgw-orphan-list | 144 |
1 files changed, 144 insertions, 0 deletions
#!/usr/bin/env bash

# version 2020-10-20

# rgw-orphan-list -- list RADOS objects in a pool that are not referenced
# by any RGW bucket (potential orphans).
#
# Usage: rgw-orphan-list [pool]
#
# If no pool is given, the available pools are listed and the user is
# prompted for one. All output files are written to ${out_dir} and are
# tagged with a UTC timestamp.
#
# Requires: rados, radosgw-admin, ceph-diff-sorted

# IMPORTANT: affects order produced by 'sort' and 'ceph-diff-sorted'
# relies on this ordering. LC_ALL is exported as well because an
# inherited LC_ALL (or LC_COLLATE) takes precedence over LANG and would
# otherwise silently change the collation order.
export LANG=C
export LC_ALL=C

out_dir="."
timestamp=$(date -u +%Y%m%d%H%M%S)
lspools_err="${out_dir}/lspools-${timestamp}.error"
rados_out="${out_dir}/rados-${timestamp}.intermediate"
rados_odd="${out_dir}/rados-${timestamp}.issues"
rados_err="${out_dir}/rados-${timestamp}.error"
rgwadmin_out="${out_dir}/radosgw-admin-${timestamp}.intermediate"
rgwadmin_err="${out_dir}/radosgw-admin-${timestamp}.error"
delta_out="${out_dir}/orphan-list-${timestamp}.out"

# set to 1 when 'rados ls' reports entries with a namespace / locator
namespace_found=
locator_found=

#######################################
# Report a fatal error and terminate with status 1.
# All diagnostics go to stderr so they are neither mixed into data on
# stdout nor swallowed when called inside a command substitution.
# Arguments:
#   $1 - name of the command that failed
#   $2 - (optional) file that holds the details of the failure
#   $3 - (optional) additional error text
#######################################
error_out() {
  {
    echo "An error was encountered while running '$1'. Aborting."
    if (( $# > 2 )); then
      echo "Error: $3"
    fi
    if (( $# > 1 )); then
      echo "Review file '$2' for details."
    fi
    echo "***"
    echo "*** WARNING: The results are incomplete. Do not use! ***"
    echo "***"
  } >&2
  exit 1
}

# Scratch file; created with an unpredictable name and removed on every
# exit path.
temp_file=$(mktemp "${TMPDIR:-/tmp}/rgw-orphan-list.XXXXXX") \
  || error_out "mktemp"
trap 'rm -f -- "$temp_file"' EXIT

#######################################
# List the available pools and ask the user to choose one.
# Globals:   temp_file, lspools_err (read)
# Outputs:   the chosen pool name on stdout; all prompts on stderr so
#            that stdout contains just the result
# Returns:   exits non-zero if 'rados lspools' fails; since this
#            function is run in a command substitution, the caller
#            must check the status of that substitution
#######################################
prompt_pool() {
  local mypool
  echo "Available pools:" >&2
  if ! rados lspools >"$temp_file" 2>"$lspools_err"; then
    error_out "rados lspools" "$lspools_err"
  fi
  sed 's/^/ /' "$temp_file" >&2 # list pools and indent
  printf "Which pool do you want to search for orphans? " >&2
  read -r mypool
  printf '%s\n' "$mypool"
}

if (( $# == 0 )); then
  # an 'exit' inside $(...) only leaves the subshell, so the failure
  # has to be propagated explicitly
  pool="$(prompt_pool)" || exit 1
elif (( $# == 1 )); then
  pool="$1"
else
  echo "Usage: $0 [pool]" >&2
  exit 1
fi

if [[ -z "$pool" ]]; then
  echo "Error: no pool name was provided." >&2
  exit 1
fi

echo "Pool is \"$pool\"."

echo "Note: output files produced will be tagged with the current timestamp -- ${timestamp}."

echo "running 'rados ls' at $(date)"
# since --format is not specified, plain should be used
if ! rados ls --pool="$pool" --all >"$rados_out" 2>"$rados_err"; then
  error_out "rados ls" "$rados_err"
fi

# NOTE: Each entry (line of output) of `rados ls --all` should be in
# one of four formats depending on whether or not an entry has a
# namespace and/or locator:
#
#   <TAB>oid
#   <TAB>oid<TAB>locator
#   namespace<TAB>oid
#   namespace<TAB>oid<TAB>locator
#
# Any occurrences of the 2nd, 3rd, or 4th (i.e., existence of
# namespace and/or locator) should cause the creation of the "odd" file
# and an explanation in the output, and those entries will not be
# retained, and therefore they will not be called out as orphans. They
# will need special handling by the end-user as we do not expect
# namespaces or locators.

# check for namespaces -- any line that does not begin with a tab
# indicates a namespace; add those to "odd" file and set flag; note:
# this also picks up entries with namespace and locator; the $ in
# front of the ' allows the \t to be interpreted as a TAB
if grep $'^[^\t]' "$rados_out" >"$rados_odd"; then
  namespace_found=1
fi

# check for locators (w/o namespace); we identify them as lines that
# begin with the empty namespace (i.e., one TAB) and contain a second
# TAB. This is exactly the set of lines that the extraction step below
# discards, so every dropped entry is recorded in the "odd" file, even
# when the oid contains spaces or non-ASCII bytes.
if grep $'^\t.*\t' "$rados_out" >>"$rados_odd"; then
  locator_found=1
fi

# extract the entries that are just oids (i.e., no namespace or
# locator) for further processing; only look at lines that begin with
# a TAB and do not contain a second TAB, and then grab everything
# after the initial TAB
grep $'^\t' "$rados_out" | grep -v $'^\t.*\t' | sed $'s/^\t//' >"$temp_file"

if ! sort -u "$temp_file" >"$rados_out"; then
  error_out "sort"
fi

echo "running 'radosgw-admin bucket radoslist' at $(date)"
if ! radosgw-admin bucket radoslist >"$rgwadmin_out" 2>"$rgwadmin_err"; then
  error_out "radosgw-admin radoslist" "$rgwadmin_err"
fi
# 'sort -o' may name one of its input files, so sort in place
if ! sort -u -o "$rgwadmin_out" "$rgwadmin_out"; then
  error_out "sort"
fi

echo "computing delta at $(date)"
ceph-diff-sorted "$rados_out" "$rgwadmin_out" \
  | grep "^<" \
  | sed 's/^< *//' >"$delta_out"
# use PIPESTATUS to get at exit status of first process in above pipe;
# it must be captured immediately, before any other command runs;
# 0 means same, 1 means different, >1 means error
diff_status=${PIPESTATUS[0]}
if (( diff_status > 1 )); then
  error_out "ceph-diff-sorted"
fi

# the arithmetic expansion strips the padding some 'wc' variants emit
found=$(( $(wc -l <"$delta_out") ))
possible=$(( $(wc -l <"$rados_out") ))
percentage=0
if (( possible != 0 )); then
  percentage=$(( 100 * found / possible ))
fi

echo "$found potential orphans found out of a possible $possible (${percentage}%)."
echo "The results can be found in '${delta_out}'."
echo " Intermediate files are '${rados_out}' and '${rgwadmin_out}'."
if [[ -n "$namespace_found" || -n "$locator_found" ]]; then
  echo " Note: 'rados ls' found entries that might be in a namespace or might"
  echo " have a locator; see '${rados_odd}' for those entries."
fi
echo "***"
echo "*** WARNING: This is EXPERIMENTAL code and the results should be used"
echo "*** only with CAUTION!"
echo "***"
echo "Done at $(date)."