diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/rgw/rgw-orphan-list | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/rgw/rgw-orphan-list')
-rwxr-xr-x | src/rgw/rgw-orphan-list | 278 |
1 files changed, 278 insertions, 0 deletions
diff --git a/src/rgw/rgw-orphan-list b/src/rgw/rgw-orphan-list new file mode 100755 index 000000000..c8856e8ee --- /dev/null +++ b/src/rgw/rgw-orphan-list @@ -0,0 +1,278 @@ +#!/usr/bin/env bash + +# version 2023-01-11 + +# IMPORTANT: affects order produced by 'sort' and 'ceph-diff-sorted' +# relies on this ordering +export LC_ALL=C + +# If your ceph.conf is not in /etc/ceph, then set CEPH_CONF="-c /path/to/ceph.conf" + +trap "exit 1" TERM +TOP_PID=$$ + +out_dir="." +timestamp=$(date -u +%Y%m%d%H%M%S) +lspools_err="${out_dir}/lspools-${timestamp}.error" +rados_out="${out_dir}/rados-${timestamp}.intermediate" +rados_odd="${out_dir}/rados-${timestamp}.issues" +rados_err="${out_dir}/rados-${timestamp}.error" +rgwadmin_out="${out_dir}/radosgw-admin-${timestamp}.intermediate" +rgwadmin_err="${out_dir}/radosgw-admin-${timestamp}.error" +delta_out="${out_dir}/orphan-list-${timestamp}.out" + +log() { + echo $(date +%F\ %T) $(hostname -s) "$1" +} + +usage() { + >&2 cat << EOF + +Usage: $0 [-h] "<radospools>" [<temp_dir>] + +Where: + -h This help output + <radospools> The RGW data pool name, if omitted, pool name will be + prompted for during execution. + If specifying multiple pools, please use space separated + list and wrap the entire list in quotes. + + <temp_dir> Optionally, set the directory to use for temp space. + This may be required if /tmp is low on space. + +NOTES: + - This tool should be ran on a node with ceph-radosgw package installed. + Specifically, it needs the 'ceph-diff-tool' command from that package. + + - This tool is currently considered to be EXPERIMENTAL. + + - False positives are possible. False positives would likely + appear as objects that were never deleted and are fully + intact. All results should therefore be verified. + +WARNING: + - Indexless buckets will appear as 100% orphan objects. + - Therefore, this tool MUST NOT be used in environments with indexless + buckets. + +EOF + exit 1 +} + +# +# checkReturn RETURNCODE MESSAGE TERMINATE +# RETURNCODE - ( usually $? ) of previous command +# MESSAGE - Message to print on non-zero return code +# TERMINATE - non-empty == terminate the script on non-zero return code +# +checkReturn() { + if [ $1 -ne 0 ]; then + error_addon="" + if [ ! -z "$3" ]; then + error_addon="; Terminating" + fi + log "ERROR: ${2} failed: returned ${1}${error_addon}" + if [ ! -z "$3" ]; then + >&2 echo + >&2 echo '***' + >&2 echo '*** WARNING: The results are incomplete. Do not use! ***' + >&2 echo '***' + kill -s TERM $TOP_PID + fi + fi +} + +prompt_pool() { + # note: all prompts go to stderr so stdout contains just the result + >&2 echo "Available pools:" + rados ${CEPH_CONF} lspools >"$temp_file" 2>"$lspools_err" + checkReturn $? "Listing pools failed" 1 + + >&2 sed 's/^/ /' "$temp_file" # list pools and indent + >&2 printf "Which pool do you want to search for orphans (for multiple, use space-separated list)? " + local mypool + read mypool + echo $mypool +} + +radosgw_radoslist() { + log "Running 'radosgw-admin bucket radoslist'." + rm -f "$rgwadmin_flag" &> /dev/null + radosgw-admin ${CEPH_CONF} bucket radoslist >"$rgwadmin_out" 2>"$rgwadmin_err" + RETVAL=$? + if [ "$RETVAL" -ne 0 ] ;then + touch "$rgwadmin_flag" + fi + checkReturn $RETVAL "radosgw-admin radoslist" 1 + log "Completed 'radosgw-admin bucket radoslist'." + + log "Sorting 'radosgw-admin bucket radoslist' output." + sort -T ${temp_prefix} -u "$rgwadmin_out" > "$rgwadmin_temp" + checkReturn $? "Sorting 'radosgw-admin bucket radoslist' output" 1 + log "Completed sorting 'radosgw-admin bucket radoslist'." + + log "Moving 'radosgw-admin bucket radoslist' output." + mv -f "$rgwadmin_temp" "$rgwadmin_out" + checkReturn $? "Moving 'radosgw-admin bucket radoslist' output" 1 + log "Completed moving 'radosgw-admin bucket radoslist' output." +} + +rados_ls() { + log "Starting 'rados ls' function." + rm -f "$rados_flag" &> /dev/null + rm -f "$rados_out" &> /dev/null + local mypool + for mypool in $pool; do + log "Running 'rados ls' on pool ${mypool}." + rados ${CEPH_CONF} ls --pool="$mypool" --all >>"$rados_out" 2>"$rados_err" + RETVAL=$? + if [ "$RETVAL" -ne 0 ] ;then + touch "$rados_flag" + fi + checkReturn $RETVAL "'rados ls' on pool ${mypool}" 1 + log "Completed 'rados ls' on pool ${mypool}." + done + if [ ! -e "$rados_flag" ]; then + # NOTE: Each entry (line of output) of `rados ls --all` should be in + # one of four formats depending on whether or not an entry has a + # namespace and/or locator: + # + # <TAB>oid + # <TAB>oid<TAB>locator + # namespace<TAB>oid + # namespace<TAB>oid<TAB>locator + # + # Any occurrences of the 2nd, 3rd, or 4th (i.e., existence of + # namespace and/or locator) should cause the create of the "odd" file + # and an explanation in the output, and those entries will not be + # retained, and therefore they will not be called out as orphans. They + # will need special handling by the end-user as we do not expect + # namespaces or locators. + + # check for namespaces -- any line that does not begin with a tab + # indicates a namespace; add those to "odd" file and set flag; note: + # this also picks up entries with namespace and locator + log "Checking for namespaces" + grep --text $'^[^\t]' "$rados_out" >"$rados_odd" + if [ "${PIPESTATUS[0]}" -eq 0 ] ;then + log "Namespaces found" + namespace_found=1 + fi + + # check for locators (w/o namespace); we idenitfy them by skipping + # past the empty namespace (i.e., one TAB), skipping past the oid, + # then looking for a TAB; note we use egrep to get the '+' character + # and the $ in front of the ' allows the \t to be interpreted as a TAB + log "Checking for locators" + egrep --text $'^\t[[:graph:]]+\t' "$rados_out" >>"$rados_odd" + if [ "${PIPESTATUS[0]}" -eq 0 ] ;then + log "Locator found" + locator_found=1 + fi + + # extract the entries that are just oids (i.e., no namespace or + # locator) for further processing; only look at lines that begin with + # a TAB and do not contain a second TAB, and then grab everything + # after the initial TAB + log "Generating final 'rados ls' output (without namespaces or locators)" + grep --text $'^\t' "$rados_out" | grep --text -v $'^\t.*\t' | sed -E 's/^\t//' >"$temp_file" + mv -f "$temp_file" "$rados_out" + + log "Sorting 'rados ls' output(s)." + sort -T ${temp_prefix} -u "$rados_out" >"$temp_file" + checkReturn $? "Sorting 'rados ls' output(s)" 1 + log "Sorting 'rados ls' output(s) complete." + + log "Moving sorted output(s)." + mv -f "$temp_file" "$rados_out" + checkReturn $? "Moving temp file to output file" 1 + fi +} + +temp_prefix="/tmp" +if [ ! -z "$2" ]; then + if [ -d "$2" ]; then + temp_prefix=$2 + else + echo + echo "ERROR: Provided temp directory does not exist: ${2}" + usage + fi + temp_prefix="$2" +fi +temp_file=${temp_prefix}/temp.$$ +rados_flag=${temp_prefix}/rados_flag.$$ +rgwadmin_flag=${temp_prefix}/rgwadmin_flag.$$ +rgwadmin_temp=${temp_prefix}/rgwadmin_temp.$$ + +if [ $# -eq 0 ] ;then + pool="$(prompt_pool)" +else + if [ "$1" == "-h" ]; then + usage + fi + pool="$1" +fi + +error=0 +rados ${CEPH_CONF} lspools > $temp_file +for mypool in $pool; do + if [ $(grep -c "^${mypool}$" "${temp_file}") -eq 0 ]; then + echo + echo "ERROR: Supplied pool does not exist: ${mypool}" + error=1 + fi +done +if [ $error -gt 0 ]; then + echo "Terminating" + exit 1 +fi + +log "Pool is \"$pool\"." +log "Note: output files produced will be tagged with the current timestamp -- ${timestamp}." + +rados_ls +radosgw_radoslist + +# +# Check for any empty output files +# + +for myfile in $rados_out $rgwadmin_out; do + if [ ! -s "${myfile}" ]; then + log "ERROR: Empty file detected: ${myfile}" + log "ERROR: RESULTS ARE INCOMPLETE - DO NOT USE" + exit 1 + fi +done + +log "Computing delta..." +ceph-diff-sorted "$rados_out" "$rgwadmin_out" | grep --text "^<" | sed 's/^< *//' >"$delta_out" +# use PIPESTATUS to get at exit status of first process in above pipe; +# 0 means same, 1 means different, >1 means error +if [ "${PIPESTATUS[0]}" -gt 1 ] ;then + log "ERROR: ceph-diff-sorted failed with status: ${PIPESTATUS[0]}" + log "TERMINATING - Results are incomplete - DO NOT USE" + exit 1 +fi + +log "Computing results..." +found=$(wc -l < "$delta_out") +possible=$(wc -l < "$rados_out") +percentage=0 +if [ $possible -ne 0 ] ;then + percentage=$(expr 100 \* $found / $possible) +fi + +echo "$found potential orphans found out of a possible $possible (${percentage}%)." +echo "The results can be found in '${delta_out}'." +echo " Intermediate files are '${rados_out}' and '${rgwadmin_out}'." +if [ -n "$namespace_found" -o -n "$locator_found" ] ;then + echo " Note: 'rados ls' found entries that might be in a namespace or might" + echo " have a locator; see '${rados_odd}' for those entries." +fi +echo "***" +echo "*** WARNING: This is EXPERIMENTAL code and the results should be used" +echo "*** only with CAUTION!" +echo "***" +echo "Done at $(date +%F\ %T)." |