diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/rgw/rgw-restore-bucket-index | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/rgw/rgw-restore-bucket-index')
-rwxr-xr-x | src/rgw/rgw-restore-bucket-index | 250 |
1 files changed, 250 insertions, 0 deletions
diff --git a/src/rgw/rgw-restore-bucket-index b/src/rgw/rgw-restore-bucket-index new file mode 100755 index 000000000..056658119 --- /dev/null +++ b/src/rgw/rgw-restore-bucket-index @@ -0,0 +1,250 @@ +#!/usr/bin/env bash + +# version 2023-03-21 + +# rgw-restore-bucket-index is an EXPERIMENTAL tool to use in case +# bucket index entries for objects in the bucket are somehow lost. It +# is expected to be needed and used rarely. A bucket name is provided +# and the data pool for that bucket is scanned for all head objects +# matching the bucket's marker. The rgw object name is then extracted +# from the rados object name, and `radosgw-admin bucket reindex ...` +# is used to add the bucket index entry. +# +# Because this script must process json objects, the `jq` tool must be +# installed on the system. +# +# Usage: $0 [--proceed] <bucket-name> [data-pool-name] +# +# This tool is designed to be interactive, allowing the user to +# examine the list of objects to be reindexed before +# proceeding. However, if the "--proceed" option is provided, the +# script will not prompt the user and simply proceed. + +trap "clean ; exit 1" TERM +export TOP_PID=$$ + +# IMPORTANT: affects order produced by 'sort' and 'ceph-diff-sorted' +# relies on this ordering +export LC_ALL=C + +export bkt_entry=/tmp/rgwrbi-bkt-entry.$$ +export bkt_inst=/tmp/rgwrbi-bkt-inst.$$ +export bkt_inst_new=/tmp/rgwrbi-bkt-inst-new.$$ +export obj_list=/tmp/rgwrbi-object-list.$$ +export zone_info=/tmp/rgwrbi-zone-info.$$ +export clean_temps=1 + +# number of seconds for a bucket index pending op to be completed via +# dir_suggest mechanism +pending_op_secs=120 + +# +if which radosgw-admin > /dev/null ;then + : +else + echo 'Error: must have command `radosgw-admin` installed and on $PATH for operation.' + exit 1 +fi + +# make sure jq is available +if which jq > /dev/null ;then + : +else + echo 'Error: must have command `jq` installed and on $PATH for json parsing.' + exit 1 +fi + +clean() { + if [ -n "$clean_temps" ] ;then + rm -f $bkt_entry $bkt_inst $bkt_inst_new $obj_list $zone_info + fi +} + +super_exit() { + kill -s TERM $TOP_PID +} + +usage() { + >&2 cat << EOF + +Usage: $0 [--proceed] <bucket-name> [data-pool-name] + NOTE: This tool is currently considered EXPERIMENTAL. + NOTE: If a data-pool-name is not supplied then it will be inferred from bucket and zone information. + NOTE: If --proceed is provided then user will not be prompted to proceed. Use with caution. +EOF + super_exit +} + +# strips the starting and ending double quotes from a string, so: +# "dog" -> dog +# "dog -> "dog +# d"o"g -> d"o"g +# "do"g" -> do"g +strip_quotes() { + echo "$1" | sed 's/^"\(.*\)"$/\1/' +} + +# Determines the name of the data pool. Expects the optional +# command-line argument to appear as $1 if there is one. The +# command-line has the highest priority, then the "explicit_placement" +# in the bucket instance data, and finally the "placement_rule" in the +# bucket instance data. +get_pool() { + # command-line + if [ -n "$1" ] ;then + echo "$1" + exit 0 + fi + + # explicit_placement + expl_pool=$(strip_quotes $(jq '.data.bucket_info.bucket.explicit_placement.data_pool' $bkt_inst)) + if [ -n "$expl_pool" ] ;then + echo "$expl_pool" + exit 0 + fi + + # placement_rule + plmt_rule=$(strip_quotes $(jq '.data.bucket_info.placement_rule' $bkt_inst)) + plmt_pool=$(echo "$plmt_rule" | awk -F / '{print $1}') + plmt_class=$(echo "$plmt_rule" | awk -F / '{print $2}') + if [ -z "$plmt_class" ] ;then + plmt_class=STANDARD + fi + + radosgw-admin zone get >$zone_info 2>/dev/null + pool=$(strip_quotes $(jq ".placement_pools [] | select(.key | contains(\"${plmt_pool}\")) .val .storage_classes.${plmt_class}.data_pool" $zone_info)) + + if [ -z "$pool" ] ;then + echo ERROR: unable to determine pool. + super_exit + fi + echo "$pool" +} + +if [ $1 == "--proceed" ] ;then + echo "NOTICE: This tool is currently considered EXPERIMENTAL." + proceed=1 + shift +fi + +# expect 1 or 2 arguments +if [ $# -eq 0 -o $# -gt 2 ] ;then + usage +fi + +bucket=$1 + +# read bucket entry metadata +radosgw-admin metadata get bucket:$bucket >$bkt_entry 2>/dev/null +marker=$(strip_quotes $(jq ".data.bucket.marker" $bkt_entry)) +bucket_id=$(strip_quotes $(jq ".data.bucket.bucket_id" $bkt_entry)) +if [ -z "$marker" -o -z "$bucket_id" ] ;then + echo "ERROR: unable to read entry-point metadata for bucket \"$bucket\"." + clean + exit 1 +fi + +echo marker is $marker +echo bucket_id is $bucket_id + +# read bucket instance metadata +radosgw-admin metadata get bucket.instance:${bucket}:$bucket_id >$bkt_inst 2>/dev/null + +# handle versioned buckets +bkt_flags=$(jq ".data.bucket_info.flags" $bkt_inst) +if [ -z "$bkt_flags" ] ;then + echo "ERROR: unable to read instance metadata for bucket \"$bucket\"." + exit 1 +fi + +# mask bit indicating it's a versioned bucket +is_versioned=$(( $bkt_flags & 2)) +if [ "$is_versioned" -ne 0 ] ;then + echo "Error: this bucket appears to be versioned, and this tool cannot work with versioned buckets." + clean + exit 1 +fi + +# examine number of bucket index shards +num_shards=$(jq ".data.bucket_info.num_shards" $bkt_inst) +echo number of bucket index shards is $num_shards + +# determine data pool +pool=$(get_pool $2) +echo data pool is $pool + +# search the data pool for all of the head objects that begin with the +# marker that are not in namespaces (indicated by an extra underscore) +# and then strip away all but the rgw object name +( rados -p $pool ls | grep "^${marker}_[^_]" | sed "s/^${marker}_\(.*\)/\1/" >$obj_list ) 2>/dev/null + +# handle the case where the resulting object list file is empty +if [ -s $obj_list ] ;then + : +else + echo "NOTICE: No head objects for bucket \"$bucket\" were found in pool \"$pool\", so nothing was recovered." + clean + exit 0 +fi + +if [ -z "$proceed" ] ;then + # warn user and get permission to proceed + echo "NOTICE: This tool is currently considered EXPERIMENTAL." + echo "The list of objects that we will attempt to restore can be found in \"$obj_list\"." + echo "Please review the object names in that file (either below or in another window/terminal) before proceeding." + while true ; do + read -p "Type \"proceed!\" to proceed, \"view\" to view object list, or \"q\" to quit: " action + if [ "$action" == "q" ] ;then + echo "Exiting..." + clean + exit 0 + elif [ "$action" == "view" ] ;then + echo "Viewing..." + less $obj_list + elif [ "$action" == "proceed!" ] ;then + echo "Proceeding..." + break + else + echo "Error: response \"$action\" is not understood." + fi + done +fi + +# execute object rewrite on all of the head objects +radosgw-admin object reindex --bucket=$bucket --objects-file=$obj_list 2>/dev/null +reindex_done=$(date +%s) + +# note: large is 2^30 +export large=1073741824 + +listcmd="radosgw-admin bucket list --bucket=$bucket --allow-unordered --max-entries=$large" + +if [ -n "$proceed" ] ;then + sleep $pending_op_secs + $listcmd >/dev/null 2>/dev/null +else + echo "NOTICE: Bucket stats are currently incorrect. They can be restored with the following command after 2 minutes:" + echo " $listcmd" + + while true ; do + read -p "Would you like to take the time to recalculate bucket stats now? [yes/no] " action + if [ "$action" == "no" ] ;then + break + elif [ "$action" == "yes" ] ;then + # make sure at least $pending_op_secs since reindex completed + now=$(date +%s) + sleep_time=$(expr $pending_op_secs - $now + $reindex_done) + if [ "$sleep_time" -gt 0 ] ;then + sleep $sleep_time + fi + + $listcmd >/dev/null 2>/dev/null + break + else + echo "Error: response \"$action\" is not understood." + fi + done +fi + +clean +echo Done |