1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
|
#!/usr/bin/env bash
# version 2023-03-21
# rgw-restore-bucket-index is an EXPERIMENTAL tool to use in case
# bucket index entries for objects in the bucket are somehow lost. It
# is expected to be needed and used rarely. A bucket name is provided
# and the data pool for that bucket is scanned for all head objects
# matching the bucket's marker. The rgw object name is then extracted
# from the rados object name, and `radosgw-admin object reindex ...`
# is used to add the bucket index entries.
#
# Because this script must process json objects, the `jq` tool must be
# installed on the system.
#
# Usage: $0 [--proceed] <bucket-name> [data-pool-name]
#
# This tool is designed to be interactive, allowing the user to
# examine the list of objects to be reindexed before
# proceeding. However, if the "--proceed" option is provided, the
# script will not prompt the user and simply proceed.
# super_exit sends SIGTERM to $TOP_PID (this script), possibly from inside
# a command substitution; on receipt, remove the temp files and abort.
trap "clean ; exit 1" TERM
export TOP_PID=$$

# IMPORTANT: affects order produced by 'sort' and 'ceph-diff-sorted'
# relies on this ordering
export LC_ALL=C

# when non-empty, clean() removes the temporary files created below
export clean_temps=1

# number of seconds for a bucket index pending op to be completed via
# dir_suggest mechanism
pending_op_secs=120

# Check for the required tools before creating any temporary file, so
# nothing is left behind in /tmp when one of them is missing. `command -v`
# is a shell builtin, whereas `which` is an external program that is not
# guaranteed to be installed.
if ! command -v radosgw-admin >/dev/null 2>&1; then
  echo 'Error: must have command `radosgw-admin` installed and on $PATH for operation.' >&2
  exit 1
fi

# make sure jq is available
if ! command -v jq >/dev/null 2>&1; then
  echo 'Error: must have command `jq` installed and on $PATH for json parsing.' >&2
  exit 1
fi

# Temporary work files. They are created with mktemp rather than with
# predictable "/tmp/name.$$" paths: output is redirected into these files,
# so a pre-planted symlink at a guessable name could otherwise be used to
# overwrite arbitrary files.
bkt_entry='' bkt_inst='' bkt_inst_new='' obj_list='' zone_info=''
if ! {
  bkt_entry=$(mktemp /tmp/rgwrbi-bkt-entry.XXXXXX) &&
    bkt_inst=$(mktemp /tmp/rgwrbi-bkt-inst.XXXXXX) &&
    bkt_inst_new=$(mktemp /tmp/rgwrbi-bkt-inst-new.XXXXXX) &&
    obj_list=$(mktemp /tmp/rgwrbi-object-list.XXXXXX) &&
    zone_info=$(mktemp /tmp/rgwrbi-zone-info.XXXXXX)
}; then
  echo 'Error: unable to create temporary files in /tmp.' >&2
  # remove whichever files were created before the failure
  for tmp_file in "$bkt_entry" "$bkt_inst" "$bkt_inst_new" "$obj_list" "$zone_info"; do
    [[ -n "$tmp_file" ]] && rm -f -- "$tmp_file"
  done
  exit 1
fi
export bkt_entry bkt_inst bkt_inst_new obj_list zone_info
# Removes the script's temporary files.
# Globals (read): clean_temps - cleanup is skipped when this is empty;
#                 bkt_entry, bkt_inst, bkt_inst_new, obj_list, zone_info -
#                 paths of the files to remove.
# Returns: the exit status of rm, or 0 when cleanup is disabled.
clean() {
  if [[ -n "$clean_temps" ]]; then
    # Quoted so a path containing whitespace or glob characters is removed
    # as a single file; "--" guards against a path starting with "-".
    rm -f -- "$bkt_entry" "$bkt_inst" "$bkt_inst_new" "$obj_list" "$zone_info"
  fi
}
# Delivers SIGTERM to the process whose PID is stored in TOP_PID. The
# script stores its own PID there and traps TERM, so this stops the whole
# script even when called from inside a subshell.
super_exit() {
  kill -TERM $TOP_PID
}
# Writes the usage text to stderr and then calls super_exit to stop the
# script.
usage() {
  printf '%s\n' \
    "Usage: $0 [--proceed] <bucket-name> [data-pool-name]" \
    "NOTE: This tool is currently considered EXPERIMENTAL." \
    "NOTE: If a data-pool-name is not supplied then it will be inferred from bucket and zone information." \
    "NOTE: If --proceed is provided then user will not be prompted to proceed. Use with caution." \
    >&2
  super_exit
}
# Strips the starting and ending double quotes from a string; both must be
# present for anything to be removed, so:
#   "dog"  -> dog
#   "dog   -> "dog
#   d"o"g  -> d"o"g
#   "do"g" -> do"g
# Arguments: $1 - the string to process
# Outputs:   the resulting string on stdout
strip_quotes() {
  # printf instead of echo: echo would treat a value such as -n or -e as
  # an option and print nothing.
  printf '%s\n' "$1" | sed 's/^"\(.*\)"$/\1/'
}
# Determines the name of the data pool and prints it on stdout. Expects
# the optional command-line argument to appear as $1 if there is one. The
# command-line has the highest priority, then the "explicit_placement"
# in the bucket instance data, and finally the "placement_rule" in the
# bucket instance data, which is looked up in the output of
# `radosgw-admin zone get`.
#
# Meant to be called through command substitution, e.g. pool=$(get_pool "$2").
# Only the pool name is ever written to stdout; the error message goes to
# stderr so that it is shown to the user instead of being captured.
#
# Globals:   bkt_inst (read)  - file holding the bucket instance metadata
#            zone_info (read) - path the zone description is written to
# Arguments: $1 - optional data pool name
# Returns:   0 when a pool was printed; on failure calls super_exit and
#            returns 1
get_pool() {
  local expl_pool plmt_rule plmt_name plmt_class pool

  # command-line
  if [[ -n "${1:-}" ]]; then
    printf '%s\n' "$1"
    return 0
  fi

  # explicit_placement; "// empty" turns a missing (null) field into no
  # output rather than the literal string "null"
  expl_pool=$(jq -r '.data.bucket_info.bucket.explicit_placement.data_pool // empty' "$bkt_inst")
  if [[ -n "$expl_pool" ]]; then
    printf '%s\n' "$expl_pool"
    return 0
  fi

  # placement_rule: text before the first "/" selects the placement pool
  # entry, text between the first and second "/" is the storage class
  plmt_rule=$(jq -r '.data.bucket_info.placement_rule // empty' "$bkt_inst")
  plmt_name=${plmt_rule%%/*}
  plmt_class=
  if [[ "$plmt_rule" == */* ]]; then
    plmt_class=${plmt_rule#*/}
    plmt_class=${plmt_class%%/*}
  fi
  plmt_class=${plmt_class:-STANDARD}

  radosgw-admin zone get >"$zone_info" 2>/dev/null

  # Values are handed to jq with --arg instead of being spliced into the
  # filter, so a storage class such as COLD-TIER is not parsed as jq
  # syntax. An exact key match is preferred; only when there is none does
  # the lookup fall back to the first key that contains the name.
  pool=$(jq -r --arg name "$plmt_name" --arg class "$plmt_class" '
    [.placement_pools[]? | select(.key == $name)] as $exact
    | (if ($exact | length) > 0
       then $exact
       else [.placement_pools[]? | select(.key | contains($name))]
       end)
    | .[0].val.storage_classes[$class].data_pool // empty' "$zone_info")

  if [[ -z "$pool" ]]; then
    echo "ERROR: unable to determine pool." >&2
    super_exit
    return 1
  fi
  printf '%s\n' "$pool"
}
# ---- command line ----
# "--proceed" is recognized only as the first argument. The expansion is
# quoted (with an empty default) so that running the script without any
# arguments reaches usage() instead of making the test itself fail.
if [[ "${1:-}" == "--proceed" ]]; then
  echo "NOTICE: This tool is currently considered EXPERIMENTAL."
  proceed=1
  shift
fi

# expect 1 or 2 arguments
if (( $# == 0 || $# > 2 )); then
  usage
fi

bucket=$1

# ---- bucket entry-point metadata ----
radosgw-admin metadata get "bucket:$bucket" >"$bkt_entry" 2>/dev/null

# "-r" prints raw strings and "// empty" maps a missing (null) field to no
# output, so a missing field is caught by the emptiness check below
marker=$(jq -r '.data.bucket.marker // empty' "$bkt_entry")
bucket_id=$(jq -r '.data.bucket.bucket_id // empty' "$bkt_entry")

if [[ -z "$marker" || -z "$bucket_id" ]]; then
  echo "ERROR: unable to read entry-point metadata for bucket \"$bucket\"." >&2
  clean
  exit 1
fi

echo "marker is $marker"
echo "bucket_id is $bucket_id"

# ---- bucket instance metadata ----
radosgw-admin metadata get "bucket.instance:${bucket}:${bucket_id}" >"$bkt_inst" 2>/dev/null

# handle versioned buckets; anything other than a plain number means the
# flags could not be read, in which case we refuse to continue
bkt_flags=$(jq -r '.data.bucket_info.flags // empty' "$bkt_inst")
if [[ ! "$bkt_flags" =~ ^[0-9]+$ ]]; then
  echo "ERROR: unable to read instance metadata for bucket \"$bucket\"." >&2
  clean
  exit 1
fi

# mask bit indicating it's a versioned bucket
is_versioned=$(( bkt_flags & 2 ))
if (( is_versioned != 0 )); then
  echo "Error: this bucket appears to be versioned, and this tool cannot work with versioned buckets." >&2
  clean
  exit 1
fi

# examine number of bucket index shards
num_shards=$(jq ".data.bucket_info.num_shards" "$bkt_inst")
echo "number of bucket index shards is $num_shards"

# determine data pool
pool=$(get_pool "${2:-}")
echo "data pool is $pool"

# search the data pool for all of the head objects that begin with the
# marker that are not in namespaces (indicated by an extra underscore)
# and then strip away all but the rgw object name
{
  rados -p "$pool" ls |
    grep "^${marker}_[^_]" |
    sed "s/^${marker}_\(.*\)/\1/" >"$obj_list"
} 2>/dev/null

# handle the case where the resulting object list file is empty
if [[ ! -s "$obj_list" ]]; then
  echo "NOTICE: No head objects for bucket \"$bucket\" were found in pool \"$pool\", so nothing was recovered."
  clean
  exit 0
fi

if [[ -z "$proceed" ]]; then
  # warn user and get permission to proceed
  echo "NOTICE: This tool is currently considered EXPERIMENTAL."
  echo "The list of objects that we will attempt to restore can be found in \"$obj_list\"."
  echo "Please review the object names in that file (either below or in another window/terminal) before proceeding."
  while true; do
    # read fails at end of input; if it also delivered no text, nobody is
    # answering, so stop rather than repeat the prompt forever
    if ! read -r -p "Type \"proceed!\" to proceed, \"view\" to view object list, or \"q\" to quit: " action &&
      [[ -z "$action" ]]; then
      echo
      echo "ERROR: end of input reached without a response; exiting without reindexing." >&2
      clean
      exit 1
    fi
    case "$action" in
      q)
        echo "Exiting..."
        clean
        exit 0
        ;;
      view)
        echo "Viewing..."
        less "$obj_list"
        ;;
      'proceed!')
        echo "Proceeding..."
        break
        ;;
      *)
        echo "Error: response \"$action\" is not understood."
        ;;
    esac
  done
fi

# execute object reindex on all of the head objects
if ! radosgw-admin object reindex --bucket="$bucket" --objects-file="$obj_list" 2>/dev/null; then
  echo "WARNING: \`radosgw-admin object reindex\` exited with a non-zero status; some objects may not have been reindexed." >&2
fi
reindex_done=$(date +%s)

# note: large is 2^30
export large=1073741824

# The listing command is kept as an array so it runs with exactly these
# arguments; listcmd is the same command as text, for display only.
list_args=(radosgw-admin bucket list "--bucket=$bucket" --allow-unordered "--max-entries=$large")
listcmd="${list_args[*]}"

if [[ -n "$proceed" ]]; then
  sleep "$pending_op_secs"
  "${list_args[@]}" >/dev/null 2>/dev/null
else
  echo "NOTICE: Bucket stats are currently incorrect. They can be restored with the following command after 2 minutes:"
  echo " $listcmd"

  while true; do
    # end of input with no answer is treated like "no"
    if ! read -r -p "Would you like to take the time to recalculate bucket stats now? [yes/no] " action &&
      [[ -z "$action" ]]; then
      echo
      break
    fi
    if [[ "$action" == "no" ]]; then
      break
    elif [[ "$action" == "yes" ]]; then
      # make sure at least $pending_op_secs since reindex completed
      now=$(date +%s)
      sleep_time=$(( pending_op_secs - now + reindex_done ))
      if (( sleep_time > 0 )); then
        sleep "$sleep_time"
      fi
      "${list_args[@]}" >/dev/null 2>/dev/null
      break
    else
      echo "Error: response \"$action\" is not understood."
    fi
  done
fi

clean
echo Done
|