summaryrefslogtreecommitdiffstats
path: root/src/rgw/rgw-orphan-list
blob: 989b50600fd8fba28e010c169909eaec4b655935 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env bash

# version 2021-02-04

# IMPORTANT: LC_ALL affects the order produced by 'sort', and
# 'ceph-diff-sorted' relies on that ordering, so force the C locale
# for everything this script runs.
export LC_ALL=C

# directory in which all result, intermediate, and error files are written
out_dir="."

# Scratch space: use a private directory created by mktemp rather than
# a predictable name directly in /tmp, which another local user could
# pre-create or symlink. Files created inside it still get the normal
# umask permissions, so files later moved into ${out_dir} are unchanged.
temp_dir=$(mktemp -d "${TMPDIR:-/tmp}/rgw-orphan-list.XXXXXXXXXX") || {
    >&2 echo "Unable to create a temporary directory. Aborting."
    exit 1
}
# remove the scratch directory however the script ends
trap 'rm -rf -- "$temp_dir"' EXIT
temp_file="${temp_dir}/temp"

# every output file name carries the same UTC timestamp so that the
# files of one run can be matched up
timestamp=$(date -u +%Y%m%d%H%M%S)
lspools_err="${out_dir}/lspools-${timestamp}.error"
rados_out="${out_dir}/rados-${timestamp}.intermediate"
rados_odd="${out_dir}/rados-${timestamp}.issues"
rados_err="${out_dir}/rados-${timestamp}.error"
rgwadmin_out="${out_dir}/radosgw-admin-${timestamp}.intermediate"
rgwadmin_err="${out_dir}/radosgw-admin-${timestamp}.error"
delta_out="${out_dir}/orphan-list-${timestamp}.out"

# Report a fatal error and terminate the script with status 1.
#   $1 - name of the command that failed
#   $2 - (optional) file the user should review for details
#   $3 - (optional) short error description
# All text is written to stdout.
error_out() {
    local failed_cmd=$1
    local arg_count=$#

    printf '%s\n' "An error was encountered while running '${failed_cmd}'. Aborting."
    if (( arg_count > 2 )) ; then
        printf '%s\n' "Error: $3"
    fi
    if (( arg_count > 1 )) ; then
        printf '%s\n' "Review file '$2' for details."
    fi
    printf '%s\n' \
        "***" \
        "*** WARNING: The results are incomplete. Do not use! ***" \
        "***"
    exit 1
}

# Interactively ask the user which pool(s) to search.
#   Reads:   $temp_file (scratch file), $lspools_err (stderr of 'rados lspools')
#   Input:   one line from stdin with a space-separated list of pool names
#   Output:  the line that was entered, on stdout; everything else
#            (pool listing, prompt, errors) goes to stderr
#   Status:  non-zero if 'rados lspools' fails
# This function is meant to be called inside a command substitution.
prompt_pool() {
    # note: all prompts go to stderr so stdout contains just the result
    >&2 echo "Available pools:"
    if ! rados lspools >"$temp_file" 2>"$lspools_err" ;then
        # stdout is being captured by the caller, so send the error
        # text to stderr where the user can see it; error_out's exit
        # only ends the command-substitution subshell, so the caller
        # must check the resulting status
        error_out "rados lspools" "$lspools_err" >&2
    fi
    >&2 sed 's/^/    /' "$temp_file" # list pools and indent
    >&2 printf "Which pool do you want to search for orphans (for multiple, use space-separated list)? "
    local mypool
    # -r: take the input literally (no backslash processing)
    read -r mypool
    # quoted so the answer is not subject to pathname expansion
    printf '%s\n' "$mypool"
}

if [ $# -eq 0 ] ;then
    # a failure inside $(...) only terminates the subshell; stop the
    # whole script here rather than continuing with a bogus pool list
    pool="$(prompt_pool)" || exit 1
else
    pool="$*"
fi

# without at least one pool there is nothing to list, and the later
# steps would report a misleading "0 orphans" result
if [[ -z "${pool//[[:space:]]/}" ]] ;then
    >&2 echo "No pool was specified. Aborting."
    exit 1
fi

echo "Pool is \"$pool\"."

echo "Note: output files produced will be tagged with the current timestamp -- ${timestamp}."

echo "running 'rados ls' at $(date)"
# since --format is not specified, plain should be used

# Split the pool list on whitespace into an array. Unlike an unquoted
# $pool in the 'for' statement, this does not perform pathname
# expansion on the names. '-d ""' makes read consume the whole string,
# so any whitespace (including newlines) separates names.
pool_list=()
read -r -d '' -a pool_list <<<"$pool"

# start from an empty listing; each pool appends to it
: >"$rados_out"
for mypool in "${pool_list[@]}" ; do
    echo "running 'rados ls' on pool ${mypool}."
    # append stderr as well, so diagnostics from an earlier pool are
    # not overwritten by a later one
    if ! rados ls --pool="$mypool" --all >>"$rados_out" 2>>"$rados_err" ;then
        error_out "rados ls" "$rados_err"
    fi
done

# NOTE: Each entry (line of output) of `rados ls --all` should be in
# one of four formats depending on whether or not an entry has a
# namespace and/or locator:
#
#   <TAB>oid
#   <TAB>oid<TAB>locator
#   namespace<TAB>oid
#   namespace<TAB>oid<TAB>locator
#
# Any occurrence of the 2nd, 3rd, or 4th format (i.e., existence of a
# namespace and/or locator) causes the entry to be written to the
# "odd" file and an explanation to be added to the output. Those
# entries are not retained, and therefore they will not be called out
# as orphans. They need special handling by the end-user as we do not
# expect namespaces or locators.

# Print the entries of listing file $1 that have a namespace, i.e.,
# any line that does not begin with a TAB; note: this also picks up
# entries with both a namespace and a locator. Status is that of grep:
# 0 if at least one entry was found.
namespaced_entries() {
    grep --text $'^[^\t]' "$1"
}

# Print the entries of listing file $1 that have a locator but no
# namespace, i.e., any line that begins with a TAB (empty namespace)
# and contains a further TAB. The oid is deliberately not restricted
# to printable ASCII, since oids may contain spaces or non-ASCII
# bytes; every line with a second TAB is excluded from further
# processing, so every such line must be reported here. The $ in front
# of the ' allows the \t to be interpreted as a TAB. Status is that of
# grep: 0 if at least one entry was found.
locator_entries() {
    grep --text $'^\t.*\t' "$1"
}

# start with both flags cleared so a value inherited from the
# environment cannot trigger the note in the final report
namespace_found=
locator_found=

# check for namespaces; add those entries to the "odd" file and set flag
if namespaced_entries "$rados_out" >"$rados_odd" ;then
    namespace_found=1
fi

# check for locators (w/o namespace); append to the "odd" file and set flag
if locator_entries "$rados_out" >>"$rados_odd" ;then
    locator_found=1
fi

# Reduce the raw listing to plain oids (no namespace, no locator):
# keep lines that begin with a TAB, discard those holding a second
# TAB, strip the leading TAB, then sort and de-duplicate. The result
# replaces the raw listing.
grep --text $'^\t' "$rados_out" \
    | grep --text -v $'^\t.*\t' \
    | sed -E 's/^\t//' \
    | sort -u >"$temp_file"
mv -f "$temp_file" "$rados_out"

echo "running 'radosgw-admin bucket radoslist' at $(date)"
# collect the rados objects that RGW knows about; stderr is kept in
# its own file so it can be reviewed on failure
if ! radosgw-admin bucket radoslist >"$rgwadmin_out" 2>"$rgwadmin_err" ;then
    error_out "radosgw-admin radoslist" "$rgwadmin_err"
fi

# sort and de-duplicate via the scratch file, then replace the listing
sort -u <"$rgwadmin_out" >"$temp_file"
mv -f "$temp_file" "$rgwadmin_out"

echo "computing delta at $(date)"
# lines marked '<' exist only in the rados listing; strip the marker
# and keep them as the potential orphans
ceph-diff-sorted "$rados_out" "$rgwadmin_out" \
    | grep --text "^<" \
    | sed 's/^< *//' >"$delta_out"
# PIPESTATUS must be read immediately after the pipeline; element 0 is
# the exit status of ceph-diff-sorted:
# 0 means same, 1 means different, >1 means error
diff_status=${PIPESTATUS[0]}
if (( diff_status > 1 )) ;then
    error_out "ceph-diff-sorted"
fi

# count the potential orphans and the objects examined, then express
# the former as a whole-number percentage of the latter (0 when there
# was nothing to examine)
found=$(wc -l <"$delta_out")
possible=$(wc -l <"$rados_out")
if (( possible != 0 )) ;then
    percentage=$(( 100 * found / possible ))
else
    percentage=0
fi

# final summary: counts, where the results are, and the caveats
printf '%s\n' \
    "$found potential orphans found out of a possible $possible (${percentage}%)." \
    "The results can be found in '${delta_out}'." \
    "    Intermediate files are '${rados_out}' and '${rgwadmin_out}'."

# mention the "odd" file only when namespace or locator entries were seen
if [[ -n "$namespace_found" || -n "$locator_found" ]] ;then
    printf '%s\n' \
        "    Note: 'rados ls' found entries that might be in a namespace or might" \
        "          have a locator; see '${rados_odd}' for those entries."
fi

cat <<'EOF'
***
*** WARNING: This is EXPERIMENTAL code and the results should be used
***          only with CAUTION!
***
EOF
echo "Done at $(date)."