summaryrefslogtreecommitdiffstats
path: root/src/tools/rbd_recover_tool/rbd-recover-tool
diff options
context:
space:
mode:
Diffstat (limited to 'src/tools/rbd_recover_tool/rbd-recover-tool')
-rwxr-xr-xsrc/tools/rbd_recover_tool/rbd-recover-tool327
1 files changed, 327 insertions, 0 deletions
diff --git a/src/tools/rbd_recover_tool/rbd-recover-tool b/src/tools/rbd_recover_tool/rbd-recover-tool
new file mode 100755
index 00000000..b7a25865
--- /dev/null
+++ b/src/tools/rbd_recover_tool/rbd-recover-tool
@@ -0,0 +1,327 @@
+#!/usr/bin/env bash
+# file: rbd-recover-tool
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+# rbd-recover-tool is an offline recover tool for rbd image in replicated pool
+# when ceph cluster is stopped.
+# it is a simple disater recovery policy, just for urgent condition
+
+my_dir=$(dirname "$0")
+
+. $my_dir/common_h
+. $my_dir/metadata_h
+. $my_dir/epoch_h
+. $my_dir/database_h
+
+#scp files from admin node to osd node
+file1=common_h
+file2=metadata_h
+file3=epoch_h
+file4=osd_job
+
+#------------ admin node's action -------------
+
+function scp_file()
+{
+ local func="scp_file"
+ file=$1
+ if [ "$1"x = ""x ];then
+ echo "$func: not file input"
+ exit
+ fi
+ for host in `cat $osd_host`
+ do
+ {
+ echo "$func: $host"
+ scp $ssh_option $file $host:$job_path 1>/dev/null
+ } &
+ done
+}
+
+function scp_files()
+{
+ local func="scp_files"
+ for host in `cat $osd_host`
+ do
+ {
+ echo "$func: $host"
+ scp $ssh_option $file1 $host:$job_path
+ scp $ssh_option $file2 $host:$job_path
+ scp $ssh_option $file3 $host:$job_path
+ scp $ssh_option $file4 $host:$job_path
+ } &
+ done
+ wait
+ echo "$func: finish"
+}
+
+function scatter_node_jobs()
+{
+ local func="scatter_node_jobs"
+ local host=
+ local data_path=
+ echo "$func: flush osd journal & generate infos: omap, pg, image metadata ..."
+
+ trap 'echo $func failed; exit' INT HUP
+ while read line
+ do
+ {
+ host=`echo $line|awk '{print $1}'`
+ data_path=`echo $line|awk '{print $2}'`
+ check_osd_process $host
+
+ cmd="mkdir -p $job_path"
+ ssh $ssh_option $host $cmd
+ scp $ssh_option $file1 $host:$job_path >/dev/null
+ scp $ssh_option $file2 $host:$job_path >/dev/null
+ scp $ssh_option $file3 $host:$job_path >/dev/null
+ scp $ssh_option $file4 $host:$job_path >/dev/null
+
+ cmd="bash $job_path/osd_job flush_osd_journal $data_path;"
+ cmd="$cmd $job_path/osd_job do_omap_list $data_path;"
+ cmd="$cmd bash $job_path/osd_job do_pg_epoch $data_path;"
+ cmd="$cmd bash $job_path/osd_job do_image_list $data_path;"
+
+ ssh $ssh_option $host $cmd </dev/null
+ } &
+ done < $osd_host_path
+ wait
+ echo "$func: finish"
+}
+
+function gather_node_infos()
+{
+ local func="gather_node_infos"
+ echo "$func ..."
+ >$pg_coll
+ >$image_coll_v1
+ >$image_coll_v2
+ trap 'echo $func failed; exit' INT HUP
+ while read line
+ do
+ {
+ host=`echo $line|awk '{print $1}'`
+ data_path=`echo $line|awk '{print $2}'`
+ echo "$func: $host"
+ check_osd_process $host
+
+ #pg epoch
+ cmd1="bash $job_path/osd_job cat_pg_epoch $data_path"
+ ssh $ssh_option $host $cmd1 >> $pg_coll
+ #image v1
+ cmd2="bash $job_path/osd_job cat_image_v1 $data_path"
+ ssh $ssh_option $host $cmd2 >> $image_coll_v1
+ #image v2
+ cmd3="bash $job_path/osd_job cat_image_v2 $data_path"
+ ssh $ssh_option $host $cmd3 >> $image_coll_v2
+ } &
+ done < $osd_host_path
+ wait
+ echo "$func: finish"
+}
+
+function scatter_gather()
+{
+ local func="scatter_gather"
+ if [ ! -s $osd_host ];then
+ echo "$func: no osd_host input"
+ exit
+ fi
+ if [ ! -s $mon_host ];then
+ echo "$func: no mon_host input"
+ exit
+ fi
+ scatter_node_jobs
+ gather_node_infos
+}
+
+
+#------------- operations --------------
+
+function database()
+{
+ scatter_gather
+ gen_database
+}
+
+function list()
+{
+ list_images
+}
+
+function lookup()
+{
+ lookup_image $1 $2 $3
+}
+
+function recover()
+{
+ recover_image $1 $2 $3 $4
+}
+
+#------------- helper -------------
+
+function usage()
+{
+ local cmd_name="rbd-recover-tool"
+ echo
+ echo "$cmd_name is used to recover rbd image of replicated pool,
+ when all ceph services are stopped"
+ echo "Usage:"
+ echo "$cmd_name database
+ gather pg info, object info, image metadata,
+ and epoch info from all osd nodes,
+ this will cosume a long time, just be patient,
+ especially when scale up to 1000+ osds"
+ echo "$cmd_name list
+ list all rbd images of all replicated pools,
+ before to lookup & recover"
+ echo "$cmd_name lookup <pool_id>/<image_name>[@[<snap_name>]]
+ show image metadata: image format, rbd id, size, order, snapseq
+ In addition, for image with snapshots,
+ this will list all snapshot infomations"
+ echo "$cmd_name recover <pool_id>/<image_name>[@[<snap_name>]] [</path/to/store/image>]
+ all snapshots share one image head, to economize disk space
+ so there is only one snapshot at any time,
+ image is saved at </path/to/store/image>/pool_<pool_id>/image_name/image_name
+ cat <path/to/store/image>/pool_<pool_id>/image_name/@CURRENT,
+ will show snapid
+ recover to raw image/nosnap/head: <image_name>
+ rollback to image head: <image_name>@
+ rollback to image snap: <image_name>@<snap_name>
+ recover steps:
+ 1. recover image nosnap (only one time)
+ 2. rollback to image snap"
+}
+
+function get_path()
+{
+ local func="get_path"
+ if [ $# -lt 1 ];then
+ return
+ fi
+ if [[ $1 =~ // ]];then
+ return # "/path//to" is invalid
+ fi
+ local parent=`dirname $1`
+ local name=`basename $1`
+ if [ "$parent"x = "/"x ];then
+ echo "$parent$name"
+ else
+ echo -n "$parent/$name"
+ fi
+}
+
+function admin_cmd()
+{
+ local func="admin_cmd"
+ if [ $# -lt 1 ];then
+ usage
+ exit
+ fi
+ if [ "$1"x = "-h"x ] || [ "$1"x = "--help"x ];then
+ usage
+ exit
+ fi
+
+ if [ "$1"x = "database"x ];then
+ if [ $# -gt 1 ];then
+ usage
+ exit
+ fi
+ # remove osd_host to refresh osd_host and osd_host_mapping
+ rm -f $osd_host
+ init_env_admin
+ database
+ elif [ "$1"x = "list"x ];then
+ if [ $# -gt 1 ];then
+ usage
+ exit
+ fi
+ init_env_admin
+ list
+ elif [ "$1"x = "lookup"x ];then
+ if [ $# -gt 2 ];then
+ usage
+ exit
+ fi
+ local pool_id=-1
+ local image_name=
+ local snap_name=
+ if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then
+ pool_id="${BASH_REMATCH[1]}"
+ image_name="${BASH_REMATCH[2]}"
+ elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
+ pool_id="${BASH_REMATCH[1]}"
+ image_name="${BASH_REMATCH[2]}"
+ snap_name="${BASH_REMATCH[3]}"
+ else
+ echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
+ exit
+ fi
+ init_env_admin
+ lookup $pool_id $image_name $snap_name
+ elif [ "$1"x = "recover"x ];then
+ if [ $# -lt 2 ] || [ $# -gt 3 ];then
+ usage
+ exit
+ fi
+ local pool_id=-1
+ local image_name=
+ local snap_name=@
+ local image_dir=
+ if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then
+ pool_id="${BASH_REMATCH[1]}"
+ image_name="${BASH_REMATCH[2]}"
+ elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
+ pool_id="${BASH_REMATCH[1]}"
+ image_name="${BASH_REMATCH[2]}"
+ snap_name="${BASH_REMATCH[3]}"
+ if [ "$snap_name"x = ""x ];then
+ snap_name=@@
+ fi
+ else
+ echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
+ exit
+ fi
+ if [ $# = 3 ];then
+ image_dir=`get_path $3`
+ if [ "image_dir"x = ""x ];then
+ echo "$3 invalid"
+ exit
+ fi
+ fi
+ init_env_admin
+ recover $pool_id $image_name $snap_name $image_dir
+ elif [ "$1"x = "scp_files"x ];then
+ if [ $# -gt 1 ];then
+ exit
+ fi
+ admin_parse_osd
+ scp_files
+ elif [ "$1"x = "scp_file"x ];then
+ if [ $# -gt 2 ];then
+ exit
+ fi
+ admin_parse_osd
+ scp_file $2
+ else
+ echo "$func: $1: command not found"
+ fi
+}
+
+admin_cmd $*