diff options
Diffstat (limited to 'src/tools/rbd_recover_tool')
-rw-r--r-- | src/tools/rbd_recover_tool/FAQ | 16 | ||||
-rw-r--r-- | src/tools/rbd_recover_tool/README | 97 | ||||
-rw-r--r-- | src/tools/rbd_recover_tool/TODO | 2 | ||||
-rw-r--r-- | src/tools/rbd_recover_tool/common_h | 412 | ||||
-rw-r--r-- | src/tools/rbd_recover_tool/config/mds_host | 0 | ||||
-rw-r--r-- | src/tools/rbd_recover_tool/config/mon_host | 0 | ||||
-rw-r--r-- | src/tools/rbd_recover_tool/config/osd_host_path | 0 | ||||
-rw-r--r-- | src/tools/rbd_recover_tool/database_h | 1134 | ||||
-rw-r--r-- | src/tools/rbd_recover_tool/epoch_h | 119 | ||||
-rw-r--r-- | src/tools/rbd_recover_tool/metadata_h | 368 | ||||
-rwxr-xr-x | src/tools/rbd_recover_tool/osd_job | 170 | ||||
-rwxr-xr-x | src/tools/rbd_recover_tool/rbd-recover-tool | 327 | ||||
-rwxr-xr-x | src/tools/rbd_recover_tool/test_rbd_recover_tool.sh | 542 |
13 files changed, 3187 insertions, 0 deletions
diff --git a/src/tools/rbd_recover_tool/FAQ b/src/tools/rbd_recover_tool/FAQ new file mode 100644 index 00000000..1655e853 --- /dev/null +++ b/src/tools/rbd_recover_tool/FAQ @@ -0,0 +1,16 @@ +# author: min chen(minchen@ubuntukylin.com) 2014 2015 + +1. error "get_image_metadata_v2: no meta_header_seq input" +cause: + database is old, refresh database +solution: + ./rbd-recover-tool database + +2. Error initializing leveldb: IO error: lock /var/lib/ceph/osd/ceph-0/current/omap/LOCK: Resource temporarily unavailable + ERROR: error flushing journal /var/lib/ceph/osd/ceph-0/journal for object store /var/lib/ceph/osd/ceph-0: (1) Operation not permitted +cause: + when ./rbd-recover-tool database is interrupted, the command has already been sent to each osd node, and a process is still reading leveldb and holding its LOCK + if ./rbd-recover-tool database is run again, all commands are sent to the osd nodes again while the previous process is still locking leveldb, so all new commands + fail. +solution: + wait until all previous commands have finished. diff --git a/src/tools/rbd_recover_tool/README b/src/tools/rbd_recover_tool/README new file mode 100644 index 00000000..d289c11c --- /dev/null +++ b/src/tools/rbd_recover_tool/README @@ -0,0 +1,97 @@ +# author: Min chen(minchen@ubuntukylin.com) 2014 2015 + +------------- ceph rbd recover tool ------------- + + ceph rbd recover tool is used for recovering ceph rbd images when all ceph services are killed. +it is based on ceph-0.80.x (Firefly and newer) + currently, ceph services (ceph-mon, ceph-osd) may eventually become unavailable because of bugs or something else +, especially on large scale ceph clusters, so that the ceph cluster can not supply service +and rbd images can not be accessed. In this case, a tool to recover rbd images is necessary. + ceph rbd recover tool is designed for exactly this: it can collect all objects of an image from distributed +osd nodes with the latest pg epoch, and splice objects by offset to a complete image. 
To make sure +object data is complete, this tool flushes the osd journal on each osd node before recovering. + but, there are some limitations: +-need ssh service and unobstructed network +-osd data must be accessed on local disk +-clone image is not supported, while snapshot is supported +-only support replicated pool + +before you run this tool, you should make sure that: +1). all processes (ceph-osd, ceph-mon, ceph-mds) are shutdown +2). ssh daemon is running & network is ok (ssh to each node without password) +3). ceph-kvstore-tool is installed(for ubuntu: apt-get install ceph-test) +4). osd disk is not crashed and data can be accessed on local filesystem + +-architecture: + + +---- osd.0 + | +admin_node -----------+---- osd.1 + | + +---- osd.2 + | + ...... + +-files: +admin_node: {rbd-recover-tool common_h epoch_h metadata_h database_h} +osd: {osd_job common_h epoch_h metadata_h} #/var/rbd_tool/osd_job +in this architecture, admin_node acts as client, osds act as server. +so, they run different files: +on admin_node run: rbd-recover-tool <action> [<parameters>] +on osd node run: ./osd_job <function> <parameters> +admin_node will copy files: osd_job, common_h, epoch_h, metadata_h to remote osd node + + +-config file +before you run this tool, make sure to write the config files first +osd_host_path: osd hostnames and osd data path #user input + osdhost0 /var/lib/ceph/osd/ceph-0 + osdhost1 /var/lib/ceph/osd/ceph-1 + ...... +mon_host: all mon node hostname #user input + monhost0 + monhost1 + ...... +mds_host: all mds node hostname #user input + mdshost0 + mdshost1 + ...... +then, init_env_admin function will create file: osd_host +osd_host: all osd node hostname #generated by init_env_admin, users can ignore it + osdhost0 + osdhost1 + ...... 
+ + +-usage: +rbd-recover-tool <operation> +<operation> : +database #generating offline database: hobject path, node hostname, pg_epoch and image metadata +list #list all images from offline database +lookup <pool_id>/<image_name>[@[<snap_name>]] #lookup image metadata in offline database +recover <pool_id>/<image_name>[@[<snap_name>]] [/path/to/store/image] #recover image data according to image metadata + +-steps: +1. stop all ceph services: ceph-mon, ceph-osd, ceph-mds +2. setup config files: osd_host_path, mon_host, mds_host +3. rbd-recover-tool database # wait a long time +4. rbd-recover-tool list +5. rbd-recover-tool recover <pool_id>/<image_name>[@[<snap_name>]] [/path/to/store/image] + + +-debug & error check +if an admin_node operation fails, you can check it on the osd node +cd /var/rbd_tool/osd_job +./osd_job <operation> +<operation> : +do_image_id <image_id_hobject> #get image id of image format v2 +do_image_id <image_header_hobject> #get image id of image format v1 +do_image_metadata_v1 <image_header_hobject> #get image metadata of image format v1, maybe pg epoch is not latest +do_image_metadata_v2 <image_header_hobject> #get image metadata of image format v2, maybe pg epoch is not latest +do_image_list #get all images on this osd(image head hobject) +do_pg_epoch #get all pg epoch and store it in /var/rbd_tool/single_node/node_pg_epoch +do_omap_list #list all omap headers and omap entries on this osd + + +-FAQ +file FAQ lists some common confusing cases while testing diff --git a/src/tools/rbd_recover_tool/TODO b/src/tools/rbd_recover_tool/TODO new file mode 100644 index 00000000..c36d4c94 --- /dev/null +++ b/src/tools/rbd_recover_tool/TODO @@ -0,0 +1,2 @@ + +1.support clone image diff --git a/src/tools/rbd_recover_tool/common_h b/src/tools/rbd_recover_tool/common_h new file mode 100644 index 00000000..f2df662a --- /dev/null +++ b/src/tools/rbd_recover_tool/common_h @@ -0,0 +1,412 @@ +#!/usr/bin/env bash +# file: common_h +# +# Copyright (C) 2015 Ubuntu 
Kylin +# +# Author: Min Chen <minchen@ubuntukylin.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +my_dir=$(dirname "$0") + +# admin node init path +rbd_image=/var/rbd_tool/rbd_image +database=$rbd_image/database +image_coll_v1=$rbd_image/image_coll_v1 +image_coll_v2=$rbd_image/image_coll_v2 +pg_coll=$rbd_image/pg_coll +images=$rbd_image/images +images_meta=$rbd_image/images_meta +default_backup_dir=/var/rbd_tool/default_backup_dir + +# admin node: image snap & nosnap +nosnap= #$rbd_image/<image_name>/nosnap +snap= #rbd_image/<image_name>/<snap_name> + +# osd node init path +job_path=/var/rbd_tool/osd_job +single_node=/var/rbd_tool/single_node + +# osd node vars +osd_env= #single_node/$cluster$id/osd_env +osd_data= #/var/lib/ceph/osd/$cluster-$id +omap_path= #$osd_data/current/omap +image_list_v1= #single_node/$cluster-$id/image_list_v1 +image_list_v2= #single_node/$cluster-$id/image_list_v2 +image_v1= #$single_node/$cluster-$id/image_v1 +image_v2= #$single_node/$cluster-$id/image_v2 +pgid_list= #$single_node/$cluster-$id/pgid_list +node_pg_epoch= #$single_node/$cluster-$id/node_pg_epoch +omap_list= #$single_node/$cluster-$id/omap_list + +# admin node config file +osd_host_path=$my_dir/config/osd_host_path +osd_host_mapping= #$pwd_path/config/osd_host_mapping # host --> host_remote: by init_env_admin() +osd_host=$my_dir/config/osd_host #generated by function init_env_admin() +mon_host=$my_dir/config/mon_host +mds_host=$my_dir/config/mds_host + +# ssh option +ssh_option="-o ConnectTimeout=1" + +# gen md5sum +function 
gen_md5() +{ + echo $1|md5sum|awk '{print $1}' +} + +# on each osd node +# check ceph environment: ssh, ceph-kvstore-tool, osd_data_path +function check_ceph_env() +{ + local func="check_ceph_env" + if [ $# -lt 2 ];then + echo "$func: parameters: <node> <data_path>" + exit + fi + local node=$1 + local data_path=$2 + local res= + local cmd= + + trap 'echo [$node]: ssh failed; exit' INT HUP + ssh -o ConnectTimeout=1 $node "echo -n" </dev/null + res=$? + if [ $res -ne 0 ];then + echo "[$node]: ssh failed" + exit + fi + + cmd=ceph-kvstore-tool + trap 'echo [$node]: $cmd failed; exit' INT HUP + ssh -o ConnectTimeout=1 $node "$cmd &>/dev/null;" </dev/null + res=$? + # ceph-kvstore-tool will return 1 with no parameters input + if [ $res -ne 1 ];then + echo "[$node]: $cmd not installed" + exit + fi + + trap 'echo [$node]: stat $data_path failed; exit' INT HUP + ssh -o ConnectTimeout=1 $node "stat $data_path &>/dev/null;" </dev/null + res=$? + if [ $res -ne 0 ];then + echo "[$node]: $data_path not exists" + exit + fi +} + +# osd node context : osd_data_path +function init_env_osd() +{ + local func="init_env_osd" + if [ "$1"x = ""x ];then + echo "$func: no osd_data_path input" + exit + fi + osd_data=$1 + omap_path=$osd_data/current/omap + + if [ ! -e $single_node ];then + mkdir -p $single_node + fi + + local osd_id=`gen_md5 $osd_data` + local osd_dir=$single_node/$osd_id + + if [ ! -e $osd_dir ];then + mkdir -p $osd_dir + fi + + image_list_v1=$osd_dir/image_list_v1 + image_list_v2=$osd_dir/image_list_v2 + image_v1=$osd_dir/image_v1 + image_v2=$osd_dir/image_v2 + pgid_list=$osd_dir/pgid_list + node_pg_epoch=$osd_dir/node_pg_epoch + omap_list=$osd_dir/omap_list +} + +# admin node process file: osd_host_path +function init_env_admin() +{ + local func="init_env_admin" + local pwd_path=`pwd` + osd_host_mapping=$pwd_path/config/osd_host_mapping + if [ ! -s $osd_host_path ];then + echo "$func: config/osd_host_path not exists or empty" + exit + fi + if [ ! 
-e $rbd_image ];then + mkdir -p $rbd_image + fi + if [ ! -e $images ];then + mkdir -p $images + fi + + if [ ! -s $mon_host ];then + echo "$func: config/mon_host not exists or empty" + exit + fi + if [ ! -e $mds_host ];then + echo "$func: config/mds_host not exists" + exit + fi + + # we just judge if osd_host is needed to be updated + if [ -s $osd_host ] && [ $osd_host -nt $osd_host_path ];then + return + fi + echo "$func: create osd_host ..." + # create file: osd_host and osd_host_mapping + >$osd_host + >$osd_host_mapping + local lines=0 + local lineno=0 + while read line + do + lineno=$(($lineno + 1)) + if [ "$line"x = ""x ];then + continue; + fi + local node=`echo $line|awk '{print $1}'` + if [ "$node"x = ""x ];then + echo "$func: osd_host_path : line $lineno: osd hostname not input" + rm -rf $osd_host $osd_host_mapping + exit + fi + local data_path=`echo $line|awk '{print $2}'` + if [ "$data_path"x = ""x ];then + echo "$func: osd_host_path : line $lineno: osd data_path not input" + rm -rf $osd_host $osd_host_mapping + exit + fi + lines=$(($lines + 1)) + # in case : there are servral hostnames on the same node + # just need output of `hostname` + local hostname_alias= + hostname_alias=`ssh $ssh_option $node "hostname" 2>/dev/null </dev/null` + if [ "$hostname_alias"x = ""x ];then + echo "$func: osd_host_path: line $lineno: $node: get remote hostname alias failed" + rm -rf $osd_host $osd_host_mapping + exit + fi + echo "$node $hostname_alias" >>$osd_host_mapping + echo $node >> $osd_host + # check ceph env on remote osd + check_ceph_env $node $data_path + done < $osd_host_path + + if [ $lines = 0 ];then + echo "$func: no osd host path valid" + exit + fi +} + +function admin_parse_osd() +{ + local func="admin_parse_osd" + if [ -s $osd_host ];then + return + fi + # create file: osd_host + >$osd_host + local lines=0 + local lineno=0 + while read line + do + lineno=$(($lineno + 1)) + if [ "$line"x = ""x ];then + continue; + fi + local node=`echo $line|awk '{print 
$1}'` + if [ "$node"x = ""x ];then + echo "$func: osd_host_path : line $lineno: osd_host not input" + exit + fi + local data_path=`echo $line|awk '{print $2}'` + if [ "$data_path"x = ""x ];then + echo "$func: osd_host_path : line $lineno: osd_data not input" + exit + fi + lines=$(($lines + 1)) + echo $node >> $osd_host + done < $osd_host_path +} + +# for osd node +function get_omap_list() +{ + ceph-kvstore-tool $omap_path list > $omap_list +} + +function convert_underline() +{ + if [ "$1"x = ""x ];then + return + fi + + echo $1|sed -e 's/_/\\u/gp'|head -n 1 +} + +function dump_backslash() +{ + echo $*|sed -e 's/\\/\\\\/gp'|head -n 1 +} + +function dump_dump_backslash() +{ + echo $*|sed -e 's/\\/\\\\\\\\/gp'|head -n 1 +} + +function char_convert() +{ + if [ "$1"x = ""x ];then + return + fi + + echo $1|sed -e 's/_/\\u/gp' -e 's/\./%e/gp' -e 's/%/%p/gp'|head -n 1 +} + +function check_osd_process() +{ + local func="check_osd_process" + local host=$1 + if [ "$1"x = ""x ];then + exit + fi + local cmds="ps aux|grep ceph-osd|grep -v grep" + local ret=/tmp/ret.$$$$ + ssh $ssh_option $host $cmds |tee $ret + if [ -s $ret ];then + echo "$func: [$host] ceph-osd process is not killed" + exit + fi + rm -f $ret +} + +function get_map_header_prefix() +{ + echo "_HOBJTOSEQ_" +} + +function get_map_header_key() +{ + local func="get_map_header_key" + if [ "$1"x = ""x ];then + #echo $func': no keyword input' + exit + fi + local keyword=$1 + local res=`cat $omap_list| grep $keyword` + if [ "$res"x = ""x ];then + #echo "$func: map_header_key = $keyword not exists" + exit + fi + echo $res|awk -F ":" '{print $2}' +} + +function get_header_seq() +{ + local func="get_header_seq" + if [ "$1"x == ""x ];then + #echo "$func: no prefix input" + exit; + elif [ "$2"x == ""x ];then + #echo "$func: no key input" + exit; + fi + local prefix=$1; + local key=$2; + local res=/tmp/header_seq.$$$$ + + ceph-kvstore-tool $omap_path get $prefix $key 2>/dev/null 1>$res + if [ $? 
!= 0 ]; then + #echo "$func: <$prefix , $key> not exists" ; + exit; + fi + + # ceph-kvstore-tool get result like this: + # 02 01 7e 00 00 00 12 44 00 00 00 00 00 00 00 00 + # get header seq bytes: + # 12 44 00 00 00 00 00 00 + # -> 00 00 00 00 00 00 44 12 + # echo $((16#0000000000004412)) -> 17426 == header_seq + local seq=`cat $res |head -n 2|tail -n 1| \ + awk ' + BEGIN { + FS=":" + seq=""; + i=7; + } { + split($2, arr, " ") + # header_seq uint64 : 8 bytes + for (x=7; x>=0; --x) { + seq=seq""arr[i+x]; + } + } + END { + print seq + }'` + if [ "$seq"x = ""x ];then + #echo "$func: get <$prefix , $key> failed" + exit; + fi + rm -f $res + echo $((16#$seq)) +} + +# get header info key/value +function get_header_kv() +{ + local func="get_header_kv" + if [ "$1"x = ""x ];then + #echo "$func: no prefix input" + exit + elif [ "$2"x = ""x ];then + #echo "$func: no key input" + exit + elif [ "$3"x != "string"x ] && [ "$3"x != "int"x ];then + #echo "$func: no valid type input, use type (string|int)" + exit + fi + + local prefix=$1 + local key=$2 + local types=$3 + local res=/tmp/kv.$$$$ + + ceph-kvstore-tool $omap_path get $prefix $key 2>/dev/null 1>$res + if [ $? 
!= 0 ];then + #echo "$func: <$prefix , $key> not exists" + exit + fi + + if [ "$types"x = "string"x ];then + local value=`cat $res |tail -n +2|head -n -1|awk -F ": " '{printf $3}'|sed -n 's/^\.\{4\}//p'` + echo $value + elif [ "$types"x = "int"x ];then + local value=`cat $res |tail -n +2|head -n -1| \ + awk ' + BEGIN{ + FS=":" + } { + split($2, arr, " "); + len=length(arr) + for (i=len; i>0; --i) { + printf arr[i]; + } + }'` + echo $((16#$value)) + fi + rm -f $res +} diff --git a/src/tools/rbd_recover_tool/config/mds_host b/src/tools/rbd_recover_tool/config/mds_host new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/tools/rbd_recover_tool/config/mds_host diff --git a/src/tools/rbd_recover_tool/config/mon_host b/src/tools/rbd_recover_tool/config/mon_host new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/tools/rbd_recover_tool/config/mon_host diff --git a/src/tools/rbd_recover_tool/config/osd_host_path b/src/tools/rbd_recover_tool/config/osd_host_path new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/tools/rbd_recover_tool/config/osd_host_path diff --git a/src/tools/rbd_recover_tool/database_h b/src/tools/rbd_recover_tool/database_h new file mode 100644 index 00000000..4ff20425 --- /dev/null +++ b/src/tools/rbd_recover_tool/database_h @@ -0,0 +1,1134 @@ +#!/usr/bin/env bash +# file: database_h +# +# Copyright (C) 2015 Ubuntu Kylin +# +# Author: Min Chen <minchen@ubuntukylin.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Library Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library Public License for more details. +# + +my_dir=$(dirname "$0") + +. 
$my_dir/common_h +. $my_dir/metadata_h +. $my_dir/epoch_h + +db_image_prefix= +db_image_size= +db_order= +db_snap_id= +db_snap_image_size= +found=0 + +#init osd_data and get all objects path +function gen_database() +{ + local func="gen_database" + rm -rf $database/* + rm -rf $images + rm -rf $raw + mkdir -p $database + local host= + local data_path= + + trap 'echo $func failed; exit;' INT HUP + while read line + do + { + host=`echo $line|awk '{print $1}'` + data_path=`echo $line|awk '{print $2}'` + if [ "$host"x = ""x ] || [ "$data_path"x = ""x ];then + continue + fi + local cmds="find $data_path/current -type f" + ssh $ssh_option $host $cmds > $database/$host + } & + done < $osd_host_path + wait + echo "$func: finish" +} + +# collect hobjects from database +# and choose the object whose epoch is latest +# then, sort the objects by their offsets in image +function gather_hobject_common() +{ + func="gather_hobject_common" + + trap 'echo $func failed; exit;' INT HUP + if [ $# -lt 2 ];then + echo "$func: parameters: <pool_id> <image_prefix> [<snap_id>]" + exit + fi + + local pool_id=$1 + local image_prefix=$2 + pool_id=$(($pool_id)) + local hex_pool_id=`printf "%x" $pool_id` + # NOSNAP = uint64(-2) + local snap_id=`printf "%u" -2` + local hex_snap_id="head" + local psuffix= + local fsuffix="_head" + if [ $# = 3 ];then + snap_id=$(($3)) + hex_snap_id=`printf "%x" $snap_id` + psuffix="_"$snap_id + fsuffix="_"$snap_id + fi + local underline_image_prefix=`convert_underline $image_prefix` + local dump_image_prefix=`dump_backslash $underline_image_prefix` + local ddump_image_prefix=`dump_dump_backslash $underline_image_prefix` + local images_raw_dir=$rbd_image/raw + local image_hobjects_dir=$images/pool_$pool_id/$image_prefix + # $images/raw/$image_prefix"_head" + local image_hobjects_raw=$images_raw_dir/$image_prefix"$fsuffix" + # $images/$image_prefix/$image_prefix"_head" + local image_hobjects_stable=$image_hobjects_dir/$image_prefix"$fsuffix" + + if [ ! 
-e $images_raw_dir ];then + mkdir -p $images_raw_dir + fi + if [ ! -e $image_hobjects_dir ];then + local image_metadata=$images_meta/$image_name_in + mkdir -p $image_hobjects_dir + fi + + pushd $database >/dev/null + local pattern="\.[0-9a-f]+__"$hex_snap_id"_[0-9A-F]{8}__"$hex_pool_id + >$image_hobjects_raw + grep -r -E $dump_image_prefix""$pattern * >$image_hobjects_raw + if [ ! -s $image_hobjects_raw ];then + echo "$func: image snap [ $image_prefix"$psuffix" ] is empty" + return 1 #no data available + fi + popd >/dev/null + + local offset_dir_temp=$images_raw_dir/$image_prefix"$fsuffix""_dir_temp" + rm -rf $offset_dir_temp + mkdir -p $offset_dir_temp + + echo "gather hobjects from database: snapid=$snap_id ..." + + # format: ceph2:/var/lib/ceph/osd/ceph-1/current/2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2 + local tmp_image=$offset_dir_temp/tmpimage.$$$$ + >$tmp_image + cat $image_hobjects_raw | + awk -F ':' ' + BEGIN { + pg_coll="'$pg_coll'" + tmp_image="'$tmp_image'" + osd_host_mapping="'$osd_host_mapping'" + snapid="'$snap_id'" + }{ + # $2 = /var/lib/ceph/osd/ceph-1/current/2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2 + + split($2, arr1, "/current/"); # {/var/lib/ceph/osd/ceph-1/, 2.d3_head/rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2} + split(arr1[2], arr2, "/"); # {2.d3_head, rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2} + split(arr2[1], arr3, "_head"); # {2.d3,} + + hobject=$2; + data_path=arr1[1]; + gsub(/\\u/, "\\\\\\\\u", hobject); # dump backslash to delay escape (\ -> \\) + "awk \"\\$1 == \\\""$1"\\\" {print \\$2}\" "osd_host_mapping" | head -n 1" | getline node + pgid = arr3[1]; + + len=length(arr2); + offset_hobject=arr2[len] # rb.0.1293.6b8b4567.000000000002__head_FB425CD3__2 + split(offset_hobject, offarr1, "."); # {rb, 0, 1293, 6b8b4567, 000000000002__head_FB425CD3__2} + len1=length(offarr1) + offset_p=offarr1[len1] # 000000000002__head_FB425CD3__2 + split(offset_p, offarr2, "__"); # {000000000002, 
head_FB425CD3, 2} + offset=offarr2[1]; # 000000000002 + + system("echo -n \""node" "pgid" "hobject" "offset" "snapid" \" >>"tmp_image); + #system("echo -n \""node" "pgid" "hobject" "offset" "snapid" \""); + #print node" "pgid" "hobject" "offset" "snapid + + # find pg_epoch from pg_coll database + system("awk \"\\$1 == \\\""node"\\\" && \\$2 == \\\""pgid"\\\" && \\$4 == \\\""data_path"\\\" {print \\$3}\" "pg_coll" >>"tmp_image); + #system("awk \"\\$1 == \\\""node"\\\" && \\$2 == \\\""pgid"\\\" && \\$4 == \\\""data_path"\\\" {print \\$3}\" "pg_coll); + }' + + local sort_image=$offset_dir_temp/sortimage.$$$$ + >$sort_image + sort -t ' ' -k 4.1,4 -k 6.1nr -k 1.1,1 $tmp_image >$sort_image + sort -t ' ' -k 4.1,4 -u $sort_image > $image_hobjects_stable + + #rm -rf $offset_dir_temp + return 0 +} + +function gather_hobject_nosnap() +{ + gather_hobject_common $1 $2 +} + +function gather_hobject_snap() +{ + gather_hobject_common $1 $2 $3 +} + +# select the max pg_epoch item of the same $field +# if no same $field, choose the first +# format : "node $field pg_epoch" +function choose_epoch() +{ + cat $1|sort -t ' ' -k 3.1,3nr -k 2.1,2n |head -n 1; +} + +# lookup image info , after scatter_node_jobs & gather_node_infos +function lookup_image() +{ + local func="lookup_image" + if [ $# -lt 2 ];then + echo "$func: parameters error <pool_id> <image_name> [<snap_name>]" + fi + local pool_id=$1 + local image_name=$2 + local snap_name=$3 + pool_id=$((pool_id)) + echo -e "$func: pool_id = $pool_id\timage_name = $image_name\tsnap_name = $snap_name" + if [ $pool_id -lt 0 ];then + echo "$func: pool_id must great than zero" + exit + fi + local hex_pool_id=`printf "%x" $pool_id` + input_image $image_name + local node= + local item=/tmp/item.$$$$ + local img_name=`dump_backslash $image_name` + + local image_format=0 + local image_id_hobject= + local image_header_hobject= + local result=/tmp/tmp_result.$$$$ + local res1=/tmp/tmp_res1.$$$$ + local res2=/tmp/tmp_res2.$$$$ + local data_path= + + 
# image format v1 + { + cat $image_coll_v1|grep -E "/$img_name\.rbd__head_[0-9A-F]{8}__$hex_pool_id" >$res1 + if [ -s $res1 ];then + echo -n "$func: rbd_header_hobject = " + choose_epoch $res1| tee $item + #choose_epoch $res1 > $item + + if [ -e $item ];then + node=`cat $item|awk '{print $1}'` + image_header_hobject=`cat $item|awk '{print $2}'` + if [ "$node"x = ""x ];then + echo "$func: v1 node is NULL" + exit + fi + if [ "$image_header_hobject"x = ""x ];then + echo "$func: v1 image_header_hobject is NULL" + exit + fi + rm -f $item + fi + + image_format=1 + echo -e "image_name:\t$image_name_in" + echo -e "image_format:\t$image_format" + data_path=`echo $image_header_hobject|awk -F "/current" '{print $1}'` + + >$result + cmds="bash $job_path/osd_job do_image_metadata_v1 $data_path `dump_backslash $image_header_hobject` $snap_name" + ssh $ssh_option $node $cmds | tee $result + fi + } + + # image format v2 + { + cat $image_coll_v2|grep -E "/rbd\\\\uid\."$img_name"__head_[0-9A-F]{8}__$hex_pool_id" >$res2 + if [ -s $res2 ];then + echo -n "$func: rbd_id_hobject = " + choose_epoch $res2 | tee $item + #choose_epoch $res2 > $item + + if [ -e $item ];then + node=`cat $item|awk '{print $1}'` + image_id_hobject=`cat $item|awk '{print $2}'` + if [ "$node"x = ""x ];then + echo "$func: v2 node is NULL(to get image_id_hobject)" + exit + fi + if [ "$image_id_hobject"x = ""x ];then + echo "$func: v2 image_id_hobject is NULL" + exit + fi + rm -f $item + fi + + check_osd_process $node + image_format=2 + + local tid=/tmp/image_id.$$$$ + data_path=`echo $image_id_hobject|awk -F "/current" '{print $1}'` + >$tid + cmds="bash $job_path/osd_job do_image_id $data_path `dump_backslash $image_id_hobject`" + ssh $ssh_option $node $cmds > $tid + + local image_id=`cat $tid` + rm -f $tid + + #get image_header_hobject + pushd $database >/dev/null + local pattern="header\."$image_id"__head_[0-9A-F]{8}__$hex_pool_id" + local tcoll=/tmp/tmp_image_head_coll.$$$$ + + # hostname(by command hostname) in 
$pg_coll maybe different from hostname in tcoll(input by user) + # t_host: hostname read from config file ($tcoll) + # t_host_remote: $(hostname) on osd node ($pg_coll) + grep -r -E $pattern * >$tcoll + popd >/dev/null + + local t_host=(`cat $tcoll|awk -F ":" '{print $1}'`) + local t_pgid=(`cat $tcoll|awk -F ":" '{print $2}'|sed -n 's/.*\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head\/.*/\1/p'`) + local t_hobject=(`cat $tcoll|awk -F ":" '{print $2}'`) + local t_data_path=(`cat $tcoll|awk -F ":" '{split($2, arr, "/current/"); print arr[1];}'`) + rm -f $tcoll + declare -a t_host_remote + + #if there is no failed pg migration, number of t_host is replica num + #replica num : 3, 4, 5 ... + local t_hostname=/tmp/t_hostname.$$$$ + for ((i=0; i<${#t_host[*]}; i++)) + do + ssh $ssh_option ${t_host[$i]} "hostname" >$t_hostname + if [ $? != 0 ];then + echo "$func: ${t_host[$i]} get host_remote failed" + exit + fi + t_host_remote[$i]=`cat $t_hostname` + done + rm -f $t_hostname + + local t_item=/tmp/tmp_item.$$$$ + local tmp_item=/tmp/tmp_tmp_item.$$$$ + + >$tmp_item + for ((i=0; i<${#t_host_remote[*]}; i++ )) + do + local node=${t_host_remote[$i]} + local pgid=${t_pgid[$i]} + awk '$1 == "'"$node"'" && $2 == "'"$pgid"'" {print}' $pg_coll >>$tmp_item + done + + # t_item: <remote_hostname> <pgid> <epoch> <data_path> + sort -u $tmp_item >$t_item + rm -f $tmp_item + + local entry=`choose_epoch $t_item` #t_host_remote + rm -f $t_item + + node=`echo $entry|awk '{print $1}'` + data_path=`echo $entry|awk '{print $4}'` + if [ "$node"x = ""x ];then + echo "$func: v2 node is NULL (to get image_header_hobject)" + exit + fi + + for ((i=0; i<${#t_host_remote[*]}; i++)) + do + if [ "${t_host_remote[$i]}"x = "$node"x ] && [ "${t_data_path[$i]}"x = "$data_path"x ];then + image_header_hobject=${t_hobject[$i]} + break + fi + done + + if [ "$image_id_hobject"x = ""x ];then + echo "$func: v2 image_header_hobject is NULL" + exit + fi + + check_osd_process $node + + echo "$func: rbd_header_hobject = $node 
$image_header_hobject" + echo -e "image_name:\t$image_name_in" + echo -e "image_format:\t$image_format" + + #data_path=`echo $image_header_hobject|awk -F "/current" '{print $1}'` + >$result + cmds="bash $job_path/osd_job do_image_metadata_v2 $data_path $image_id `dump_backslash $image_header_hobject` $snap_name" + ssh $ssh_option $node $cmds | tee $result + fi + } + + if [ ! -s $result ];then + echo "$func: $image_name_in not exists" + exit + fi + + # to assign value to global variable + db_image_prefix=`cat $result|awk '/^(object_prefix|block_name):/{print $2}'` + if [ "$db_image_prefix"x = ""x ];then + echo "$func: image_prefix is NULL" + exit + fi + + db_image_size=`cat $result|awk '/^image_size:/{print $2}'` + db_order=`cat $result|awk '/^order:/{print $2}'` + if [ "$snap_name"x != ""x ];then + db_snap_id=`cat $result|awk '/^snapshot:/{print $2}'` + if [ "$db_snap_id"x = ""x ];then + echo "$func: $image_name_in@$snap_name NOT EXISTS" + exit + fi + db_snap_image_size=`cat $result|awk '/^snapshot:/{print $4}'` + else + #save snaplist + local image_snaplist=$images/pool_$pool_id/$image_name_in/@snaplist + local image_dir=$images/pool_$pool_id/$image_name_in + if [ ! 
-e $image_dir ];then + mkdir -p $image_dir + fi + cat $result|awk '/^snapshot:/{print $2" "$3" "$4}' >$image_snaplist + fi + found=1 + rm -f $result +} + +function list_images() +{ + echo "=============== format ==============" + echo "format: <pool_id>/<image_name>" + echo "================ v1: ================" + #sed -n 's/\(.*\)\/\(.*\)\.rbd__\(.*\)/\2/p' $image_coll_v1|sort -u|sed -e 's/\\u/_/g' + sed -n 's/.*\/\(.*\)\.rbd__head_[0-9A-F]\{8\}__\([0-9a-f]\+\).*/\2 \1/p' $image_coll_v1|sort -u|awk '{print strtonum("0x"$1)"/"$2;}'|sed -e 's/\\u/_/g' + echo "================ v2: ================" + #sed -n 's/\(.*\)\/rbd\\uid.\(.*\)__\(head.*\)/\2/p' $image_coll_v2|sort -u|sed 's/\\u/_/g' + sed -n 's/.*\/rbd\\uid.\(.*\)__head_[0-9A-F]\{8\}__\([0-9a-f]\+\).*/\2 \1/p' $image_coll_v2|sort -u|awk '{print strtonum("0x"$1)"/"$2}'|sed 's/\\u/_/g' +} + +# lookup image metadata +# and +# collect hobjects of image with the latest pg epoch +function discover_image_nosnap() +{ + local func="discover_image_nosnap" + echo "$func ..." + local pool_id=$1 + local image_name=$2 + pool_id=$(($pool_id)) + lookup_image $pool_id $image_name # assign $image_prefix + gather_hobject_nosnap $pool_id $db_image_prefix + if [ $? -ne 0 ];then + exit + fi + local image_hobjects_stable_nosnap=$images/pool_$pool_id/$db_image_prefix/$db_image_prefix"_head" + local image_hobjects_dir=$images/pool_$pool_id/$image_name_in + if [ ! 
-e $image_hobjects_dir ];then + mkdir -p $image_hobjects_dir + fi + # mv image_prefix to image_name + mv $image_hobjects_stable_nosnap $image_hobjects_dir/$image_name_in + rm -rf $images/pool_$pool_id/$db_image_prefix +} + +# get the offset snapid object +# if there is no object, choose the smallest snapid which is greater than current snapid +function get_object_clone() +{ + local func="get_object_clone" + if [ $# -lt 4 ];then + exit + fi + + local object_offset_string=$1 + local snapid=$2 + local snaplist_path=$3 + local snapset_output_dir=$4 + + # snapid in desc + local snap_coll_arr=(` + cat $snaplist_path|awk '{ if ($1 >= '"$snapid"') print "'"$snapset_output_dir"'/@"$1}'`) + + local hex_snapid=`printf "%x" $snapid` + pushd $snapset_output_dir >/dev/null + # get object with the smallest snapid greater than current snapid + awk '$4 == "'"$object_offset_string"'" && $5 >= '$snapid' {print}' `echo ${snap_coll_arr[@]}` |tail -n 1 + popd >/dev/null +} + +# gather hobject for each snapid +function gen_snapset_hobject() +{ + local func="gen_image_snapset" + echo "$func ..." + if [ $# -lt 4 ];then + echo "$func: parameters: <pool_id> <image_prefix> <snaplist_path> <snapset_output_dir>" + exit + fi + local pool_id=$1 + local image_prefix=$2 + local snaplist_path=$3 + local snapset_output_dir=$4 + pool_id=$(($pool_id)) + OIFS=$IFS + IFS=$'\n' + local snaparr=(`cat $snaplist_path`) + # gather hobject for each snapshot + trap 'echo $func failed; exit;' INT HUP + for line in ${snaparr[@]} + do + OOIFS=$IFS + IFS=$' ' + local field=(`echo $line`) + local snapid=${field[0]} + local image_hobjects_stable_snap=$images/pool_$pool_id/$image_prefix/$image_prefix"_"$snapid + local image_snap=$snapset_output_dir/@$snapid + gather_hobject_snap $pool_id $image_prefix $snapid + local res=$? 
+    if [ $res -ne 0 ];then
+      # gathering failed: leave an empty file for this snapid
+      touch $image_snap
+    else
+      mv $image_hobjects_stable_snap $image_snap
+    fi
+    IFS=$OOIFS
+  done
+  IFS=$OIFS
+}
+
+# lookup image metadata and get snapid hobjects
+#
+# parameters: <pool_id> <image_name> <snap_name>
+# NOTE(review): the usage text shows <snap_name> as optional, but the argument
+# count check below requires all three arguments.
+# Writes two lists under $images/pool_<pool_id>/<image_name_in>/:
+#   <image_name_in>@<snap_id>       one clone entry per matching offset
+#   <image_name_in>@<snap_id>@head  the head line of the same offset
+function discover_image_snap()
+{
+  local func="discover_image_snap"
+  echo "$func ..."
+  if [ $# -lt 3 ];then
+    echo "$func: parameters: <pool_id> <image_name> [<snap_name>]"
+    exit
+  fi
+  local pool_id=$1
+  local image_name=$2
+  local snap_name=$3
+  pool_id=$(($pool_id))
+  #mkdir -p $images/$image_prefix
+  lookup_image $pool_id $image_name $snap_name # input image_name and snap_name to lookup metadata and snap_id
+  if [ "$db_snap_id"x = ""x ];then
+    echo "$func: lookup image failed to gen snapid"
+    exit
+  fi
+  local image_hobjects_dir_prefix=$images/pool_$pool_id/$db_image_prefix
+  local image_nosnap=$images/pool_$pool_id/$image_name_in
+  #check if image nosnap recovered
+  if [ ! -s $image_nosnap ];then
+    echo "$func: please recover image nosnap before recover with snap"
+    rm -rf $image_hobjects_dir_prefix
+    exit
+  fi
+  local image_hobject_dir=$images/pool_$pool_id/$image_name_in
+  local image_snap_hobject=$image_hobject_dir/$image_name_in@$db_snap_id
+  local image_snap_hobject_head=$image_hobject_dir/$image_name_in@$db_snap_id@head
+  local image_snaplist=$image_hobject_dir/@snaplist
+  local image_snapset_dir=$image_hobject_dir/@snapset_dir
+  local image_head=$image_hobject_dir/$image_name_in
+  if [ ! -e $image_hobject_dir ];then
+    mkdir -p $image_hobject_dir
+  fi
+  # only gen snapset one time
+  # (the existence of @snapset_dir is the marker that it was already generated)
+  if [ ! -e $image_snapset_dir ];then
+    mkdir -p $image_snapset_dir
+    gen_snapset_hobject $pool_id $db_image_prefix $image_snaplist $image_snapset_dir
+
+  fi
+
+  echo "$func: will get object clone ..."
+ >$image_snap_hobject + >$image_snap_hobject_head + + trap 'echo $func failed; exit;' INT HUP + # get each offset 's snapid hobject + while read line + do + #echo $line + OOIFS=$IFS + IFS=$' ' + local field=(`echo $line`) + local offset_string=${field[3]} + IFS=$OOIFS + local entry=`get_object_clone $offset_string $db_snap_id $image_snaplist $image_snapset_dir` + if [ "$entry"x != ""x ];then + echo $entry >> $image_snap_hobject + echo `dump_backslash $line` >> $image_snap_hobject_head + fi + done < $image_head + rm -rf $image_hobjects_dir_prefix +} + +# after discover_image_nosnap +# collect objects from osds one by one in sequence +function copy_image_nosnap_single_thread() +{ + local func="copy_image_nosnap_single_thread" + echo "$func ..." + if [ $# -lt 3 ];then + echo "$func: parameters: <pool_id> <image_hobjects> <backup_dir>" + exit + fi + local pool_id=$1 + local image_hobjects=$2 + local backup_dir=$3 + pool_id=$(($pool_id)) + + # make sure lookup_image first + if [ $found = 0 ];then + echo "$func: image not found, maybe forget to discover_image" + exit + fi + if [ ! -e $backup_dir ];then + mkdir -p $backup_dir + fi + + local image_dir=$backup_dir/pool_$pool_id/$image_name_in + local image_file=$image_dir/$image_name_in + local CURRENT=$image_dir/@CURRENT + local LOCK=$image_dir/@LOCK + if [ ! 
-e $image_dir ];then
+    mkdir -p $image_dir
+  fi
+  # lock: the existence of $LOCK means another run is working on this image
+  if [ -e $LOCK ];then
+    echo "$func: $LOCK is locked by other process"
+    exit
+  else
+    touch $LOCK
+  fi
+
+  # create the image file empty, then extend it to the full image size
+  >$image_file
+  truncate -s $db_image_size $image_file
+  echo "head">$CURRENT
+
+  # number of objects a fully populated image would have
+  local count=$(($db_image_size >> $db_order))
+  local start=`cat $image_hobjects|head -n 1|awk '{print $4}'`
+  local end=`cat $image_hobjects|tail -n 1|awk '{print $4}'`
+  local entry_count=`cat $image_hobjects|wc -l`
+
+  # width of the offset string (wc -c also counts the trailing newline)
+  local char_bits=$((`echo $start|wc -c` -1 ))
+  local format="%0"$char_bits"x"
+
+  local expect_start=`printf $format 0`
+  local expect_end=`printf $format $(($count -1 ))`
+
+  echo -e "object_count\t$entry_count"
+  echo -e "expect\t\t[$expect_start ~ $expect_end] count:$count"
+  echo -e "range\t\t[$start ~ $end] count:$entry_count"
+
+  local icount=0
+  local istart=
+  local iend=
+  local percent=
+
+  trap 'echo $func failed; exit;' INT HUP
+  # object size in bytes; used as the dd block size so seek= is in objects
+  local unit=$((1<<$db_order))
+  while read line
+  do
+  {
+    icount=$(($icount+1))
+    node=`echo $line|awk '{print $1}'`
+    hobject=`echo $line|awk '{print $3}'`
+    offset=`echo $line|awk '{print $4}'`
+    # the offset column is hexadecimal
+    off=$((16#$offset))
+    if [ $icount = 1 ];then
+      istart=$offset
+    fi
+    hobject=`dump_backslash $hobject`
+    iend=$offset
+    sshcmd="cat $hobject"
+    # stdin of ssh is /dev/null so it cannot consume the loop's input file
+    ssh $ssh_option $node $sshcmd < /dev/null | dd of=$image_file bs=$unit seek=$off conv=notrunc 2>/dev/null
+    percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
+    tput sc #record current cursor
+    echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
+    if [ $icount != $entry_count ];then
+      tput rc # backport most recent cursor
+    fi
+  }
+  done < $image_hobjects
+
+  echo
+  echo -n "size: "
+  ls -lh $image_file|awk '{print $5"\t"$9}'
+  echo -n "du: "
+  du -h $image_file
+  #unlock
+  rm -f $LOCK
+}
+
+
+# ssh copy snap_object & head_object from osd to admin node
+# copy all snapshot objects
+# and
+# all head objects which have the same offset as snapshot objects
+function collect_image_snap_objects()
+{
+  local
func="collect_image_snap_objects" + #$1=backup_dir, $2=snap_name, $3=snap_hobjects, $4=head_hobjects + if [ $# -lt 6 ];then + echo "$func: parameters: <pool_id> <image_name> <snap_id> <snap_hobjects> <head_hobjects> <backup_dir>" + exit + fi + + local pool_id=$1 + local image_name=$2 + local snap_id=$3 + local snap_hobjects=$4 #snap hobjects info + local head_hobjects=$5 #head hobjects info + local backup_dir=$6 + pool_id=$(($pool_id)) + + local head_dir=$backup_dir/pool_$pool_id/$image_name/@head + local snap_dir=$backup_dir/pool_$pool_id/$image_name/@$snap_id + local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT + + if [ ! -e $head_dir ];then + mkdir -p $head_dir + fi + if [ ! -e $snap_dir ];then + mkdir -p $snap_dir + fi + + local snap_node= #osd node + local snap_hobject= #hobject path with snapid on osd + local snap_offset= + local snap_filename= + + local head_node= + local head_hobject= + local head_offset= + local head_filename= + + # ignore if there is no object in snapshot(empty ) + if [ ! -s $snap_hobjects ];then + echo "$func: $snap_hobjects is empty" + return 0 + fi + local start=`head -n 1 $snap_hobjects|awk '{print $4}'` + local end=`tail -n 1 $snap_hobjects|awk '{print $4}'` + local entry_count=`cat $snap_hobjects|wc -l` + if [ $((16#$first_offset)) -gt $((16#$last_offset)) ];then + echo "$func: $snap_hobjects not sorted" + return 1 + fi + + # just assert if ignored empty snapshot + if [ "$start"x = ""x ] || [ "$end"x = ""x ];then + return 1 + fi + + # speed up copy snapshot + # lookup the corresponding head hobject of snap hobject + # use command: grep <offset> <head hobjects> + # + # eg. 
+  # head hobjects: (32 objects, snapid = uint64(-2) = 18446744073709551614)
+  # ceph1 29.4d /var/lib/ceph/osd/ceph-0/current/29.4d_head/rb.0.1c414.6b8b4567.000000000000__head_EC2C1C4D__1d 000000000000 18446744073709551614 869
+  # ceph1 29.8c /var/lib/ceph/osd/ceph-0/current/29.8c_head/rb.0.1c414.6b8b4567.000000000001__head_0F439A8C__1d 000000000001 18446744073709551614 867
+  # ceph1 29.6a /var/lib/ceph/osd/ceph-0/current/29.6a_head/rb.0.1c414.6b8b4567.000000000002__head_FC55706A__1d 000000000002 18446744073709551614 869
+  # ceph1 29.8b /var/lib/ceph/osd/ceph-0/current/29.8b_head/rb.0.1c414.6b8b4567.000000000003__head_20A6328B__1d 000000000003 18446744073709551614 869
+  # ceph2 29.75 /var/lib/ceph/osd/ceph-1/current/29.75_head/rb.0.1c414.6b8b4567.000000000004__head_AC5ADB75__1d 000000000004 18446744073709551614 867
+  # ceph2 29.23 /var/lib/ceph/osd/ceph-1/current/29.23_head/rb.0.1c414.6b8b4567.000000000005__head_1FDEA823__1d 000000000005 18446744073709551614 867
+  # ......
+  # ceph1 29.34 /var/lib/ceph/osd/ceph-0/current/29.34_head/rb.0.1c414.6b8b4567.00000000001f__head_52373734__1d 00000000001f 18446744073709551614 869
+  #
+  # snap hobjects: (3 objects, snapid >= 29)
+  # ceph1 29.8c /var/lib/ceph/osd/ceph-0/current/29.8c_head/rb.0.1c414.6b8b4567.000000000001__1f_0F439A8C__1d 000000000001 31 867
+  # ceph1 29.6a /var/lib/ceph/osd/ceph-0/current/29.6a_head/rb.0.1c414.6b8b4567.000000000002__1e_FC55706A__1d 000000000002 30 869
+  # ceph1 29.8b /var/lib/ceph/osd/ceph-0/current/29.8b_head/rb.0.1c414.6b8b4567.000000000003__1d_20A6328B__1d 000000000003 29 869
+  #
+  # so find out offset in head hobjects line number:
+  # snap hobjects: 000000000001 ---> head hobjects: 2 (n1)
+  # snap hobjects: 000000000003 ---> head hobjects: 4 (n2)
+  #
+  # finally, the grep range shrinks from the whole file [1 ~ N] to part of the file [n1 ~ n2]
+  # the worst case : [n1 ~ n2] = [1 ~ N], means no shrinking
+
+  # get the line number of the start offset in head hobjects
+  # NOTE(review): grep matches the offset string anywhere on a line, not only
+  # in the offset column; n1/n2 are empty when the offset is not found.
+  local n1=`grep -n $start $head_hobjects|head -n 1|cut -d ":" -f 1`
+  # get the line number of the end offset in head hobjects
+  local n2=`grep -n $end $head_hobjects|head -n 1|cut -d ":" -f 1`
+
+  local icount=0
+  local istart=
+  local iend=
+  local percent=
+
+  # split the snap list on newlines only, one array element per line
+  OIFS=$IFS
+  IFS=$'\n'
+
+  #assume file:snap_hobjects is not very large, and can be loaded into memory
+  local snap_arr=(`cat $snap_hobjects`)
+  # "$$" expands to the shell's pid, so the suffix is the pid written twice
+  local snap_tmp=/tmp/snaptmp.$$$$
+
+  # snap_tmp:
+  # consists of snap hobject or head hobject
+  # select lineno range: [n1 ~ n2]
+  head -n $n2 $head_hobjects|tail -n $(($n2-$n1+1)) >$snap_tmp
+
+  echo "copy image snap/head objects from osd ..."
+  echo -e "object_count\t$entry_count"
+  echo -e "range\t\t[$start ~ $end] count:$entry_count"
+
+  trap 'echo $func failed; exit;' INT HUP
+  for line in ${snap_arr[*]}
+  do
+    icount=$(($icount+1))
+
+    # split the current line on spaces to reach its fields
+    OOIFS=$IFS
+    IFS=$' '
+
+    local arr=(`echo $line`)
+    snap_node=${arr[0]}
+    snap_hobject=${arr[2]}
+    snap_offset=${arr[3]}
+    snap_filename=$snap_dir/$snap_offset
+
+    if [ $icount = 1 ];then
+      istart=$snap_offset
+    fi
+    iend=$snap_offset
+
+    #lookup corresponding head hobject of snap hobject
+    local res=`grep $snap_offset $snap_tmp|head -n 1`
+    if [ "$res"x = ""x ];then
+      echo "$func: image object[ $snap_offset ] missing"
+      # NOTE(review): this exits without removing $snap_tmp or the caller's lock
+      exit
+    fi
+
+    local arr2=(`echo $res`)
+    head_node=${arr2[0]}
+    head_hobject=${arr2[2]}
+    head_offset=${arr2[3]}
+    head_filename=$head_dir/$head_offset
+
+    # just copy object(snap/head) if it does not exist
+    if [ ! -e $snap_filename ];then
+      ssh $ssh_option $snap_node "cat $snap_hobject" > $snap_filename
+    fi
+    if [ ! -e $head_filename ];then
+      ssh $ssh_option $head_node "cat $head_hobject" > $head_filename
+    fi
+    IFS=$OOIFS
+
+    percent=`echo "scale=3; 100*$icount/$entry_count"|bc`
+    tput sc #record current cursor
+    echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%"
+    if [ $icount != $entry_count ];then
+      tput rc # backport most recent cursor
+    fi
+  done
+  echo
+  IFS=$OIFS
+  rm -f $snap_tmp
+  return 0
+}
+
+# copy all snap objects and corresponding head objects from osds
+# in single process
+#
+# parameters: <pool_id> <image_name> <snap_id> <snap_hobjects> <head_hobjects> <backup_dir>
+# Takes <backup_dir>/pool_<pool_id>/<image_name>/@LOCK around one call of
+# collect_image_snap_objects with the same arguments.
+function copy_image_snap_single_thread()
+{
+  local func="copy_image_snap_single_thread"
+  if [ $# -lt 6 ];then
+    echo "$func: parameters: <pool_id> <image_name> <snap_id> <snap_hobjects> <head_hobjects> <backup_dir>"
+    exit
+  fi
+  local pool_id=$1
+  local image_name=$2
+  local snap_id=$3
+  local snap_hobjects=$4
+  local head_hobjects=$5
+  local backup_dir=$6
+  pool_id=$(($pool_id))
+
+  local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT
+  local LOCK=$backup_dir/pool_$pool_id/$image_name/@LOCK
+  #lock
+  if [ -e $LOCK ];then
+    echo "$func: $LOCK is locked by other process"
+    exit
+  else
+    touch $LOCK
+  fi
+  collect_image_snap_objects $pool_id $image_name $snap_id $snap_hobjects $head_hobjects $backup_dir
+  #unlock
+  rm -f $LOCK
+}
+
+# after all snap objects and necessary head objects are copied,
+# just pick appropriate head objects and snap objects and write them to image
+# in order to rollback image to snapshot
+#
+# init: image is created by copy_image_nosnap_single_thread firstly
+#
+# all output include 3 parts:
+# <image> <head objects> <snap objects>
+#
+# head objects1 --- snap1 objects
+# head objects2 --- snap2 objects
+# image head objects3 --- snap3 objects
+# ......
+# head objectsN --- snapN objects +# +# how to rollback: +# firstly rollback to head, secondly write <snapX objects> +# head = <image> + <head objects> +# snap1 = <image> + <head objects> + <snap1 objects> +# snap2 = <image> + <head objects> + <snap2 objects> +# snap3 = <image> + <head objects> + <snap3 objects> +# ...... +# snapN = <image> + <head objects> + <snapN objects> +# +# improve rollback: +# there is intersection of head objects and snapX objects, if snapX objects are not empty +# and need to deduplicate the intersection. +# deduplicate steps: +# - get difference set of head objects and snapX objects +# - write the difference set objects to image +# - write the snapX objects to image +function rollback_image_snap() +{ + local func="rollback_image_snap" + + echo "$func ..." + + trap 'echo $func failed; exit;' INT HUP + if [ $# -lt 6 ];then + echo "$func: parameters <pool_id> <image_name> <snap_id> <snap_object_dir> <backup_dir> <image_unit>" + exit + fi + local pool_id=$1 + local image_name=$2 + local snap_id=$3 + local snap_object_dir=$4 + local backup_dir=$5 + local image_unit=$6 + + local need_diff_set=0 + + local image_path=$backup_dir/pool_$pool_id/$image_name/$image_name + local head_object_dir=$backup_dir/pool_$pool_id/$image_name/@head + local CURRENT=$backup_dir/pool_$pool_id/$image_name/@CURRENT + local LOCK=$backup_dir/pool_$pool_id/$image_name/@LOCK + if [ -e $LOCK ];then + echo "$func: $LOCK is locked by other process" + exit + else + touch $LOCK + fi + if [ $snap_id -ne -2 ];then + echo $snap_id > $CURRENT + else + echo "head" > $CURRENT + fi + + if [ ! 
-e $snap_object_dir ];then + return 0 + fi + + if [ "$snap_object_dir"x != "$head_object_dir"x ];then + echo "$func: need to compute diff_set of head" + need_diff_set=1 + else + echo "$func: NO diff_set" + need_diff_set=0 + fi + + local entry_count=0 + local start= + local end= + local offset= + local icount=0 + local istart= + local iend= + local percent= + + local snap_objects= + local head_objects= + local diff_set= + + snap_objects=(`ls $snap_object_dir`) + + # if need to compute difference set of head_objects and snap_objects + if [ $need_diff_set -ne 0 ];then + head_objects=(`ls $head_object_dir`) + + #get the difference set: ( head_objects - snap_objects ) + diff_set=(` + sort -m <(echo ${head_objects[@]}|xargs -n 1 echo) <(echo ${snap_objects[@]}|xargs -n 1 echo) \ + <(echo ${snap_objects[@]}|xargs -n 1 echo) |uniq -u`) + + # copy diff_set of head object to image + pushd $head_object_dir >/dev/null + + echo "$func: copy diff_set head objects ..." + entry_count=${#diff_set[@]} + start=${diff_set[0]} + end= + if [ $entry_count -gt 0 ];then + end=${diff_set[$(($entry_count - 1))]} + fi + offset= + icount=0 + istart= + iend= + percent= + + echo -e "object_count\t$entry_count" + echo -e "range\t\t[$start ~ $end] count:$entry_count" + + for object in ${diff_set[@]} + do + icount=$(($icount+1)) + if [ $icount = 1 ];then + istart=$object + fi + iend=$object + + local offset=$((16#$object)) + dd if=$object of=$image_path bs=$image_unit seek=$offset conv=notrunc 2>/dev/null + + percent=`echo "scale=3; 100*$icount/$entry_count"|bc` + tput sc #record current cursor + echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%" + if [ $icount != $entry_count ];then + tput rc # backport most recent cursor + fi + done + if [ $entry_count -gt 0 ];then + echo + fi + popd >/dev/null + + if [ $snap_id -ne -2 ];then + echo -e "$image_name already rollback diff_set: (head - snap)" + fi + fi + + # copy snap object to image + pushd $snap_object_dir >/dev/null + + 
if [ $need_diff_set -ne 0 ];then + echo "$func: copy snap objects ..." + else + echo "$func: copy head objects ..." + fi + entry_count=${#snap_objects[@]} + start=${snap_objects[0]} + end= + if [ $entry_count -gt 0 ];then + end=${snap_objects[$(($entry_count - 1))]} + fi + offset= + icount=0 + istart= + iend= + percent= + + echo -e "object_count\t$entry_count" + echo -e "range\t\t[$start ~ $end] count:$entry_count" + + for object in ${snap_objects[@]} + do + icount=$(($icount+1)) + if [ $icount = 1 ];then + istart=$object + fi + iend=$object + + local offset=$((16#$object)) + dd if=$object of=$image_path bs=$image_unit seek=$offset conv=notrunc 2>/dev/null + + percent=`echo "scale=3; 100*$icount/$entry_count"|bc` + tput sc #record current cursor + echo -n -e "complete\t[$istart ~ $iend] $icount/$entry_count ==> "$percent"%" + if [ $icount != $entry_count ];then + tput rc # backport most recent cursor + fi + done + if [ $entry_count -gt 0 ];then + echo + fi + popd >/dev/null + + rm -f $LOCK + if [ $snap_id -ne -2 ];then + echo "$image_name rollback to snapid: $snap_id" + else + echo "$image_name rollback to head" + fi +} + +function recover_image() +{ + local func="recover_image" + echo "$func ..." 
+
+  # parameters: <pool_id> <image_name> <snap_name> [<backup_dir>]
+  #   snap_name "@"   recover the image head from the osds (no snapshot)
+  #   snap_name "@@"  roll an already recovered image back to head
+  #   anything else   roll the recovered image back to that snapshot
+  #   backup_dir      defaults to $default_backup_dir
+  if [ $# -lt 3 ];then
+    echo "$func: parameters: <pool_id> <image_name> <snap_name> [<backup_dir>]"
+    exit
+  fi
+
+  local pool_id=$1
+  local img_name=$2
+  local snap_name=$3
+  local backup_dir=$4
+  pool_id=$(($pool_id))
+  if [ "$snap_name"x = "@"x ];then
+    snap_name=
+  fi
+  if [ "$backup_dir"x = ""x ];then
+    backup_dir=$default_backup_dir
+  fi
+
+  #recover image with nosnap
+  if [ "$snap_name"x = ""x ];then
+    discover_image_nosnap $pool_id $img_name #input image_name
+    local image_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in
+    copy_image_nosnap_single_thread $pool_id $image_hobjects $backup_dir
+
+  #recover image with snap
+  else
+
+    # check if recovered head already
+    local img_hobjects_path=$images/pool_$pool_id/$img_name/$img_name
+    local img_file_path=$backup_dir/pool_$pool_id/$img_name/$img_name
+    if [ ! -e $img_hobjects_path ] || [ ! -e $img_file_path ];then
+      echo "$func: $img_name@$snap_name : can not rollback to snapshot, please recover image head first"
+      exit
+    fi
+
+    # rollback to head
+    if [ "$snap_name"x = "@@"x ];then
+      local head_dir=$backup_dir/pool_$pool_id/$img_name/@head
+      if [ -e $head_dir ];then
+        # object size = size of the first saved head object, as reported by stat
+        local unit=`pushd $head_dir >/dev/null; ls|head -n 1|xargs -n 1 stat|awk '/Size:/{print $2}'`
+        # rollback to head
+        # Pass the same pool-qualified @head directory that was checked above:
+        # rollback_image_snap compares it with its own
+        # <backup_dir>/pool_<pool_id>/<image>/@head path and silently does
+        # nothing when the directory it is given does not exist.
+        rollback_image_snap $pool_id $img_name -2 $head_dir $backup_dir $unit
+        # no lookup ran on this path, so report with the name and the image
+        # file path this function computed itself
+        echo "$img_name head : $img_file_path"
+      else
+        echo "$func: no need to rollback to head"
+      fi
+      return 0
+    fi
+
+    # rollback to snap
+    discover_image_snap $pool_id $img_name $snap_name # get image meta & get snapid object
+    local snap_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in@$db_snap_id
+    local head_hobjects=$images/pool_$pool_id/$image_name_in/$image_name_in@$db_snap_id@head
+    local snap_object_dir=$backup_dir/pool_$pool_id/$image_name_in/@$db_snap_id
+    local image_path=$backup_dir/pool_$pool_id/$image_name_in/$image_name_in
+    local image_unit=$((1<<$db_order))
+
copy_image_snap_single_thread $pool_id $image_name_in $db_snap_id $snap_hobjects $head_hobjects $backup_dir
+    rollback_image_snap $pool_id $image_name_in $db_snap_id $snap_object_dir $backup_dir $image_unit
+    echo "$image_name_in@$snap_name : $image_path"
+  fi
+}
diff --git a/src/tools/rbd_recover_tool/epoch_h b/src/tools/rbd_recover_tool/epoch_h
new file mode 100644
index 00000000..e268eafa
--- /dev/null
+++ b/src/tools/rbd_recover_tool/epoch_h
@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+# file: epoch_h
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+my_dir=$(dirname "$0")
+. $my_dir/common_h
+
+#pgid_list=$single_node/$cluster-$id/pgid_list
+# Write "<pgid> <osd data path>" for every "*_head" directory found under
+# $osd_data/current/ into $pgid_list, sorted by pgid and then by path.
+function get_pgid_list()
+{
+  find $osd_data/current/ -type d -name "*_head"|\
+  sed -n 's/\(.*\)\/current\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head/\2 \1/p'|\
+  sort -t ' ' -k 1.1,1h -k 2.1,2 > $pgid_list;
+}
+
+# Print the pgid ("<hex>.<hex>") taken from the "<pgid>_head" component of the
+# hobject path given as $1; prints nothing when the path has no such component.
+# NOTE(review): hobject_path is assigned without "local", so it is a global.
+function get_pgid()
+{
+  hobject_path=$1
+  echo $hobject_path| sed -n 's/\(.*\)\/\([0-9a-fA-F]\+\.[0-9a-fA-F]\+\)_head\(.*\)/\2/p'
+}
+
+# Result of get_infos_seq.
+infos_seq=
+# Look up the header sequence of the ":infos." map header entry and store it
+# in the global $infos_seq; exits when the key or the sequence is missing.
+function get_infos_seq()
+{
+  local func="get_infos_seq"
+
+  local keyword=":infos."
+  local infos_key=`get_map_header_key $keyword`
+
+  if [ "$infos_key"x = ""x ];then
+    echo "$func: keyword not input or infos_key not exists"
+    exit
+  fi
+  local prefix=`get_map_header_prefix`
+  local key=$infos_key
+
+  infos_seq=`get_header_seq $prefix $key`
+  if [ "$infos_seq"x = ""x ];then
+    echo "$func: infos_seq not exists"
+    exit
+  fi
+}
+
+# Result of get_pg_epoch and its two helpers.
+pg_epoch=
+# Store the epoch of pg $1 in the global $pg_epoch.
+# The firefly lookup is tried first and the hammer lookup second; the function
+# exits with a message when neither yields a value.
+function get_pg_epoch()
+{
+  local func="get_pg_epoch"
+  if [ "$1"x = ""x ];then
+    echo "$func: no pgid input"
+    exit
+  fi
+
+  get_pg_epoch_firefly "$1"
+  if [ "$pg_epoch"x != ""x ]; then
+    # echo "Epoch for $1: $pg_epoch (firefly)"
+    return
+  fi
+
+  get_pg_epoch_hammer "$1"
+  if [ "$pg_epoch"x != ""x ]; then
+    # echo "Epoch for $1: $pg_epoch (hammer)"
+    return
+  fi
+
+  echo "$func: Couldn't find epoch for $1"
+  exit
+}
+
+# Firefly lookup: read key "<pgid>_epoch" under the header with sequence 1 and
+# store the value in the global $pg_epoch.
+function get_pg_epoch_firefly()
+{
+  local func="get_pg_epoch_firefly"
+  if [ "$1"x = ""x ];then
+    echo "$func: no pgid input"
+    exit
+  fi
+  local pgid=$1
+  local key=$pgid"_epoch"
+
+  #get_infos_seq;
+  # infos_seq default to 1
+  # (this assigns the global; the "local" on the next line then shadows it
+  # with the zero-padded 16 digit form)
+  infos_seq=1
+  local infos_seq=`printf "%016d" $infos_seq`
+  local prefix="_USER_"$infos_seq"_USER_"
+
+  pg_epoch=`get_header_kv $prefix $key int`
+}
+
+# Hammer lookup: find the header sequence of the pg's own map header entry
+# ("...head.<pool in hex>.<pg seed as 8 upper-case hex digits>"), then read key
+# "_epoch" under that header and store the value in the global $pg_epoch.
+function get_pg_epoch_hammer()
+{
+  local func="get_pg_epoch_hammer"
+  if [ "$1"x = ""x ];then
+    echo "$func: no pgid input"
+    exit
+  fi
+  local pgid="$1"
+  local hkey_prefix="$(get_map_header_prefix)"
+  local hkey="$(printf '...head.%x.%08X' "$(echo "$pgid"|cut -d'.' -f1)" "$((0x$(echo "$pgid"|cut -d'.' -f2)))")"
+
+  local infos_seq="$(get_header_seq "$hkey_prefix" "$hkey")"
+  local infos_seq=`printf "%016d" $infos_seq`
+  local prefix="_USER_"$infos_seq"_USER_"
+  local key="_epoch"
+
+  pg_epoch=`get_header_kv $prefix $key int`
+}
diff --git a/src/tools/rbd_recover_tool/metadata_h b/src/tools/rbd_recover_tool/metadata_h
new file mode 100644
index 00000000..4aa491b5
--- /dev/null
+++ b/src/tools/rbd_recover_tool/metadata_h
@@ -0,0 +1,368 @@
+#!/usr/bin/env bash
+# file: metadata_h
+#
+# Copyright (C) 2015 Ubuntu Kylin
+#
+# Author: Min Chen <minchen@ubuntukylin.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+my_dir=$(dirname "$0")
+. $my_dir/common_h
+. $my_dir/epoch_h
+
+# put origin name in $image_name_in: for output
+# put convert "_" name in $image_name: for grep image hobjects from database
+image_name_in=
+image_name=
+# Set the globals $image_name_in (the name as given in $1) and $image_name
+# (the same name passed through convert_underline); exits when $1 is empty.
+function input_image()
+{
+  local func="input_image"
+  if [ "$1"x = ""x ];then
+    echo "$func: no image name input"
+    exit
+  fi
+
+  image_name_in=$1
+  # "_" -> "\u"
+  image_name=`convert_underline $image_name_in`
+}
+
+#======================================== distinguish v1 or v2 ===================================
+#image_list_v1=$single_node/$cluster-$id/image_list_v1
+#image_list_v2=$single_node/$cluster-$id/image_list_v2
+# List the files under $osd_data/current/ whose path contains ".rbd__" into
+# $image_list_v1 and those whose path contains "rbd\uid." into $image_list_v2.
+function get_image_list()
+{
+  find $osd_data/current/ -type f|grep ".rbd__" >$image_list_v1
+  find $osd_data/current/ -type f|grep "rbd\\\\uid."
>$image_list_v2 +} + +function get_image_format_by_hobject() +{ + local func="get_image_format" + if [ "$1"x = ""x ];then + exit + fi + local res1=`cat $image_list_v1|grep $1` + if [ "$res1"x != ""x ];then + echo 1 + exit + fi + + local res2=`cat $image_list_v2|grep $1` + if [ "$res2"x = ""x ];then + echo 2 + exit + fi +} + +#======================================== image format v1 ======================================== +# <image_name>.rbd include 3 parts: +# header + snap_count*snapshot + snap_count*snap_name +# +# struct rbd_obj_header_ondisk { +# 40 char text[40]; +# 24 char block_name[RBD_MAX_BLOCK_NAME_SIZE]; +# 4 char signature[4]; +# 8 char version[8]; +# struct { +# 1 __u8 order; +# 1 __u8 crypt_type; +# 1 __u8 comp_type; +# 1 __u8 unused; +# } __attribute__((packed)) options; +# 8 __le64 image_size;//hexdump -C s=80 n=8 +# 8 __le64 snap_seq; //hexdump -C s=88 n=8 +# 4 __le32 snap_count;//hexdump -C s=96 n=4 +# 4 __le32 reserved; +# 8 __le64 snap_names_len;//hexdump -C s=104 n=8 +# struct rbd_obj_snap_ondisk snaps[0]; +# } __attribute__((packed)); +# +# sizeof(rbd_obj_header_ondisk): 112 +# +# struct rbd_obj_snap_ondisk { +# 8 __le64 id; //hexdump -C s=112+i*16 n=8 , i=[0, snap_count) +# 8 __le64 image_size;//hexdump -C s=112+i*16+8 n=8, i=[0, snap_count) +# } __attribute__((packed)); +# sizeof(rbd_obj_snap_ondisk): 16 +# +# get snap_names form <image_nane>.rbd +# hexdump -e '10/1 "%_c"' -s $((112 + $snap_count*16)) -n $snap_names_len <image_name>.rbd +# then split snap_names into array + +function get_image_metadata_v1() +{ + local func="get_image_metadata_v1" + if [ "$1"x = ""x ];then + echo "$func: no image head object input" + exit + fi + local snap_name= + if [ "$2"x != ""x ];then + snap_name=$2 + fi + + if [ ! 
-e $1 ];then
+    echo "$func: $1 not exists"
+    exit
+  fi
+  local hobject_path=$1
+  # NOTE(review): d_hobject_path is assigned without "local", so it is a global
+  d_hobject_path=`dump_backslash $1`
+  local image_format=`get_image_format_by_hobject $d_hobject_path`
+  if [ $image_format != 1 ];then
+    echo "$func: image_format must be 1"
+    exit
+  fi
+
+  if [ ! -e $hobject_path ];then
+    echo "$func: $hobject_path not exists"
+    exit
+  fi
+
+  # decode rbd_obj_header_ondisk of <image_name>.rbd
+  # The 64/32 bit fields are little endian: the byte columns of "hexdump -C"
+  # ($2..$9, or $2..$5 for 4 bytes) are concatenated in reverse order to build
+  # the hexadecimal number.
+  local block_name=`hexdump -e '10/1 "%c"' -s 40 -n 24 $hobject_path`
+  local order=`hexdump -e '10/4 "%u"' -s 76 -n 1 $hobject_path`
+  local image_size=`hexdump -C -s 80 -n 8 $hobject_path|head -n 1|awk '{for (i=9; i>1; i--) {printf $i}}'`
+  image_size=$((16#$image_size))
+  local snap_seq=`hexdump -C -s 88 -n 8 $hobject_path|head -n 1|
+    awk '{num=""; for(i=9; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'`
+  local snap_count=`hexdump -C -s 96 -n 4 $hobject_path|head -n 1|
+    awk '{num=""; for(i=5; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'`
+  local snap_names_len=`hexdump -C -s 104 -n 8 $hobject_path|head -n 1|
+    awk '{num=""; for(i=9; i>1; i--){ num=num""$i;} print strtonum("0x"num);}'`
+
+  echo -e "block_name:\t$block_name"
+  echo -e "order:\t\t$order"
+  echo -e "image_size:\t$image_size"
+  echo -e "snap_seq:\t$snap_seq"
+
+  # decode N rbd_obj_snap_ondisk of <image_name>.rbd
+  declare -a snap_ids
+  declare -a snap_names
+  declare -a snap_image_sizes
+  local size_header=112 #sizeof(rbd_obj_header_ondisk)
+  local size_snap=16 #sizeof(rbd_obj_snap_ondisk)
+  local offset=0
+  local id_off=0
+  local size_off=0
+  for ((i=0; i<$snap_count; i++))
+  do
+    # record i starts right after the header; id first, image size 8 bytes later
+    offset=$(($size_header + $i * $size_snap))
+    id_off=$offset
+    size_off=$(($offset + 8))
+    snap_ids[$i]=`hexdump -C -s $id_off -n 8 $hobject_path|head -n 1|
+      awk '{num=""; for(i=9; i>1; i--){num=num""$i;} print strtonum("0x"num);}'`
+    snap_image_sizes[$i]=`hexdump -C -s $size_off -n 8 $hobject_path|head -n 1|
+      awk '{num=""; for(i=9; i>1; i--){num=num""$i;} print strtonum("0x"num);}'`
+  done
+  # the snapshot names follow the last snapshot record
+  # NOTE(review): the awk field separator is meant to split the "%_c" dump on
+  # its "\0" markers - confirm the escaping against real hexdump output.
+  offset=$(($size_header + $snap_count * $size_snap))
+  snap_names=(`hexdump -e '10/1 "%_c"' -s $offset -n $snap_names_len $hobject_path|
+    awk -F "\\\\\\\\\\\\\\\\0" '{for(i=1; i<=NF; i++) {print $i" "} }'`);
+
+  # Without a snap name every snapshot is listed; with one, only the matching
+  # snapshot is printed and the function returns at the first match.
+  echo -e "\t\tID\tNAME\t\tSIZE"
+  for ((i=0; i<$snap_count; i++))
+  do
+    if [ "$snap_name"x = ""x ];then
+      echo -n -e "snapshot:\t"
+      echo -e "${snap_ids[$i]}\t${snap_names[$i]}\t\t${snap_image_sizes[$i]}"
+      continue
+    fi
+    if [ "$snap_name"x = "${snap_names[$i]}"x ];then
+      echo -n -e "snapshot:\t"
+      echo -e "${snap_ids[$i]}\t${snap_names[$i]}\t\t${snap_image_sizes[$i]}"
+      return
+    fi
+  done
+}
+
+#======================================== end image format v1 ========================================
+
+#======================================== image format v2 ========================================
+
+# map_header, header_seq, header, key/value
+# eg.
+# map_header _HOBJTOSEQ_:rbd%uheader%e139a6b8b4567...head.2.68E826B6
+# meta_header_seq 17426
+# header: _USER_0000000000017426_USER_:object_prefix
+# _USER_0000000000017426_USER_:order
+# _USER_0000000000017426_USER_:size
+# _USER_0000000000017426_USER_:snap_seq
+# key/value ceph-kvstore-tool /storepath get _USER_0000000000017426_USER_ (object_prefix|order|size|snap_seq)
+
+# decode image id from image_id_hobject
+# The file is read as a 4 byte length followed by that many characters; the
+# characters are printed on stdout. Prints nothing when $1 is empty or missing.
+function get_image_id()
+{
+  local func="get_image_id"
+  if [ "$1"x = ""x ];then
+    exit;
+  fi
+  local image_id_hobject=$1 #from admin node's database
+
+  if [ !
-e $image_id_hobject ];then
+    #echo "$func: $image_id_hobject not exists"
+    exit;
+  fi
+
+  # get len of string
+  local n=`hexdump -e '10/4 "%u"' -s 0 -n 4 $image_id_hobject`
+  # get string
+  hexdump -e '10/1 "%c"' -s 4 -n $n $image_id_hobject
+}
+
+#find image_id omap entry in omaplist
+# Results of get_map_header.
+map_header_prefix=
+map_header_key=
+# Set the globals $map_header_prefix and $map_header_key for image id $1,
+# using the keyword "header%e<image_id>"; exits when no key is found.
+function get_map_header()
+{
+  local func="get_map_header"
+  local image_id=$1
+  if [ "$image_id"x = ""x ];then
+    echo "$func: no image_id input"
+    exit;
+  fi
+  map_header_prefix=`get_map_header_prefix`
+  local keyword="header%e"$image_id
+  map_header_key=`get_map_header_key $keyword`
+  if [ "$map_header_key"x = ""x ];then
+    echo "$func: map_header_key is NULL(not in omaplist)"
+    exit
+  fi
+}
+
+#get meta header seq from map_header
+# Result of get_meta_header_seq.
+meta_header_seq=
+# parameters: <prefix> <key>; stores get_header_seq's output in $meta_header_seq
+function get_meta_header_seq()
+{
+  local func="get_meta_header_seq"
+  if [ "$1"x == ""x ];then
+    echo "$func: no prefix input"
+    exit;
+  elif [ "$2"x == ""x ];then
+    echo "$func: no key input"
+    exit;
+  fi
+  local prefix=$1;
+  local key=$2;
+  meta_header_seq=`get_header_seq $prefix $key`
+}
+
+# get image metadata : object_prefix, order, image_size, snap_seq
+object_prefix=
+order=
+image_size=
+snap_seq=
+# parameters: <meta_header_seq> [<snap_name>]
+# Fills the four globals above from the header "_USER_<seq padded to 16
+# digits>_USER_", prints them, then lists the snapshots through list_snaps_v2.
+function get_image_metadata_v2()
+{
+  local func="get_image_metadata_v2"
+  if [ "$1"x = ""x ];then
+    echo "$func: no meta_header_seq input"
+    exit;
+  fi
+  local meta_header_seq=`printf "%016d" $1`
+  #echo "$func: meta_header_seq = "$meta_header_seq
+  local ghobject_key="_USER_"$meta_header_seq"_USER_"
+  local prefix=$ghobject_key
+
+  object_prefix=`get_header_kv $prefix object_prefix string`
+  #object_prefix="rbd_data.$image_id"
+  order=`get_header_kv $prefix order int`
+  image_size=`get_header_kv $prefix size int`
+  snap_seq=`get_header_kv $prefix snap_seq int`
+
+  echo -e "object_prefix:\t$object_prefix"
+  echo -e "order:\t\t$order"
+  echo -e "image_size:\t$image_size"
+  echo -e "snap_seq:\t$snap_seq"
+
+  # list snapshot
+  # (the unpadded sequence $1 is passed on; list_snaps_v2 pads it itself)
+  list_snaps_v2 $1 $2
+}
+
+# struct cls_rbd_snap {
+# snapid_t id;
+#
string name;
+# uint64_t image_size;
+# uint64_t features;
+# uint8_t protection_status;
+# cls_rbd_parent parent;
+# }
+# decode cls_rbd_snap
+# 1 u8 struct_v
+# 1 u8 struct_compat
+# 4 u32 struct_len
+# 8 u64 snapid_t id //s=6 n=8
+# 4 u32 len of name //s=14 n=4
+# len char name //s=18 n=len
+# 8 u64 image_size
+# 8 u64 features
+# ......
+#
+# parameters: <meta_header_seq> [<snap_name>]
+# Prints one "snapshot:<TAB>ID<TAB>NAME<TAB><TAB>SIZE" line per snapshot key of
+# the header, or only the line of <snap_name> when a name is given.
+function list_snaps_v2()
+{
+  local func="list_snaps_v2"
+  if [ "$1"x = ""x ];then
+    exit
+  fi
+  local sname=
+  if [ $# -eq 2 ];then
+    sname=$2
+  fi
+  local meta_header_seq=`printf "%016d" $1`
+  local prefix="_USER_"$meta_header_seq"_USER_"
+  # collect the part after "snapshot_" of every matching key in $omap_list,
+  # in reverse sorted order
+  local keys=(`awk -F ":" '/snapshot_/ && $1 == "'"$prefix"'" {if ($2 == "") exit; split($2, arr, "_");
+    print arr[2];}' $omap_list|sort -r`)
+  echo -e "\t\tID\tNAME\t\tSIZE"
+  for key in ${keys[@]}
+  do
+    key="snapshot_$key"
+    # NOTE(review): presumably the tool prints "<label>: <hex bytes>" so that
+    # arr[i] is byte i of the encoded cls_rbd_snap - confirm against the tool.
+    local arr=(`ceph-kvstore-tool $omap_path get $prefix $key|awk -F ":" '{print $2}'`);
+    # get snap_name
+    # name length: bytes 14..17, little endian, so read from 17 down to 14
+    tmp=
+    for ((i=17; i>13; i--))
+    do
+      tmp="$tmp${arr[$i]}"
+    done
+    local len=$((16#$tmp))
+    local snap_name=
+    for ((i=18; i<$((18+$len)); i++))
+    do
+      # convert ascii to char
+      local char=`echo -e "\x${arr[$i]}"`
+      snap_name="$snap_name$char"
+    done
+    # get snap_id (little endian)
+    local tmp=
+    for ((i=13; i>5; i--))
+    do
+      tmp="$tmp${arr[$i]}"
+    done
+    local snap_id=$((16#$tmp))
+    # get image_size of current snap (little endian)
+    # (the 8 bytes that follow the name)
+    tmp=
+    for ((i=$((25+$len)); i>$((17+$len)); i--))
+    do
+      tmp="$tmp${arr[$i]}"
+    done
+    local image_size=$((16#$tmp))
+    if [ "$sname"x = ""x ];then
+      echo -e "snapshot:\t$snap_id\t$snap_name\t\t$image_size"
+      continue
+    fi
+    if [ "$sname"x = "$snap_name"x ];then
+      echo -e "snapshot:\t$snap_id\t$snap_name\t\t$image_size"
+      return
+    fi
+  done
+}
+
+#======================================== end image format v2 ========================================
diff --git a/src/tools/rbd_recover_tool/osd_job b/src/tools/rbd_recover_tool/osd_job
new file mode 100755
index 00000000..b4b80be8
--- /dev/null
+++ b/src/tools/rbd_recover_tool/osd_job
# [patch metadata] @@ -0,0 +1,170 @@
#!/usr/bin/env bash
# file: osd_job
#
# Copyright (C) 2015 Ubuntu Kylin
#
# Author: Min Chen <minchen@ubuntukylin.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#
# Usage: osd_job <function> <osd_data_path> [<args>...]
# Runs one of the functions below on an osd node.  Helpers and globals that
# are not defined in this file (init_env_osd, get_omap_list, get_pg_epoch,
# $node_pg_epoch, $image_v1, ...) come from the sourced *_h files.
# Failures terminate with status 1 so that a remote caller can detect them.

my_dir=$(dirname "$0")

. $my_dir/common_h
. $my_dir/metadata_h
. $my_dir/epoch_h

# Refuse to work while a ceph-osd daemon is alive on this node; a running
# "ceph-osd --flush-journal" (started by flush_osd_journal) is tolerated.
function check_ceph_osd()
{
  local func="check_ceph_osd"
  local host=`hostname`
  # if ceph-osd service is still running, except flush-journal
  if [ "`ps aux|grep ceph-osd|grep -v flush-journal|grep -v grep`"x != ""x ];then
    echo "[$host]: $func: ceph-osd is running..., stop it"
    exit 1
  fi
}

# Print the pg epoch file of this osd (written by do_pg_epoch), if any.
#   $1  osd data path, forwarded to init_env_osd
function cat_pg_epoch()
{
  local func="cat_pg_epoch"
  init_env_osd $1
  # quoted: with an empty variable "[ -e ]" would be true and cat would
  # block on stdin
  if [ -e "$node_pg_epoch" ];then
    cat "$node_pg_epoch"
  fi
}

# Print the format-1 image list of this osd (written by do_image_list), if any.
#   $1  osd data path, forwarded to init_env_osd
function cat_image_v1()
{
  local func="cat_image_v1"
  init_env_osd $1
  if [ -e "$image_v1" ];then
    cat "$image_v1"
  fi
}

# Print the format-2 image list of this osd (written by do_image_list), if any.
#   $1  osd data path, forwarded to init_env_osd
function cat_image_v2()
{
  local func="cat_image_v2"
  init_env_osd $1
  if [ -e "$image_v2" ];then
    cat "$image_v2"
  fi
}

# Flush the osd journal into the object store with
# "ceph-osd --flush-journal".  The osd id is read from <osd_data>/whoami.
#   $1  osd data path, forwarded to init_env_osd (which provides $osd_data)
function flush_osd_journal()
{
  local func="flush_osd_journal"
  init_env_osd $1
  local osd_data_path=$osd_data
  local osd_journal_path=$osd_data/journal
  local whoami_path=$osd_data/whoami
  local host=`hostname`
  if [ ! -e "$whoami_path" ];then
    echo "[$host]: $func: $whoami_path not exists"
    exit 1
  fi
  local whoami=`cat "$whoami_path"`
  echo "[$host]: $func ..."
  if ! ceph-osd -i $whoami --osd-data $osd_data_path --osd-journal $osd_journal_path --flush-journal >/dev/null;then
    echo "[$host]: $func: flush osd journal failed"
    exit 1
  fi
}

# Dump the omap key list of this osd via get_omap_list.
#   $1  osd data path, forwarded to init_env_osd
function do_omap_list()
{
  local func="do_omap_list"
  init_env_osd $1
  local host=`hostname`
  echo "[$host]: $func ..."
  get_omap_list
}

# get all pgs epoch
# Rewrites $node_pg_epoch with one "<node> <pgid> <epoch> <data_path>" line
# per entry of $pgid_list.  get_pg_epoch is expected to leave its result in
# $pg_epoch.
#   $1  osd data path, forwarded to init_env_osd
function do_pg_epoch()
{
  local func="do_pg_epoch"
  init_env_osd $1
  local node=`hostname`
  get_pgid_list
  >"$node_pg_epoch"
  local pgid=
  local data_path=
  local line=
  echo "[$node]: $func ..."
  while read -r line
  do
    pgid=`echo $line|awk '{print $1}'`
    data_path=`echo $line|awk '{print $2}'`
    get_pg_epoch $pgid
    echo -e "$node $pgid $pg_epoch $data_path" >>"$node_pg_epoch"
  done < "$pgid_list"
}

# get an list of image in this osd node, pg epoch maybe not the latest, the admin node will do distinguish
# Rewrites $image_v1 / $image_v2 with "<node> <image header> <epoch>" lines
# built from $image_list_v1 / $image_list_v2.
#   $1  osd data path, forwarded to init_env_osd
function do_image_list()
{
  local func="do_image_list"
  init_env_osd $1
  get_image_list
  local node=`hostname`
  local line=
  local pgid=
  >"$image_v1"
  >"$image_v2"
  echo "[$node]: $func ..."
  for line in `cat "$image_list_v1"`
  do
    pgid=`get_pgid $line`
    get_pg_epoch $pgid
    echo "$node $line $pg_epoch" >> "$image_v1"
  done
  for line in `cat "$image_list_v2"`
  do
    pgid=`get_pgid $line`
    get_pg_epoch $pgid
    echo "$node $line $pg_epoch" >> "$image_v2"
  done
}

# Resolve an image id through get_image_id.
#   $1  osd data path, forwarded to init_env_osd
#   $2  argument handed to get_image_id unchanged
function do_image_id()
{
  local func="do_image_id"
  init_env_osd $1
  get_image_id $2
}

# Print the metadata of a format-1 image.
#   $1  osd data path   $2  image header hobject   $3  optional snapshot name
function do_image_metadata_v1()
{
  local func="do_image_metadata_v1"
  init_env_osd $1
  local image_header_hobject=$2
  local snap_name=$3
  get_image_metadata_v1 $image_header_hobject $snap_name
}

# Print the metadata of a format-2 image.
#   $1  osd data path   $2  image id   $3  image header hobject
#   $4  optional snapshot name
# $3 is accepted to keep the positional interface but is not used here.
function do_image_metadata_v2()
{
  local func="do_image_metadata_v2"
  init_env_osd $1
  local image_id=$2
  local image_header_hobject=$3
  local snap_name=$4
  get_map_header $image_id
  get_meta_header_seq $map_header_prefix $map_header_key
  get_image_metadata_v2 $meta_header_seq $snap_name
}

check_ceph_osd
# dispatch: first argument is the function name, the rest are its arguments
"$@"
# [patch metadata] diff --git a/src/tools/rbd_recover_tool/rbd-recover-tool b/src/tools/rbd_recover_tool/rbd-recover-tool new file
# [patch metadata] mode 100755 index 00000000..b7a25865 --- /dev/null +++ b/src/tools/rbd_recover_tool/rbd-recover-tool @@ -0,0 +1,327 @@
#!/usr/bin/env bash
# file: rbd-recover-tool
#
# Copyright (C) 2015 Ubuntu Kylin
#
# Author: Min Chen <minchen@ubuntukylin.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#

# rbd-recover-tool is an offline recover tool for rbd image in replicated pool
# when ceph cluster is stopped.
# it is a simple disaster recovery policy, just for urgent condition
#
# Helpers and globals not defined in this file (init_env_admin,
# admin_parse_osd, check_osd_process, gen_database, list_images, lookup_image,
# recover_image, $osd_host, $osd_host_path, $job_path, $ssh_option, ...) come
# from the sourced *_h files.

my_dir=$(dirname "$0")

. $my_dir/common_h
. $my_dir/metadata_h
. $my_dir/epoch_h
. $my_dir/database_h

#scp files from admin node to osd node
# resolved next to this script so the tool also works when it is started
# from another working directory
file1=$my_dir/common_h
file2=$my_dir/metadata_h
file3=$my_dir/epoch_h
file4=$my_dir/osd_job

#------------ admin node's action -------------

# Copy one file to $job_path on every host listed in $osd_host (in parallel)
# and wait for all copies to finish.
#   $1  file to copy
function scp_file()
{
  local func="scp_file"
  local file=$1
  local host=
  if [ "$1"x = ""x ];then
    echo "$func: not file input"
    exit 1
  fi
  for host in `cat "$osd_host"`
  do
    {
      echo "$func: $host"
      scp $ssh_option $file $host:$job_path 1>/dev/null
    } &
  done
  # without this the function returned while copies were still running
  wait
}

# Copy the four helper scripts to $job_path on every host of $osd_host.
function scp_files()
{
  local func="scp_files"
  local host=
  for host in `cat "$osd_host"`
  do
    {
      echo "$func: $host"
      scp $ssh_option $file1 $host:$job_path
      scp $ssh_option $file2 $host:$job_path
      scp $ssh_option $file3 $host:$job_path
      scp $ssh_option $file4 $host:$job_path
    } &
  done
  wait
  echo "$func: finish"
}

# For every "<host> <osd data path>" line of $osd_host_path: ship the helper
# scripts, then run flush_osd_journal, do_omap_list, do_pg_epoch and
# do_image_list remotely.  One background job per line; waits for all.
function scatter_node_jobs()
{
  local func="scatter_node_jobs"
  local host=
  local data_path=
  local line=
  local cmd=
  echo "$func: flush osd journal & generate infos: omap, pg, image metadata ..."

  trap 'echo $func failed; exit' INT HUP
  while read -r line
  do
    {
      host=`echo $line|awk '{print $1}'`
      data_path=`echo $line|awk '{print $2}'`
      check_osd_process $host

      cmd="mkdir -p $job_path"
      ssh $ssh_option $host $cmd
      scp $ssh_option $file1 $host:$job_path >/dev/null
      scp $ssh_option $file2 $host:$job_path >/dev/null
      scp $ssh_option $file3 $host:$job_path >/dev/null
      scp $ssh_option $file4 $host:$job_path >/dev/null

      # every step goes through "bash" so the remote copy does not need the
      # executable bit
      cmd="bash $job_path/osd_job flush_osd_journal $data_path;"
      cmd="$cmd bash $job_path/osd_job do_omap_list $data_path;"
      cmd="$cmd bash $job_path/osd_job do_pg_epoch $data_path;"
      cmd="$cmd bash $job_path/osd_job do_image_list $data_path;"

      ssh $ssh_option $host $cmd </dev/null
    } &
  done < "$osd_host_path"
  wait
  echo "$func: finish"
}

# Collect the per-osd results into $pg_coll, $image_coll_v1 and
# $image_coll_v2 (all three are truncated first).
# NOTE(review): the background jobs append to the same files concurrently;
# confirm that interleaved output cannot corrupt lines on large clusters.
function gather_node_infos()
{
  local func="gather_node_infos"
  local host=
  local data_path=
  local line=
  local cmd1= cmd2= cmd3=
  echo "$func ..."
  >"$pg_coll"
  >"$image_coll_v1"
  >"$image_coll_v2"
  trap 'echo $func failed; exit' INT HUP
  while read -r line
  do
    {
      host=`echo $line|awk '{print $1}'`
      data_path=`echo $line|awk '{print $2}'`
      echo "$func: $host"
      check_osd_process $host

      #pg epoch
      cmd1="bash $job_path/osd_job cat_pg_epoch $data_path"
      ssh $ssh_option $host $cmd1 >> "$pg_coll"
      #image v1
      cmd2="bash $job_path/osd_job cat_image_v1 $data_path"
      ssh $ssh_option $host $cmd2 >> "$image_coll_v1"
      #image v2
      cmd3="bash $job_path/osd_job cat_image_v2 $data_path"
      ssh $ssh_option $host $cmd3 >> "$image_coll_v2"
    } &
  done < "$osd_host_path"
  wait
  echo "$func: finish"
}

# Run the scatter and gather phases; both host lists must be non-empty files.
function scatter_gather()
{
  local func="scatter_gather"
  if [ ! -s "$osd_host" ];then
    echo "$func: no osd_host input"
    exit 1
  fi
  if [ ! -s "$mon_host" ];then
    echo "$func: no mon_host input"
    exit 1
  fi
  scatter_node_jobs
  gather_node_infos
}


#------------- operations --------------

# "database": collect node information, then build the local database.
function database()
{
  scatter_gather
  gen_database
}

# "list": show all images known to the database.
function list()
{
  list_images
}

# "lookup": $1 pool id, $2 image name, $3 optional snapshot name.
function lookup()
{
  lookup_image $1 $2 $3
}

# "recover": $1 pool id, $2 image name, $3 snapshot selector, $4 optional
# output directory.
function recover()
{
  recover_image $1 $2 $3 $4
}

#------------- helper -------------

# Print the command line help.
function usage()
{
  local cmd_name="rbd-recover-tool"
  echo
  echo "$cmd_name is used to recover rbd image of replicated pool,
        when all ceph services are stopped"
  echo "Usage:"
  echo "$cmd_name database
        gather pg info, object info, image metadata,
        and epoch info from all osd nodes,
        this will cosume a long time, just be patient,
        especially when scale up to 1000+ osds"
  echo "$cmd_name list
        list all rbd images of all replicated pools,
        before to lookup & recover"
  echo "$cmd_name lookup <pool_id>/<image_name>[@[<snap_name>]]
        show image metadata: image format, rbd id, size, order, snapseq
        In addition, for image with snapshots,
        this will list all snapshot infomations"
  echo "$cmd_name recover <pool_id>/<image_name>[@[<snap_name>]] [</path/to/store/image>]
        all snapshots share one image head, to economize disk space
        so there is only one snapshot at any time,
        image is saved at </path/to/store/image>/pool_<pool_id>/image_name/image_name
        cat <path/to/store/image>/pool_<pool_id>/image_name/@CURRENT,
        will show snapid
        recover to raw image/nosnap/head: <image_name>
        rollback to image head: <image_name>@
        rollback to image snap: <image_name>@<snap_name>
        recover steps:
        1. recover image nosnap (only one time)
        2. rollback to image snap"
}

# Normalise a path to "<dirname>/<basename>" (drops a trailing slash).
# Prints nothing when no argument is given or the path contains "//";
# callers treat empty output as "invalid path".
function get_path()
{
  local func="get_path"
  if [ $# -lt 1 ];then
    return
  fi
  if [[ $1 =~ // ]];then
    return # "/path//to" is invalid
  fi
  local parent=`dirname "$1"`
  local name=`basename "$1"`
  if [ "$parent"x = "/"x ];then
    echo "$parent$name"
  else
    echo -n "$parent/$name"
  fi
}

# Command line entry point: validates the arguments of the sub-command in $1
# and dispatches to the operation functions above.  Misuse exits with
# status 1; -h/--help exits with status 0.
function admin_cmd()
{
  local func="admin_cmd"
  if [ $# -lt 1 ];then
    usage
    exit 1
  fi
  if [ "$1"x = "-h"x ] || [ "$1"x = "--help"x ];then
    usage
    exit 0
  fi

  if [ "$1"x = "database"x ];then
    if [ $# -gt 1 ];then
      usage
      exit 1
    fi
    # remove osd_host to refresh osd_host and osd_host_mapping
    rm -f $osd_host
    init_env_admin
    database
  elif [ "$1"x = "list"x ];then
    if [ $# -gt 1 ];then
      usage
      exit 1
    fi
    init_env_admin
    list
  elif [ "$1"x = "lookup"x ];then
    if [ $# -gt 2 ];then
      usage
      exit 1
    fi
    local pool_id=-1
    local image_name=
    local snap_name=
    if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then
      pool_id="${BASH_REMATCH[1]}"
      image_name="${BASH_REMATCH[2]}"
    elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
      pool_id="${BASH_REMATCH[1]}"
      image_name="${BASH_REMATCH[2]}"
      snap_name="${BASH_REMATCH[3]}"
    else
      echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
      exit 1
    fi
    init_env_admin
    lookup $pool_id $image_name $snap_name
  elif [ "$1"x = "recover"x ];then
    if [ $# -lt 2 ] || [ $# -gt 3 ];then
      usage
      exit 1
    fi
    local pool_id=-1
    local image_name=
    # "@" = plain image (no snapshot given), "@@" = "<image>@" (image head)
    local snap_name=@
    local image_dir=
    if [[ $2 =~ ^([^@/]+)/([^@/]+)$ ]];then
      pool_id="${BASH_REMATCH[1]}"
      image_name="${BASH_REMATCH[2]}"
    elif [[ $2 =~ ^([^@/]+)/([^@/]+)@([^@/]*)$ ]];then
      pool_id="${BASH_REMATCH[1]}"
      image_name="${BASH_REMATCH[2]}"
      snap_name="${BASH_REMATCH[3]}"
      if [ "$snap_name"x = ""x ];then
        snap_name=@@
      fi
    else
      echo "format: $2 is invalid, use <pool_id>/<image_name>[@[<snap_name>]]"
      exit 1
    fi
    if [ $# = 3 ];then
      image_dir=`get_path $3`
      # the variable (not the literal word "image_dir") has to be tested,
      # otherwise an invalid path is never rejected
      if [ "$image_dir"x = ""x ];then
        echo "$3 invalid"
        exit 1
      fi
    fi
    init_env_admin
    recover $pool_id $image_name $snap_name $image_dir
  elif [ "$1"x = "scp_files"x ];then
    if [ $# -gt 1 ];then
      exit 1
    fi
    admin_parse_osd
    scp_files
  elif [ "$1"x = "scp_file"x ];then
    if [ $# -gt 2 ];then
      exit 1
    fi
    admin_parse_osd
    scp_file $2
  else
    echo "$func: $1: command not found"
    exit 1
  fi
}

admin_cmd "$@"
# [patch metadata] diff --git a/src/tools/rbd_recover_tool/test_rbd_recover_tool.sh b/src/tools/rbd_recover_tool/test_rbd_recover_tool.sh new file mode 100755 index 00000000..876b47b9 --- /dev/null +++ b/src/tools/rbd_recover_tool/test_rbd_recover_tool.sh @@ -0,0 +1,542 @@
#!/usr/bin/env bash
#
# Copyright (C) 2015 Ubuntu Kylin
#
# Author: Min Chen <minchen@ubuntukylin.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library Public License for more details.
#

# unit test case for rbd-recover-tool

#prepare:
# - write config files: config/osd_host, config/mon_host, config/storage_path, config/mds_host if exist mds
#step 1. rbd export all images as you need
#step 2. stop all ceph services
#step 3. use ceph_rbd_recover_tool to recover all images
#step 4.
#        compare md5sum of recover image with that of export image who has the same image name

ssh_opt="-o ConnectTimeout=1"
my_dir=$(dirname "$0")
tool_dir=$my_dir

#storage_path=$my_dir/config/storage_path
mon_host=$my_dir/config/mon_host
osd_host=$my_dir/config/osd_host
mds_host=$my_dir/config/mds_host

test_dir= # `cat $storage_path`
export_dir= #$test_dir/export
recover_dir= #$test_dir/recover
image_names= #$test_dir/image_names
online_images= #$test_dir/online_images, all images on ceph rbd pool
gen_db= #$test_dir/gen_db, label database if exist
pool=rbd
pool_id=2

# Set the global $pool_id from the first line of "ceph osd pool stats $pool"
# (4th word).  Exits with status 1 when nothing could be read; the status of
# the pipeline itself is awk's and therefore useless for detecting a failing
# ceph command.
function get_pool_id()
{
  local func="get_pool_id"
  local id=
  id=`ceph osd pool stats $pool|head -n 1|awk '{print $4}'`
  if [ "$id"x = ""x ];then
    echo "$func: get pool id failed: pool = $pool"
    exit 1
  fi
  pool_id=$id
  echo "$func: pool_id = $pool_id"
}

# Validate the config files, derive all working paths from the storage
# directory, read the pool id and create empty export/recover directories.
#   $1  directory used to store exported and recovered images
function init()
{
  local func="init"
  if [ $# -eq 0 ];then
    echo "$func: must input <path> to storage images, enough disk space is good"
    exit 1
  fi
  if [ ! -s "$osd_host" ];then
    echo "$func: config/osd_host not exists or empty"
    exit 1
  fi
  if [ ! -s "$mon_host" ];then
    echo "$func: config/mon_host not exists or empty"
    exit 1
  fi
  if [ ! -e "$mds_host" ];then
    echo "$func: config/mds_host not exists"
    exit 1
  fi
  test_dir=$1
  export_dir=$test_dir/export
  recover_dir=$test_dir/recover
  image_names=$test_dir/image_names
  online_images=$test_dir/online_images
  gen_db=$test_dir/gen_db

  # "ceph -s" presumably blocks when the cluster is down; ^C then reports it
  trap 'echo "ceph cluster is stopped ..."; exit;' INT
  ceph -s >/dev/null
  get_pool_id

  mkdir -p "$test_dir"
  mkdir -p "$export_dir"
  mkdir -p "$recover_dir"
  # ":?" aborts instead of expanding to "/*" should a path ever be empty
  rm -rf "${export_dir:?}"/*
  rm -rf "${recover_dir:?}"/*
}

# Build the recover database once; $gen_db holds "1" after a run.
function do_gen_database()
{
  local func="do_gen_database"
  if [ -s "$gen_db" ] && [ "$(cat "$gen_db")" = 1 ];then
    echo "$func: database already existed"
    exit 1
  fi
  bash $tool_dir/rbd-recover-tool database
  echo 1 >"$gen_db"
}

#check if all ceph processes are stopped
# Returns 0 when no ceph-osd/ceph-mon/ceph-mds process is found on any
# configured host, 1 otherwise.
function check_ceph_service()
{
  local func="check_ceph_service"
  local res=`cat $osd_host $mon_host $mds_host|sort -u|tr -d '[:blank:]'|xargs -n 1 -I @ ssh $ssh_opt @ "ps aux|grep -E \"(ceph-osd|ceph-mon|ceph-mds)\"|grep -v grep"`
  if [ "$res"x != ""x ];then
    echo "$func: NOT all ceph services are stopped"
    return 1
  fi
  echo "$func: all ceph services are stopped"
  return 0
}

# killall ceph-osd on every osd host (in parallel), then ceph-mon on the mon
# hosts and ceph-mds on the mds hosts.
function stop_ceph()
{
  local func="stop_ceph"
  local osd=
  #cat osd_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-osd"
  while read -r osd
  do
    osd=`echo $osd|tr -d '[:blank:]'`
    # blank lines are skipped here, in the loop itself: "continue" has no
    # loop to act on inside a background subshell
    if [ "$osd"x = ""x ];then
      continue
    fi
    {
      #ssh $ssh_opt $osd "killall ceph-osd ceph-mon ceph-mds" </dev/null
      ssh $ssh_opt $osd "killall ceph-osd" </dev/null
    } &
  done < "$osd_host"
  wait
  echo "waiting kill all osd ..."
  sleep 1
  #cat $mon_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mon ceph-osd ceph-mds"
  cat $mon_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mon"
  #cat $mds_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mds ceph-mon ceph-osd"
  cat $mds_host|xargs -n 1 -I @ ssh $ssh_opt @ "killall ceph-mds"
}

# Create the image in $pool, or reset an existing one: unprotect (format 2
# only) and purge its snapshots, then resize it.
#   $1  image name   $2  size   $3  image format (1 or 2)
function create_image()
{
  local func="create_image"
  if [ ${#} -lt 3 ];then
    echo "create_image: parameters: <image_name> <size> <image_format>"
    exit 1
  fi
  local image_name=$1
  local size=$2
  local image_format=$3
  if [ $image_format -lt 1 ] || [ $image_format -gt 2 ];then
    echo "$func: image_format must be 1 or 2"
    exit 1
  fi
  local res=`rbd list|grep -E "^$1$"`
  echo "$func $image_name ..."
  if [ "$res"x = ""x ];then
    rbd -p $pool create $image_name --size $size --image_format $image_format
  else
    if [ $image_format -eq 2 ];then
      rbd snap ls $image_name|tail -n +2|awk '{print $2}'|xargs -n 1 -I % rbd snap unprotect $image_name@%
    fi
    rbd snap purge $image_name
    #rbd rm $image_name
    rbd -p $pool resize --allow-shrink --size $size $image_name
  fi
}

# Start rbd-fuse on the given mount point unless "mount" already lists it.
#   $1  mount point
function _mount_rbd_fuse()
{
  local mnt=$1
  if ! mount |grep "rbd-fuse on /rbdfuse" &>/dev/null;then
    rbd-fuse $mnt
  fi
}

# Fill an image with random data through rbd-fuse, export it with
# "rbd export" and record the md5sum of the export in
# <export_dir>/pool_<id>/<image>/@md5_nosnap.
#   $1  image name   $2  image format (1 or 2)
#   $3  optional size in MB (default 24)
function export_image()
{
  local func="export_image"

  if [ $# -lt 2 ];then
    echo "$func: parameters: <image_name> <image_format> [<image_size>]"
    exit 1
  fi

  local image_name=$1
  local format=$(($2))
  local size=$(($3)) #MB

  if [ $format -ne 1 ] && [ $format -ne 2 ];then
    echo "$func: image format must be 1 or 2"
    exit 1
  fi

  if [ $size -eq 0 ];then
    size=24 #MB
    echo "$func: size = $size"
  fi
  local mnt=/rbdfuse

  _mount_rbd_fuse $mnt

  create_image $image_name $size $format

  dd conv=notrunc if=/dev/urandom of=$mnt/$image_name bs=4M count=$(($size/4))

  local export_image_dir=$export_dir/pool_$pool_id/$image_name
  mkdir -p $export_image_dir
  local export_md5_nosnap=$export_image_dir/@md5_nosnap
  >$export_md5_nosnap

  local export_image_path=$export_image_dir/$image_name
  rm -f $export_image_path

  rbd export $pool/$image_name $export_image_path
  md5sum $export_image_path |awk '{print $1}' >$export_md5_nosnap
}

# Recover the head of an image with rbd-recover-tool and record its md5sum
# in <recover_dir>/pool_<id>/<image>/@md5_nosnap.
#   $1  image name
function recover_image()
{
  local func="recover_image"
  if [ $# -lt 1 ];then
    echo "$func: parameters: <image_name>"
    exit 1
  fi

  local image_name=$1
  #pool_id=29

  local recover_image_dir=$recover_dir/pool_$pool_id/$image_name
  mkdir -p $recover_image_dir
  local recover_md5_nosnap=$recover_image_dir/@md5_nosnap
  >$recover_md5_nosnap

  bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name $recover_dir
  md5sum $recover_image_dir/$image_name|awk '{print $1}' >$recover_md5_nosnap
}

# Overwrite part of the fuse-mapped image with random data, snapshot it,
# export the image and append "<md5>  <image>@<snap>" to @md5 in the export
# directory.  Returns 0 without doing anything when the snapshot exists.
#   $1  file to write (image below the rbd-fuse mount)
#   $2  seek offset in MB   $3  number of MB to write
#   $4  snapshot name       $5  export directory of the image
# The image name is NOT a parameter: it is taken from the caller's
# $image_name (bash dynamic scoping), as export_snapshots provides it.
function make_snapshot()
{
  local func="make_snapshot"
  if [ $# -lt 5 ];then
    echo "$func: parameters: <ofile> <seek> <count> <snap> <export_image_dir>"
    exit 1
  fi
  local ofile=$1
  local seek=$(($2))
  local count=$(($3))
  local snap=$4
  local export_image_dir=$5
  local snapshot=

  if [ $seek -lt 0 ];then
    echo "$func: seek can not be minus"
    exit 1
  fi

  if [ $count -lt 1 ];then
    echo "$func: count must great than zero"
    exit 1
  fi

  echo "[$snap] $func ..."
  echo "$1 $2 $3 $4"
  rbd snap ls $image_name|grep $snap;

  local res=$?
  if [ $res -eq 0 ];then
    return $res
  fi

  dd conv=notrunc if=/dev/urandom of=$ofile bs=1M count=$count seek=$seek 2>/dev/null
  snapshot=$image_name@$snap
  rbd snap create $snapshot
  rm -f $export_image_dir/$snapshot
  rbd export $pool/$image_name $export_image_dir/$snapshot
  pushd $export_image_dir >/dev/null
  md5sum $snapshot >> @md5
  popd >/dev/null
}

# Recover the image head, then roll it to snap1..snap9 one after another and
# append "<md5> <image>@<snap>" per snapshot to @md5 in the recover directory.
#   $1  image name
function recover_snapshots()
{
  local func="recover_snapshots"
  if [ $# -lt 1 ];then
    echo "$func: parameters: <image_name>"
    exit 1
  fi

  local image_name=$1
  #pool_id=29

  local recover_image_dir=$recover_dir/pool_$pool_id/$image_name
  mkdir -p $recover_image_dir
  local recover_md5=$recover_image_dir/@md5
  >$recover_md5
  local snapshot=
  local chksum=
  local i=

  # recover head
  bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name $recover_dir

  # recover snapshots
  for((i=1; i<10; i++))
  do
    snapshot=snap$i
    bash $tool_dir/rbd-recover-tool recover $pool_id/$image_name@$snapshot $recover_dir
    pushd $recover_image_dir >/dev/null
    chksum=`md5sum $image_name|awk '{print $1}'`
    echo "$chksum $image_name@$snapshot" >>@md5
    popd >/dev/null
  done
}

# Create/reset an image and take nine snapshots of it, each after rewriting
# a different region (see the table below); checksums go to @md5.
#   $1  image name   $2  image format (1 or 2)
#   $3  optional size in MB (default 24)
function export_snapshots()
{
  local func="export_snapshots"

  if [ $# -lt 2 ];then
    echo "$func: parameters: <image_name> <image_format> [<image_size>]"
    exit 1
  fi

  local image_name=$1
  local format=$(($2))
  local size=$(($3)) #MB

  if [ $format -ne 1 ] && [ $format -ne 2 ];then
    echo "$func: image format must be 1 or 2"
    exit 1
  fi

  if [ $size -eq 0 ];then
    size=24 #MB
    echo "$func: size = $size"
  fi
  local mnt=/rbdfuse

  _mount_rbd_fuse $mnt

  create_image $image_name $size $format

  local export_image_dir=$export_dir/pool_$pool_id/$image_name
  mkdir -p $export_image_dir
  local export_md5=$export_image_dir/@md5
  >$export_md5

  # create 9 snapshots
  # image = {object0, object1, object2, object3, object4, object5, ...}
  #
  # snap1 : init/write all objects
  # snap2 : write object0
  # snap3 : write object1
  # snap4 : write object2
  # snap5 : write object3
  # snap6 : write object4
  # snap7 : write object5
  # snap8 : write object0
  # snap9 : write object3

  make_snapshot $mnt/$image_name 0 $size snap1 $export_image_dir
  make_snapshot $mnt/$image_name 0 1 snap2 $export_image_dir
  make_snapshot $mnt/$image_name 4 1 snap3 $export_image_dir
  make_snapshot $mnt/$image_name 8 1 snap4 $export_image_dir
  make_snapshot $mnt/$image_name 12 1 snap5 $export_image_dir
  make_snapshot $mnt/$image_name 16 1 snap6 $export_image_dir
  make_snapshot $mnt/$image_name 20 1 snap7 $export_image_dir
  make_snapshot $mnt/$image_name 1 1 snap8 $export_image_dir
  make_snapshot $mnt/$image_name 13 1 snap9 $export_image_dir
}

# Compare the checksum of an exported head image with the recovered one and
# print both, tagging the recover line PASSED or FAILED.
#   $1  file holding the export md5   $2  file holding the recover md5
#   $3  image name (used in the banner only)
function check_recover_nosnap()
{
  local func="check_recover_nosnap"
  if [ $# -lt 3 ];then
    echo "$func: parameters: <export_md5_file> <recover_md5_file> <image_name>"
    return 1
  fi
  local export_md5=$1
  local recover_md5=$2
  local image_name=$3

  local ifpassed="FAILED"

  echo "================ < $image_name nosnap > ================"

  local export_md5sum=`cat $export_md5`
  local recover_md5sum=`cat $recover_md5`

  if [ "$export_md5sum"x != ""x ] && [ "$export_md5sum"x = "$recover_md5sum"x ];then
    ifpassed="PASSED"
  fi
  echo "export: $export_md5sum"
  echo "recover: $recover_md5sum $ifpassed"
}

# Compare the first nine "<md5> <image>@<snap>" lines of the export and the
# recover @md5 files pairwise.  A pair PASSES only when the checksums (field
# 0) are equal and non-empty and both lines name the same snapshot (field 1);
# the verdict is computed afresh for every pair.
#   $1  export @md5 file   $2  recover @md5 file
#   $3  image name (used in the banner only)
function check_recover_snapshots()
{
  local func="check_recover_snapshots"
  if [ $# -lt 3 ];then
    echo "$func: parameters: <export_md5_file> <recover_md5_file> <image_name>"
    return 1
  fi
  local export_md5=$1
  local recover_md5=$2
  local image_name=$3

  echo "================ < $image_name snapshots > ================"

  # split the files into lines only; IFS is restored right afterwards
  local OIFS=$IFS
  IFS=$'\n'
  local export_md5s=($(cat "$export_md5"))
  local recover_md5s=($(cat "$recover_md5"))
  IFS=$OIFS

  local i=
  local ifpassed=
  local -a export_arr recover_arr
  for((i=0; i<9; i++))
  do
    # read -a splits on blanks without pathname expansion
    read -r -a export_arr <<< "${export_md5s[$i]}"
    read -r -a recover_arr <<< "${recover_md5s[$i]}"

    ifpassed="FAILED"
    if [ "${export_arr[0]}"x != ""x ] \
      && [ "${export_arr[0]}"x = "${recover_arr[0]}"x ] \
      && [ "${export_arr[1]}"x = "${recover_arr[1]}"x ];then
      ifpassed="PASSED"
    fi
    echo "export: ${export_md5s[$i]}"
    echo "recover: ${recover_md5s[$i]} $ifpassed"
  done
}

# step 1: export image, snapshot
function do_export_nosnap()
{
  export_image image_v1_nosnap 1
  export_image image_v2_nosnap 2
}

function do_export_snap()
{
  export_snapshots image_v1_snap 1
  export_snapshots image_v2_snap 2
}

# step 2: stop ceph cluster and gen database
# Keeps calling stop_ceph until check_ceph_service reports that nothing is
# running (no retry limit), then regenerates the database.
function stop_cluster_gen_database()
{
  trap 'echo stop ceph cluster failed; exit;' INT HUP
  stop_ceph
  sleep 2
  until check_ceph_service
  do
    stop_ceph
    sleep 2
  done

  echo 0 >"$gen_db"
  do_gen_database
}

# step 3: recover image,snapshot
function do_recover_nosnap()
{
  recover_image image_v1_nosnap
  recover_image image_v2_nosnap
}

function do_recover_snap()
{
  recover_snapshots image_v1_snap
  recover_snapshots image_v2_snap
}

# step 4: check md5sum pair<export_md5sum, recover_md5sum>
function do_check_recover_nosnap()
{
  local image1=image_v1_nosnap
  local image2=image_v2_nosnap

  local export_md5_1=$export_dir/pool_$pool_id/$image1/@md5_nosnap
  local export_md5_2=$export_dir/pool_$pool_id/$image2/@md5_nosnap
  local recover_md5_1=$recover_dir/pool_$pool_id/$image1/@md5_nosnap
  local recover_md5_2=$recover_dir/pool_$pool_id/$image2/@md5_nosnap

  check_recover_nosnap $export_md5_1 $recover_md5_1 $image1
  check_recover_nosnap $export_md5_2 $recover_md5_2 $image2
}

function do_check_recover_snap()
{
  local image1=image_v1_snap
  local image2=image_v2_snap

  local export_md5_1=$export_dir/pool_$pool_id/$image1/@md5
  local export_md5_2=$export_dir/pool_$pool_id/$image2/@md5
  local recover_md5_1=$recover_dir/pool_$pool_id/$image1/@md5
  local recover_md5_2=$recover_dir/pool_$pool_id/$image2/@md5

  check_recover_snapshots $export_md5_1 $recover_md5_1 $image1
  check_recover_snapshots $export_md5_2 $recover_md5_2 $image2
}

# images without snapshots only
function test_case_1()
{
  do_export_nosnap
  stop_cluster_gen_database
  do_recover_nosnap
  do_check_recover_nosnap
}

# images with snapshots only
function test_case_2()
{
  do_export_snap
  stop_cluster_gen_database
  do_recover_snap
  do_check_recover_snap
}

# both kinds of images in one run
function test_case_3()
{
  do_export_nosnap
  do_export_snap

  stop_cluster_gen_database

  do_recover_nosnap
  do_recover_snap

  do_check_recover_nosnap
  do_check_recover_snap
}


init "$@"
test_case_3