From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/tools/contrib/README.rst | 10 + src/tools/contrib/ceph-migrate-bluestore.bash | 370 ++++++++++++++++++++++++++ 2 files changed, 380 insertions(+) create mode 100644 src/tools/contrib/README.rst create mode 100755 src/tools/contrib/ceph-migrate-bluestore.bash (limited to 'src/tools/contrib') diff --git a/src/tools/contrib/README.rst b/src/tools/contrib/README.rst new file mode 100644 index 000000000..d7655e208 --- /dev/null +++ b/src/tools/contrib/README.rst @@ -0,0 +1,10 @@ +contrib +================== + +This directory houses scripts and other files that may be useful to Ceph +administrators. Everything here is provided *as-is*, and may or may +not be up-to-date or functional. Code may not be up to official standards. +Please do not assume any level of support. Your mileage may vary. + +Each file's header must include a tracker number and an author signed-off-by +line. diff --git a/src/tools/contrib/ceph-migrate-bluestore.bash b/src/tools/contrib/ceph-migrate-bluestore.bash new file mode 100755 index 000000000..a4cc0a549 --- /dev/null +++ b/src/tools/contrib/ceph-migrate-bluestore.bash @@ -0,0 +1,370 @@ +#!/bin/bash +# https://tracker.ceph.com/issues/47839 +# Signed-off-by: Chris Dunlop + + +###################################################################### +function usage +{ + cat <& /dev/null || error "VG '${vgdb}' for block.db not found" + +# +# Size of LV in $vgdb for the block.db +# +dblvsize=60G + +# +# Prefix used for block LVs +# +block_prefix='osd-block' + +# +# Some less(?) common we use - abort early if they're missing +# +cmds=( + bc + sgdisk +) + +###################################################################### +# Functions... 
+# +function runcmd +{ + local IFS=' ' + echo 1>&2 "$*" + "$@" +} + +function is_uuid +{ + [[ $1 =~ ^[[:xdigit:]]{8}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{12}$ ]] +} + +# +# Compare the used size of the OSD with the new device +# (and arbitrarily 20% larger 'cos we don't want to fill it up) +# +function check-device-size +{ + # "ceph osd df" fields 7 and 8 - "RAW USE", size and units + IFS=' ' read -r sz units <<< "$(ceph osd df | awk -v"id=${osd}" '$1==id { print $7, $8 }')" + case $units in + KiB) pow=1 ;; + MiB) pow=2 ;; + GiB) pow=3 ;; + TiB) pow=4 ;; + PiB) pow=5 ;; + *) error "ceph df: units not recognized: ${units}" ;; + esac + osdbytes=$(printf '%.0f' "$(bc <<< "${sz} * 1024^${pow} * 1.2")") + + bdev=$(realpath "${bluestore_device}") + bdev=${bdev##*/} + [[ -e /sys/block/${bdev##*/}/size ]] || error "Can't find size for ${bluestore_device}" + bdevbytes=$(($(<"/sys/block/${bdev##*/}/size") * 512)) + + declare -p osdbytes bdevbytes + + ((bdevbytes >= osdbytes)) || error "The block device isn't large enough" +} + +# +# Check things look ok +# +# Is there a better way of checking, other than manually? +# +function check-ceph-ok +{ + local ans=r + + while [[ $ans = r ]] + do + runcmd ceph -s + read -r -p $'\nCheck status above and press r to recheck or to continue with scrub' ans + done + + # + # Run a scrub "to be sure, to be sure" + # + # For smaller OSDs we can see which PGs we need to watch for... + # + runcmd ceph pg ls-by-primary "${osd}" | awk '$1~/^[0-9]+\./ { print $1 }' + runcmd ceph osd scrub "${osd}" + + hr + tail -n0 -f "/var/log/ceph/ceph-osd.${osd}.log" & + pid=$! + sleep 2 + while ! 
read -r -t 10 -p $'\n\n\ntailing osd log file: press to continue\n\n\n' ans + do + : + done + kill "${pid}" + hr + + ans=r + while [[ $ans = r ]] + do + runcmd ceph -s + read -r -p $'\nCheck status above and press r to recheck or to continue' ans + done +} + +# +# Disable the FileStore so it doesn't attempt to come back on reboot, but +# so we can revert back to it if necessary +# +# https://en.wikipedia.org/wiki/GUID_Partition_Table#Partition_type_GUIDs +# Partition GUID code: 4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D (Ceph OSD) +# Partition GUID code: 0FC63DAF-8483-4772-8E79-3D69D8477DE4 (Linux filesystem data) +# +function disable-filestore +{ + # + # Remove the original device from fstab if it's there + # (it may be in here for xfs with logdev etc.) + # + if grep -qE '^[^#[:space:]]+[[:space:]]+'"${osddir}"'[[:space:]]' /etc/fstab + then + [[ -e /etc/fstab.${0##*/} ]] || cp -a /etc/fstab{,."${0##*/}"} + sed -ri '/^[^#[:space:]]+[[:space:]]+'"${osddir//\//\\\/}"'[[:space:]]/ s/^/# /' /etc/fstab + fi + + # + # Change the partition type + # + [[ -e ${osd_json%.json}.part ]] || + runcmd sgdisk --backup="${osd_json%.json}.part" "${filestore_device}" + part_guid=$(sgdisk -i1 "${filestore_device}" | sed -rn 's/^Partition GUID code: ([[:xdigit:]-]+) .*/\1/p') + if [[ $part_guid = 4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D ]] + then + runcmd sgdisk --typecode=1:0FC63DAF-8483-4772-8E79-3D69D8477DE4 "${filestore_device}" + echo "${filestore_device} partition 1 changed to type 0FC63DAF-8483-4772-8E79-3D69D8477DE4 (Linux filesystem data)" + fi +} + +###################################################################### +# Processing... 
#

#
# Check we have the commands we need
#
for cmd in "${cmds[@]}"
do
  type "${cmd}" >& /dev/null || error "${cmd} utility required"
done

#
# Get/check OSD
#
unit=ceph-osd@${osd}
runcmd systemctl is-enabled "${unit}" ||
  error "systemd unit ${unit} not enabled"

osddir=/var/lib/ceph/osd/ceph-${osd}
[[ -d $osddir ]] || error "No directory: ${osddir}"

fsid=$(< "${osddir}/fsid")
is_uuid "${fsid}" || error "fsid uuid not found in ${osddir}/fsid"

osd_json=/etc/ceph/osd/${osd}-${fsid}.json
[[ -f $osd_json ]] || error "File doesn't exist: ${osd_json}"

lvnewdb=${vgdb}/osd-db-${fsid}
authkey=$(sed -rn 's/^[[:space:]]+key[[:space:]]*=[[:space:]]*//p' "${osddir}/keyring")
[[ $authkey ]] || error "Can't get authkey from ${osddir}/keyring"

#
# We want the device containing the FileStore version of the OSD
# so we can disable it once the BlueStore version is up and running,
# so the FileStore doesn't contend with the BlueStore on reboot etc.
#
filestore_device=$(awk -v "dir=${osddir}" '$2==dir { print $1 }' /etc/mtab)
[[ $filestore_device ]] || error "Can't find device currently mounted on ${osddir}"
[[ $filestore_device =~ ^(/dev/sd[a-z]+)[0-9]*$ ]] || error "Don't recognize device currently mounted on ${osddir}: ${filestore_device}"
# Keep the whole-disk part of the name. This is taken from the regex
# match rather than a "+([0-9])" pattern, which only strips the
# partition number when the extglob shell option is on.
filestore_device=${BASH_REMATCH[1]}

# NOTE(review): this prints authkey along with the other settings
declare -p unit block_prefix bluestore_device osd osddir fsid osd_json lvnewdb authkey filestore_device

runcmd check-device-size

#
# Create raw LV for block.db
#
runcmd lvcreate --yes -L "${dblvsize}" -n "${lvnewdb#*/}" "${lvnewdb%/*}"


#
# Prepare the new OSD
# osds_before is so we can work out which osd was created
# (kept in variables, so no predictable file names in /tmp)
#
osds_before=$(ceph osd ls) || error "Can't list the OSDs"
runcmd ceph-volume lvm prepare --data "${bluestore_device}" --block.db "${lvnewdb}"

#
# Work out which OSD has been created
# Is there a better way of doing this?
#
osds_after=$(ceph osd ls) || error "Can't list the OSDs"

# comm needs both inputs sorted in its own collating order, which is not
# the numeric order of OSD ids ("10" sorts before "9")
new=$(LC_ALL=C comm -13 <(LC_ALL=C sort <<< "${osds_before}") <(LC_ALL=C sort <<< "${osds_after}"))
[[ $new =~ ^[0-9]+$ ]] || error "New OSD id not found"

#
# remove the new OSD from the ceph database
# (it's left mounted)
#
runcmd ceph osd purge "${new}" --yes-i-really-mean-it

#
# Params for the newly created OSD
#
newdir=/var/lib/ceph/osd/ceph-${new}
lvnew=$(readlink "${newdir}/block")
lvnew=${lvnew#/dev/}

#
# lvfix is what we're going to rename the LV to so
# it ends in the (original) fsid
#
is_uuid "${lvnew#*/${block_prefix}-}" || error "LV not recognised: ${lvnew}"
lvfix=${lvnew%%/*}/${block_prefix}-${fsid}

declare -p new newdir lvnew lvfix

#
# the "dup" step only works if the destination has the same id and fsid
# as the source: fix 'em up
#
new_fsid=$(< "${newdir}/fsid")
args=(
  --deltag "ceph.osd_id=${new}"
  --addtag "ceph.osd_id=${osd}"

  --deltag "ceph.osd_fsid=${new_fsid}"
  --addtag "ceph.osd_fsid=${fsid}"

  --deltag "ceph.block_device=${lvnew}"
  --addtag "ceph.block_device=${lvfix}"
)
runcmd lvchange "${args[@]}" "${lvnew}"
runcmd lvchange "${args[@]}" "${lvnewdb}"

runcmd ceph-bluestore-tool set-label-key --dev "${newdir}/block" --key whoami --value "${osd}"
runcmd ceph-bluestore-tool set-label-key --dev "${newdir}/block" --key osd_uuid --value "${fsid}"
runcmd ceph-bluestore-tool set-label-key --dev "${newdir}/block.db" --key osd_uuid --value "${fsid}"

echo "${fsid}" > "${newdir}/fsid"

#
# Rename the LV so it ends in the (original) fsid
#
runcmd lvrename "${lvnew}" "${lvfix}"
runcmd ln -sf "/dev/${lvfix}" "${newdir}/block"
lvnew=$lvfix

#
# Remove the flags that mkfs has already been done - otherwise mkfs skips the actual mkfs!
#
runcmd ceph-bluestore-tool rm-label-key --dev "${newdir}/block" --key mkfs_done
runcmd rm "${newdir}/mkfs_done"

#
# Empty out the new OSD filesystem
#
runcmd ceph-objectstore-tool --type bluestore --data-path "${newdir}" --fsid "${fsid}" --op mkfs --no-mon-config

#
# Stop the osd - the copy can't proceed if it's busy
#
if runcmd systemctl is-active --quiet "${unit}"
then
  runcmd systemctl stop "${unit}" || error "Can't stop ${unit}"
fi

#
# The actual copy...
#
# "time" is used as the shell keyword here: passed through runcmd it
# would be looked up as an external command, which may not be installed.
# Everything below assumes the copy worked, so give up if it didn't.
#
time runcmd ceph-objectstore-tool --type filestore --data-path "${osddir}" --target-data-path "${newdir}" --op dup ||
  error "Copy of osd.${osd} to ${newdir} failed"

#
# Fix up some keys from the copy
#
printf '[osd.%d]\n\tkey = %s\n' "${osd}" "${authkey}" > "${newdir}/key"
ceph-bluestore-tool set-label-key --dev "${newdir}/block" --key osd_key --value "${authkey}"
ceph-bluestore-tool rm-label-key --dev "${newdir}/block" --key fsid

#
# Move the FileStore config file out of the way to avoid it being used on boot
#
runcmd mv "${osd_json}" "${osd_json}.orig"

#
# prepare the mount points
#
runcmd umount "${osddir}"
runcmd umount "${newdir}"
runcmd rmdir "${newdir}"

#
# Start the new BlueStore version of the OSD
#
runcmd ceph-volume lvm trigger "${osd}-${fsid}"

#
# Let things settle a little then check the new OSD is running
#
sleep 5
if ! systemctl is-active --quiet "${unit}"
then
  systemctl status "${unit}"
  exit 1
fi

runcmd check-ceph-ok

runcmd disable-filestore

exit 0