diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/tools/crushdiff | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/tools/crushdiff')
-rwxr-xr-x | src/tools/crushdiff | 336 |
1 files changed, 336 insertions, 0 deletions
#!/usr/bin/python3
#
# A tool to test the effect (number of pgs, objects, bytes moved) of a
# crushmap change. This is a wrapper around osdmaptool, relying heavily
# on its --test-map-pgs-dump option to get the list of changed pgs.
# Additionally it uses pg stats to calculate the numbers of objects
# and bytes moved.
#
# Typical usage:
#
#   # Get current crushmap
#   $ crushdiff export cm.txt
#   # Edit the map
#   $ $EDITOR cm.txt
#   # Check the result
#   $ crushdiff compare cm.txt
#   # Install the updated map
#   $ crushdiff import cm.txt
#
# By default, crushdiff will use the cluster current osdmap and pg
# stats, which requires access to the cluster. But one can use the
# --osdmap and --pg-dump options to test against previously obtained
# data.
#

import argparse
import json
import os
import re
import shlex
import subprocess
import sys
import tempfile

#
# Global
#

COMMANDS = ('compare', 'export', 'import')

# Patterns for the `osdmaptool --test-map-pgs-dump` output. The osd list
# may be empty ("[]") when a pg maps to no osd at all.
POOL_RE = re.compile(r'^pool (\d+) pg_num (\d+)')
PG_RE = re.compile(r'^(\d+\.[0-9a-f]+)\s+\[([\d,]*)\]')

parser = argparse.ArgumentParser(prog='crushdiff',
                                 description='Tool for updating crush map')
parser.add_argument(
    'command',
    metavar='compare|export|import',
    choices=COMMANDS,
    help='command',
)
parser.add_argument(
    '-c', '--compiled',
    action='store_true',
    help='use compiled crush map',
    default=False,
)
parser.add_argument(
    'crushmap',
    metavar='crushmap',
    help='crushmap file (text, or compiled if --compiled is given)',
)
parser.add_argument(
    '-m', '--osdmap',
    metavar='osdmap',
    help='osdmap file to use instead of the cluster current osdmap',
    default=None,
)
parser.add_argument(
    '-p', '--pg-dump',
    metavar='pg-dump',
    help='`ceph pg dump` json output',
    default=None,
)
parser.add_argument(
    '-v', '--verbose',
    action='store_true',
    help='be verbose',
    default=False,
)

#
# Functions
#


def get_human_readable(bytes, precision=2):
    """Format a byte count with a binary suffix ('', Ki, Mi, Gi, Ti).

    The value is divided by 1024 until it drops below 1024 or the
    largest suffix (Ti) is reached, then printed with `precision`
    decimal places.
    """
    suffixes = ['', 'Ki', 'Mi', 'Gi', 'Ti']
    # Work on a copy so the builtin-shadowing parameter is never rebound.
    value = bytes
    suffix_index = 0
    # ">=" so that exactly 1024 is shown as 1.00Ki, not 1024.00.
    while value >= 1024 and suffix_index < len(suffixes) - 1:
        suffix_index += 1
        value = value / 1024.0
    return '%.*f%s' % (precision, value, suffixes[suffix_index])


def run_cmd(cmd, verbose=False):
    """Run a shell command line, echoing it to stderr if verbose.

    Raises subprocess.CalledProcessError if the command exits with a
    non-zero status, so that later steps do not run on missing or
    partial output files.
    """
    if verbose:
        print(cmd, file=sys.stderr, flush=True)
    subprocess.run(cmd, shell=True, check=True)


def get_osdmap(file):
    """Load an osdmap json dump from `file`."""
    with open(file, "r") as f:
        return json.load(f)


def get_pools(osdmap):
    """Return the osdmap pools as a dict keyed by pool id."""
    return {p['pool']: p for p in osdmap['pools']}


def get_erasure_code_profiles(osdmap):
    """Return the erasure code profiles section of the osdmap."""
    return osdmap['erasure_code_profiles']


def get_pgmap(pg_dump_file):
    """Load a pg dump json file and return its pg map.

    The pg map is taken from the 'pg_map' key if present, otherwise
    the whole document is used as the pg map.
    """
    with open(pg_dump_file, "r") as f:
        dump = json.load(f)
    return dump.get('pg_map', dump)


def get_pg_stats(pgmap):
    """Return the pg stats of the pg map as a dict keyed by pgid."""
    return {pg['pgid']: pg for pg in pgmap['pg_stats']}


def parse_test_map_pgs_dump(file):
    """Parse `osdmaptool --test-map-pgs-dump` output.

    Returns a dict mapping pgid (str) to the list of osd ids (int) the
    pg maps to. Lines before the first pool header are ignored, and
    parsing stops at the per-osd summary ("#osd ...").
    """
    # Format:
    # pool 1 pg_num 16
    # 1.0     [1,0,2]  1
    # 1.1     [2,0,1]  2
    # ...
    # pool 2 pg_num 32
    # 2.0     [2,1,0]  2
    # 2.1     [2,1,0]  2
    # ...
    # #osd    count   first   primary c wt    wt
    # osd.1   208     123     123     0.098587 1

    pgs = {}

    with open(file, "r") as f:
        in_pool = False
        for line in f:
            if POOL_RE.match(line):
                in_pool = True
                continue
            if not in_pool:
                continue
            if line.startswith('#osd'):
                break
            m = PG_RE.match(line)
            if not m:
                continue
            # "if x" keeps an empty osd list ("[]") from failing int('').
            pgs[m.group(1)] = [int(x) for x in m.group(2).split(',') if x]

    return pgs


def percentage(part, total):
    """Return `part` as a percentage of `total`, or 0 if total is 0."""
    return 100 * part / total if total else 0


def count_affected_pgs(old_pgs, new_pgs, pg_stats, pools, verbose=False):
    """Count the pgs (and their objects) whose mapping changes.

    A replicated pg whose osd set is only reordered is not counted.
    A pg missing from `new_pgs` is treated as mapped to no osd. A
    warning is printed to stderr for every changed pg that ends up
    with fewer osds than the pool size.

    Returns (diff_pg_count, total_object_count, diff_object_count).
    """
    diff_pg_count = 0
    total_object_count = 0
    diff_object_count = 0
    for pgid, old_osds in old_pgs.items():
        objects = pg_stats[pgid]['stat_sum']['num_objects']
        total_object_count += objects

        new_osds = new_pgs.get(pgid, [])
        if old_osds == new_osds:
            continue

        pool = pools[int(pgid.split('.')[0])]

        if len(new_osds) < pool['size']:
            print("WARNING: {} will be undersized ({})".format(
                pgid, new_osds), file=sys.stderr, flush=True)

        if not pool['erasure_code_profile'] and \
           sorted(old_osds) == sorted(new_osds):
            continue

        if verbose:
            print("{}\t{} -> {}".format(pgid, old_osds, new_osds),
                  file=sys.stderr, flush=True)
        diff_pg_count += 1
        diff_object_count += objects

    return diff_pg_count, total_object_count, diff_object_count


def count_shards_to_move(old_pgs, new_pgs, pg_stats, pools, ec_profiles,
                         verbose=False):
    """Count the pg shards, object shards and bytes that have to move.

    For erasure coded pools shards are compared position by position;
    for replicated pools a shard moves when its osd is no longer part
    of the new set. A pg missing from `new_pgs` is treated as mapped
    to no osd.

    Returns (diff_pg_shard_count, total_pg_shard_count,
             diff_object_shard_count, total_object_shard_count,
             diff_bytes, total_bytes).
    """
    total_pg_shard_count = 0
    diff_pg_shard_count = 0
    total_object_shard_count = 0
    diff_object_shard_count = 0
    total_bytes = 0
    diff_bytes = 0
    for pgid, old_osds in old_pgs.items():
        pool = pools[int(pgid.split('.')[0])]
        ec_profile = pool['erasure_code_profile']
        if ec_profile:
            k = int(ec_profiles[ec_profile]['k'])
            m = int(ec_profiles[ec_profile]['m'])
        else:
            # A replicated pool is handled as k=1 plus (size - 1) copies.
            k = 1
            m = pool['size'] - 1

        stat_sum = pg_stats[pgid]['stat_sum']
        pg_bytes = stat_sum['num_bytes'] + stat_sum['num_omap_bytes']
        objects = stat_sum['num_objects']

        total_pg_shard_count += len(old_osds)
        total_object_shard_count += objects * (k + m)
        total_bytes += pg_bytes * (k + m) / k

        new_osds = new_pgs.get(pgid, [])
        if old_osds == new_osds:
            continue

        if ec_profile:
            # Shard position matters; a position missing in the new
            # mapping counts as moved.
            moved = sum(1 for i, osd in enumerate(old_osds)
                        if i >= len(new_osds) or new_osds[i] != osd)
        else:
            moved = sum(1 for osd in old_osds if osd not in new_osds)

        if not moved:
            continue

        diff_pg_shard_count += moved
        diff_object_shard_count += objects * moved
        diff_bytes += moved * (pg_bytes / k)

        if verbose:
            print("{}\t{} -> {}".format(pgid, old_osds, new_osds),
                  file=sys.stderr, flush=True)

    return (diff_pg_shard_count, total_pg_shard_count,
            diff_object_shard_count, total_object_shard_count,
            diff_bytes, total_bytes)


def do_compare(new_crushmap_in, osdmap=None, pg_dump=None, compiled=False,
               verbose=False):
    """Print how many pgs, objects, shards and bytes a new crushmap moves.

    new_crushmap_in -- the new crushmap (text, or compiled if `compiled`)
    osdmap          -- osdmap file; fetched from the cluster if not set
    pg_dump         -- `ceph pg dump` json file; fetched from the cluster
                       if not set
    """
    q = shlex.quote
    with tempfile.TemporaryDirectory() as tmpdirname:
        if compiled:
            new_crushmap_file = new_crushmap_in
        else:
            new_crushmap_file = os.path.join(tmpdirname, 'crushmap')
            run_cmd('crushtool -c {} -o {}'.format(
                q(new_crushmap_in), q(new_crushmap_file)), verbose)

        # Always work on a private copy of the osdmap.
        osdmap_file = os.path.join(tmpdirname, 'osdmap')
        if osdmap:
            run_cmd('cp {} {}'.format(q(osdmap), q(osdmap_file)), verbose)
        else:
            run_cmd('ceph osd getmap -o {}'.format(q(osdmap_file)), verbose)

        if not pg_dump:
            pg_dump = os.path.join(tmpdirname, 'pg_dump.json')
            run_cmd('ceph pg dump --format json > {}'.format(q(pg_dump)),
                    verbose)

        old_test_map_pgs_dump = os.path.join(tmpdirname, 'pgs.old.txt')
        run_cmd('osdmaptool {} --test-map-pgs-dump > {}'.format(
            q(osdmap_file), q(old_test_map_pgs_dump)), verbose)
        if verbose:
            run_cmd('cat {} >&2'.format(q(old_test_map_pgs_dump)), True)

        new_test_map_pgs_dump = os.path.join(tmpdirname, 'pgs.new.txt')
        run_cmd(
            'osdmaptool {} --import-crush {} --test-map-pgs-dump > {}'.format(
                q(osdmap_file), q(new_crushmap_file),
                q(new_test_map_pgs_dump)), verbose)
        if verbose:
            run_cmd('cat {} >&2'.format(q(new_test_map_pgs_dump)), True)

        osdmap_file_json = os.path.join(tmpdirname, 'osdmap.json')
        run_cmd('osdmaptool {} --dump json > {}'.format(
            q(osdmap_file), q(osdmap_file_json)), verbose)
        osdmap_json = get_osdmap(osdmap_file_json)
        pools = get_pools(osdmap_json)
        ec_profiles = get_erasure_code_profiles(osdmap_json)

        pg_stats = get_pg_stats(get_pgmap(pg_dump))

        old_pgs = parse_test_map_pgs_dump(old_test_map_pgs_dump)
        new_pgs = parse_test_map_pgs_dump(new_test_map_pgs_dump)

    diff_pg_count, total_object_count, diff_object_count = \
        count_affected_pgs(old_pgs, new_pgs, pg_stats, pools, verbose)

    print("{}/{} ({:.2f}%) pgs affected".format(
        diff_pg_count, len(old_pgs),
        percentage(diff_pg_count, len(old_pgs))), flush=True)
    print("{}/{} ({:.2f}%) objects affected".format(
        diff_object_count, total_object_count,
        percentage(diff_object_count, total_object_count)), flush=True)

    (diff_pg_shard_count, total_pg_shard_count,
     diff_object_shard_count, total_object_shard_count,
     diff_bytes, total_bytes) = count_shards_to_move(
        old_pgs, new_pgs, pg_stats, pools, ec_profiles, verbose)

    print("{}/{} ({:.2f}%) pg shards to move".format(
        diff_pg_shard_count, total_pg_shard_count,
        percentage(diff_pg_shard_count, total_pg_shard_count)), flush=True)
    print("{}/{} ({:.2f}%) pg object shards to move".format(
        diff_object_shard_count, total_object_shard_count,
        percentage(diff_object_shard_count, total_object_shard_count)),
        flush=True)
    print("{}/{} ({:.2f}%) bytes to move".format(
        get_human_readable(int(diff_bytes)),
        get_human_readable(int(total_bytes)),
        percentage(diff_bytes, total_bytes)), flush=True)


def do_export(crushmap_out, osdmap_file=None, compiled=False, verbose=False):
    """Export the crushmap of an osdmap to `crushmap_out`.

    The osdmap is fetched from the cluster if `osdmap_file` is not set.
    The crushmap is written in text form unless `compiled` is set.
    """
    q = shlex.quote
    with tempfile.TemporaryDirectory() as tmpdirname:
        if not osdmap_file:
            osdmap_file = os.path.join(tmpdirname, 'osdmap')
            run_cmd('ceph osd getmap -o {}'.format(q(osdmap_file)), verbose)

        if compiled:
            crushmap_file = crushmap_out
        else:
            crushmap_file = os.path.join(tmpdirname, 'crushmap')
        run_cmd('osdmaptool {} --export-crush {}'.format(
            q(osdmap_file), q(crushmap_file)), verbose)
        if not compiled:
            run_cmd('crushtool -d {} -o {}'.format(
                q(crushmap_file), q(crushmap_out)), verbose)


def do_import(crushmap_in, osdmap=None, compiled=False, verbose=False):
    """Import a crushmap into an osdmap file or into the cluster.

    The crushmap is compiled first unless `compiled` is set. If `osdmap`
    is given the crushmap is imported into that file with osdmaptool,
    otherwise it is set on the cluster with `ceph osd setcrushmap`.
    """
    q = shlex.quote
    with tempfile.TemporaryDirectory() as tmpdirname:
        if compiled:
            crushmap_file = crushmap_in
        else:
            crushmap_file = os.path.join(tmpdirname, 'crushmap')
            run_cmd('crushtool -c {} -o {}'.format(
                q(crushmap_in), q(crushmap_file)), verbose)
        if osdmap:
            run_cmd('osdmaptool {} --import-crush {}'.format(
                q(osdmap), q(crushmap_file)), verbose)
        else:
            run_cmd('ceph osd setcrushmap -i {}'.format(q(crushmap_file)),
                    verbose)


def main():
    """Parse the command line and run the requested command."""
    args = parser.parse_args()

    try:
        if args.command == 'compare':
            do_compare(args.crushmap, args.osdmap, args.pg_dump,
                       args.compiled, args.verbose)
        elif args.command == 'export':
            do_export(args.crushmap, args.osdmap, args.compiled,
                      args.verbose)
        elif args.command == 'import':
            do_import(args.crushmap, args.osdmap, args.compiled,
                      args.verbose)
    except subprocess.CalledProcessError as e:
        print("crushdiff: command failed with exit status {}: {}".format(
            e.returncode, e.cmd), file=sys.stderr, flush=True)
        sys.exit(1)

#
# main
#


if __name__ == '__main__':
    main()