diff options
Diffstat (limited to 'src/ceph-crash.in')
-rwxr-xr-x | src/ceph-crash.in | 98 |
1 files changed, 98 insertions, 0 deletions
#!@PYTHON_EXECUTABLE@
# -*- mode:python -*-
# vim: ts=4 sw=4 smarttab expandtab
#
# src/ceph-crash.in (new file, mode 100755, index 00000000..e5f08acb)

"""Watch a directory for crash dumps and post finished ones via ``ceph``.

Every sub-directory of the monitored path that contains both a ``meta``
and a ``done`` file is submitted with ``ceph crash post`` and, on success,
moved into the ``posted`` sub-directory.
"""

import argparse
import logging
import os
import socket
import subprocess
import sys
import time

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

# Names passed to ``ceph -n``; post_crash() tries them in this order and
# stops at the first one that succeeds.  main() replaces the contents when
# --name is given.
auth_names = ['client.crash.%s' % socket.gethostname(),
              'client.crash',
              'client.admin']


def parse_args(argv=None):
    """Parse the command line.

    :param argv: argument list to parse; ``None`` (the default) makes
                 argparse read ``sys.argv[1:]``.
    :returns: the argparse namespace with ``path``, ``delay`` and ``name``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p', '--path', default='/var/lib/ceph/crash',
        help='base path to monitor for crash dumps')
    parser.add_argument(
        '-d', '--delay', default=10.0, type=float,
        help='minutes to delay between scans (0 to exit after one)',
    )
    parser.add_argument(
        '--name', '-n',
        help='ceph name to authenticate as (default: try client.crash, client.admin)')
    return parser.parse_args(argv)


def post_crash(path):
    """Post the ``meta`` file found in *path* with ``ceph crash post``.

    Each entry of ``auth_names`` is tried in turn until one invocation
    exits with status 0.

    :param path: crash directory containing a ``meta`` file.
    :returns: exit status of the last ``ceph`` invocation (0 on success).
    """
    # Read the report once, before spawning anything: it is the same for
    # every attempt, and a failure to open it must not leave a child
    # process behind.
    with open(os.path.join(path, 'meta'), 'rb') as f:
        meta = f.read()

    rc = 0
    for n in auth_names:
        pr = subprocess.Popen(
            args=['timeout', '30', 'ceph',
                  '-n', n,
                  'crash', 'post', '-i', '-'],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # communicate() feeds stdin, drains both pipes and waits for exit,
        # so returncode is already set when it returns.
        _, stderr = pr.communicate(input=meta)
        rc = pr.returncode
        if rc == 0:
            break
        log.warning('post %s as %s failed: %s' % (path, n, stderr))
    return rc


def scrape_path(path):
    """Scan *path* once and post every finished crash dump found in it.

    A sub-directory is a candidate when it holds a ``meta`` file and is
    considered finished when it also holds a ``done`` file.  Successfully
    posted dumps are renamed into ``<path>/posted/``.

    :param path: base directory to scan.
    """
    for p in os.listdir(path):
        crashpath = os.path.join(path, p)
        metapath = os.path.join(crashpath, 'meta')
        donepath = os.path.join(crashpath, 'done')

        if not os.path.isfile(metapath):
            continue

        if not os.path.isfile(donepath):
            # hang out just for a bit; either we interrupted the dump
            # or the daemon crashed before finishing it
            time.sleep(1)
            if not os.path.isfile(donepath):
                # Still unfinished: skip only this dump.  Returning here
                # would let one dump that never completes block every
                # entry listed after it, on every scan.
                continue

        # ok, we can process this one
        rc = post_crash(crashpath)
        if rc == 0:
            os.rename(crashpath, os.path.join(path, 'posted/', p))
            log.debug(
                "posted %s and renamed %s -> %s " %
                (metapath, p, os.path.join('posted/', p))
            )


def main(argv=None):
    """Entry point: scan the crash directory, once or periodically.

    :param argv: argument list forwarded to parse_args(); ``None`` means
                 ``sys.argv[1:]``.

    Exits with status 0 after a single scan when the delay is 0;
    otherwise loops forever, sleeping ``delay`` minutes between scans.
    """
    args = parse_args(argv)
    postdir = os.path.join(args.path, 'posted')
    if args.name:
        # Update the module-level list in place.  A plain assignment would
        # only bind a local name and post_crash() would keep using the
        # defaults, silently ignoring --name.
        auth_names[:] = [args.name]

    while not os.path.isdir(postdir):
        log.error("directory %s does not exist; please create" % postdir)
        time.sleep(30)

    log.info("monitoring path %s, delay %ds" % (args.path, args.delay * 60.0))
    while True:
        scrape_path(args.path)
        if args.delay == 0:
            sys.exit(0)
        time.sleep(args.delay * 60)


if __name__ == "__main__":
    main()