summaryrefslogtreecommitdiffstats
path: root/src/ceph-crash.in
diff options
context:
space:
mode:
Diffstat (limited to 'src/ceph-crash.in')
-rwxr-xr-xsrc/ceph-crash.in98
1 files changed, 98 insertions, 0 deletions
diff --git a/src/ceph-crash.in b/src/ceph-crash.in
new file mode 100755
index 00000000..e5f08acb
--- /dev/null
+++ b/src/ceph-crash.in
@@ -0,0 +1,98 @@
+#!@PYTHON_EXECUTABLE@
+# -*- mode:python -*-
+# vim: ts=4 sw=4 smarttab expandtab
+
+import argparse
+import logging
+import os
+import socket
+import subprocess
+import sys
+import time
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger(__name__)
+
+auth_names = ['client.crash.%s' % socket.gethostname(),
+ 'client.crash',
+ 'client.admin']
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '-p', '--path', default='/var/lib/ceph/crash',
+ help='base path to monitor for crash dumps')
+ parser.add_argument(
+ '-d', '--delay', default=10.0, type=float,
+ help='minutes to delay between scans (0 to exit after one)',
+ )
+ parser.add_argument(
+ '--name', '-n',
+ help='ceph name to authenticate as (default: try client.crash, client.admin)')
+ return parser.parse_args()
+
+
+def post_crash(path):
+ rc = 0
+ for n in auth_names:
+ pr = subprocess.Popen(
+ args=['timeout', '30', 'ceph',
+ '-n', n,
+ 'crash', 'post', '-i', '-'],
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ )
+ f = open(os.path.join(path, 'meta'), 'rb')
+ stdout, stderr = pr.communicate(input=f.read())
+ rc = pr.wait()
+ f.close()
+ if rc != 0:
+ log.warning('post %s as %s failed: %s' % (path, n, stderr))
+ if rc == 0:
+ break
+ return rc
+
+
+def scrape_path(path):
+ for p in os.listdir(path):
+ crashpath = os.path.join(path, p)
+ metapath = os.path.join(crashpath, 'meta')
+ donepath = os.path.join(crashpath, 'done')
+ if os.path.isfile(metapath):
+ if not os.path.isfile(donepath):
+ # hang out just for a bit; either we interrupted the dump
+ # or the daemon crashed before finishing it
+ time.sleep(1)
+ if not os.path.isfile(donepath):
+ return
+ # ok, we can process this one
+ rc = post_crash(crashpath)
+ if rc == 0:
+ os.rename(crashpath, os.path.join(path, 'posted/', p))
+ log.debug(
+ "posted %s and renamed %s -> %s " %
+ (metapath, p, os.path.join('posted/', p))
+ )
+
+
+def main():
+ args = parse_args()
+ postdir = os.path.join(args.path, 'posted')
+ if args.name:
+ auth_names = [args.name]
+
+ while not os.path.isdir(postdir):
+ log.error("directory %s does not exist; please create" % postdir)
+ time.sleep(30)
+
+ log.info("monitoring path %s, delay %ds" % (args.path, args.delay * 60.0))
+ while True:
+ scrape_path(args.path)
+ if args.delay == 0:
+ sys.exit(0)
+ time.sleep(args.delay * 60)
+
+
+if __name__ == "__main__":
+ main()