summaryrefslogtreecommitdiffstats
path: root/src/ceph-crash.in
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:45:59 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:45:59 +0000
commit: 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree: 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/ceph-crash.in
parent: Initial commit. (diff)
download: ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz
ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip
Adding upstream version 16.2.11+ds. (refs: upstream/16.2.11+ds, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/ceph-crash.in')
-rwxr-xr-xsrc/ceph-crash.in113
1 files changed, 113 insertions, 0 deletions
diff --git a/src/ceph-crash.in b/src/ceph-crash.in
new file mode 100755
index 000000000..ae0e4f516
--- /dev/null
+++ b/src/ceph-crash.in
@@ -0,0 +1,113 @@
+#!@Python3_EXECUTABLE@
+# -*- mode:python -*-
+# vim: ts=4 sw=4 smarttab expandtab
+
+import argparse
+import logging
+import os
+import signal
+import socket
+import subprocess
+import sys
+import time
+
# Log at INFO by default under the 'ceph-crash' logger name.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger('ceph-crash')

# Ceph entity names to authenticate as, listed in the order they are tried:
# the host-specific crash client first, then the generic crash client,
# then the admin client.
auth_names = [
    'client.crash.%s' % socket.gethostname(),
    'client.crash',
    'client.admin',
]
+
def parse_args(argv=None):
    """Parse the command-line options of ceph-crash.

    :param argv: list of argument strings to parse.  ``None`` (the default)
                 lets argparse read ``sys.argv[1:]``, which is what the
                 script itself relies on.
    :returns: an ``argparse.Namespace`` with the attributes ``path``,
              ``delay`` (float, minutes), ``name`` and ``log_level``
              (the last two are ``None`` when not given).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p', '--path', default='/var/lib/ceph/crash',
        help='base path to monitor for crash dumps')
    parser.add_argument(
        '-d', '--delay', default=10.0, type=float,
        help='minutes to delay between scans (0 to exit after one)',
    )
    # The default list also contains the host-specific client.crash.<hostname>
    # entry, which is tried first; keep the help text in sync with it.
    parser.add_argument(
        '--name', '-n',
        help='ceph name to authenticate as (default: try '
             'client.crash.<hostname>, client.crash, client.admin)')
    parser.add_argument(
        '--log-level', '-l',
        help='log level output (default: INFO), support INFO or DEBUG')

    return parser.parse_args(argv)
+
+
def post_crash(path):
    """Post the crash report stored in directory *path* to the cluster.

    The contents of ``<path>/meta`` are piped to
    ``timeout 30 ceph -n <name> crash post -i -`` for each entry of
    ``auth_names`` in turn, stopping at the first invocation that exits 0.

    :param path: directory of a single crash dump (must contain ``meta``)
    :returns: exit status of the last command that was run; 0 means the
              report was posted (also 0 if ``auth_names`` is empty)
    """
    # Read the report once, before spawning anything: the payload is the
    # same for every attempt, and a missing/unreadable file should not
    # leave a child process behind.
    with open(os.path.join(path, 'meta'), 'rb') as f:
        meta = f.read()

    rc = 0
    for n in auth_names:
        pr = subprocess.Popen(
            args=['timeout', '30', 'ceph',
                  '-n', n,
                  'crash', 'post', '-i', '-'],
            stdin=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # communicate() returns a (stdout, stderr) tuple; stdout is not
        # piped here, so only the second element carries data (as bytes).
        _, stderr = pr.communicate(input=meta)
        rc = pr.returncode
        stderr = stderr.decode(errors='replace')
        if rc != 0 or stderr != "":
            log.warning('post %s as %s failed: %s' % (path, n, stderr))
        if rc == 0:
            break
    return rc
+
+
def scrape_path(path):
    """Post every finished crash dump found directly under *path*.

    An entry counts as a crash dump when it contains a ``meta`` file and as
    finished when it also contains a ``done`` file.  Each finished dump is
    handed to ``post_crash()``; on success its directory is moved into
    ``<path>/posted/``.

    :param path: base directory holding one sub-directory per crash
    """
    for p in os.listdir(path):
        crashpath = os.path.join(path, p)
        metapath = os.path.join(crashpath, 'meta')
        donepath = os.path.join(crashpath, 'done')
        if not os.path.isfile(metapath):
            continue
        if not os.path.isfile(donepath):
            # hang out just for a bit; either we interrupted the dump
            # or the daemon crashed before finishing it
            time.sleep(1)
            if not os.path.isfile(donepath):
                # Still incomplete: skip only this dump.  Bailing out of
                # the whole scan would let a dump that never completes
                # block every crash listed after it.
                continue
        # ok, we can process this one
        rc = post_crash(crashpath)
        if rc == 0:
            os.rename(crashpath, os.path.join(path, 'posted/', p))
            log.debug(
                "posted %s and renamed %s -> %s " %
                (metapath, p, os.path.join('posted/', p))
            )
+
def handler(signum, frame=None):
    """Signal handler: report the signal and exit with status 0.

    Python invokes signal handlers with two arguments, the signal number
    and the current stack frame, so both must be accepted; *frame* is
    unused and optional so the handler can also be called directly.

    :param signum: number of the signal that was received
    :param frame: interrupted stack frame (ignored)
    :raises SystemExit: always, with exit code 0
    """
    print('*** Interrupted with signal %d ***' % signum)
    sys.exit(0)
+
def main():
    """Run the crash scraper: parse options, then scan until told to stop.

    Installs ``handler`` for SIGINT and SIGTERM, waits for the ``posted``
    sub-directory of the monitored path to exist, then calls
    ``scrape_path()`` repeatedly, sleeping ``--delay`` minutes between
    scans.  A delay of 0 makes it exit after a single scan.
    """
    global auth_names

    # exit code 0 on SIGINT, SIGTERM
    for sig in (signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, handler)

    opts = parse_args()
    if opts.log_level == 'DEBUG':
        log.setLevel(logging.DEBUG)

    # An explicit --name replaces the whole list of names to try.
    if opts.name:
        auth_names = [opts.name]

    # Posted crashes are moved into this directory; block until it exists.
    posted_dir = os.path.join(opts.path, 'posted')
    while not os.path.isdir(posted_dir):
        log.error("directory %s does not exist; please create" % posted_dir)
        time.sleep(30)

    # --delay is given in minutes; convert once to seconds.
    delay_seconds = opts.delay * 60.0
    log.info("monitoring path %s, delay %ds" % (opts.path, delay_seconds))
    while True:
        scrape_path(opts.path)
        if opts.delay == 0:
            sys.exit(0)
        time.sleep(delay_seconds)
+
+
# Only start scanning when executed as a script, not when imported.
if __name__ == "__main__":
    main()