194 lines
5.8 KiB
Python
Executable file
194 lines
5.8 KiB
Python
Executable file
#!/usr/bin/python3
|
|
"""
|
|
Collect systemd-journal log entries around time of daemon exit and coredumps.
|
|
"""
|
|
|
|
import datetime
|
|
import json
|
|
import logging
|
|
import pathlib
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
|
|
|
|
TIMESPAN_BEFORE = 600 # s
|
|
TIMESPAN_AFTER = TIMESPAN_BEFORE
|
|
CURSOR_DIR = pathlib.Path('/var/lib/knot-resolver')
|
|
CURSOR_PATH = CURSOR_DIR / 'coredump_watcher.cursor'
|
|
|
|
|
|
class Timestamp:
|
|
def __init__(self, usec):
|
|
self.usec = int(usec)
|
|
|
|
@property
|
|
def unix(self):
|
|
return self.usec // 10**6
|
|
|
|
def __str__(self):
|
|
return datetime.datetime.utcfromtimestamp(self.unix).strftime('%Y-%m-%d_%H:%M:%S')
|
|
|
|
def __lt__(self, other):
|
|
return self.usec < other.usec
|
|
|
|
def __eq__(self, other):
|
|
return self.usec == other.usec
|
|
|
|
|
|
class Entry(dict):
|
|
@property
|
|
def timestamp(self):
|
|
usec = self.get('__REALTIME_TIMESTAMP')
|
|
if usec is None:
|
|
return None
|
|
return Timestamp(usec)
|
|
|
|
@property
|
|
def core_path(self):
|
|
filename = self.get('COREDUMP_FILENAME')
|
|
if filename is None:
|
|
return None
|
|
return pathlib.Path(filename)
|
|
|
|
def get_first(self, *keys):
|
|
for key in keys:
|
|
try:
|
|
return self[key]
|
|
except KeyError:
|
|
continue
|
|
return None
|
|
|
|
@property
|
|
def program(self):
|
|
return self.get_first('COREDUMP_UNIT', 'UNIT', '_SYSTEMD_UNIT', 'SYSLOG_IDENTIFIER')
|
|
|
|
@property
|
|
def pid(self):
|
|
return self.get_first('COREDUMP_PID', '_PID')
|
|
|
|
|
|
def save_cursor(cursor):
|
|
if cursor is None:
|
|
return
|
|
CURSOR_DIR.mkdir(parents=True, exist_ok=True)
|
|
with CURSOR_PATH.open('w') as curfile:
|
|
curfile.write(cursor)
|
|
logging.info('log cursor saved into %s, next run will skip old logs',
|
|
CURSOR_PATH)
|
|
|
|
|
|
def load_cursor():
|
|
try:
|
|
with CURSOR_PATH.open('r') as curfile:
|
|
logging.info('log cursor read from %s, skipping old logs',
|
|
CURSOR_PATH)
|
|
return curfile.read().strip()
|
|
except FileNotFoundError:
|
|
logging.info('log cursor file %s does not exist, parsing all logs',
|
|
CURSOR_PATH)
|
|
return None
|
|
|
|
|
|
def get_cursor():
|
|
journal_args = ['journalctl', '-o', 'json', '-n', '1']
|
|
with subprocess.Popen(
|
|
journal_args,
|
|
bufsize=1, # line buffering
|
|
universal_newlines=True,
|
|
stdout=subprocess.PIPE) as jproc:
|
|
stdout, _ = jproc.communicate()
|
|
data = json.loads(stdout)
|
|
entry = Entry(**data)
|
|
return entry.get('__CURSOR')
|
|
|
|
|
|
def read_journal(*args):
|
|
journal_args = [
|
|
'journalctl',
|
|
'-o', 'json',
|
|
'-u', 'kres*',
|
|
'-u', 'systemd-coredump*']
|
|
journal_args += args
|
|
with subprocess.Popen(
|
|
journal_args,
|
|
bufsize=1, # line buffering
|
|
universal_newlines=True,
|
|
stdout=subprocess.PIPE) as jproc:
|
|
for line in jproc.stdout:
|
|
data = json.loads(line)
|
|
yield Entry(**data)
|
|
|
|
|
|
def extract_logs(around_time, log_name):
|
|
start_time = Timestamp(around_time.usec - TIMESPAN_BEFORE * 10**6)
|
|
end_time = Timestamp(around_time.usec + TIMESPAN_AFTER * 10**6)
|
|
log_window = list(read_journal(
|
|
'--since', '@{}'.format(start_time.unix),
|
|
'--until', '@{}'.format(end_time.unix)))
|
|
with log_name.with_suffix('.json').open('w') as jsonf:
|
|
json.dump(log_window, jsonf, indent=4)
|
|
with log_name.with_suffix('.log').open('w') as logf:
|
|
logf.write('##### logs since {}\n'.format(start_time))
|
|
for entry in log_window:
|
|
if entry.timestamp == around_time:
|
|
logf.write('##### HERE #####\n')
|
|
logf.write('{t} {h} {prg}[{pid}]: {m}\n'.format(
|
|
t=entry.timestamp,
|
|
h=entry.get('_HOSTNAME'),
|
|
prg=entry.program,
|
|
pid=entry.pid,
|
|
m=entry.get('MESSAGE')))
|
|
logf.write('##### logs until {}\n'.format(end_time))
|
|
|
|
|
|
def main():
|
|
logging.basicConfig(level=logging.INFO)
|
|
|
|
if len(sys.argv) != 2:
|
|
sys.exit('Usage: {} <output log directory>'.format(sys.argv[0]))
|
|
outdir = pathlib.Path(sys.argv[1])
|
|
outdir.mkdir(parents=True, exist_ok=True)
|
|
|
|
cursor_previous = load_cursor()
|
|
cursor_at_start = get_cursor()
|
|
|
|
exit_times = []
|
|
coredumps = {}
|
|
filter_args = []
|
|
if cursor_previous is not None:
|
|
filter_args = ['--after-cursor', cursor_previous]
|
|
for entry in read_journal(*filter_args):
|
|
if 'EXIT_CODE' in entry:
|
|
logging.debug('exit@%s: %s', entry.timestamp, entry)
|
|
exit_times.append(entry.timestamp)
|
|
if 'COREDUMP_FILENAME' in entry:
|
|
logging.debug('coredump @ %s: %s', entry.timestamp, entry.core_path)
|
|
coredumps[entry.core_path] = entry.timestamp
|
|
|
|
exit_times.sort()
|
|
logging.debug('detected exits: %s', exit_times)
|
|
for exit_time in exit_times:
|
|
extract_logs(exit_time, outdir / str(exit_time))
|
|
|
|
coredumps_missing = 0
|
|
logging.debug('detected coredumps: %s', coredumps)
|
|
for core_path, core_time in coredumps.items():
|
|
core_name = core_path.name
|
|
out_path_prefix = (outdir / str(core_time))
|
|
extract_logs(core_time, out_path_prefix.with_suffix('.logs'))
|
|
try:
|
|
shutil.copy(
|
|
str(core_path),
|
|
str(out_path_prefix.with_suffix('.{}'.format(core_name))))
|
|
except FileNotFoundError as ex:
|
|
logging.error('coredump file %s cannot be copied: %s', core_path, ex)
|
|
coredumps_missing += 1
|
|
logging.info('wrote %d coredumps and %d logs snippets (%s coredumps missing)',
|
|
len(coredumps) - coredumps_missing, len(exit_times), coredumps_missing)
|
|
|
|
save_cursor(cursor_at_start)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|