#!/usr/bin/env python3 # ####################################################################### # # A script to parse samba (especially winbind) logfiles. # Trace files should be in a non-syslog format (debug syslog format = no). # # --traceid ... Specify the traceid of the request to parse # --pid ... Specify the pid # --breakdown ... Break to separate files per each traceid # --merge-by-timestamp ... Merge logs by timestamp # --flow ... Show the request/sub-request call flow # --flow-compact ... Show the request/sub-request call flow without dcerpc # # # Copyright (c) 2023 Andreas Schneider # Copyright (c) 2023 Pavel Filipenský # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # ####################################################################### # # Requires: ??? import sys import os import re from argparse import ArgumentParser from collections import defaultdict # Trace record consists of a trace header followed by one or more text lines. # # This tool expects trace header format based on these smb.conf parameters: # # debug syslog format = no # debug hires timestamp = yes # winbind debug traceid = yes # # If 'winbind debug traceid = no' is set, then the option --merge-by-timestamp # still can be used. # # Each trace header contains a traceid, which is the main identifier for this # tool. A single traceid is either provided via command line option --traceid # or a list of traceids is derived from the PID specified via option --pid. # Creating and evaluating list of traceids from PID can be tricky: # The traceid can appear in a trace record before trace record containing the # PID is processed. So when we see a new traceid we are not sure if it belongs # to the traced PID. # The PID appears only in the main winbind process (log.winbind). If a # directory with many log files should be processed, we process the files in # random order. # It might happen that e.g. log.wb-ADDOMAIN is processed before log.winbind so # we do not know the list of traceids yet. # To make all this easy we put into memory all trace records and do the final # traceid filtering only after all files are read. This can require lot of # memory if files are big. def process_file_no_traceid(record_list, fname): with open(fname, "r") as infile: data = infile.readlines() date = "" record_lines = [] RE_HEADER_NO_TRACEID = re.compile( r"^\[(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\.\d{6}).*") for line in data: header = RE_HEADER_NO_TRACEID.search(line) if header: # Append all previous trace lines of a record if record_lines: record_list.append((date, None, record_lines, fname)) record_lines = [] # Remember the new date date = header.group(1) record_lines.append(line) def process_file(record_list, traceid_set, fname, opid, otraceid): with open(fname, "r") as infile: data = infile.readlines() pid = None traceid = 0 traceid_prev = None undecided_traceid = False date = "" record_lines = [] # If traceid option was provided the traceid_set will contain just it if otraceid: traceid_set.add(otraceid) RE_HEADER = re.compile( r"^\[(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\.\d{6}).*?, .*, " r"traceid=([0-9]+).*\]") RE_INTERFACE_VERSION = re.compile( r"^\s+winbindd_interface_version: \[\S* \((\d+)\)\]") RE_ASYNC_REQUEST = re.compile( r"^\s+process_request_send: " r"\[\S* \((\d+)\)\] Handling async request:") # Example of a header line # [2023/05/01 07:40:45.439049, 3, pid=418844, effective(0, 0), real(0, 0), class=winbind, traceid=37] ../../source3/winbindd/winbindd_misc.c:355(winbindd_interface_version) for line in data: header = RE_HEADER.search(line) if header: # Append all previous trace lines of a record if the traceid is in # the list. if record_lines: record_list.append((date, traceid, record_lines, fname)) record_lines = [] # Remember the new date and the new traceid date = header.group(1) traceid = header.group(2) if traceid != traceid_prev: traceid_prev = traceid undecided_traceid = True if opid: # Search for lines that identify a new winbind client and the # client PID # winbindd_interface_version: [nss_winbind (500725)]: request interface version (version = 32) # process_request_send: [nss_winbind (500725)] Handling async request: SETPWENT interface_version = RE_INTERFACE_VERSION.search(line) async_request = RE_ASYNC_REQUEST.search(line) if interface_version: pid = interface_version.group(1) if undecided_traceid: if pid == opid: traceid_set.add(traceid) undecided_traceid = False if async_request: pid = async_request.group(1) if undecided_traceid: if pid == opid: traceid_set.add(traceid) undecided_traceid = False # For --breakdown add every traceid if not opid and not otraceid: traceid_set.add(traceid) record_lines.append(line) def filter_traceids(record_list, traceid_set): llist = [] for (d, t, li, f) in record_list: if t in traceid_set: llist.append((d, t, li, f)) return llist def filter_flow(record_list): local_list = [] for (date, traceid, lines, filename) in record_list: for line in lines: isflow = re.search(r"^(\s+)flow: (.*)", line) if isflow: local_list.append(isflow.group(1) + isflow.group(2)) return local_list def filter_flowcompact(flist): local_list = [] end_marker = None for fl in flist: if not end_marker: local_list.append(fl) dcerpc_start = re.search(r"^(\s+)-> dcerpc_", fl) if dcerpc_start: end_marker = dcerpc_start.group(1) else: dcerpc_end = re.search(r"^" + end_marker + "<- dcerpc_", fl) if dcerpc_end: end_marker = None local_list.append(fl) return local_list def print_record_list(record_list, file): f_prev = None for (date, traceid, lines, filename) in record_list: # Inform about filename change if filename != f_prev: print("-" * 72, file=file) print("FILE: ", filename, file=file) print("-" * 72, file=file) for line in lines: print(line, end='', file=file) f_prev = filename # record_list ... list of quadruplets # flow_list ... lines from record_list with 'flow' traces # traceid_set ... list of traceids we want to trace # with --traceid ... there is a single traceids # with --pid ... there are all traceids for the PID # with --breakdown ... there are all traceids def setup_parser(): parser = ArgumentParser() parser.add_argument( "path", type=str, help="logfile or directory" ) parser.add_argument( "--traceid", dest="traceid", help="specify the traceid of the trace records", metavar="ID" ) parser.add_argument( "--pid", dest="pid", help="specify the pid of winbind client", metavar="PID" ) parser.add_argument( "--breakdown", action="store_true", dest="breakdown", default=False, help="breakdown the traces into per traceid files" ) parser.add_argument( "--merge-by-timestamp", action="store_true", dest="merge", default=False, help="merge logs by timestamp" ) parser.add_argument( "--flow", action="store_true", dest="flow", default=False, help="show the request/sub-request flow traces" ) parser.add_argument( "--flow-compact", action="store_true", dest="flowcompact", default=False, help="show the request/sub-request flow traces without dcerpc details" ) return parser def main(): # noqa record_list = [] flow_list = [] traceid_set = set() parser = setup_parser() options = parser.parse_args() if (not options.traceid and not options.pid and not options.breakdown and not options.merge): print("One of --traceid or --pid is needed" " or --breakdown or --merge-by-timestamp.") sys.exit(1) elif options.traceid and options.pid: print("Only one of --traceid or --pid is allowed.") sys.exit(1) elif options.breakdown and (options.traceid or options.pid): print("--breakdown cannot be combined with --traceid and --pid.") sys.exit(1) if options.flow and not options.traceid: print("Option --flow can be used only together with --traceid.") sys.exit(1) if options.flowcompact and not options.traceid: print("Option --flow-compact can be used only together with " "--traceid.") sys.exit(1) if options.flow and options.flowcompact: print("Only one of --flow or --flow-compact is allowed.") sys.exit(1) if not options.path: print("Path to logfile or directory with logs is needed.") sys.exit(1) merge_with_no_traceid = (not options.traceid and not options.pid and not options.breakdown) and options.merge path = options.path if os.path.isdir(path): for root, dirs, files in os.walk(path): for name in files: if merge_with_no_traceid: process_file_no_traceid( record_list, os.path.join(root, name) ) else: process_file( record_list, traceid_set, os.path.join(root, name), options.pid, options.traceid, ) elif os.path.isfile(path): if merge_with_no_traceid: process_file_no_traceid( record_list, path ) else: process_file( record_list, traceid_set, path, options.pid, options.traceid ) else: print(path, "Path is neither file or directory.") sys.exit(1) # Sort only using timestamps, no use of traceid if merge_with_no_traceid: record_list.sort() print_record_list(record_list, sys.stdout) sys.exit(0) # Keep only records with matching traceids if not options.breakdown: record_list = filter_traceids(record_list, traceid_set) if options.breakdown: for traceid in traceid_set: # Full with open("%s.full" % traceid, "w") as full_f: full_l = filter_traceids(record_list, {traceid}) if options.merge: full_l.sort() print_record_list(full_l, full_f) # Flow with open("%s.flow" % traceid, "w") as flow_f: flow_l = filter_flow(full_l) for fl in flow_l: print(fl, file=flow_f) # Flow compact with open("%s.flowcompact" % traceid, "w") as flowcompact_f: flowcompact_l = filter_flowcompact(flow_l) for fl in flowcompact_l: print(fl, file=flowcompact_f) elif options.flow: flow_list = filter_flow(record_list) for fl in flow_list: print(fl) elif options.flowcompact: flow_list = filter_flow(record_list) flow_list = filter_flowcompact(flow_list) for fl in flow_list: print(fl) else: if options.merge: record_list.sort() print_record_list(record_list, sys.stdout) sys.exit(0) if __name__ == "__main__": main()