From 8daa83a594a2e98f39d764422bfbdbc62c9efd44 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 19:20:00 +0200 Subject: Adding upstream version 2:4.20.0+dfsg. Signed-off-by: Daniel Baumann --- source3/script/samba-log-parser | 382 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 382 insertions(+) create mode 100755 source3/script/samba-log-parser (limited to 'source3/script/samba-log-parser') diff --git a/source3/script/samba-log-parser b/source3/script/samba-log-parser new file mode 100755 index 0000000..a07dfdb --- /dev/null +++ b/source3/script/samba-log-parser @@ -0,0 +1,382 @@ +#!/usr/bin/env python3 +# +####################################################################### +# +# A script to parse samba (especially winbind) logfiles. +# Trace files should be in a non-syslog format (debug syslog format = no). +# +# --traceid ... Specify the traceid of the request to parse +# --pid ... Specify the pid +# --breakdown ... Break to separate files per each traceid +# --merge-by-timestamp ... Merge logs by timestamp +# --flow ... Show the request/sub-request call flow +# --flow-compact ... Show the request/sub-request call flow without dcerpc +# +# +# Copyright (c) 2023 Andreas Schneider +# Copyright (c) 2023 Pavel Filipenský +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +####################################################################### +# +# Requires: ??? 
+ +import sys +import os +import re +from argparse import ArgumentParser +from collections import defaultdict + +# Trace record consists of a trace header followed by one or more text lines. +# +# This tool expects trace header format based on these smb.conf parameters: +# +# debug syslog format = no +# debug hires timestamp = yes +# winbind debug traceid = yes +# +# If 'winbind debug traceid = no' is set, then the option --merge-by-timestamp +# still can be used. +# +# Each trace header contains a traceid, which is the main identifier for this +# tool. A single traceid is either provided via command line option --traceid +# or a list of traceids is derived from the PID specified via option --pid. +# Creating and evaluating list of traceids from PID can be tricky: +# The traceid can appear in a trace record before trace record containing the +# PID is processed. So when we see a new traceid we are not sure if it belongs +# to the traced PID. +# The PID appears only in the main winbind process (log.winbind). If a +# directory with many log files should be processed, we process the files in +# random order. +# It might happen that e.g. log.wb-ADDOMAIN is processed before log.winbind so +# we do not know the list of traceids yet. +# To make all this easy we put into memory all trace records and do the final +# traceid filtering only after all files are read. This can require lot of +# memory if files are big. 
# Timestamp at the start of every trace header, e.g. "[2023/05/01 07:40:45.439049".
_RE_HEADER_NO_TRACEID = re.compile(
    r"^\[(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\.\d{6}).*")

# Example of a header line
# [2023/05/01 07:40:45.439049,  3, pid=418844, effective(0, 0), real(0, 0), class=winbind, traceid=37] ../../source3/winbindd/winbindd_misc.c:355(winbindd_interface_version)
_RE_HEADER = re.compile(
    r"^\[(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\.\d{6}).*?, .*, "
    r"traceid=([0-9]+).*\]")

# Lines that identify a new winbind client and the client PID, e.g.
# winbindd_interface_version: [nss_winbind (500725)]: request interface version (version = 32)
# process_request_send: [nss_winbind (500725)] Handling async request: SETPWENT
_RE_INTERFACE_VERSION = re.compile(
    r"^\s+winbindd_interface_version: \[\S* \((\d+)\)\]")
_RE_ASYNC_REQUEST = re.compile(
    r"^\s+process_request_send: "
    r"\[\S* \((\d+)\)\] Handling async request:")

# A 'flow' trace line: leading indentation followed by "flow: <text>".
_RE_FLOW = re.compile(r"^(\s+)flow: (.*)")

# Start of a dcerpc sub-request inside a flow listing.
_RE_DCERPC_START = re.compile(r"^(\s+)-> dcerpc_")


def _read_lines(fname):
    """Return all lines of *fname*.

    Undecodable bytes are replaced instead of raising, so that one odd byte
    in a log (or a stray binary file in a log directory) does not abort the
    whole run.
    """
    with open(fname, "r", errors="replace") as infile:
        return infile.readlines()


def _record_timestamp(record):
    """Sort key: the timestamp string (first field) of a record tuple."""
    return record[0]


def process_file_no_traceid(record_list, fname):
    """Split *fname* into trace records without looking at traceids.

    Every record is appended to *record_list* as the tuple
    (date, None, lines, fname). A record starts at a header line carrying a
    hires timestamp and runs until the next header. Lines preceding the
    first header form a record with an empty date. A file without any
    header contributes nothing.
    """
    date = ""
    record_lines = []
    seen_header = False

    for line in _read_lines(fname):
        header = _RE_HEADER_NO_TRACEID.search(line)
        if header:
            seen_header = True
            # Append all previous trace lines of a record
            if record_lines:
                record_list.append((date, None, record_lines, fname))
                record_lines = []
            # Remember the new date
            date = header.group(1)
        record_lines.append(line)

    # The last record has no following header that would flush it.
    if seen_header and record_lines:
        record_list.append((date, None, record_lines, fname))


def process_file(record_list, traceid_set, fname, opid, otraceid):
    """Split *fname* into trace records and collect the wanted traceids.

    Every record is appended to *record_list* as the tuple
    (date, traceid, lines, fname); filtering by traceid happens later.
    Lines preceding the first header form a record with an empty date and
    traceid 0. A file without any header contributes no record.

    *traceid_set* is updated in place:
      - with *otraceid* when it is given,
      - with every traceid whose client PID equals *opid* when it is given,
      - with every traceid seen when neither is given (--breakdown).
    """
    traceid = 0
    traceid_prev = None
    undecided_traceid = False
    date = ""
    record_lines = []
    seen_header = False

    # If traceid option was provided the traceid_set will contain just it
    if otraceid:
        traceid_set.add(otraceid)

    for line in _read_lines(fname):
        header = _RE_HEADER.search(line)
        if header:
            seen_header = True
            # Append all previous trace lines of a record; the traceid
            # filtering is done once all files are read.
            if record_lines:
                record_list.append((date, traceid, record_lines, fname))
                record_lines = []
            # Remember the new date and the new traceid
            date = header.group(1)
            traceid = header.group(2)
            if traceid != traceid_prev:
                traceid_prev = traceid
                undecided_traceid = True
        if opid:
            # The first client line seen for a new traceid decides whether
            # that traceid belongs to the traced PID.
            client = (_RE_INTERFACE_VERSION.search(line)
                      or _RE_ASYNC_REQUEST.search(line))
            if client and undecided_traceid:
                if client.group(1) == opid:
                    traceid_set.add(traceid)
                undecided_traceid = False
        # For --breakdown add every traceid
        if not opid and not otraceid:
            traceid_set.add(traceid)

        record_lines.append(line)

    # The last record has no following header that would flush it.
    if seen_header and record_lines:
        record_list.append((date, traceid, record_lines, fname))


def filter_traceids(record_list, traceid_set):
    """Return the records of *record_list* whose traceid is in *traceid_set*."""
    return [record for record in record_list if record[1] in traceid_set]


def filter_flow(record_list):
    """Return the 'flow' trace lines of *record_list*.

    The "flow: " tag is removed; the leading indentation is kept.
    """
    local_list = []
    for (_date, _traceid, lines, _filename) in record_list:
        for line in lines:
            isflow = _RE_FLOW.search(line)
            if isflow:
                local_list.append(isflow.group(1) + isflow.group(2))
    return local_list


def filter_flowcompact(flist):
    """Return *flist* without the lines nested inside dcerpc calls.

    The "-> dcerpc_" line and the "<- dcerpc_" line with the same
    indentation are kept; everything between them is dropped.
    """
    local_list = []
    end_marker = None
    for fl in flist:
        if end_marker is None:
            local_list.append(fl)
            dcerpc_start = _RE_DCERPC_START.search(fl)
            if dcerpc_start:
                # Indentation of the call; the matching end has the same.
                end_marker = dcerpc_start.group(1)
        elif fl.startswith(end_marker + "<- dcerpc_"):
            end_marker = None
            local_list.append(fl)
    return local_list


def print_record_list(record_list, file):
    """Print the lines of all records to *file*.

    A banner with the file name is printed whenever the originating file
    differs from the one of the previous record.
    """
    f_prev = None
    for (_date, _traceid, lines, filename) in record_list:
        # Inform about filename change
        if filename != f_prev:
            print("-" * 72, file=file)
            print("FILE: ", filename, file=file)
            print("-" * 72, file=file)
        for line in lines:
            print(line, end='', file=file)
        f_prev = filename

# record_list ... list of quadruplets <date, traceid, [lines], filename>
# flow_list ... lines from record_list with 'flow' traces
# traceid_set ... set of traceids we want to trace
#                 with --traceid ... there is a single traceid
#                 with --pid ... there are all traceids for the PID
#                 with --breakdown ... there are all traceids


def setup_parser():
    """Build and return the command line parser of this tool."""
    parser = ArgumentParser()

    parser.add_argument(
        "path",
        type=str,
        help="logfile or directory"
    )
    parser.add_argument(
        "--traceid",
        dest="traceid",
        help="specify the traceid of the trace records",
        metavar="ID"
    )
    parser.add_argument(
        "--pid",
        dest="pid",
        help="specify the pid of winbind client",
        metavar="PID"
    )
    parser.add_argument(
        "--breakdown",
        action="store_true",
        dest="breakdown",
        default=False,
        help="breakdown the traces into per traceid files"
    )
    parser.add_argument(
        "--merge-by-timestamp",
        action="store_true",
        dest="merge",
        default=False,
        help="merge logs by timestamp"
    )
    parser.add_argument(
        "--flow",
        action="store_true",
        dest="flow",
        default=False,
        help="show the request/sub-request flow traces"
    )
    parser.add_argument(
        "--flow-compact",
        action="store_true",
        dest="flowcompact",
        default=False,
        help="show the request/sub-request flow traces without dcerpc details"
    )
    return parser


def main():  # noqa
    """Parse the command line, read the log file(s) and print the result.

    Exits with status 1 on invalid option combinations or an invalid path,
    with status 0 otherwise. With --breakdown the files <traceid>.full,
    <traceid>.flow and <traceid>.flowcompact are written to the current
    directory.
    """
    record_list = []
    traceid_set = set()

    parser = setup_parser()
    options = parser.parse_args()

    if (not options.traceid and not options.pid and not options.breakdown
            and not options.merge):
        print("One of --traceid or --pid is needed"
              " or --breakdown or --merge-by-timestamp.")
        sys.exit(1)
    elif options.traceid and options.pid:
        print("Only one of --traceid or --pid is allowed.")
        sys.exit(1)
    elif options.breakdown and (options.traceid or options.pid):
        print("--breakdown cannot be combined with --traceid and --pid.")
        sys.exit(1)

    if options.flow and not options.traceid:
        print("Option --flow can be used only together with --traceid.")
        sys.exit(1)

    if options.flowcompact and not options.traceid:
        print("Option --flow-compact can be used only together with "
              "--traceid.")
        sys.exit(1)

    if options.flow and options.flowcompact:
        print("Only one of --flow or --flow-compact is allowed.")
        sys.exit(1)

    if not options.path:
        print("Path to logfile or directory with logs is needed.")
        sys.exit(1)

    merge_with_no_traceid = (not options.traceid and not options.pid
                             and not options.breakdown) and options.merge

    def load(fname):
        # Read one log file into record_list using the selected mode.
        if merge_with_no_traceid:
            process_file_no_traceid(record_list, fname)
        else:
            process_file(
                record_list,
                traceid_set,
                fname,
                options.pid,
                options.traceid,
            )

    path = options.path
    if os.path.isdir(path):
        for root, dirs, files in os.walk(path):
            # Walk in a fixed order so that the output is reproducible.
            dirs.sort()
            for name in sorted(files):
                load(os.path.join(root, name))
    elif os.path.isfile(path):
        load(path)
    else:
        print(path, "Path is neither file or directory.")
        sys.exit(1)

    # Sort only using timestamps, no use of traceid. The sort is stable, so
    # records with the same timestamp keep the order they were read in.
    if merge_with_no_traceid:
        record_list.sort(key=_record_timestamp)
        print_record_list(record_list, sys.stdout)
        sys.exit(0)

    # Keep only records with matching traceids
    if not options.breakdown:
        record_list = filter_traceids(record_list, traceid_set)

    if options.breakdown:
        for traceid in traceid_set:
            # Full
            with open("%s.full" % traceid, "w") as full_f:
                full_l = filter_traceids(record_list, {traceid})
                if options.merge:
                    full_l.sort(key=_record_timestamp)
                print_record_list(full_l, full_f)
            # Flow
            with open("%s.flow" % traceid, "w") as flow_f:
                flow_l = filter_flow(full_l)
                for fl in flow_l:
                    print(fl, file=flow_f)
            # Flow compact
            with open("%s.flowcompact" % traceid, "w") as flowcompact_f:
                flowcompact_l = filter_flowcompact(flow_l)
                for fl in flowcompact_l:
                    print(fl, file=flowcompact_f)
    elif options.flow:
        flow_list = filter_flow(record_list)
        for fl in flow_list:
            print(fl)
    elif options.flowcompact:
        flow_list = filter_flow(record_list)
        flow_list = filter_flowcompact(flow_list)
        for fl in flow_list:
            print(fl)
    else:
        if options.merge:
            record_list.sort(key=_record_timestamp)
        print_record_list(record_list, sys.stdout)

    sys.exit(0)


if __name__ == "__main__":
    main()