diff options
Diffstat (limited to '')
-rw-r--r-- | src/rocksdb/tools/advisor/advisor/db_log_parser.py | 134 |
1 file changed, 134 insertions, 0 deletions
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
# This source code is licensed under both the GPLv2 (found in the
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).

import glob
import re
import time
from abc import ABC, abstractmethod
from calendar import timegm
from enum import Enum


# Placeholder column-family name assigned to log entries whose message does
# not mention any specific column family (i.e. database-wide events).
NO_COL_FAMILY = "DB_WIDE"


class DataSource(ABC):
    """Abstract base for sources of RocksDB diagnostic data.

    Concrete subclasses (logs, database options, time-series stats) implement
    check_and_trigger_conditions() to scan their data and set triggers on any
    Condition objects whose criteria are met.
    """

    class Type(Enum):
        LOG = 1
        DB_OPTIONS = 2
        TIME_SERIES = 3

    def __init__(self, type):
        # NOTE: the parameter intentionally keeps the name 'type' (shadowing
        # the builtin) so existing callers using the keyword are unaffected.
        self.type = type

    @abstractmethod
    def check_and_trigger_conditions(self, conditions):
        """Scan this data source and set triggers on matching conditions."""
        pass


class Log:
    """One parsed entry from a RocksDB LOG file (may span multiple lines)."""

    # A new log entry always starts with a timestamp of the form
    # '2018/07/25-17:29:05.176080'. Raw string replaces the original non-raw
    # pattern that needed '# noqa' to silence invalid-escape warnings; the
    # pattern is compiled once instead of on every call.
    _DATE_REGEX = re.compile(r"\d{4}/\d{2}/\d{2}-\d{2}:\d{2}:\d{2}\.\d{6}")

    @staticmethod
    def is_new_log(log_line):
        # The assumption is that a new log entry starts with its date;
        # continuation lines of a multi-line message do not. Returns a
        # truthy match object or None, as callers use it in boolean context.
        return Log._DATE_REGEX.match(log_line)

    def __init__(self, log_line, column_families):
        """Parse one log line and detect which column family it refers to.

        log_line: the first physical line of the entry
            ('<timestamp> <context> <message...>').
        column_families: iterable of column-family names to look for as
            '[name]' inside the message.
        """
        token_list = log_line.strip().split()
        self.time = token_list[0]
        self.context = token_list[1]
        self.message = " ".join(token_list[2:])
        self.column_family = None
        # example log for 'default' column family:
        # "2018/07/25-17:29:05.176080 7f969de68700 [db/compaction_job.cc:1634]
        # [default] [JOB 3] Compacting 24@0 + 16@1 files to L1, score 6.00\n"
        for col_fam in column_families:
            # re.escape guards against column-family names containing regex
            # metacharacters; behavior is unchanged for plain names.
            search_for_str = r"\[" + re.escape(col_fam) + r"\]"
            if re.search(search_for_str, self.message):
                self.column_family = col_fam
                break
        if not self.column_family:
            self.column_family = NO_COL_FAMILY

    def get_human_readable_time(self):
        # example from a log line: '2018/07/25-11:25:45.782710'
        return self.time

    def get_column_family(self):
        return self.column_family

    def get_context(self):
        return self.context

    def get_message(self):
        return self.message

    def append_message(self, remaining_log):
        # Continuation lines of a multi-line log entry are folded into the
        # message, separated by newlines.
        self.message = self.message + "\n" + remaining_log.strip()

    def get_timestamp(self):
        # example: '2018/07/25-11:25:45.782710' will be converted to the GMT
        # Unix timestamp 1532517945 (note: this method assumes that self.time
        # is in GMT)
        hr_time = self.time + "GMT"
        timestamp = timegm(time.strptime(hr_time, "%Y/%m/%d-%H:%M:%S.%f%Z"))
        return timestamp

    def __repr__(self):
        return (
            "time: "
            + self.time
            + "; context: "
            + self.context
            + "; col_fam: "
            + self.column_family
            + "; message: "
            + self.message
        )


class DatabaseLogs(DataSource):
    """DataSource that scans RocksDB LOG files for condition regexes."""

    def __init__(self, logs_path_prefix, column_families):
        super().__init__(DataSource.Type.LOG)
        self.logs_path_prefix = logs_path_prefix
        self.column_families = column_families

    def trigger_conditions_for_log(self, conditions, log):
        # For a LogCondition object, trigger is:
        # Dict[column_family_name, List[Log]]. This explains why the
        # condition was triggered and for which column families.
        for cond in conditions:
            if re.search(cond.regex, log.get_message(), re.IGNORECASE):
                trigger = cond.get_trigger() or {}
                trigger.setdefault(log.get_column_family(), []).append(log)
                cond.set_trigger(trigger)

    def check_and_trigger_conditions(self, conditions):
        """Scan every non-'old' LOG file under the prefix for conditions."""
        for file_name in glob.glob(self.logs_path_prefix + "*"):
            # TODO(poojam23): find a way to distinguish between log files
            # - generated in the current experiment but labeled 'old'
            #   because the LOGs exceeded the file size limit AND
            # - generated in some previous experiment that are also labeled
            #   'old' and were not deleted for some reason
            if re.search("old", file_name, re.IGNORECASE):
                continue
            with open(file_name, "r") as db_logs:
                new_log = None
                for line in db_logs:
                    if Log.is_new_log(line):
                        if new_log:
                            self.trigger_conditions_for_log(
                                conditions, new_log
                            )
                        new_log = Log(line, self.column_families)
                    elif new_log:
                        # To account for logs split into multiple lines.
                        # The 'elif' guard fixes a crash in the original:
                        # a file starting with a continuation line would
                        # call append_message on None.
                        new_log.append_message(line)
                # Check for the last log in the file.
                if new_log:
                    self.trigger_conditions_for_log(conditions, new_log)