diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/pybind/mgr/diskprediction_local/predictor.py | |
parent | Initial commit. (diff) | |
download | ceph-upstream.tar.xz ceph-upstream.zip |
Adding upstream version 14.2.21. (tag: upstream/14.2.21, branch: upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/pybind/mgr/diskprediction_local/predictor.py')
-rw-r--r-- | src/pybind/mgr/diskprediction_local/predictor.py | 265 |
1 files changed, 265 insertions, 0 deletions
diff --git a/src/pybind/mgr/diskprediction_local/predictor.py b/src/pybind/mgr/diskprediction_local/predictor.py new file mode 100644 index 00000000..bf9b0d7d --- /dev/null +++ b/src/pybind/mgr/diskprediction_local/predictor.py @@ -0,0 +1,265 @@ +"""Sample code for disk failure prediction. + +This sample code is a community version for anyone who is interested in Machine +Learning and care about disk failure. + +This class provides a disk failure prediction module. Given models dirpath to +initialize a predictor instance and then use 6 days data to predict. Predict +function will return a string to indicate disk failure status: "Good", +"Warning", "Bad", or "Unknown". + +An example code is as follows: + +>>> model = disk_failure_predictor.DiskFailurePredictor() +>>> status = model.initialize("./models") +>>> if status: +>>> model.predict(disk_days) +'Bad' + + +Provided by ProphetStor Data Services Inc. +http://www.prophetstor.com/ + +""" + +from __future__ import print_function +import os +import json +import pickle + + +def get_diskfailurepredictor_path(): + path = os.path.abspath(__file__) + dir_path = os.path.dirname(path) + return dir_path + + +class DiskFailurePredictor(object): + """Disk failure prediction + + This class implements a disk failure prediction module. + """ + + CONFIG_FILE = "config.json" + EXCLUDED_ATTRS = ['smart_9_raw', 'smart_241_raw', 'smart_242_raw'] + + def __init__(self): + """ + This function may throw exception due to wrong file operation. + """ + + self.model_dirpath = "" + self.model_context = {} + + def initialize(self, model_dirpath): + """ + Initialize all models. + + Args: None + + Returns: + Error message. If all goes well, return an empty string. 
+ + Raises: + """ + + config_path = os.path.join(model_dirpath, self.CONFIG_FILE) + if not os.path.isfile(config_path): + return "Missing config file: " + config_path + else: + with open(config_path) as f_conf: + self.model_context = json.load(f_conf) + + for model_name in self.model_context: + model_path = os.path.join(model_dirpath, model_name) + + if not os.path.isfile(model_path): + return "Missing model file: " + model_path + + self.model_dirpath = model_dirpath + + def __preprocess(self, disk_days): + """ + Preprocess disk attributes. + + Args: + disk_days: Refer to function predict(...). + + Returns: + new_disk_days: Processed disk days. + """ + + req_attrs = [] + new_disk_days = [] + + attr_list = set.intersection(*[set(disk_day.keys()) + for disk_day in disk_days]) + for attr in attr_list: + if (attr.startswith('smart_') and attr.endswith('_raw')) and \ + attr not in self.EXCLUDED_ATTRS: + req_attrs.append(attr) + + for disk_day in disk_days: + new_disk_day = {} + for attr in req_attrs: + if float(disk_day[attr]) >= 0.0: + new_disk_day[attr] = disk_day[attr] + + new_disk_days.append(new_disk_day) + + return new_disk_days + + @staticmethod + def __get_diff_attrs(disk_days): + """ + Get 5 days differential attributes. + + Args: + disk_days: Refer to function predict(...). + + Returns: + attr_list: All S.M.A.R.T. attributes used in given disk. Here we + use intersection set of all disk days. + + diff_disk_days: A list struct comprises 5 dictionaries, each + dictionary contains differential attributes. + + Raises: + Exceptions of wrong list/dict operations. 
+ """ + + all_attrs = [set(disk_day.keys()) for disk_day in disk_days] + attr_list = list(set.intersection(*all_attrs)) + attr_list = disk_days[0].keys() + prev_days = disk_days[:-1] + curr_days = disk_days[1:] + diff_disk_days = [] + + for prev, cur in zip(prev_days, curr_days): + diff_disk_days.append({attr:(int(cur[attr]) - int(prev[attr])) + for attr in attr_list}) + + return attr_list, diff_disk_days + + def __get_best_models(self, attr_list): + """ + Find the best model from model list according to given attribute list. + + Args: + attr_list: All S.M.A.R.T. attributes used in given disk. + + Returns: + modelpath: The best model for the given attribute list. + model_attrlist: 'Ordered' attribute list of the returned model. + Must be aware that SMART attributes is in order. + + Raises: + """ + + models = self.model_context.keys() + + scores = [] + for model_name in models: + scores.append(sum(attr in attr_list + for attr in self.model_context[model_name])) + max_score = max(scores) + + # Skip if too few matched attributes. + if max_score < 3: + print("Too few matched attributes") + return None + + best_models = {} + best_model_indices = [idx for idx, score in enumerate(scores) + if score > max_score - 2] + for model_idx in best_model_indices: + model_name = list(models)[model_idx] + model_path = os.path.join(self.model_dirpath, model_name) + model_attrlist = self.model_context[model_name] + best_models[model_path] = model_attrlist + + return best_models + # return os.path.join(self.model_dirpath, model_name), model_attrlist + + @staticmethod + def __get_ordered_attrs(disk_days, model_attrlist): + """ + Return ordered attributes of given disk days. + + Args: + disk_days: Unordered disk days. + model_attrlist: Model's ordered attribute list. + + Returns: + ordered_attrs: Ordered disk days. 
+ + Raises: None + """ + + ordered_attrs = [] + + for one_day in disk_days: + one_day_attrs = [] + + for attr in model_attrlist: + if attr in one_day: + one_day_attrs.append(one_day[attr]) + else: + one_day_attrs.append(0) + + ordered_attrs.append(one_day_attrs) + + return ordered_attrs + + def predict(self, disk_days): + """ + Predict using given 6-days disk S.M.A.R.T. attributes. + + Args: + disk_days: A list struct comprises 6 dictionaries. These + dictionaries store 'consecutive' days of disk SMART + attributes. + Returns: + A string indicates prediction result. One of following four strings + will be returned according to disk failure status: + (1) Good : Disk is health + (2) Warning : Disk has some symptoms but may not fail immediately + (3) Bad : Disk is in danger and data backup is highly recommended + (4) Unknown : Not enough data for prediction. + + Raises: + Pickle exceptions + """ + + all_pred = [] + + proc_disk_days = self.__preprocess(disk_days) + attr_list, diff_data = DiskFailurePredictor.__get_diff_attrs(proc_disk_days) + modellist = self.__get_best_models(attr_list) + if modellist is None: + return "Unknown" + + for modelpath in modellist: + model_attrlist = modellist[modelpath] + ordered_data = DiskFailurePredictor.__get_ordered_attrs( + diff_data, model_attrlist) + + try: + with open(modelpath, 'rb') as f_model: + clf = pickle.load(f_model) + + except UnicodeDecodeError: + # Compatibility for python3 + with open(modelpath, 'rb') as f_model: + clf = pickle.load(f_model, encoding='latin1') + + pred = clf.predict(ordered_data) + + all_pred.append(1 if any(pred) else 0) + + score = 2 ** sum(all_pred) - len(modellist) + if score > 10: + return "Bad" + if score > 4: + return "Warning" + return "Good" |