summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/db/error_handler.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/rocksdb/db/error_handler.h')
-rw-r--r--src/rocksdb/db/error_handler.h124
1 files changed, 124 insertions, 0 deletions
diff --git a/src/rocksdb/db/error_handler.h b/src/rocksdb/db/error_handler.h
new file mode 100644
index 000000000..34e08a525
--- /dev/null
+++ b/src/rocksdb/db/error_handler.h
@@ -0,0 +1,124 @@
+// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+
+#include "monitoring/instrumented_mutex.h"
+#include "options/db_options.h"
+#include "rocksdb/io_status.h"
+#include "rocksdb/listener.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class DBImpl;
+
+// This structure is used to store the DB recovery context. The context is
+// the information that related to the recover actions. For example, it contains
+// FlushReason, which tells the flush job why this flush is called.
+struct DBRecoverContext {
+ FlushReason flush_reason;
+
+ DBRecoverContext() : flush_reason(FlushReason::kErrorRecovery) {}
+
+ DBRecoverContext(FlushReason reason) : flush_reason(reason) {}
+};
+
+class ErrorHandler {
+ public:
+ ErrorHandler(DBImpl* db, const ImmutableDBOptions& db_options,
+ InstrumentedMutex* db_mutex)
+ : db_(db),
+ db_options_(db_options),
+ cv_(db_mutex),
+ end_recovery_(false),
+ recovery_thread_(nullptr),
+ db_mutex_(db_mutex),
+ auto_recovery_(false),
+ recovery_in_prog_(false),
+ soft_error_no_bg_work_(false),
+ is_db_stopped_(false),
+ bg_error_stats_(db_options.statistics) {
+ // Clear the checked flag for uninitialized errors
+ bg_error_.PermitUncheckedError();
+ recovery_error_.PermitUncheckedError();
+ recovery_io_error_.PermitUncheckedError();
+ }
+
+ void EnableAutoRecovery() { auto_recovery_ = true; }
+
+ Status::Severity GetErrorSeverity(BackgroundErrorReason reason,
+ Status::Code code, Status::SubCode subcode);
+
+ const Status& SetBGError(const Status& bg_err, BackgroundErrorReason reason);
+
+ Status GetBGError() const { return bg_error_; }
+
+ Status GetRecoveryError() const { return recovery_error_; }
+
+ Status ClearBGError();
+
+ bool IsDBStopped() { return is_db_stopped_.load(std::memory_order_acquire); }
+
+ bool IsBGWorkStopped() {
+ assert(db_mutex_);
+ db_mutex_->AssertHeld();
+ return !bg_error_.ok() &&
+ (bg_error_.severity() >= Status::Severity::kHardError ||
+ !auto_recovery_ || soft_error_no_bg_work_);
+ }
+
+ bool IsSoftErrorNoBGWork() { return soft_error_no_bg_work_; }
+
+ bool IsRecoveryInProgress() { return recovery_in_prog_; }
+
+ Status RecoverFromBGError(bool is_manual = false);
+ void CancelErrorRecovery();
+
+ void EndAutoRecovery();
+
+ private:
+ DBImpl* db_;
+ const ImmutableDBOptions& db_options_;
+ Status bg_error_;
+ // A separate Status variable used to record any errors during the
+ // recovery process from hard errors
+ Status recovery_error_;
+ // A separate IO Status variable used to record any IO errors during
+ // the recovery process. At the same time, recovery_error_ is also set.
+ IOStatus recovery_io_error_;
+ // The condition variable used with db_mutex during auto resume for time
+ // wait.
+ InstrumentedCondVar cv_;
+ bool end_recovery_;
+ std::unique_ptr<port::Thread> recovery_thread_;
+
+ InstrumentedMutex* db_mutex_;
+ // A flag indicating whether automatic recovery from errors is enabled
+ bool auto_recovery_;
+ bool recovery_in_prog_;
+ // A flag to indicate that for the soft error, we should not allow any
+ // background work except the work is from recovery.
+ bool soft_error_no_bg_work_;
+
+ // Used to store the context for recover, such as flush reason.
+ DBRecoverContext recover_context_;
+ std::atomic<bool> is_db_stopped_;
+
+ // The pointer of DB statistics.
+ std::shared_ptr<Statistics> bg_error_stats_;
+
+ const Status& HandleKnownErrors(const Status& bg_err,
+ BackgroundErrorReason reason);
+ Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
+ void RecoverFromNoSpace();
+ const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
+ void RecoverFromRetryableBGIOError();
+ // First, if it is in recovery and the recovery_error is ok. Set the
+ // recovery_error_ to bg_err. Second, if the severity is higher than the
+ // current bg_error_, overwrite it.
+ void CheckAndSetRecoveryAndBGError(const Status& bg_err);
+};
+
+} // namespace ROCKSDB_NAMESPACE