summary refs log tree commit diff stats
path: root/smartd.cpp
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-15 17:14:45 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-15 17:14:45 +0000
commit 43e8530e93493bb978c446a2023134bdd4277e50 (patch)
tree e8c0d3c0c394b17381f48fb2d288f166b4f22440 /smartd.cpp
parent Initial commit. (diff)
download smartmontools-43e8530e93493bb978c446a2023134bdd4277e50.tar.xz
download smartmontools-43e8530e93493bb978c446a2023134bdd4277e50.zip
Adding upstream version 7.4. [upstream/7.4] [upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'smartd.cpp')
-rw-r--r-- smartd.cpp 5991
1 files changed, 5991 insertions, 0 deletions
diff --git a/smartd.cpp b/smartd.cpp
new file mode 100644
index 0000000..5322fc9
--- /dev/null
+++ b/smartd.cpp
@@ -0,0 +1,5991 @@
+/*
+ * Home page of code is: https://www.smartmontools.org
+ *
+ * Copyright (C) 2002-11 Bruce Allen
+ * Copyright (C) 2008-23 Christian Franke
+ * Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
+ * Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "config.h"
+#define __STDC_FORMAT_MACROS 1 // enable PRI* for C++
+
+// unconditionally included files
+#include <inttypes.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h> // umask
+#include <signal.h>
+#include <fcntl.h>
+#include <string.h>
+#include <syslog.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <time.h>
+#include <limits.h>
+#include <getopt.h>
+
+#include <algorithm> // std::replace()
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+// conditionally included files
+#ifndef _WIN32
+#include <sys/wait.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef _WIN32
+#include "os_win32/popen.h" // popen_as_rstr_user(), pclose()
+#ifdef _MSC_VER
+#pragma warning(disable:4761) // "conversion supplied"
+typedef unsigned short mode_t;
+typedef int pid_t;
+#endif
+#include <io.h> // umask()
+#include <process.h> // getpid()
+#endif // _WIN32
+
+#ifdef __CYGWIN__
+#include <io.h> // setmode()
+#endif // __CYGWIN__
+
+#ifdef HAVE_LIBCAP_NG
+#include <cap-ng.h>
+#endif // LIBCAP_NG
+
+#ifdef HAVE_LIBSYSTEMD
+#include <systemd/sd-daemon.h>
+#endif // HAVE_LIBSYSTEMD
+
+// locally included files
+#include "atacmds.h"
+#include "dev_interface.h"
+#include "knowndrives.h"
+#include "scsicmds.h"
+#include "nvmecmds.h"
+#include "utility.h"
+
+#ifdef HAVE_POSIX_API
+#include "popen_as_ugid.h"
+#endif
+
+#ifdef _WIN32
+// fork()/signal()/initd simulation for native Windows
+#include "os_win32/daemon_win32.h" // daemon_main/detach/signal()
+#define strsignal daemon_strsignal
+#define sleep daemon_sleep
+// SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
+#define SIGQUIT SIGBREAK
+#define SIGQUIT_KEYNAME "CONTROL-Break"
+#else // _WIN32
+#define SIGQUIT_KEYNAME "CONTROL-\\"
+#endif // _WIN32
+
+const char * smartd_cpp_cvsid = "$Id: smartd.cpp 5519 2023-07-24 15:57:54Z chrfranke $"
+ CONFIG_H_CVSID;
+
+// Pointer type for signal handler functions: takes the signal number,
+// returns nothing.  Declared inside extern "C" so that the function type
+// it points to has C language linkage.
+extern "C" {
+ typedef void (*signal_handler_type)(int);
+}
+
+// Install 'handler' for signal 'sig', but leave the signal alone if it is
+// currently ignored (SIG_IGN).  Which API is used depends on the platform:
+// the Windows emulation, sigaction(), sigset() or plain signal().
+static void set_signal_if_not_ignored(int sig, signal_handler_type handler)
+{
+#if defined(_WIN32)
+  // signal() emulation
+  daemon_signal(sig, handler);
+
+#elif defined(HAVE_SIGACTION)
+  // SVr4, POSIX.1-2001, POSIX.1-2008
+  // Query the current disposition; SIG_DFL is assumed if the query fails.
+  struct sigaction previous;
+  previous.sa_handler = SIG_DFL;
+  sigaction(sig, nullptr, &previous);
+
+  if (previous.sa_handler != SIG_IGN) {
+    struct sigaction wanted;
+    memset(&wanted, 0, sizeof(wanted));
+    wanted.sa_handler = handler;
+    wanted.sa_flags = SA_RESTART; // BSD signal() semantics
+    sigaction(sig, &wanted, nullptr);
+  }
+
+#elif defined(HAVE_SIGSET)
+  // SVr4, POSIX.1-2001, obsoleted in POSIX.1-2008
+  // Install first, then restore SIG_IGN if that was the old disposition.
+  const auto old_disposition = sigset(sig, handler);
+  if (old_disposition == SIG_IGN)
+    sigset(sig, SIG_IGN);
+
+#else
+  // POSIX.1-2001, POSIX.1-2008, C89, C99, undefined semantics.
+  // Important: BSD semantics is required.  Traditional signal()
+  // resets the handler to SIG_DFL after the first signal is caught.
+  const auto old_disposition = signal(sig, handler);
+  if (old_disposition == SIG_IGN)
+    signal(sig, SIG_IGN);
+#endif
+}
+
+using namespace smartmontools;
+
+static const int scsiLogRespLen = 252;
+
+// smartd exit codes
+#define EXIT_BADCMD 1 // command line did not parse
+#define EXIT_BADCONF 2 // syntax error in config file
+#define EXIT_STARTUP 3 // problem forking daemon
+#define EXIT_PID 4 // problem creating pid file
+#define EXIT_NOCONF 5 // config file does not exist
+#define EXIT_READCONF 6 // config file exists but cannot be read
+
+#define EXIT_NOMEM 8 // out of memory
+#define EXIT_BADCODE 10 // internal error - should NEVER happen
+
+#define EXIT_BADDEV 16 // we can't monitor this device
+#define EXIT_NODEV 17 // no devices to monitor
+
+#define EXIT_SIGNAL 254 // abort on signal
+
+
+// command-line: 1=debug mode, 2=print presets
+static unsigned char debugmode = 0;
+
+// command-line: how long to sleep between checks
+static constexpr int default_checktime = 1800;
+static int checktime = default_checktime;
+static int checktime_min = 0; // Minimum individual check time, 0 if none
+
+// command-line: name of PID file (empty for no pid file)
+static std::string pid_file;
+
+// command-line: path prefix of persistent state file, empty if no persistence.
+static std::string state_path_prefix
+#ifdef SMARTMONTOOLS_SAVESTATES
+ = SMARTMONTOOLS_SAVESTATES
+#endif
+ ;
+
+// command-line: path prefix of attribute log file, empty if no logs.
+static std::string attrlog_path_prefix
+#ifdef SMARTMONTOOLS_ATTRIBUTELOG
+ = SMARTMONTOOLS_ATTRIBUTELOG
+#endif
+ ;
+
+// configuration file name
+static const char * configfile;
+// configuration file "name" if read from stdin
+static const char * const configfile_stdin = "<stdin>";
+// path of alternate configuration file
+static std::string configfile_alt;
+
+// warning script file
+static std::string warning_script;
+
+#ifdef HAVE_POSIX_API
+// run warning script as non-privileged user
+static bool warn_as_user;
+static uid_t warn_uid;
+static gid_t warn_gid;
+static std::string warn_uname, warn_gname;
+#elif defined(_WIN32)
+// run warning script as restricted user
+static bool warn_as_restr_user;
+#endif
+
+// command-line: when should we exit?
+enum quit_t {
+ QUIT_NODEV, QUIT_NODEVSTARTUP, QUIT_NEVER, QUIT_ONECHECK,
+ QUIT_SHOWTESTS, QUIT_ERRORS
+};
+static quit_t quit = QUIT_NODEV;
+static bool quit_nodev0 = false;
+
+// command-line; this is the default syslog(3) log facility to use.
+static int facility=LOG_DAEMON;
+
+#ifndef _WIN32
+// command-line: fork into background?
+static bool do_fork=true;
+#endif
+
+// TODO: This smartctl only variable is also used in some os_*.cpp
+unsigned char failuretest_permissive = 0;
+
+// set to one if we catch a USR1 (check devices now)
+static volatile int caughtsigUSR1=0;
+
+#ifdef _WIN32
+// set to one if we catch a USR2 (toggle debug mode)
+static volatile int caughtsigUSR2=0;
+#endif
+
+// set to one if we catch a HUP (reload config file). In debug mode,
+// set to two, if we catch INT (also reload config file).
+static volatile int caughtsigHUP=0;
+
+// set to signal value if we catch INT, QUIT, or TERM
+static volatile int caughtsigEXIT=0;
+
+// This function prints either to stdout or to the syslog as needed.
+static void PrintOut(int priority, const char *fmt, ...)
+ __attribute_format_printf(2, 3);
+
+#ifdef HAVE_LIBSYSTEMD
+// systemd notify support
+
+static bool notify_enabled = false;
+static bool notify_ready = false;
+
+// Enable systemd notification if the NOTIFY_SOCKET environment
+// variable is present.
+static inline void notify_init()
+{
+  if (getenv("NOTIFY_SOCKET"))
+    notify_enabled = true;
+}
+
+// Check option consistency after command line parsing.
+// Returns false (after logging) if notification is enabled but smartd
+// was asked to fork into the background; true otherwise.
+static inline bool notify_post_init()
+{
+  if (notify_enabled && do_fork) {
+    PrintOut(LOG_CRIT, "Option -n (--no-fork) is required if 'Type=notify' is set.\n");
+    return false;
+  }
+  return true;
+}
+
+// Ask systemd for more startup time.  Only done while notification is
+// enabled and READY=1 has not been reported yet.  In debug mode the
+// message is printed instead of sent.
+static inline void notify_extend_timeout()
+{
+  if (!notify_enabled || notify_ready)
+    return;
+
+  // typical drive spinup time is 20s tops
+  const char * const request = "EXTEND_TIMEOUT_USEC=20000000";
+  if (!debugmode)
+    sd_notify(0, request);
+  else
+    pout("sd_notify(0, \"%s\")\n", request);
+}
+
+// Send a STATUS message to systemd, optionally preceded by READY=1.
+// No-op if notification is disabled.  In debug mode the message is
+// printed (with the newline shown as "\n") instead of sent.
+static void notify_msg(const char * msg, bool ready = false)
+{
+  if (!notify_enabled)
+    return;
+
+  if (!debugmode) {
+    sd_notifyf(0, "%sSTATUS=%s", (ready ? "READY=1\n" : ""), msg);
+    return;
+  }
+  pout("sd_notify(0, \"%sSTATUS=%s\")\n", (ready ? "READY=1\\n" : ""), msg);
+}
+
+// Report "Checking N device(s) ..." as status message.
+static void notify_check(int numdev)
+{
+  if (!notify_enabled)
+    return;
+
+  const char * const plural = (numdev != 1 ? "s" : "");
+  char text[32];
+  snprintf(text, sizeof(text), "Checking %d device%s ...", numdev, plural);
+  notify_msg(text);
+}
+
+// Report the local time of the next check as status message.
+// The first call also reports READY=1 and sets notify_ready.
+static void notify_wait(time_t wakeuptime, int numdev)
+{
+  if (!notify_enabled)
+    return;
+
+  // Format the wakeup time as HH:MM:SS (local time)
+  struct tm tmbuf;
+  char ts[16] = "";
+  strftime(ts, sizeof(ts), "%H:%M:%S", time_to_tm_local(&tmbuf, wakeuptime));
+
+  const char * const plural = (numdev != 1 ? "s" : "");
+  char text[64];
+  snprintf(text, sizeof(text), "Next check of %d device%s will start at %s",
+           numdev, plural, ts);
+
+  const bool first_call = !notify_ready; // first call notifies READY=1
+  notify_msg(text, first_call);
+  notify_ready = true;
+}
+
+// Report a final status message that matches the exit status.
+static void notify_exit(int status)
+{
+  if (!notify_enabled)
+    return;
+
+  const char * text;
+  if (status == 0)
+    text = "Exiting ...";
+  else if (status == EXIT_BADCMD)
+    text = "Error in command line (see SYSLOG)";
+  else if (status == EXIT_BADCONF || status == EXIT_NOCONF || status == EXIT_READCONF)
+    text = "Error in config file (see SYSLOG)";
+  else if (status == EXIT_BADDEV)
+    text = "Unable to register a device (see SYSLOG)";
+  else if (status == EXIT_NODEV)
+    text = "No devices to monitor";
+  else
+    text = "Error (see SYSLOG)";
+
+  // Ensure that READY=1 is notified before 'exit(0)' because otherwise
+  // systemd will report a service (protocol) failure
+  const bool send_ready = (status == 0 && !notify_ready);
+  notify_msg(text, send_ready);
+}
+
+#else // HAVE_LIBSYSTEMD
+// No systemd notify support
+
+// Build without systemd notify support: on Linux, refuse to continue
+// (returns false after logging) if NOTIFY_SOCKET is set in the
+// environment.  Returns true in all other cases.
+static inline bool notify_post_init()
+{
+  bool ok = true;
+#ifdef __linux__
+  if (getenv("NOTIFY_SOCKET")) {
+    PrintOut(LOG_CRIT, "This version of smartd was build without 'Type=notify' support.\n");
+    ok = false;
+  }
+#endif
+  return ok;
+}
+
+// Empty stand-ins for the notify functions of the HAVE_LIBSYSTEMD build,
+// so that callers need no #ifdef.
+// Note: notify_msg() here takes only the message; the systemd variant
+// has an additional defaulted 'ready' parameter.
+static inline void notify_init() { }
+static inline void notify_extend_timeout() { }
+static inline void notify_msg(const char *) { }
+static inline void notify_check(int) { }
+static inline void notify_wait(time_t, int) { }
+static inline void notify_exit(int) { }
+
+#endif // HAVE_LIBSYSTEMD
+
+// Email frequencies
+// 'unknown' is the first enumerator and therefore the value of a
+// value-initialized emailfreqs (e.g. dev_config::emailfreq{}).
+// MailWarning() maps the others to reminder intervals:
+// once => -1, always => 0, daily => 1,
+// diminishing => 1, 2, 4, 8, 16, 32, 32, ...
+enum class emailfreqs : unsigned char {
+ unknown, once, always, daily, diminishing
+};
+
+// Attribute monitoring flags.
+// See monitor_attr_flags below.
+// Each value is a distinct single bit, so several flags can be OR-ed
+// into the one 'unsigned char' that attribute_flags keeps per attribute.
+enum {
+ MONITOR_IGN_FAILUSE = 0x01,
+ MONITOR_IGNORE = 0x02,
+ MONITOR_RAW_PRINT = 0x04,
+ MONITOR_RAW = 0x08,
+ MONITOR_AS_CRIT = 0x10,
+ MONITOR_RAW_AS_CRIT = 0x20,
+};
+
+// Array of flags for each attribute.
+// One byte of flag bits per attribute id; all bits start cleared.
+// Only ids 1..255 are accepted, any other id is silently ignored.
+class attribute_flags
+{
+public:
+  // Return true if at least one bit of 'flag' is set for attribute 'id'.
+  bool is_set(int id, unsigned char flag) const
+  {
+    if (!valid_id(id))
+      return false;
+    return ((m_flags[id] & flag) != 0);
+  }
+
+  // Add the bits of 'flags' to the flags of attribute 'id'.
+  void set(int id, unsigned char flags)
+  {
+    if (!valid_id(id))
+      return;
+    m_flags[id] |= flags;
+  }
+
+private:
+  // Id 0 and ids beyond the end of the array are not valid.
+  static bool valid_id(int id)
+  { return (0 < id && id < (int)sizeof(m_flags)); }
+
+  unsigned char m_flags[256]{};
+};
+
+
+/// Configuration data for a device. Read from smartd.conf.
+/// Supports copy & assignment and is compatible with STL containers.
+/// All scalar members have '{}' initializers, so a default constructed
+/// object starts with zero / false values; emailfreq starts as
+/// emailfreqs::unknown.
+struct dev_config
+{
+ int lineno{}; // Line number of entry in file
+ std::string name; // Device name (with optional extra info)
+ std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
+ std::string dev_type; // Device type argument from -d directive, empty if none
+ std::string dev_idinfo; // Device identify info for warning emails
+ std::string state_file; // Path of the persistent state file, empty if none
+ std::string attrlog_file; // Path of the persistent attrlog file, empty if none
+ int checktime{}; // Individual check interval, 0 if none
+ bool ignore{}; // Ignore this entry
+ bool id_is_unique{}; // True if dev_idinfo is unique (includes S/N or WWN)
+ bool smartcheck{}; // Check SMART status
+ bool usagefailed{}; // Check for failed Usage Attributes
+ bool prefail{}; // Track changes in Prefail Attributes
+ bool usage{}; // Track changes in Usage Attributes
+ bool selftest{}; // Monitor number of selftest errors
+ bool errorlog{}; // Monitor number of ATA errors
+ bool xerrorlog{}; // Monitor number of ATA errors (Extended Comprehensive error log)
+ bool offlinests{}; // Monitor changes in offline data collection status
+ bool offlinests_ns{}; // Disable auto standby if in progress
+ bool selfteststs{}; // Monitor changes in self-test execution status
+ bool selfteststs_ns{}; // Disable auto standby if in progress
+ bool permissive{}; // Ignore failed SMART commands
+ char autosave{}; // 1=disable, 2=enable Autosave Attributes
+ char autoofflinetest{}; // 1=disable, 2=enable Auto Offline Test
+ firmwarebug_defs firmwarebugs; // -F directives from drivedb or smartd.conf
+ bool ignorepresets{}; // Ignore database of -v options
+ bool showpresets{}; // Show database entry for this device
+ bool removable{}; // Device may disappear (not be present)
+ char powermode{}; // skip check, if disk in idle or standby mode
+ bool powerquiet{}; // skip powermode 'skipping checks' message
+ int powerskipmax{}; // how many times a check can be skipped
+ unsigned char tempdiff{}; // Track Temperature changes >= this limit
+ unsigned char tempinfo{}, tempcrit{}; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
+ regular_expression test_regex; // Regex for scheduled testing
+ unsigned test_offset_factor{}; // Factor for staggering of scheduled tests
+
+ // Configuration of email warning messages
+ // (MailWarning() does nothing if both emailcmdline and emailaddress are empty)
+ std::string emailcmdline; // script to execute, empty if no messages
+ std::string emailaddress; // email address, or empty
+ emailfreqs emailfreq{}; // Send emails once, daily, diminishing
+ bool emailtest{}; // Send test email?
+
+ // ATA ONLY
+ int dev_rpm{}; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
+ int set_aam{}; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
+ int set_apm{}; // disable(-1), enable(2..255->1..254) Advanced Power Management
+ int set_lookahead{}; // disable(-1), enable(1) read look-ahead
+ int set_standby{}; // set(1..255->0..254) standby timer
+ bool set_security_freeze{}; // Freeze ATA security
+ int set_wcache{}; // disable(-1), enable(1) write cache
+ int set_dsn{}; // disable(0x2), enable(0x1) DSN
+
+ bool sct_erc_set{}; // set SCT ERC to:
+ unsigned short sct_erc_readtime{}; // ERC read time (deciseconds)
+ unsigned short sct_erc_writetime{}; // ERC write time (deciseconds)
+
+ unsigned char curr_pending_id{}; // ID of current pending sector count, 0 if none
+ unsigned char offl_pending_id{}; // ID of offline uncorrectable sector count, 0 if none
+ bool curr_pending_incr{}, offl_pending_incr{}; // True if current/offline pending values increase
+ bool curr_pending_set{}, offl_pending_set{}; // True if '-C', '-U' set in smartd.conf
+
+ attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
+
+ ata_vendor_attr_defs attribute_defs; // -v options
+
+ // NVMe only
+ unsigned nvme_err_log_max_entries{}; // size of error log
+};
+
+// Number of allowed mail message types
+// This is the size of persistent_dev_state::maillog[] and must match the
+// number of names in the whichfail[] table of MailWarning(), which is
+// checked there with a STATIC_ASSERT.
+static const int SMARTD_NMAIL = 13;
+// Type for '-M test' mails (state not persistent)
+// Entries of this type are skipped when state files are read or written.
+static const int MAILTYPE_TEST = 0;
+// TODO: Add const or enum for all mail types.
+
+// Bookkeeping for one mail message type of one device.
+// Stored in the state file as mail.N.count, mail.N.first-sent-time and
+// mail.N.last-sent-time.
+struct mailinfo {
+ int logged{}; // number of times an email has been sent
+ time_t firstsent{}; // time first email was sent, as defined by time(2)
+ time_t lastsent{}; // time last email was sent, as defined by time(2)
+};
+
+/// Persistent state data for a device.
+/// write_dev_state() stores the temperature, self-test, scheduled test,
+/// mail, ATA error count, ATA attribute and NVMe members in the state
+/// file; parse_dev_state_line() reads them back.  The SCSI error counter
+/// members are not part of the state file, they are only written to the
+/// attribute log by write_dev_attrlog().
+struct persistent_dev_state
+{
+ unsigned char tempmin{}, tempmax{}; // Min/Max Temperatures
+
+ unsigned char selflogcount{}; // total number of self-test errors
+ unsigned short selfloghour{}; // lifetime hours of last self-test error
+
+ time_t scheduled_test_next_check{}; // Time of next check for scheduled self-tests
+
+ uint64_t selective_test_last_start{}; // Start LBA of last scheduled selective self-test
+ uint64_t selective_test_last_end{}; // End LBA of last scheduled selective self-test
+
+ mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
+
+ // ATA ONLY
+ int ataerrorcount{}; // Total number of ATA errors
+
+ // Persistent part of ata_smart_values:
+ // (filled from / copied back to smartval.vendor_attributes[] by
+ // dev_state::update_persistent_state() / update_temp_state())
+ struct ata_attribute {
+ unsigned char id{};
+ unsigned char val{};
+ unsigned char worst{}; // Byte needed for 'raw64' attribute only.
+ uint64_t raw{};
+ unsigned char resvd{};
+ };
+ ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
+
+ // SCSI ONLY
+
+ // Index order of scsi_error_counters[] as used by write_dev_attrlog():
+ // 0 = read, 1 = write, 2 = verify
+ struct scsi_error_counter_t {
+ struct scsiErrorCounter errCounter{};
+ unsigned char found{};
+ };
+ scsi_error_counter_t scsi_error_counters[3];
+
+ struct scsi_nonmedium_error_t {
+ struct scsiNonMediumError nme{};
+ unsigned char found{};
+ };
+ scsi_nonmedium_error_t scsi_nonmedium_error;
+
+ // NVMe only
+ uint64_t nvme_err_log_entries{};
+};
+
+/// Non-persistent state data for a device.
+/// Nothing in here is stored in the state file.
+struct temp_dev_state
+{
+ bool must_write{}; // true if persistent part should be written
+ // (checked and cleared by write_all_dev_states())
+
+ bool skip{}; // skip during next check cycle
+ time_t wakeuptime{}; // next wakeup time, 0 if unknown or global
+
+ bool not_cap_offline{}; // true == not capable of offline testing
+ bool not_cap_conveyance{};
+ bool not_cap_short{};
+ bool not_cap_long{};
+ bool not_cap_selective{};
+
+ unsigned char temperature{}; // last recorded Temperature (in Celsius)
+ // (written to the attribute log if nonzero)
+ time_t tempmin_delay{}; // time where Min Temperature tracking will start
+
+ bool removed{}; // true if open() failed for removable device
+
+ bool powermodefail{}; // true if power mode check failed
+ int powerskipcnt{}; // Number of checks skipped due to idle or standby mode
+ int lastpowermodeskipped{}; // the last power mode that was skipped
+
+ bool attrlog_dirty{}; // true if persistent part has new attr values that
+ // need to be written to attrlog
+ // (checked and cleared by write_all_dev_attrlogs())
+
+ // SCSI ONLY
+ // TODO: change to bool
+ unsigned char SmartPageSupported{}; // has log sense IE page (0x2f)
+ unsigned char TempPageSupported{}; // has log sense temperature page (0xd)
+ unsigned char ReadECounterPageSupported{};
+ unsigned char WriteECounterPageSupported{};
+ unsigned char VerifyECounterPageSupported{};
+ unsigned char NonMediumErrorPageSupported{};
+ unsigned char SuppressReport{}; // minimize nuisance reports
+ unsigned char modese_len{}; // mode sense/select cmd len: 0 (don't
+ // know yet) 6 or 10
+ // ATA ONLY
+ uint64_t num_sectors{}; // Number of sectors
+ ata_smart_values smartval{}; // SMART data
+ ata_smart_thresholds_pvt smartthres{}; // SMART thresholds
+ bool offline_started{}; // true if offline data collection was started
+ bool selftest_started{}; // true if self-test was started
+};
+
+/// Runtime state data for a device.
+/// Combines the persistent and the non-persistent part in one object.
+struct dev_state
+: public persistent_dev_state,
+ public temp_dev_state
+{
+ // Copy smartval.vendor_attributes[] into ata_attributes[]
+ void update_persistent_state();
+ // Copy ata_attributes[] back into smartval.vendor_attributes[]
+ void update_temp_state();
+};
+
+/// Container for configuration info for each device.
+using dev_config_vector = std::vector<dev_config>;
+
+/// Container for state info for each device.
+/// (entry i belongs to entry i of the dev_config_vector)
+using dev_state_vector = std::vector<dev_state>;
+
+// Copy ATA attributes to persistent state.
+// Each entry of smartval.vendor_attributes[] is copied to the entry of
+// ata_attributes[] with the same index.  The 6 raw bytes are packed
+// little endian (raw[0] = least significant byte) into one 64-bit value.
+void dev_state::update_persistent_state()
+{
+  int idx = 0;
+  for (ata_attribute & dst : ata_attributes) {
+    const ata_smart_attribute & src = smartval.vendor_attributes[idx++];
+    dst.id = src.id;
+
+    if (src.id == 0) {
+      // Unused slot: clear the values (resvd is left as is)
+      dst.val = 0;
+      dst.worst = 0;
+      dst.raw = 0;
+      continue;
+    }
+
+    dst.val = src.current;
+    dst.worst = src.worst;
+
+    // Assemble the raw value starting with the most significant byte
+    uint64_t packed = 0;
+    for (int b = 5; b >= 0; b--)
+      packed = (packed << 8) | src.raw[b];
+    dst.raw = packed;
+
+    dst.resvd = src.reserv;
+  }
+}
+
+// Copy ATA from persistent to temp state.
+// Inverse of update_persistent_state(): each ata_attributes[] entry is
+// copied to the smartval.vendor_attributes[] entry with the same index,
+// and the low 48 bits of the raw value are split into 6 bytes
+// (raw[0] = least significant byte).
+void dev_state::update_temp_state()
+{
+  int idx = 0;
+  for (const ata_attribute & src : ata_attributes) {
+    ata_smart_attribute & dst = smartval.vendor_attributes[idx++];
+    dst.id = src.id;
+
+    if (src.id == 0) {
+      // Unused slot: clear the values (reserv is left as is)
+      dst.current = 0;
+      dst.worst = 0;
+      memset(dst.raw, 0, sizeof(dst.raw));
+      continue;
+    }
+
+    dst.current = src.val;
+    dst.worst = src.worst;
+    for (int b = 0; b < 6; b++)
+      dst.raw[b] = (unsigned char)(src.raw >> (8 * b));
+    dst.reserv = src.resvd;
+  }
+}
+
+// Parse a line from a state file.
+// A line has the form "KEY = NUMBER".  The KEY selects the member of
+// 'state' which receives the (unsigned decimal) NUMBER.
+// Returns true if the line was accepted, false if it does not match the
+// expected syntax or if an index in the key is out of range.
+// Notes:
+// - Lines for mail type MAILTYPE_TEST are accepted but not stored.
+// - The number is parsed as 64-bit value and then narrowed by a cast to
+//   the type of the target member without range check.
+static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
+{
+ // The numbers in the comments below are the regex capture group
+ // numbers.  The code after the match relies on exactly this numbering.
+ static const regular_expression regex(
+ "^ *"
+ "((temperature-min)" // (1 (2)
+ "|(temperature-max)" // (3)
+ "|(self-test-errors)" // (4)
+ "|(self-test-last-err-hour)" // (5)
+ "|(scheduled-test-next-check)" // (6)
+ "|(selective-test-last-start)" // (7)
+ "|(selective-test-last-end)" // (8)
+ "|(ata-error-count)" // (9)
+ "|(mail\\.([0-9]+)\\." // (10 (11)
+ "((count)" // (12 (13)
+ "|(first-sent-time)" // (14)
+ "|(last-sent-time)" // (15)
+ ")" // 12)
+ ")" // 10)
+ "|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
+ "((id)" // (18 (19)
+ "|(val)" // (20)
+ "|(worst)" // (21)
+ "|(raw)" // (22)
+ "|(resvd)" // (23)
+ ")" // 18)
+ ")" // 16)
+ "|(nvme-err-log-entries)" // (24)
+ ")" // 1)
+ " *= *([0-9]+)[ \n]*$" // (25)
+ );
+
+ // match[0] is the whole match, match[1..25] are the groups above
+ const int nmatch = 1+25;
+ regular_expression::match_range match[nmatch];
+ if (!regex.execute(line, nmatch, match))
+ return false;
+ if (match[nmatch-1].rm_so < 0)
+ return false;
+
+ // Group 25: the number after the '='
+ uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
+
+ // 'm' walks through the group numbers: each '++m' below tests the next
+ // simple key, starting with group 2.
+ int m = 1;
+ if (match[++m].rm_so >= 0)
+ state.tempmin = (unsigned char)val;
+ else if (match[++m].rm_so >= 0)
+ state.tempmax = (unsigned char)val;
+ else if (match[++m].rm_so >= 0)
+ state.selflogcount = (unsigned char)val;
+ else if (match[++m].rm_so >= 0)
+ state.selfloghour = (unsigned short)val;
+ else if (match[++m].rm_so >= 0)
+ state.scheduled_test_next_check = (time_t)val;
+ else if (match[++m].rm_so >= 0)
+ state.selective_test_last_start = val;
+ else if (match[++m].rm_so >= 0)
+ state.selective_test_last_end = val;
+ else if (match[++m].rm_so >= 0)
+ state.ataerrorcount = (int)val;
+ // m == 9 here, '+=2' skips group 10 and selects group 11 (mail index)
+ else if (match[m+=2].rm_so >= 0) {
+ int i = atoi(line+match[m].rm_so);
+ if (!(0 <= i && i < SMARTD_NMAIL))
+ return false;
+ if (i == MAILTYPE_TEST) // Don't suppress test mails
+ return true;
+ // '+=2' skips group 12 and selects group 13 (count), then 14, 15
+ if (match[m+=2].rm_so >= 0)
+ state.maillog[i].logged = (int)val;
+ else if (match[++m].rm_so >= 0)
+ state.maillog[i].firstsent = (time_t)val;
+ else if (match[++m].rm_so >= 0)
+ state.maillog[i].lastsent = (time_t)val;
+ else
+ return false;
+ }
+ // m == 11 here, '+=5+1' selects group 17 (attribute slot index)
+ else if (match[m+=5+1].rm_so >= 0) {
+ int i = atoi(line+match[m].rm_so);
+ if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
+ return false;
+ // '+=2' skips group 18 and selects group 19 (id), then 20..23
+ if (match[m+=2].rm_so >= 0)
+ state.ata_attributes[i].id = (unsigned char)val;
+ else if (match[++m].rm_so >= 0)
+ state.ata_attributes[i].val = (unsigned char)val;
+ else if (match[++m].rm_so >= 0)
+ state.ata_attributes[i].worst = (unsigned char)val;
+ else if (match[++m].rm_so >= 0)
+ state.ata_attributes[i].raw = val;
+ else if (match[++m].rm_so >= 0)
+ state.ata_attributes[i].resvd = (unsigned char)val;
+ else
+ return false;
+ }
+ // m == 17 here, so m+7 is group 24 (nvme-err-log-entries)
+ else if (match[m+7].rm_so >= 0)
+ state.nvme_err_log_entries = val;
+ else
+ return false;
+ return true;
+}
+
+// Read a state file.
+// path:  name of the state file
+// state: receives the parsed values; only modified on success
+// Returns false if the file cannot be opened (a message is printed unless
+// the file simply does not exist) or if it contains invalid lines only.
+// Empty lines and lines starting with '#' are ignored.  Invalid lines
+// are counted and reported, but do not prevent the valid lines from
+// being used.
+static bool read_dev_state(const char * path, persistent_dev_state & state)
+{
+  stdio_file f(path, "r");
+  if (!f) {
+    if (errno != ENOENT)
+      pout("Cannot read state file \"%s\"\n", path);
+    return false;
+  }
+#ifdef __CYGWIN__
+  setmode(fileno(f), O_TEXT); // Allow files with \r\n
+#endif
+
+  // Parse into a fresh object so that 'state' is untouched on failure
+  persistent_dev_state new_state;
+  int good = 0, bad = 0;
+  char line[256];
+  while (fgets(line, sizeof(line), f)) {
+    // fgets() keeps the line terminator, so it must be skipped together
+    // with the blanks.  Otherwise an empty line would not be recognized
+    // as empty and would be counted as invalid.
+    const char * s = line + strspn(line, " \t\r\n");
+    if (!*s || *s == '#')
+      continue;
+    if (!parse_dev_state_line(line, new_state))
+      bad++;
+    else
+      good++;
+  }
+
+  if (bad) {
+    if (!good) {
+      pout("%s: format error\n", path);
+      return false;
+    }
+    pout("%s: %d invalid line(s) ignored\n", path, bad);
+  }
+
+  // This sets the values missing in the file to 0.
+  state = new_state;
+  return true;
+}
+
+// Write "NAME = VALUE\n" to the state file.
+// Nothing is written for a zero value because the reader treats missing
+// entries as 0.
+static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
+{
+  if (!val)
+    return;
+  fprintf(f, "%s = %" PRIu64 "\n", name, val);
+}
+
+// Write "NAME1.ID.NAME2 = VALUE\n" to the state file.
+// Nothing is written for a zero value because the reader treats missing
+// entries as 0.
+static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
+{
+  if (!val)
+    return;
+  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
+}
+
+// Write a state file
+// path:  name of the state file; a previous file is kept as "path~"
+// state: values to write; members which are 0 are omitted
+// Returns true if the file was written completely, false (after printing
+// a message) if it could not be created or if writing failed.
+static bool write_dev_state(const char * path, const persistent_dev_state & state)
+{
+  // Rename old "file" to "file~"
+  // (best effort: both calls fail harmlessly if there is no old file)
+  std::string pathbak = path; pathbak += '~';
+  unlink(pathbak.c_str());
+  rename(path, pathbak.c_str());
+
+  stdio_file f(path, "w");
+  if (!f) {
+    pout("Cannot create state file \"%s\"\n", path);
+    return false;
+  }
+
+  fprintf(f, "# smartd state file\n");
+  write_dev_state_line(f, "temperature-min", state.tempmin);
+  write_dev_state_line(f, "temperature-max", state.tempmax);
+  write_dev_state_line(f, "self-test-errors", state.selflogcount);
+  write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
+  write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
+  write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
+  write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);
+
+  for (int i = 0; i < SMARTD_NMAIL; i++) {
+    if (i == MAILTYPE_TEST) // Don't suppress test mails
+      continue;
+    const mailinfo & mi = state.maillog[i];
+    if (!mi.logged)
+      continue;
+    write_dev_state_line(f, "mail", i, "count", mi.logged);
+    write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
+    write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
+  }
+
+  // ATA ONLY
+  write_dev_state_line(f, "ata-error-count", state.ataerrorcount);
+
+  for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
+    const auto & pa = state.ata_attributes[i];
+    if (!pa.id)
+      continue;
+    write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
+    write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
+    write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
+    write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
+    write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
+  }
+
+  // NVMe only
+  write_dev_state_line(f, "nvme-err-log-entries", state.nvme_err_log_entries);
+
+  // Flush explicitly so that a failed write (e.g. disk full) is detected
+  // here.  Otherwise the caller would report the state as written and
+  // would not retry.
+  if (fflush(f) != 0 || ferror(f)) {
+    pout("Cannot write state file \"%s\"\n", path);
+    return false;
+  }
+
+  return true;
+}
+
+// Write to the attrlog file
+// Appends one line to 'path': a local timestamp followed by
+// ';'-terminated fields for the ATA attributes (id;value;raw), the SCSI
+// error counters which were found, and the temperature if nonzero.
+// Returns false (after printing a message) if the file cannot be opened.
+static bool write_dev_attrlog(const char * path, const dev_state & state)
+{
+  stdio_file f(path, "a");
+  if (!f) {
+    pout("Cannot create attribute log file \"%s\"\n", path);
+    return false;
+  }
+
+  // Timestamp
+  const time_t now = time(nullptr);
+  struct tm tmbuf;
+  const struct tm * tms = time_to_tm_local(&tmbuf, now);
+  fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
+          1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
+          tms->tm_hour, tms->tm_min, tms->tm_sec);
+
+  // ATA ONLY
+  for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
+    const auto & attr = state.ata_attributes[i];
+    if (attr.id)
+      fprintf(f, "\t%d;%d;%" PRIu64 ";", attr.id, attr.val, attr.raw);
+  }
+
+  // SCSI ONLY
+  static const char * const pageNames[3] = {"read", "write", "verify"};
+  for (int k = 0; k < 3; ++k) {
+    const auto & entry = state.scsi_error_counters[k];
+    if (!entry.found)
+      continue;
+    const char * const pn = pageNames[k];
+    const struct scsiErrorCounter & ec = entry.errCounter;
+    fprintf(f, "\t%s-corr-by-ecc-fast;%" PRIu64 ";"
+               "\t%s-corr-by-ecc-delayed;%" PRIu64 ";"
+               "\t%s-corr-by-retry;%" PRIu64 ";"
+               "\t%s-total-err-corrected;%" PRIu64 ";"
+               "\t%s-corr-algorithm-invocations;%" PRIu64 ";"
+               "\t%s-gb-processed;%.3f;"
+               "\t%s-total-unc-errors;%" PRIu64 ";",
+            pn, ec.counter[0],
+            pn, ec.counter[1],
+            pn, ec.counter[2],
+            pn, ec.counter[3],
+            pn, ec.counter[4],
+            pn, (ec.counter[5] / 1000000000.0),
+            pn, ec.counter[6]);
+  }
+
+  const auto & nonmedium = state.scsi_nonmedium_error;
+  if (nonmedium.found && nonmedium.nme.gotPC0)
+    fprintf(f, "\tnon-medium-errors;%" PRIu64 ";", nonmedium.nme.counterPC0);
+
+  // write SCSI current temperature if it is monitored
+  if (state.temperature)
+    fprintf(f, "\ttemperature;%d;", state.temperature);
+
+  // end of line
+  fprintf(f, "\n");
+  return true;
+}
+
+// Write all state files. If write_always is false, don't write
+// unless must_write is set.
+// Devices without a state file are skipped.  must_write is cleared only
+// after a successful write.  The success message is logged if
+// write_always is set or in debug mode.
+static void write_all_dev_states(const dev_config_vector & configs,
+                                 dev_state_vector & states,
+                                 bool write_always = true)
+{
+  for (size_t idx = 0; idx < states.size(); idx++) {
+    const dev_config & cfg = configs.at(idx);
+    if (cfg.state_file.empty())
+      continue;
+
+    dev_state & st = states[idx];
+    const bool wanted = (write_always || st.must_write);
+    if (!wanted)
+      continue;
+
+    if (write_dev_state(cfg.state_file.c_str(), st)) {
+      st.must_write = false;
+      if (write_always || debugmode)
+        PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
+                 cfg.name.c_str(), cfg.state_file.c_str());
+    }
+  }
+}
+
+// Write to all attrlog files
+// Only devices with an attrlog file and new attribute values
+// (attrlog_dirty) are written.  The dirty flag is cleared afterwards,
+// regardless of the result of the write.
+static void write_all_dev_attrlogs(const dev_config_vector & configs,
+                                   dev_state_vector & states)
+{
+  for (size_t idx = 0; idx < states.size(); idx++) {
+    const dev_config & cfg = configs.at(idx);
+    if (cfg.attrlog_file.empty())
+      continue;
+
+    dev_state & st = states[idx];
+    if (!st.attrlog_dirty)
+      continue;
+
+    write_dev_attrlog(cfg.attrlog_file.c_str(), st);
+    st.attrlog_dirty = false;
+  }
+}
+
+extern "C" { // signal handlers require C-linkage
+
+// The handlers below only record the event in a volatile flag.
+
+// Note if we catch a SIGUSR1
+static void USR1handler(int sig)
+{
+  if (sig != SIGUSR1)
+    return;
+  caughtsigUSR1 = 1;
+}
+
+#ifdef _WIN32
+// Note if we catch a SIGUSR2
+static void USR2handler(int sig)
+{
+  if (sig != SIGUSR2)
+    return;
+  caughtsigUSR2 = 1;
+}
+#endif
+
+// Note if we catch a HUP (or INT in debug mode)
+// Flag value: 1 for SIGHUP, 2 for any other signal
+static void HUPhandler(int sig)
+{
+  caughtsigHUP = (sig == SIGHUP ? 1 : 2);
+}
+
+// signal handler for TERM, QUIT, and INT (if not in debug mode)
+// Only the first signal is recorded, later ones are ignored
+static void sighandler(int sig)
+{
+  if (caughtsigEXIT)
+    return;
+  caughtsigEXIT = sig;
+}
+
+} // extern "C"
+
+#ifdef HAVE_LIBCAP_NG
+// capabilities(7) support
+
+static int capabilities_mode /* = 0 */; // 1=enabled, 2=mail
+
+// Reduce the capabilities of the process to the required set.
+// Does nothing if capabilities_mode is 0.
+// Note: the return values of the capng_*() calls are not checked.
+static void capabilities_drop_now()
+{
+ if (!capabilities_mode)
+ return;
+ // Start from an empty set
+ capng_clear(CAPNG_SELECT_BOTH);
+ // Always kept in the effective and permitted sets
+ capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
+ CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
+ // Only needed if the warning script runs as another (nonzero) uid or gid
+ if (warn_as_user && (warn_uid || warn_gid)) {
+ // For popen_as_ugid()
+ capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
+ CAP_SETGID, CAP_SETUID, -1);
+ }
+ // Mode 2 (mail): additionally keep these in the bounding set
+ if (capabilities_mode > 1) {
+ // For exim MTA
+ capng_updatev(CAPNG_ADD, CAPNG_BOUNDING_SET,
+ CAP_SETGID, CAP_SETUID, CAP_CHOWN, CAP_FOWNER, CAP_DAC_OVERRIDE, -1);
+ }
+ // Activate the sets built above
+ capng_apply(CAPNG_SELECT_BOTH);
+}
+
+// Log a hint that mail problems may be caused by dropped capabilities.
+// Does nothing if capabilities_mode is 0.  The text depends on whether
+// mode 1 or the 'mail' mode is active.
+static void capabilities_log_error_hint()
+{
+  if (!capabilities_mode)
+    return;
+
+  const char * const hint =
+    (capabilities_mode == 1 ? "', try '--capabilities=mail'"
+                            : "=mail', please inform " PACKAGE_BUGREPORT);
+  PrintOut(LOG_INFO, "If mail notification does not work with '--capabilities%s\n",
+           hint);
+}
+
+#else // HAVE_LIBCAP_NG
+// No capabilities(7) support
+
+// Empty stand-ins for builds without HAVE_LIBCAP_NG, so that callers
+// need no #ifdef.
+static inline void capabilities_drop_now() { }
+static inline void capabilities_log_error_hint() { }
+
+#endif // HAVE_LIBCAP_NG
+
+// a replacement for setenv() which is not available on all platforms.
+// Note that the string passed to putenv must not be freed or made
+// invalid, since a pointer to it is kept by putenv(). This means that
+// it must either be a static buffer or allocated off the heap. The
+// string can be freed if the environment variable is redefined via
+// another call to putenv(). There is no portable way to unset a variable
+// with putenv(). So we manage the buffer in a static object.
+// Using setenv() if available is not considered because some
+// implementations may produce memory leaks.
+
+// Owner of the "NAME=value" string which is passed to putenv().
+// putenv() keeps the pointer it is given, so the string must stay valid
+// until the variable is redefined.  The object is not copyable and has
+// no destructor: the last string is never freed, which fits the intended
+// use as a static object.
+class env_buffer
+{
+public:
+  env_buffer() = default;
+  env_buffer(const env_buffer &) = delete;
+  void operator=(const env_buffer &) = delete;
+
+  // Set environment variable 'name' to 'value'.
+  // Throws std::runtime_error if putenv() fails.
+  void set(const char * name, const char * value);
+private:
+  char * m_buf = nullptr; // String currently referenced by the environment
+};
+
+void env_buffer::set(const char * name, const char * value)
+{
+  // "NAME" + '=' + "value" + '\0'
+  const size_t size = strlen(name) + 1 + strlen(value) + 1;
+  char * newbuf = new char[size];
+  snprintf(newbuf, size, "%s=%s", name, value);
+
+  if (putenv(newbuf)) {
+    // The environment does not reference the new string, so it must be
+    // freed here, otherwise it would leak.
+    delete [] newbuf;
+    throw std::runtime_error("putenv() failed");
+  }
+
+  // The environment now points to newbuf, the old string can be freed.
+  // This assumes that the same NAME is passed on each call
+  delete [] m_buf;
+  m_buf = newbuf;
+}
+
+#define EBUFLEN 1024
+
+static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
+  __attribute_format_printf(4, 5);
+
+// If either address or executable path is non-null then send and log
+// a warning email, or execute executable.
+//
+// CFG provides the mail address, the mail command line, the reminder
+// frequency and the device names. STATE provides the per warning type mail
+// history (state.maillog[]) which is updated here. WHICH is the warning
+// type, an index into whichfail[] below, 0 is the email test. FMT and the
+// following arguments are the printf()-style warning message.
+//
+// The information is passed to the warning script via SMARTD_* environment
+// variables. The script is run via popen(), its output and exit status are
+// logged. The mail counter is incremented even if running the script failed.
+static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
+{
+  // See if user wants us to send mail
+  if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
+    return;
+
+  // Which type of mail are we sending?
+  static const char * const whichfail[] = {
+    "EmailTest", // 0
+    "Health", // 1
+    "Usage", // 2
+    "SelfTest", // 3
+    "ErrorCount", // 4
+    "FailedHealthCheck", // 5
+    "FailedReadSmartData", // 6
+    "FailedReadSmartErrorLog", // 7
+    "FailedReadSmartSelfTestLog", // 8
+    "FailedOpenDevice", // 9
+    "CurrentPendingSector", // 10
+    "OfflineUncorrectableSector", // 11
+    "Temperature" // 12
+  };
+  STATIC_ASSERT(sizeof(whichfail) == SMARTD_NMAIL * sizeof(whichfail[0]));
+
+  if (!(0 <= which && which < SMARTD_NMAIL)) {
+    PrintOut(LOG_CRIT, "Internal error in MailWarning(): which=%d\n", which);
+    return;
+  }
+  mailinfo * mail = state.maillog + which;
+
+  // Calc current and next interval for warning reminder emails
+  int days, nextdays;
+  if (which == 0)
+    days = nextdays = -1; // EmailTest
+  else switch (cfg.emailfreq) {
+    case emailfreqs::once:
+      days = nextdays = -1; break;
+    case emailfreqs::always:
+      days = nextdays = 0; break;
+    case emailfreqs::daily:
+      days = nextdays = 1; break;
+    case emailfreqs::diminishing:
+      // 0, 1, 2, 3, 4, 5, 6, 7, ... => 1, 2, 4, 8, 16, 32, 32, 32, ...
+      nextdays = 1 << ((unsigned)mail->logged <= 5 ? mail->logged : 5);
+      // 0, 1, 2, 3, 4, 5, 6, 7, ... => 0, 1, 2, 4, 8, 16, 32, 32, ... (0 not used below)
+      days = ((unsigned)mail->logged <= 5 ? nextdays >> 1 : nextdays);
+      break;
+    default:
+      PrintOut(LOG_CRIT, "Internal error in MailWarning(): cfg.emailfreq=%d\n", (int)cfg.emailfreq);
+      return;
+  }
+
+  time_t now = time(nullptr);
+  if (mail->logged) {
+    // Return if no warning reminder email needs to be sent (now)
+    if (days < 0)
+      return; // '-M once' or EmailTest
+    if (days > 0 && now < mail->lastsent + days * 24 * 3600)
+      return; // '-M daily/diminishing' and too early
+  }
+  else {
+    // Record the time of this first email message
+    mail->firstsent = now;
+  }
+
+  // Record the time of this email message
+  mail->lastsent = now;
+
+  // print warning string into message
+  // Note: Message length may reach ~300 characters as device names may be
+  // very long on certain platforms (macOS ~230 characters).
+  // Message length must not exceed email line length limit, see RFC 5322:
+  // "... MUST be no more than 998 characters, ... excluding the CRLF."
+  char message[512];
+  va_list ap;
+  va_start(ap, fmt);
+  vsnprintf(message, sizeof(message), fmt, ap);
+  va_end(ap);
+
+  // replace commas by spaces to separate recipients
+  std::string address = cfg.emailaddress;
+  std::replace(address.begin(), address.end(), ',', ' ');
+
+  // Export information in environment variables that will be useful
+  // for user scripts
+  // Note: Each env_buffer must always be used for the same variable name
+  const char * executable = cfg.emailcmdline.c_str();
+  static env_buffer env[13];
+  env[0].set("SMARTD_MAILER", executable);
+  env[1].set("SMARTD_MESSAGE", message);
+  char dates[DATEANDEPOCHLEN];
+  snprintf(dates, sizeof(dates), "%d", mail->logged);
+  env[2].set("SMARTD_PREVCNT", dates);
+  dateandtimezoneepoch(dates, mail->firstsent);
+  env[3].set("SMARTD_TFIRST", dates);
+  // time_t may be wider than int: print it as long long so that the epoch
+  // value is not truncated (dates after 2038 with a 64-bit time_t)
+  snprintf(dates, sizeof(dates), "%lld", (long long)mail->firstsent);
+  env[4].set("SMARTD_TFIRSTEPOCH", dates);
+  env[5].set("SMARTD_FAILTYPE", whichfail[which]);
+  env[6].set("SMARTD_ADDRESS", address.c_str());
+  env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());
+
+  // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
+  env[8].set("SMARTD_DEVICETYPE",
+    (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
+  env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());
+
+  env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());
+  // Empty string if no reminder will follow
+  dates[0] = 0;
+  if (nextdays >= 0)
+    snprintf(dates, sizeof(dates), "%d", nextdays);
+  env[11].set("SMARTD_NEXTDAYS", dates);
+  // Avoid false positive recursion detection by smartd_warning.{sh,cmd}
+  env[12].set("SMARTD_SUBJECT", "");
+
+  // now construct a command to send this as EMAIL
+  if (!*executable)
+    executable = "<mail>";
+  const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
+  const char * newwarn = (which? "Warning via" : "Test of");
+
+  // Build the command in a std::string: a fixed size buffer would silently
+  // truncate a long script path and then run a different command
+#ifdef _WIN32
+  // Path may contain spaces
+  const std::string command = strprintf("\"%s\" 2>&1", warning_script.c_str());
+#else
+  const std::string command = strprintf("%s 2>&1", warning_script.c_str());
+#endif
+
+  // tell SYSLOG what we are about to do...
+  PrintOut(LOG_INFO,"%s %s to %s%s ...\n",
+    (which ? "Sending warning via" : "Executing test of"), executable, newadd,
+    (
+#ifdef HAVE_POSIX_API
+      warn_as_user ?
+      strprintf(" (uid=%u(%s) gid=%u(%s))",
+        (unsigned)warn_uid, warn_uname.c_str(),
+        (unsigned)warn_gid, warn_gname.c_str() ).c_str() :
+#elif defined(_WIN32)
+      warn_as_restr_user ? " (restricted user)" :
+#endif
+      ""
+    )
+  );
+
+  // issue the command to send mail or to run the user's executable
+  errno=0;
+  FILE * pfp;
+
+#ifdef HAVE_POSIX_API
+  if (warn_as_user) {
+    pfp = popen_as_ugid(command.c_str(), "r", warn_uid, warn_gid);
+  } else
+#endif
+  {
+#ifdef _WIN32
+    pfp = popen_as_restr_user(command.c_str(), "r", warn_as_restr_user);
+#else
+    pfp = popen(command.c_str(), "r");
+#endif
+  }
+
+  if (!pfp)
+    // failed to popen() mail process
+    PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
+      newwarn, executable, newadd, errno?strerror(errno):"");
+  else {
+    // pipe succeeded!
+    int len;
+    char buffer[EBUFLEN];
+
+    // if unexpected output on stdout/stderr, null terminate, print, and flush
+    if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
+      int count=0;
+      // Reserve the last byte for the terminating null if the buffer is full
+      int newlen = len<EBUFLEN ? len : EBUFLEN-1;
+      buffer[newlen]='\0';
+      PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
+        newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);
+
+      // flush pipe if needed
+      while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
+        count++;
+
+      // tell user that pipe was flushed, or that something is really wrong
+      if (count && count<EBUFLEN)
+        PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
+          newwarn, executable, newadd);
+      else if (count)
+        PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
+          newwarn, executable, newadd);
+    }
+
+    // if something went wrong with mail process, print warning
+    errno=0;
+    int status;
+
+#ifdef HAVE_POSIX_API
+    if (warn_as_user) {
+      status = pclose_as_ugid(pfp);
+    } else
+#endif
+    {
+      status = pclose(pfp);
+    }
+
+    if (status == -1)
+      PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
+        errno?strerror(errno):"");
+    else {
+      // mail process apparently succeeded. Check and report exit status
+      if (WIFEXITED(status)) {
+        // exited 'normally' (but perhaps with nonzero status)
+        int status8 = WEXITSTATUS(status);
+        if (status8>128)
+          PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
+            newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
+        else if (status8) {
+          PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
+            newwarn, executable, newadd, status, status8);
+          capabilities_log_error_hint();
+        }
+        else
+          PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
+      }
+
+      if (WIFSIGNALED(status))
+        PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
+          newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));
+
+      // this branch is probably not possible. If subprocess is
+      // stopped then pclose() should not return.
+      if (WIFSTOPPED(status))
+        PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
+          newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
+
+    }
+  }
+
+  // increment mail sent counter
+  mail->logged++;
+}
+
+static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
+  __attribute_format_printf(4, 5);
+
+// Log that the warning condition of type WHICH is gone and clear the mail
+// history of this type. Does nothing if WHICH is out of range or if no mail
+// was logged for this type. FMT and the following arguments are the
+// printf()-style text inserted into the log message.
+static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
+{
+  if (which < 0 || which >= SMARTD_NMAIL)
+    return;
+
+  // Nothing to reset if no mail sent yet
+  mailinfo & entry = state.maillog[which];
+  if (!entry.logged)
+    return;
+
+  // Format the caller's text
+  char text[256];
+  va_list args;
+  va_start(args, fmt);
+  vsnprintf(text, sizeof(text), fmt, args);
+  va_end(args);
+
+  const char * plural = (entry.logged == 1 ? "" : "s");
+  PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n",
+    cfg.name.c_str(), text, entry.logged, plural);
+
+  // Forget mail counter and timestamps, request that the state is written
+  entry = mailinfo();
+  state.must_write = true;
+}
+
+#ifndef _WIN32
+
+// Format the message and pass each non-empty line of the result to a
+// separate syslog(3) call.
+__attribute_format_printf(2, 0)
+static void vsyslog_lines(int priority, const char * fmt, va_list ap)
+{
+  char text[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
+  vsnprintf(text, sizeof(text), fmt, ap);
+
+  char * line = text;
+  while (line && *line) {
+    // Cut the text at the next newline, remember where the next line starts
+    char * next = strchr(line, '\n');
+    if (next)
+      *next++ = 0;
+    // Empty lines are not logged
+    if (*line)
+      syslog(priority, "%s\n", line);
+    // nullptr if this was the last line
+    line = next;
+  }
+}
+
+#else // _WIN32
+// os_win32/syslog_win32.cpp supports multiple lines.
+#define vsyslog_lines vsyslog
+#endif // _WIN32
+
+// Output function called by code outside this file: the text is printed or
+// logged only if one of the debug modes is set, otherwise it is discarded.
+// [From GLIBC Manual: Since the prototype doesn't specify types for
+// optional arguments, in a call to a variadic function the default
+// argument promotions are performed on the optional argument
+// values. This means the objects of type char or short int (whether
+// signed or not) are promoted to either int or unsigned int, as
+// appropriate.]
+void pout(const char *fmt, ...)
+{
+  // get the correct time in syslog()
+  FixGlibcTimeZoneBug();
+
+  // Any debugmode except 0 and 2: print the output from the ataprint.o functions
+  const bool to_stream = (debugmode && debugmode != 2);
+  // debugmode 2 (output from knowndrives.o functions) or ATA/SCSI debugging: use syslog
+  const bool to_syslog = (!to_stream && (debugmode==2 || ata_debugmode || scsi_debugmode));
+
+  va_list args;
+  va_start(args, fmt);
+  if (to_stream) {
+#ifdef _WIN32
+    FILE * out = (facility == LOG_LOCAL1 ? stderr : stdout); // logging to stdout
+#else
+    FILE * out = stdout;
+#endif
+    vfprintf(out, fmt, args);
+    fflush(out);
+  }
+  else if (to_syslog) {
+    openlog("smartd", LOG_PID, facility);
+    vsyslog_lines(LOG_INFO, fmt, args);
+    closelog();
+  }
+  // else: output is discarded
+  va_end(args);
+}
+
+// Print a printf()-style message: in debug mode to a stdio stream,
+// otherwise with PRIORITY to the syslog.
+static void PrintOut(int priority, const char *fmt, ...)
+{
+  // get the correct time in syslog()
+  FixGlibcTimeZoneBug();
+
+  va_list args;
+  va_start(args, fmt);
+  if (!debugmode) {
+    // Normal operation: one syslog entry per line
+    openlog("smartd", LOG_PID, facility);
+    vsyslog_lines(priority, fmt, args);
+    closelog();
+  }
+  else {
+#ifdef _WIN32
+    FILE * out = (facility == LOG_LOCAL1 ? stderr : stdout); // logging to stdout
+#else
+    FILE * out = stdout;
+#endif
+    vfprintf(out, fmt, args);
+    fflush(out);
+  }
+  va_end(args);
+}
+
+// Report an invalid SMART checksum via pout(). Called from atacmds.cpp.
+// STRING is inserted into the message in front of " error:".
+void checksumwarning(const char * string)
+{
+  pout("Warning! %s error: invalid SMART checksum.\n",
+    string);
+}
+
+#ifndef _WIN32
+
+// Wait until the PID file exists, so a calling program knows that the daemon
+// is really up and running and has a pid to kill it.
+// Returns true at once if no PID file is configured or in debug mode.
+// Returns false if the file did not show up after 10 checks, one per second.
+static bool WaitForPidFile()
+{
+  if (debugmode || pid_file.empty())
+    return true;
+
+  const int max_wait = 10;
+  int waited = 0;
+  while (waited < max_wait) {
+    struct stat st;
+    if (stat(pid_file.c_str(), &st) == 0)
+      return true;
+    // Not there yet, try again one second later
+    sleep(1);
+    ++waited;
+  }
+  return false;
+}
+
+#endif // _WIN32
+
+// Forks new process if needed, closes ALL file descriptors,
+// redirects stdin, stdout, and stderr. Not quite daemon().
+// See https://www.linuxjournal.com/article/2335
+// for a good description of why we do things this way.
+// Return values of the code below:
+//  EXIT_STARTUP - a fork(), the wait for the PID file, the redirection to
+//                 /dev/null, the chdir() or the console detach failed
+//  0            - this is one of the parent processes which are done now
+//  -1           - no error, this process continues (in main_worker())
+static int daemon_init()
+{
+#ifndef _WIN32
+
+ // flush all buffered streams. Else we might get two copies of open
+ // streams since both parent and child get copies of the buffers.
+ fflush(nullptr);
+
+ if (do_fork) {
+ pid_t pid;
+ if ((pid=fork()) < 0) {
+ // unable to fork!
+ PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
+ return EXIT_STARTUP;
+ }
+ if (pid) {
+ // we are the parent process, wait for pid file, then exit cleanly
+ if(!WaitForPidFile()) {
+ PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
+ return EXIT_STARTUP;
+ }
+ return 0;
+ }
+
+ // from here on, we are the child process.
+ setsid();
+
+ // Fork one more time to avoid any possibility of having terminals
+ if ((pid=fork()) < 0) {
+ // unable to fork!
+ PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
+ return EXIT_STARTUP;
+ }
+ if (pid)
+ // we are the parent process -- exit cleanly
+ return 0;
+
+ // Now we are the child's child...
+ }
+
+ // close any open file descriptors
+ // (this is also done if do_fork is not set)
+ for (int i = sysconf(_SC_OPEN_MAX); --i >= 0; )
+ close(i);
+
+ // redirect any IO attempts to /dev/null and change to root directory
+ // All descriptors were closed above, so the open() and the two dup() calls
+ // are expected to return 0, 1 and 2; anything else is treated as an error.
+ int fd = open("/dev/null", O_RDWR);
+ if (!(fd == 0 && dup(fd) == 1 && dup(fd) == 2 && !chdir("/"))) {
+ PrintOut(LOG_CRIT, "smartd unable to redirect to /dev/null or to chdir to root!\n");
+ return EXIT_STARTUP;
+ }
+ umask(0022);
+
+ if (do_fork)
+ PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
+
+#else // _WIN32
+
+ // No fork() on native Win32
+ // Detach this process from console
+ fflush(nullptr);
+ if (daemon_detach("smartd")) {
+ PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
+ return EXIT_STARTUP;
+ }
+ // stdin/out/err now closed if not redirected
+
+#endif // _WIN32
+
+ // No error, continue in main_worker()
+ return -1;
+}
+
+// Create the PID file (if one is configured) containing the id of the
+// current process. Returns false if the file could not be written.
+static bool write_pid_file()
+{
+  if (pid_file.empty())
+    return true;
+
+  const pid_t pid = getpid();
+
+  // The umask is changed only while the file is created
+#ifndef __CYGWIN__
+  const mode_t old_umask = umask(0077); // rwx------
+#else
+  // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
+  const mode_t old_umask = umask(0033); // rwxr--r--
+#endif
+  stdio_file f(pid_file.c_str(), "w");
+  umask(old_umask);
+
+  const bool written = (f && fprintf(f, "%d\n", (int)pid) > 0 && f.close());
+  if (!written) {
+    PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", pid_file.c_str());
+    return false;
+  }
+
+  PrintOut(LOG_INFO, "file %s written containing PID %d\n", pid_file.c_str(), (int)pid);
+  return true;
+}
+
+// Log the version information returned by format_version_info() for "smartd"
+static void PrintHead()
+{
+  const auto version = format_version_info("smartd");
+  PrintOut(LOG_INFO, "%s\n", version.c_str());
+}
+
+// Prints the help text for the configuration file Directives.
+// The complete text is passed to a single PrintOut(LOG_INFO, ...) call.
+// Its three %s placeholders are filled (in this order) with the
+// configuration file name, the device type list and the list of
+// firmware bug workaround names.
+static void Directives()
+{
+ PrintOut(LOG_INFO,
+ "Configuration file (%s) Directives (after device name):\n"
+ " -d TYPE Set the device type: auto, ignore, removable,\n"
+ " %s\n"
+ " -T TYPE Set the tolerance to one of: normal, permissive\n"
+ " -o VAL Enable/disable automatic offline tests (on/off)\n"
+ " -S VAL Enable/disable attribute autosave (on/off)\n"
+ " -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
+ " -H Monitor SMART Health Status, report if failed\n"
+ " -s REG Do Self-Test at time(s) given by regular expression REG\n"
+ " -l TYPE Monitor SMART log or self-test status:\n"
+ " error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
+ " -l scterc,R,W Set SCT Error Recovery Control\n"
+ " -e Change device setting: aam,[N|off], apm,[N|off], dsn,[on|off],\n"
+ " lookahead,[on|off], security-freeze, standby,[N|off], wcache,[on|off]\n"
+ " -f Monitor 'Usage' Attributes, report failures\n"
+ " -m ADD Send email warning to address ADD\n"
+ " -M TYPE Modify email warning behavior (see man page)\n"
+ " -p Report changes in 'Prefailure' Attributes\n"
+ " -u Report changes in 'Usage' Attributes\n"
+ " -t Equivalent to -p and -u Directives\n"
+ " -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
+ " -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
+ " -i ID Ignore Attribute ID for -f Directive\n"
+ " -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
+ " -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
+ " -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
+ " -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
+ " -v N,ST Modifies labeling of Attribute N (see man page) \n"
+ " -P TYPE Drive-specific presets: use, ignore, show, showall\n"
+ " -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
+ " -F TYPE Use firmware bug workaround:\n"
+ " %s\n"
+ " -c i=N Set interval between disk checks to N seconds\n"
+ " # Comment: text after a hash sign is ignored\n"
+ " \\ Line continuation character\n"
+ "Attribute ID is a decimal integer 1 <= ID <= 255\n"
+ "Use ID = 0 to turn off -C and/or -U Directives\n"
+ "Example: /dev/sda -a\n",
+ // Arguments for the three %s above, order must match
+ configfile,
+ smi()->get_valid_dev_types_str().c_str(),
+ get_valid_firmwarebug_args());
+}
+
+/* Returns a pointer to a string literal with the formatted list of the valid
+   arguments to the command line option opt, or nullptr if opt is not handled
+   here (which also depends on the build configuration). */
+static const char *GetValidArgList(char opt)
+{
+  const char * list = nullptr;
+
+  switch (opt) {
+    case 'A': case 's':
+      list = "<PATH_PREFIX>, -";
+      break;
+    case 'B':
+      list = "[+]<FILE_NAME>";
+      break;
+    case 'c':
+      list = "<FILE_NAME>, -";
+      break;
+    case 'l':
+      list = "daemon, local0, local1, local2, local3, local4, local5, local6, local7";
+      break;
+    case 'q':
+      list = "nodev[0], errors[,nodev0], nodev[0]startup, never, onecheck, showtests";
+      break;
+    case 'r':
+      list = "ioctl[,N], ataioctl[,N], scsiioctl[,N], nvmeioctl[,N]";
+      break;
+    case 'p': case 'w':
+      list = "<FILE_NAME>";
+      break;
+    case 'i':
+      list = "<INTEGER_SECONDS>";
+      break;
+#ifdef HAVE_POSIX_API
+    case 'u':
+      list = "<USER>[:<GROUP>], -";
+      break;
+#elif defined(_WIN32)
+    case 'u':
+      list = "restricted, unchanged";
+      break;
+#endif
+#ifdef HAVE_LIBCAP_NG
+    case 'C':
+      list = "mail, <no_argument>";
+      break;
+#endif
+    default:
+      // Unknown option: keep nullptr
+      break;
+  }
+
+  return list;
+}
+
+/* Prints help information for the command line syntax, one PrintOut(LOG_INFO, ...)
+   call per fragment. The options and defaults which are shown depend on the
+   build configuration, see the preprocessor conditions below. */
+static void Usage()
+{
+ PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
+#ifdef SMARTMONTOOLS_ATTRIBUTELOG
+ PrintOut(LOG_INFO," -A PREFIX|-, --attributelog=PREFIX|-\n");
+#else
+ PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
+#endif
+ PrintOut(LOG_INFO," Log attribute information to {PREFIX}MODEL-SERIAL.TYPE.csv\n");
+#ifdef SMARTMONTOOLS_ATTRIBUTELOG
+ PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_ATTRIBUTELOG "MODEL-SERIAL.TYPE.csv]\n");
+#endif
+ PrintOut(LOG_INFO,"\n");
+ PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
+ PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
+ PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
+#ifdef SMARTMONTOOLS_DRIVEDBDIR
+ PrintOut(LOG_INFO,"\n");
+ PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
+#endif
+ PrintOut(LOG_INFO,"]\n\n");
+ PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
+ PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
+ PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
+#ifdef HAVE_LIBCAP_NG
+ PrintOut(LOG_INFO," -C, --capabilities[=mail]\n");
+ PrintOut(LOG_INFO," Drop unneeded Linux process capabilities.\n"
+ " Warning: Mail notification may not work when used.\n\n");
+#endif
+ PrintOut(LOG_INFO," -d, --debug\n");
+ PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
+ PrintOut(LOG_INFO," -D, --showdirectives\n");
+ PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
+ PrintOut(LOG_INFO," -h, --help, --usage\n");
+ PrintOut(LOG_INFO," Display this help and exit\n\n");
+ PrintOut(LOG_INFO," -i N, --interval=N\n");
+ PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
+ PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
+#ifndef _WIN32
+ PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
+#else
+ PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
+#endif
+#ifndef _WIN32
+ PrintOut(LOG_INFO," -n, --no-fork\n");
+ PrintOut(LOG_INFO," Do not fork into background\n");
+#ifdef HAVE_LIBSYSTEMD
+ PrintOut(LOG_INFO," (systemd 'Type=notify' is assumed if $NOTIFY_SOCKET is set)\n");
+#endif // HAVE_LIBSYSTEMD
+ PrintOut(LOG_INFO,"\n");
+#endif // WIN32
+ PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
+ PrintOut(LOG_INFO," Write PID file NAME\n\n");
+ PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
+ PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
+ // The report option takes an argument (see '--report=TYPE' and the list
+ // printed below), so show it for the short form like for the other options
+ PrintOut(LOG_INFO," -r TYPE, --report=TYPE\n");
+ PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
+#ifdef SMARTMONTOOLS_SAVESTATES
+ PrintOut(LOG_INFO," -s PREFIX|-, --savestates=PREFIX|-\n");
+#else
+ PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
+#endif
+ PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
+#ifdef SMARTMONTOOLS_SAVESTATES
+ PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SAVESTATES "MODEL-SERIAL.TYPE.state]\n");
+#endif
+ PrintOut(LOG_INFO,"\n");
+ PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
+ PrintOut(LOG_INFO," Run executable NAME on warnings\n");
+#ifndef _WIN32
+ PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh]\n\n");
+#else
+ PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
+#endif
+#ifdef HAVE_POSIX_API
+ PrintOut(LOG_INFO," -u USER[:GROUP], --warn-as-user=USER[:GROUP]\n");
+ PrintOut(LOG_INFO," Run warning script as non-privileged USER\n\n");
+#elif defined(_WIN32)
+ PrintOut(LOG_INFO," -u MODE, --warn-as-user=MODE\n");
+ PrintOut(LOG_INFO," Run warning script with modified access token: %s\n\n", GetValidArgList('u'));
+#endif
+#ifdef _WIN32
+ PrintOut(LOG_INFO," --service\n");
+ PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
+ PrintOut(LOG_INFO," smartd install [options]\n");
+ PrintOut(LOG_INFO," Remove service with:\n");
+ PrintOut(LOG_INFO," smartd remove\n\n");
+#endif // _WIN32
+ PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
+ PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
+}
+
+static int CloseDevice(smart_device * device, const char * name)
+{
+ if (!device->close()){
+ PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
+ return 1;
+ }
+ // device successfully closed
+ return 0;
+}
+
+// Replace each invalid character of S (intended for cfg.dev_idinfo) by '?'.
+// Invalid are all characters outside of ' '...'~' and a '~' at the first position.
+// Returns true if at least one character was replaced.
+static bool sanitize_dev_idinfo(std::string & s)
+{
+  STATIC_ASSERT(' ' == 0x20 && '~' == 0x07e); // Assume ASCII
+
+  bool changed = false;
+  for (unsigned pos = 0; pos < s.size(); pos++) {
+    const char ch = s[pos];
+    const bool printable = (' ' <= ch && ch <= '~');
+    // Don't pass possible command escapes ('~! COMMAND') to the 'mail' command.
+    const bool leading_tilde = (pos == 0 && ch == '~');
+    if (!printable || leading_tilde) {
+      s[pos] = '?';
+      changed = true;
+    }
+  }
+  return changed;
+}
+
+// Return true if C is not allowed in a state file name.
+// Only the ASCII digits and the ASCII upper and lower case letters are allowed.
+static bool not_allowed_in_filename(char c)
+{
+  if ('0' <= c && c <= '9')
+    return false;
+  if ('A' <= c && c <= 'Z')
+    return false;
+  if ('a' <= c && c <= 'z')
+    return false;
+  return true;
+}
+
+// Read error count from Summary or Extended Comprehensive SMART error log
+// Return -1 on error
+static int read_ata_error_count(ata_device * device, const char * name,
+ firmwarebug_defs firmwarebugs, bool extended)
+{
+ if (!extended) {
+ ata_smart_errorlog log;
+ if (ataReadErrorLog(device, &log, firmwarebugs)){
+ PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
+ return -1;
+ }
+ return (log.error_log_pointer ? log.ata_error_count : 0);
+ }
+ else {
+ ata_smart_exterrlog logx;
+ if (!ataReadExtErrorLog(device, &logx, 0, 1 /*first sector only*/, firmwarebugs)) {
+ PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
+ return -1;
+ }
+ // Some disks use the reserved byte as index, see ataprint.cpp.
+ return (logx.error_log_index || logx.reserved1 ? logx.device_error_count : 0);
+ }
+}
+
+// Count the failed self-tests in the SMART self-test log.
+// Returns <0 if the log could not be read. Otherwise, the bottom 8 bits are
+// the self test error count, and the top bits are the power-on hours
+// (log entry timestamp) of the most recent error.
+static int SelfTestErrorCount(ata_device * device, const char * name,
+  firmwarebug_defs firmwarebugs)
+{
+  struct ata_smart_selftestlog log;
+  if (ataReadSelfTestLog(device, &log, firmwarebugs) != 0) {
+    PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
+    return -1;
+  }
+
+  // No tests logged
+  const int newest = log.mostrecenttest;
+  if (newest == 0)
+    return 0;
+
+  // Walk through the 21 log entries, the most recent one comes first
+  int failed = 0, last_error_hours = 0;
+  for (int age = 0; age <= 20; age++) {
+    const int idx = (20 - age + newest) % 21;
+    const ata_smart_selftestlog_struct & entry = log.selftest_struct[idx];
+    if (!nonempty(&entry, sizeof(entry)))
+      continue;
+
+    const int result = entry.selfteststatus >> 4;
+    const bool extended_test = ((entry.selftestnumber & 0x7f) == 0x02);
+    // First successful extended self-test, stop count
+    if (result == 0x0 && extended_test)
+      break;
+
+    // Only the results 0x3...0x8 are counted as errors
+    if (result < 0x3 || 0x8 < result)
+      continue;
+    failed++;
+    // Keep track of time of most recent error
+    if (last_error_hours == 0)
+      last_error_hours = entry.timestamp;
+  }
+
+  return ((last_error_hours << 8) | failed);
+}
+
+// Extract the parts of the value returned by SelfTestErrorCount().
+// The argument is parenthesized so that expressions containing operators
+// with a lower precedence than '&' or '>>' (e.g. 'a | b') are handled correctly.
+#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
+#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
+
+// Return true if the offline data collection status is 0x03 ("in progress",
+// see log_offline_data_coll_status()). Bit 7 of STATUS is ignored.
+static inline bool is_offl_coll_in_progress(unsigned char status)
+{
+  const unsigned code = status & 0x7f;
+  return (code == 0x03);
+}
+
+// Return true if the upper four bits of the self-test execution status
+// are all set, which is the "self-test in progress" value.
+static inline bool is_self_test_in_progress(unsigned char status)
+{
+  return ((status & 0xf0) == 0xf0);
+}
+
+// Log the offline data collection status of device NAME.
+// The lower 7 bits of STATUS select the message, bit 7 adds " (auto:on)".
+// Status 0x06 (aborted with a fatal error) is logged as LOG_CRIT.
+static void log_offline_data_coll_status(const char * name, unsigned char status)
+{
+  const unsigned code = status & 0x7f;
+
+  const char * text = nullptr;
+  switch (code) {
+    case 0x00: text = "was never started"; break;
+    case 0x02: text = "was completed without error"; break;
+    case 0x03: text = "is in progress"; break;
+    case 0x04: text = "was suspended by an interrupting command from host"; break;
+    case 0x05: text = "was aborted by an interrupting command from host"; break;
+    case 0x06: text = "was aborted by the device with a fatal error"; break;
+    default: break;
+  }
+
+  if (!text) {
+    PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
+      name, status);
+    return;
+  }
+
+  const int priority = (code == 0x06 ? LOG_CRIT : LOG_INFO);
+  const char * autoflag = ((status & 0x80) ? " (auto:on)" : "");
+  PrintOut(priority, "Device: %s, offline data collection %s%s\n", name, text, autoflag);
+}
+
+// Log the self-test execution status of device NAME.
+// The upper 4 bits of STATUS select the message. Results 0x4 and above from
+// the table are logged as LOG_CRIT. For 0xf (in progress), the lower 4 bits
+// are printed as the remaining percentage in steps of 10.
+static void log_self_test_exec_status(const char * name, unsigned char status)
+{
+  // Messages for the results 0x0...0x8
+  static const char * const results[] = {
+    "completed without error",
+    "was aborted by the host",
+    "was interrupted by the host with a reset",
+    "could not complete due to a fatal or unknown error",
+    "completed with error (unknown test element)",
+    "completed with error (electrical test element)",
+    "completed with error (servo/seek test element)",
+    "completed with error (read test element)",
+    "completed with error (handling damage?)"
+  };
+
+  const unsigned code = status >> 4;
+
+  if (code < sizeof(results)/sizeof(results[0])) {
+    PrintOut((code >= 0x4 ? LOG_CRIT : LOG_INFO),
+      "Device: %s, previous self-test %s\n", name, results[code]);
+    return;
+  }
+
+  if (code == 0xf) {
+    PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
+      name, status & 0x0f);
+    return;
+  }
+
+  PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
+    name, status);
+}
+
+// Check whether the pending sector count Attribute ID (-C, -U directives)
+// can be monitored. MSG names the kind of count in the log messages.
+// Returns false (after logging the reason) if the Attribute does not exist
+// or if its raw value is not below the number of sectors (0xffffffff if the
+// number of sectors is unknown).
+static bool check_pending_id(const dev_config & cfg, const dev_state & state,
+  unsigned char id, const char * msg)
+{
+  // Find the Attribute
+  const int idx = ata_find_attr_index(id, state.smartval);
+  if (idx < 0) {
+    PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
+      cfg.name.c_str(), msg, id);
+    return false;
+  }
+
+  // A count which reaches this limit is regarded as bogus
+  const uint64_t limit = (state.num_sectors ? state.num_sectors : 0xffffffffULL);
+  const uint64_t raw = ata_get_attr_raw_value(state.smartval.vendor_attributes[idx],
+    cfg.attribute_defs);
+  if (raw >= limit) {
+    PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64 " (0x%" PRIx64 ")\n",
+      cfg.name.c_str(), msg, id, raw, raw);
+    return false;
+  }
+
+  return true;
+}
+
+// Called by ATA/SCSI/NVMeDeviceScan() after successful device check.
+// Selects the default mail frequency and starts the self-test scheduling.
+static void finish_device_scan(dev_config & cfg, dev_state & state)
+{
+  // Set cfg.emailfreq if mail is requested but user hasn't set the frequency
+  const bool mail_wanted = (!cfg.emailaddress.empty() || !cfg.emailcmdline.empty());
+  if (mail_wanted && cfg.emailfreq == emailfreqs::unknown) {
+    // Avoid that emails are suppressed forever due to state persistence:
+    // 'once' is the default only if there is no state file
+    cfg.emailfreq = (cfg.state_file.empty() ? emailfreqs::once : emailfreqs::daily);
+  }
+
+  // Start self-test regex check now if time was not read from state file
+  const bool tests_scheduled = !cfg.test_regex.empty();
+  if (tests_scheduled && !state.scheduled_test_next_check)
+    state.scheduled_test_next_check = time(nullptr);
+}
+
+// Common function to format the result message for an ATA setting.
+// Appends NAME and a result suffix to MSG, separated by ", " from the
+// previous contents. The suffix is ":--" if OK is false, ":off" for a
+// negative SET_OPTION, the value SET_OPTION-1 if HAS_VALUE is set, ":on"
+// for a positive SET_OPTION, and empty otherwise.
+static void format_set_result_msg(std::string & msg, const char * name, bool ok,
+  int set_option = 0, bool has_value = false)
+{
+  if (!msg.empty())
+    msg += ", ";
+  msg += name;
+
+  std::string suffix;
+  if (!ok)
+    suffix = ":--";
+  else if (set_option < 0)
+    suffix = ":off";
+  else if (has_value)
+    suffix = strprintf(":%d", set_option-1);
+  else if (set_option > 0)
+    suffix = ":on";
+
+  msg += suffix;
+}
+
+// Return true and print message if CFG.dev_idinfo is already in PREV_CFGS.
+// Only entries which are flagged as unique (id_is_unique) are compared.
+static bool is_duplicate_dev_idinfo(const dev_config & cfg, const dev_config_vector & prev_cfgs)
+{
+  if (!cfg.id_is_unique)
+    return false;
+
+  for (const auto & other : prev_cfgs) {
+    const bool same_identity = (other.id_is_unique && other.dev_idinfo == cfg.dev_idinfo);
+    if (same_identity) {
+      PrintOut(LOG_INFO, "Device: %s, same identity as %s, ignored\n",
+        cfg.dev_name.c_str(), other.dev_name.c_str());
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// TODO: Add '-F swapid' directive
+// Passed as last argument to ata_read_identity() by ATADeviceScan().
+// Constant until the directive above exists.
+const bool fix_swapped_id = false;
+
+/* scan to see what ata devices there are, and if they support SMART
+ *
+ * ATADEV must already be open.  The function reads the IDENTIFY DEVICE data,
+ * fills in the identity fields of CFG, applies drive database presets and
+ * switches off each monitoring directive in CFG which the device turns out
+ * not to support.  Requested settings (autosave, automatic offline testing,
+ * AAM/APM/cache/..., SCT ERC) are sent to the device on the way.
+ * Every return path closes the device via CloseDevice().
+ *
+ * Return values:
+ *  0: device registered, state/attrlog file names set, finish_device_scan() called
+ *  1: same identity as an entry of PREV_CFGS (only checked if PREV_CFGS != nullptr)
+ *  2: IDENTIFY failed, or SMART unsupported/not enabled and no '-T permissive'
+ *  3: no monitoring directive is left enabled
+ */
+static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev,
+ const dev_config_vector * prev_cfgs)
+{
+ int supported=0;
+ struct ata_identify_device drive;
+ const char *name = cfg.name.c_str();
+ int retid;
+
+ // Device must be open
+
+ // Get drive identity structure
+ if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
+ if (retid<0)
+ // Unable to read Identity structure
+ PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
+ else
+ PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
+ name, packetdevicetype(retid-1));
+ CloseDevice(atadev, name);
+ return 2;
+ }
+
+ // Get drive identity, size and rotation rate (HDD/SSD)
+ char model[40+1], serial[20+1], firmware[8+1];
+ ata_format_id_string(model, drive.model, sizeof(model)-1);
+ ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
+ ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
+
+ ata_size_info sizes;
+ ata_get_size_info(&drive, sizes);
+ state.num_sectors = sizes.sectors;
+ cfg.dev_rpm = ata_get_rotation_rate(&drive);
+
+ char wwn[64]; wwn[0] = 0;
+ unsigned oui = 0; uint64_t unique_id = 0;
+ int naa = ata_get_wwn(&drive, oui, unique_id);
+ if (naa >= 0)
+ snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09" PRIx64 ", ", naa, oui, unique_id);
+
+ // Format device id string for warning emails (also compared by the duplicate check below)
+ char cap[32];
+ cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
+ format_capacity(cap, sizeof(cap), sizes.capacity, "."));
+ cfg.id_is_unique = true; // TODO: Check serial?
+ if (sanitize_dev_idinfo(cfg.dev_idinfo))
+ cfg.id_is_unique = false;
+
+ PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
+
+ // Check for duplicates
+ if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
+ CloseDevice(atadev, name);
+ return 1;
+ }
+
+ // Show if device in database, and use preset vendor attribute
+ // options unless user has requested otherwise.
+ if (cfg.ignorepresets)
+ PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
+ else {
+ // Apply vendor specific presets, print warning if present
+ std::string dbversion;
+ const drive_settings * dbentry = lookup_drive_apply_presets(
+ &drive, cfg.attribute_defs, cfg.firmwarebugs, dbversion);
+ if (!dbentry)
+ PrintOut(LOG_INFO, "Device: %s, not found in smartd database%s%s.\n", name,
+ (!dbversion.empty() ? " " : ""), (!dbversion.empty() ? dbversion.c_str() : ""));
+ else {
+ PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s%s%s\n",
+ name, (!dbversion.empty() ? " " : ""), (!dbversion.empty() ? dbversion.c_str() : ""),
+ (*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
+ if (*dbentry->warningmsg)
+ PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
+ }
+ }
+
+ // Check for ATA Security LOCK
+ unsigned short word128 = drive.words088_255[128-88];
+ bool locked = ((word128 & 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
+ if (locked)
+ PrintOut(LOG_INFO, "Device: %s, ATA Security is **LOCKED**\n", name);
+
+ // Set default '-C 197[+]' if no '-C ID' is specified.
+ if (!cfg.curr_pending_set)
+ cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
+ // Set default '-U 198[+]' if no '-U ID' is specified.
+ if (!cfg.offl_pending_set)
+ cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
+
+ // If requested, show which presets would be used for this drive (debugmode is temporarily set to 2 if it was 0 and restored afterwards)
+ if (cfg.showpresets) {
+ int savedebugmode=debugmode;
+ PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
+ if (!debugmode)
+ debugmode=2;
+ show_presets(&drive);
+ debugmode=savedebugmode;
+ }
+
+ // see if drive supports SMART
+ supported=ataSmartSupport(&drive);
+ if (supported!=1) {
+ if (supported==0)
+ // drive does NOT support SMART
+ PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
+ else
+ // can't tell if drive supports SMART
+ PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
+
+ // should we proceed anyway?
+ if (cfg.permissive) {
+ PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
+ }
+ else {
+ PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
+ CloseDevice(atadev, name);
+ return 2;
+ }
+ }
+
+ if (ataEnableSmart(atadev)) {
+ // Enable SMART command has failed
+ PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
+
+ if (ataIsSmartEnabled(&drive) <= 0) {
+ if (!cfg.permissive) {
+ PrintOut(LOG_INFO, "Device: %s, to proceed anyway, use '-T permissive' Directive.\n", name);
+ CloseDevice(atadev, name);
+ return 2;
+ }
+ PrintOut(LOG_INFO, "Device: %s, proceeding since '-T permissive' Directive given.\n", name);
+ }
+ else {
+ PrintOut(LOG_INFO, "Device: %s, proceeding since SMART is already enabled\n", name);
+ }
+ }
+
+ // disable device attribute autosave (cfg.autosave == 1) ...
+ if (cfg.autosave==1) {
+ if (ataDisableAutoSave(atadev))
+ PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
+ else
+ PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
+ }
+
+ // ... or enable device attribute autosave (cfg.autosave == 2)
+ if (cfg.autosave==2) {
+ if (ataEnableAutoSave(atadev))
+ PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
+ else
+ PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
+ }
+
+ // capability check: SMART status
+ if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
+ PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
+ cfg.smartcheck = false;
+ }
+
+ // capability check: Read smart values and thresholds. Note that
+ // smart values are ALSO needed even if we ONLY want to know if the
+ // device is self-test log or error-log capable! After ATA-5, this
+ // information was ALSO reproduced in the IDENTIFY DEVICE response,
+ // but sadly not for ATA-5. Sigh.
+
+ // do we need to get SMART data?
+ bool smart_val_ok = false;
+ if ( cfg.autoofflinetest || cfg.selftest
+ || cfg.errorlog || cfg.xerrorlog
+ || cfg.offlinests || cfg.selfteststs
+ || cfg.usagefailed || cfg.prefail || cfg.usage
+ || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
+ || cfg.curr_pending_id || cfg.offl_pending_id ) {
+
+ if (ataReadSmartValues(atadev, &state.smartval)) {
+ PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
+ cfg.usagefailed = cfg.prefail = cfg.usage = false;
+ cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
+ cfg.curr_pending_id = cfg.offl_pending_id = 0;
+ }
+ else {
+ smart_val_ok = true;
+ if (ataReadSmartThresholds(atadev, &state.smartthres)) {
+ PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
+ name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
+ cfg.usagefailed = false;
+ // Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
+ memset(&state.smartthres, 0, sizeof(state.smartthres));
+ }
+ }
+
+ // see if the necessary Attribute is there to monitor offline or
+ // current pending sectors or temperature
+ if ( cfg.curr_pending_id
+ && !check_pending_id(cfg, state, cfg.curr_pending_id,
+ "Current_Pending_Sector"))
+ cfg.curr_pending_id = 0;
+
+ if ( cfg.offl_pending_id
+ && !check_pending_id(cfg, state, cfg.offl_pending_id,
+ "Offline_Uncorrectable"))
+ cfg.offl_pending_id = 0;
+
+ if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
+ && !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
+ PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
+ name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
+ cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
+ }
+
+ // Report ignored '-r' or '-R' directives
+ for (int id = 1; id <= 255; id++) {
+ if (cfg.monitor_attr_flags.is_set(id, MONITOR_RAW_PRINT)) {
+ char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
+ const char * excl = (cfg.monitor_attr_flags.is_set(id,
+ (opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
+
+ int idx = ata_find_attr_index(id, state.smartval);
+ if (idx < 0)
+ PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
+ else {
+ bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
+ if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
+ PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
+ (prefail ? "Prefailure" : "Usage"), opt, id, excl);
+ }
+ }
+ }
+ }
+
+ // enable/disable automatic on-line testing
+ if (cfg.autoofflinetest) {
+ // is this an enable or disable request? (1 = disable, any other nonzero value = enable)
+ const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
+ if (!smart_val_ok)
+ PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
+ else {
+ // if command appears unsupported, issue a warning...
+ if (!isSupportAutomaticTimer(&state.smartval))
+ PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
+ // ... but then try anyway
+ if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
+ PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
+ else
+ PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what); // "%sd" prints "disabled"/"enabled"
+ }
+ }
+
+ // Read log directories if required for capability check (entries are indexed below as [log address - 1])
+ ata_smart_log_directory smart_logdir, gp_logdir;
+ bool smart_logdir_ok = false, gp_logdir_ok = false;
+
+ if ( isGeneralPurposeLoggingCapable(&drive)
+ && (cfg.errorlog || cfg.selftest)
+ && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
+ if (!ataReadLogDirectory(atadev, &smart_logdir, false))
+ smart_logdir_ok = true;
+ }
+
+ if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
+ if (!ataReadLogDirectory(atadev, &gp_logdir, true))
+ gp_logdir_ok = true;
+ }
+
+ // capability check: self-test-log
+ state.selflogcount = 0; state.selfloghour = 0;
+ if (cfg.selftest) {
+ int retval;
+ if (!( cfg.permissive
+ || ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
+ || (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
+ PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
+ cfg.selftest = false;
+ }
+ else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
+ PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
+ cfg.selftest = false;
+ }
+ else {
+ state.selflogcount=SELFTEST_ERRORCOUNT(retval);
+ state.selfloghour =SELFTEST_ERRORHOURS(retval);
+ }
+ }
+
+ // capability check: ATA error log
+ state.ataerrorcount = 0;
+ if (cfg.errorlog) {
+ int errcnt1;
+ if (!( cfg.permissive
+ || ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
+ || (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
+ PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
+ cfg.errorlog = false;
+ }
+ else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
+ PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
+ cfg.errorlog = false;
+ }
+ else
+ state.ataerrorcount = errcnt1;
+ }
+
+ if (cfg.xerrorlog) {
+ int errcnt2;
+ if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
+ || (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
+ PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
+ name);
+ cfg.xerrorlog = false;
+ }
+ else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
+ PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
+ cfg.xerrorlog = false;
+ }
+ else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
+ PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
+ name, state.ataerrorcount, errcnt2);
+ // Record max error count
+ if (errcnt2 > state.ataerrorcount)
+ state.ataerrorcount = errcnt2;
+ }
+ else
+ state.ataerrorcount = errcnt2;
+ }
+
+ // capability check: self-test and offline data collection status
+ if (cfg.offlinests || cfg.selfteststs) {
+ if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
+ if (cfg.offlinests)
+ PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
+ if (cfg.selfteststs)
+ PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
+ cfg.offlinests = cfg.selfteststs = false;
+ }
+ }
+
+ // capabilities check -- does it support powermode?
+ if (cfg.powermode) {
+ int powermode = ataCheckPowerMode(atadev);
+
+ if (-1 == powermode) {
+ PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
+ cfg.powermode=0;
+ }
+ else if (powermode!=0x00 && powermode!=0x01
+ && powermode!=0x40 && powermode!=0x41
+ && powermode!=0x80 && powermode!=0x81 && powermode!=0x82 && powermode!=0x83
+ && powermode!=0xff) {
+ PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
+ name, powermode);
+ cfg.powermode=0;
+ }
+ }
+
+ // Apply ATA settings (AAM, APM and standby values are sent to the device as 'stored value - 1')
+ std::string msg;
+
+ if (cfg.set_aam)
+ format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
+ ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
+ ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
+
+ if (cfg.set_apm)
+ format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
+ ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
+ ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
+
+ if (cfg.set_lookahead)
+ format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
+ (cfg.set_lookahead > 0 ? ATA_ENABLE_READ_LOOK_AHEAD : ATA_DISABLE_READ_LOOK_AHEAD)),
+ cfg.set_lookahead);
+
+ if (cfg.set_wcache)
+ format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
+ (cfg.set_wcache > 0? ATA_ENABLE_WRITE_CACHE : ATA_DISABLE_WRITE_CACHE)), cfg.set_wcache);
+
+ if (cfg.set_dsn)
+ format_set_result_msg(msg, "DSN", ata_set_features(atadev,
+ ATA_ENABLE_DISABLE_DSN, (cfg.set_dsn > 0 ? 0x1 : 0x2)));
+
+ if (cfg.set_security_freeze)
+ format_set_result_msg(msg, "Security freeze",
+ ata_nodata_command(atadev, ATA_SECURITY_FREEZE_LOCK));
+
+ if (cfg.set_standby)
+ format_set_result_msg(msg, "Standby",
+ ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
+
+ // Report as one log entry
+ if (!msg.empty())
+ PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
+
+ // set SCT Error Recovery Control if requested
+ if (cfg.sct_erc_set) {
+ if (!isSCTErrorRecoveryControlCapable(&drive))
+ PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
+ name);
+ else if (locked)
+ PrintOut(LOG_INFO, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
+ name);
+ else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime, false, false )
+ || ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime, false, false))
+ PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
+ else
+ PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
+ name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
+ }
+
+ // If no tests available or selected, return (the checks above may have cleared directives)
+ if (!( cfg.smartcheck || cfg.selftest
+ || cfg.errorlog || cfg.xerrorlog
+ || cfg.offlinests || cfg.selfteststs
+ || cfg.usagefailed || cfg.prefail || cfg.usage
+ || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
+ CloseDevice(atadev, name);
+ return 3;
+ }
+
+ // tell user we are registering device
+ PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
+
+ // close file descriptor
+ CloseDevice(atadev, name);
+
+ if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
+ // Build file name for state file (model and serial are modified in place)
+ std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
+ std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
+ if (!state_path_prefix.empty()) {
+ cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
+ // Read previous state
+ if (read_dev_state(cfg.state_file.c_str(), state)) {
+ PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
+ // Copy ATA attribute values to temp state
+ state.update_temp_state();
+ }
+ }
+ if (!attrlog_path_prefix.empty())
+ cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
+ }
+
+ finish_device_scan(cfg, state);
+
+ return 0;
+}
+
+// Registration scan for a SCSI device.  SCSIDEV must already be open.
+//
+// Reads the standard INQUIRY data and the identification VPD pages, builds
+// the identity string of CFG, checks that the unit is ready and that
+// Informational Exceptions (SMART) reporting is enabled, records which log
+// pages are supported in STATE and switches off directives in CFG which the
+// device does not support.
+//
+// on success, return 0. On failure, return >0. Never return <0,
+// please.
+//  1: same identity as an entry of PREV_CFGS (only checked if PREV_CFGS != nullptr)
+//  2: INQUIRY failed or too short, not a disk like device, or unit not ready
+//  3: bad IEC mode page or IE (SMART) not enabled
+// The early returns for INQUIRY failure and unsupported device type do not
+// call CloseDevice(); all other return paths do.
+static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev,
+                          const dev_config_vector * prev_cfgs)
+{
+  int err, req_len, avail_len, version, len;
+  const char *device = cfg.name.c_str();
+  struct scsi_iec_mode_page iec;
+  uint8_t tBuf[64];
+  uint8_t inqBuf[96];
+  uint8_t vpdBuf[252];
+  char lu_id[64], serial[256], vendor[40], model[40];
+
+  // Device must be open
+  memset(inqBuf, 0, 96);
+  req_len = 36;
+  if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
+    /* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
+    req_len = 64;
+    int err64;
+    if ((err64 = scsiStdInquiry(scsidev, inqBuf, req_len))) {
+      PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
+               "skip device [err=%d, %d]\n", device, err, err64);
+      return 2;
+    }
+  }
+  version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
+
+  avail_len = inqBuf[4] + 5;
+  len = (avail_len < req_len) ? avail_len : req_len;
+  if (len < 36) {
+    PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
+             "skip device\n", device);
+    return 2;
+  }
+
+  int pdt = inqBuf[0] & 0x1f;
+
+  switch (pdt) {
+  case SCSI_PT_DIRECT_ACCESS:
+  case SCSI_PT_WO:
+  case SCSI_PT_CDROM:
+  case SCSI_PT_OPTICAL:
+  case SCSI_PT_RBC: /* Reduced Block commands */
+  case SCSI_PT_HOST_MANAGED: /* Zoned disk */
+    break;
+  default:
+    PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
+             "skip\n", device, pdt);
+    return 2;
+  }
+
+  // Replace the cached list of supported VPD pages with the one of this device
+  if (supported_vpd_pages_p) {
+    delete supported_vpd_pages_p;
+    supported_vpd_pages_p = nullptr;
+  }
+  supported_vpd_pages_p = new supported_vpd_pages(scsidev);
+
+  // The VPD page length byte is reported by the device and may claim more
+  // data than vpdBuf can hold (4 byte header + payload), so it is clamped
+  // before it is used as an index or length below.
+  const int vpd_max_payload = (int)sizeof(vpdBuf) - 4;
+
+  lu_id[0] = '\0';
+  if (version >= 0x3) {
+    /* SPC to SPC-5, assume SPC-6 is version==8 or higher */
+    if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_DEVICE_IDENTIFICATION,
+                            vpdBuf, sizeof(vpdBuf))) {
+      len = vpdBuf[3];
+      if (len > vpd_max_payload)
+        len = vpd_max_payload;
+      scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), nullptr);
+    }
+  }
+  serial[0] = '\0';
+  if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_UNIT_SERIAL_NUMBER,
+                          vpdBuf, sizeof(vpdBuf))) {
+    len = vpdBuf[3];
+    // Keep one byte for the terminating NUL written into vpdBuf
+    if (len > vpd_max_payload - 1)
+      len = vpd_max_payload - 1;
+    vpdBuf[4 + len] = '\0';
+    scsi_format_id_string(serial, &vpdBuf[4], len);
+  }
+
+  char si_str[64];
+  struct scsi_readcap_resp srr;
+  uint64_t capacity = scsiGetSize(scsidev, scsidev->use_rcap16(), &srr);
+
+  if (capacity)
+    format_capacity(si_str, sizeof(si_str), capacity, ".");
+  else
+    si_str[0] = '\0';
+
+  // Format device id string for warning emails
+  cfg.dev_idinfo = strprintf("[%.8s %.16s %.4s]%s%s%s%s%s%s",
+    (char *)&inqBuf[8], (char *)&inqBuf[16], (char *)&inqBuf[32],
+    (lu_id[0] ? ", lu id: " : ""), (lu_id[0] ? lu_id : ""),
+    (serial[0] ? ", S/N: " : ""), (serial[0] ? serial : ""),
+    (si_str[0] ? ", " : ""), (si_str[0] ? si_str : ""));
+  // Without LU id and serial number the identity cannot tell devices apart
+  cfg.id_is_unique = (lu_id[0] || serial[0]);
+  if (sanitize_dev_idinfo(cfg.dev_idinfo))
+    cfg.id_is_unique = false;
+
+  // format "model" string
+  scsi_format_id_string(vendor, &inqBuf[8], 8);
+  scsi_format_id_string(model, &inqBuf[16], 16);
+  PrintOut(LOG_INFO, "Device: %s, %s\n", device, cfg.dev_idinfo.c_str());
+
+  // Check for duplicates
+  if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
+    CloseDevice(scsidev, device);
+    return 1;
+  }
+
+  // check that device is ready for commands. IE stores its stuff on
+  // the media.
+  if ((err = scsiTestUnitReady(scsidev))) {
+    if (SIMPLE_ERR_NOT_READY == err)
+      PrintOut(LOG_INFO, "Device: %s, NOT READY (e.g. spun down); skip device\n", device);
+    else if (SIMPLE_ERR_NO_MEDIUM == err)
+      PrintOut(LOG_INFO, "Device: %s, NO MEDIUM present; skip device\n", device);
+    else if (SIMPLE_ERR_BECOMING_READY == err)
+      PrintOut(LOG_INFO, "Device: %s, BECOMING (but not yet) READY; skip device\n", device);
+    else
+      PrintOut(LOG_CRIT, "Device: %s, failed Test Unit Ready [err=%d]\n", device, err);
+    CloseDevice(scsidev, device);
+    return 2;
+  }
+
+  // Badly-conforming USB storage devices may fail this check.
+  // The response to the following IE mode page fetch (current and
+  // changeable values) is carefully examined. It has been found
+  // that various USB devices that malform the response will lock up
+  // if asked for a log page (e.g. temperature) so it is best to
+  // bail out now.
+  if (!(err = scsiFetchIECmpage(scsidev, &iec, state.modese_len)))
+    state.modese_len = iec.modese_len;
+  else if (SIMPLE_ERR_BAD_FIELD == err)
+    ; /* continue since it is reasonable not to support IE mpage */
+  else { /* any other error (including malformed response) unreasonable */
+    PrintOut(LOG_INFO,
+             "Device: %s, Bad IEC (SMART) mode page, err=%d, skip device\n",
+             device, err);
+    CloseDevice(scsidev, device);
+    return 3;
+  }
+
+  // N.B. The following is passive (i.e. it doesn't attempt to turn on
+  // smart if it is off). This may change to be the same as the ATA side.
+  if (!scsi_IsExceptionControlEnabled(&iec)) {
+    PrintOut(LOG_INFO, "Device: %s, IE (SMART) not enabled, skip device\n"
+             "Try 'smartctl -s on %s' to turn on SMART features\n",
+             device, device);
+    CloseDevice(scsidev, device);
+    return 3;
+  }
+
+  // Flag that certain log pages are supported (information may be
+  // available from other sources).
+  if (0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 0) ||
+      0 == scsiLogSense(scsidev, SUPPORTED_LPAGES, 0, tBuf, sizeof(tBuf), 68))
+      /* workaround for the bug #678 on ST8000NM0075/E001. Up to 64 pages + 4b header */
+  {
+    // The page list length comes from the device; never scan beyond tBuf
+    int list_end = tBuf[3] + LOGPAGEHDRSIZE;
+    if (list_end > (int)sizeof(tBuf))
+      list_end = (int)sizeof(tBuf);
+    for (int k = 4; k < list_end; ++k) {
+      switch (tBuf[k]) {
+      case TEMPERATURE_LPAGE:
+        state.TempPageSupported = 1;
+        break;
+      case IE_LPAGE:
+        state.SmartPageSupported = 1;
+        break;
+      case READ_ERROR_COUNTER_LPAGE:
+        state.ReadECounterPageSupported = 1;
+        break;
+      case WRITE_ERROR_COUNTER_LPAGE:
+        state.WriteECounterPageSupported = 1;
+        break;
+      case VERIFY_ERROR_COUNTER_LPAGE:
+        state.VerifyECounterPageSupported = 1;
+        break;
+      case NON_MEDIUM_ERROR_LPAGE:
+        state.NonMediumErrorPageSupported = 1;
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
+  // Check if scsiCheckIE() is going to work
+  {
+    uint8_t asc = 0;
+    uint8_t ascq = 0;
+    uint8_t currenttemp = 0;
+    uint8_t triptemp = 0;
+
+    if (scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
+                    &asc, &ascq, &currenttemp, &triptemp)) {
+      PrintOut(LOG_INFO, "Device: %s, unexpectedly failed to read SMART values\n", device);
+      state.SuppressReport = 1;
+    }
+    if (   (state.SuppressReport || !currenttemp)
+        && (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
+      PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
+               device, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
+      cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
+    }
+  }
+
+  // capability check: self-test-log
+  if (cfg.selftest){
+    int retval = scsiCountFailedSelfTests(scsidev, 0);
+    if (retval<0) {
+      // no self-test log, turn off monitoring
+      PrintOut(LOG_INFO, "Device: %s, does not support SMART Self-Test Log.\n", device);
+      cfg.selftest = false;
+      state.selflogcount = 0;
+      state.selfloghour = 0;
+    }
+    else {
+      // register starting values to watch for changes
+      state.selflogcount=SELFTEST_ERRORCOUNT(retval);
+      state.selfloghour =SELFTEST_ERRORHOURS(retval);
+    }
+  }
+
+  // disable autosave (set GLTSD bit)
+  if (cfg.autosave==1){
+    if (scsiSetControlGLTSD(scsidev, 1, state.modese_len))
+      PrintOut(LOG_INFO,"Device: %s, could not disable autosave (set GLTSD bit).\n",device);
+    else
+      PrintOut(LOG_INFO,"Device: %s, disabled autosave (set GLTSD bit).\n",device);
+  }
+
+  // or enable autosave (clear GLTSD bit)
+  if (cfg.autosave==2){
+    if (scsiSetControlGLTSD(scsidev, 0, state.modese_len))
+      PrintOut(LOG_INFO,"Device: %s, could not enable autosave (clear GLTSD bit).\n",device);
+    else
+      PrintOut(LOG_INFO,"Device: %s, enabled autosave (cleared GLTSD bit).\n",device);
+  }
+
+  // tell user we are registering device
+  PrintOut(LOG_INFO, "Device: %s, is SMART capable. Adding to \"monitor\" list.\n", device);
+
+  // Make sure that init_standby_check() ignores SCSI devices
+  cfg.offlinests_ns = cfg.selfteststs_ns = false;
+
+  // close file descriptor
+  CloseDevice(scsidev, device);
+
+  if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
+    // Build file name for state file (model and serial are modified in place)
+    std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
+    std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
+    if (!state_path_prefix.empty()) {
+      cfg.state_file = strprintf("%s%s-%s-%s.scsi.state", state_path_prefix.c_str(), vendor, model, serial);
+      // Read previous state
+      if (read_dev_state(cfg.state_file.c_str(), state)) {
+        PrintOut(LOG_INFO, "Device: %s, state read from %s\n", device, cfg.state_file.c_str());
+        // Bring the temp state in line with the state just read
+        state.update_temp_state();
+      }
+    }
+    if (!attrlog_path_prefix.empty())
+      cfg.attrlog_file = strprintf("%s%s-%s-%s.scsi.csv", attrlog_path_prefix.c_str(), vendor, model, serial);
+  }
+
+  finish_device_scan(cfg, state);
+
+  return 0;
+}
+
+// Convert a 128 bit little endian integer to uint64_t.
+// If the value does not fit (any of the upper 8 bytes is nonzero), the
+// maximum uint64_t value is returned instead.
+static uint64_t le128_to_uint64(const unsigned char (& val)[16])
+{
+  // Collect the upper 64 bits; any set bit means overflow
+  unsigned char upper = 0;
+  for (int i = 8; i < 16; i++)
+    upper |= val[i];
+  if (upper)
+    return ~(uint64_t)0;
+
+  // Assemble the lower 64 bits, least significant byte first
+  uint64_t result = 0;
+  for (int i = 0; i < 8; i++)
+    result |= (uint64_t)val[i] << (8 * i);
+  return result;
+}
+
+// Return the highest temperature value (Kelvin) found in the NVMe
+// SMART/Health log: the two byte little endian 'temperature' field or
+// any larger entry of the 'temp_sensor' array.
+static int nvme_get_max_temp_kelvin(const nvme_smart_log & smart_log)
+{
+  int max_kelvin = smart_log.temperature[0] | (smart_log.temperature[1] << 8);
+  for (auto sensor : smart_log.temp_sensor) {
+    if (sensor > max_kelvin)
+      max_kelvin = sensor; // cppcheck-suppress useStlAlgorithm
+  }
+  return max_kelvin;
+}
+
+// Check the NVMe Error Information log for device related errors.
+static bool check_nvme_error_log(const dev_config & cfg, dev_state & state, nvme_device * nvmedev,
+ uint64_t newcnt = 0)
+{
+ // Limit transfer size to one page (64 entries) to avoid problems with
+ // limits of NVMe pass-through layer or too low MDTS values.
+ unsigned want_entries = 64;
+ if (want_entries > cfg.nvme_err_log_max_entries)
+ want_entries = cfg.nvme_err_log_max_entries;
+ raw_buffer error_log_buf(want_entries * sizeof(nvme_error_log_page));
+ nvme_error_log_page * error_log =
+ reinterpret_cast<nvme_error_log_page *>(error_log_buf.data());
+ unsigned read_entries = nvme_read_error_log(nvmedev, error_log, want_entries, false /*!lpo_sup*/);
+ if (!read_entries) {
+ PrintOut(LOG_INFO, "Device: %s, Read %u entries from Error Information Log failed\n",
+ cfg.name.c_str(), want_entries);
+ return false;
+ }
+
+ if (!newcnt)
+ return true; // Support check only
+
+ // Scan log, find device related errors
+ uint64_t oldcnt = state.nvme_err_log_entries, mincnt = newcnt;
+ int err = 0, ign = 0;
+ for (unsigned i = 0; i < read_entries; i++) {
+ const nvme_error_log_page & e = error_log[i];
+ if (!e.error_count)
+ continue; // unused
+ if (e.error_count <= oldcnt)
+ break; // stop on first old entry
+ if (e.error_count < mincnt)
+ mincnt = e.error_count; // min known error
+ if (e.error_count > newcnt)
+ newcnt = e.error_count; // adjust maximum
+ uint16_t status = e.status_field >> 1;
+ if (!nvme_status_is_error(status) || nvme_status_to_errno(status) == EINVAL) {
+ ign++; // Not a device related error
+ continue;
+ }
+
+ // Log the most recent 8 errors
+ if (++err > 8)
+ continue;
+ char buf[64];
+ PrintOut(LOG_INFO, "Device: %s, NVMe error [%u], count %" PRIu64 ", status 0x%04x: %s\n",
+ cfg.name.c_str(), i, e.error_count, e.status_field,
+ nvme_status_to_info_str(buf, e.status_field >> 1));
+ }
+
+ std::string msg = strprintf("Device: %s, NVMe error count increased from %" PRIu64 " to %" PRIu64
+ " (%d new, %d ignored, %" PRIu64 " unknown)",
+ cfg.name.c_str(), oldcnt, newcnt, err, ign,
+ (mincnt > oldcnt + 1 ? mincnt - oldcnt - 1 : 0));
+ // LOG_CRIT only if device related errors are found
+ if (!err) {
+ PrintOut(LOG_INFO, "%s\n", msg.c_str());
+ }
+ else {
+ PrintOut(LOG_CRIT, "%s\n", msg.c_str());
+ MailWarning(cfg, state, 4, "%s", msg.c_str());
+ }
+
+ state.nvme_err_log_entries = newcnt;
+ state.must_write = true;
+ return true;
+}
+
+static int NVMeDeviceScan(dev_config & cfg, dev_state & state, nvme_device * nvmedev,
+ const dev_config_vector * prev_cfgs)
+{
+ const char *name = cfg.name.c_str();
+
+ /* Device must be open
+ *
+ * Registration scan for an NVMe device: reads Identify Controller and the
+ * SMART/Health log, fills in the identity fields of CFG and switches off
+ * the temperature and error log directives which the device cannot serve.
+ * Every return path closes the device via CloseDevice().
+ *
+ * Return values:
+ *  0: device registered, state file name set, finish_device_scan() called
+ *  1: same identity as an entry of PREV_CFGS (only checked if PREV_CFGS != nullptr)
+ *  2: Identify Controller or SMART/Health log read failed
+ *  3: no supported monitoring directive is left enabled
+ */
+
+ // Get ID Controller
+ nvme_id_ctrl id_ctrl;
+ if (!nvme_read_id_ctrl(nvmedev, id_ctrl)) {
+ PrintOut(LOG_INFO, "Device: %s, NVMe Identify Controller failed\n", name);
+ CloseDevice(nvmedev, name);
+ return 2;
+ }
+
+ // Get drive identity
+ char model[40+1], serial[20+1], firmware[8+1];
+ format_char_array(model, id_ctrl.mn);
+ format_char_array(serial, id_ctrl.sn);
+ format_char_array(firmware, id_ctrl.fr);
+
+ // Format device id string for warning emails (NSID is omitted if it is 0xffffffff, capacity if it is 0)
+ char nsstr[32] = "", capstr[32] = "";
+ unsigned nsid = nvmedev->get_nsid();
+ if (nsid != 0xffffffff)
+ snprintf(nsstr, sizeof(nsstr), ", NSID:%u", nsid);
+ uint64_t capacity = le128_to_uint64(id_ctrl.tnvmcap);
+ if (capacity)
+ format_capacity(capstr, sizeof(capstr), capacity, ".");
+ cfg.dev_idinfo = strprintf("%s, S/N:%s, FW:%s%s%s%s", model, serial, firmware,
+ nsstr, (capstr[0] ? ", " : ""), capstr);
+ cfg.id_is_unique = true; // TODO: Check serial?
+ if (sanitize_dev_idinfo(cfg.dev_idinfo))
+ cfg.id_is_unique = false;
+
+ PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
+
+ // Check for duplicates
+ if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
+ CloseDevice(nvmedev, name);
+ return 1;
+ }
+
+ // Read SMART/Health log
+ nvme_smart_log smart_log;
+ if (!nvme_read_smart_log(nvmedev, smart_log)) {
+ PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", name);
+ CloseDevice(nvmedev, name);
+ return 2;
+ }
+
+ // Check temperature sensor support (a maximum of 0 Kelvin is treated as "no sensor")
+ if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
+ if (!nvme_get_max_temp_kelvin(smart_log)) {
+ PrintOut(LOG_INFO, "Device: %s, no Temperature sensors, ignoring -W %d,%d,%d\n",
+ name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
+ cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
+ }
+ }
+
+ // Init total error count (check_nvme_error_log() is called without count, i.e. as support check only)
+ cfg.nvme_err_log_max_entries = id_ctrl.elpe + 1; // 0's based value
+ if (cfg.errorlog || cfg.xerrorlog) {
+ if (!check_nvme_error_log(cfg, state, nvmedev)) {
+ PrintOut(LOG_INFO, "Device: %s, Error Information unavailable, ignoring -l [x]error\n", name);
+ cfg.errorlog = cfg.xerrorlog = false;
+ }
+ else
+ state.nvme_err_log_entries = le128_to_uint64(smart_log.num_err_log_entries);
+ }
+
+ // If no supported tests selected, return
+ if (!( cfg.smartcheck || cfg.errorlog || cfg.xerrorlog
+ || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit )) {
+ CloseDevice(nvmedev, name);
+ return 3;
+ }
+
+ // Tell user we are registering device
+ PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n", name);
+
+ // Make sure that init_standby_check() ignores NVMe devices
+ cfg.offlinests_ns = cfg.selfteststs_ns = false;
+
+ CloseDevice(nvmedev, name);
+
+ if (!state_path_prefix.empty()) {
+ // Build file name for state file (model and serial are modified in place)
+ std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
+ std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
+ nsstr[0] = 0; // reuse buffer for the "-n<NSID>" file name part
+ if (nsid != 0xffffffff)
+ snprintf(nsstr, sizeof(nsstr), "-n%u", nsid);
+ cfg.state_file = strprintf("%s%s-%s%s.nvme.state", state_path_prefix.c_str(), model, serial, nsstr);
+ // Read previous state
+ if (read_dev_state(cfg.state_file.c_str(), state))
+ PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
+ }
+
+ finish_device_scan(cfg, state);
+
+ return 0;
+}
+
+// Open DEVICE for the next check cycle.
+// Sends the test mail if requested, honors the '-n' power mode directive
+// for ATA devices and keeps track of removed/reconnected removable devices.
+// TYPE is only used in messages.
+// Returns true if the device is open, false if this check is to be skipped
+// (device in standby or open() failed).
+static bool open_device(const dev_config & cfg, dev_state & state, smart_device * device,
+                        const char * type)
+{
+  const char * const name = cfg.name.c_str();
+
+  // If user has asked, test the email warning system
+  if (cfg.emailtest)
+    MailWarning(cfg, state, 0, "TEST EMAIL from smartd for device: %s", name);
+
+  // User may have requested (with the -n Directive) to leave the disk
+  // alone if it is in idle or standby mode.  In this case check the
+  // power mode first before opening the device for full access,
+  // and exit without check if disk is reported in standby.
+  // Note that 'is_powered_down()' handles opening the device itself, and
+  // can be used before calling 'open()' (that's the whole point of it).
+  const bool check_standby = (   device->is_ata() && cfg.powermode
+                              && !state.powermodefail && !state.removed);
+  if (   check_standby && device->is_powered_down()
+      // skip at most powerskipmax checks
+      && (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax)) {
+    // report first only except if state has changed, avoid waking up system disk
+    const bool report = (   (!state.powerskipcnt || state.lastpowermodeskipped != -1)
+                         && !cfg.powerquiet);
+    if (report) {
+      PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, "STANDBY (OS)");
+      state.lastpowermodeskipped = -1;
+    }
+    state.powerskipcnt++;
+    return false;
+  }
+
+  // if we can't open device, fail gracefully rather than hard --
+  // perhaps the next time around we'll be able to open it
+  if (!device->open()) {
+    if (cfg.removable) {
+      // For removable devices, print error message only once and suppress email
+      if (!state.removed) {
+        PrintOut(LOG_INFO, "Device: %s, removed %s device: %s\n", name, type, device->get_errmsg());
+        state.removed = true;
+      }
+      else if (debugmode)
+        PrintOut(LOG_INFO, "Device: %s, %s device still removed: %s\n", name, type, device->get_errmsg());
+    }
+    else {
+      PrintOut(LOG_INFO, "Device: %s, open() of %s device failed: %s\n", name, type, device->get_errmsg());
+      MailWarning(cfg, state, 9, "Device: %s, unable to open %s device", name, type);
+    }
+    return false;
+  }
+
+  if (debugmode)
+    PrintOut(LOG_INFO,"Device: %s, opened %s device\n", name, type);
+
+  if (cfg.removable) {
+    if (state.removed) {
+      PrintOut(LOG_INFO, "Device: %s, reconnected %s device\n", name, type);
+      state.removed = false;
+    }
+  }
+  else
+    reset_warning_mail(cfg, state, 9, "open of %s device worked again", type);
+
+  return true;
+}
+
+ // Compare the self-test log summary 'newi' with the values saved in 'state'
+ // and notify the user if more (or more recent) self-test errors are recorded.
+ // A negative 'newi' means that reading the self-test log failed.
+ static void CheckSelfTestLogs(const dev_config & cfg, dev_state & state, int newi)
+ {
+   const char * const devname = cfg.name.c_str();
+
+   if (newi < 0) {
+     // command failed
+     MailWarning(cfg, state, 8, "Device: %s, Read SMART Self-Test Log Failed", devname);
+     return;
+   }
+
+   reset_warning_mail(cfg, state, 8, "Read SMART Self-Test Log worked again");
+
+   // previous and current error counts
+   const int prev_count = state.selflogcount;
+   const int curr_count = SELFTEST_ERRORCOUNT(newi);
+
+   // previous and current error timestamps in hours
+   const int prev_hour = state.selfloghour;
+   const int curr_hour = SELFTEST_ERRORHOURS(newi);
+
+   if (curr_count > prev_count) {
+     // error count has increased
+     PrintOut(LOG_CRIT, "Device: %s, Self-Test Log error count increased from %d to %d\n",
+              devname, prev_count, curr_count);
+     MailWarning(cfg, state, 3, "Device: %s, Self-Test Log error count increased from %d to %d",
+                 devname, prev_count, curr_count);
+     state.must_write = true;
+   }
+   else if (curr_count > 0 && curr_hour != prev_hour) {
+     // Same or smaller count but a different timestamp: a more recent error.
+     // The 'more recent' hour number may actually be smaller if the hour
+     // counter has wrapped.
+     // Known limitation: if a new test is run exactly 32768 hours after the
+     // previous failure with exactly 20 tests in between, the new failure
+     // is missed.
+     PrintOut(LOG_CRIT, "Device: %s, new Self-Test Log error at hour timestamp %d\n",
+              devname, curr_hour);
+     MailWarning(cfg, state, 3, "Device: %s, new Self-Test Log error at hour timestamp %d",
+                 devname, curr_hour);
+     state.must_write = true;
+   }
+
+   // Print info if error entries have disappeared
+   // or a newer successful extended self-test exists
+   if (prev_count > curr_count) {
+     PrintOut(LOG_INFO, "Device: %s, Self-Test Log error count decreased from %d to %d\n",
+              devname, prev_count, curr_count);
+     if (curr_count == 0)
+       reset_warning_mail(cfg, state, 3, "Self-Test Log does no longer report errors");
+   }
+
+   // Always save the new values: the error count may also DECREASE
+   // and the hour may have changed.
+   state.selflogcount = curr_count;
+   state.selfloghour = curr_hour;
+ }
+
+ // Characters of the schedulable self-test types, highest priority first.
+ // The trailing NUL is required because the table is searched with strchr().
+ static const char test_type_chars[] = "LncrSCO";
+ // Number of test types (table size without the trailing NUL)
+ static const unsigned num_test_types = sizeof(test_type_chars) / sizeof(test_type_chars[0]) - 1;
+
+ // Returns the character (one of test_type_chars) of the self-test which is due, 0 if no test is due.
+ // If 'usetime' is nonzero it is used instead of the current time and the "old test" message is not printed.
+ static char next_scheduled_test(const dev_config & cfg, dev_state & state, bool scsi, time_t usetime = 0)
+ {
+ // check that self-testing has been requested
+ if (cfg.test_regex.empty())
+ return 0;
+
+ // Exit if drive not capable of any test
+ if ( state.not_cap_long && state.not_cap_short &&
+ (scsi || (state.not_cap_conveyance && state.not_cap_offline)))
+ return 0;
+
+ // since we are about to call localtime(), be sure glibc is informed
+ // of any timezone changes we make.
+ if (!usetime)
+ FixGlibcTimeZoneBug();
+
+ // Is it time for next check?
+ time_t now = (!usetime ? time(nullptr) : usetime);
+ if (now < state.scheduled_test_next_check) {
+ if (state.scheduled_test_next_check <= now + 3600)
+ return 0; // Next check within one hour
+ // More than one hour, assume system clock time adjusted to the past
+ state.scheduled_test_next_check = now;
+ }
+ else if (state.scheduled_test_next_check + (3600L*24*90) < now) {
+ // Limit time check interval to 90 days
+ state.scheduled_test_next_check = now - (3600L*24*90);
+ }
+
+ // Find ':NNN[-LLL]' in regex for possible offsets and limits
+ const unsigned max_offsets = 1 + num_test_types;
+ unsigned offsets[max_offsets] = {0, }, limits[max_offsets] = {0, };
+ unsigned num_offsets = 1; // offsets/limits[0] == 0 always
+ for (const char * p = cfg.test_regex.get_pattern(); num_offsets < max_offsets; ) {
+ const char * q = strchr(p, ':');
+ if (!q)
+ break;
+ p = q + 1;
+ unsigned offset = 0, limit = 0; int n1 = -1, n2 = -1, n3 = -1; // n1..n3 receive the %n positions, remain -1 if not reached
+ sscanf(p, "%u%n-%n%u%n", &offset, &n1, &n2, &limit, &n3);
+ if (!(n1 == 3 && (n2 < 0 || (n3 == 3+1+3 && limit > 0)))) // accept 3 chars for NNN and, if '-' follows, 3 chars for a nonzero LLL
+ continue;
+ offsets[num_offsets] = offset; limits[num_offsets] = limit;
+ num_offsets++;
+ p += (n3 > 0 ? n3 : n1); // continue search behind the part just parsed
+ }
+
+ // Check interval [state.scheduled_test_next_check, now] for scheduled tests
+ char testtype = 0;
+ time_t testtime = 0; int testhour = 0;
+ int maxtest = num_test_types-1; // highest index into test_type_chars still to be checked
+
+ for (time_t t = state.scheduled_test_next_check; ; ) {
+ // Check offset 0 and then all offsets for ':NNN' found above
+ for (unsigned i = 0; i < num_offsets; i++) {
+ unsigned offset = offsets[i], limit = limits[i];
+ unsigned delay = cfg.test_offset_factor * offset; // delay in hours, see '* 3600' below
+ if (0 < limit && limit < delay)
+ delay %= limit + 1; // wrap delay into range 0..limit
+ struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, t - (delay * 3600));
+
+ // tm_wday is 0 (Sunday) to 6 (Saturday). We use 1 (Monday) to 7 (Sunday).
+ int weekday = (tms->tm_wday ? tms->tm_wday : 7);
+ for (int j = 0; j <= maxtest; j++) {
+ // Skip if drive not capable of this test
+ switch (test_type_chars[j]) {
+ case 'L': if (state.not_cap_long) continue; break;
+ case 'S': if (state.not_cap_short) continue; break;
+ case 'C': if (scsi || state.not_cap_conveyance) continue; break;
+ case 'O': if (scsi || state.not_cap_offline) continue; break;
+ case 'c': case 'n':
+ case 'r': if (scsi || state.not_cap_selective) continue; break;
+ default: continue;
+ }
+ // Try match of "T/MM/DD/d/HH[:NNN[-LLL]]"
+ char pattern[64];
+ snprintf(pattern, sizeof(pattern), "%c/%02d/%02d/%1d/%02d",
+ test_type_chars[j], tms->tm_mon+1, tms->tm_mday, weekday, tms->tm_hour);
+ if (i > 0) {
+ const unsigned len = sizeof("S/01/01/1/01") - 1; // length of the "T/MM/DD/d/HH" part
+ snprintf(pattern + len, sizeof(pattern) - len, ":%03u", offset);
+ if (limit > 0)
+ snprintf(pattern + len + 4, sizeof(pattern) - len - 4, "-%03u", limit); // 4 == length of ":NNN"
+ }
+ if (cfg.test_regex.full_match(pattern)) {
+ // Test found
+ testtype = pattern[0];
+ testtime = t; testhour = tms->tm_hour;
+ // Limit further matches to higher priority self-tests
+ maxtest = j-1;
+ break;
+ }
+ }
+ }
+
+ // Exit if no tests left or current time reached
+ if (maxtest < 0)
+ break;
+ if (t >= now)
+ break;
+ // Check next hour
+ if ((t += 3600) > now)
+ t = now; // never check beyond the current time
+ }
+
+ // Do next check not before next hour.
+ struct tm tmbuf, * tmnow = time_to_tm_local(&tmbuf, now);
+ state.scheduled_test_next_check = now + (3600 - tmnow->tm_min*60 - tmnow->tm_sec); // seconds left until the minute and second fields are both zero
+
+ if (testtype) {
+ state.must_write = true;
+ // Tell user if an old test was found.
+ if (!usetime && !(testhour == tmnow->tm_hour && testtime + 3600 > now)) {
+ char datebuf[DATEANDEPOCHLEN]; dateandtimezoneepoch(datebuf, testtime);
+ PrintOut(LOG_INFO, "Device: %s, old test of type %c not run at %s, starting now.\n",
+ cfg.name.c_str(), testtype, datebuf);
+ }
+ }
+
+ return testtype;
+ }
+
+ // Print a list of future tests.
+ // Calls next_scheduled_test() for every device at each 'checktime' step
+ // within the next 90 days, prints at most 5 tests of each type per device
+ // and finally the totals per device and test type.
+ static void PrintTestSchedule(const dev_config_vector & configs, dev_state_vector & states, const smart_device_list & devices)
+ {
+   const unsigned numdev = configs.size();
+   if (numdev == 0)
+     return;
+
+   // One counter for each (device, test type) pair
+   std::vector<int> testcnts(numdev * num_test_types, 0);
+
+   PrintOut(LOG_INFO, "\nNext scheduled self tests (at most 5 of each type per device):\n");
+
+   // FixGlibcTimeZoneBug(); // done in PrintOut()
+   const time_t now = time(nullptr);
+   char datenow[DATEANDEPOCHLEN], date[DATEANDEPOCHLEN];
+   dateandtimezoneepoch(datenow, now);
+
+   const long horizon = 3600L*24*90;
+   long seconds = checktime;
+   while (seconds < horizon) {
+     // Ask each device whether a test would be run at this point in time
+     const time_t testtime = now + seconds;
+     for (unsigned i = 0; i < numdev; i++) {
+       const dev_config & cfg = configs.at(i);
+       dev_state & state = states.at(i);
+       const char testtype = next_scheduled_test(cfg, state, devices.at(i)->is_scsi(), testtime);
+       if (!testtype)
+         continue;
+       const char * const pos = strchr(test_type_chars, testtype);
+       if (!pos)
+         continue;
+
+       const unsigned t = (pos - test_type_chars);
+       int & count = testcnts[i*num_test_types + t];
+       ++count;
+       // Report at most 5 tests of each type
+       if (count <= 5) {
+         dateandtimezoneepoch(date, testtime);
+         PrintOut(LOG_INFO, "Device: %s, will do test %d of type %c at %s\n", cfg.name.c_str(),
+                  count, testtype, date);
+       }
+     }
+     seconds += checktime;
+   }
+
+   // Report totals
+   dateandtimezoneepoch(date, now+seconds);
+   PrintOut(LOG_INFO, "\nTotals [%s - %s]:\n", datenow, date);
+   for (unsigned i = 0; i < numdev; i++) {
+     const dev_config & cfg = configs.at(i);
+     const bool scsi = devices.at(i)->is_scsi();
+     // Types which are always listed, even with a zero count
+     const char * const always_listed = (scsi ? "LS" : "LSCO");
+     for (unsigned t = 0; t < num_test_types; t++) {
+       const int cnt = testcnts[i*num_test_types + t];
+       const char typechar = test_type_chars[t];
+       if (cnt == 0 && !strchr(always_listed, typechar))
+         continue;
+       PrintOut(LOG_INFO, "Device: %s, will do %3d test%s of type %c\n", cfg.name.c_str(),
+                cnt, (cnt==1?"":"s"), typechar);
+     }
+   }
+
+ }
+
+ // Start a background short ('S') or long/extended ('L') self-test on the
+ // given SCSI device. Returns zero if the test was started, nonzero if not.
+ // Sets state.not_cap_short/not_cap_long if the device turns out to be
+ // not capable of the test.
+ static int DoSCSISelfTest(const dev_config & cfg, dev_state & state, scsi_device * device, char testtype)
+ {
+   const char * const devname = cfg.name.c_str();
+
+   // Never start a test while another one is running
+   int inProgress;
+   if (scsiSelfTestInProgress(device, &inProgress)) {
+     PrintOut(LOG_CRIT, "Device: %s, does not support Self-Tests\n", devname);
+     state.not_cap_short = state.not_cap_long = true;
+     return 1;
+   }
+   if (inProgress == 1) {
+     PrintOut(LOG_INFO, "Device: %s, skip since Self-Test already in progress.\n", devname);
+     return 1;
+   }
+
+   // Issue the command for the requested test type
+   const char * testname = nullptr;
+   int status = 0;
+   if (testtype == 'S') {
+     testname = "Short Self";
+     status = scsiSmartShortSelfTest(device);
+   }
+   else if (testtype == 'L') {
+     testname = "Long Self";
+     status = scsiSmartExtendSelfTest(device);
+   }
+
+   // Test type is not available for SCSI devices
+   if (!testname) {
+     PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", devname,
+              testtype);
+     return 1;
+   }
+
+   if (!status) {
+     PrintOut(LOG_INFO, "Device: %s, starting scheduled %s-Test.\n", devname, testname);
+     return 0;
+   }
+
+   if (status == SIMPLE_ERR_BAD_OPCODE || status == SIMPLE_ERR_BAD_FIELD) {
+     // Command was rejected: remember that this test is not supported
+     PrintOut(LOG_CRIT, "Device: %s, not capable of %s-Test\n", devname,
+              testname);
+     if (testtype == 'L')
+       state.not_cap_long = true;
+     else
+       state.not_cap_short = true;
+     return 1;
+   }
+
+   PrintOut(LOG_CRIT, "Device: %s, execute %s-Test failed (err: %d)\n", devname,
+            testname, status);
+   return 1;
+ }
+
+ // Do an offline immediate or self-test on the given ATA device.
+ // 'testtype' is one of: 'O' (offline immediate), 'C' (conveyance), 'S' (short),
+ // 'L' (long), 'c'/'n'/'r' (selective: continue/next/redo span).
+ // Sets the matching state.not_cap_* flag if the device reports that the test
+ // is not supported, and state.offline_started/selftest_started if the test
+ // was started.
+ // Returns zero on success, nonzero on failure or if the test was skipped.
+ static int DoATASelfTest(const dev_config & cfg, dev_state & state, ata_device * device, char testtype)
+ {
+   const char *name = cfg.name.c_str();
+
+   // Read current smart data and check status/capability
+   struct ata_smart_values data;
+   if (ataReadSmartValues(device, &data) || !(data.offline_data_collection_capability)) {
+     PrintOut(LOG_CRIT, "Device: %s, not capable of Offline or Self-Testing.\n", name);
+     return 1;
+   }
+
+   // Check for capability to do the test
+   int dotest = -1, mode = 0;
+   const char *testname = nullptr;
+   switch (testtype) {
+   case 'O':
+     testname="Offline Immediate ";
+     if (isSupportExecuteOfflineImmediate(&data))
+       dotest=OFFLINE_FULL_SCAN;
+     else
+       state.not_cap_offline = true;
+     break;
+   case 'C':
+     testname="Conveyance Self-";
+     if (isSupportConveyanceSelfTest(&data))
+       dotest=CONVEYANCE_SELF_TEST;
+     else
+       state.not_cap_conveyance = true;
+     break;
+   case 'S':
+     testname="Short Self-";
+     if (isSupportSelfTest(&data))
+       dotest=SHORT_SELF_TEST;
+     else
+       state.not_cap_short = true;
+     break;
+   case 'L':
+     testname="Long Self-";
+     if (isSupportSelfTest(&data))
+       dotest=EXTEND_SELF_TEST;
+     else
+       state.not_cap_long = true;
+     break;
+
+   case 'c': case 'n': case 'r':
+     testname = "Selective Self-";
+     if (isSupportSelectiveSelfTest(&data)) {
+       dotest = SELECTIVE_SELF_TEST;
+       switch (testtype) {
+       case 'c': mode = SEL_CONT; break;
+       case 'n': mode = SEL_NEXT; break;
+       case 'r': mode = SEL_REDO; break;
+       }
+     }
+     else
+       state.not_cap_selective = true;
+     break;
+   }
+
+   // Unknown test type: 'testname' is still null and must not be passed to
+   // a '%s' conversion. Report the type character instead, as
+   // DoSCSISelfTest() does.
+   if (!testname) {
+     PrintOut(LOG_CRIT, "Device: %s, not capable of %c Self-Test\n", name, testtype);
+     return 1;
+   }
+
+   // If we can't do the test, exit
+   if (dotest<0) {
+     PrintOut(LOG_CRIT, "Device: %s, not capable of %sTest\n", name, testname);
+     return 1;
+   }
+
+   // If currently running a self-test, do not interrupt it to start another.
+   // Upper nibble 15 of the status byte indicates a test in progress,
+   // the lower nibble is reported as remaining tenths.
+   if (15==(data.self_test_exec_status >> 4)) {
+     if (cfg.firmwarebugs.is_set(BUG_SAMSUNG3) && data.self_test_exec_status == 0xf0) {
+       PrintOut(LOG_INFO, "Device: %s, will not skip scheduled %sTest "
+                "despite unclear Self-Test byte (SAMSUNG Firmware bug).\n", name, testname);
+     } else {
+       PrintOut(LOG_INFO, "Device: %s, skip scheduled %sTest; %1d0%% remaining of current Self-Test.\n",
+                name, testname, (int)(data.self_test_exec_status & 0x0f));
+       return 1;
+     }
+   }
+
+   if (dotest == SELECTIVE_SELF_TEST) {
+     // Set test span, the span of the previous test is passed via 'prev_args'
+     ata_selective_selftest_args selargs, prev_args;
+     selargs.num_spans = 1;
+     selargs.span[0].mode = mode;
+     prev_args.num_spans = 1;
+     prev_args.span[0].start = state.selective_test_last_start;
+     prev_args.span[0].end = state.selective_test_last_end;
+     if (ataWriteSelectiveSelfTestLog(device, selargs, &data, state.num_sectors, &prev_args)) {
+       PrintOut(LOG_CRIT, "Device: %s, prepare %sTest failed\n", name, testname);
+       return 1;
+     }
+     // Report the span selected and remember it for the next selective test
+     uint64_t start = selargs.span[0].start, end = selargs.span[0].end;
+     PrintOut(LOG_INFO, "Device: %s, %s test span at LBA %" PRIu64 " - %" PRIu64 " (%" PRIu64 " sectors, %u%% - %u%% of disk).\n",
+              name, (selargs.span[0].mode == SEL_NEXT ? "next" : "redo"),
+              start, end, end - start + 1,
+              (unsigned)((100 * start + state.num_sectors/2) / state.num_sectors),
+              (unsigned)((100 * end + state.num_sectors/2) / state.num_sectors));
+     state.selective_test_last_start = start;
+     state.selective_test_last_end = end;
+   }
+
+   // execute the test, and return status
+   int retval = smartcommandhandler(device, IMMEDIATE_OFFLINE, dotest, nullptr);
+   if (retval) {
+     PrintOut(LOG_CRIT, "Device: %s, execute %sTest failed.\n", name, testname);
+     return retval;
+   }
+
+   // Report recent test start to do_disable_standby_check()
+   // and force log of next test status
+   if (testtype == 'O')
+     state.offline_started = true;
+   else
+     state.selftest_started = true;
+
+   PrintOut(LOG_INFO, "Device: %s, starting scheduled %sTest.\n", name, testname);
+   return 0;
+ }
+
+ // Check pending sector count attribute values (-C, -U directives).
+ // id:            ID of the attribute to check
+ // increase_only: if true, report only if the raw value has increased since
+ //                the values saved in state.smartval
+ // smartval:      attribute values just read from the device
+ // mailtype:      warning mail type passed to MailWarning()/reset_warning_mail()
+ // msg:           text describing the kind of sectors counted
+ static void check_pending(const dev_config & cfg, dev_state & state,
+                           unsigned char id, bool increase_only,
+                           const ata_smart_values & smartval,
+                           int mailtype, const char * msg)
+ {
+   // Find attribute index, it must be the same in current and saved values
+   int i = ata_find_attr_index(id, smartval);
+   if (!(i >= 0 && ata_find_attr_index(id, state.smartval) == i))
+     return;
+
+   // No report if no sectors pending.
+   uint64_t rawval = ata_get_attr_raw_value(smartval.vendor_attributes[i], cfg.attribute_defs);
+   if (rawval == 0) {
+     reset_warning_mail(cfg, state, mailtype, "No more %s", msg);
+     return;
+   }
+
+   // If attribute is not reset, report only sector count increases.
+   uint64_t prev_rawval = ata_get_attr_raw_value(state.smartval.vendor_attributes[i], cfg.attribute_defs);
+   if (!(!increase_only || prev_rawval < rawval))
+     return;
+
+   // Format message.
+   // The count is unsigned, so it is printed with PRIu64. The change may be
+   // negative, so the difference is converted to a signed value for '%+'.
+   std::string s = strprintf("Device: %s, %" PRIu64 " %s", cfg.name.c_str(), rawval, msg);
+   if (prev_rawval > 0 && rawval != prev_rawval) {
+     const int64_t delta = static_cast<int64_t>(rawval - prev_rawval);
+     s += strprintf(" (changed %+" PRId64 ")", delta);
+   }
+
+   PrintOut(LOG_CRIT, "%s\n", s.c_str());
+   MailWarning(cfg, state, mailtype, "%s", s.c_str());
+   state.must_write = true;
+ }
+
+ // Format Temperature value.
+ // Returns "??" if 'x' is zero (unset), else the decimal value written to 'buf'.
+ static const char * fmt_temp(unsigned char x, char (& buf)[20])
+ {
+   if (x) {
+     snprintf(buf, sizeof(buf), "%u", x);
+     return buf;
+   }
+   // unset
+   return "??";
+ }
+
+ // Check Temperature limits.
+ // Updates the Min/Max temperatures recorded in 'state', logs temperature
+ // changes (cfg.tempdiff) and reports when the informal (cfg.tempinfo) or
+ // critical (cfg.tempcrit) limit is reached.
+ // 'currtemp' values 0 and 255 are treated as a failed read.
+ // 'triptemp' is printed once on the first check if nonzero.
+ static void CheckTemperature(const dev_config & cfg, dev_state & state, unsigned char currtemp, unsigned char triptemp)
+ {
+   const char * const devname = cfg.name.c_str();
+
+   if (currtemp == 0 || currtemp == 255) {
+     PrintOut(LOG_INFO, "Device: %s, failed to read Temperature\n", devname);
+     return;
+   }
+
+   // "!" marks a Min or Max value which has just changed
+   const char * min_mark = "";
+   const char * max_mark = "";
+
+   // Update Max Temperature
+   if (state.tempmax < currtemp) {
+     if (state.tempmax)
+       max_mark = "!";
+     state.tempmax = currtemp;
+     state.must_write = true;
+   }
+
+   char buf[20];
+   if (state.temperature) {
+     // End Min Temperature update delay if the current temperature exceeds
+     // the recorded minimum or if the delay time is over
+     if (   state.tempmin_delay
+         && (   (state.tempmin && currtemp > state.tempmin)
+             || (state.tempmin_delay <= time(nullptr)))) {
+       state.tempmin_delay = 0;
+       if (!state.tempmin)
+         state.tempmin = 255;
+     }
+
+     // Update Min Temperature
+     if (!state.tempmin_delay && currtemp < state.tempmin) {
+       state.tempmin = currtemp;
+       state.must_write = true;
+       if (currtemp != state.temperature)
+         min_mark = "!";
+     }
+
+     // Track changes
+     if (cfg.tempdiff && (*min_mark || *max_mark || abs((int)currtemp - (int)state.temperature) >= cfg.tempdiff)) {
+       PrintOut(LOG_INFO, "Device: %s, Temperature changed %+d Celsius to %u Celsius (Min/Max %s%s/%u%s)\n",
+                devname, (int)currtemp-(int)state.temperature, currtemp, fmt_temp(state.tempmin, buf), min_mark, state.tempmax, max_mark);
+       state.temperature = currtemp;
+     }
+   }
+   else {
+     // First check
+     if (!state.tempmin || currtemp < state.tempmin)
+       // Delay Min Temperature update by ~ 30 minutes.
+       state.tempmin_delay = time(nullptr) + default_checktime - 60;
+     PrintOut(LOG_INFO, "Device: %s, initial Temperature is %d Celsius (Min/Max %s/%u%s)\n",
+              devname, (int)currtemp, fmt_temp(state.tempmin, buf), state.tempmax, max_mark);
+     if (triptemp)
+       PrintOut(LOG_INFO, " [trip Temperature is %d Celsius]\n", (int)triptemp);
+     state.temperature = currtemp;
+   }
+
+   // Check limits
+   if (cfg.tempcrit && currtemp >= cfg.tempcrit) {
+     // Critical limit reached: log and send warning mail
+     PrintOut(LOG_CRIT, "Device: %s, Temperature %u Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)\n",
+              devname, currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), min_mark, state.tempmax, max_mark);
+     MailWarning(cfg, state, 12, "Device: %s, Temperature %d Celsius reached critical limit of %u Celsius (Min/Max %s%s/%u%s)",
+                 devname, currtemp, cfg.tempcrit, fmt_temp(state.tempmin, buf), min_mark, state.tempmax, max_mark);
+   }
+   else if (cfg.tempinfo && currtemp >= cfg.tempinfo) {
+     // Informal limit reached: log only
+     PrintOut(LOG_INFO, "Device: %s, Temperature %u Celsius reached limit of %u Celsius (Min/Max %s%s/%u%s)\n",
+              devname, currtemp, cfg.tempinfo, fmt_temp(state.tempmin, buf), min_mark, state.tempmax, max_mark);
+   }
+   else if (cfg.tempcrit) {
+     // Reset the warning if the temperature dropped below the informal limit
+     // or, if that is not set, below 5 Celsius less than the critical limit
+     unsigned char limit = (cfg.tempinfo ? cfg.tempinfo : cfg.tempcrit-5);
+     if (currtemp < limit)
+       reset_warning_mail(cfg, state, 12, "Temperature %u Celsius dropped below %u Celsius", currtemp, limit);
+   }
+ }
+
+ // Check normalized and raw attribute values.
+ // Reports a failed usage attribute if requested (cfg.usagefailed) and logs
+ // changes of the tracked attribute 'attr' compared to its previous value 'prev'.
+ // 'attridx' and 'thresholds' are passed on to ata_get_attr_state().
+ static void check_attribute(const dev_config & cfg, dev_state & state,
+                             const ata_smart_attribute & attr,
+                             const ata_smart_attribute & prev,
+                             int attridx,
+                             const ata_smart_threshold_entry * thresholds)
+ {
+   const char * const devname = cfg.name.c_str();
+
+   // Check attribute and threshold
+   const ata_attr_state attrstate = ata_get_attr_state(attr, attridx, thresholds, cfg.attribute_defs);
+   if (attrstate == ATTRSTATE_NON_EXISTING)
+     return;
+
+   // If requested, check for usage attributes that have failed.
+   if (   cfg.usagefailed && attrstate == ATTRSTATE_FAILED_NOW
+       && !cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGN_FAILUSE)) {
+     const std::string failname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
+     PrintOut(LOG_CRIT, "Device: %s, Failed SMART usage Attribute: %d %s.\n", devname, attr.id, failname.c_str());
+     MailWarning(cfg, state, 2, "Device: %s, Failed SMART usage Attribute: %d %s.", devname, attr.id, failname.c_str());
+     state.must_write = true;
+   }
+
+   // Return if we're not tracking this type of attribute
+   const bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(attr.flags);
+   const bool tracked = (prefail ? !!cfg.prefail : !!cfg.usage);
+   if (!tracked)
+     return;
+
+   // Return if '-I ID' was specified
+   if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_IGNORE))
+     return;
+
+   // Issue warning if they don't have the same ID in all structures.
+   if (attr.id != prev.id) {
+     PrintOut(LOG_INFO,"Device: %s, same Attribute has different ID numbers: %d = %d\n",
+              devname, attr.id, prev.id);
+     return;
+   }
+
+   // Compare normalized values if valid.
+   const bool valchanged = (attrstate > ATTRSTATE_NO_NORMVAL && attr.current != prev.current);
+
+   // Compare raw values if requested.
+   const bool rawchanged =
+     (   cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW)
+      &&    ata_get_attr_raw_value(attr, cfg.attribute_defs)
+         != ata_get_attr_raw_value(prev, cfg.attribute_defs));
+
+   // Return if no change
+   if (!valchanged && !rawchanged)
+     return;
+
+   // Format value strings
+   std::string currstr, prevstr;
+   if (attrstate == ATTRSTATE_NO_NORMVAL) {
+     // Print raw values only
+     const std::string currraw = ata_format_attr_raw_value(attr, cfg.attribute_defs);
+     currstr = strprintf("%s (Raw)", currraw.c_str());
+     const std::string prevraw = ata_format_attr_raw_value(prev, cfg.attribute_defs);
+     prevstr = strprintf("%s (Raw)", prevraw.c_str());
+   }
+   else if (cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_PRINT)) {
+     // Print normalized and raw values
+     const std::string currraw = ata_format_attr_raw_value(attr, cfg.attribute_defs);
+     currstr = strprintf("%d [Raw %s]", attr.current, currraw.c_str());
+     const std::string prevraw = ata_format_attr_raw_value(prev, cfg.attribute_defs);
+     prevstr = strprintf("%d [Raw %s]", prev.current, prevraw.c_str());
+   }
+   else {
+     // Print normalized values only
+     currstr = strprintf("%d", attr.current);
+     prevstr = strprintf("%d", prev.current);
+   }
+
+   // Format message
+   const std::string attrname = ata_get_smart_attr_name(attr.id, cfg.attribute_defs, cfg.dev_rpm);
+   const std::string msg = strprintf("Device: %s, SMART %s Attribute: %d %s changed from %s to %s",
+                                     devname, (prefail ? "Prefailure" : "Usage"), attr.id,
+                                     attrname.c_str(),
+                                     prevstr.c_str(), currstr.c_str());
+
+   // Report this change as critical ?
+   const bool critical =
+     (   (valchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_AS_CRIT))
+      || (rawchanged && cfg.monitor_attr_flags.is_set(attr.id, MONITOR_RAW_AS_CRIT)));
+   if (critical) {
+     PrintOut(LOG_CRIT, "%s\n", msg.c_str());
+     MailWarning(cfg, state, 2, "%s", msg.c_str());
+   }
+   else
+     PrintOut(LOG_INFO, "%s\n", msg.c_str());
+
+   state.must_write = true;
+ }
+
+
+ // Run one check cycle for an ATA device:
+ // power mode handling (-n Directive), SMART status, checks which depend on
+ // SMART attribute data, self-test log, error logs and scheduled self-tests.
+ // firstpass:       enables logging of the initial offline/self-test status
+ // allow_selftests: if false, no scheduled self-test is started
+ // Returns 1 if open_device() returned false, 0 otherwise (also if the check
+ // was skipped due to the power mode).
+ static int ATACheckDevice(const dev_config & cfg, dev_state & state, ata_device * atadev,
+                           bool firstpass, bool allow_selftests)
+ {
+   if (!open_device(cfg, state, atadev, "ATA"))
+     return 1;
+
+   const char * name = cfg.name.c_str();
+
+   // user may have requested (with the -n Directive) to leave the disk
+   // alone if it is in idle or sleeping mode. In this case check the
+   // power mode and exit without check if needed
+   if (cfg.powermode && !state.powermodefail) {
+     int dontcheck=0, powermode=ataCheckPowerMode(atadev);
+     const char * mode = nullptr;
+     if (0 <= powermode && powermode < 0xff) {
+       // wait for possible spin up and check again
+       int powermode2;
+       sleep(5);
+       powermode2 = ataCheckPowerMode(atadev);
+       if (powermode2 > powermode)
+         PrintOut(LOG_INFO, "Device: %s, CHECK POWER STATUS spins up disk (0x%02x -> 0x%02x)\n", name, powermode, powermode2);
+       powermode = powermode2;
+     }
+
+     switch (powermode){
+     case -1:
+       // SLEEP
+       mode="SLEEP";
+       if (cfg.powermode>=1)
+         dontcheck=1;
+       break;
+     case 0x00:
+       // STANDBY
+       mode="STANDBY";
+       if (cfg.powermode>=2)
+         dontcheck=1;
+       break;
+     case 0x01:
+       // STANDBY_Y
+       mode="STANDBY_Y";
+       if (cfg.powermode>=2)
+         dontcheck=1;
+       break;
+     case 0x80:
+       // IDLE
+       mode="IDLE";
+       if (cfg.powermode>=3)
+         dontcheck=1;
+       break;
+     case 0x81:
+       // IDLE_A
+       mode="IDLE_A";
+       if (cfg.powermode>=3)
+         dontcheck=1;
+       break;
+     case 0x82:
+       // IDLE_B
+       mode="IDLE_B";
+       if (cfg.powermode>=3)
+         dontcheck=1;
+       break;
+     case 0x83:
+       // IDLE_C
+       mode="IDLE_C";
+       if (cfg.powermode>=3)
+         dontcheck=1;
+       break;
+     case 0xff:
+       // ACTIVE/IDLE
+     case 0x40:
+       // ACTIVE
+     case 0x41:
+       // ACTIVE
+       mode="ACTIVE or IDLE";
+       break;
+     default:
+       // UNKNOWN
+       // 'mode' must not remain null here: it is printed with '%s' below
+       // if checks were skipped before this call.
+       mode="UNKNOWN";
+       PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
+                name, powermode);
+       state.powermodefail = true;
+       break;
+     }
+
+     // if we are going to skip a check, return now
+     if (dontcheck){
+       // skip at most powerskipmax checks
+       if (!cfg.powerskipmax || state.powerskipcnt<cfg.powerskipmax) {
+         CloseDevice(atadev, name);
+         // report first only except if state has changed, avoid waking up system disk
+         if ((!state.powerskipcnt || state.lastpowermodeskipped != powermode) && !cfg.powerquiet) {
+           PrintOut(LOG_INFO, "Device: %s, is in %s mode, suspending checks\n", name, mode);
+           state.lastpowermodeskipped = powermode;
+         }
+         state.powerskipcnt++;
+         return 0;
+       }
+       else {
+         PrintOut(LOG_INFO, "Device: %s, %s mode ignored due to reached limit of skipped checks (%d check%s skipped)\n",
+                  name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
+       }
+       state.powerskipcnt = 0;
+       state.tempmin_delay = time(nullptr) + default_checktime - 60; // Delay Min Temperature update
+     }
+     else if (state.powerskipcnt) {
+       PrintOut(LOG_INFO, "Device: %s, is back in %s mode, resuming checks (%d check%s skipped)\n",
+                name, mode, state.powerskipcnt, (state.powerskipcnt==1?"":"s"));
+       state.powerskipcnt = 0;
+       state.tempmin_delay = time(nullptr) + default_checktime - 60; // Delay Min Temperature update
+     }
+   }
+
+   // check smart status
+   if (cfg.smartcheck) {
+     int status=ataSmartStatus2(atadev);
+     if (status==-1){
+       PrintOut(LOG_INFO,"Device: %s, not capable of SMART self-check\n",name);
+       MailWarning(cfg, state, 5, "Device: %s, not capable of SMART self-check", name);
+       state.must_write = true;
+     }
+     else if (status==1){
+       PrintOut(LOG_CRIT, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!\n", name);
+       MailWarning(cfg, state, 1, "Device: %s, FAILED SMART self-check. BACK UP DATA NOW!", name);
+       state.must_write = true;
+     }
+   }
+
+   // Check everything that depends upon SMART Data (eg, Attribute values)
+   if (   cfg.usagefailed || cfg.prefail || cfg.usage
+       || cfg.curr_pending_id || cfg.offl_pending_id
+       || cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
+       || cfg.selftest || cfg.offlinests || cfg.selfteststs) {
+
+     // Read current attribute values.
+     ata_smart_values curval;
+     if (ataReadSmartValues(atadev, &curval)){
+       PrintOut(LOG_CRIT, "Device: %s, failed to read SMART Attribute Data\n", name);
+       MailWarning(cfg, state, 6, "Device: %s, failed to read SMART Attribute Data", name);
+       state.must_write = true;
+     }
+     else {
+       reset_warning_mail(cfg, state, 6, "read SMART Attribute Data worked again");
+
+       // look for current or offline pending sectors
+       if (cfg.curr_pending_id)
+         check_pending(cfg, state, cfg.curr_pending_id, cfg.curr_pending_incr, curval, 10,
+                       (!cfg.curr_pending_incr ? "Currently unreadable (pending) sectors"
+                                               : "Total unreadable (pending) sectors" ));
+
+       if (cfg.offl_pending_id)
+         check_pending(cfg, state, cfg.offl_pending_id, cfg.offl_pending_incr, curval, 11,
+                       (!cfg.offl_pending_incr ? "Offline uncorrectable sectors"
+                                               : "Total offline uncorrectable sectors"));
+
+       // check temperature limits
+       if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
+         CheckTemperature(cfg, state, ata_return_temperature_value(&curval, cfg.attribute_defs), 0);
+
+       // look for failed usage attributes, or track usage or prefail attributes
+       if (cfg.usagefailed || cfg.prefail || cfg.usage) {
+         for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
+           check_attribute(cfg, state,
+                           curval.vendor_attributes[i],
+                           state.smartval.vendor_attributes[i],
+                           i, state.smartthres.thres_entries);
+         }
+       }
+
+       // Log changes of offline data collection status
+       if (cfg.offlinests) {
+         if (   curval.offline_data_collection_status
+                != state.smartval.offline_data_collection_status
+             || state.offline_started // test was started in previous call
+             || (firstpass && (debugmode || (curval.offline_data_collection_status & 0x7d))))
+           log_offline_data_coll_status(name, curval.offline_data_collection_status);
+       }
+
+       // Log changes of self-test execution status
+       if (cfg.selfteststs) {
+         if (   curval.self_test_exec_status != state.smartval.self_test_exec_status
+             || state.selftest_started // test was started in previous call
+             || (firstpass && (debugmode || (curval.self_test_exec_status & 0xf0))))
+           log_self_test_exec_status(name, curval.self_test_exec_status);
+       }
+
+       // Save the new values for the next time around
+       state.smartval = curval;
+       state.update_persistent_state();
+       state.attrlog_dirty = true;
+     }
+   }
+   // The 'test started' flags are only valid for the check following the start
+   state.offline_started = state.selftest_started = false;
+
+   // check if number of selftest errors has increased (note: may also DECREASE)
+   if (cfg.selftest)
+     CheckSelfTestLogs(cfg, state, SelfTestErrorCount(atadev, name, cfg.firmwarebugs));
+
+   // check if number of ATA errors has increased
+   if (cfg.errorlog || cfg.xerrorlog) {
+
+     int errcnt1 = -1, errcnt2 = -1;
+     if (cfg.errorlog)
+       errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false);
+     if (cfg.xerrorlog)
+       errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true);
+
+     // new number of errors is max of both logs
+     int newc = (errcnt1 >= errcnt2 ? errcnt1 : errcnt2);
+
+     // did command fail?
+     if (newc<0)
+       // lack of PrintOut here is INTENTIONAL
+       MailWarning(cfg, state, 7, "Device: %s, Read SMART Error Log Failed", name);
+
+     // has error count increased?
+     int oldc = state.ataerrorcount;
+     if (newc>oldc){
+       PrintOut(LOG_CRIT, "Device: %s, ATA error count increased from %d to %d\n",
+                name, oldc, newc);
+       MailWarning(cfg, state, 4, "Device: %s, ATA error count increased from %d to %d",
+                   name, oldc, newc);
+       state.must_write = true;
+     }
+
+     // keep the saved count if reading the log failed
+     if (newc>=0)
+       state.ataerrorcount=newc;
+   }
+
+   // if the user has asked, and device is capable (or we're not yet
+   // sure) check whether a self test should be done now.
+   if (allow_selftests && !cfg.test_regex.empty()) {
+     char testtype = next_scheduled_test(cfg, state, false/*!scsi*/);
+     if (testtype)
+       DoATASelfTest(cfg, state, atadev, testtype);
+   }
+
+   // Don't leave device open -- the OS/user may want to access it
+   // before the next smartd cycle!
+   CloseDevice(atadev, name);
+   return 0;
+ }
+
+// Run one check cycle on a SCSI device: SMART health (Informational
+// Exceptions), temperature limits, self-test log and scheduled self-tests.
+// If an attribute log file is configured, the error counter pages and the
+// current temperature are also saved to 'state'.
+// Returns 1 if the device could not be opened, 0 otherwise.
+static int SCSICheckDevice(const dev_config & cfg, dev_state & state, scsi_device * scsidev, bool allow_selftests)
+{
+  if (!open_device(cfg, state, scsidev, "SCSI"))
+    return 1;
+
+  const char * const devname = cfg.name.c_str();
+
+  // Any of the three temperature settings enables temperature tracking
+  const bool track_temp = (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit);
+
+  // Read health status and temperatures unless reporting is suppressed
+  uint8_t asc = 0, ascq = 0;
+  uint8_t currenttemp = 0, triptemp = 0;
+  if (   !state.SuppressReport
+      && scsiCheckIE(scsidev, state.SmartPageSupported, state.TempPageSupported,
+                     &asc, &ascq, &currenttemp, &triptemp)) {
+    PrintOut(LOG_INFO, "Device: %s, failed to read SMART values\n",
+             devname);
+    MailWarning(cfg, state, 6, "Device: %s, failed to read SMART values", devname);
+    // Do not try again in later cycles
+    state.SuppressReport = 1;
+  }
+
+  if (asc == 0) {
+    // No additional sense code reported
+    if (debugmode)
+      PrintOut(LOG_INFO,"Device: %s, SMART health: passed\n", devname);
+  }
+  else {
+    char iebuf[128];
+    const char * const iestr = scsiGetIEString(asc, ascq, iebuf, sizeof(iebuf));
+
+    if (iestr) {
+      PrintOut(LOG_CRIT, "Device: %s, SMART Failure: %s\n", devname, iestr);
+      MailWarning(cfg, state, 1,"Device: %s, SMART Failure: %s", devname, iestr);
+    }
+    else if (asc == 4 && ascq == 9)
+      PrintOut(LOG_INFO,"Device: %s, self-test in progress\n", devname);
+    else if (debugmode)
+      PrintOut(LOG_INFO,"Device: %s, non-SMART asc,ascq: %d,%d\n",
+               devname, (int)asc, (int)ascq);
+  }
+
+  // check temperature limits
+  if (track_temp)
+    CheckTemperature(cfg, state, currenttemp, triptemp);
+
+  // check if number of selftest errors has increased (note: may also DECREASE)
+  if (cfg.selftest)
+    CheckSelfTestLogs(cfg, state, scsiCountFailedSelfTests(scsidev, 0));
+
+  // Start a scheduled self-test if one is due now
+  if (allow_selftests && !cfg.test_regex.empty()) {
+    const char testtype = next_scheduled_test(cfg, state, true/*scsi*/);
+    if (testtype)
+      DoSCSISelfTest(cfg, state, scsidev, testtype);
+  }
+
+  if (!cfg.attrlog_file.empty()) {
+    // Save error counters to state.
+    // Table index == index into state.scsi_error_counters[].
+    uint8_t logbuf[252];
+    const struct {
+      bool supported;
+      int page;
+    } counter_pages[3] = {
+      { !!state.ReadECounterPageSupported,   READ_ERROR_COUNTER_LPAGE   },
+      { !!state.WriteECounterPageSupported,  WRITE_ERROR_COUNTER_LPAGE  },
+      { !!state.VerifyECounterPageSupported, VERIFY_ERROR_COUNTER_LPAGE }
+    };
+
+    for (int i = 0; i < 3; i++) {
+      if (!counter_pages[i].supported)
+        continue;
+      if (scsiLogSense(scsidev, counter_pages[i].page, 0, logbuf, sizeof(logbuf), 0) != 0)
+        continue;
+      scsiDecodeErrCounterPage(logbuf, &state.scsi_error_counters[i].errCounter,
+                               scsiLogRespLen);
+      state.scsi_error_counters[i].found=1;
+    }
+
+    if (   state.NonMediumErrorPageSupported
+        && scsiLogSense(scsidev, NON_MEDIUM_ERROR_LPAGE, 0, logbuf, sizeof(logbuf), 0) == 0) {
+      scsiDecodeNonMediumErrPage(logbuf, &state.scsi_nonmedium_error.nme,
+                                 scsiLogRespLen);
+      state.scsi_nonmedium_error.found=1;
+    }
+
+    // store temperature if not done by CheckTemperature() above
+    if (!track_temp)
+      state.temperature = currenttemp;
+  }
+
+  CloseDevice(scsidev, devname);
+  state.attrlog_dirty = true;
+  return 0;
+}
+
+// Run one check cycle on an NVMe device: Critical Warning bits of the
+// SMART/Health log, temperature limits and growth of the error log.
+// Returns 1 if the device could not be opened, 0 otherwise (also if the
+// SMART/Health log could not be read).
+static int NVMeCheckDevice(const dev_config & cfg, dev_state & state, nvme_device * nvmedev)
+{
+  if (!open_device(cfg, state, nvmedev, "NVMe"))
+    return 1;
+
+  const char * const devname = cfg.name.c_str();
+
+  // Without the SMART/Health log none of the checks below is possible
+  nvme_smart_log smart_log;
+  if (!nvme_read_smart_log(nvmedev, smart_log)) {
+    CloseDevice(nvmedev, devname);
+    PrintOut(LOG_INFO, "Device: %s, failed to read NVMe SMART/Health Information\n", devname);
+    MailWarning(cfg, state, 6, "Device: %s, failed to read NVMe SMART/Health Information", devname);
+    state.must_write = true;
+    return 0;
+  }
+
+  // Report Critical Warning bits by name
+  if (cfg.smartcheck && smart_log.critical_warning) {
+    static const char * const wnames[] =
+      {"LowSpare", "Temperature", "Reliability", "R/O", "VolMemBackup"};
+    const unsigned num_wnames = sizeof(wnames)/sizeof(wnames[0]);
+
+    const unsigned char w = smart_log.critical_warning;
+    std::string msg;
+    unsigned listed = 0;
+    for (unsigned bit = 0; bit < 8; bit++) {
+      if (w & (1 << bit)) {
+        if (listed > 0)
+          msg += ", ";
+        listed++;
+        // List at most 3 names, abbreviate the rest
+        if (listed > 3) {
+          msg += "...";
+          break;
+        }
+        // Bits above the named ones have no known meaning here
+        if (bit >= num_wnames) {
+          msg += "*Unknown*";
+          break;
+        }
+        msg += wnames[bit];
+      }
+    }
+
+    PrintOut(LOG_CRIT, "Device: %s, Critical Warning (0x%02x): %s\n", devname, w, msg.c_str());
+    MailWarning(cfg, state, 1, "Device: %s, Critical Warning (0x%02x): %s", devname, w, msg.c_str());
+    state.must_write = true;
+  }
+
+  // Check temperature limits
+  if (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit) {
+    const int kelvin = nvme_get_max_temp_kelvin(smart_log);
+    // Convert Kelvin to Celsius and limit the result to 1..255
+    // (TODO: Allow negative temperatures)
+    int celsius = kelvin - 273;
+    celsius = (celsius < 1 ? 1 : celsius > 0xff ? 0xff : celsius);
+    CheckTemperature(cfg, state, celsius, 0);
+  }
+
+  // Check if number of errors has increased
+  if (cfg.errorlog || cfg.xerrorlog) {
+    const uint64_t newcnt = le128_to_uint64(smart_log.num_err_log_entries);
+    // Warn only if device related errors are found
+    if (newcnt > state.nvme_err_log_entries)
+      check_nvme_error_log(cfg, state, nvmedev, newcnt);
+    // else // TODO: Handle decrease of count?
+  }
+
+  CloseDevice(nvmedev, devname);
+  state.attrlog_dirty = true;
+  return 0;
+}
+
+// State of the system auto standby handling:
+// 0=not used, 1=not disabled, 2=disable rejected by OS, 3=disabled
+static int standby_disable_state = 0;
+
+// Set up the auto standby handling for a (re)read configuration.
+// Reenables auto standby if it is still disabled and drops the ',ns'
+// requests if the reenable call fails.
+static void init_disable_standby_check(const dev_config_vector & configs)
+{
+  // Any '-l offlinests,ns' or '-l selfteststs,ns' directives present?
+  bool offl_ns = false, selft_ns = false;
+  for (const auto & cfg : configs) {
+    offl_ns  = (offl_ns  || cfg.offlinests_ns);
+    selft_ns = (selft_ns || cfg.selfteststs_ns);
+  }
+
+  // Standby may still be disabled if smartd.conf was reread
+  const bool was_disabled = (standby_disable_state == 3);
+
+  // The enable call also serves as the check for support of this feature
+  if (   (offl_ns || selft_ns || was_disabled)
+      && !smi()->disable_system_auto_standby(false)) {
+    if (was_disabled)
+      PrintOut(LOG_CRIT, "System auto standby enable failed: %s\n", smi()->get_errmsg());
+    if (offl_ns || selft_ns) {
+      PrintOut(LOG_INFO, "Disable auto standby not supported, ignoring ',ns' from %s%s%s\n",
+        (offl_ns ? "-l offlinests,ns" : ""), (offl_ns && selft_ns ? " and " : ""), (selft_ns ? "-l selfteststs,ns" : ""));
+      offl_ns = selft_ns = false;
+    }
+  }
+
+  standby_disable_state = (offl_ns || selft_ns ? 1 : 0);
+}
+
+// Disable system auto standby while a self-test or offline data collection
+// of a device configured with ',ns' is running, reenable it afterwards.
+// State changes are logged once and tracked in standby_disable_state.
+static void do_disable_standby_check(const dev_config_vector & configs, const dev_state_vector & states)
+{
+  // Feature not used
+  if (standby_disable_state == 0)
+    return;
+
+  // Look for a just started or still running test
+  // (state.offline/selftest_started will be reset after next logging of test status)
+  bool test_active = false;
+  for (unsigned i = 0; i < configs.size(); i++) {
+    const dev_config & cfg = configs.at(i);
+    const dev_state & state = states.at(i);
+
+    if (   cfg.offlinests_ns
+        && (   state.offline_started
+            || is_offl_coll_in_progress(state.smartval.offline_data_collection_status))) {
+      test_active = true;
+      break;
+    }
+    if (   cfg.selfteststs_ns
+        && (   state.selftest_started
+            || is_self_test_in_progress(state.smartval.self_test_exec_status))) {
+      test_active = true;
+      break;
+    }
+  }
+
+  if (test_active) {
+    // Request 'disabled' in each cycle, log only if the outcome changed
+    if (smi()->disable_system_auto_standby(true)) {
+      if (standby_disable_state != 3) {
+        PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disabled\n");
+        standby_disable_state = 3;
+      }
+    }
+    else if (standby_disable_state != 2) {
+      PrintOut(LOG_INFO, "Self-test(s) in progress, system auto standby disable rejected: %s\n",
+               smi()->get_errmsg());
+      standby_disable_state = 2;
+    }
+    return;
+  }
+
+  // No test active: nothing to do if standby is not disabled
+  if (standby_disable_state == 1)
+    return;
+
+  if (smi()->disable_system_auto_standby(false))
+    PrintOut(LOG_INFO, "Self-test(s) completed, system auto standby enabled\n");
+  else
+    PrintOut(LOG_CRIT, "Self-test(s) completed, system auto standby enable failed: %s\n",
+             smi()->get_errmsg());
+  standby_disable_state = 1;
+}
+
+// Runs one check cycle on all configured ATA, SCSI and NVMe devices which
+// are not marked to be skipped in this cycle, then updates the system
+// auto standby handling.
+static void CheckDevicesOnce(const dev_config_vector & configs, dev_state_vector & states,
+                             smart_device_list & devices, bool firstpass, bool allow_selftests)
+{
+  for (unsigned idx = 0; idx < configs.size(); idx++) {
+    const dev_config & cfg = configs.at(idx);
+    dev_state & state = states.at(idx);
+
+    if (!state.skip) {
+      // Dispatch to the check function of the device's interface type
+      smart_device * const dev = devices.at(idx);
+      if (dev->is_ata())
+        ATACheckDevice(cfg, state, dev->to_ata(), firstpass, allow_selftests);
+      else if (dev->is_scsi())
+        SCSICheckDevice(cfg, state, dev->to_scsi(), allow_selftests);
+      else if (dev->is_nvme())
+        NVMeCheckDevice(cfg, state, dev->to_nvme());
+
+      // Prevent systemd unit startup timeout when checking many devices on startup
+      notify_extend_timeout();
+    }
+    else if (debugmode) {
+      PrintOut(LOG_INFO, "Device: %s, skipped (interval=%d)\n", cfg.name.c_str(),
+               (cfg.checktime ? cfg.checktime : checktime));
+    }
+  }
+
+  do_disable_standby_check(configs, states);
+}
+
+// Register the handlers for all signals smartd reacts to.
+// Each handler is registered through set_signal_if_not_ignored().
+static void install_signal_handlers()
+{
+  // In debug mode <CONTROL-C> is handled like HUP, else like an exit request
+  const auto sigint_handler = (debugmode ? HUPhandler : sighandler);
+
+  // Normal and abnormal exit
+  set_signal_if_not_ignored(SIGTERM, sighandler);
+  set_signal_if_not_ignored(SIGQUIT, sighandler);
+
+  // Keyboard interrupt
+  set_signal_if_not_ignored(SIGINT, sigint_handler);
+
+  // HUP and USR1
+  set_signal_if_not_ignored(SIGHUP, HUPhandler);
+  set_signal_if_not_ignored(SIGUSR1, USR1handler);
+
+#ifdef _WIN32
+  // USR2 is caught on Windows only
+  set_signal_if_not_ignored(SIGUSR2, USR2handler);
+#endif
+}
+
+#ifdef _WIN32
+// Switch debug mode on or off when signal USR2 was caught.
+// Implemented for native windows only
+// (there is no easy way to reopen tty on *nix).
+// Only debug modes 0 and 1 are toggled, other values are left unchanged.
+static void ToggleDebugMode()
+{
+  switch (debugmode) {
+    case 0:
+      PrintOut(LOG_INFO,"Signal USR2 - enabling debug mode\n");
+      // A nonzero result means that the console could not be enabled
+      if (daemon_enable_console("smartd [Debug]")) {
+        PrintOut(LOG_INFO,"enable console failed\n");
+        break;
+      }
+      debugmode = 1;
+      daemon_signal(SIGINT, HUPhandler);
+      PrintOut(LOG_INFO,"smartd debug mode enabled, PID=%d\n", getpid());
+      break;
+
+    case 1:
+      daemon_disable_console();
+      debugmode = 0;
+      daemon_signal(SIGINT, sighandler);
+      PrintOut(LOG_INFO,"Signal USR2 - debug mode disabled\n");
+      break;
+
+    default:
+      PrintOut(LOG_INFO,"Signal USR2 - debug mode %d not changed\n", debugmode);
+      break;
+  }
+}
+#endif
+
+// Return the next wake-up-time for check interval 'ct' (seconds).
+// A 'wakeuptime' still in the future is returned unchanged.  Otherwise the
+// result is the first time after 'timenow' which lies a whole number of
+// intervals after 'wakeuptime'.
+time_t calc_next_wakeuptime(time_t wakeuptime, time_t timenow, int ct)
+{
+  // Not yet due
+  if (timenow < wakeuptime)
+    return wakeuptime;
+
+  // Seconds elapsed since the last interval boundary
+  const time_t into_interval = (timenow - wakeuptime) % ct;
+  const time_t one_interval_ahead = timenow + ct;
+  return one_interval_ahead - into_interval;
+}
+
+// Sleep until the next check is due or until a USR1, HUP or exit signal
+// was caught.
+// wakeuptime: Previous wake-up-time, used if all devices share one interval
+// configs, states: Per device settings and states; state.wakeuptime and
+//   state.skip are updated if device specific check intervals are in use
+// sigwakeup: Set to true if sleep was ended by signal USR1
+// Returns the adjusted wake-up-time.
+static time_t dosleep(time_t wakeuptime, const dev_config_vector & configs,
+                      dev_state_vector & states, bool & sigwakeup)
+{
+  // If past wake-up-time, compute next wake-up-time
+  time_t now = time(nullptr);
+  const unsigned ndevs = configs.size();
+  int interval;
+  if (checktime_min) {
+    // Device specific intervals: wake up for the device(s) due next
+    wakeuptime = 0;
+    for (unsigned i = 0; i < ndevs; i++) {
+      const dev_config & cfg = configs.at(i);
+      dev_state & state = states.at(i);
+      if (!state.skip) {
+        const time_t prev_wakeup = (state.wakeuptime ? state.wakeuptime : now);
+        const int dev_interval = (cfg.checktime ? cfg.checktime : checktime);
+        state.wakeuptime = calc_next_wakeuptime(prev_wakeup, now, dev_interval);
+      }
+      if (!wakeuptime || state.wakeuptime < wakeuptime)
+        wakeuptime = state.wakeuptime;
+    }
+    interval = checktime_min;
+  }
+  else {
+    // Same interval for all devices
+    wakeuptime = calc_next_wakeuptime(wakeuptime, now, checktime);
+    interval = checktime;
+  }
+
+  notify_wait(wakeuptime, ndevs);
+
+  // Sleep until we catch a signal or have completed sleeping
+  bool check_all = false; // true: do not skip any device in the next cycle
+  int extra_sleep = 0;    // additional seconds to sleep after a too long sleep
+  while (now < wakeuptime+extra_sleep && !caughtsigUSR1 && !caughtsigHUP && !caughtsigEXIT) {
+    // Restart if system clock has been adjusted to the past
+    if (wakeuptime > now + interval) {
+      PrintOut(LOG_INFO, "System clock time adjusted to the past. Resetting next wakeup time.\n");
+      wakeuptime = now + interval;
+      for (auto & st : states)
+        st.wakeuptime = 0;
+      check_all = true;
+    }
+
+    // Exit sleep when time interval has expired or a signal is received
+    sleep(wakeuptime+extra_sleep-now);
+
+#ifdef _WIN32
+    // toggle debug mode?
+    if (caughtsigUSR2) {
+      ToggleDebugMode();
+      caughtsigUSR2 = 0;
+    }
+#endif
+
+    now = time(nullptr);
+
+    // Actual sleep time too long?
+    if (!extra_sleep && now > wakeuptime+60) {
+      if (debugmode)
+        PrintOut(LOG_INFO, "Sleep time was %d seconds too long, assuming wakeup from standby mode.\n",
+                 (int)(now-wakeuptime));
+      // Wait another 20 seconds to avoid I/O errors during disk spin-up
+      extra_sleep = now-wakeuptime+20;
+      // Use next wake-up-time if close
+      const int to_next_check = interval - extra_sleep % interval;
+      if (to_next_check <= 20)
+        extra_sleep += to_next_check;
+    }
+  }
+
+  // if we caught a SIGUSR1 then print message and clear signal
+  if (caughtsigUSR1) {
+    PrintOut(LOG_INFO,"Signal USR1 - checking devices now rather than in %d seconds.\n",
+             wakeuptime-now>0?(int)(wakeuptime-now):0);
+    caughtsigUSR1=0;
+    sigwakeup = check_all = true;
+  }
+
+  // Check which devices must be skipped in this cycle
+  if (checktime_min) {
+    for (auto & st : states)
+      st.skip = (!check_all && now < st.wakeuptime);
+  }
+
+  // return adjusted wakeuptime
+  return wakeuptime;
+}
+
+// Print out a list of valid arguments for the Directive d.
+// priority: Log priority passed to PrintOut()
+// d:        Directive letter (the character after '-'); nothing is printed
+//           for letters not listed below
+// The list is printed without trailing newline.
+static void printoutvaliddirectiveargs(int priority, char d)
+{
+  switch (d) {
+  case 'n':
+    PrintOut(priority, "never[,N][,q], sleep[,N][,q], standby[,N][,q], idle[,N][,q]");
+    break;
+  case 's':
+    PrintOut(priority, "valid_regular_expression");
+    break;
+  case 'd':
+    PrintOut(priority, "%s", smi()->get_valid_dev_types_str().c_str());
+    break;
+  case 'T':
+    PrintOut(priority, "normal, permissive");
+    break;
+  case 'o':
+  case 'S':
+    PrintOut(priority, "on, off");
+    break;
+  case 'l':
+    // Keep in sync with the arguments accepted by ParseToken() for '-l'
+    PrintOut(priority, "error, selftest, xerror, offlinests[,ns], selfteststs[,ns], "
+                       "scterc,READTIME,WRITETIME");
+    break;
+  case 'M':
+    PrintOut(priority, "\"once\", \"always\", \"daily\", \"diminishing\", \"test\", \"exec\"");
+    break;
+  case 'v':
+    PrintOut(priority, "\n%s\n", create_vendor_attribute_arg_list().c_str());
+    break;
+  case 'P':
+    PrintOut(priority, "use, ignore, show, showall");
+    break;
+  case 'F':
+    PrintOut(priority, "%s", get_valid_firmwarebug_args());
+    break;
+  case 'e':
+    // Note the ", " which separates the last item of the first string
+    // from the first item of the second string
+    PrintOut(priority, "aam,[N|off], apm,[N|off], lookahead,[on|off], dsn,[on|off], "
+                       "security-freeze, standby,[N|off], wcache,[on|off]");
+    break;
+  case 'c':
+    PrintOut(priority, "i=N, interval=N");
+    break;
+  }
+}
+
+// Parse the decimal integer argument of a Directive.
+// arg:     Argument string or null pointer if the argument is missing
+// name, token, lineno, cfgfile: Drive name, Directive, line number and
+//          file name for the error message
+// min, max: Valid range of the value
+// suffix:  Optional: string which may follow the number.  If the
+//          argument does not end with it, *suffix is set to 0 so the
+//          caller can detect whether the suffix was present.
+// Returns the value, or -1 after printing an error message if the argument
+// is missing, is not a decimal integer or is out of range.
+static int GetInteger(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
+                      int min, int max, char * suffix = nullptr)
+{
+  // make sure argument is there
+  if (!arg) {
+    PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes integer argument from %d to %d.\n",
+             cfgfile, lineno, name, token, min, max);
+    return -1;
+  }
+
+  // get argument value (base 10).  Keep the full 'long' result for the
+  // range check: narrowing to 'int' first could wrap a huge value into
+  // the valid range.
+  char *endptr = nullptr;
+  errno = 0;
+  const long val = strtol(arg, &endptr, 10);
+  const bool overflow = (errno == ERANGE);
+
+  // strtol() converts nothing and returns 0 if there are no digits,
+  // this must not be accepted as value 0
+  const bool has_digits = (endptr != arg);
+
+  // optional suffix present?
+  if (suffix) {
+    if (!strcmp(endptr, suffix))
+      endptr += strlen(suffix);
+    else
+      *suffix = 0;
+  }
+
+  // check that it's an integer without trailing garbage, and in-range
+  if (!(has_digits && !overflow && !*endptr && min <= val && val <= max)) {
+    PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs integer from %d to %d.\n",
+             cfgfile, lineno, name, token, arg, min, max);
+    return -1;
+  }
+
+  // all is well; return value (fits into 'int' due to the range check)
+  return (int)val;
+}
+
+
+// Parse the 'N[,N[,N]]' argument of the '-W' directive.
+// Each value must be in the range 0 to 255, values not specified are
+// returned as 0.
+// Returns 0 on success, or -1 after printing an error message.
+static int Get3Integers(const char *arg, const char *name, const char *token, int lineno, const char *cfgfile,
+                        unsigned char *val1, unsigned char *val2, unsigned char *val3)
+{
+  if (!arg) {
+    PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s takes 1-3 integer argument(s) from 0 to 255.\n",
+             cfgfile, lineno, name, token);
+    return -1;
+  }
+
+  // pos1..3 receive the number of characters consumed after each value
+  unsigned num1 = 0, num2 = 0, num3 = 0;
+  int pos1 = -1, pos2 = -1, pos3 = -1;
+  const int arglen = strlen(arg);
+  const int converted = sscanf(arg, "%u%n,%u%n,%u%n", &num1, &pos1, &num2, &pos2, &num3, &pos3);
+
+  // The argument is complete if it ends right after one of the values
+  const bool complete = (pos1 == arglen || pos2 == arglen || pos3 == arglen);
+  const bool in_range = (num1 <= 255 && num2 <= 255 && num3 <= 255);
+
+  if (converted < 1 || !complete || !in_range) {
+    PrintOut(LOG_CRIT,"File %s line %d (drive %s): Directive: %s has argument: %s; needs 1-3 integer(s) from 0 to 255.\n",
+             cfgfile, lineno, name, token, arg);
+    return -1;
+  }
+
+  *val1 = (unsigned char)num1;
+  *val2 = (unsigned char)num2;
+  *val3 = (unsigned char)num3;
+  return 0;
+}
+
+
+#ifdef _WIN32
+
+// Get the next strtok() token.  If it starts with '"', concatenate the
+// following strtok() results up to the token which ends with '"' and
+// return the string without the quotes.
+// delimiters: Delimiter set passed to strtok()
+// Returns a null pointer if no token remains, the token itself if it is
+// not quoted, the dequoted string (stored in a static buffer which is
+// overwritten by the next call), or "\"" if the closing quote is missing.
+// Note: Each run of delimiters within the quotes is replaced by one space.
+static const char * strtok_dequote(const char * delimiters)
+{
+  const char * t = strtok(nullptr, delimiters);
+  if (!t || t[0] != '"')
+    return t;
+
+  static std::string token;
+  token = t+1;
+
+  // Quoted string without embedded delimiters: the closing quote is
+  // already part of the first token
+  if (token.size() >= 2 && token[token.size()-1] == '"') {
+    token.erase(token.size()-1);
+    return token.c_str();
+  }
+
+  for (;;) {
+    t = strtok(nullptr, delimiters);
+    if (!t || !*t)
+      // Closing quote missing
+      return "\"";
+    token += ' ';
+    const size_t len = strlen(t);
+    if (t[len-1] == '"') {
+      // Last part, append without the closing quote
+      token.append(t, len-1);
+      break;
+    }
+    token += t;
+  }
+  return token.c_str();
+}
+
+#endif // _WIN32
+
+
+// Parse one Directive token (and its argument, if any) of a configuration
+// file line into 'cfg'.  Arguments are fetched with strtok(nullptr, ...),
+// so the caller must have started tokenizing the line with strtok().
+//
+// token:      Directive to parse: '-' followed by one letter
+// cfg:        Device configuration updated according to the Directive;
+//             cfg.name and cfg.lineno are used in error messages
+// scan_types: List of '-d TYPE' device types: '-d auto' clears it, any
+//             TYPE other than 'ignore', 'removable' and 'auto' is appended
+//
+// Returns 1 if one Directive (and its argument) was parsed, 0 if the token
+// starts a comment ('#') such that no Directives remain on the line, and
+// -1 if an error was found (a message has been printed then).
+static int ParseToken(char * token, dev_config & cfg, smart_devtype_list & scan_types)
+{
+  char sym;
+  const char * name = cfg.name.c_str();
+  int lineno=cfg.lineno;
+  const char *delim = " \n\t";
+  int badarg = 0;
+  int missingarg = 0;
+  const char *arg = 0;
+
+  // The rest of the line is a comment: tell the caller that no Directives
+  // remain instead of letting it parse the words of the comment
+  if (*token=='#')
+    return 0;
+
+  // is the token not recognized?
+  if (*token!='-' || strlen(token)!=2) {
+    PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
+             configfile, lineno, name, token);
+    PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
+    return -1;
+  }
+
+  // token we will be parsing:
+  sym=token[1];
+
+  // parse the token and swallow its argument
+  int val;
+  // Optional suffixes for GetInteger(), reset to "" there if not present
+  char plus[] = "+", excl[] = "!";
+
+  switch (sym) {
+  case 'C':
+    // monitor current pending sector count (default 197)
+    if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 0, 255, plus)) < 0)
+      return -1;
+    cfg.curr_pending_id = (unsigned char)val;
+    cfg.curr_pending_incr = (*plus == '+');
+    cfg.curr_pending_set = true;
+    break;
+  case 'U':
+    // monitor offline uncorrectable sectors (default 198)
+    if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 0, 255, plus)) < 0)
+      return -1;
+    cfg.offl_pending_id = (unsigned char)val;
+    cfg.offl_pending_incr = (*plus == '+');
+    cfg.offl_pending_set = true;
+    break;
+  case 'T':
+    // Set tolerance level for SMART command failures
+    if (!(arg = strtok(nullptr, delim))) {
+      missingarg = 1;
+    } else if (!strcmp(arg, "normal")) {
+      // Normal mode: exit on failure of a mandatory S.M.A.R.T. command, but
+      // not on failure of an optional S.M.A.R.T. command.
+      // This is the default so we don't need to actually do anything here.
+      cfg.permissive = false;
+    } else if (!strcmp(arg, "permissive")) {
+      // Permissive mode; ignore errors from Mandatory SMART commands
+      cfg.permissive = true;
+    } else {
+      badarg = 1;
+    }
+    break;
+  case 'd':
+    // specify the device type
+    if (!(arg = strtok(nullptr, delim))) {
+      missingarg = 1;
+    } else if (!strcmp(arg, "ignore")) {
+      cfg.ignore = true;
+    } else if (!strcmp(arg, "removable")) {
+      cfg.removable = true;
+    } else if (!strcmp(arg, "auto")) {
+      cfg.dev_type = "";
+      scan_types.clear();
+    } else {
+      // Any other argument is taken as device type, it is not checked here
+      cfg.dev_type = arg;
+      scan_types.push_back(arg);
+    }
+    break;
+  case 'F':
+    // fix firmware bug
+    if (!(arg = strtok(nullptr, delim)))
+      missingarg = 1;
+    else if (!parse_firmwarebug_def(arg, cfg.firmwarebugs))
+      badarg = 1;
+    break;
+  case 'H':
+    // check SMART status
+    cfg.smartcheck = true;
+    break;
+  case 'f':
+    // check for failure of usage attributes
+    cfg.usagefailed = true;
+    break;
+  case 't':
+    // track changes in all vendor attributes
+    cfg.prefail = true;
+    cfg.usage = true;
+    break;
+  case 'p':
+    // track changes in prefail vendor attributes
+    cfg.prefail = true;
+    break;
+  case 'u':
+    // track changes in usage vendor attributes
+    cfg.usage = true;
+    break;
+  case 'l':
+    // track changes in SMART logs
+    if (!(arg = strtok(nullptr, delim))) {
+      missingarg = 1;
+    } else if (!strcmp(arg, "selftest")) {
+      // track changes in self-test log
+      cfg.selftest = true;
+    } else if (!strcmp(arg, "error")) {
+      // track changes in ATA error log
+      cfg.errorlog = true;
+    } else if (!strcmp(arg, "xerror")) {
+      // track changes in Extended Comprehensive SMART error log
+      cfg.xerrorlog = true;
+    } else if (!strcmp(arg, "offlinests")) {
+      // track changes in offline data collection status
+      cfg.offlinests = true;
+    } else if (!strcmp(arg, "offlinests,ns")) {
+      // track changes in offline data collection status, disable auto standby
+      cfg.offlinests = cfg.offlinests_ns = true;
+    } else if (!strcmp(arg, "selfteststs")) {
+      // track changes in self-test execution status
+      cfg.selfteststs = true;
+    } else if (!strcmp(arg, "selfteststs,ns")) {
+      // track changes in self-test execution status, disable auto standby
+      cfg.selfteststs = cfg.selfteststs_ns = true;
+    } else if (!strncmp(arg, "scterc,", sizeof("scterc,")-1)) {
+      // set SCT Error Recovery Control
+      unsigned rt = ~0, wt = ~0; int nc = -1;
+      sscanf(arg,"scterc,%u,%u%n", &rt, &wt, &nc);
+      // nc remains -1 unless both values were parsed
+      if (nc == (int)strlen(arg) && rt <= 999 && wt <= 999) {
+        cfg.sct_erc_set = true;
+        cfg.sct_erc_readtime = rt;
+        cfg.sct_erc_writetime = wt;
+      }
+      else
+        badarg = 1;
+    } else {
+      badarg = 1;
+    }
+    break;
+  case 'a':
+    // monitor everything
+    cfg.smartcheck = true;
+    cfg.prefail = true;
+    cfg.usagefailed = true;
+    cfg.usage = true;
+    cfg.selftest = true;
+    cfg.errorlog = true;
+    cfg.selfteststs = true;
+    break;
+  case 'o':
+    // automatic offline testing enable/disable
+    if (!(arg = strtok(nullptr, delim))) {
+      missingarg = 1;
+    } else if (!strcmp(arg, "on")) {
+      cfg.autoofflinetest = 2;
+    } else if (!strcmp(arg, "off")) {
+      cfg.autoofflinetest = 1;
+    } else {
+      badarg = 1;
+    }
+    break;
+  case 'n':
+    // skip disk check if in idle or standby mode
+    if (!(arg = strtok(nullptr, delim)))
+      missingarg = 1;
+    else {
+      char *endptr = nullptr;
+      // The token is part of the caller's writable line buffer
+      char * const comma = strchr(const_cast<char*>(arg), ',');
+      char *next = comma;
+
+      cfg.powerquiet = false;
+      cfg.powerskipmax = 0;
+
+      // Temporarily terminate the power mode name at the first ','
+      if (next)
+        *next = '\0';
+      if (!strcmp(arg, "never"))
+        cfg.powermode = 0;
+      else if (!strcmp(arg, "sleep"))
+        cfg.powermode = 1;
+      else if (!strcmp(arg, "standby"))
+        cfg.powermode = 2;
+      else if (!strcmp(arg, "idle"))
+        cfg.powermode = 3;
+      else
+        badarg = 1;
+
+      // if optional arguments are present
+      if (!badarg && next) {
+        next++;
+        cfg.powerskipmax = strtol(next, &endptr, 10);
+        if (endptr == next)
+          // No number, only 'q' may follow
+          cfg.powerskipmax = 0;
+        else {
+          // Skip the ',' (or any other character) after the number
+          next = endptr + (*endptr != '\0');
+          if (cfg.powerskipmax <= 0)
+            badarg = 1;
+        }
+        if (*next != '\0') {
+          if (!strcmp("q", next))
+            cfg.powerquiet = true;
+          else {
+            badarg = 1;
+          }
+        }
+      }
+
+      // Restore the ',' such that the error message below prints the
+      // complete argument instead of only the power mode name
+      if (comma)
+        *comma = ',';
+    }
+    break;
+  case 'S':
+    // automatic attribute autosave enable/disable
+    if (!(arg = strtok(nullptr, delim))) {
+      missingarg = 1;
+    } else if (!strcmp(arg, "on")) {
+      cfg.autosave = 2;
+    } else if (!strcmp(arg, "off")) {
+      cfg.autosave = 1;
+    } else {
+      badarg = 1;
+    }
+    break;
+  case 's':
+    // warn user, and delete any previously given -s REGEXP Directives
+    if (!cfg.test_regex.empty()){
+      PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Test Directive -s %s\n",
+               configfile, lineno, name, cfg.test_regex.get_pattern());
+      cfg.test_regex = regular_expression();
+    }
+    // check for missing argument
+    if (!(arg = strtok(nullptr, delim))) {
+      missingarg = 1;
+    }
+    // Compile regex
+    else {
+      if (!cfg.test_regex.compile(arg)) {
+        // not a valid regular expression!
+        PrintOut(LOG_CRIT, "File %s line %d (drive %s): -s argument \"%s\" is INVALID extended regular expression. %s.\n",
+                 configfile, lineno, name, arg, cfg.test_regex.get_errmsg());
+        return -1;
+      }
+      // Do a bit of sanity checking and warn user if we think that
+      // their regexp is "strange". User probably confused about shell
+      // glob(3) syntax versus regular expression syntax regexp(7).
+      // Check also for possible invalid number of digits in ':NNN[-LLL]' suffix.
+      static const regular_expression syntax_check(
+        "[^]$()*+./:?^[|0-9LSCOncr-]+|"
+        ":[0-9]{0,2}($|[^0-9])|:[0-9]{4,}|"
+        ":[0-9]{3}-(000|[0-9]{0,2}($|[^0-9])|[0-9]{4,})"
+      );
+      regular_expression::match_range range;
+      if (syntax_check.execute(arg, 1, &range) && 0 <= range.rm_so && range.rm_so < range.rm_eo)
+        PrintOut(LOG_INFO, "File %s line %d (drive %s): warning, \"%.*s\" looks odd in "
+                           "extended regular expression \"%s\"\n",
+                 configfile, lineno, name, (int)(range.rm_eo - range.rm_so), arg + range.rm_so, arg);
+    }
+    break;
+  case 'm':
+    // send email to address that follows
+    if (!(arg = strtok(nullptr, delim)))
+      missingarg = 1;
+    else {
+      if (!cfg.emailaddress.empty())
+        PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous Address Directive -m %s\n",
+                 configfile, lineno, name, cfg.emailaddress.c_str());
+      cfg.emailaddress = arg;
+    }
+    break;
+  case 'M':
+    // email warning options
+    if (!(arg = strtok(nullptr, delim)))
+      missingarg = 1;
+    else if (!strcmp(arg, "once"))
+      cfg.emailfreq = emailfreqs::once;
+    else if (!strcmp(arg, "always"))
+      cfg.emailfreq = emailfreqs::always;
+    else if (!strcmp(arg, "daily"))
+      cfg.emailfreq = emailfreqs::daily;
+    else if (!strcmp(arg, "diminishing"))
+      cfg.emailfreq = emailfreqs::diminishing;
+    else if (!strcmp(arg, "test"))
+      cfg.emailtest = true;
+    else if (!strcmp(arg, "exec")) {
+      // Get the next argument (the command line)
+#ifdef _WIN32
+      // Allow "/path name/with spaces/..." on Windows
+      arg = strtok_dequote(delim);
+      // A result starting with '"' signals an unterminated quoted string
+      if (arg && arg[0] == '"') {
+        PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument: missing closing quote\n",
+                 configfile, lineno, name, token);
+        return -1;
+      }
+#else
+      arg = strtok(nullptr, delim);
+#endif
+      if (!arg) {
+        PrintOut(LOG_CRIT, "File %s line %d (drive %s): Directive %s 'exec' argument must be followed by executable path.\n",
+                 configfile, lineno, name, token);
+        return -1;
+      }
+      // Warn if a previous cmd line is replaced, then copy new one
+      if (!cfg.emailcmdline.empty())
+        PrintOut(LOG_INFO, "File %s line %d (drive %s): ignoring previous mail Directive -M exec %s\n",
+                 configfile, lineno, name, cfg.emailcmdline.c_str());
+      cfg.emailcmdline = arg;
+    }
+    else
+      badarg = 1;
+    break;
+  case 'i':
+    // ignore failure of usage attribute
+    if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255)) < 0)
+      return -1;
+    cfg.monitor_attr_flags.set(val, MONITOR_IGN_FAILUSE);
+    break;
+  case 'I':
+    // ignore attribute for tracking purposes
+    if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255)) < 0)
+      return -1;
+    cfg.monitor_attr_flags.set(val, MONITOR_IGNORE);
+    break;
+  case 'r':
+    // print raw value when tracking
+    if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255, excl)) < 0)
+      return -1;
+    cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT);
+    if (*excl == '!') // attribute change is critical
+      cfg.monitor_attr_flags.set(val, MONITOR_AS_CRIT);
+    break;
+  case 'R':
+    // track changes in raw value (forces printing of raw value)
+    if ((val = GetInteger((arg = strtok(nullptr, delim)), name, token, lineno, configfile, 1, 255, excl)) < 0)
+      return -1;
+    cfg.monitor_attr_flags.set(val, MONITOR_RAW_PRINT|MONITOR_RAW);
+    if (*excl == '!') // raw value change is critical
+      cfg.monitor_attr_flags.set(val, MONITOR_RAW_AS_CRIT);
+    break;
+  case 'W':
+    // track Temperature
+    if (Get3Integers((arg = strtok(nullptr, delim)), name, token, lineno, configfile,
+                     &cfg.tempdiff, &cfg.tempinfo, &cfg.tempcrit) < 0)
+      return -1;
+    break;
+  case 'v':
+    // non-default vendor-specific attribute meaning
+    if (!(arg = strtok(nullptr, delim))) {
+      missingarg = 1;
+    } else if (!parse_attribute_def(arg, cfg.attribute_defs, PRIOR_USER)) {
+      badarg = 1;
+    }
+    break;
+  case 'P':
+    // Define use of drive-specific presets.
+    if (!(arg = strtok(nullptr, delim))) {
+      missingarg = 1;
+    } else if (!strcmp(arg, "use")) {
+      cfg.ignorepresets = false;
+    } else if (!strcmp(arg, "ignore")) {
+      cfg.ignorepresets = true;
+    } else if (!strcmp(arg, "show")) {
+      cfg.showpresets = true;
+    } else if (!strcmp(arg, "showall")) {
+      showallpresets();
+    } else {
+      badarg = 1;
+    }
+    break;
+
+  case 'e':
+    // Various ATA settings
+    if (!(arg = strtok(nullptr, delim))) {
+      missingarg = 1;
+    }
+    else {
+      // Split 'NAME[,VALUE]' or 'NAME[=VALUE]':
+      // n1 = length of NAME, n2 = start of VALUE, n3 = end of numeric VALUE
+      char arg2[16+1]; unsigned uval = ~0U;
+      int n1 = -1, n2 = -1, n3 = -1, len = strlen(arg);
+      if (sscanf(arg, "%16[^,=]%n%*[,=]%n%u%n", arg2, &n1, &n2, &uval, &n3) >= 1
+          && (n1 == len || n2 > 0)) {
+        bool on  = (n2 > 0 && !strcmp(arg+n2, "on"));
+        bool off = (n2 > 0 && !strcmp(arg+n2, "off"));
+        // No number or trailing garbage: force the range checks below to fail
+        if (n3 != len)
+          uval = ~0U;
+
+        // Numeric settings are stored as value + 1, 'off' as -1
+        // (for 'standby', 'off' is stored like value 0)
+        if (!strcmp(arg2, "aam")) {
+          if (off)
+            cfg.set_aam = -1;
+          else if (uval <= 254)
+            cfg.set_aam = uval + 1;
+          else
+            badarg = 1;
+        }
+        else if (!strcmp(arg2, "apm")) {
+          if (off)
+            cfg.set_apm = -1;
+          else if (1 <= uval && uval <= 254)
+            cfg.set_apm = uval + 1;
+          else
+            badarg = 1;
+        }
+        else if (!strcmp(arg2, "lookahead")) {
+          if (off)
+            cfg.set_lookahead = -1;
+          else if (on)
+            cfg.set_lookahead = 1;
+          else
+            badarg = 1;
+        }
+        // Compare the complete argument: 'security-freeze' takes no value
+        else if (!strcmp(arg, "security-freeze")) {
+          cfg.set_security_freeze = true;
+        }
+        else if (!strcmp(arg2, "standby")) {
+          if (off)
+            cfg.set_standby = 0 + 1;
+          else if (uval <= 255)
+            cfg.set_standby = uval + 1;
+          else
+            badarg = 1;
+        }
+        else if (!strcmp(arg2, "wcache")) {
+          if (off)
+            cfg.set_wcache = -1;
+          else if (on)
+            cfg.set_wcache = 1;
+          else
+            badarg = 1;
+        }
+        else if (!strcmp(arg2, "dsn")) {
+          if (off)
+            cfg.set_dsn = -1;
+          else if (on)
+            cfg.set_dsn = 1;
+          else
+            badarg = 1;
+        }
+        else
+          badarg = 1;
+      }
+      else
+        badarg = 1;
+    }
+    break;
+
+  case 'c':
+    // Override command line options
+    {
+      if (!(arg = strtok(nullptr, delim))) {
+        missingarg = 1;
+        break;
+      }
+      // Accept 'i=N' or 'interval=N' with N >= 10
+      int n = 0, nc = -1, len = strlen(arg);
+      if (   (   sscanf(arg, "i=%d%n", &n, &nc) == 1
+              || sscanf(arg, "interval=%d%n", &n, &nc) == 1)
+          && nc == len && n >= 10)
+        cfg.checktime = n;
+      else
+        badarg = 1;
+    }
+    break;
+
+  default:
+    // Directive not recognized
+    PrintOut(LOG_CRIT,"File %s line %d (drive %s): unknown Directive: %s\n",
+             configfile, lineno, name, token);
+    PrintOut(LOG_CRIT, "Run smartd -D to print a list of valid Directives.\n");
+    return -1;
+  }
+
+  // Report a missing or invalid argument together with the valid ones
+  if (missingarg) {
+    PrintOut(LOG_CRIT, "File %s line %d (drive %s): Missing argument to %s Directive\n",
+             configfile, lineno, name, token);
+  }
+  if (badarg) {
+    PrintOut(LOG_CRIT, "File %s line %d (drive %s): Invalid argument to %s Directive: %s\n",
+             configfile, lineno, name, token, arg);
+  }
+  if (missingarg || badarg) {
+    PrintOut(LOG_CRIT, "Valid arguments to %s Directive are: ", token);
+    printoutvaliddirectiveargs(LOG_CRIT, sym);
+    PrintOut(LOG_CRIT, "\n");
+    return -1;
+  }
+
+  return 1;
+}
+
+// Scan directive for configuration file
+#define SCANDIRECTIVE "DEVICESCAN"
+
+// Parse one line of the configuration file.  A DEFAULT line replaces the
+// settings in 'default_conf', any other non-empty line appends a new entry
+// (initialized from 'default_conf') to 'conf_entries'.
+//
+// Return values are:
+// 1: parsed a normal line
+// 0: found DEFAULT setting or comment or blank line
+// -1: found SCANDIRECTIVE line
+// -2: found an error
+//
+// Note: this routine modifies *line from the caller!
+static int ParseConfigLine(dev_config_vector & conf_entries, dev_config & default_conf,
+                           smart_devtype_list & scan_types, int lineno, /*const*/ char * line)
+{
+  const char * const delim = " \n\t";
+
+  // The first token is the device name.  Nothing to do for blank lines
+  // and comments.
+  const char * const name = strtok(line, delim);
+  if (!name || *name == '#')
+    return 0;
+
+  // Classify the line by its device name
+  const bool is_default = !strcmp("DEFAULT", name);
+  const bool is_scan = (!is_default && !strcmp(SCANDIRECTIVE, name));
+  const int retval = (is_default ? 0 : is_scan ? -1 : 1);
+
+  if (is_default)
+    // Restart with empty defaults
+    default_conf = dev_config();
+  else
+    // Init new entry with current defaults
+    conf_entries.push_back(default_conf);
+
+  dev_config & cfg = (is_default ? default_conf : conf_entries.back());
+
+  cfg.name = name; // Later replaced by dev->get_info().info_name
+  cfg.dev_name = name; // If DEVICESCAN later replaced by get->dev_info().dev_name
+  cfg.lineno = lineno;
+
+  // Parse the Directives one at a time
+  for (char * token = strtok(nullptr, delim); token; token = strtok(nullptr, delim)) {
+    const int rc = ParseToken(token, cfg, scan_types);
+    if (rc < 0)
+      // error found on the line
+      return -2;
+    if (rc == 0)
+      // No tokens left
+      break;
+  }
+
+  // Multiple -d TYPE directives are only allowed for DEVICESCAN
+  if (!is_scan && scan_types.size() > 1) {
+    PrintOut(LOG_CRIT, "Drive: %s, invalid multiple -d TYPE Directives on line %d of file %s\n",
+             cfg.name.c_str(), cfg.lineno, configfile);
+    return -2;
+  }
+
+  // The checks below do not apply to DEFAULT entries
+  if (is_default)
+    return retval;
+
+  // If NO monitoring directives are set, then set all of them.
+  const bool any_monitoring =
+    (   cfg.smartcheck  || cfg.selftest
+     || cfg.errorlog    || cfg.xerrorlog
+     || cfg.offlinests  || cfg.selfteststs
+     || cfg.usagefailed || cfg.prefail  || cfg.usage
+     || cfg.tempdiff    || cfg.tempinfo || cfg.tempcrit);
+  if (!any_monitoring) {
+    PrintOut(LOG_INFO,"Drive: %s, implied '-a' Directive on line %d of file %s\n",
+             cfg.name.c_str(), cfg.lineno, configfile);
+
+    cfg.smartcheck = true;
+    cfg.usagefailed = true;
+    cfg.prefail = true;
+    cfg.usage = true;
+    cfg.selftest = true;
+    cfg.errorlog = true;
+    cfg.selfteststs = true;
+  }
+
+  // Sanity check: -M options are useless without -m
+  const bool any_mail_option =
+    (!cfg.emailcmdline.empty() || cfg.emailfreq != emailfreqs::unknown || cfg.emailtest);
+  if (cfg.emailaddress.empty() && any_mail_option) {
+    PrintOut(LOG_CRIT,"Drive: %s, -M Directive(s) on line %d of file %s need -m ADDRESS Directive\n",
+             cfg.name.c_str(), cfg.lineno, configfile);
+    return -2;
+  }
+
+  // Has the user set <nomailer>?
+  if (cfg.emailaddress == "<nomailer>") {
+    // This requires that -M exec is also set
+    if (cfg.emailcmdline.empty()) {
+      PrintOut(LOG_CRIT,"Drive: %s, -m <nomailer> Directive on line %d of file %s needs -M exec Directive\n",
+               cfg.name.c_str(), cfg.lineno, configfile);
+      return -2;
+    }
+    // From here on the sign of <nomailer> is cfg.emailaddress.empty() and !cfg.emailcmdline.empty()
+    cfg.emailaddress.clear();
+  }
+
+  return retval;
+}
+
+// Parses a configuration file. Return values are:
+//  N>=0: found N entries
+//    -1: syntax error in config file
+//    -2: config file does not exist
+//    -3: config file exists but cannot be read
+//
+// In the case where the return value is 0, there are three
+// possibilities:
+// Empty configuration file ==> conf_entries.empty()
+// No configuration file    ==> conf_entries[0].lineno == 0
+// SCANDIRECTIVE found      ==> conf_entries.back().lineno != 0 (size >= 1)
+//
+// Physical lines which end with a backslash are joined with the following
+// line before they are parsed. Parsing stops at the first SCANDIRECTIVE line.
+// scan_types is cleared before each logical line is parsed, so on return it
+// only holds what the last parsed line added to it.
+static int ParseConfigFile(dev_config_vector & conf_entries, smart_devtype_list & scan_types)
+{
+  // maximum line length in configuration file
+  const int MAXLINELEN = 256;
+  // maximum length of a continued line in configuration file
+  const int MAXCONTLINE = 1023;
+
+  stdio_file f;
+  // Open config file, if it exists and is not <stdin>
+  if (!(configfile == configfile_stdin)) { // pointer comparison ok here
+    if (!f.open(configfile,"r") && (errno!=ENOENT || !configfile_alt.empty())) {
+      // file exists but we can't read it or it should exist due to '-c' option
+      int ret = (errno!=ENOENT ? -3 : -2);
+      PrintOut(LOG_CRIT,"%s: Unable to open configuration file %s\n",
+               strerror(errno),configfile);
+      return ret;
+    }
+  }
+  else // read from stdin ('-c -' option)
+    f.open(stdin);
+
+  // Start with empty defaults
+  dev_config default_conf;
+
+  // No configuration file found -- use fake one
+  int entry = 0;
+  if (!f) {
+    char fakeconfig[] = SCANDIRECTIVE " -a"; // TODO: Remove this hack, build cfg_entry.
+
+    // lineno 0 marks the entry as "not from a configuration file"
+    if (ParseConfigLine(conf_entries, default_conf, scan_types, 0, fakeconfig) != -1)
+      throw std::logic_error("Internal error parsing " SCANDIRECTIVE);
+    return 0;
+  }
+
+#ifdef __CYGWIN__
+  setmode(fileno(f), O_TEXT); // Allow files with \r\n
+#endif
+
+  // configuration file exists
+  PrintOut(LOG_INFO,"Opened configuration file %s\n",configfile);
+
+  // parse config file line by line
+  //   lineno:     number of the current logical (joined) line
+  //   contlineno: number of the current physical line of the file
+  //   cont:       number of characters collected in fullline so far
+  int lineno = 1, cont = 0, contlineno = 0;
+  char line[MAXLINELEN+2];
+  char fullline[MAXCONTLINE+1];
+
+  for (;;) {
+    // make debugging simpler
+    memset(line,0,sizeof(line));
+
+    // get a line
+    const char * code = fgets(line, MAXLINELEN+2, f);
+
+    // are we at the end of the file?
+    if (!code){
+      if (cont) {
+        // The file ended within a continued line: parse what was collected.
+        // Reset the type list first, exactly as for any other logical line.
+        // Otherwise the '-d TYPE' Directives of the previous line would be
+        // seen again and could raise a bogus "multiple -d TYPE" error.
+        scan_types.clear();
+        int scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
+        // See if we found a SCANDIRECTIVE directive
+        if (scandevice==-1)
+          return 0;
+        // did we find a syntax error
+        if (scandevice==-2)
+          return -1;
+        // the final line is part of a continuation line
+        entry+=scandevice;
+      }
+      break;
+    }
+
+    // input file line number
+    contlineno++;
+
+    // See if line is too long.
+    // The buffer has room for MAXLINELEN+1 characters, so a longer
+    // length than MAXLINELEN shows that the limit was exceeded
+    int len = static_cast<int>(strlen(line));
+    if (len>MAXLINELEN){
+      const char *warn;
+      if (line[len-1]=='\n')
+        warn="(including newline!) ";
+      else
+        warn="";
+      PrintOut(LOG_CRIT,"Error: line %d of file %s %sis more than MAXLINELEN=%d characters.\n",
+               (int)contlineno,configfile,warn,(int)MAXLINELEN);
+      return -1;
+    }
+
+    // Ignore anything after comment symbol
+    char * comment = strchr(line,'#');
+    if (comment){
+      *comment='\0';
+      len = static_cast<int>(strlen(line));
+    }
+
+    // is the total line (made of all continuation lines) too long?
+    if (cont+len>MAXCONTLINE){
+      PrintOut(LOG_CRIT,"Error: continued line %d (actual line %d) of file %s is more than MAXCONTLINE=%d characters.\n",
+               lineno, (int)contlineno, configfile, (int)MAXCONTLINE);
+      return -1;
+    }
+
+    // copy string so far into fullline, and increment length
+    snprintf(fullline+cont, sizeof(fullline)-cont, "%s" ,line);
+    cont+=len;
+
+    // is this a continuation line. If so, replace \ by space and look at next line.
+    // A backslash only continues the line if nothing but whitespace follows it
+    char * lastslash = strrchr(line,'\\');
+    if (lastslash && !strtok(lastslash+1," \n\t")){
+      *(fullline+(cont-len)+(lastslash-line))=' ';
+      continue;
+    }
+
+    // Not a continuation line. Parse it
+    scan_types.clear();
+    int scandevice = ParseConfigLine(conf_entries, default_conf, scan_types, contlineno, fullline);
+
+    // did we find a scandevice directive?
+    if (scandevice==-1)
+      return 0;
+    // did we find a syntax error
+    if (scandevice==-2)
+      return -1;
+
+    entry+=scandevice;
+    lineno++;
+    cont=0;
+  }
+
+  // note -- may be zero if syntax of file OK, but no valid entries!
+  return entry;
+}
+
+/* Emits the message "=======> VALID ARGUMENTS ARE: <LIST> <=======\n", where
+   <LIST> is the text GetValidArgList() provides for option opt. */
+static void PrintValidArgs(char opt)
+{
+  PrintOut(LOG_CRIT, "=======> VALID ARGUMENTS ARE: ");
+
+  const char * const arglist = GetValidArgList(opt);
+  if (arglist)
+    PrintOut(LOG_CRIT, "%s", arglist);
+  else
+    PrintOut(LOG_CRIT, "Error constructing argument list for option %c", opt);
+
+  PrintOut(LOG_CRIT, " <=======\n");
+}
+
+#ifndef _WIN32
+// Checks that the path given to 'option' is empty or absolute.
+// Otherwise switches to debug mode, reports the error and returns false.
+static bool check_abs_path(char option, const std::string & path)
+{
+  const bool acceptable = (path.empty() || path[0] == '/');
+
+  if (!acceptable) {
+    debugmode = 1;
+    PrintHead();
+    PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <=======\n\n", option, path.c_str());
+    PrintOut(LOG_CRIT, "Error: relative path names are not allowed\n\n");
+  }
+
+  return acceptable;
+}
+#endif // !_WIN32
+
+// Parses the command line into the global option variables. Prints usage, directive or version/license/copyright messages where requested.
+// Returns -1 if startup should continue in main_worker(), else the exit status: 0 after informational output, EXIT_BADCMD on invalid input or failed initialization.
+static int parse_options(int argc, char **argv)
+{
+ // Init default path names
+#ifndef _WIN32
+ configfile = SMARTMONTOOLS_SYSCONFDIR "/smartd.conf";
+ warning_script = SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh";
+#else
+ std::string exedir = get_exe_dir();
+ static std::string configfile_str = exedir + "/smartd.conf"; // static: configfile keeps pointing into this string
+ configfile = configfile_str.c_str();
+ warning_script = exedir + "/smartd_warning.cmd";
+#endif
+
+ // Please update GetValidArgList() if you edit shortopts
+ static const char shortopts[] = "c:l:q:dDni:p:r:s:A:B:w:Vh?"
+#if defined(HAVE_POSIX_API) || defined(_WIN32)
+ "u:"
+#endif
+#ifdef HAVE_LIBCAP_NG
+ "C"
+#endif
+ ;
+ // Please update GetValidArgList() if you edit longopts
+ struct option longopts[] = {
+ { "configfile", required_argument, 0, 'c' },
+ { "logfacility", required_argument, 0, 'l' },
+ { "quit", required_argument, 0, 'q' },
+ { "debug", no_argument, 0, 'd' },
+ { "showdirectives", no_argument, 0, 'D' },
+ { "interval", required_argument, 0, 'i' },
+#ifndef _WIN32
+ { "no-fork", no_argument, 0, 'n' },
+#else
+ { "service", no_argument, 0, 'n' },
+#endif
+ { "pidfile", required_argument, 0, 'p' },
+ { "report", required_argument, 0, 'r' },
+ { "savestates", required_argument, 0, 's' },
+ { "attributelog", required_argument, 0, 'A' },
+ { "drivedb", required_argument, 0, 'B' },
+ { "warnexec", required_argument, 0, 'w' },
+ { "version", no_argument, 0, 'V' },
+ { "license", no_argument, 0, 'V' },
+ { "copyright", no_argument, 0, 'V' },
+ { "help", no_argument, 0, 'h' },
+ { "usage", no_argument, 0, 'h' },
+#if defined(HAVE_POSIX_API) || defined(_WIN32)
+ { "warn-as-user", required_argument, 0, 'u' },
+#endif
+#ifdef HAVE_LIBCAP_NG
+ { "capabilities", optional_argument, 0, 'C' },
+#endif
+ { 0, 0, 0, 0 }
+ };
+
+ opterr=optopt=0; // opterr=0: getopt prints no messages itself, errors are reported in the '?' case below
+ bool badarg = false; // set if the argument of the current option is not one of the valid values
+ const char * badarg_msg = nullptr; // set instead of badarg if a specific error text is available
+ bool use_default_db = true; // set false on '-B FILE'
+
+ // Parse input options.
+ int optchar;
+ while ((optchar = getopt_long(argc, argv, shortopts, longopts, nullptr)) != -1) {
+ char *arg;
+ char *tailptr;
+ long lchecktime;
+
+ switch(optchar) {
+ case 'q':
+ // when to quit, the variants with "nodev0" additionally set quit_nodev0
+ quit_nodev0 = false;
+ if (!strcmp(optarg, "nodev"))
+ quit = QUIT_NODEV;
+ else if (!strcmp(optarg, "nodev0")) {
+ quit = QUIT_NODEV;
+ quit_nodev0 = true;
+ }
+ else if (!strcmp(optarg, "nodevstartup"))
+ quit = QUIT_NODEVSTARTUP;
+ else if (!strcmp(optarg, "nodev0startup")) {
+ quit = QUIT_NODEVSTARTUP;
+ quit_nodev0 = true;
+ }
+ else if (!strcmp(optarg, "errors"))
+ quit = QUIT_ERRORS;
+ else if (!strcmp(optarg, "errors,nodev0")) {
+ quit = QUIT_ERRORS;
+ quit_nodev0 = true;
+ }
+ else if (!strcmp(optarg, "never"))
+ quit = QUIT_NEVER;
+ else if (!strcmp(optarg, "onecheck")) {
+ quit = QUIT_ONECHECK;
+ debugmode = 1; // onecheck implies debug mode
+ }
+ else if (!strcmp(optarg, "showtests")) {
+ quit = QUIT_SHOWTESTS;
+ debugmode = 1; // showtests implies debug mode
+ }
+ else
+ badarg = true;
+ break;
+ case 'l':
+ // set the syslog facility: "daemon" or "local0" ... "local7"
+ if (!strcmp(optarg, "daemon"))
+ facility=LOG_DAEMON;
+ else if (!strcmp(optarg, "local0"))
+ facility=LOG_LOCAL0;
+ else if (!strcmp(optarg, "local1"))
+ facility=LOG_LOCAL1;
+ else if (!strcmp(optarg, "local2"))
+ facility=LOG_LOCAL2;
+ else if (!strcmp(optarg, "local3"))
+ facility=LOG_LOCAL3;
+ else if (!strcmp(optarg, "local4"))
+ facility=LOG_LOCAL4;
+ else if (!strcmp(optarg, "local5"))
+ facility=LOG_LOCAL5;
+ else if (!strcmp(optarg, "local6"))
+ facility=LOG_LOCAL6;
+ else if (!strcmp(optarg, "local7"))
+ facility=LOG_LOCAL7;
+ else
+ badarg = true;
+ break;
+ case 'd':
+ // enable debug mode
+ debugmode = 1;
+ break;
+ case 'n':
+ // don't fork()
+#ifndef _WIN32 // On Windows, --service is already handled by daemon_main()
+ do_fork = false;
+#endif
+ break;
+ case 'D':
+ // print summary of all valid directives
+ debugmode = 1;
+ Directives();
+ return 0;
+ case 'i':
+ // Period (time interval) for checking, accepted range is 10 to INT_MAX.
+ // strtol will set errno in the event of overflow, so we'll check it.
+ errno = 0;
+ lchecktime = strtol(optarg, &tailptr, 10);
+ if (*tailptr != '\0' || lchecktime < 10 || lchecktime > INT_MAX || errno) {
+ debugmode=1;
+ PrintHead();
+ PrintOut(LOG_CRIT, "======> INVALID INTERVAL: %s <=======\n", optarg);
+ PrintOut(LOG_CRIT, "======> INTERVAL MUST BE INTEGER BETWEEN %d AND %d <=======\n", 10, INT_MAX);
+ PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
+ return EXIT_BADCMD;
+ }
+ checktime = (int)lchecktime;
+ break;
+ case 'r':
+ // report IOCTL transactions, argument is TYPE[,LEVEL] with LEVEL 1 to 4 (1 if omitted)
+ {
+ int n1 = -1, n2 = -1, len = strlen(optarg); // n1/n2: chars consumed after TYPE / after LEVEL, -1 if not reached
+ char s[9+1]; unsigned i = 1;
+ sscanf(optarg, "%9[a-z]%n,%u%n", s, &n1, &i, &n2);
+ if (!((n1 == len || n2 == len) && 1 <= i && i <= 4)) { // whole argument must have been consumed
+ badarg = true;
+ } else if (!strcmp(s,"ioctl")) {
+ ata_debugmode = scsi_debugmode = nvme_debugmode = i;
+ } else if (!strcmp(s,"ataioctl")) {
+ ata_debugmode = i;
+ } else if (!strcmp(s,"scsiioctl")) {
+ scsi_debugmode = i;
+ } else if (!strcmp(s,"nvmeioctl")) {
+ nvme_debugmode = i;
+ } else {
+ badarg = true;
+ }
+ }
+ break;
+ case 'c':
+ // alternate configuration file, the name is kept in configfile_alt
+ if (strcmp(optarg,"-"))
+ configfile = (configfile_alt = optarg).c_str();
+ else // read from stdin
+ configfile=configfile_stdin;
+ break;
+ case 'p':
+ // output file with PID number
+ pid_file = optarg;
+ break;
+ case 's':
+ // path prefix of persistent state file, "-" selects an empty prefix
+ state_path_prefix = (strcmp(optarg, "-") ? optarg : "");
+ break;
+ case 'A':
+ // path prefix of attribute log file, "-" selects an empty prefix
+ attrlog_path_prefix = (strcmp(optarg, "-") ? optarg : "");
+ break;
+ case 'B':
+ {
+ const char * path = optarg;
+ if (*path == '+' && path[1]) // a leading '+' is skipped and leaves use_default_db set
+ path++;
+ else
+ use_default_db = false;
+ unsigned char savedebug = debugmode; debugmode = 1; // debugmode is forced on while the file is read and restored on success
+ if (!read_drive_database(path))
+ return EXIT_BADCMD;
+ debugmode = savedebug;
+ }
+ break;
+ case 'w':
+ warning_script = optarg; // replaces the default warning script path set above
+ break;
+#ifdef HAVE_POSIX_API
+ case 'u':
+ warn_as_user = false;
+ if (strcmp(optarg, "-")) { // "-" leaves warn_as_user disabled
+ warn_uname = warn_gname = "unknown";
+ badarg_msg = parse_ugid(optarg, warn_uid, warn_gid,
+ warn_uname, warn_gname );
+ if (badarg_msg)
+ break;
+ warn_as_user = true;
+ }
+ break;
+#elif defined(_WIN32)
+ case 'u':
+ if (!strcmp(optarg, "restricted"))
+ warn_as_restr_user = true;
+ else if (!strcmp(optarg, "unchanged"))
+ warn_as_restr_user = false;
+ else
+ badarg = true;
+ break;
+#endif // HAVE_POSIX_API ||_WIN32
+ case 'V':
+ // print version, license and copyright info (--version, --license and --copyright all map here)
+ debugmode = 1;
+ PrintOut(LOG_INFO, "%s", format_version_info("smartd", 3 /*full*/).c_str());
+ return 0;
+#ifdef HAVE_LIBCAP_NG
+ case 'C':
+ // enable capabilities: mode 1 without argument, mode 2 for argument "mail"
+ if (!optarg)
+ capabilities_mode = 1;
+ else if (!strcmp(optarg, "mail"))
+ capabilities_mode = 2;
+ else
+ badarg = true;
+ break;
+#endif
+ case 'h':
+ // help: print summary of command-line options
+ debugmode=1;
+ PrintHead();
+ Usage();
+ return 0;
+ case '?':
+ default:
+ // unrecognized option or missing argument
+ debugmode=1;
+ PrintHead();
+ // Point arg to the argument in which this option was found.
+ arg = argv[optind-1];
+ // Check whether the option is a long option that doesn't map to -h.
+ if (arg[1] == '-' && optchar != 'h') {
+ // Iff optopt holds a valid option then argument must be missing.
+ if (optopt && strchr(shortopts, optopt)) {
+ PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %s <=======\n",arg+2);
+ PrintValidArgs(optopt);
+ } else {
+ PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %s <=======\n\n",arg+2);
+ }
+ PrintOut(LOG_CRIT, "\nUse smartd --help to get a usage summary\n\n");
+ return EXIT_BADCMD;
+ }
+ if (optopt) {
+ // Iff optopt holds a valid option then argument must be missing.
+ if (strchr(shortopts, optopt)){
+ PrintOut(LOG_CRIT, "=======> ARGUMENT REQUIRED FOR OPTION: %c <=======\n",optopt);
+ PrintValidArgs(optopt);
+ } else {
+ PrintOut(LOG_CRIT, "=======> UNRECOGNIZED OPTION: %c <=======\n\n",optopt);
+ }
+ PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
+ return EXIT_BADCMD;
+ }
+ Usage(); // optopt is 0 here: treated like a help request
+ return 0;
+ }
+
+ // Check to see if option had an unrecognized or incorrect argument.
+ if (badarg || badarg_msg) {
+ debugmode=1;
+ PrintHead();
+ // It would be nice to print the actual option name given by the user
+ // here, but we just print the short form. Please fix this if you know
+ // a clean way to do it.
+ PrintOut(LOG_CRIT, "=======> INVALID ARGUMENT TO -%c: %s <======= \n", optchar, optarg);
+ if (badarg_msg)
+ PrintOut(LOG_CRIT, "%s\n", badarg_msg);
+ else
+ PrintValidArgs(optchar);
+ PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
+ return EXIT_BADCMD;
+ }
+ }
+
+ // non-option arguments are not allowed
+ if (argc > optind) {
+ debugmode=1;
+ PrintHead();
+ PrintOut(LOG_CRIT, "=======> UNRECOGNIZED ARGUMENT: %s <=======\n\n", argv[optind]);
+ PrintOut(LOG_CRIT, "\nUse smartd -h to get a usage summary\n\n");
+ return EXIT_BADCMD;
+ }
+
+ // no pidfile in debug mode
+ if (debugmode && !pid_file.empty()) {
+ debugmode=1;
+ PrintHead();
+ PrintOut(LOG_CRIT, "=======> INVALID CHOICE OF OPTIONS: -d and -p <======= \n\n");
+ PrintOut(LOG_CRIT, "Error: pid file %s not written in debug (-d) mode\n\n", pid_file.c_str());
+ return EXIT_BADCMD;
+ }
+
+#ifndef _WIN32
+ if (!debugmode) {
+ // absolute path names are required due to chdir('/') in daemon_init()
+ if (!( check_abs_path('p', pid_file)
+ && check_abs_path('s', state_path_prefix)
+ && check_abs_path('A', attrlog_path_prefix)))
+ return EXIT_BADCMD;
+ }
+#endif
+
+#ifdef _WIN32
+ if (warn_as_restr_user && !popen_as_restr_check()) {
+ // debugmode=1 // would suppress messages to eventlog or log file
+ PrintHead();
+ PrintOut(LOG_CRIT, "Option '--warn-as-user=restricted' is not effective if the current user\n");
+ PrintOut(LOG_CRIT, "is the local 'SYSTEM' or 'Administrator' account\n\n");
+ return EXIT_BADCMD;
+ }
+#endif
+
+ // Read or init drive database, debugmode is forced on during the call and restored on success
+ {
+ unsigned char savedebug = debugmode; debugmode = 1;
+ if (!init_drive_database(use_default_db))
+ return EXIT_BADCMD;
+ debugmode = savedebug;
+ }
+
+ // Check option compatibility of notify support
+ // cppcheck-suppress knownConditionTrueFalse
+ if (!notify_post_init())
+ return EXIT_BADCMD;
+
+ // print header, don't write Copyright line to syslog
+ PrintOut(LOG_INFO, "%s\n", format_version_info("smartd", (debugmode ? 2 : 1)).c_str());
+
+ // No error, continue in main_worker()
+ return -1;
+}
+
+// Called if no configuration file was found or if the SCANDIRECTIVE
+// Directive was found. Appends one configuration entry per device
+// returned by scan_smart_devices() in os_OSNAME.cpp.
+// Each new entry is a copy of base_cfg with the names taken from the
+// device. The device objects are moved to scanned_devs.
+// Returns the number of devices found, 0 if none or if the scan failed.
+static int MakeConfigEntries(const dev_config & base_cfg,
+  dev_config_vector & conf_entries, smart_device_list & scanned_devs,
+  const smart_devtype_list & types)
+{
+  // Ask the OS interface for the device list
+  smart_device_list found;
+  if (!smi()->scan_smart_devices(found, types)) {
+    PrintOut(LOG_CRIT, "DEVICESCAN failed: %s\n", smi()->get_errmsg());
+    return 0;
+  }
+
+  // Nothing to do without devices
+  const unsigned num_found = found.size();
+  if (num_found == 0)
+    return 0;
+
+  // Pad scanned_devs with empty slots for the entries which exist already
+  for (unsigned n = scanned_devs.size(); n < conf_entries.size(); n++)
+    scanned_devs.push_back(static_cast<smart_device *>(nullptr));
+
+  // The type is only recorded if scanning is limited to specific types.
+  // This is later used to set SMARTD_DEVICETYPE environment variable
+  const bool typed_scan = !types.empty();
+
+  for (unsigned idx = 0; idx < num_found; idx++) {
+    // Take over the device object
+    smart_device * dev = found.release(idx);
+    scanned_devs.push_back(dev);
+
+    // Clone the base configuration and fill in the names
+    conf_entries.push_back(base_cfg);
+    dev_config & entry = conf_entries.back();
+    const smart_device::device_info & info = dev->get_info();
+    entry.name = info.info_name;
+    entry.dev_name = info.dev_name;
+
+    if (typed_scan)
+      entry.dev_type = info.dev_type;
+    else // SMARTD_DEVICETYPE=auto
+      entry.dev_type.clear();
+  }
+
+  return num_found;
+}
+
+// Reads the configuration file and, if requested there or if no file
+// exists, creates the entries from a device scan instead.
+// Returns negative value (see ParseConfigFile()) if config file
+// had errors, else number of entries which may be zero or positive.
+static int ReadOrMakeConfigEntries(dev_config_vector & conf_entries, smart_device_list & scanned_devs)
+{
+  // parse configuration file configfile (normally /etc/smartd.conf)
+  smart_devtype_list scan_types;
+  const int parsed = ParseConfigFile(conf_entries, scan_types);
+
+  // Error reading the configuration file: discard everything
+  if (parsed < 0) {
+    conf_entries.clear();
+    if (parsed == -1)
+      PrintOut(LOG_CRIT, "Configuration file %s has fatal syntax errors.\n", configfile);
+    return parsed;
+  }
+
+  // Valid entries and no SCANDIRECTIVE
+  if (parsed > 0) {
+    PrintOut(LOG_INFO, "Configuration file %s parsed.\n", configfile);
+    return static_cast<int>(conf_entries.size());
+  }
+
+  // File is present but provides nothing to monitor
+  if (conf_entries.empty()) {
+    PrintOut(LOG_CRIT, "Configuration file %s parsed but has no entries\n", configfile);
+    return static_cast<int>(conf_entries.size());
+  }
+
+  // A SCANDIRECTIVE was found or there was no configuration file.
+  // The last entry holds all options that were set, it becomes the
+  // template for the scanned devices and is removed from the list
+  const dev_config scan_cfg = conf_entries.back();
+  conf_entries.pop_back();
+
+  // lineno is zero only for the entry made up for a missing file
+  if (scan_cfg.lineno)
+    PrintOut(LOG_INFO,"Configuration file %s was parsed, found %s, scanning devices\n", configfile, SCANDIRECTIVE);
+  else
+    PrintOut(LOG_INFO,"No configuration file %s found, scanning devices\n", configfile);
+
+  // make config list of devices to search for
+  MakeConfigEntries(scan_cfg, conf_entries, scanned_devs, scan_types);
+
+  // warn user if scan table found no devices
+  if (conf_entries.empty())
+    PrintOut(LOG_CRIT,"In the system's table of devices NO devices found to scan\n");
+
+  return static_cast<int>(conf_entries.size());
+}
+
+// Register one device, return false on error
+static bool register_device(dev_config & cfg, dev_state & state, smart_device_auto_ptr & dev,
+ const dev_config_vector * prev_cfgs)
+{
+ bool scanning;
+ if (!dev) {
+ // Get device of appropriate type
+ dev = smi()->get_smart_device(cfg.name.c_str(), cfg.dev_type.c_str());
+ if (!dev) {
+ if (cfg.dev_type.empty())
+ PrintOut(LOG_INFO, "Device: %s, unable to autodetect device type\n", cfg.name.c_str());
+ else
+ PrintOut(LOG_INFO, "Device: %s, unsupported device type '%s'\n", cfg.name.c_str(), cfg.dev_type.c_str());
+ return false;
+ }
+ scanning = false;
+ }
+ else {
+ // Use device from device scan
+ scanning = true;
+ }
+
+ // Save old info
+ smart_device::device_info oldinfo = dev->get_info();
+
+ // Open with autodetect support, may return 'better' device
+ dev.replace( dev->autodetect_open() );
+
+ // Report if type has changed
+ if (oldinfo.dev_type != dev->get_dev_type())
+ PrintOut(LOG_INFO, "Device: %s, type changed from '%s' to '%s'\n",
+ cfg.name.c_str(), oldinfo.dev_type.c_str(), dev->get_dev_type());
+
+ // Return if autodetect_open() failed
+ if (!dev->is_open()) {
+ if (debugmode || !scanning)
+ PrintOut(LOG_INFO, "Device: %s, open() failed: %s\n", dev->get_info_name(), dev->get_errmsg());
+ return false;
+ }
+
+ // Update informal name
+ cfg.name = dev->get_info().info_name;
+ PrintOut(LOG_INFO, "Device: %s, opened\n", cfg.name.c_str());
+
+ int status;
+ const char * typemsg;
+ // register ATA device
+ if (dev->is_ata()){
+ typemsg = "ATA";
+ status = ATADeviceScan(cfg, state, dev->to_ata(), prev_cfgs);
+ }
+ // or register SCSI device
+ else if (dev->is_scsi()){
+ typemsg = "SCSI";
+ status = SCSIDeviceScan(cfg, state, dev->to_scsi(), prev_cfgs);
+ }
+ // or register NVMe device
+ else if (dev->is_nvme()) {
+ typemsg = "NVMe";
+ status = NVMeDeviceScan(cfg, state, dev->to_nvme(), prev_cfgs);
+ }
+ else {
+ PrintOut(LOG_INFO, "Device: %s, neither ATA, SCSI nor NVMe device\n", cfg.name.c_str());
+ return false;
+ }
+
+ if (status) {
+ if (!scanning || debugmode) {
+ if (cfg.lineno)
+ PrintOut(scanning ? LOG_INFO : LOG_CRIT,
+ "Unable to register %s device %s at line %d of file %s\n",
+ typemsg, cfg.name.c_str(), cfg.lineno, configfile);
+ else
+ PrintOut(LOG_INFO, "Unable to register %s device %s\n",
+ typemsg, cfg.name.c_str());
+ }
+
+ return false;
+ }
+
+ return true;
+}
+
+// Tries to register every device from conf_entries. Each device which can
+// be registered is appended to configs, states and devices (all three are
+// cleared first). Returns false if an explicitly listed device can not be
+// registered and this is not tolerated ('-d removable' or '-q never').
+static bool register_devices(const dev_config_vector & conf_entries, smart_device_list & scanned_devs,
+  dev_config_vector & configs, dev_state_vector & states, smart_device_list & devices)
+{
+  // Forget ALL devices registered before
+  configs.clear();
+  devices.clear();
+  states.clear();
+
+  // Non-DEVICESCAN devices seen so far (unique_name -> cfg.name)
+  std::map<std::string, std::string> seen_names;
+
+  for (unsigned i = 0; i < conf_entries.size(); i++) {
+    dev_config cfg = conf_entries[i];
+
+    // Unique device "name [type]" (with symlinks resolved) for duplicate detection
+    const std::string unique_name = smi()->get_unique_dev_name(cfg.dev_name.c_str(), cfg.dev_type.c_str());
+
+    // Decoration for the optional " [TYPE]" part of the messages
+    const bool typed = !cfg.dev_type.empty();
+    const char * const type_open = (typed ? " [" : "");
+    const char * const type_close = (typed ? "]" : "");
+
+    if (debugmode && unique_name != cfg.dev_name)
+      pout("Device: %s%s%s%s, unique name: %s\n", cfg.name.c_str(),
+           type_open, cfg.dev_type.c_str(), type_close, unique_name.c_str());
+
+    if (cfg.ignore) {
+      // Store for duplicate detection and ignore
+      PrintOut(LOG_INFO, "Device: %s%s%s%s, ignored\n", cfg.name.c_str(),
+               type_open, cfg.dev_type.c_str(), type_close);
+      seen_names[unique_name] = cfg.name;
+      continue;
+    }
+
+    // The device object may exist already if it was found by DEVICESCAN
+    smart_device_auto_ptr dev;
+    bool scanning = false;
+    if (i < scanned_devs.size()) {
+      dev = scanned_devs.release(i);
+      if (dev) {
+        // Skip if a preceding non-DEVICESCAN entry refers to the same device
+        const auto prev = seen_names.find(unique_name);
+        if (prev != seen_names.end()) {
+          const bool other_name = (prev->second != cfg.name);
+          PrintOut(LOG_INFO, "Device: %s, %s%s, ignored\n", dev->get_info_name(),
+                   (other_name ? "same as " : "duplicate"), (other_name ? prev->second.c_str() : ""));
+          continue;
+        }
+        scanning = true;
+      }
+    }
+
+    // Prevent systemd unit startup timeout when registering many devices
+    notify_extend_timeout();
+
+    // Register device
+    // If scanning, pass dev_idinfo of previous devices for duplicate check
+    dev_state state;
+    if (!register_device(cfg, state, dev, (scanning ? &configs : nullptr))) {
+      // Failures of scanned devices are silently skipped
+      if (scanning)
+        continue;
+
+      // An explicitly listed device must be available unless the user
+      // has specified that the device is removable
+      if (!(cfg.removable || quit == QUIT_NEVER)) {
+        PrintOut(LOG_CRIT, "Unable to register device %s (no Directive -d removable). Exiting.\n",
+                 cfg.name.c_str());
+        return false;
+      }
+      PrintOut(LOG_INFO, "Device: %s, not available\n", cfg.name.c_str());
+      // Prevent retry of registration
+      seen_names[unique_name] = cfg.name;
+      continue;
+    }
+
+    // Success: move onto the list of devices
+    configs.push_back(cfg);
+    states.push_back(state);
+    devices.push_back(dev);
+    // Store explicitly listed devices for duplicate detection
+    if (!scanning)
+      seen_names[unique_name] = cfg.name;
+  }
+
+  // Set minimum check time and factors for staggered tests
+  checktime_min = 0;
+  unsigned next_factor = 0;
+  for (auto & c : configs) {
+    const bool smaller = (!checktime_min || checktime_min > c.checktime);
+    if (c.checktime && smaller)
+      checktime_min = c.checktime;
+    if (!c.test_regex.empty())
+      c.test_offset_factor = next_factor++;
+  }
+  if (checktime_min && checktime_min > checktime)
+    checktime_min = checktime;
+
+  init_disable_standby_check(configs);
+  return true;
+}
+
+
+// Main program without exception handling: parses the options, (re)reads the configuration, registers the devices and runs the check loop until an exit signal arrives. Returns the exit status.
+static int main_worker(int argc, char **argv)
+{
+ // Initialize interface
+ smart_interface::init();
+ if (!smi())
+ return 1;
+
+ // Check whether systemd notify is supported and enabled
+ notify_init();
+
+ // parse input and print header and usage info if needed
+ int status = parse_options(argc,argv);
+ if (status >= 0) // a non-negative status requests exit, only a negative one continues
+ return status;
+
+ // Configuration for each device
+ dev_config_vector configs;
+ // Device states
+ dev_state_vector states;
+ // Devices to monitor
+ smart_device_list devices;
+
+ // Drop capabilities if supported and enabled
+ capabilities_drop_now();
+
+ notify_msg("Initializing ...");
+
+ // the main loop of the code, status stays negative unless an error sets an exit status
+ bool firstpass = true, write_states_always = true;
+ time_t wakeuptime = 0;
+ // assert(status < 0);
+ do {
+ // Should we (re)read the config file?
+ if (firstpass || caughtsigHUP){
+ if (!firstpass) {
+ // Write state files before the configuration is replaced
+ if (!state_path_prefix.empty())
+ write_all_dev_states(configs, states);
+
+ PrintOut(LOG_INFO,
+ caughtsigHUP==1?
+ "Signal HUP - rereading configuration file %s\n":
+ "\a\nSignal INT - rereading configuration file %s (" SIGQUIT_KEYNAME " quits)\n\n",
+ configfile);
+ notify_msg("Reloading ...");
+ }
+
+ {
+ dev_config_vector conf_entries; // Entries read from smartd.conf
+ smart_device_list scanned_devs; // Devices found during scan
+ // (re)reads config file, makes >=0 entries
+ int entries = ReadOrMakeConfigEntries(conf_entries, scanned_devs);
+
+ if (entries>=0) {
+ // registers the devices, on success configs, states and devices are replaced
+ if (!register_devices(conf_entries, scanned_devs, configs, states, devices)) {
+ status = EXIT_BADDEV;
+ break;
+ }
+ if (!(configs.size() == devices.size() && configs.size() == states.size()))
+ throw std::logic_error("Invalid result from RegisterDevices");
+ }
+ else if ( quit == QUIT_NEVER
+ || ((quit == QUIT_NODEV || quit == QUIT_NODEVSTARTUP) && !firstpass)) {
+ // user has asked to continue on error in configuration file
+ if (!firstpass)
+ PrintOut(LOG_INFO,"Reusing previous configuration\n");
+ }
+ else {
+ // exit with configuration file error status
+ status = (entries == -3 ? EXIT_READCONF : entries == -2 ? EXIT_NOCONF : EXIT_BADCONF);
+ break;
+ }
+ }
+
+ if (!( devices.size() > 0 || quit == QUIT_NEVER
+ || (quit == QUIT_NODEVSTARTUP && !firstpass))) {
+ status = (!quit_nodev0 ? EXIT_NODEV : 0); // with quit_nodev0 set, having no devices is not an error
+ PrintOut((status ? LOG_CRIT : LOG_INFO),
+ "Unable to monitor any SMART enabled devices. Exiting.\n");
+ break;
+ }
+
+ // Log number of devices we are monitoring, every device which is neither ATA nor SCSI is counted as NVMe
+ int numata = 0, numscsi = 0;
+ for (unsigned i = 0; i < devices.size(); i++) {
+ const smart_device * dev = devices.at(i);
+ if (dev->is_ata())
+ numata++;
+ else if (dev->is_scsi())
+ numscsi++;
+ }
+ PrintOut(LOG_INFO, "Monitoring %d ATA/SATA, %d SCSI/SAS and %d NVMe devices\n",
+ numata, numscsi, (int)devices.size() - numata - numscsi);
+
+ if (quit == QUIT_SHOWTESTS) {
+ // user has asked to print test schedule
+ PrintTestSchedule(configs, states, devices);
+ // assert(firstpass);
+ return 0;
+ }
+
+ // reset signal
+ caughtsigHUP=0;
+
+ // Always write state files after (re)configuration
+ write_states_always = true;
+ }
+
+ // check all devices once,
+ // self tests are not started in first pass unless '-q onecheck' is specified
+ notify_check((int)devices.size());
+ CheckDevicesOnce(configs, states, devices, firstpass, (!firstpass || quit == QUIT_ONECHECK));
+
+ // Write state files
+ if (!state_path_prefix.empty())
+ write_all_dev_states(configs, states, write_states_always);
+ write_states_always = false;
+
+ // Write attribute logs
+ if (!attrlog_path_prefix.empty())
+ write_all_dev_attrlogs(configs, states);
+
+ // user has asked us to exit after first check
+ if (quit == QUIT_ONECHECK) {
+ PrintOut(LOG_INFO,"Started with '-q onecheck' option. All devices successfully checked once.\n"
+ "smartd is exiting (exit status 0)\n");
+ // assert(firstpass);
+ return 0;
+ }
+
+ if (firstpass) {
+ if (!debugmode) {
+ // fork() into background if needed, close ALL file descriptors,
+ // redirect stdin, stdout, and stderr, chdir to "/".
+ status = daemon_init();
+ if (status >= 0) // non-negative status ends this process (presumably the forked parent or an error - confirm in daemon_init())
+ return status;
+
+ // Write PID file if configured
+ if (!write_pid_file())
+ return EXIT_PID;
+ }
+
+ // Set exit and signal handlers
+ install_signal_handlers();
+
+ // Initialize wakeup time to CURRENT time
+ wakeuptime = time(nullptr);
+
+ firstpass = false;
+ }
+
+ // sleep until next check time, or a signal arrives
+ wakeuptime = dosleep(wakeuptime, configs, states, write_states_always);
+
+ } while (!caughtsigEXIT);
+
+ if (caughtsigEXIT && status < 0) {
+ // Loop exited on signal and no error status was set inside the loop
+ if (caughtsigEXIT == SIGTERM || (debugmode && caughtsigEXIT == SIGQUIT)) {
+ PrintOut(LOG_INFO, "smartd received signal %d: %s\n",
+ caughtsigEXIT, strsignal(caughtsigEXIT));
+ }
+ else {
+ // Unexpected SIGINT or SIGQUIT
+ PrintOut(LOG_CRIT, "smartd received unexpected signal %d: %s\n",
+ caughtsigEXIT, strsignal(caughtsigEXIT));
+ status = EXIT_SIGNAL;
+ }
+ }
+
+ // Status unset above implies success
+ if (status < 0)
+ status = 0;
+
+ if (!firstpass) {
+ // Loop exited after daemon_init() and write_pid_file()
+
+ // Write state files only on normal exit
+ if (!status && !state_path_prefix.empty())
+ write_all_dev_states(configs, states);
+
+ // Delete PID file, if one was created
+ if (!pid_file.empty() && unlink(pid_file.c_str()))
+ PrintOut(LOG_CRIT,"Can't unlink PID file %s (%s).\n",
+ pid_file.c_str(), strerror(errno));
+ }
+
+ PrintOut((status ? LOG_CRIT : LOG_INFO), "smartd is exiting (exit status %d)\n", status);
+ return status;
+}
+
+
+#ifndef _WIN32
+// Program entry point
+int main(int argc, char **argv)
+#else
+// Windows: internal entry point, started direct or by service control manager
+static int smartd_main(int argc, char **argv)
+#endif
+{
+  // Run the real program and translate exceptions to exit status values
+  int status;
+  try {
+    status = main_worker(argc, argv);
+  }
+  catch (const std::bad_alloc &) {
+    // Memory allocation failed (also thrown by std::operator new)
+    status = EXIT_NOMEM;
+    PrintOut(LOG_CRIT, "Smartd: Out of memory\n");
+  }
+  catch (const std::exception & ex) {
+    // Other fatal errors
+    status = EXIT_BADCODE;
+    PrintOut(LOG_CRIT, "Smartd: Exception: %s\n", ex.what());
+  }
+
+  // Any device object still alive at this point indicates a bug
+  const int leftover = smart_device::get_num_objects();
+  if (leftover != 0) {
+    PrintOut(LOG_CRIT, "Smartd: Internal Error: %d device object(s) left at exit.\n",
+             leftover);
+    status = EXIT_BADCODE;
+  }
+
+  // Ask for a bug report on internal errors
+  if (status == EXIT_BADCODE)
+    PrintOut(LOG_CRIT, "Please inform " PACKAGE_BUGREPORT ", including output of smartd -V.\n");
+
+  notify_exit(status);
+#ifdef _WIN32
+  daemon_winsvc_exitcode = status;
+#endif
+  return status;
+}
+
+
+#ifdef _WIN32
+// Windows entry point
+int main(int argc, char **argv)
+{
+  // Settings which describe smartd as a Windows service
+  static const daemon_winsvc_options service_options = {
+    "--service",      // cmd_opt
+    "smartd",         // servicename
+    "SmartD Service", // displayname
+    // description
+    "Controls and monitors storage devices using the Self-Monitoring, "
+    "Analysis and Reporting Technology System (SMART) built into "
+    "ATA/SATA and SCSI/SAS hard drives and solid-state drives. "
+    "www.smartmontools.org"
+  };
+
+  // daemon_main() deals with the daemon and service specific commands.
+  // It runs smartd_main() direct, from a new process,
+  // or via service control manager
+  return daemon_main("smartd", &service_options, smartd_main, argc, argv);
+}
+#endif