diff options
Diffstat (limited to 'src/hooks/dhcp/high_availability/communication_state.h')
-rw-r--r-- | src/hooks/dhcp/high_availability/communication_state.h | 846 |
1 files changed, 846 insertions, 0 deletions
diff --git a/src/hooks/dhcp/high_availability/communication_state.h b/src/hooks/dhcp/high_availability/communication_state.h new file mode 100644 index 0000000..615a596 --- /dev/null +++ b/src/hooks/dhcp/high_availability/communication_state.h @@ -0,0 +1,846 @@ +// Copyright (C) 2018-2022 Internet Systems Consortium, Inc. ("ISC") +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef HA_COMMUNICATION_STATE_H +#define HA_COMMUNICATION_STATE_H + +#include <ha_config.h> +#include <ha_service_states.h> +#include <asiolink/interval_timer.h> +#include <asiolink/io_service.h> +#include <cc/data.h> +#include <dhcp/pkt.h> + +#include <boost/date_time/posix_time/posix_time.hpp> +#include <boost/multi_index_container.hpp> +#include <boost/multi_index/composite_key.hpp> +#include <boost/multi_index/hashed_index.hpp> +#include <boost/multi_index/indexed_by.hpp> +#include <boost/multi_index/member.hpp> +#include <boost/multi_index/ordered_index.hpp> +#include <boost/scoped_ptr.hpp> +#include <boost/shared_ptr.hpp> + +#include <functional> +#include <map> +#include <mutex> +#include <set> +#include <string> +#include <utility> + +namespace isc { +namespace ha { + +/// @brief Holds communication state between the two HA peers. +/// +/// The HA service constantly monitors the state of the connection between +/// the two peers. If the connection is lost it is an indicator that +/// the partner server may be down and failover actions should be triggered. +/// +/// A heartbeat command successfully sent over the control channel is an +/// indicator that the connection is healthy. A reply to the heartbeat +/// command includes information about the recipient state, its notion of +/// time, and other information useful for determining its health and +/// current activity. +/// +/// This class uses an interval timer to run heartbeat commands over the +/// control channel. The implementation of the heartbeat is external to +/// this class and is provided via @c CommunicationState::startHeartbeat +/// method. This implementation is required to run the @c poke method +/// in case of receiving a successful response to the heartbeat command. +/// +/// The @c poke method sets the "last poke time" to current time, thus +/// indicating that the connection is healthy. The @c getDurationInMillisecs +/// method is used to check for how long the server hasn't been able +/// to communicate with the partner. This duration is simply a time +/// elapsed since last successful poke time. If this duration becomes +/// greater than the configured threshold, the server assumes that the +/// communication with the partner is interrupted. +/// +/// The derivations of this class provide DHCPv4 and DHCPv6 specific +/// mechanisms for detecting server failures based on the analysis of +/// the received DHCP messages, i.e. how long the clients have been +/// trying to communicate with the partner and message types they sent. +/// In particular, the increased number of Rebind messages may indicate +/// issues with the DHCP server. +/// +/// This class is also used to monitor the clock skew between the active +/// servers. Maintaining a reasonably low clock skew is essential for the +/// HA service to function properly. This class calculates the clock +/// skew by comparing local time of the server with the time returned by +/// the partner in response to a heartbeat command. If this value exceeds +/// the certain thresholds, the CommunicationState::clockSkewShouldWarn +/// and the @c CommuicationState::clockSkewShouldTerminate indicate +/// whether the HA service should continue to operate normally, should +/// start issuing a warning about high clock skew or simply enter the +/// "terminated" state refusing to further operate until the clocks +/// are synchronized. This requires administrative intervention and the +/// restart of the HA service. +class CommunicationState { +public: + + /// @brief Constructor. + /// + /// @param io_service pointer to the common IO service instance. + /// @param config pointer to the HA configuration. + CommunicationState(const asiolink::IOServicePtr& io_service, + const HAConfigPtr& config); + + /// @brief Destructor. + /// + /// Stops scheduled heartbeat. + virtual ~CommunicationState(); + + /// @brief Returns last known state of the partner. + /// + /// @return Partner's state if it is known, or a negative value otherwise. + int getPartnerState() const; + + /// @brief Sets partner state. + /// + /// @param state new partner's state in a textual form. Supported values are + /// those returned in response to a ha-heartbeat command. + /// @throw BadValue if unsupported state value was provided. + void setPartnerState(const std::string& state); + +private: + /// @brief Sets partner state. + /// + /// @param state new partner's state in a textual form. Supported values are + /// those returned in response to a ha-heartbeat command. + /// @throw BadValue if unsupported state value was provided. + void setPartnerStateInternal(const std::string& state); + +public: + /// @brief Returns scopes served by the partner server. + /// + /// @return A set of scopes served by the partner. + std::set<std::string> getPartnerScopes() const; + + /// @brief Sets partner scopes. + /// + /// @param new_scopes Partner scopes enclosed in a JSON list. + void setPartnerScopes(data::ConstElementPtr new_scopes); + +private: + /// @brief Sets partner scopes. + /// + /// @param new_scopes Partner scopes enclosed in a JSON list. + void setPartnerScopesInternal(data::ConstElementPtr new_scopes); + +public: + /// @brief Starts recurring heartbeat (public interface). + /// + /// @param interval heartbeat interval in milliseconds. + /// @param heartbeat_impl pointer to the heartbeat implementation + /// function. + void startHeartbeat(const long interval, + const std::function<void()>& heartbeat_impl); + + /// @brief Stops recurring heartbeat. + void stopHeartbeat(); + +private: + /// @brief Starts recurring heartbeat. + /// + /// @param interval heartbeat interval in milliseconds. + /// @param heartbeat_impl pointer to the heartbeat implementation + /// function. + void startHeartbeatInternal(const long interval = 0, + const std::function<void()>& heartbeat_impl = 0); + + /// @brief Stops recurring heartbeat. + void stopHeartbeatInternal(); + +public: + /// @brief Checks if recurring heartbeat is running. + /// + /// @return true if heartbeat is running, false otherwise. + bool isHeartbeatRunning() const; + + /// @brief Pokes the communication state. + /// + /// Sets the last poke time to current time. If the heartbeat timer + /// has been scheduled, it is reset (starts over measuring the time + /// to the next heartbeat). + void poke(); + +private: + /// @brief Pokes the communication state. + /// + /// Sets the last poke time to current time. If the heartbeat timer + /// has been scheduled, it is reset (starts over measuring the time + /// to the next heartbeat). + void pokeInternal(); + +public: + /// @brief Returns duration between the poke time and current time. + /// + /// @return Duration between the poke time and current time. + int64_t getDurationInMillisecs() const; + + /// @brief Checks if communication with the partner is interrupted. + /// + /// This method checks if the communication with the partner appears + /// to be interrupted. This is the case when the time since last + /// successful communication is longer than the configured + /// max-response-delay value. + /// + /// @return true if communication is interrupted, false otherwise. + bool isCommunicationInterrupted() const; + + /// @brief Checks if the DHCP message appears to be unanswered. + /// + /// This method is used to provide the communication state with a + /// received DHCP message directed to the HA partner, to detect + /// if the partner fails to answer DHCP messages directed to it. + /// The DHCPv4 and DHCPv6 specific derivations implement this + /// functionality. + /// + /// This check is orthogonal to the heartbeat mechanism and is + /// usually triggered after several consecutive heartbeats fail + /// to be responded. + /// + /// The general approach to server failure detection is based on the + /// analysis of the "secs" field value (DHCPv4) and "elapsed time" + /// option value (DHCPv6). They indicate for how long the client + /// has been trying to complete the DHCP transaction. If these + /// values exceed a configured threshold, the client is considered + /// to fail to communicate with the server. This fact is recorded + /// by this object. If the number of distinct clients failing to + /// communicate with the partner exceeds a configured maximum + /// value, this server considers the partner to be offline. In this + /// case, this server will most likely start serving clients + /// which would normally be served by the partner. + /// + /// All information gathered by this method is cleared when the + /// @c poke method is invoked. + /// + /// @param message DHCP message to be analyzed. This must be the + /// message which belongs to the partner, i.e. the caller must + /// filter out messages belonging to the partner prior to calling + /// this method. + virtual void analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message) = 0; + + /// @brief Returns the number of analyzed messages while being in the + /// communications interrupted state. + /// + /// @return Number of analyzed messages. It includes retransmissions by + /// the same clients. + size_t getAnalyzedMessagesCount() const; + + /// @brief Checks if the partner failure has been detected based + /// on the DHCP traffic analysis. + /// + /// In the special case when max-unacked-clients is set to 0 this + /// method always returns true. Note that max-unacked-clients + /// set to 0 means that failure detection is not really performed. + /// Returning true in that case simplifies the code of the + /// @c HAService which doesn't need to check if the failure detection + /// is enabled or not. It simply calls this method in the + /// 'communications interrupted' situation to check if the + /// server should be transitioned to the 'partner-down' state. + /// + /// @return true if the partner failure has been detected, false + /// otherwise. + virtual bool failureDetected() const = 0; + + /// @brief Returns the current number of clients which attempted + /// to get a lease from the partner server. + /// + /// The returned number is reset to 0 when the server successfully + /// establishes communication with the partner. The number is + /// incremented only in the communications interrupted case. + /// + /// @return The number of clients including unacked clients. + virtual size_t getConnectingClientsCount() const = 0; + + /// @brief Returns the current number of clients which haven't got + /// the lease from the partner server. + /// + /// The returned number is reset to 0 when the server successfully + /// establishes communication with the partner. The number is + /// incremented only in the communications interrupted case. + /// + /// @return Number of unacked clients. + virtual size_t getUnackedClientsCount() const = 0; + +protected: + + /// @brief Removes information about the clients the partner server + /// should respond to while communication with the partner was + /// interrupted. + /// + /// This information is cleared by the @c CommunicationState::poke. + /// The derivations of this class must provide DHCPv4 and DHCPv6 specific + /// implementations of this method. The @c poke method is called to + /// indicate that the connection has been successfully (re)established. + /// Therefore the clients counters are reset and the failure detection + /// procedure starts over. + /// + /// See @c CommunicationState::analyzeMessage for details. + virtual void clearConnectingClients() = 0; + +public: + + /// @brief Issues a warning about high clock skew between the active + /// servers if one is warranted. + /// + /// The HA service monitors the clock skew between the active servers. The + /// clock skew is calculated from the local time and the time returned by + /// the partner in response to a heartbeat. When clock skew exceeds a certain + /// threshold the HA service starts issuing a warning message. This method + /// returns true if the HA service should issue this message. + /// + /// Currently, the warning threshold for the clock skew is hardcoded to + /// 30 seconds. In the future it may become configurable. + /// + /// This method is called for each heartbeat. If we issue a warning for each + /// heartbeat it may flood logs with those messages. This method provides + /// a gating mechanism which prevents the HA service from logging the + /// warning more often than every 60 seconds. If the last warning was issued + /// less than 60 seconds ago this method will return false even if the clock + /// skew exceeds the 30 seconds threshold. The correction of the clock skew + /// will reset the gating counter. + /// + /// @return true if the warning message should be logged because of the clock + /// skew exceeding a warning threshold. + bool clockSkewShouldWarn(); + +private: + /// @brief Issues a warning about high clock skew between the active + /// servers if one is warranted. + /// + /// The HA service monitors the clock skew between the active servers. The + /// clock skew is calculated from the local time and the time returned by + /// the partner in response to a heartbeat. When clock skew exceeds a certain + /// threshold the HA service starts issuing a warning message. This method + /// returns true if the HA service should issue this message. + /// + /// Currently, the warning threshold for the clock skew is hardcoded to + /// 30 seconds. In the future it may become configurable. + /// + /// This method is called for each heartbeat. If we issue a warning for each + /// heartbeat it may flood logs with those messages. This method provides + /// a gating mechanism which prevents the HA service from logging the + /// warning more often than every 60 seconds. If the last warning was issued + /// less than 60 seconds ago this method will return false even if the clock + /// skew exceeds the 30 seconds threshold. The correction of the clock skew + /// will reset the gating counter. + /// + /// @return true if the warning message should be logged because of the clock + /// skew exceeding a warning threshold. + bool clockSkewShouldWarnInternal(); + +public: + /// @brief Indicates whether the HA service should enter "terminated" + /// state as a result of the clock skew exceeding maximum value. + /// + /// If the clocks on the active servers are not synchronized (perhaps as + /// a result of a warning message caused by @c clockSkewShouldWarn) and the + /// clocks further drift, the clock skew may exceed another threshold which + /// should cause the HA service to enter "terminated" state. In this state + /// the servers still respond to DHCP clients normally, but they will neither + /// send lease updates nor heartbeats. In this case, the administrator must + /// correct the problem (synchronize the clocks) and restart the service. + /// This method indicates whether the service should terminate or not. + /// + /// Currently, the terminal threshold for the clock skew is hardcoded to + /// 60 seconds. In the future it may become configurable. + /// + /// @return true if the HA service should enter "terminated" state. + bool clockSkewShouldTerminate() const; + +private: + /// @brief Indicates whether the HA service should enter "terminated" + /// state as a result of the clock skew exceeding maximum value. + /// + /// If the clocks on the active servers are not synchronized (perhaps as + /// a result of a warning message caused by @c clockSkewShouldWarn) and the + /// clocks further drift, the clock skew may exceed another threshold which + /// should cause the HA service to enter "terminated" state. In this state + /// the servers still respond to DHCP clients normally, but they will neither + /// send lease updates nor heartbeats. In this case, the administrator must + /// correct the problem (synchronize the clocks) and restart the service. + /// This method indicates whether the service should terminate or not. + /// + /// Currently, the terminal threshold for the clock skew is hardcoded to + /// 60 seconds. In the future it may become configurable. + /// + /// @return true if the HA service should enter "terminated" state. + bool clockSkewShouldTerminateInternal() const; + + /// @brief Checks if the clock skew is greater than the specified number + /// of seconds. + /// + /// @param seconds a positive value to compare the clock skew with. + /// @return true if the absolute clock skew is greater than the specified + /// number of seconds, false otherwise. + bool isClockSkewGreater(const long seconds) const; + +public: + + /// @brief Provide partner's notion of time so the new clock skew can be + /// calculated. + /// + /// @param time_text Partner's time received in response to a heartbeat. The + /// time must be provided in the RFC 1123 format. It stores the current + /// time, partner's time, and the difference (skew) between them. + /// + /// @throw isc::http::HttpTimeConversionError if the time format is invalid. + /// + /// @todo Consider some other time formats which include millisecond + /// precision. + void setPartnerTime(const std::string& time_text); + +private: + /// @brief Provide partner's notion of time so the new clock skew can be + /// calculated. + /// + /// @param time_text Partner's time received in response to a heartbeat. The + /// time must be provided in the RFC 1123 format. It stores the current + /// time, partner's time, and the difference (skew) between them. + /// + /// @throw isc::http::HttpTimeConversionError if the time format is invalid. + /// + /// @todo Consider some other time formats which include millisecond + /// precision. + void setPartnerTimeInternal(const std::string& time_text); + +public: + /// @brief Returns current clock skew value in the logger friendly format. + std::string logFormatClockSkew() const; + +private: + /// @brief Returns current clock skew value in the logger friendly format. + std::string logFormatClockSkewInternal() const; + +public: + /// @brief Returns the report about current communication state. + /// + /// This function returns a JSON map describing the state of communication + /// with a partner. This report is included in the response to the + /// status-get command. + /// + /// @return JSON element holding the report. + data::ElementPtr getReport() const; + + /// @brief Modifies poke time by adding seconds to it. + /// + /// Used in unittests only. + /// + /// @param secs number of seconds to be added to the poke time. If + /// the value is negative it will set the poke time in the past + /// comparing to current value. + void modifyPokeTime(const long secs); + +private: + + /// @brief Returns duration between the poke time and current time. + /// + /// Should be called in a thread safe context. + /// + /// @return Duration between the poke time and current time. + int64_t getDurationInMillisecsInternal() const; + +protected: + /// @brief Update the poke time and compute the duration. + /// + /// @return The time elapsed. + boost::posix_time::time_duration updatePokeTime(); + +private: + /// @brief Update the poke time and compute the duration. + /// + /// Should be called in a thread safe context. + /// + /// @return The time elapsed. + boost::posix_time::time_duration updatePokeTimeInternal(); + +public: + + /// @brief Returns a total number of unsent lease updates. + uint64_t getUnsentUpdateCount() const; + + /// @brief Increases a total number of unsent lease updates by 1. + /// + /// This method should be called when the server has allocated a + /// lease but decided to not send the lease update to its partner. + /// If the server is in the partner-down state it allocates new + /// leases but doesn't send lease updates because the partner is + /// unavailable. + /// + /// This method protects against setting the value to 0 in an + /// unlikely event of the overflow. The zero is reserved for the + /// server startup case. + void increaseUnsentUpdateCount(); + +private: + + /// @brief Thread unsafe implementation of the @c increaseUnsentUpdateCount. + void increaseUnsentUpdateCountInternal(); + +public: + + /// @brief Checks if the partner allocated new leases for which it hasn't sent + /// any lease updates. + /// + /// It compares a previous and current value of the @c partner_unsent_update_count_. + /// If the current value is 0 and the previous value is non-zero it indicates + /// that the partner was restarted. + /// + /// @return true if the partner has allocated new leases for which it didn't + /// send lease updates, false otherwise. + bool hasPartnerNewUnsentUpdates() const; + +private: + + /// @brief Thread unsafe implementation of the @c hasPartnerNewUnsentUpdates. + /// + /// @return true if the partner has allocated new leases for which it didn't + /// send lease updates, false otherwise. + bool hasPartnerNewUnsentUpdatesInternal() const; + +public: + + /// @brief Saves new total number of unsent lease updates from the partner. + /// + /// @param unsent_update_count new total number of unsent lease updates from + /// the partner. + void setPartnerUnsentUpdateCount(uint64_t unsent_update_count); + +private: + + /// @brief Thread unsafe implementation of the @c setPartnerUnsentUpdateCount. + /// + /// @param unsent_update_count new total number of unsent lease updates from + /// the partner. + void setPartnerUnsentUpdateCountInternal(uint64_t unsent_update_count); + +protected: + /// @brief Pointer to the common IO service instance. + asiolink::IOServicePtr io_service_; + + /// @brief High availability configuration. + HAConfigPtr config_; + + /// @brief Interval timer triggering heartbeat commands. + asiolink::IntervalTimerPtr timer_; + + /// @brief Interval specified for the heartbeat. + long interval_; + + /// @brief Last poke time. + boost::posix_time::ptime poke_time_; + + /// @brief Pointer to the function providing heartbeat implementation. + std::function<void()> heartbeat_impl_; + + /// @brief Last known state of the partner server. + /// + /// Negative value means that the partner's state is unknown. + int partner_state_; + + /// @brief Last known set of scopes served by the partner server. + std::set<std::string> partner_scopes_; + + /// @brief Clock skew between the active servers. + boost::posix_time::time_duration clock_skew_; + + /// @brief Holds a time when last warning about too high clock skew + /// was issued. + boost::posix_time::ptime last_clock_skew_warn_; + + /// @brief My time when skew was calculated. + boost::posix_time::ptime my_time_at_skew_; + + /// @brief Partner reported time when skew was calculated. + boost::posix_time::ptime partner_time_at_skew_; + + /// @brief Total number of analyzed messages to be responded by partner. + size_t analyzed_messages_count_; + + /// @brief Total number of unsent lease updates. + /// + /// The lease updates are not sent when the server is in the partner + /// down state. The server counts the number of lease updates which + /// haven't been sent to the partner because the partner was unavailable. + /// The partner receives this value in a response to a heartbeat message + /// and can use it to determine if it should synchronize its lease + /// database. + uint64_t unsent_update_count_; + + /// @brief Previous and current total number of unsent lease updates + /// from the partner. + /// + /// This value is returned in response to a heartbeat command and saved + /// using the @c setPartnerUnsentUpdateCount. The previous value is + /// preserved so the values can be compared in the state handlers. + std::pair<uint64_t, uint64_t> partner_unsent_update_count_; + + /// @brief The mutex used to protect internal state. + const boost::scoped_ptr<std::mutex> mutex_; +}; + +/// @brief Type of the pointer to the @c CommunicationState object. +typedef boost::shared_ptr<CommunicationState> CommunicationStatePtr; + + +/// @brief Holds communication state between DHCPv4 servers. +/// +/// This class implements DHCPv4 failure detection by monitoring the +/// value of the "secs" field in received DHCPv4 messages as described +/// in @c CommunicationState::analyzeMessage. +class CommunicationState4 : public CommunicationState { +public: + + /// @brief Constructor. + /// + /// @param io_service pointer to the common IO service instance. + /// @param config pointer to the HA configuration. + CommunicationState4(const asiolink::IOServicePtr& io_service, + const HAConfigPtr& config); + + /// @brief Checks if the DHCPv4 message appears to be unanswered. + /// + /// This method uses "secs" field value for detecting client + /// communication failures as described in the + /// @c CommunicationState::analyzeMessage. Some misbehaving Windows + /// clients were reported to swap "secs" field bytes. In this case + /// the first byte is set to non-zero byte and the second byte is + /// set to 0. This method handles such cases and corrects bytes + /// order before comparing against the threshold. + /// + /// @param message DHCPv4 message to be analyzed. This must be the + /// message which belongs to the partner, i.e. the caller must + /// filter out messages belonging to the partner prior to calling + /// this method. + virtual void analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message); + + /// @brief Checks if the partner failure has been detected based + /// on the DHCP traffic analysis. + /// + /// @return true if the partner failure has been detected, false + /// otherwise. + virtual bool failureDetected() const; + + /// @brief Returns the current number of clients which attempted + /// to get a lease from the partner server. + /// + /// The returned number is reset to 0 when the server successfully + /// establishes communication with the partner. The number is + /// incremented only in the communications interrupted case. + /// + /// @return The number of clients including unacked clients. + virtual size_t getConnectingClientsCount() const; + + /// @brief Returns the current number of clients which haven't gotten + /// a lease from the partner server. + /// + /// The returned number is reset to 0 when the server successfully + /// establishes communication with the partner. The number is + /// incremented only in the communications interrupted case. + /// + /// @return Number of unacked clients. + virtual size_t getUnackedClientsCount() const; + +protected: + + /// @brief Checks if the DHCPv4 message appears to be unanswered. + /// + /// Should be called in a thread safe context. + /// + /// This method uses "secs" field value for detecting client + /// communication failures as described in the + /// @c CommunicationState::analyzeMessage. Some misbehaving Windows + /// clients were reported to swap "secs" field bytes. In this case + /// the first byte is set to non-zero byte and the second byte is + /// set to 0. This method handles such cases and corrects bytes + /// order before comparing against the threshold. + /// + /// @param message DHCPv4 message to be analyzed. This must be the + /// message which belongs to the partner, i.e. the caller must + /// filter out messages belonging to the partner prior to calling + /// this method. + virtual void analyzeMessageInternal(const boost::shared_ptr<dhcp::Pkt>& message); + + /// @brief Checks if the partner failure has been detected based + /// on the DHCP traffic analysis. + /// + /// Should be called in a thread safe context. + /// + /// @return true if the partner failure has been detected, false + /// otherwise. + virtual bool failureDetectedInternal() const; + + /// @brief Removes information about the clients the partner server + /// should respond to while communication with the partner was + /// interrupted. + /// + /// See @c CommunicationState::analyzeMessage for details. + virtual void clearConnectingClients(); + + /// @brief Structure holding information about the client which has + /// send the packet being analyzed. + struct ConnectingClient4 { + std::vector<uint8_t> hwaddr_; + std::vector<uint8_t> clientid_; + bool unacked_; + }; + + /// @brief Multi index container holding information about the clients + /// attempting to get leases from the partner server. + typedef boost::multi_index_container< + ConnectingClient4, + boost::multi_index::indexed_by< + // First index is a composite index which allows to find a client + // by the HW address/client identifier tuple. + boost::multi_index::hashed_unique< + boost::multi_index::composite_key< + ConnectingClient4, + boost::multi_index::member<ConnectingClient4, std::vector<uint8_t>, + &ConnectingClient4::hwaddr_>, + boost::multi_index::member<ConnectingClient4, std::vector<uint8_t>, + &ConnectingClient4::clientid_> + > + >, + // Second index allows for counting all clients which are + // considered unacked. + boost::multi_index::ordered_non_unique< + boost::multi_index::member<ConnectingClient4, bool, &ConnectingClient4::unacked_> + > + > + > ConnectingClients4; + + /// @brief Holds information about the clients attempting to contact + /// the partner server while the servers are in communications + /// interrupted state. + ConnectingClients4 connecting_clients_; +}; + +/// @brief Pointer to the @c CommunicationState4 object. +typedef boost::shared_ptr<CommunicationState4> CommunicationState4Ptr; + +/// @brief Holds communication state between DHCPv6 servers. +/// +/// This class implements DHCPv6 failure detection by monitoring the +/// value of the "Elapsed Time" option in received DHCPv6 messages as described +/// in @c CommunicationState::analyzeMessage. +class CommunicationState6 : public CommunicationState { +public: + + /// @brief Constructor. + /// + /// @param io_service pointer to the common IO service instance. + /// @param config pointer to the HA configuration. + CommunicationState6(const asiolink::IOServicePtr& io_service, + const HAConfigPtr& config); + + /// @brief Checks if the DHCPv6 message appears to be unanswered. + /// + /// See @c CommunicationState::analyzeMessage for details. + /// + /// @param message DHCPv6 message to be analyzed. This must be the + /// message which belongs to the partner, i.e. the caller must + /// filter out messages belonging to the partner prior to calling + /// this method. + virtual void analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message); + + /// @brief Checks if the partner failure has been detected based + /// on the DHCP traffic analysis. + /// + /// @return true if the partner failure has been detected, false + /// otherwise. + virtual bool failureDetected() const; + + /// @brief Returns the current number of clients which attempted + /// to get a lease from the partner server. + /// + /// The returned number is reset to 0 when the server successfully + /// establishes communication with the partner. The number is + /// incremented only in the communications interrupted case. + /// + /// @return The number of clients including unacked clients. + virtual size_t getConnectingClientsCount() const; + + /// @brief Returns the current number of clients which haven't gotten + /// a lease from the partner server. + /// + /// The returned number is reset to 0 when the server successfully + /// establishes communication with the partner. The number is + /// incremented only in the communications interrupted case. + /// + /// @return Number of unacked clients. + virtual size_t getUnackedClientsCount() const; + +protected: + + /// @brief Checks if the DHCPv6 message appears to be unanswered. + /// + /// Should be called in a thread safe context. + /// + /// See @c CommunicationState::analyzeMessage for details. + /// + /// @param message DHCPv6 message to be analyzed. This must be the + /// message which belongs to the partner, i.e. the caller must + /// filter out messages belonging to the partner prior to calling + /// this method. + virtual void analyzeMessageInternal(const boost::shared_ptr<dhcp::Pkt>& message); + + /// @brief Checks if the partner failure has been detected based + /// on the DHCP traffic analysis. + /// + /// Should be called in a thread safe context. + /// + /// @return true if the partner failure has been detected, false + /// otherwise. + virtual bool failureDetectedInternal() const; + + /// @brief Removes information about the clients the partner server + /// should respond to while communication with the partner was + /// interrupted. + /// + /// See @c CommunicationState::analyzeMessage for details. + virtual void clearConnectingClients(); + + /// @brief Structure holding information about a client which + /// sent a packet being analyzed. + struct ConnectingClient6 { + std::vector<uint8_t> duid_; + bool unacked_; + }; + + /// @brief Multi index container holding information about the clients + /// attempting to get leases from the partner server. + typedef boost::multi_index_container< + ConnectingClient6, + boost::multi_index::indexed_by< + // First index is for accessing connecting clients by DUID. + boost::multi_index::hashed_unique< + boost::multi_index::member<ConnectingClient6, std::vector<uint8_t>, + &ConnectingClient6::duid_> + >, + // Second index allows for counting all clients which are + // considered unacked. + boost::multi_index::ordered_non_unique< + boost::multi_index::member<ConnectingClient6, bool, &ConnectingClient6::unacked_> + > + > + > ConnectingClients6; + + /// @brief Holds information about the clients attempting to contact + /// the partner server while the servers are in communications + /// interrupted state. + ConnectingClients6 connecting_clients_; +}; + +/// @brief Pointer to the @c CommunicationState6 object. +typedef boost::shared_ptr<CommunicationState6> CommunicationState6Ptr; + +} // end of namespace isc::ha +} // end of namespace isc + +#endif |