summary refs log tree commit diff stats
path: root/ntp_io_linux.c
diff options
context:
space:
mode:
Diffstat (limited to 'ntp_io_linux.c')
-rw-r--r-- ntp_io_linux.c 863
1 files changed, 863 insertions, 0 deletions
diff --git a/ntp_io_linux.c b/ntp_io_linux.c
new file mode 100644
index 0000000..819792a
--- /dev/null
+++ b/ntp_io_linux.c
@@ -0,0 +1,863 @@
+/*
+ chronyd/chronyc - Programs for keeping computer clocks accurate.
+
+ **********************************************************************
+ * Copyright (C) Miroslav Lichvar 2016-2018
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ **********************************************************************
+
+ =======================================================================
+
+ Functions for NTP I/O specific to Linux
+ */
+
+#include "config.h"
+
+#include "sysincl.h"
+
+#include <ifaddrs.h>
+#include <linux/errqueue.h>
+#include <linux/ethtool.h>
+#include <linux/net_tstamp.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+
+#include "array.h"
+#include "conf.h"
+#include "hwclock.h"
+#include "local.h"
+#include "logging.h"
+#include "ntp_core.h"
+#include "ntp_io.h"
+#include "ntp_io_linux.h"
+#include "ntp_sources.h"
+#include "sched.h"
+#include "sys_linux.h"
+#include "util.h"
+
+/* Storage for a socket address that can be viewed as an IPv4 address, as an
+ IPv6 address (only when FEAT_IPV6 is defined), or as a generic sockaddr
+ through the member u */
+union sockaddr_in46 {
+ struct sockaddr_in in4;
+#ifdef FEAT_IPV6
+ struct sockaddr_in6 in6;
+#endif
+ struct sockaddr u;
+};
+
+/* State kept for each network interface with enabled HW timestamping.
+ Instances are stored in the interfaces array and are filled in by
+ add_interface(). */
+struct Interface {
+ /* Interface name copied from the configuration */
+ char name[IF_NAMESIZE];
+ /* Interface index as returned by the SIOCGIFINDEX ioctl */
+ int if_index;
+ /* Descriptor returned by SYS_Linux_OpenPHC() for the interface's PHC;
+ closed in NIO_Linux_Finalise() */
+ int phc_fd;
+ /* Starts at 0 and is passed by address to SYS_Linux_GetPHCSample(), which
+ presumably remembers the selected reading method here */
+ int phc_mode;
+ /* Copy of the configured nocrossts setting, passed to
+ SYS_Linux_GetPHCSample() */
+ int phc_nocrossts;
+ /* Link speed in mbit/s */
+ int link_speed;
+ /* Start of UDP data at layer 2 for IPv4 and IPv6 */
+ int l2_udp4_ntp_start;
+ int l2_udp6_ntp_start;
+ /* Precision of PHC readings */
+ double precision;
+ /* Compensation of errors in TX and RX timestamping */
+ double tx_comp;
+ double rx_comp;
+ /* HW clock instance used to convert PHC timestamps to local time */
+ HCL_Instance clock;
+};
+
+/* Number of PHC readings per HW clock sample */
+#define PHC_READINGS 10
+
+/* Minimum interval between PHC readings (a log2 value; it is the lower bound
+ applied to the configured minpoll before conversion by UTI_Log2ToDouble()) */
+#define MIN_PHC_POLL -6
+
+/* Maximum acceptable offset between HW and daemon/kernel timestamp */
+#define MAX_TS_DELAY 1.0
+
+/* Array of Interfaces */
+static ARR_Instance interfaces;
+
+/* RX/TX and TX-specific timestamping socket options */
+static int ts_flags;
+static int ts_tx_flags;
+
+/* Flag indicating the socket options can't be changed in control messages */
+static int permanent_ts_options;
+
+/* When sending client requests to a close and fast server, it is possible that
+ a response will be received before the HW transmit timestamp of the request
+ itself. To avoid processing of the response without the HW timestamp, we
+ monitor events returned by select() and suspend reading of packets from the
+ receive queue for up to 200 microseconds. As the requests are normally
+ separated by at least 200 milliseconds, it is sufficient to monitor and
+ suspend one socket at a time. */
+static int monitored_socket;
+static int suspended_socket;
+/* ID of the scheduled resume timeout, or 0 when no timeout is pending */
+static SCH_TimeoutID resume_timeout_id;
+
+/* Delay passed to SCH_AddTimeoutByDelay() after which a suspended socket is
+ resumed even if no HW transmit timestamp has arrived (200 microseconds) */
+#define RESUME_TIMEOUT 200.0e-6
+
+/* Unbound socket keeping the kernel RX timestamping permanently enabled
+ in order to avoid a race condition between receiving a server response
+ and the kernel actually starting to timestamp received packets after
+ enabling the timestamping and sending a request */
+static int dummy_rxts_socket;
+
+/* Value stored in monitored_socket, suspended_socket and dummy_rxts_socket
+ when they don't refer to any socket */
+#define INVALID_SOCK_FD -3
+
+/* ================================================== */
+
+/* Enable HW timestamping on the interface described by conf_iface and append
+   it to the interfaces array.  Returns 1 if the interface was added or an
+   interface of the same name was already present, 0 if HW timestamping could
+   not be enabled. */
+static int
+add_interface(CNF_HwTsInterface *conf_iface)
+{
+  const int required_flags = SOF_TIMESTAMPING_RX_HARDWARE |
+                             SOF_TIMESTAMPING_TX_HARDWARE |
+                             SOF_TIMESTAMPING_RAW_HARDWARE;
+  struct ethtool_ts_info info;
+  struct hwtstamp_config config;
+  struct Interface *new_iface;
+  struct ifreq ifr;
+  unsigned int n;
+  int fd, index, phc;
+
+  /* Nothing to do if an interface of this name is already registered */
+  for (n = 0; n < ARR_GetSize(interfaces); n++) {
+    struct Interface *known = ARR_GetElement(interfaces, n);
+
+    if (strcmp(conf_iface->name, known->name) == 0)
+      return 1;
+  }
+
+  fd = socket(AF_INET, SOCK_DGRAM, 0);
+  if (fd < 0)
+    return 0;
+
+  memset(&ifr, 0, sizeof (ifr));
+  memset(&info, 0, sizeof (info));
+
+  /* Reject names that don't fit in the ioctl request */
+  if (snprintf(ifr.ifr_name, sizeof (ifr.ifr_name), "%s", conf_iface->name) >=
+      sizeof (ifr.ifr_name))
+    goto fail;
+
+  if (ioctl(fd, SIOCGIFINDEX, &ifr)) {
+    DEBUG_LOG("ioctl(%s) failed : %s", "SIOCGIFINDEX", strerror(errno));
+    goto fail;
+  }
+
+  index = ifr.ifr_ifindex;
+
+  /* Query the timestamping capabilities of the interface */
+  info.cmd = ETHTOOL_GET_TS_INFO;
+  ifr.ifr_data = (char *)&info;
+
+  if (ioctl(fd, SIOCETHTOOL, &ifr)) {
+    DEBUG_LOG("ioctl(%s) failed : %s", "SIOCETHTOOL", strerror(errno));
+    goto fail;
+  }
+
+  if ((info.so_timestamping & required_flags) != required_flags) {
+    DEBUG_LOG("HW timestamping not supported on %s", ifr.ifr_name);
+    goto fail;
+  }
+
+  if (info.phc_index < 0) {
+    DEBUG_LOG("PHC missing on %s", ifr.ifr_name);
+    goto fail;
+  }
+
+  config.flags = 0;
+  config.tx_type = HWTSTAMP_TX_ON;
+
+  switch (conf_iface->rxfilter) {
+    case CNF_HWTS_RXFILTER_NONE:
+      config.rx_filter = HWTSTAMP_FILTER_NONE;
+      break;
+    case CNF_HWTS_RXFILTER_ANY:
+      /* Pick the best supported filter: NTP packets only (if the kernel
+         headers know it), then all packets, then none */
+      config.rx_filter = HWTSTAMP_FILTER_NONE;
+      if (info.rx_filters & (1 << HWTSTAMP_FILTER_ALL))
+        config.rx_filter = HWTSTAMP_FILTER_ALL;
+#ifdef HAVE_LINUX_TIMESTAMPING_RXFILTER_NTP
+      if (info.rx_filters & (1 << HWTSTAMP_FILTER_NTP_ALL))
+        config.rx_filter = HWTSTAMP_FILTER_NTP_ALL;
+#endif
+      break;
+#ifdef HAVE_LINUX_TIMESTAMPING_RXFILTER_NTP
+    case CNF_HWTS_RXFILTER_NTP:
+      config.rx_filter = HWTSTAMP_FILTER_NTP_ALL;
+      break;
+#endif
+    default:
+      config.rx_filter = HWTSTAMP_FILTER_ALL;
+      break;
+  }
+
+  ifr.ifr_data = (char *)&config;
+
+  if (ioctl(fd, SIOCSHWTSTAMP, &ifr)) {
+    DEBUG_LOG("ioctl(%s) failed : %s", "SIOCSHWTSTAMP", strerror(errno));
+    goto fail;
+  }
+
+  /* The socket was needed only for the ioctls */
+  close(fd);
+
+  phc = SYS_Linux_OpenPHC(NULL, info.phc_index);
+  if (phc < 0)
+    return 0;
+
+  new_iface = ARR_GetNewElement(interfaces);
+
+  snprintf(new_iface->name, sizeof (new_iface->name), "%s", conf_iface->name);
+  new_iface->if_index = index;
+  new_iface->phc_fd = phc;
+  new_iface->phc_mode = 0;
+  new_iface->phc_nocrossts = conf_iface->nocrossts;
+
+  /* Start with 1 gbit and no VLANs or IPv4/IPv6 options */
+  new_iface->link_speed = 1000;
+  new_iface->l2_udp4_ntp_start = 42;
+  new_iface->l2_udp6_ntp_start = 62;
+
+  new_iface->precision = conf_iface->precision;
+  new_iface->tx_comp = conf_iface->tx_comp;
+  new_iface->rx_comp = conf_iface->rx_comp;
+
+  new_iface->clock = HCL_CreateInstance(conf_iface->min_samples, conf_iface->max_samples,
+                                        UTI_Log2ToDouble(MAX(conf_iface->minpoll, MIN_PHC_POLL)));
+
+  LOG(LOGS_INFO, "Enabled HW timestamping %son %s",
+      config.rx_filter == HWTSTAMP_FILTER_NONE ? "(TX only) " : "", new_iface->name);
+
+  return 1;
+
+fail:
+  close(fd);
+  return 0;
+}
+
+/* ================================================== */
+
+/* Try to enable HW timestamping on every interface reported by getifaddrs(),
+   using the settings of conf_iface_all with the name replaced.  Returns 1 if
+   add_interface() succeeded for at least one of them, 0 otherwise. */
+static int
+add_all_interfaces(CNF_HwTsInterface *conf_iface_all)
+{
+  CNF_HwTsInterface conf = *conf_iface_all;
+  struct ifaddrs *list, *entry;
+  int added = 0;
+
+  if (getifaddrs(&list) != 0) {
+    DEBUG_LOG("getifaddrs() failed : %s", strerror(errno));
+    return 0;
+  }
+
+  entry = list;
+  while (entry) {
+    /* Reuse the wildcard settings under the name of this interface */
+    conf.name = entry->ifa_name;
+    if (add_interface(&conf))
+      added = 1;
+    entry = entry->ifa_next;
+  }
+
+  freeifaddrs(list);
+
+  return added;
+}
+
+/* ================================================== */
+
+/* Query the current link speed of the interface with the ETHTOOL_GSET ioctl
+   and store it in iface->link_speed (in Mb/s).  The stored speed is left
+   unchanged if the socket cannot be opened or the ioctl fails.  An unknown
+   speed is stored as 0, which disables the transposition of RX timestamps in
+   process_hw_timestamp(). */
+static void
+update_interface_speed(struct Interface *iface)
+{
+  struct ethtool_cmd cmd;
+  struct ifreq req;
+  int sock_fd, link_speed;
+
+  sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+  if (sock_fd < 0)
+    return;
+
+  memset(&req, 0, sizeof (req));
+  memset(&cmd, 0, sizeof (cmd));
+
+  snprintf(req.ifr_name, sizeof (req.ifr_name), "%s", iface->name);
+  cmd.cmd = ETHTOOL_GSET;
+  req.ifr_data = (char *)&cmd;
+
+  if (ioctl(sock_fd, SIOCETHTOOL, &req)) {
+    DEBUG_LOG("ioctl(%s) failed : %s", "SIOCETHTOOL", strerror(errno));
+    close(sock_fd);
+    return;
+  }
+
+  close(sock_fd);
+
+  link_speed = ethtool_cmd_speed(&cmd);
+
+  /* The kernel reports an unknown speed (e.g. link down) as SPEED_UNKNOWN,
+     i.e. all bits set, which becomes negative when stored in an int.  A
+     negative speed would turn the RX correction into a large negative
+     offset, so treat it as "no speed known". */
+  if (link_speed < 0)
+    link_speed = 0;
+
+  if (iface->link_speed != link_speed) {
+    iface->link_speed = link_speed;
+    DEBUG_LOG("Updated speed of %s to %d Mb/s", iface->name, link_speed);
+  }
+}
+
+/* ================================================== */
+
+#if defined(HAVE_LINUX_TIMESTAMPING_OPT_PKTINFO) || defined(HAVE_LINUX_TIMESTAMPING_OPT_TX_SWHW)
+/* Check whether the kernel accepts the given SO_TIMESTAMPING flags by trying
+   to set them on a temporary UDP socket.  Returns 1 if the option was
+   accepted, 0 if not or if the socket could not be created. */
+static int
+check_timestamping_option(int option)
+{
+  int fd, accepted;
+
+  fd = socket(AF_INET, SOCK_DGRAM, 0);
+  if (fd < 0)
+    return 0;
+
+  accepted = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &option, sizeof (option)) >= 0;
+  if (!accepted)
+    DEBUG_LOG("Could not enable timestamping option %x", (unsigned int)option);
+
+  close(fd);
+
+  return accepted;
+}
+#endif
+
+/* ================================================== */
+
+/* Open an unbound UDP socket with the timestamping options enabled.  An IPv4
+   socket is tried first and, with FEAT_IPV6, an IPv6 socket as a fallback.
+   Returns the descriptor, or INVALID_SOCK_FD on failure. */
+static int
+open_dummy_socket(void)
+{
+  int fd, events = 0;
+
+  fd = socket(AF_INET, SOCK_DGRAM, 0);
+#ifdef FEAT_IPV6
+  if (fd < 0)
+    fd = socket(AF_INET6, SOCK_DGRAM, 0);
+#endif
+  if (fd < 0)
+    return INVALID_SOCK_FD;
+
+  if (NIO_Linux_SetTimestampSocketOptions(fd, 1, &events)) {
+    UTI_FdSetCloexec(fd);
+    return fd;
+  }
+
+  close(fd);
+  return INVALID_SOCK_FD;
+}
+
+/* ================================================== */
+
+/* Initialise the module: create the interfaces array, enable HW timestamping
+   on the configured interfaces and select the timestamping flags used for
+   sockets.  Terminates via LOG_FATAL if an explicitly named interface
+   cannot be enabled. */
+void
+NIO_Linux_Initialise(void)
+{
+  CNF_HwTsInterface *conf;
+  unsigned int idx;
+  int hw_enabled = 0;
+
+  interfaces = ARR_CreateInstance(sizeof (struct Interface));
+
+  /* Enable HW timestamping on specified interfaces. If "*" was specified, try
+     all interfaces. If no interface was specified, enable SW timestamping. */
+
+  /* Explicitly named interfaces go first and must all succeed */
+  for (idx = 0; CNF_GetHwTsInterface(idx, &conf); idx++) {
+    if (strcmp("*", conf->name) != 0) {
+      if (!add_interface(conf))
+        LOG_FATAL("Could not enable HW timestamping on %s", conf->name);
+      hw_enabled = 1;
+    }
+  }
+
+  /* Only the first wildcard entry is used; failures are tolerated */
+  for (idx = 0; CNF_GetHwTsInterface(idx, &conf); idx++) {
+    if (strcmp("*", conf->name) == 0) {
+      if (add_all_interfaces(conf))
+        hw_enabled = 1;
+      break;
+    }
+  }
+
+  ts_flags = SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE;
+  ts_tx_flags = SOF_TIMESTAMPING_TX_SOFTWARE;
+
+  if (hw_enabled) {
+    ts_flags |= SOF_TIMESTAMPING_RAW_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE;
+    ts_tx_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
+#ifdef HAVE_LINUX_TIMESTAMPING_OPT_PKTINFO
+    if (check_timestamping_option(SOF_TIMESTAMPING_OPT_PKTINFO))
+      ts_flags |= SOF_TIMESTAMPING_OPT_PKTINFO;
+#endif
+#ifdef HAVE_LINUX_TIMESTAMPING_OPT_TX_SWHW
+    if (check_timestamping_option(SOF_TIMESTAMPING_OPT_TX_SWHW))
+      ts_flags |= SOF_TIMESTAMPING_OPT_TX_SWHW;
+#endif
+  }
+
+  /* Enable IP_PKTINFO in messages looped back to the error queue */
+  ts_flags |= SOF_TIMESTAMPING_OPT_CMSG;
+
+  /* Kernels before 4.7 ignore timestamping flags set in control messages */
+  permanent_ts_options = !SYS_Linux_CheckKernelVersion(4, 7);
+
+  /* No socket is monitored, suspended or kept open yet */
+  monitored_socket = INVALID_SOCK_FD;
+  suspended_socket = INVALID_SOCK_FD;
+  dummy_rxts_socket = INVALID_SOCK_FD;
+}
+
+/* ================================================== */
+
+/* Release everything acquired by this module: the dummy RX timestamping
+   socket (if one was opened), the HW clock instance and PHC descriptor of
+   each interface, and the interfaces array itself. */
+void
+NIO_Linux_Finalise(void)
+{
+  unsigned int n = 0;
+
+  if (dummy_rxts_socket != INVALID_SOCK_FD)
+    close(dummy_rxts_socket);
+
+  while (n < ARR_GetSize(interfaces)) {
+    struct Interface *entry = ARR_GetElement(interfaces, n);
+
+    HCL_DestroyInstance(entry->clock);
+    close(entry->phc_fd);
+    n++;
+  }
+
+  ARR_DestroyInstance(interfaces);
+}
+
+/* ================================================== */
+
+/* Enable timestamping on the socket.  TX timestamping is enabled in the
+   socket options only for client-only sockets or when the options cannot be
+   changed per message.  On success SCH_FILE_EXCEPTION is added to *events
+   and 1 is returned.  On failure an error is logged, timestamping is
+   disabled for all later calls (ts_flags is cleared) and 0 is returned.
+   0 is also returned if timestamping is already disabled. */
+int
+NIO_Linux_SetTimestampSocketOptions(int sock_fd, int client_only, int *events)
+{
+  const char *opt_name;
+  int enable = 1, options;
+
+  if (!ts_flags)
+    return 0;
+
+  /* Enable SCM_TIMESTAMPING control messages and the socket's error queue in
+     order to receive our transmitted packets with more accurate timestamps */
+
+  options = ts_flags;
+  if (client_only || permanent_ts_options)
+    options |= ts_tx_flags;
+
+  opt_name = "SO_SELECT_ERR_QUEUE";
+  if (setsockopt(sock_fd, SOL_SOCKET, SO_SELECT_ERR_QUEUE, &enable, sizeof (enable)) < 0)
+    goto failed;
+
+  opt_name = "SO_TIMESTAMPING";
+  if (setsockopt(sock_fd, SOL_SOCKET, SO_TIMESTAMPING, &options, sizeof (options)) < 0)
+    goto failed;
+
+  *events |= SCH_FILE_EXCEPTION;
+  return 1;
+
+failed:
+  LOG(LOGS_ERR, "Could not set %s socket option", opt_name);
+  ts_flags = 0;
+  return 0;
+}
+
+/* ================================================== */
+
+/* Stop monitoring the socket and, if it is the currently suspended socket,
+   re-enable processing of its input events and cancel the pending resume
+   timeout.  Calling it with INVALID_SOCK_FD or with a socket that is not
+   suspended only clears the monitoring. */
+static void
+resume_socket(int sock_fd)
+{
+  if (sock_fd == monitored_socket)
+    monitored_socket = INVALID_SOCK_FD;
+
+  if (sock_fd == INVALID_SOCK_FD)
+    return;
+  if (suspended_socket != sock_fd)
+    return;
+
+  suspended_socket = INVALID_SOCK_FD;
+
+  SCH_SetFileHandlerEvent(sock_fd, SCH_FILE_INPUT, 1);
+
+  /* The timeout ID is still set if we got here before the timeout fired */
+  DEBUG_LOG("Resumed RX processing %s timeout fd=%d",
+            resume_timeout_id ? "before" : "on", sock_fd);
+
+  if (!resume_timeout_id)
+    return;
+
+  SCH_RemoveTimeout(resume_timeout_id);
+  resume_timeout_id = 0;
+}
+
+/* ================================================== */
+
+/* Timeout handler scheduled by suspend_socket(): resume the suspended socket
+   after RESUME_TIMEOUT.  The argument is not used. */
+static void
+resume_timeout(void *arg)
+{
+  int fd = suspended_socket;
+
+  /* The timeout has already fired, so there is nothing for resume_socket()
+     to remove */
+  resume_timeout_id = 0;
+
+  resume_socket(fd);
+}
+
+/* ================================================== */
+
+/* Disable processing of input events on the socket and schedule a timeout
+   that will enable it again after RESUME_TIMEOUT.  Any previously suspended
+   socket is resumed first, as only one socket is suspended at a time. */
+static void
+suspend_socket(int sock_fd)
+{
+  if (suspended_socket != INVALID_SOCK_FD || monitored_socket == INVALID_SOCK_FD)
+    resume_socket(suspended_socket);
+
+  suspended_socket = sock_fd;
+  SCH_SetFileHandlerEvent(sock_fd, SCH_FILE_INPUT, 0);
+
+  resume_timeout_id = SCH_AddTimeoutByDelay(RESUME_TIMEOUT, resume_timeout, NULL);
+
+  DEBUG_LOG("Suspended RX processing fd=%d", sock_fd);
+}
+
+/* ================================================== */
+
+/* Called with an event reported for a socket.  If the socket is the
+   monitored one and the event is SCH_FILE_INPUT, suspend reading from the
+   socket and return 1 to tell the caller not to process the message yet.
+   Returns 0 in all other cases. */
+int
+NIO_Linux_ProcessEvent(int sock_fd, int event)
+{
+  if (sock_fd != monitored_socket || event != SCH_FILE_INPUT)
+    return 0;
+
+  /* A response arrived before the HW TX timestamp of the request */
+  suspend_socket(sock_fd);
+  monitored_socket = INVALID_SOCK_FD;
+
+  /* Don't process the message yet */
+  return 1;
+}
+
+/* ================================================== */
+
+/* Find the registered interface with the given interface index.  Returns
+   NULL if there is none. */
+static struct Interface *
+get_interface(int if_index)
+{
+  unsigned int n;
+
+  for (n = 0; n < ARR_GetSize(interfaces); n++) {
+    struct Interface *candidate = ARR_GetElement(interfaces, n);
+
+    if (candidate->if_index == if_index)
+      return candidate;
+  }
+
+  return NULL;
+}
+
+/* ================================================== */
+
+/* Convert a raw HW timestamp to local time and store it in local_ts.
+
+ iface - interface whose HW clock produced the timestamp
+ hw_ts - raw HW timestamp; modified in place when the RX
+ correction is applied
+ local_ts - timestamp to be replaced; its current value is the
+ reference for the sanity check against MAX_TS_DELAY
+ rx_ntp_length - length of the received NTP message, or 0 for a
+ transmitted packet (this is what the caller passes)
+ family - address family used to select the assumed start of UDP
+ data when l2_length is not known
+ l2_length - length of the packet at layer 2, or 0 if unknown
+
+ local_ts is left untouched if a needed PHC sample cannot be obtained, if
+ HCL_CookTime() fails, or if the result differs from the original
+ timestamp by more than MAX_TS_DELAY. */
+static void
+process_hw_timestamp(struct Interface *iface, struct timespec *hw_ts,
+ NTP_Local_Timestamp *local_ts, int rx_ntp_length, int family,
+ int l2_length)
+{
+ struct timespec sample_phc_ts, sample_sys_ts, sample_local_ts, ts;
+ double rx_correction, ts_delay, phc_err, local_err;
+
+ /* Feed the HW clock with a fresh PHC sample when it asks for one and use
+ the occasion to refresh the link speed */
+ if (HCL_NeedsNewSample(iface->clock, &local_ts->ts)) {
+ if (!SYS_Linux_GetPHCSample(iface->phc_fd, iface->phc_nocrossts, iface->precision,
+ &iface->phc_mode, &sample_phc_ts, &sample_sys_ts,
+ &phc_err))
+ return;
+
+ LCL_CookTime(&sample_sys_ts, &sample_local_ts, &local_err);
+ HCL_AccumulateSample(iface->clock, &sample_phc_ts, &sample_local_ts,
+ phc_err + local_err);
+
+ update_interface_speed(iface);
+ }
+
+ /* We need to transpose RX timestamps as hardware timestamps are normally
+ preamble timestamps and RX timestamps in NTP are supposed to be trailer
+ timestamps. If we don't know the length of the packet at layer 2, we
+ make an assumption that UDP data start at the same position as in the
+ last transmitted packet which had a HW TX timestamp. */
+ if (rx_ntp_length && iface->link_speed) {
+ /* The extra 4 bytes are presumably the Ethernet frame check sequence */
+ if (!l2_length)
+ l2_length = (family == IPADDR_INET4 ? iface->l2_udp4_ntp_start :
+ iface->l2_udp6_ntp_start) + rx_ntp_length + 4;
+ /* Length in bytes divided by the link speed in bytes per second */
+ rx_correction = l2_length / (1.0e6 / 8 * iface->link_speed);
+
+ UTI_AddDoubleToTimespec(hw_ts, rx_correction, hw_ts);
+ }
+
+ if (!HCL_CookTime(iface->clock, hw_ts, &ts, &local_err))
+ return;
+
+ /* Apply the configured compensation: added for TX, subtracted for RX */
+ if (!rx_ntp_length && iface->tx_comp)
+ UTI_AddDoubleToTimespec(&ts, iface->tx_comp, &ts);
+ else if (rx_ntp_length && iface->rx_comp)
+ UTI_AddDoubleToTimespec(&ts, -iface->rx_comp, &ts);
+
+ ts_delay = UTI_DiffTimespecsToDouble(&local_ts->ts, &ts);
+
+ /* Ignore HW timestamps too far from the timestamp we already have */
+ if (fabs(ts_delay) > MAX_TS_DELAY) {
+ DEBUG_LOG("Unacceptable timestamp delay %.9f", ts_delay);
+ return;
+ }
+
+ local_ts->ts = ts;
+ local_ts->err = local_err;
+ local_ts->source = NTP_TS_HARDWARE;
+}
+
+/* ================================================== */
+/* Extract UDP data from a layer 2 message. Supported is Ethernet
+   with optional VLAN tags, carrying IPv4 or (with FEAT_IPV6) IPv6.
+
+   msg         - start of the layer 2 frame; on success the UDP data are
+                 moved to the beginning of this buffer
+   remote_addr - set to the destination address and port of the packet, or
+                 to IPADDR_UNSPEC and port 0 if the frame cannot be parsed
+   len         - length of the frame in bytes
+
+   Returns the length of the UDP data, or 0 if the frame is not a supported
+   UDP packet. */
+
+static int
+extract_udp_data(unsigned char *msg, NTP_Remote_Address *remote_addr, int len)
+{
+  unsigned char *msg_start = msg;
+  union sockaddr_in46 addr;
+  uint16_t port;
+
+  remote_addr->ip_addr.family = IPADDR_UNSPEC;
+  remote_addr->port = 0;
+
+  /* Skip MACs */
+  if (len < 12)
+    return 0;
+  len -= 12, msg += 12;
+
+  /* Skip VLAN tag(s) if present */
+  while (len >= 4 && msg[0] == 0x81 && msg[1] == 0x00)
+    len -= 4, msg += 4;
+
+  /* Skip IPv4 or IPv6 ethertype */
+  if (len < 2 || !((msg[0] == 0x08 && msg[1] == 0x00) ||
+                   (msg[0] == 0x86 && msg[1] == 0xdd)))
+    return 0;
+  len -= 2, msg += 2;
+
+  /* Parse destination address and port from IPv4/IPv6 and UDP headers.
+     The headers are not aligned in the buffer, so multi-byte fields are
+     copied with memcpy() instead of being read through casted pointers. */
+  if (len >= 20 && msg[0] >> 4 == 4) {
+    int ihl = (msg[0] & 0xf) * 4;
+
+    /* An IPv4 header is at least 20 bytes long; with a shorter IHL the
+       UDP header would be read from inside the IP header */
+    if (ihl < 20 || len < ihl + 8 || msg[9] != 17)
+      return 0;
+
+    memcpy(&addr.in4.sin_addr.s_addr, msg + 16, sizeof (uint32_t));
+    memcpy(&port, msg + ihl + 2, sizeof (port));
+    addr.in4.sin_port = port;
+    addr.in4.sin_family = AF_INET;
+    len -= ihl + 8, msg += ihl + 8;
+#ifdef FEAT_IPV6
+  } else if (len >= 48 && msg[0] >> 4 == 6) {
+    int eh_len, next_header = msg[6];
+    uint16_t frag;
+
+    memcpy(&addr.in6.sin6_addr.s6_addr, msg + 24, 16);
+    len -= 40, msg += 40;
+
+    /* Skip IPv6 extension headers if present.  At least 8 bytes are
+       always available at msg here. */
+    while (next_header != 17) {
+      switch (next_header) {
+        case 44:  /* Fragment Header */
+          /* Process only the first fragment */
+          memcpy(&frag, msg + 2, sizeof (frag));
+          if (ntohs(frag) >> 3 != 0)
+            return 0;
+          eh_len = 8;
+          break;
+        case 0:   /* Hop-by-Hop Options */
+        case 43:  /* Routing Header */
+        case 60:  /* Destination Options */
+        case 135: /* Mobility Header */
+          eh_len = 8 * (msg[1] + 1);
+          break;
+        case 51:  /* Authentication Header */
+          eh_len = 4 * (msg[1] + 2);
+          break;
+        default:
+          return 0;
+      }
+
+      /* Keep room for the next header (or the UDP header) after this one */
+      if (eh_len < 8 || len < eh_len + 8)
+        return 0;
+
+      next_header = msg[0];
+      len -= eh_len, msg += eh_len;
+    }
+
+    memcpy(&port, msg + 2, sizeof (port));
+    addr.in6.sin6_port = port;
+    addr.in6.sin6_family = AF_INET6;
+    len -= 8, msg += 8;
+#endif
+  } else {
+    return 0;
+  }
+
+  UTI_SockaddrToIPAndPort(&addr.u, &remote_addr->ip_addr, &remote_addr->port);
+
+  /* Move the message to fix alignment of its fields */
+  if (len > 0)
+    memmove(msg_start, msg, len);
+
+  return len;
+}
+
+/* ================================================== */
+
+/* Process the control messages of a message received from a socket and
+ update local_ts with a kernel or HW timestamp if one is provided.
+
+ A message which was not received from the error queue (MSG_ERRQUEUE is
+ not set in hdr->msg_flags) is a received packet: its timestamp is updated
+ and 0 is returned to let the caller process the message.
+
+ A message from the error queue is a looped-back transmitted packet: its
+ UDP data and destination are extracted and, if a timestamp was obtained
+ and the data are at least NTP_NORMAL_PACKET_LENGTH long, they are passed
+ to NSR_ProcessTx(). In this case, and also when an unknown extended
+ error is found, 1 is returned to make the caller drop the message. */
+int
+NIO_Linux_ProcessMessage(NTP_Remote_Address *remote_addr, NTP_Local_Address *local_addr,
+ NTP_Local_Timestamp *local_ts, struct msghdr *hdr, int length)
+{
+ struct Interface *iface;
+ struct cmsghdr *cmsg;
+ int is_tx, ts_if_index, l2_length;
+
+ is_tx = hdr->msg_flags & MSG_ERRQUEUE;
+ iface = NULL;
+ ts_if_index = local_addr->if_index;
+ l2_length = 0;
+
+ for (cmsg = CMSG_FIRSTHDR(hdr); cmsg; cmsg = CMSG_NXTHDR(hdr, cmsg)) {
+#ifdef HAVE_LINUX_TIMESTAMPING_OPT_PKTINFO
+ /* The interface index and layer 2 length from this message are used only
+ by SCM_TIMESTAMPING messages that follow it in the control data */
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_TIMESTAMPING_PKTINFO) {
+ struct scm_ts_pktinfo ts_pktinfo;
+
+ memcpy(&ts_pktinfo, CMSG_DATA(cmsg), sizeof (ts_pktinfo));
+
+ ts_if_index = ts_pktinfo.if_index;
+ l2_length = ts_pktinfo.pkt_length;
+
+ DEBUG_LOG("Received HW timestamp info if=%d length=%d", ts_if_index, l2_length);
+ }
+#endif
+
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_TIMESTAMPING) {
+ struct scm_timestamping ts3;
+
+ memcpy(&ts3, CMSG_DATA(cmsg), sizeof (ts3));
+
+ /* ts[2] is handled here as the HW timestamp and ts[0] below as the
+ kernel (SW) timestamp; a zero value means it was not provided */
+ if (!UTI_IsZeroTimespec(&ts3.ts[2])) {
+ iface = get_interface(ts_if_index);
+ if (iface) {
+ process_hw_timestamp(iface, &ts3.ts[2], local_ts, !is_tx ? length : 0,
+ remote_addr->ip_addr.family, l2_length);
+ } else {
+ DEBUG_LOG("HW clock not found for interface %d", ts_if_index);
+ }
+
+ /* If a HW transmit timestamp was received, resume processing
+ of non-error messages on this socket */
+ if (is_tx)
+ resume_socket(local_addr->sock_fd);
+ }
+
+ /* Fall back to the kernel timestamp if no better timestamp was set. For
+ transmitted packets it is used only when the message carries no HW
+ timestamp. */
+ if (local_ts->source == NTP_TS_DAEMON && !UTI_IsZeroTimespec(&ts3.ts[0]) &&
+ (!is_tx || UTI_IsZeroTimespec(&ts3.ts[2]))) {
+ LCL_CookTime(&ts3.ts[0], &local_ts->ts, &local_ts->err);
+ local_ts->source = NTP_TS_KERNEL;
+ }
+ }
+
+ /* Accept only extended errors which announce a TX timestamp */
+ if ((cmsg->cmsg_level == SOL_IP && cmsg->cmsg_type == IP_RECVERR) ||
+ (cmsg->cmsg_level == SOL_IPV6 && cmsg->cmsg_type == IPV6_RECVERR)) {
+ struct sock_extended_err err;
+
+ memcpy(&err, CMSG_DATA(cmsg), sizeof (err));
+
+ if (err.ee_errno != ENOMSG || err.ee_info != SCM_TSTAMP_SND ||
+ err.ee_origin != SO_EE_ORIGIN_TIMESTAMPING) {
+ DEBUG_LOG("Unknown extended error");
+ /* Drop the message */
+ return 1;
+ }
+ }
+ }
+
+ /* If the kernel is slow with enabling RX timestamping, open a dummy
+ socket to keep the kernel RX timestamping permanently enabled */
+ if (!is_tx && local_ts->source == NTP_TS_DAEMON && ts_flags) {
+ DEBUG_LOG("Missing kernel RX timestamp");
+ if (dummy_rxts_socket == INVALID_SOCK_FD)
+ dummy_rxts_socket = open_dummy_socket();
+ }
+
+ /* Return the message if it's not received from the error queue */
+ if (!is_tx)
+ return 0;
+
+ /* The data from the error queue includes all layers up to UDP. We have to
+ extract the UDP data and also the destination address with port as there
+ currently doesn't seem to be a better way to get them both. */
+ l2_length = length;
+ length = extract_udp_data(hdr->msg_iov[0].iov_base, remote_addr, length);
+
+ DEBUG_LOG("Received %d (%d) bytes from error queue for %s:%d fd=%d if=%d tss=%u",
+ l2_length, length, UTI_IPToString(&remote_addr->ip_addr), remote_addr->port,
+ local_addr->sock_fd, local_addr->if_index, local_ts->source);
+
+ /* Update assumed position of UDP data at layer 2 for next received packet */
+ if (iface && length) {
+ if (remote_addr->ip_addr.family == IPADDR_INET4)
+ iface->l2_udp4_ntp_start = l2_length - length;
+ else if (remote_addr->ip_addr.family == IPADDR_INET6)
+ iface->l2_udp6_ntp_start = l2_length - length;
+ }
+
+ /* Drop the message if it has no timestamp or its processing failed */
+ if (local_ts->source == NTP_TS_DAEMON) {
+ DEBUG_LOG("Missing TX timestamp");
+ return 1;
+ }
+
+ /* Too short to be an NTP packet (also the case when the extraction failed
+ and returned 0) */
+ if (length < NTP_NORMAL_PACKET_LENGTH)
+ return 1;
+
+ NSR_ProcessTx(remote_addr, local_addr, local_ts,
+ (NTP_Packet *)hdr->msg_iov[0].iov_base, length);
+
+ return 1;
+}
+
+/* ================================================== */
+
+/* Prepare a message which is about to be sent on the socket for TX
+   timestamping.  msg is the message and cmsglen the length of the control
+   data already present in it.  When TX timestamping has to be requested per
+   message, an SO_TIMESTAMPING control message with the TX flags is appended.
+   Returns the new length of the control data (unchanged if nothing was
+   appended). */
+int
+NIO_Linux_RequestTxTimestamp(struct msghdr *msg, int cmsglen, int sock_fd)
+{
+  struct cmsghdr *ctl;
+  char *control;
+
+  if (!ts_flags)
+    return cmsglen;
+
+  /* If a HW transmit timestamp is requested on a client socket, monitor
+     events on the socket in order to avoid processing of a fast response
+     without the HW timestamp of the request */
+  if ((ts_tx_flags & SOF_TIMESTAMPING_TX_HARDWARE) && !NIO_IsServerSocket(sock_fd))
+    monitored_socket = sock_fd;
+
+  /* Check if TX timestamping is disabled on this socket */
+  if (permanent_ts_options)
+    return cmsglen;
+  if (!NIO_IsServerSocket(sock_fd))
+    return cmsglen;
+
+  /* Add control message that will enable TX timestamping for this message.
+     Don't use CMSG_NXTHDR as the one in glibc is buggy for creating new
+     control messages. */
+  control = (char *)CMSG_FIRSTHDR(msg);
+  ctl = (struct cmsghdr *)(control + cmsglen);
+  memset(ctl, 0, CMSG_SPACE(sizeof (ts_tx_flags)));
+  cmsglen += CMSG_SPACE(sizeof (ts_tx_flags));
+
+  ctl->cmsg_len = CMSG_LEN(sizeof (ts_tx_flags));
+  ctl->cmsg_level = SOL_SOCKET;
+  ctl->cmsg_type = SO_TIMESTAMPING;
+
+  memcpy(CMSG_DATA(ctl), &ts_tx_flags, sizeof (ts_tx_flags));
+
+  return cmsglen;
+}
+
+/* ================================================== */
+
+/* Notification that the socket is about to be closed. The socket is
+ forgotten by the monitoring/suspending logic: resume_socket() clears
+ monitored_socket if it refers to this socket and, if the socket is
+ currently suspended, re-enables its input events and removes the pending
+ resume timeout. */
+void
+NIO_Linux_NotifySocketClosing(int sock_fd)
+{
+ resume_socket(sock_fd);
+}