Adding upstream version 3.4.upstream/3.4 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-05 18:43:21 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-05 18:43:21 +0000
commit: 104f986b0650b8f93540785d2bcf486905e49b62 (patch)
tree: 2b2ae5113d9b57425d4bb3f726e325316b87e00a /sys_linux.c
parent: Initial commit. (diff)
download: chrony-upstream/3.4.tar.xz
chrony-upstream/3.4.zip
1 files changed, 902 insertions, 0 deletions
diff --git a/sys_linux.c b/sys_linux.c
new file mode 100644
index 0000000..00bc239
--- /dev/null
+++ b/sys_linux.c
@@ -0,0 +1,902 @@
+/*
+  chronyd/chronyc - Programs for keeping computer clocks accurate.
+
+ **********************************************************************
+ * Copyright (C) Richard P. Curnow  1997-2003
+ * Copyright (C) John G. Hasler  2009
+ * Copyright (C) Miroslav Lichvar  2009-2012, 2014-2018
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ * 
+ **********************************************************************
+
+  =======================================================================
+
+  This is the module specific to the Linux operating system.
+
+  */
+
+#include "config.h"
+
+#include "sysincl.h"
+
+#include <sys/utsname.h>
+
+#if defined(HAVE_SCHED_SETSCHEDULER)
+#  include <sched.h>
+#endif
+
+#if defined(HAVE_MLOCKALL)
+#  include <sys/mman.h>
+#include <sys/resource.h>
+#endif
+
+#if defined(FEAT_PHC) || defined(HAVE_LINUX_TIMESTAMPING)
+#include <linux/ptp_clock.h>
+#endif
+
+#ifdef FEAT_SCFILTER
+#include <sys/prctl.h>
+#include <seccomp.h>
+#include <termios.h>
+#ifdef FEAT_PPS
+#include <linux/pps.h>
+#endif
+#ifdef FEAT_RTC
+#include <linux/rtc.h>
+#endif
+#ifdef HAVE_LINUX_TIMESTAMPING
+#include <linux/sockios.h>
+#endif
+#endif
+
+#ifdef FEAT_PRIVDROP
+#include <sys/prctl.h>
+#include <sys/capability.h>
+#endif
+
+#include "sys_linux.h"
+#include "sys_timex.h"
+#include "conf.h"
+#include "local.h"
+#include "logging.h"
+#include "privops.h"
+#include "util.h"
+
+/* Frequency scale to convert from ppm to the timex freq */
+#define FREQ_SCALE (double)(1 << 16)
+
+/* Definitions used if missed in the system headers */
+#ifndef ADJ_SETOFFSET
+#define ADJ_SETOFFSET           0x0100  /* add 'time' to current time */
+#endif
+#ifndef ADJ_NANO
+#define ADJ_NANO                0x2000  /* select nanosecond resolution */
+#endif
+
+/* This is the uncompensated system tick value */
+static int nominal_tick;
+
+/* Current tick value */
+static int current_delta_tick;
+
+/* The maximum amount by which 'tick' can be biased away from 'nominal_tick'
+   (sys_adjtimex() in the kernel bounds this to 10%) */
+static int max_tick_bias;
+
+/* The kernel USER_HZ constant */
+static int hz;
+static double dhz; /* And dbl prec version of same for arithmetic */
+
+/* Flag indicating whether adjtimex() can step the clock */
+static int have_setoffset;
+
+/* The assumed rate at which the effective frequency and tick values are
+   updated in the kernel */
+static int tick_update_hz;
+
+/* ================================================== */
+
+inline static long
+our_round(double x)
+{
+  long y;
+
+  if (x > 0.0)
+    y = x + 0.5;
+  else
+    y = x - 0.5;
+
+  return y;
+}
+
+/* ================================================== */
+/* Positive means currently fast of true time, i.e. jump backwards */
+
+static int
+apply_step_offset(double offset)
+{
+  struct timex txc;
+
+  txc.modes = ADJ_SETOFFSET | ADJ_NANO;
+  txc.time.tv_sec = -offset;
+  txc.time.tv_usec = 1.0e9 * (-offset - txc.time.tv_sec);
+  if (txc.time.tv_usec < 0) {
+    txc.time.tv_sec--;
+    txc.time.tv_usec += 1000000000;
+  }
+
+  if (SYS_Timex_Adjust(&txc, 1) < 0)
+    return 0;
+
+  return 1;
+}
+
+/* ================================================== */
+/* This call sets the Linux kernel frequency to a given value in parts
+   per million relative to the nominal running frequency.  Nominal is taken to
+   be tick=10000, freq=0 (for a USER_HZ==100 system, other values otherwise).
+   The convention is that this is called with a positive argument if the local
+   clock runs fast when uncompensated.  */
+
+static double
+set_frequency(double freq_ppm)
+{
+  struct timex txc;
+  long required_tick;
+  double required_freq;
+  int required_delta_tick;
+
+  required_delta_tick = our_round(freq_ppm / dhz);
+
+  /* Older kernels (pre-2.6.18) don't apply the frequency offset exactly as
+     set by adjtimex() and a scaling constant (that depends on the internal
+     kernel HZ constant) would be needed to compensate for the error. Because
+     chronyd is closed loop it doesn't matter much if we don't scale the
+     required frequency, but we want to prevent thrashing between two states
+     when the system's frequency error is close to a multiple of USER_HZ.  With
+     USER_HZ <= 250, the maximum frequency adjustment of 500 ppm overlaps at
+     least two ticks and we can stick to the current tick if it's next to the
+     required tick. */
+  if (hz <= 250 && (required_delta_tick + 1 == current_delta_tick ||
+                    required_delta_tick - 1 == current_delta_tick)) {
+    required_delta_tick = current_delta_tick;
+  }
+
+  required_freq = -(freq_ppm - dhz * required_delta_tick);
+  required_tick = nominal_tick - required_delta_tick;
+
+  txc.modes = ADJ_TICK | ADJ_FREQUENCY;
+  txc.freq = required_freq * FREQ_SCALE;
+  txc.tick = required_tick;
+
+  SYS_Timex_Adjust(&txc, 0);
+
+  current_delta_tick = required_delta_tick;
+
+  return dhz * current_delta_tick - txc.freq / FREQ_SCALE;
+}
+
+/* ================================================== */
+/* Read the ppm frequency from the kernel */
+
+static double
+read_frequency(void)
+{
+  struct timex txc;
+
+  txc.modes = 0;
+
+  SYS_Timex_Adjust(&txc, 0);
+
+  current_delta_tick = nominal_tick - txc.tick;
+
+  return dhz * current_delta_tick - txc.freq / FREQ_SCALE;
+}
+
+/* ================================================== */
+
+/* Estimate the value of USER_HZ given the value of txc.tick that chronyd finds when
+ * it starts.  The only credible values are 100 (Linux/x86) or powers of 2.
+ * Also, the bounds checking inside the kernel's adjtimex system call enforces
+ * a +/- 10% movement of tick away from the nominal value 1e6/USER_HZ. */
+
+static int
+guess_hz(void)
+{
+  struct timex txc;
+  int i, tick, tick_lo, tick_hi, ihz;
+  double tick_nominal;
+
+  txc.modes = 0;
+  SYS_Timex_Adjust(&txc, 0);
+  tick = txc.tick;
+
+  /* Pick off the hz=100 case first */
+  if (tick >= 9000 && tick <= 11000) {
+    return 100;
+  }
+
+  for (i=4; i<16; i++) { /* surely 16 .. 32768 is a wide enough range? */
+    ihz = 1 << i;
+    tick_nominal = 1.0e6 / (double) ihz;
+    tick_lo = (int)(0.5 + tick_nominal*2.0/3.0);
+    tick_hi = (int)(0.5 + tick_nominal*4.0/3.0);
+    
+    if (tick_lo < tick && tick <= tick_hi) {
+      return ihz;
+    }
+  }
+
+  /* oh dear.  doomed. */
+  LOG_FATAL("Can't determine hz from tick %d", tick);
+
+  return 0;
+}
+
+/* ================================================== */
+
+static int
+get_hz(void)
+{
+#ifdef _SC_CLK_TCK
+  int hz;
+
+  if ((hz = sysconf(_SC_CLK_TCK)) < 1)
+    return 0;
+
+  return hz;
+#else
+  return 0;
+#endif
+}
+
+/* ================================================== */
+
+static int
+kernelvercmp(int major1, int minor1, int patch1,
+    int major2, int minor2, int patch2)
+{
+  if (major1 != major2)
+    return major1 - major2;
+  if (minor1 != minor2)
+    return minor1 - minor2;
+  return patch1 - patch2;
+}
+
+/* ================================================== */
+
+static void
+get_kernel_version(int *major, int *minor, int *patch)
+{
+  struct utsname uts;
+
+  if (uname(&uts) < 0)
+    LOG_FATAL("uname() failed");
+
+  *patch = 0;
+  if (sscanf(uts.release, "%d.%d.%d", major, minor, patch) < 2)
+    LOG_FATAL("Could not parse kernel version");
+}
+
+/* ================================================== */
+
+/* Compute the scaling to use on any frequency we set, according to
+   the vintage of the Linux kernel being used. */
+
+static void
+get_version_specific_details(void)
+{
+  int major, minor, patch;
+  
+  hz = get_hz();
+
+  if (!hz)
+    hz = guess_hz();
+
+  dhz = (double) hz;
+  nominal_tick = (1000000L + (hz/2))/hz; /* Mirror declaration in kernel */
+  max_tick_bias = nominal_tick / 10;
+
+  /* In modern kernels the frequency of the clock is updated immediately in the
+     adjtimex() system call.  Assume a maximum delay of 10 microseconds. */
+  tick_update_hz = 100000;
+
+  get_kernel_version(&major, &minor, &patch);
+  DEBUG_LOG("Linux kernel major=%d minor=%d patch=%d", major, minor, patch);
+
+  if (kernelvercmp(major, minor, patch, 2, 2, 0) < 0) {
+    LOG_FATAL("Kernel version not supported, sorry.");
+  }
+
+  if (kernelvercmp(major, minor, patch, 2, 6, 27) >= 0 &&
+      kernelvercmp(major, minor, patch, 2, 6, 33) < 0) {
+    /* In tickless kernels before 2.6.33 the frequency is updated in
+       a half-second interval */
+    tick_update_hz = 2;
+  } else if (kernelvercmp(major, minor, patch, 4, 19, 0) < 0) {
+    /* In kernels before 4.19 the frequency is updated only on internal ticks
+       (CONFIG_HZ).  As their rate cannot be reliably detected from the user
+       space, and it may not even be constant (CONFIG_NO_HZ - aka tickless),
+       assume the lowest commonly used constant rate */
+    tick_update_hz = 100;
+  }
+
+  /* ADJ_SETOFFSET support */
+  if (kernelvercmp(major, minor, patch, 2, 6, 39) < 0) {
+    have_setoffset = 0;
+  } else {
+    have_setoffset = 1;
+  }
+
+  DEBUG_LOG("hz=%d nominal_tick=%d max_tick_bias=%d tick_update_hz=%d",
+            hz, nominal_tick, max_tick_bias, tick_update_hz);
+}
+
+/* ================================================== */
+
+static void
+reset_adjtime_offset(void)
+{
+  struct timex txc;
+
+  /* Reset adjtime() offset */
+  txc.modes = ADJ_OFFSET_SINGLESHOT;
+  txc.offset = 0;
+
+  SYS_Timex_Adjust(&txc, 0);
+}
+
+/* ================================================== */
+
+static int
+test_step_offset(void)
+{
+  struct timex txc;
+
+  /* Zero maxerror and check it's reset to a maximum after ADJ_SETOFFSET.
+     This seems to be the only way how to verify that the kernel really
+     supports the ADJ_SETOFFSET mode as it doesn't return an error on unknown
+     mode. */
+
+  txc.modes = MOD_MAXERROR;
+  txc.maxerror = 0;
+
+  if (SYS_Timex_Adjust(&txc, 1) < 0 || txc.maxerror != 0)
+    return 0;
+
+  txc.modes = ADJ_SETOFFSET | ADJ_NANO;
+  txc.time.tv_sec = 0;
+  txc.time.tv_usec = 0;
+
+  if (SYS_Timex_Adjust(&txc, 1) < 0 || txc.maxerror < 100000)
+    return 0;
+
+  return 1;
+}
+
+/* ================================================== */
+
+static void
+report_time_adjust_blockers(void)
+{
+#if defined(FEAT_PRIVDROP) && defined(CAP_IS_SUPPORTED)
+  if (CAP_IS_SUPPORTED(CAP_SYS_TIME) && cap_get_bound(CAP_SYS_TIME))
+    return;
+  LOG(LOGS_WARN, "CAP_SYS_TIME not present");
+#endif
+}
+
+/* ================================================== */
+/* Initialisation code for this module */
+
+void
+SYS_Linux_Initialise(void)
+{
+  get_version_specific_details();
+
+  report_time_adjust_blockers();
+
+  reset_adjtime_offset();
+
+  if (have_setoffset && !test_step_offset()) {
+    LOG(LOGS_INFO, "adjtimex() doesn't support ADJ_SETOFFSET");
+    have_setoffset = 0;
+  }
+
+  SYS_Timex_InitialiseWithFunctions(1.0e6 * max_tick_bias / nominal_tick,
+                                    1.0 / tick_update_hz,
+                                    read_frequency, set_frequency,
+                                    have_setoffset ? apply_step_offset : NULL,
+                                    0.0, 0.0, NULL, NULL);
+}
+
+/* ================================================== */
+/* Finalisation code for this module */
+
+void
+SYS_Linux_Finalise(void)
+{
+  SYS_Timex_Finalise();
+}
+
+/* ================================================== */
+
+#ifdef FEAT_PRIVDROP
+void
+SYS_Linux_DropRoot(uid_t uid, gid_t gid, int clock_control)
+{
+  char cap_text[256];
+  cap_t cap;
+
+  if (prctl(PR_SET_KEEPCAPS, 1)) {
+    LOG_FATAL("prctl() failed");
+  }
+  
+  UTI_DropRoot(uid, gid);
+
+  /* Keep CAP_NET_BIND_SERVICE only if a server NTP port can be opened
+     and keep CAP_SYS_TIME only if the clock control is enabled */
+  if (snprintf(cap_text, sizeof (cap_text), "%s %s",
+               CNF_GetNTPPort() ? "cap_net_bind_service=ep" : "",
+               clock_control ? "cap_sys_time=ep" : "") >= sizeof (cap_text))
+    assert(0);
+
+  if ((cap = cap_from_text(cap_text)) == NULL) {
+    LOG_FATAL("cap_from_text() failed");
+  }
+
+  if (cap_set_proc(cap)) {
+    LOG_FATAL("cap_set_proc() failed");
+  }
+
+  cap_free(cap);
+}
+#endif
+
+/* ================================================== */
+
+#ifdef FEAT_SCFILTER
+static
+void check_seccomp_applicability(void)
+{
+  int mail_enabled;
+  double mail_threshold;
+  char *mail_user;
+
+  CNF_GetMailOnChange(&mail_enabled, &mail_threshold, &mail_user);
+  if (mail_enabled)
+    LOG_FATAL("mailonchange directive cannot be used with -F enabled");
+}
+
+/* ================================================== */
+
+void
+SYS_Linux_EnableSystemCallFilter(int level)
+{
+  const int syscalls[] = {
+    /* Clock */
+    SCMP_SYS(adjtimex), SCMP_SYS(clock_gettime), SCMP_SYS(gettimeofday),
+    SCMP_SYS(settimeofday), SCMP_SYS(time),
+    /* Process */
+    SCMP_SYS(clone), SCMP_SYS(exit), SCMP_SYS(exit_group), SCMP_SYS(getpid),
+    SCMP_SYS(getrlimit), SCMP_SYS(rt_sigaction), SCMP_SYS(rt_sigreturn),
+    SCMP_SYS(rt_sigprocmask), SCMP_SYS(set_tid_address), SCMP_SYS(sigreturn),
+    SCMP_SYS(wait4),
+    /* Memory */
+    SCMP_SYS(brk), SCMP_SYS(madvise), SCMP_SYS(mmap), SCMP_SYS(mmap2),
+    SCMP_SYS(mprotect), SCMP_SYS(mremap), SCMP_SYS(munmap), SCMP_SYS(shmdt),
+    /* Filesystem */
+    SCMP_SYS(access), SCMP_SYS(chmod), SCMP_SYS(chown), SCMP_SYS(chown32),
+    SCMP_SYS(fstat), SCMP_SYS(fstat64), SCMP_SYS(getdents), SCMP_SYS(getdents64),
+    SCMP_SYS(lseek), SCMP_SYS(rename), SCMP_SYS(stat), SCMP_SYS(stat64),
+    SCMP_SYS(statfs), SCMP_SYS(statfs64), SCMP_SYS(unlink),
+    /* Socket */
+    SCMP_SYS(bind), SCMP_SYS(connect), SCMP_SYS(getsockname), SCMP_SYS(getsockopt),
+    SCMP_SYS(recvfrom), SCMP_SYS(recvmmsg), SCMP_SYS(recvmsg),
+    SCMP_SYS(sendmmsg), SCMP_SYS(sendmsg), SCMP_SYS(sendto),
+    /* TODO: check socketcall arguments */
+    SCMP_SYS(socketcall),
+    /* General I/O */
+    SCMP_SYS(_newselect), SCMP_SYS(close), SCMP_SYS(open), SCMP_SYS(openat), SCMP_SYS(pipe),
+    SCMP_SYS(poll), SCMP_SYS(read), SCMP_SYS(futex), SCMP_SYS(select),
+    SCMP_SYS(set_robust_list), SCMP_SYS(write),
+    /* Miscellaneous */
+    SCMP_SYS(getrandom), SCMP_SYS(sysinfo), SCMP_SYS(uname),
+  };
+
+  const int socket_domains[] = {
+    AF_NETLINK, AF_UNIX, AF_INET,
+#ifdef FEAT_IPV6
+    AF_INET6,
+#endif
+  };
+
+  const static int socket_options[][2] = {
+    { SOL_IP, IP_PKTINFO }, { SOL_IP, IP_FREEBIND },
+#ifdef FEAT_IPV6
+    { SOL_IPV6, IPV6_V6ONLY }, { SOL_IPV6, IPV6_RECVPKTINFO },
+#endif
+    { SOL_SOCKET, SO_BROADCAST }, { SOL_SOCKET, SO_REUSEADDR },
+    { SOL_SOCKET, SO_TIMESTAMP }, { SOL_SOCKET, SO_TIMESTAMPNS },
+#ifdef HAVE_LINUX_TIMESTAMPING
+    { SOL_SOCKET, SO_SELECT_ERR_QUEUE }, { SOL_SOCKET, SO_TIMESTAMPING },
+#endif
+  };
+
+  const static int fcntls[] = { F_GETFD, F_SETFD, F_SETFL };
+
+  const static unsigned long ioctls[] = {
+    FIONREAD, TCGETS,
+#if defined(FEAT_PHC) || defined(HAVE_LINUX_TIMESTAMPING)
+    PTP_EXTTS_REQUEST, PTP_SYS_OFFSET,
+#ifdef PTP_PIN_SETFUNC
+    PTP_PIN_SETFUNC,
+#endif
+#ifdef PTP_SYS_OFFSET_PRECISE
+    PTP_SYS_OFFSET_PRECISE,
+#endif
+#endif
+#ifdef FEAT_PPS
+    PPS_FETCH,
+#endif
+#ifdef FEAT_RTC
+    RTC_RD_TIME, RTC_SET_TIME, RTC_UIE_ON, RTC_UIE_OFF,
+#endif
+#ifdef HAVE_LINUX_TIMESTAMPING
+    SIOCETHTOOL,
+#endif
+  };
+
+  scmp_filter_ctx *ctx;
+  int i;
+
+  /* Check if the chronyd configuration is supported */
+  check_seccomp_applicability();
+
+  /* Start the helper process, which will run without any seccomp filter.  It
+     will be used for getaddrinfo(), for which it's difficult to maintain a
+     list of required system calls (with glibc it depends on what NSS modules
+     are installed and enabled on the system). */
+  PRV_StartHelper();
+
+  ctx = seccomp_init(level > 0 ? SCMP_ACT_KILL : SCMP_ACT_TRAP);
+  if (ctx == NULL)
+      LOG_FATAL("Failed to initialize seccomp");
+
+  /* Add system calls that are always allowed */
+  for (i = 0; i < (sizeof (syscalls) / sizeof (*syscalls)); i++) {
+    if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscalls[i], 0) < 0)
+      goto add_failed;
+  }
+
+  /* Allow sockets to be created only in selected domains */
+  for (i = 0; i < sizeof (socket_domains) / sizeof (*socket_domains); i++) {
+    if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(socket), 1,
+                         SCMP_A0(SCMP_CMP_EQ, socket_domains[i])) < 0)
+      goto add_failed;
+  }
+
+  /* Allow setting only selected sockets options */
+  for (i = 0; i < sizeof (socket_options) / sizeof (*socket_options); i++) {
+    if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(setsockopt), 3,
+                         SCMP_A1(SCMP_CMP_EQ, socket_options[i][0]),
+                         SCMP_A2(SCMP_CMP_EQ, socket_options[i][1]),
+                         SCMP_A4(SCMP_CMP_LE, sizeof (int))) < 0)
+      goto add_failed;
+  }
+
+  /* Allow only selected fcntl calls */
+  for (i = 0; i < sizeof (fcntls) / sizeof (*fcntls); i++) {
+    if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(fcntl), 1,
+                         SCMP_A1(SCMP_CMP_EQ, fcntls[i])) < 0 ||
+        seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(fcntl64), 1,
+                         SCMP_A1(SCMP_CMP_EQ, fcntls[i])) < 0)
+      goto add_failed;
+  }
+
+  /* Allow only selected ioctls */
+  for (i = 0; i < sizeof (ioctls) / sizeof (*ioctls); i++) {
+    if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(ioctl), 1,
+                         SCMP_A1(SCMP_CMP_EQ, ioctls[i])) < 0)
+      goto add_failed;
+  }
+
+  if (seccomp_load(ctx) < 0)
+    LOG_FATAL("Failed to load seccomp rules");
+
+  LOG(LOGS_INFO, "Loaded seccomp filter");
+  seccomp_release(ctx);
+  return;
+
+add_failed:
+  LOG_FATAL("Failed to add seccomp rules");
+}
+#endif
+
+/* ================================================== */
+
+#if defined(HAVE_SCHED_SETSCHEDULER)
+  /* Install SCHED_FIFO real-time scheduler with specified priority */
+void SYS_Linux_SetScheduler(int SchedPriority)
+{
+  int pmax, pmin;
+  struct sched_param sched;
+
+  if (SchedPriority < 1 || SchedPriority > 99) {
+    LOG_FATAL("Bad scheduler priority: %d", SchedPriority);
+  } else {
+    sched.sched_priority = SchedPriority;
+    pmax = sched_get_priority_max(SCHED_FIFO);
+    pmin = sched_get_priority_min(SCHED_FIFO);
+    if ( SchedPriority > pmax ) {
+      sched.sched_priority = pmax;
+    }
+    else if ( SchedPriority < pmin ) {
+      sched.sched_priority = pmin;
+    }
+    if ( sched_setscheduler(0, SCHED_FIFO, &sched) == -1 ) {
+      LOG(LOGS_ERR, "sched_setscheduler() failed");
+    }
+    else {
+      DEBUG_LOG("Enabled SCHED_FIFO with priority %d",
+          sched.sched_priority);
+    }
+  }
+}
+#endif /* HAVE_SCHED_SETSCHEDULER */
+
+#if defined(HAVE_MLOCKALL)
+/* Lock the process into RAM so that it will never be swapped out */ 
+void SYS_Linux_MemLockAll(int LockAll)
+{
+  struct rlimit rlim;
+  if (LockAll == 1 ) {
+    /* Make sure that we will be able to lock all the memory we need */
+    /* even after dropping privileges.  This does not actually reaerve any memory */
+    rlim.rlim_max = RLIM_INFINITY;
+    rlim.rlim_cur = RLIM_INFINITY;
+    if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) {
+      LOG(LOGS_ERR, "setrlimit() failed: not locking into RAM");
+    }
+    else {
+      if (mlockall(MCL_CURRENT|MCL_FUTURE) < 0) {
+	LOG(LOGS_ERR, "mlockall() failed");
+      }
+      else {
+	DEBUG_LOG("Successfully locked into RAM");
+      }
+    }
+  }
+}
+#endif /* HAVE_MLOCKALL */
+
+/* ================================================== */
+
+int
+SYS_Linux_CheckKernelVersion(int req_major, int req_minor)
+{
+  int major, minor, patch;
+
+  get_kernel_version(&major, &minor, &patch);
+
+  return kernelvercmp(req_major, req_minor, 0, major, minor, patch) <= 0;
+}
+
+/* ================================================== */
+
+#if defined(FEAT_PHC) || defined(HAVE_LINUX_TIMESTAMPING)
+
+#define PHC_READINGS 10
+
+static int
+get_phc_sample(int phc_fd, double precision, struct timespec *phc_ts,
+               struct timespec *sys_ts, double *err)
+{
+  struct ptp_sys_offset sys_off;
+  struct timespec ts1, ts2, ts3, phc_tss[PHC_READINGS], sys_tss[PHC_READINGS];
+  double min_delay = 0.0, delays[PHC_READINGS], phc_sum, sys_sum, sys_prec;
+  int i, n;
+
+  /* Silence valgrind */
+  memset(&sys_off, 0, sizeof (sys_off));
+
+  sys_off.n_samples = PHC_READINGS;
+
+  if (ioctl(phc_fd, PTP_SYS_OFFSET, &sys_off)) {
+    DEBUG_LOG("ioctl(%s) failed : %s", "PTP_SYS_OFFSET", strerror(errno));
+    return 0;
+  }
+
+  for (i = 0; i < PHC_READINGS; i++) {
+    ts1.tv_sec = sys_off.ts[i * 2].sec;
+    ts1.tv_nsec = sys_off.ts[i * 2].nsec;
+    ts2.tv_sec = sys_off.ts[i * 2 + 1].sec;
+    ts2.tv_nsec = sys_off.ts[i * 2 + 1].nsec;
+    ts3.tv_sec = sys_off.ts[i * 2 + 2].sec;
+    ts3.tv_nsec = sys_off.ts[i * 2 + 2].nsec;
+
+    sys_tss[i] = ts1;
+    phc_tss[i] = ts2;
+    delays[i] = UTI_DiffTimespecsToDouble(&ts3, &ts1);
+
+    if (delays[i] < 0.0) {
+      /* Step in the middle of a PHC reading? */
+      DEBUG_LOG("Bad PTP_SYS_OFFSET sample delay=%e", delays[i]);
+      return 0;
+    }
+
+    if (!i || delays[i] < min_delay)
+      min_delay = delays[i];
+  }
+
+  sys_prec = LCL_GetSysPrecisionAsQuantum();
+
+  /* Combine best readings */
+  for (i = n = 0, phc_sum = sys_sum = 0.0; i < PHC_READINGS; i++) {
+    if (delays[i] > min_delay + MAX(sys_prec, precision))
+      continue;
+
+    phc_sum += UTI_DiffTimespecsToDouble(&phc_tss[i], &phc_tss[0]);
+    sys_sum += UTI_DiffTimespecsToDouble(&sys_tss[i], &sys_tss[0]) + delays[i] / 2.0;
+    n++;
+  }
+
+  assert(n);
+
+  UTI_AddDoubleToTimespec(&phc_tss[0], phc_sum / n, phc_ts);
+  UTI_AddDoubleToTimespec(&sys_tss[0], sys_sum / n, sys_ts);
+  *err = MAX(min_delay / 2.0, precision);
+
+  return 1;
+}
+/* ================================================== */
+
+static int
+get_precise_phc_sample(int phc_fd, double precision, struct timespec *phc_ts,
+		       struct timespec *sys_ts, double *err)
+{
+#ifdef PTP_SYS_OFFSET_PRECISE
+  struct ptp_sys_offset_precise sys_off;
+
+  /* Silence valgrind */
+  memset(&sys_off, 0, sizeof (sys_off));
+
+  if (ioctl(phc_fd, PTP_SYS_OFFSET_PRECISE, &sys_off)) {
+    DEBUG_LOG("ioctl(%s) failed : %s", "PTP_SYS_OFFSET_PRECISE",
+              strerror(errno));
+    return 0;
+  }
+
+  phc_ts->tv_sec = sys_off.device.sec;
+  phc_ts->tv_nsec = sys_off.device.nsec;
+  sys_ts->tv_sec = sys_off.sys_realtime.sec;
+  sys_ts->tv_nsec = sys_off.sys_realtime.nsec;
+  *err = MAX(LCL_GetSysPrecisionAsQuantum(), precision);
+
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+/* ================================================== */
+
+int
+SYS_Linux_OpenPHC(const char *path, int phc_index)
+{
+  struct ptp_clock_caps caps;
+  char phc_path[64];
+  int phc_fd;
+
+  if (!path) {
+    if (snprintf(phc_path, sizeof (phc_path), "/dev/ptp%d", phc_index) >= sizeof (phc_path))
+      return -1;
+    path = phc_path;
+  }
+
+  phc_fd = open(path, O_RDONLY);
+  if (phc_fd < 0) {
+    LOG(LOGS_ERR, "Could not open %s : %s", path, strerror(errno));
+    return -1;
+  }
+
+  /* Make sure it is a PHC */
+  if (ioctl(phc_fd, PTP_CLOCK_GETCAPS, &caps)) {
+    LOG(LOGS_ERR, "ioctl(%s) failed : %s", "PTP_CLOCK_GETCAPS", strerror(errno));
+    close(phc_fd);
+    return -1;
+  }
+
+  UTI_FdSetCloexec(phc_fd);
+
+  return phc_fd;
+}
+
+/* ================================================== */
+
+int
+SYS_Linux_GetPHCSample(int fd, int nocrossts, double precision, int *reading_mode,
+                       struct timespec *phc_ts, struct timespec *sys_ts, double *err)
+{
+  if ((*reading_mode == 2 || !*reading_mode) && !nocrossts &&
+      get_precise_phc_sample(fd, precision, phc_ts, sys_ts, err)) {
+    *reading_mode = 2;
+    return 1;
+  } else if ((*reading_mode == 1 || !*reading_mode) &&
+      get_phc_sample(fd, precision, phc_ts, sys_ts, err)) {
+    *reading_mode = 1;
+    return 1;
+  }
+  return 0;
+}
+
+/* ================================================== */
+
+int
+SYS_Linux_SetPHCExtTimestamping(int fd, int pin, int channel,
+                                int rising, int falling, int enable)
+{
+  struct ptp_extts_request extts_req;
+#ifdef PTP_PIN_SETFUNC
+  struct ptp_pin_desc pin_desc;
+
+  memset(&pin_desc, 0, sizeof (pin_desc));
+  pin_desc.index = pin;
+  pin_desc.func = enable ? PTP_PF_EXTTS : PTP_PF_NONE;
+  pin_desc.chan = channel;
+
+  if (ioctl(fd, PTP_PIN_SETFUNC, &pin_desc)) {
+    DEBUG_LOG("ioctl(%s) failed : %s", "PTP_PIN_SETFUNC", strerror(errno));
+    return 0;
+  }
+#else
+  DEBUG_LOG("Missing PTP_PIN_SETFUNC");
+  return 0;
+#endif
+
+  memset(&extts_req, 0, sizeof (extts_req));
+  extts_req.index = channel;
+  extts_req.flags = (enable ? PTP_ENABLE_FEATURE : 0) |
+                    (rising ? PTP_RISING_EDGE : 0) |
+                    (falling ? PTP_FALLING_EDGE : 0);
+
+  if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_req)) {
+    DEBUG_LOG("ioctl(%s) failed : %s", "PTP_EXTTS_REQUEST", strerror(errno));
+    return 0;
+  }
+
+  return 1;
+}
+
+/* ================================================== */
+
+int
+SYS_Linux_ReadPHCExtTimestamp(int fd, struct timespec *phc_ts, int *channel)
+{
+  struct ptp_extts_event extts_event;
+
+  if (read(fd, &extts_event, sizeof (extts_event)) != sizeof (extts_event)) {
+    DEBUG_LOG("Could not read PHC extts event");
+    return 0;
+  }
+
+  phc_ts->tv_sec = extts_event.t.sec;
+  phc_ts->tv_nsec = extts_event.t.nsec;
+  *channel = extts_event.index;
+
+  return 1;
+}
+
+#endif
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-05 18:43:21 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-05 18:43:21 +0000
commit	104f986b0650b8f93540785d2bcf486905e49b62 (patch)
tree	2b2ae5113d9b57425d4bb3f726e325316b87e00a /sys_linux.c
parent	Initial commit. (diff)
download	chrony-upstream/3.4.tar.xz chrony-upstream/3.4.zip