diff options
Diffstat (limited to 'src/clock.c')
-rw-r--r-- | src/clock.c | 460 |
1 files changed, 460 insertions, 0 deletions
diff --git a/src/clock.c b/src/clock.c new file mode 100644 index 0000000..ec2133c --- /dev/null +++ b/src/clock.c @@ -0,0 +1,460 @@ +/* + * General time-keeping code and variables + * + * Copyright 2000-2021 Willy Tarreau <w@1wt.eu> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <sys/time.h> +#include <signal.h> +#include <time.h> + +#ifdef USE_THREAD +#include <pthread.h> +#endif + +#include <haproxy/api.h> +#include <haproxy/activity.h> +#include <haproxy/clock.h> +#include <haproxy/signal-t.h> +#include <haproxy/time.h> +#include <haproxy/tinfo-t.h> +#include <haproxy/tools.h> + +struct timeval start_date; /* the process's start date in wall-clock time */ +struct timeval ready_date; /* date when the process was considered ready */ +ullong start_time_ns; /* the process's start date in internal monotonic time (ns) */ +volatile ullong global_now_ns; /* common monotonic date between all threads, in ns (wraps every 585 yr) */ +volatile uint global_now_ms; /* common monotonic date in milliseconds (may wrap) */ + +THREAD_ALIGNED(64) static llong now_offset; /* global offset between system time and global time in ns */ + +THREAD_LOCAL ullong now_ns; /* internal monotonic date derived from real clock, in ns (wraps every 585 yr) */ +THREAD_LOCAL uint now_ms; /* internal monotonic date in milliseconds (may wrap) */ +THREAD_LOCAL struct timeval date; /* the real current date (wall-clock time) */ + +static THREAD_LOCAL struct timeval before_poll; /* system date before calling poll() */ +static THREAD_LOCAL struct timeval after_poll; /* system date after leaving poll() */ +static THREAD_LOCAL unsigned int samp_time; /* total elapsed time over current sample */ +static THREAD_LOCAL unsigned int idle_time; /* total idle time over current sample */ +static THREAD_LOCAL unsigned int iso_time_sec; /* last iso time value for this thread */ +static THREAD_LOCAL char iso_time_str[34]; /* ISO time representation of gettimeofday() */ + +#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME) +static clockid_t per_thread_clock_id[MAX_THREADS]; +#endif + +/* returns the system's monotonic time in nanoseconds if supported, otherwise zero */ +uint64_t now_mono_time(void) +{ + uint64_t ret = 0; +#if defined(_POSIX_TIMERS) && defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_MONOTONIC_CLOCK) + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + ret = ts.tv_sec * 1000000000ULL + ts.tv_nsec; +#endif + return ret; +} + +/* Returns the system's monotonic time in nanoseconds. + * Uses the coarse clock source if supported (for fast but + * less precise queries with limited resource usage). + * Fallback to now_mono_time() if coarse source is not supported, + * which may itself return 0 if not supported either. + */ +uint64_t now_mono_time_fast(void) +{ +#if defined(CLOCK_MONOTONIC_COARSE) + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); + return (ts.tv_sec * 1000000000ULL + ts.tv_nsec); +#else + /* fallback to regular mono time, + * returns 0 if not supported + */ + return now_mono_time(); +#endif +} + +/* returns the current thread's cumulated CPU time in nanoseconds if supported, otherwise zero */ +uint64_t now_cpu_time(void) +{ + uint64_t ret = 0; +#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME) + struct timespec ts; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); + ret = ts.tv_sec * 1000000000ULL + ts.tv_nsec; +#endif + return ret; +} + +/* Returns the current thread's cumulated CPU time in nanoseconds. + * + * thread_local timer is cached so that call is less precise but also less + * expensive if heavily used. + * We use the mono time as a cache expiration hint since now_cpu_time() is + * known to be much more expensive than now_mono_time_fast() on systems + * supporting the COARSE clock source. + * + * Returns 0 if either now_mono_time_fast() or now_cpu_time() are not + * supported. + */ +uint64_t now_cpu_time_fast(void) +{ + static THREAD_LOCAL uint64_t mono_cache = 0; + static THREAD_LOCAL uint64_t cpu_cache = 0; + uint64_t mono_cur; + + mono_cur = now_mono_time_fast(); + if (unlikely(mono_cur != mono_cache)) { + /* global mono clock was updated: local cache is outdated */ + cpu_cache = now_cpu_time(); + mono_cache = mono_cur; + } + return cpu_cache; +} + +/* returns another thread's cumulated CPU time in nanoseconds if supported, otherwise zero */ +uint64_t now_cpu_time_thread(int thr) +{ + uint64_t ret = 0; +#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME) + struct timespec ts; + clock_gettime(per_thread_clock_id[thr], &ts); + ret = ts.tv_sec * 1000000000ULL + ts.tv_nsec; +#endif + return ret; +} + +/* set the clock source for the local thread */ +void clock_set_local_source(void) +{ +#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME) +#ifdef USE_THREAD + pthread_getcpuclockid(pthread_self(), &per_thread_clock_id[tid]); +#else + per_thread_clock_id[tid] = CLOCK_THREAD_CPUTIME_ID; +#endif +#endif +} + +/* registers a timer <tmr> of type timer_t delivering signal <sig> with value + * <val>. It tries on the current thread's clock ID first and falls back to + * CLOCK_REALTIME. Returns non-zero on success, 1 on failure. + */ +int clock_setup_signal_timer(void *tmr, int sig, int val) +{ + int ret = 0; + +#if defined(USE_RT) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME) + struct sigevent sev = { }; + timer_t *timer = tmr; + sigset_t set; + + /* unblock the WDTSIG signal we intend to use */ + sigemptyset(&set); + sigaddset(&set, WDTSIG); + ha_sigmask(SIG_UNBLOCK, &set, NULL); + + /* this timer will signal WDTSIG when it fires, with tid in the si_int + * field (important since any thread will receive the signal). + */ + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = sig; + sev.sigev_value.sival_int = val; + if (timer_create(per_thread_clock_id[tid], &sev, timer) != -1 || + timer_create(CLOCK_REALTIME, &sev, timer) != -1) + ret = 1; +#endif + return ret; +} + +/* clock_update_date: sets <date> to system time, and sets <now_ns> to something + * as close as possible to real time, following a monotonic function. The main + * principle consists in detecting backwards and forwards time jumps and adjust + * an offset to correct them. This function should be called once after each + * poll, and never farther apart than MAX_DELAY_MS*2. The poll's timeout should + * be passed in <max_wait>, and the return value in <interrupted> (a non-zero + * value means that we have not expired the timeout). + * + * clock_init_process_date() must have been called once first, and + * clock_init_thread_date() must also have been called once for each thread. + * + * An offset is used to adjust the current time (date), to figure a monotonic + * local time (now_ns). The offset is not critical, as it is only updated after + * a clock jump is detected. From this point all threads will apply it to their + * locally measured time, and will then agree around a common monotonic + * global_now_ns value that serves to further refine their local time. Both + * now_ns and global_now_ns are 64-bit integers counting nanoseconds since a + * vague reference (it starts roughly 20s before the next wrap-around of the + * millisecond counter after boot). The offset is also an integral number of + * nanoseconds, but it's signed so that the clock can be adjusted in the two + * directions. + */ +void clock_update_local_date(int max_wait, int interrupted) +{ + struct timeval min_deadline, max_deadline; + + gettimeofday(&date, NULL); + + /* compute the minimum and maximum local date we may have reached based + * on our past date and the associated timeout. There are three possible + * extremities: + * - the new date cannot be older than before_poll + * - if not interrupted, the new date cannot be older than + * before_poll+max_wait + * - in any case the new date cannot be newer than + * before_poll+max_wait+some margin (100ms used here). + * In case of violation, we'll ignore the current date and instead + * restart from the last date we knew. + */ + _tv_ms_add(&min_deadline, &before_poll, max_wait); + _tv_ms_add(&max_deadline, &before_poll, max_wait + 100); + + if (unlikely(__tv_islt(&date, &before_poll) || // big jump backwards + (!interrupted && __tv_islt(&date, &min_deadline)) || // small jump backwards + __tv_islt(&max_deadline, &date))) { // big jump forwards + if (!interrupted) + now_ns += ms_to_ns(max_wait); + } else { + /* The date is still within expectations. Let's apply the + * now_offset to the system date. Note: ofs if made of two + * independent signed ints. + */ + now_ns = tv_to_ns(&date) + HA_ATOMIC_LOAD(&now_offset); + } + now_ms = ns_to_ms(now_ns); +} + +void clock_update_global_date() +{ + ullong old_now_ns; + uint old_now_ms; + + /* now that we have bounded the local time, let's check if it's + * realistic regarding the global date, which only moves forward, + * otherwise catch up. + */ + old_now_ns = _HA_ATOMIC_LOAD(&global_now_ns); + old_now_ms = global_now_ms; + + do { + if (now_ns < old_now_ns) + now_ns = old_now_ns; + + /* now <now_ns> is expected to be the most accurate date, + * equal to <global_now_ns> or newer. Updating the global + * date too often causes extreme contention and is not + * needed: it's only used to help threads run at the + * same date in case of local drift, and the global date, + * which changes, is only used by freq counters (a choice + * which is debatable by the way since it changes under us). + * Tests have seen that the contention can be reduced from + * 37% in this function to almost 0% when keeping clocks + * synchronized no better than 32 microseconds, so that's + * what we're doing here. + */ + now_ms = ns_to_ms(now_ns); + + if (!((now_ns ^ old_now_ns) & ~0x7FFFULL)) + return; + + /* let's try to update the global_now_ns (both in nanoseconds + * and ms forms) or loop again. + */ + } while ((!_HA_ATOMIC_CAS(&global_now_ns, &old_now_ns, now_ns) || + (now_ms != old_now_ms && !_HA_ATOMIC_CAS(&global_now_ms, &old_now_ms, now_ms))) && + __ha_cpu_relax()); + + /* <now_ns> and <now_ms> are now updated to the last value of + * global_now_ns and global_now_ms, which were also monotonically + * updated. We can compute the latest offset, we don't care who writes + * it last, the variations will not break the monotonic property. + */ + HA_ATOMIC_STORE(&now_offset, now_ns - tv_to_ns(&date)); +} + +/* must be called once at boot to initialize some global variables */ +void clock_init_process_date(void) +{ + now_offset = 0; + gettimeofday(&date, NULL); + after_poll = before_poll = date; + now_ns = global_now_ns = tv_to_ns(&date); + global_now_ms = ns_to_ms(now_ns); + + /* force time to wrap 20s after boot: we first compute the time offset + * that once applied to the wall-clock date will make the local time + * wrap in 5 seconds. This offset is applied to the process-wide time, + * and will be used to recompute the local time, both of which will + * match and continue from this shifted date. + */ + now_offset = sec_to_ns((uint)((uint)(-global_now_ms) / 1000U - BOOT_TIME_WRAP_SEC)); + global_now_ns += now_offset; + now_ns = global_now_ns; + now_ms = global_now_ms = ns_to_ms(now_ns); + + th_ctx->idle_pct = 100; + clock_update_date(0, 1); +} + +void clock_adjust_now_offset(void) +{ + HA_ATOMIC_STORE(&now_offset, now_ns - tv_to_ns(&date)); +} + +/* must be called once per thread to initialize their thread-local variables. + * Note that other threads might also be initializing and running in parallel. + */ +void clock_init_thread_date(void) +{ + gettimeofday(&date, NULL); + after_poll = before_poll = date; + + now_ns = _HA_ATOMIC_LOAD(&global_now_ns); + th_ctx->idle_pct = 100; + th_ctx->prev_cpu_time = now_cpu_time(); + clock_update_date(0, 1); +} + +/* report the average CPU idle percentage over all running threads, between 0 and 100 */ +uint clock_report_idle(void) +{ + uint total = 0; + uint rthr = 0; + uint thr; + + for (thr = 0; thr < MAX_THREADS; thr++) { + if (!ha_thread_info[thr].tg || + !(ha_thread_info[thr].tg->threads_enabled & ha_thread_info[thr].ltid_bit)) + continue; + total += HA_ATOMIC_LOAD(&ha_thread_ctx[thr].idle_pct); + rthr++; + } + return rthr ? total / rthr : 0; +} + +/* Update the idle time value twice a second, to be called after + * clock_update_date() when called after poll(), and currently called only by + * clock_leaving_poll() below. It relies on <before_poll> to be updated to + * the system time before calling poll(). + */ +static inline void clock_measure_idle(void) +{ + /* Let's compute the idle to work ratio. We worked between after_poll + * and before_poll, and slept between before_poll and date. The idle_pct + * is updated at most twice every second. Note that the current second + * rarely changes so we avoid a multiply when not needed. + */ + int delta; + + if ((delta = date.tv_sec - before_poll.tv_sec)) + delta *= 1000000; + idle_time += delta + (date.tv_usec - before_poll.tv_usec); + + if ((delta = date.tv_sec - after_poll.tv_sec)) + delta *= 1000000; + samp_time += delta + (date.tv_usec - after_poll.tv_usec); + + after_poll.tv_sec = date.tv_sec; after_poll.tv_usec = date.tv_usec; + if (samp_time < 500000) + return; + + HA_ATOMIC_STORE(&th_ctx->idle_pct, (100ULL * idle_time + samp_time / 2) / samp_time); + idle_time = samp_time = 0; +} + +/* Collect date and time information after leaving poll(). <timeout> must be + * set to the maximum sleep time passed to poll (in milliseconds), and + * <interrupted> must be zero if the poller reached the timeout or non-zero + * otherwise, which generally is provided by the poller's return value. + */ +void clock_leaving_poll(int timeout, int interrupted) +{ + clock_measure_idle(); + th_ctx->prev_cpu_time = now_cpu_time(); + th_ctx->prev_mono_time = now_mono_time(); +} + +/* Collect date and time information before calling poll(). This will be used + * to count the run time of the past loop and the sleep time of the next poll. + * It also compares the elapsed and cpu times during the activity period to + * estimate the amount of stolen time, which is reported if higher than half + * a millisecond. + */ +void clock_entering_poll(void) +{ + uint64_t new_mono_time; + uint64_t new_cpu_time; + uint32_t run_time; + int64_t stolen; + + gettimeofday(&before_poll, NULL); + + run_time = (before_poll.tv_sec - after_poll.tv_sec) * 1000000U + (before_poll.tv_usec - after_poll.tv_usec); + + new_cpu_time = now_cpu_time(); + new_mono_time = now_mono_time(); + + if (th_ctx->prev_cpu_time && th_ctx->prev_mono_time) { + new_cpu_time -= th_ctx->prev_cpu_time; + new_mono_time -= th_ctx->prev_mono_time; + stolen = new_mono_time - new_cpu_time; + if (unlikely(stolen >= 500000)) { + stolen /= 500000; + /* more than half a millisecond difference might + * indicate an undesired preemption. + */ + report_stolen_time(stolen); + } + } + + /* update the average runtime */ + activity_count_runtime(run_time); +} + +/* returns the current date as returned by gettimeofday() in ISO+microsecond + * format. It uses a thread-local static variable that the reader can consume + * for as long as it wants until next call. Thus, do not call it from a signal + * handler. If <pad> is non-0, a trailing space will be added. It will always + * return exactly 32 or 33 characters (depending on padding) and will always be + * zero-terminated, thus it will always fit into a 34 bytes buffer. + * This also always include the local timezone (in +/-HH:mm format) . + */ +char *timeofday_as_iso_us(int pad) +{ + struct timeval new_date; + struct tm tm; + const char *offset; + char c; + + gettimeofday(&new_date, NULL); + if (new_date.tv_sec != iso_time_sec || !new_date.tv_sec) { + get_localtime(new_date.tv_sec, &tm); + offset = get_gmt_offset(new_date.tv_sec, &tm); + if (unlikely(strftime(iso_time_str, sizeof(iso_time_str), "%Y-%m-%dT%H:%M:%S.000000+00:00", &tm) != 32)) + strlcpy2(iso_time_str, "YYYY-mm-ddTHH:MM:SS.000000-00:00", sizeof(iso_time_str)); // make the failure visible but respect format. + iso_time_str[26] = offset[0]; + iso_time_str[27] = offset[1]; + iso_time_str[28] = offset[2]; + iso_time_str[30] = offset[3]; + iso_time_str[31] = offset[4]; + iso_time_sec = new_date.tv_sec; + } + + /* utoa_pad adds a trailing 0 so we save the char for restore */ + c = iso_time_str[26]; + utoa_pad(new_date.tv_usec, iso_time_str + 20, 7); + iso_time_str[26] = c; + if (pad) { + iso_time_str[32] = ' '; + iso_time_str[33] = 0; + } + return iso_time_str; +} |