diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
commit | ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch) | |
tree | b2d64bc10158fdd5497876388cd68142ca374ed3 /tools/tracing | |
parent | Initial commit. (diff) | |
download | linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip |
Adding upstream version 6.6.15.upstream/6.6.15
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools/tracing')
24 files changed, 10355 insertions, 0 deletions
diff --git a/tools/tracing/Makefile b/tools/tracing/Makefile new file mode 100644 index 0000000000..95e485f12d --- /dev/null +++ b/tools/tracing/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../scripts/Makefile.include + +all: latency rtla + +clean: latency_clean rtla_clean + +install: latency_install rtla_install + +latency: + $(call descend,latency) + +latency_install: + $(call descend,latency,install) + +latency_clean: + $(call descend,latency,clean) + +rtla: + $(call descend,rtla) + +rtla_install: + $(call descend,rtla,install) + +rtla_clean: + $(call descend,rtla,clean) + +.PHONY: all install clean latency latency_install latency_clean \ + rtla rtla_install rtla_clean diff --git a/tools/tracing/latency/.gitignore b/tools/tracing/latency/.gitignore new file mode 100644 index 0000000000..0863960761 --- /dev/null +++ b/tools/tracing/latency/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +latency-collector diff --git a/tools/tracing/latency/Makefile b/tools/tracing/latency/Makefile new file mode 100644 index 0000000000..40c4ddaf8b --- /dev/null +++ b/tools/tracing/latency/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for vm tools +# +VAR_CFLAGS := $(shell pkg-config --cflags libtracefs 2>/dev/null) +VAR_LDLIBS := $(shell pkg-config --libs libtracefs 2>/dev/null) + +TARGETS = latency-collector +CFLAGS = -Wall -Wextra -g -O2 $(VAR_CFLAGS) +LDFLAGS = -lpthread $(VAR_LDLIBS) + +all: $(TARGETS) + +%: %.c + $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) + +clean: + $(RM) latency-collector + +prefix ?= /usr/local +sbindir ?= ${prefix}/sbin + +install: all + install -d $(DESTDIR)$(sbindir) + install -m 755 -p $(TARGETS) $(DESTDIR)$(sbindir) diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c new file mode 100644 index 0000000000..0fd9c747d3 --- /dev/null +++ b/tools/tracing/latency/latency-collector.c @@ -0,0 +1,2108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017, 2018, 2019, 2021 BMW Car IT GmbH + * Author: Viktor Rosendahl (viktor.rosendahl@bmw.de) + */ + +#define _GNU_SOURCE +#define _POSIX_C_SOURCE 200809L + +#include <ctype.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <sched.h> +#include <linux/unistd.h> +#include <signal.h> +#include <sys/inotify.h> +#include <unistd.h> +#include <pthread.h> +#include <tracefs.h> + +static const char *prg_name; +static const char *prg_unknown = "unknown program name"; + +static int fd_stdout; + +static int sched_policy; +static bool sched_policy_set; + +static int sched_pri; +static bool sched_pri_set; + +static bool trace_enable = true; +static bool setup_ftrace = true; +static bool use_random_sleep; + +#define TRACE_OPTS \ + C(FUNC_TR, "function-trace"), \ + C(DISP_GR, "display-graph"), \ + C(NR, NULL) + +#undef C +#define C(a, b) OPTIDX_##a + +enum traceopt { + TRACE_OPTS +}; + +#undef C +#define C(a, b) b + +static const char *const optstr[] = { + TRACE_OPTS +}; + +enum errhandling { + ERR_EXIT = 0, + ERR_WARN, + ERR_CLEANUP, +}; + +static bool use_options[OPTIDX_NR]; + +static char inotify_buffer[655360]; + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#define bool2str(x) (x ? "true":"false") + +#define DEFAULT_NR_PRINTER_THREADS (3) +static unsigned int nr_threads = DEFAULT_NR_PRINTER_THREADS; + +#define DEFAULT_TABLE_SIZE (2) +static unsigned int table_startsize = DEFAULT_TABLE_SIZE; + +static int verbosity; + +#define verbose_sizechange() (verbosity >= 1) +#define verbose_lostevent() (verbosity >= 2) +#define verbose_ftrace() (verbosity >= 1) + +#define was_changed(ORIG, CUR) (strcmp(ORIG, CUR) != 0) +#define needs_change(CUR, WANTED) (strcmp(CUR, WANTED) != 0) + +static const char *debug_tracefile; +static const char *debug_tracefile_dflt; +static const char *debug_maxlat; +static const char *debug_maxlat_dflt; +static const char * const DEBUG_NOFILE = "[file not found]"; + +static const char * const TR_MAXLAT = "tracing_max_latency"; +static const char * const TR_THRESH = "tracing_thresh"; +static const char * const TR_CURRENT = "current_tracer"; +static const char * const TR_OPTIONS = "trace_options"; + +static const char * const NOP_TRACER = "nop"; + +static const char * const OPT_NO_PREFIX = "no"; + +#define DFLT_THRESHOLD_US "0" +static const char *threshold = DFLT_THRESHOLD_US; + +#define DEV_URANDOM "/dev/urandom" +#define RT_DEFAULT_PRI (99) +#define DEFAULT_PRI (0) + +#define USEC_PER_MSEC (1000L) +#define NSEC_PER_USEC (1000L) +#define NSEC_PER_MSEC (USEC_PER_MSEC * NSEC_PER_USEC) + +#define MSEC_PER_SEC (1000L) +#define USEC_PER_SEC (USEC_PER_MSEC * MSEC_PER_SEC) +#define NSEC_PER_SEC (NSEC_PER_MSEC * MSEC_PER_SEC) + +#define SLEEP_TIME_MS_DEFAULT (1000L) +#define TRY_PRINTMUTEX_MS (1000) + +static long sleep_time = (USEC_PER_MSEC * SLEEP_TIME_MS_DEFAULT); + +static const char * const queue_full_warning = +"Could not queue trace for printing. It is likely that events happen faster\n" +"than what they can be printed. Probably partly because of random sleeping\n"; + +static const char * const no_tracer_msg = +"Could not find any tracers! Running this program as root may help!\n"; + +static const char * const no_latency_tr_msg = +"No latency tracers are supported by your kernel!\n"; + +struct policy { + const char *name; + int policy; + int default_pri; +}; + +static const struct policy policies[] = { + { "other", SCHED_OTHER, DEFAULT_PRI }, + { "batch", SCHED_BATCH, DEFAULT_PRI }, + { "idle", SCHED_IDLE, DEFAULT_PRI }, + { "rr", SCHED_RR, RT_DEFAULT_PRI }, + { "fifo", SCHED_FIFO, RT_DEFAULT_PRI }, + { NULL, 0, DEFAULT_PRI } +}; + +/* + * The default tracer will be the first on this list that is supported by the + * currently running Linux kernel. + */ +static const char * const relevant_tracers[] = { + "preemptirqsoff", + "preemptoff", + "irqsoff", + "wakeup", + "wakeup_rt", + "wakeup_dl", + NULL +}; + +/* This is the list of tracers for which random sleep makes sense */ +static const char * const random_tracers[] = { + "preemptirqsoff", + "preemptoff", + "irqsoff", + NULL +}; + +static const char *current_tracer; +static bool force_tracer; + +struct ftrace_state { + char *tracer; + char *thresh; + bool opt[OPTIDX_NR]; + bool opt_valid[OPTIDX_NR]; + pthread_mutex_t mutex; +}; + +struct entry { + int ticket; + int ticket_completed_ref; +}; + +struct print_state { + int ticket_counter; + int ticket_completed; + pthread_mutex_t mutex; + pthread_cond_t cond; + int cnt; + pthread_mutex_t cnt_mutex; +}; + +struct short_msg { + char buf[160]; + int len; +}; + +static struct print_state printstate; +static struct ftrace_state save_state; +volatile sig_atomic_t signal_flag; + +#define PROB_TABLE_MAX_SIZE (1000) + +int probabilities[PROB_TABLE_MAX_SIZE]; + +struct sleep_table { + int *table; + int size; + pthread_mutex_t mutex; +}; + +static struct sleep_table sleeptable; + +#define QUEUE_SIZE (10) + +struct queue { + struct entry entries[QUEUE_SIZE]; + int next_prod_idx; + int next_cons_idx; + pthread_mutex_t mutex; + pthread_cond_t cond; +}; + +#define MAX_THREADS (40) + +struct queue printqueue; +pthread_t printthread[MAX_THREADS]; +pthread_mutex_t print_mtx; +#define PRINT_BUFFER_SIZE (16 * 1024 * 1024) + +static void cleanup_exit(int status); +static int set_trace_opt(const char *opt, bool value); + +static __always_inline void *malloc_or_die(size_t size) +{ + void *ptr = malloc(size); + + if (unlikely(ptr == NULL)) { + warn("malloc() failed"); + cleanup_exit(EXIT_FAILURE); + } + return ptr; +} + +static __always_inline void *malloc_or_die_nocleanup(size_t size) +{ + void *ptr = malloc(size); + + if (unlikely(ptr == NULL)) + err(0, "malloc() failed"); + return ptr; +} + +static __always_inline void write_or_die(int fd, const char *buf, size_t count) +{ + ssize_t r; + + do { + r = write(fd, buf, count); + if (unlikely(r < 0)) { + if (errno == EINTR) + continue; + warn("write() failed"); + cleanup_exit(EXIT_FAILURE); + } + count -= r; + buf += r; + } while (count > 0); +} + +static __always_inline void clock_gettime_or_die(clockid_t clk_id, + struct timespec *tp) +{ + int r = clock_gettime(clk_id, tp); + + if (unlikely(r != 0)) + err(EXIT_FAILURE, "clock_gettime() failed"); +} + +static __always_inline void sigemptyset_or_die(sigset_t *s) +{ + if (unlikely(sigemptyset(s) != 0)) { + warn("sigemptyset() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static __always_inline void sigaddset_or_die(sigset_t *s, int signum) +{ + if (unlikely(sigaddset(s, signum) != 0)) { + warn("sigemptyset() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static __always_inline void sigaction_or_die(int signum, + const struct sigaction *act, + struct sigaction *oldact) +{ + if (unlikely(sigaction(signum, act, oldact) != 0)) { + warn("sigaction() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static void open_stdout(void) +{ + if (setvbuf(stdout, NULL, _IONBF, 0) != 0) + err(EXIT_FAILURE, "setvbuf() failed"); + fd_stdout = fileno(stdout); + if (fd_stdout < 0) + err(EXIT_FAILURE, "fileno() failed"); +} + +/* + * It's not worth it to call cleanup_exit() from mutex functions because + * cleanup_exit() uses mutexes. + */ +static __always_inline void mutex_lock(pthread_mutex_t *mtx) +{ + errno = pthread_mutex_lock(mtx); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_mutex_lock() failed"); +} + + +static __always_inline void mutex_unlock(pthread_mutex_t *mtx) +{ + errno = pthread_mutex_unlock(mtx); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_mutex_unlock() failed"); +} + +static __always_inline void cond_signal(pthread_cond_t *cond) +{ + errno = pthread_cond_signal(cond); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_cond_signal() failed"); +} + +static __always_inline void cond_wait(pthread_cond_t *restrict cond, + pthread_mutex_t *restrict mutex) +{ + errno = pthread_cond_wait(cond, mutex); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_cond_wait() failed"); +} + +static __always_inline void cond_broadcast(pthread_cond_t *cond) +{ + errno = pthread_cond_broadcast(cond); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_cond_broadcast() failed"); +} + +static __always_inline void +mutex_init(pthread_mutex_t *mutex, + const pthread_mutexattr_t *attr) +{ + errno = pthread_mutex_init(mutex, attr); + if (errno) + err(EXIT_FAILURE, "pthread_mutex_init() failed"); +} + +static __always_inline void mutexattr_init(pthread_mutexattr_t *attr) +{ + errno = pthread_mutexattr_init(attr); + if (errno) + err(EXIT_FAILURE, "pthread_mutexattr_init() failed"); +} + +static __always_inline void mutexattr_destroy(pthread_mutexattr_t *attr) +{ + errno = pthread_mutexattr_destroy(attr); + if (errno) + err(EXIT_FAILURE, "pthread_mutexattr_destroy() failed"); +} + +static __always_inline void mutexattr_settype(pthread_mutexattr_t *attr, + int type) +{ + errno = pthread_mutexattr_settype(attr, type); + if (errno) + err(EXIT_FAILURE, "pthread_mutexattr_settype() failed"); +} + +static __always_inline void condattr_init(pthread_condattr_t *attr) +{ + errno = pthread_condattr_init(attr); + if (errno) + err(EXIT_FAILURE, "pthread_condattr_init() failed"); +} + +static __always_inline void condattr_destroy(pthread_condattr_t *attr) +{ + errno = pthread_condattr_destroy(attr); + if (errno) + err(EXIT_FAILURE, "pthread_condattr_destroy() failed"); +} + +static __always_inline void condattr_setclock(pthread_condattr_t *attr, + clockid_t clock_id) +{ + errno = pthread_condattr_setclock(attr, clock_id); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_condattr_setclock() failed"); +} + +static __always_inline void cond_init(pthread_cond_t *cond, + const pthread_condattr_t *attr) +{ + errno = pthread_cond_init(cond, attr); + if (errno) + err(EXIT_FAILURE, "pthread_cond_init() failed"); +} + +static __always_inline int +cond_timedwait(pthread_cond_t *restrict cond, + pthread_mutex_t *restrict mutex, + const struct timespec *restrict abstime) +{ + errno = pthread_cond_timedwait(cond, mutex, abstime); + if (errno && errno != ETIMEDOUT) + err(EXIT_FAILURE, "pthread_cond_timedwait() failed"); + return errno; +} + +static void init_printstate(void) +{ + pthread_condattr_t cattr; + + printstate.ticket_counter = 0; + printstate.ticket_completed = 0; + printstate.cnt = 0; + + mutex_init(&printstate.mutex, NULL); + + condattr_init(&cattr); + condattr_setclock(&cattr, CLOCK_MONOTONIC); + cond_init(&printstate.cond, &cattr); + condattr_destroy(&cattr); +} + +static void init_print_mtx(void) +{ + pthread_mutexattr_t mattr; + + mutexattr_init(&mattr); + mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE); + mutex_init(&print_mtx, &mattr); + mutexattr_destroy(&mattr); + +} + +static void signal_blocking(int how) +{ + sigset_t s; + + sigemptyset_or_die(&s); + sigaddset_or_die(&s, SIGHUP); + sigaddset_or_die(&s, SIGTERM); + sigaddset_or_die(&s, SIGINT); + + errno = pthread_sigmask(how, &s, NULL); + if (unlikely(errno)) { + warn("pthread_sigmask() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static void signal_handler(int num) +{ + signal_flag = num; +} + +static void setup_sig_handler(void) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = signal_handler; + + sigaction_or_die(SIGHUP, &sa, NULL); + sigaction_or_die(SIGTERM, &sa, NULL); + sigaction_or_die(SIGINT, &sa, NULL); +} + +static void process_signal(int signal) +{ + char *name; + + name = strsignal(signal); + if (name == NULL) + printf("Received signal %d\n", signal); + else + printf("Received signal %d (%s)\n", signal, name); + cleanup_exit(EXIT_SUCCESS); +} + +static __always_inline void check_signals(void) +{ + int signal = signal_flag; + + if (unlikely(signal)) + process_signal(signal); +} + +static __always_inline void get_time_in_future(struct timespec *future, + long time_us) +{ + long nsec; + + clock_gettime_or_die(CLOCK_MONOTONIC, future); + future->tv_sec += time_us / USEC_PER_SEC; + nsec = future->tv_nsec + (time_us * NSEC_PER_USEC) % NSEC_PER_SEC; + if (nsec >= NSEC_PER_SEC) { + future->tv_nsec = nsec % NSEC_PER_SEC; + future->tv_sec += 1; + } +} + +static __always_inline bool time_has_passed(const struct timespec *time) +{ + struct timespec now; + + clock_gettime_or_die(CLOCK_MONOTONIC, &now); + if (now.tv_sec > time->tv_sec) + return true; + if (now.tv_sec < time->tv_sec) + return false; + return (now.tv_nsec >= time->tv_nsec); +} + +static bool mutex_trylock_limit(pthread_mutex_t *mutex, int time_ms) +{ + long time_us = time_ms * USEC_PER_MSEC; + struct timespec limit; + + get_time_in_future(&limit, time_us); + do { + errno = pthread_mutex_trylock(mutex); + if (errno && errno != EBUSY) + err(EXIT_FAILURE, "pthread_mutex_trylock() failed"); + } while (errno && !time_has_passed(&limit)); + return errno == 0; +} + +static void restore_trace_opts(const struct ftrace_state *state, + const bool *cur) +{ + int i; + int r; + + for (i = 0; i < OPTIDX_NR; i++) + if (state->opt_valid[i] && state->opt[i] != cur[i]) { + r = set_trace_opt(optstr[i], state->opt[i]); + if (r < 0) + warnx("Failed to restore the %s option to %s", + optstr[i], bool2str(state->opt[i])); + else if (verbose_ftrace()) + printf("Restored the %s option in %s to %s\n", + optstr[i], TR_OPTIONS, + bool2str(state->opt[i])); + } +} + +static char *read_file(const char *file, enum errhandling h) +{ + int psize; + char *r; + static const char *emsg = "Failed to read the %s file"; + + r = tracefs_instance_file_read(NULL, file, &psize); + if (!r) { + if (h) { + warn(emsg, file); + if (h == ERR_CLEANUP) + cleanup_exit(EXIT_FAILURE); + } else + errx(EXIT_FAILURE, emsg, file); + } + + if (r && r[psize - 1] == '\n') + r[psize - 1] = '\0'; + return r; +} + +static void restore_file(const char *file, char **saved, const char *cur) +{ + if (*saved && was_changed(*saved, cur)) { + if (tracefs_instance_file_write(NULL, file, *saved) < 0) + warnx("Failed to restore %s to %s!", file, *saved); + else if (verbose_ftrace()) + printf("Restored %s to %s\n", file, *saved); + free(*saved); + *saved = NULL; + } +} + +static void restore_ftrace(void) +{ + mutex_lock(&save_state.mutex); + + restore_file(TR_CURRENT, &save_state.tracer, current_tracer); + restore_file(TR_THRESH, &save_state.thresh, threshold); + restore_trace_opts(&save_state, use_options); + + mutex_unlock(&save_state.mutex); +} + +static void cleanup_exit(int status) +{ + char *maxlat; + + if (!setup_ftrace) + exit(status); + + /* + * We try the print_mtx for 1 sec in order to avoid garbled + * output if possible, but if it cannot be obtained we proceed anyway. + */ + mutex_trylock_limit(&print_mtx, TRY_PRINTMUTEX_MS); + + maxlat = read_file(TR_MAXLAT, ERR_WARN); + if (maxlat) { + printf("The maximum detected latency was: %sus\n", maxlat); + free(maxlat); + } + + restore_ftrace(); + /* + * We do not need to unlock the print_mtx here because we will exit at + * the end of this function. Unlocking print_mtx causes problems if a + * print thread happens to be waiting for the mutex because we have + * just changed the ftrace settings to the original and thus the + * print thread would output incorrect data from ftrace. + */ + exit(status); +} + +static void init_save_state(void) +{ + pthread_mutexattr_t mattr; + + mutexattr_init(&mattr); + mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE); + mutex_init(&save_state.mutex, &mattr); + mutexattr_destroy(&mattr); + + save_state.tracer = NULL; + save_state.thresh = NULL; + save_state.opt_valid[OPTIDX_FUNC_TR] = false; + save_state.opt_valid[OPTIDX_DISP_GR] = false; +} + +static int printstate_next_ticket(struct entry *req) +{ + int r; + + r = ++(printstate.ticket_counter); + req->ticket = r; + req->ticket_completed_ref = printstate.ticket_completed; + cond_broadcast(&printstate.cond); + return r; +} + +static __always_inline +void printstate_mark_req_completed(const struct entry *req) +{ + if (req->ticket > printstate.ticket_completed) + printstate.ticket_completed = req->ticket; +} + +static __always_inline +bool printstate_has_new_req_arrived(const struct entry *req) +{ + return (printstate.ticket_counter != req->ticket); +} + +static __always_inline int printstate_cnt_inc(void) +{ + int value; + + mutex_lock(&printstate.cnt_mutex); + value = ++printstate.cnt; + mutex_unlock(&printstate.cnt_mutex); + return value; +} + +static __always_inline int printstate_cnt_dec(void) +{ + int value; + + mutex_lock(&printstate.cnt_mutex); + value = --printstate.cnt; + mutex_unlock(&printstate.cnt_mutex); + return value; +} + +static __always_inline int printstate_cnt_read(void) +{ + int value; + + mutex_lock(&printstate.cnt_mutex); + value = printstate.cnt; + mutex_unlock(&printstate.cnt_mutex); + return value; +} + +static __always_inline +bool prev_req_won_race(const struct entry *req) +{ + return (printstate.ticket_completed != req->ticket_completed_ref); +} + +static void sleeptable_resize(int size, bool printout, struct short_msg *msg) +{ + int bytes; + + if (printout) { + msg->len = 0; + if (unlikely(size > PROB_TABLE_MAX_SIZE)) + bytes = snprintf(msg->buf, sizeof(msg->buf), +"Cannot increase probability table to %d (maximum size reached)\n", size); + else + bytes = snprintf(msg->buf, sizeof(msg->buf), +"Increasing probability table to %d\n", size); + if (bytes < 0) + warn("snprintf() failed"); + else + msg->len = bytes; + } + + if (unlikely(size < 0)) { + /* Should never happen */ + warnx("Bad program state at %s:%d", __FILE__, __LINE__); + cleanup_exit(EXIT_FAILURE); + return; + } + sleeptable.size = size; + sleeptable.table = &probabilities[PROB_TABLE_MAX_SIZE - size]; +} + +static void init_probabilities(void) +{ + int i; + int j = 1000; + + for (i = 0; i < PROB_TABLE_MAX_SIZE; i++) { + probabilities[i] = 1000 / j; + j--; + } + mutex_init(&sleeptable.mutex, NULL); +} + +static int table_get_probability(const struct entry *req, + struct short_msg *msg) +{ + int diff = req->ticket - req->ticket_completed_ref; + int rval = 0; + + msg->len = 0; + diff--; + /* Should never happen...*/ + if (unlikely(diff < 0)) { + warnx("Programmer assumption error at %s:%d\n", __FILE__, + __LINE__); + cleanup_exit(EXIT_FAILURE); + } + mutex_lock(&sleeptable.mutex); + if (diff >= (sleeptable.size - 1)) { + rval = sleeptable.table[sleeptable.size - 1]; + sleeptable_resize(sleeptable.size + 1, verbose_sizechange(), + msg); + } else { + rval = sleeptable.table[diff]; + } + mutex_unlock(&sleeptable.mutex); + return rval; +} + +static void init_queue(struct queue *q) +{ + q->next_prod_idx = 0; + q->next_cons_idx = 0; + mutex_init(&q->mutex, NULL); + errno = pthread_cond_init(&q->cond, NULL); + if (errno) + err(EXIT_FAILURE, "pthread_cond_init() failed"); +} + +static __always_inline int queue_len(const struct queue *q) +{ + if (q->next_prod_idx >= q->next_cons_idx) + return q->next_prod_idx - q->next_cons_idx; + else + return QUEUE_SIZE - q->next_cons_idx + q->next_prod_idx; +} + +static __always_inline int queue_nr_free(const struct queue *q) +{ + int nr_free = QUEUE_SIZE - queue_len(q); + + /* + * If there is only one slot left we will anyway lie and claim that the + * queue is full because adding an element will make it appear empty + */ + if (nr_free == 1) + nr_free = 0; + return nr_free; +} + +static __always_inline void queue_idx_inc(int *idx) +{ + *idx = (*idx + 1) % QUEUE_SIZE; +} + +static __always_inline void queue_push_to_back(struct queue *q, + const struct entry *e) +{ + q->entries[q->next_prod_idx] = *e; + queue_idx_inc(&q->next_prod_idx); +} + +static __always_inline struct entry queue_pop_from_front(struct queue *q) +{ + struct entry e = q->entries[q->next_cons_idx]; + + queue_idx_inc(&q->next_cons_idx); + return e; +} + +static __always_inline void queue_cond_signal(struct queue *q) +{ + cond_signal(&q->cond); +} + +static __always_inline void queue_cond_wait(struct queue *q) +{ + cond_wait(&q->cond, &q->mutex); +} + +static __always_inline int queue_try_to_add_entry(struct queue *q, + const struct entry *e) +{ + int r = 0; + + mutex_lock(&q->mutex); + if (queue_nr_free(q) > 0) { + queue_push_to_back(q, e); + cond_signal(&q->cond); + } else + r = -1; + mutex_unlock(&q->mutex); + return r; +} + +static struct entry queue_wait_for_entry(struct queue *q) +{ + struct entry e; + + mutex_lock(&q->mutex); + while (true) { + if (queue_len(&printqueue) > 0) { + e = queue_pop_from_front(q); + break; + } + queue_cond_wait(q); + } + mutex_unlock(&q->mutex); + + return e; +} + +static const struct policy *policy_from_name(const char *name) +{ + const struct policy *p = &policies[0]; + + while (p->name != NULL) { + if (!strcmp(name, p->name)) + return p; + p++; + } + return NULL; +} + +static const char *policy_name(int policy) +{ + const struct policy *p = &policies[0]; + static const char *rval = "unknown"; + + while (p->name != NULL) { + if (p->policy == policy) + return p->name; + p++; + } + return rval; +} + +static bool is_relevant_tracer(const char *name) +{ + unsigned int i; + + for (i = 0; relevant_tracers[i]; i++) + if (!strcmp(name, relevant_tracers[i])) + return true; + return false; +} + +static bool random_makes_sense(const char *name) +{ + unsigned int i; + + for (i = 0; random_tracers[i]; i++) + if (!strcmp(name, random_tracers[i])) + return true; + return false; +} + +static void show_available(void) +{ + char **tracers; + int found = 0; + int i; + + tracers = tracefs_tracers(NULL); + for (i = 0; tracers && tracers[i]; i++) { + if (is_relevant_tracer(tracers[i])) + found++; + } + + if (!tracers) { + warnx(no_tracer_msg); + return; + } + + if (!found) { + warnx(no_latency_tr_msg); + tracefs_list_free(tracers); + return; + } + + printf("The following latency tracers are available on your system:\n"); + for (i = 0; tracers[i]; i++) { + if (is_relevant_tracer(tracers[i])) + printf("%s\n", tracers[i]); + } + tracefs_list_free(tracers); +} + +static bool tracer_valid(const char *name, bool *notracer) +{ + char **tracers; + int i; + bool rval = false; + + *notracer = false; + tracers = tracefs_tracers(NULL); + if (!tracers) { + *notracer = true; + return false; + } + for (i = 0; tracers[i]; i++) + if (!strcmp(tracers[i], name)) { + rval = true; + break; + } + tracefs_list_free(tracers); + return rval; +} + +static const char *find_default_tracer(void) +{ + int i; + bool notracer; + bool valid; + + for (i = 0; relevant_tracers[i]; i++) { + valid = tracer_valid(relevant_tracers[i], ¬racer); + if (notracer) + errx(EXIT_FAILURE, no_tracer_msg); + if (valid) + return relevant_tracers[i]; + } + return NULL; +} + +static bool toss_coin(struct drand48_data *buffer, unsigned int prob) +{ + long r; + + if (unlikely(lrand48_r(buffer, &r))) { + warnx("lrand48_r() failed"); + cleanup_exit(EXIT_FAILURE); + } + r = r % 1000L; + if (r < prob) + return true; + else + return false; +} + + +static long go_to_sleep(const struct entry *req) +{ + struct timespec future; + long delay = sleep_time; + + get_time_in_future(&future, delay); + + mutex_lock(&printstate.mutex); + while (!printstate_has_new_req_arrived(req)) { + cond_timedwait(&printstate.cond, &printstate.mutex, &future); + if (time_has_passed(&future)) + break; + } + + if (printstate_has_new_req_arrived(req)) + delay = -1; + mutex_unlock(&printstate.mutex); + + return delay; +} + + +static void set_priority(void) +{ + int r; + pid_t pid; + struct sched_param param; + + memset(¶m, 0, sizeof(param)); + param.sched_priority = sched_pri; + + pid = getpid(); + r = sched_setscheduler(pid, sched_policy, ¶m); + + if (r != 0) + err(EXIT_FAILURE, "sched_setscheduler() failed"); +} + +pid_t latency_collector_gettid(void) +{ + return (pid_t) syscall(__NR_gettid); +} + +static void print_priority(void) +{ + pid_t tid; + int policy; + int r; + struct sched_param param; + + tid = latency_collector_gettid(); + r = pthread_getschedparam(pthread_self(), &policy, ¶m); + if (r != 0) { + warn("pthread_getschedparam() failed"); + cleanup_exit(EXIT_FAILURE); + } + mutex_lock(&print_mtx); + printf("Thread %d runs with scheduling policy %s and priority %d\n", + tid, policy_name(policy), param.sched_priority); + mutex_unlock(&print_mtx); +} + +static __always_inline +void __print_skipmessage(const struct short_msg *resize_msg, + const struct timespec *timestamp, char *buffer, + size_t bufspace, const struct entry *req, bool excuse, + const char *str) +{ + ssize_t bytes = 0; + char *p = &buffer[0]; + long us, sec; + int r; + + sec = timestamp->tv_sec; + us = timestamp->tv_nsec / 1000; + + if (resize_msg != NULL && resize_msg->len > 0) { + strncpy(p, resize_msg->buf, resize_msg->len); + bytes += resize_msg->len; + p += resize_msg->len; + bufspace -= resize_msg->len; + } + + if (excuse) + r = snprintf(p, bufspace, +"%ld.%06ld Latency %d printout skipped due to %s\n", + sec, us, req->ticket, str); + else + r = snprintf(p, bufspace, "%ld.%06ld Latency %d detected\n", + sec, us, req->ticket); + + if (r < 0) + warn("snprintf() failed"); + else + bytes += r; + + /* These prints could happen concurrently */ + mutex_lock(&print_mtx); + write_or_die(fd_stdout, buffer, bytes); + mutex_unlock(&print_mtx); +} + +static void print_skipmessage(const struct short_msg *resize_msg, + const struct timespec *timestamp, char *buffer, + size_t bufspace, const struct entry *req, + bool excuse) +{ + __print_skipmessage(resize_msg, timestamp, buffer, bufspace, req, + excuse, "random delay"); +} + +static void print_lostmessage(const struct timespec *timestamp, char *buffer, + size_t bufspace, const struct entry *req, + const char *reason) +{ + __print_skipmessage(NULL, timestamp, buffer, bufspace, req, true, + reason); +} + +static void print_tracefile(const struct short_msg *resize_msg, + const struct timespec *timestamp, char *buffer, + size_t bufspace, long slept, + const struct entry *req) +{ + static const int reserve = 256; + char *p = &buffer[0]; + ssize_t bytes = 0; + ssize_t bytes_tot = 0; + long us, sec; + long slept_ms; + int trace_fd; + + /* Save some space for the final string and final null char */ + bufspace = bufspace - reserve - 1; + + if (resize_msg != NULL && resize_msg->len > 0) { + bytes = resize_msg->len; + strncpy(p, resize_msg->buf, bytes); + bytes_tot += bytes; + p += bytes; + bufspace -= bytes; + } + + trace_fd = open(debug_tracefile, O_RDONLY); + + if (trace_fd < 0) { + warn("open() failed on %s", debug_tracefile); + return; + } + + sec = timestamp->tv_sec; + us = timestamp->tv_nsec / 1000; + + if (slept != 0) { + slept_ms = slept / 1000; + bytes = snprintf(p, bufspace, +"%ld.%06ld Latency %d randomly sleep for %ld ms before print\n", + sec, us, req->ticket, slept_ms); + } else { + bytes = snprintf(p, bufspace, + "%ld.%06ld Latency %d immediate print\n", sec, + us, req->ticket); + } + + if (bytes < 0) { + warn("snprintf() failed"); + return; + } + p += bytes; + bufspace -= bytes; + bytes_tot += bytes; + + bytes = snprintf(p, bufspace, +">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BEGIN <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n" + ); + + if (bytes < 0) { + warn("snprintf() failed"); + return; + } + + p += bytes; + bufspace -= bytes; + bytes_tot += bytes; + + do { + bytes = read(trace_fd, p, bufspace); + if (bytes < 0) { + if (errno == EINTR) + continue; + warn("read() failed on %s", debug_tracefile); + if (unlikely(close(trace_fd) != 0)) + warn("close() failed on %s", debug_tracefile); + return; + } + if (bytes == 0) + break; + p += bytes; + bufspace -= bytes; + bytes_tot += bytes; + } while (true); + + if (unlikely(close(trace_fd) != 0)) + warn("close() failed on %s", debug_tracefile); + + printstate_cnt_dec(); + /* Add the reserve space back to the budget for the final string */ + bufspace += reserve; + + bytes = snprintf(p, bufspace, + ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> END <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n"); + + if (bytes < 0) { + warn("snprintf() failed"); + return; + } + + bytes_tot += bytes; + + /* These prints could happen concurrently */ + mutex_lock(&print_mtx); + write_or_die(fd_stdout, buffer, bytes_tot); + mutex_unlock(&print_mtx); +} + +static char *get_no_opt(const char *opt) +{ + char *no_opt; + int s; + + s = strlen(opt) + strlen(OPT_NO_PREFIX) + 1; + /* We may be called from cleanup_exit() via set_trace_opt() */ + no_opt = malloc_or_die_nocleanup(s); + strcpy(no_opt, OPT_NO_PREFIX); + strcat(no_opt, opt); + return no_opt; +} + +static char *find_next_optstr(const char *allopt, const char **next) +{ + const char *begin; + const char *end; + char *r; + int s = 0; + + if (allopt == NULL) + return NULL; + + for (begin = allopt; *begin != '\0'; begin++) { + if (isgraph(*begin)) + break; + } + + if (*begin == '\0') + return NULL; + + for (end = begin; *end != '\0' && isgraph(*end); end++) + s++; + + r = malloc_or_die_nocleanup(s + 1); + strncpy(r, begin, s); + r[s] = '\0'; + *next = begin + s; + return r; +} + +static bool get_trace_opt(const char *allopt, const char *opt, bool *found) +{ + *found = false; + char *no_opt; + char *str; + const char *next = allopt; + bool rval = false; + + no_opt = get_no_opt(opt); + + do { + str = find_next_optstr(next, &next); + if (str == NULL) + break; + if (!strcmp(str, opt)) { + *found = true; + rval = true; + free(str); + break; + } + if (!strcmp(str, no_opt)) { + *found = true; + rval = false; + free(str); + break; + } + free(str); + } while (true); + free(no_opt); + + return rval; +} + +static int set_trace_opt(const char *opt, bool value) +{ + char *str; + int r; + + if (value) + str = strdup(opt); + else + str = get_no_opt(opt); + + r = tracefs_instance_file_write(NULL, TR_OPTIONS, str); + free(str); + return r; +} + +void save_trace_opts(struct ftrace_state *state) +{ + char *allopt; + int psize; + int i; + + allopt = tracefs_instance_file_read(NULL, TR_OPTIONS, &psize); + if (!allopt) + errx(EXIT_FAILURE, "Failed to read the %s file\n", TR_OPTIONS); + + for (i = 0; i < OPTIDX_NR; i++) + state->opt[i] = get_trace_opt(allopt, optstr[i], + &state->opt_valid[i]); + + free(allopt); +} + +static void write_file(const char *file, const char *cur, const char *new, + enum errhandling h) +{ + int r; + static const char *emsg = "Failed to write to the %s file!"; + + /* Do nothing if we now that the current and new value are equal */ + if (cur && !needs_change(cur, new)) + return; + + r = tracefs_instance_file_write(NULL, file, new); + if (r < 0) { + if (h) { + warnx(emsg, file); + if (h == ERR_CLEANUP) + cleanup_exit(EXIT_FAILURE); + } else + errx(EXIT_FAILURE, emsg, file); + } + if (verbose_ftrace()) { + mutex_lock(&print_mtx); + printf("%s was set to %s\n", file, new); + mutex_unlock(&print_mtx); + } +} + +static void reset_max_latency(void) +{ + write_file(TR_MAXLAT, NULL, "0", ERR_CLEANUP); +} + +static void save_and_disable_tracer(void) +{ + char *orig_th; + char *tracer; + bool need_nop = false; + + mutex_lock(&save_state.mutex); + + save_trace_opts(&save_state); + tracer = read_file(TR_CURRENT, ERR_EXIT); + orig_th = read_file(TR_THRESH, ERR_EXIT); + + if (needs_change(tracer, NOP_TRACER)) { + mutex_lock(&print_mtx); + if (force_tracer) { + printf( + "The %s tracer is already in use but proceeding anyway!\n", + tracer); + } else { + printf( + "The %s tracer is already in use, cowardly bailing out!\n" + "This could indicate that another program or instance is tracing.\n" + "Use the -F [--force] option to disregard the current tracer.\n", tracer); + exit(0); + } + mutex_unlock(&print_mtx); + need_nop = true; + } + + save_state.tracer = tracer; + save_state.thresh = orig_th; + + if (need_nop) + write_file(TR_CURRENT, NULL, NOP_TRACER, ERR_EXIT); + + mutex_unlock(&save_state.mutex); +} + +void set_trace_opts(struct ftrace_state *state, bool *new) +{ + int i; + int r; + + /* + * We only set options if we earlier detected that the option exists in + * the trace_options file and that the wanted setting is different from + * the one we saw in save_and_disable_tracer() + */ + for (i = 0; i < OPTIDX_NR; i++) + if (state->opt_valid[i] && + state->opt[i] != new[i]) { + r = set_trace_opt(optstr[i], new[i]); + if (r < 0) { + warnx("Failed to set the %s option to %s", + optstr[i], bool2str(new[i])); + cleanup_exit(EXIT_FAILURE); + } + if (verbose_ftrace()) { + mutex_lock(&print_mtx); + printf("%s in %s was set to %s\n", optstr[i], + TR_OPTIONS, bool2str(new[i])); + mutex_unlock(&print_mtx); + } + } +} + +static void enable_tracer(void) +{ + mutex_lock(&save_state.mutex); + set_trace_opts(&save_state, use_options); + + write_file(TR_THRESH, save_state.thresh, threshold, ERR_CLEANUP); + write_file(TR_CURRENT, NOP_TRACER, current_tracer, ERR_CLEANUP); + + mutex_unlock(&save_state.mutex); +} + +static void tracing_loop(void) +{ + int ifd = inotify_init(); + int wd; + const ssize_t bufsize = sizeof(inotify_buffer); + const ssize_t istructsize = sizeof(struct inotify_event); + char *buf = &inotify_buffer[0]; + ssize_t nr_read; + char *p; + int modified; + struct inotify_event *event; + struct entry req; + char *buffer; + const size_t bufspace = PRINT_BUFFER_SIZE; + struct timespec timestamp; + + print_priority(); + + buffer = malloc_or_die(bufspace); + + if (ifd < 0) + err(EXIT_FAILURE, "inotify_init() failed!"); + + + if (setup_ftrace) { + /* + * We must disable the tracer before resetting the max_latency + */ + save_and_disable_tracer(); + /* + * We must reset the max_latency before the inotify_add_watch() + * call. + */ + reset_max_latency(); + } + + wd = inotify_add_watch(ifd, debug_maxlat, IN_MODIFY); + if (wd < 0) + err(EXIT_FAILURE, "inotify_add_watch() failed!"); + + if (setup_ftrace) + enable_tracer(); + + signal_blocking(SIG_UNBLOCK); + + while (true) { + modified = 0; + check_signals(); + nr_read = read(ifd, buf, bufsize); + check_signals(); + if (nr_read < 0) { + if (errno == EINTR) + continue; + warn("read() failed on inotify fd!"); + cleanup_exit(EXIT_FAILURE); + } + if (nr_read == bufsize) + warnx("inotify() buffer filled, skipping events"); + if (nr_read < istructsize) { + warnx("read() returned too few bytes on inotify fd"); + cleanup_exit(EXIT_FAILURE); + } + + for (p = buf; p < buf + nr_read;) { + event = (struct inotify_event *) p; + if ((event->mask & IN_MODIFY) != 0) + modified++; + p += istructsize + event->len; + } + while (modified > 0) { + check_signals(); + mutex_lock(&printstate.mutex); + check_signals(); + printstate_next_ticket(&req); + if (printstate_cnt_read() > 0) { + printstate_mark_req_completed(&req); + mutex_unlock(&printstate.mutex); + if (verbose_lostevent()) { + clock_gettime_or_die(CLOCK_MONOTONIC, + ×tamp); + print_lostmessage(×tamp, buffer, + bufspace, &req, + "inotify loop"); + } + break; + } + mutex_unlock(&printstate.mutex); + if (queue_try_to_add_entry(&printqueue, &req) != 0) { + /* These prints could happen concurrently */ + check_signals(); + mutex_lock(&print_mtx); + check_signals(); + write_or_die(fd_stdout, queue_full_warning, + strlen(queue_full_warning)); + mutex_unlock(&print_mtx); + } + modified--; + } + } +} + +static void *do_printloop(void *arg) +{ + const size_t bufspace = PRINT_BUFFER_SIZE; + char *buffer; + long *rseed = (long *) arg; + struct drand48_data drandbuf; + long slept = 0; + struct entry req; + int prob = 0; + struct timespec timestamp; + struct short_msg resize_msg; + + print_priority(); + + if (srand48_r(*rseed, &drandbuf) != 0) { + warn("srand48_r() failed!\n"); + cleanup_exit(EXIT_FAILURE); + } + + buffer = malloc_or_die(bufspace); + + while (true) { + req = queue_wait_for_entry(&printqueue); + clock_gettime_or_die(CLOCK_MONOTONIC, ×tamp); + mutex_lock(&printstate.mutex); + if (prev_req_won_race(&req)) { + printstate_mark_req_completed(&req); + mutex_unlock(&printstate.mutex); + if (verbose_lostevent()) + print_lostmessage(×tamp, buffer, bufspace, + &req, "print loop"); + continue; + } + mutex_unlock(&printstate.mutex); + + /* + * Toss a coin to decide if we want to sleep before printing + * out the backtrace. The reason for this is that opening + * /sys/kernel/tracing/trace will cause a blackout of + * hundreds of ms, where no latencies will be noted by the + * latency tracer. Thus by randomly sleeping we try to avoid + * missing traces systematically due to this. With this option + * we will sometimes get the first latency, some other times + * some of the later ones, in case of closely spaced traces. + */ + if (trace_enable && use_random_sleep) { + slept = 0; + prob = table_get_probability(&req, &resize_msg); + if (!toss_coin(&drandbuf, prob)) + slept = go_to_sleep(&req); + if (slept >= 0) { + /* A print is ongoing */ + printstate_cnt_inc(); + /* + * We will do the printout below so we have to + * mark it as completed while we still have the + * mutex. + */ + mutex_lock(&printstate.mutex); + printstate_mark_req_completed(&req); + mutex_unlock(&printstate.mutex); + } + } + if (trace_enable) { + /* + * slept < 0 means that we detected another + * notification in go_to_sleep() above + */ + if (slept >= 0) + /* + * N.B. printstate_cnt_dec(); will be called + * inside print_tracefile() + */ + print_tracefile(&resize_msg, ×tamp, buffer, + bufspace, slept, &req); + else + print_skipmessage(&resize_msg, ×tamp, + buffer, bufspace, &req, true); + } else { + print_skipmessage(&resize_msg, ×tamp, buffer, + bufspace, &req, false); + } + } + return NULL; +} + +static void start_printthread(void) +{ + unsigned int i; + long *seed; + int ufd; + + ufd = open(DEV_URANDOM, O_RDONLY); + if (nr_threads > MAX_THREADS) { + warnx( +"Number of requested print threads was %d, max number is %d\n", + nr_threads, MAX_THREADS); + nr_threads = MAX_THREADS; + } + for (i = 0; i < nr_threads; i++) { + seed = malloc_or_die(sizeof(*seed)); + if (ufd < 0 || + read(ufd, seed, sizeof(*seed)) != sizeof(*seed)) { + printf( +"Warning! Using trivial random number seed, since %s not available\n", + DEV_URANDOM); + fflush(stdout); + *seed = i; + } + errno = pthread_create(&printthread[i], NULL, do_printloop, + seed); + if (errno) + err(EXIT_FAILURE, "pthread_create()"); + } + if (ufd > 0 && close(ufd) != 0) + warn("close() failed"); +} + +static void show_usage(void) +{ + printf( +"Usage: %s [OPTION]...\n\n" +"Collect closely occurring latencies from %s\n" +"with any of the following tracers: preemptirqsoff, preemptoff, irqsoff, " +"wakeup,\nwakeup_dl, or wakeup_rt.\n\n" + +"The occurrence of a latency is detected by monitoring the file\n" +"%s with inotify.\n\n" + +"The following options are supported:\n\n" + +"-l, --list\t\tList the latency tracers that are supported by the\n" +"\t\t\tcurrently running Linux kernel. If you don't see the\n" +"\t\t\ttracer that you want, you will probably need to\n" +"\t\t\tchange your kernel config and build a new kernel.\n\n" + +"-t, --tracer TR\t\tUse the tracer TR. The default is to use the first\n" +"\t\t\ttracer that is supported by the kernel in the following\n" +"\t\t\torder of precedence:\n\n" +"\t\t\tpreemptirqsoff\n" +"\t\t\tpreemptoff\n" +"\t\t\tirqsoff\n" +"\t\t\twakeup\n" +"\t\t\twakeup_rt\n" +"\t\t\twakeup_dl\n" +"\n" +"\t\t\tIf TR is not on the list above, then a warning will be\n" +"\t\t\tprinted.\n\n" + +"-F, --force\t\tProceed even if another ftrace tracer is active. Without\n" +"\t\t\tthis option, the program will refuse to start tracing if\n" +"\t\t\tany other tracer than the nop tracer is active.\n\n" + +"-s, --threshold TH\tConfigure ftrace to use a threshold of TH microseconds\n" +"\t\t\tfor the tracer. The default is 0, which means that\n" +"\t\t\ttracing_max_latency will be used. tracing_max_latency is\n" +"\t\t\tset to 0 when the program is started and contains the\n" +"\t\t\tmaximum of the latencies that have been encountered.\n\n" + +"-f, --function\t\tEnable the function-trace option in trace_options. With\n" +"\t\t\tthis option, ftrace will trace the functions that are\n" +"\t\t\texecuted during a latency, without it we only get the\n" +"\t\t\tbeginning, end, and backtrace.\n\n" + +"-g, --graph\t\tEnable the display-graph option in trace_option. This\n" +"\t\t\toption causes ftrace to show the graph of how functions\n" +"\t\t\tare calling other functions.\n\n" + +"-c, --policy POL\tRun the program with scheduling policy POL. POL can be\n" +"\t\t\tother, batch, idle, rr or fifo. The default is rr. When\n" +"\t\t\tusing rr or fifo, remember that these policies may cause\n" +"\t\t\tother tasks to experience latencies.\n\n" + +"-p, --priority PRI\tRun the program with priority PRI. The acceptable range\n" +"\t\t\tof PRI depends on the scheduling policy.\n\n" + +"-n, --notrace\t\tIf latency is detected, do not print out the content of\n" +"\t\t\tthe trace file to standard output\n\n" + +"-t, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n" + +"-r, --random\t\tArbitrarily sleep a certain amount of time, default\n" +"\t\t\t%ld ms, before reading the trace file. The\n" +"\t\t\tprobabilities for sleep are chosen so that the\n" +"\t\t\tprobability of obtaining any of a cluster of closely\n" +"\t\t\toccurring latencies are equal, i.e. we will randomly\n" +"\t\t\tchoose which one we collect from the trace file.\n\n" +"\t\t\tThis option is probably only useful with the irqsoff,\n" +"\t\t\tpreemptoff, and preemptirqsoff tracers.\n\n" + +"-a, --nrlat NRLAT\tFor the purpose of arbitrary delay, assume that there\n" +"\t\t\tare no more than NRLAT clustered latencies. If NRLAT\n" +"\t\t\tlatencies are detected during a run, this value will\n" +"\t\t\tautomatically be increased to NRLAT + 1 and then to\n" +"\t\t\tNRLAT + 2 and so on. The default is %d. This option\n" +"\t\t\timplies -r. We need to know this number in order to\n" +"\t\t\tbe able to calculate the probabilities of sleeping.\n" +"\t\t\tSpecifically, the probabilities of not sleeping, i.e. to\n" +"\t\t\tdo an immediate printout will be:\n\n" +"\t\t\t1/NRLAT 1/(NRLAT - 1) ... 1/3 1/2 1\n\n" +"\t\t\tThe probability of sleeping will be:\n\n" +"\t\t\t1 - P, where P is from the series above\n\n" +"\t\t\tThis descending probability will cause us to choose\n" +"\t\t\tan occurrence at random. Observe that the final\n" +"\t\t\tprobability is 0, it is when we reach this probability\n" +"\t\t\tthat we increase NRLAT automatically. As an example,\n" +"\t\t\twith the default value of 2, the probabilities will be:\n\n" +"\t\t\t1/2 0\n\n" +"\t\t\tThis means, when a latency is detected we will sleep\n" +"\t\t\twith 50%% probability. If we ever detect another latency\n" +"\t\t\tduring the sleep period, then the probability of sleep\n" +"\t\t\twill be 0%% and the table will be expanded to:\n\n" +"\t\t\t1/3 1/2 0\n\n" + +"-v, --verbose\t\tIncrease the verbosity. If this option is given once,\n" +"\t\t\tthen print a message every time that the NRLAT value\n" +"\t\t\tis automatically increased. It also causes a message to\n" +"\t\t\tbe printed when the ftrace settings are changed. If this\n" +"\t\t\toption is given at least twice, then also print a\n" +"\t\t\twarning for lost events.\n\n" + +"-u, --time TIME\t\tArbitrarily sleep for a specified time TIME ms before\n" +"\t\t\tprinting out the trace from the trace file. The default\n" +"\t\t\tis %ld ms. This option implies -r.\n\n" + +"-x, --no-ftrace\t\tDo not configure ftrace. This assume that the user\n" +"\t\t\tconfigures the ftrace files in sysfs such as\n" +"\t\t\t/sys/kernel/tracing/current_tracer or equivalent.\n\n" + +"-i, --tracefile FILE\tUse FILE as trace file. The default is\n" +"\t\t\t%s.\n" +"\t\t\tThis options implies -x\n\n" + +"-m, --max-lat FILE\tUse FILE as tracing_max_latency file. The default is\n" +"\t\t\t%s.\n" +"\t\t\tThis options implies -x\n\n" +, +prg_name, debug_tracefile_dflt, debug_maxlat_dflt, DEFAULT_NR_PRINTER_THREADS, +SLEEP_TIME_MS_DEFAULT, DEFAULT_TABLE_SIZE, SLEEP_TIME_MS_DEFAULT, +debug_tracefile_dflt, debug_maxlat_dflt); +} + +static void find_tracefiles(void) +{ + debug_tracefile_dflt = tracefs_get_tracing_file("trace"); + if (debug_tracefile_dflt == NULL) { + /* This is needed in show_usage() */ + debug_tracefile_dflt = DEBUG_NOFILE; + } + + debug_maxlat_dflt = tracefs_get_tracing_file("tracing_max_latency"); + if (debug_maxlat_dflt == NULL) { + /* This is needed in show_usage() */ + debug_maxlat_dflt = DEBUG_NOFILE; + } + + debug_tracefile = debug_tracefile_dflt; + debug_maxlat = debug_maxlat_dflt; +} + +bool alldigits(const char *s) +{ + for (; *s != '\0'; s++) + if (!isdigit(*s)) + return false; + return true; +} + +void check_alldigits(const char *optarg, const char *argname) +{ + if (!alldigits(optarg)) + errx(EXIT_FAILURE, + "The %s parameter expects a decimal argument\n", argname); +} + +static void scan_arguments(int argc, char *argv[]) +{ + int c; + int i; + int option_idx = 0; + + static struct option long_options[] = { + { "list", no_argument, 0, 'l' }, + { "tracer", required_argument, 0, 't' }, + { "force", no_argument, 0, 'F' }, + { "threshold", required_argument, 0, 's' }, + { "function", no_argument, 0, 'f' }, + { "graph", no_argument, 0, 'g' }, + { "policy", required_argument, 0, 'c' }, + { "priority", required_argument, 0, 'p' }, + { "help", no_argument, 0, 'h' }, + { "notrace", no_argument, 0, 'n' }, + { "random", no_argument, 0, 'r' }, + { "nrlat", required_argument, 0, 'a' }, + { "threads", required_argument, 0, 'e' }, + { "time", required_argument, 0, 'u' }, + { "verbose", no_argument, 0, 'v' }, + { "no-ftrace", no_argument, 0, 'x' }, + { "tracefile", required_argument, 0, 'i' }, + { "max-lat", required_argument, 0, 'm' }, + { 0, 0, 0, 0 } + }; + const struct policy *p; + int max, min; + int value; + bool notracer, valid; + + /* + * We must do this before parsing the arguments because show_usage() + * needs to display these. + */ + find_tracefiles(); + + while (true) { + c = getopt_long(argc, argv, "lt:Fs:fgc:p:hnra:e:u:vxi:m:", + long_options, &option_idx); + if (c == -1) + break; + + switch (c) { + case 'l': + show_available(); + exit(0); + break; + case 't': + current_tracer = strdup(optarg); + if (!is_relevant_tracer(current_tracer)) { + warnx("%s is not a known latency tracer!\n", + current_tracer); + } + valid = tracer_valid(current_tracer, ¬racer); + if (notracer) + errx(EXIT_FAILURE, no_tracer_msg); + if (!valid) + errx(EXIT_FAILURE, +"The tracer %s is not supported by your kernel!\n", current_tracer); + break; + case 'F': + force_tracer = true; + break; + case 's': + check_alldigits(optarg, "-s [--threshold]"); + threshold = strdup(optarg); + break; + case 'f': + use_options[OPTIDX_FUNC_TR] = true; + break; + case 'g': + use_options[OPTIDX_DISP_GR] = true; + break; + case 'c': + p = policy_from_name(optarg); + if (p != NULL) { + sched_policy = p->policy; + sched_policy_set = true; + if (!sched_pri_set) { + sched_pri = p->default_pri; + sched_pri_set = true; + } + } else { + warnx("Unknown scheduling %s\n", optarg); + show_usage(); + exit(0); + } + break; + case 'p': + check_alldigits(optarg, "-p [--priority]"); + sched_pri = atoi(optarg); + sched_pri_set = true; + break; + case 'h': + show_usage(); + exit(0); + break; + case 'n': + trace_enable = false; + use_random_sleep = false; + break; + case 'e': + check_alldigits(optarg, "-e [--threads]"); + value = atoi(optarg); + if (value > 0) + nr_threads = value; + else { + warnx("NRTHR must be > 0\n"); + show_usage(); + exit(0); + } + break; + case 'u': + check_alldigits(optarg, "-u [--time]"); + value = atoi(optarg); + if (value < 0) { + warnx("TIME must be >= 0\n"); + show_usage(); + exit(0); + } + trace_enable = true; + use_random_sleep = true; + sleep_time = value * USEC_PER_MSEC; + break; + case 'v': + verbosity++; + break; + case 'r': + trace_enable = true; + use_random_sleep = true; + break; + case 'a': + check_alldigits(optarg, "-a [--nrlat]"); + value = atoi(optarg); + if (value <= 0) { + warnx("NRLAT must be > 0\n"); + show_usage(); + exit(0); + } + trace_enable = true; + use_random_sleep = true; + table_startsize = value; + break; + case 'x': + setup_ftrace = false; + break; + case 'i': + setup_ftrace = false; + debug_tracefile = strdup(optarg); + break; + case 'm': + setup_ftrace = false; + debug_maxlat = strdup(optarg); + break; + default: + show_usage(); + exit(0); + break; + } + } + + if (setup_ftrace) { + if (!current_tracer) { + current_tracer = find_default_tracer(); + if (!current_tracer) + errx(EXIT_FAILURE, +"No default tracer found and tracer not specified\n"); + } + + if (use_random_sleep && !random_makes_sense(current_tracer)) { + warnx("WARNING: The tracer is %s and random sleep has", + current_tracer); + fprintf(stderr, +"been enabled. Random sleep is intended for the following tracers:\n"); + for (i = 0; random_tracers[i]; i++) + fprintf(stderr, "%s\n", random_tracers[i]); + fprintf(stderr, "\n"); + } + } + + if (debug_tracefile == DEBUG_NOFILE || + debug_maxlat == DEBUG_NOFILE) + errx(EXIT_FAILURE, +"Could not find tracing directory e.g. /sys/kernel/tracing\n"); + + if (!sched_policy_set) { + sched_policy = SCHED_RR; + sched_policy_set = true; + if (!sched_pri_set) { + sched_pri = RT_DEFAULT_PRI; + sched_pri_set = true; + } + } + + max = sched_get_priority_max(sched_policy); + min = sched_get_priority_min(sched_policy); + + if (sched_pri < min) { + printf( +"ATTENTION: Increasing priority to minimum, which is %d\n", min); + sched_pri = min; + } + if (sched_pri > max) { + printf( +"ATTENTION: Reducing priority to maximum, which is %d\n", max); + sched_pri = max; + } +} + +static void show_params(void) +{ + printf( +"\n" +"Running with scheduling policy %s and priority %d. Using %d print threads.\n", + policy_name(sched_policy), sched_pri, nr_threads); + if (trace_enable) { + if (use_random_sleep) { + printf( +"%s will be printed with random delay\n" +"Start size of the probability table:\t\t\t%d\n" +"Print a message when the prob. table changes size:\t%s\n" +"Print a warning when an event has been lost:\t\t%s\n" +"Sleep time is:\t\t\t\t\t\t%ld ms\n", +debug_tracefile, +table_startsize, +bool2str(verbose_sizechange()), +bool2str(verbose_lostevent()), +sleep_time / USEC_PER_MSEC); + } else { + printf("%s will be printed immediately\n", + debug_tracefile); + } + } else { + printf("%s will not be printed\n", + debug_tracefile); + } + if (setup_ftrace) { + printf("Tracer:\t\t\t\t\t\t\t%s\n" + "%s option:\t\t\t\t\t%s\n" + "%s option:\t\t\t\t\t%s\n", + current_tracer, + optstr[OPTIDX_FUNC_TR], + bool2str(use_options[OPTIDX_FUNC_TR]), + optstr[OPTIDX_DISP_GR], + bool2str(use_options[OPTIDX_DISP_GR])); + if (!strcmp(threshold, "0")) + printf("Threshold:\t\t\t\t\t\ttracing_max_latency\n"); + else + printf("Threshold:\t\t\t\t\t\t%s\n", threshold); + } + printf("\n"); +} + +int main(int argc, char *argv[]) +{ + init_save_state(); + signal_blocking(SIG_BLOCK); + setup_sig_handler(); + open_stdout(); + + if (argc >= 1) + prg_name = argv[0]; + else + prg_name = prg_unknown; + + scan_arguments(argc, argv); + show_params(); + + init_printstate(); + init_print_mtx(); + if (use_random_sleep) { + init_probabilities(); + if (verbose_sizechange()) + printf("Initializing probability table to %d\n", + table_startsize); + sleeptable_resize(table_startsize, false, NULL); + } + set_priority(); + init_queue(&printqueue); + start_printthread(); + tracing_loop(); + return 0; +} diff --git a/tools/tracing/rtla/.gitignore b/tools/tracing/rtla/.gitignore new file mode 100644 index 0000000000..e9df32419b --- /dev/null +++ b/tools/tracing/rtla/.gitignore @@ -0,0 +1 @@ +/rtla diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile new file mode 100644 index 0000000000..2456a399eb --- /dev/null +++ b/tools/tracing/rtla/Makefile @@ -0,0 +1,152 @@ +NAME := rtla +# Follow the kernel version +VERSION := $(shell cat VERSION 2> /dev/null || make -sC ../../.. kernelversion | grep -v make) + +# From libtracefs: +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,STRIP,$(CROSS_COMPILE)strip) +$(call allow-override,PKG_CONFIG,pkg-config) +$(call allow-override,LD_SO_CONF_PATH,/etc/ld.so.conf.d/) +$(call allow-override,LDCONFIG,ldconfig) + +INSTALL = install +MKDIR = mkdir +FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \ + -fasynchronous-unwind-tables -fstack-clash-protection +WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized + +TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) + +CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS) +LDFLAGS := -ggdb $(EXTRA_LDFLAGS) +LIBS := $$($(PKG_CONFIG) --libs libtracefs) + +SRC := $(wildcard src/*.c) +HDR := $(wildcard src/*.h) +OBJ := $(SRC:.c=.o) +DIRS := src +FILES := Makefile README.txt +CEXT := bz2 +TARBALL := $(NAME)-$(VERSION).tar.$(CEXT) +TAROPTS := -cvjf $(TARBALL) +BINDIR := /usr/bin +DATADIR := /usr/share +DOCDIR := $(DATADIR)/doc +MANDIR := $(DATADIR)/man +LICDIR := $(DATADIR)/licenses +SRCTREE := $(or $(BUILD_SRC),$(CURDIR)) + +# If running from the tarball, man pages are stored in the Documentation +# dir. If running from the kernel source, man pages are stored in +# Documentation/tools/rtla/. +ifneq ($(wildcard Documentation/.*),) +DOCSRC = Documentation/ +else +DOCSRC = $(SRCTREE)/../../../Documentation/tools/rtla/ +endif + +LIBTRACEEVENT_MIN_VERSION = 1.5 +LIBTRACEFS_MIN_VERSION = 1.3 + +.PHONY: all warnings show_warnings +all: warnings rtla + +TEST_LIBTRACEEVENT = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEEVENT_MIN_VERSION) libtraceevent > /dev/null 2>&1 || echo n") +ifeq ("$(TEST_LIBTRACEEVENT)", "n") +WARNINGS = show_warnings +MISSING_LIBS += echo "** libtraceevent version $(LIBTRACEEVENT_MIN_VERSION) or higher"; +MISSING_PACKAGES += "libtraceevent-devel" +MISSING_SOURCE += echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git/ "; +endif + +TEST_LIBTRACEFS = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEFS_MIN_VERSION) libtracefs > /dev/null 2>&1 || echo n") +ifeq ("$(TEST_LIBTRACEFS)", "n") +WARNINGS = show_warnings +MISSING_LIBS += echo "** libtracefs version $(LIBTRACEFS_MIN_VERSION) or higher"; +MISSING_PACKAGES += "libtracefs-devel" +MISSING_SOURCE += echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git/ "; +endif + +define show_dependencies + @echo "********************************************"; \ + echo "** NOTICE: Failed build dependencies"; \ + echo "**"; \ + echo "** Required Libraries:"; \ + $(MISSING_LIBS) \ + echo "**"; \ + echo "** Consider installing the latest libtracefs from your"; \ + echo "** distribution, e.g., 'dnf install $(MISSING_PACKAGES)' on Fedora,"; \ + echo "** or from source:"; \ + echo "**"; \ + $(MISSING_SOURCE) \ + echo "**"; \ + echo "********************************************" +endef + +show_warnings: + $(call show_dependencies); + +ifneq ("$(WARNINGS)", "") +ERROR_OUT = $(error Please add the necessary dependencies) + +warnings: $(WARNINGS) + $(ERROR_OUT) +endif + +rtla: $(OBJ) + $(CC) -o rtla $(LDFLAGS) $(OBJ) $(LIBS) + +static: $(OBJ) + $(CC) -o rtla-static $(LDFLAGS) --static $(OBJ) $(LIBS) -lpthread -ldl + +.PHONY: install +install: doc_install + $(MKDIR) -p $(DESTDIR)$(BINDIR) + $(INSTALL) rtla -m 755 $(DESTDIR)$(BINDIR) + $(STRIP) $(DESTDIR)$(BINDIR)/rtla + @test ! -f $(DESTDIR)$(BINDIR)/osnoise || rm $(DESTDIR)$(BINDIR)/osnoise + ln -s rtla $(DESTDIR)$(BINDIR)/osnoise + @test ! -f $(DESTDIR)$(BINDIR)/hwnoise || rm $(DESTDIR)$(BINDIR)/hwnoise + ln -s rtla $(DESTDIR)$(BINDIR)/hwnoise + @test ! -f $(DESTDIR)$(BINDIR)/timerlat || rm $(DESTDIR)$(BINDIR)/timerlat + ln -s rtla $(DESTDIR)$(BINDIR)/timerlat + +.PHONY: clean tarball +clean: doc_clean + @test ! -f rtla || rm rtla + @test ! -f rtla-static || rm rtla-static + @test ! -f src/rtla.o || rm src/rtla.o + @test ! -f $(TARBALL) || rm -f $(TARBALL) + @rm -rf *~ $(OBJ) *.tar.$(CEXT) + +tarball: clean + rm -rf $(NAME)-$(VERSION) && mkdir $(NAME)-$(VERSION) + echo $(VERSION) > $(NAME)-$(VERSION)/VERSION + cp -r $(DIRS) $(FILES) $(NAME)-$(VERSION) + mkdir $(NAME)-$(VERSION)/Documentation/ + cp -rp $(SRCTREE)/../../../Documentation/tools/rtla/* $(NAME)-$(VERSION)/Documentation/ + tar $(TAROPTS) --exclude='*~' $(NAME)-$(VERSION) + rm -rf $(NAME)-$(VERSION) + +.PHONY: doc doc_clean doc_install +doc: + $(MAKE) -C $(DOCSRC) + +doc_clean: + $(MAKE) -C $(DOCSRC) clean + +doc_install: + $(MAKE) -C $(DOCSRC) install diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt new file mode 100644 index 0000000000..4af3fd40f1 --- /dev/null +++ b/tools/tracing/rtla/README.txt @@ -0,0 +1,33 @@ +RTLA: Real-Time Linux Analysis tools + +The rtla meta-tool includes a set of commands that aims to analyze +the real-time properties of Linux. Instead of testing Linux as a black box, +rtla leverages kernel tracing capabilities to provide precise information +about the properties and root causes of unexpected results. + +Installing RTLA + +RTLA depends on the following libraries and tools: + + - libtracefs + - libtraceevent + +It also depends on python3-docutils to compile man pages. + +For development, we suggest the following steps for compiling rtla: + + $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git + $ cd libtraceevent/ + $ make + $ sudo make install + $ cd .. + $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git + $ cd libtracefs/ + $ make + $ sudo make install + $ cd .. + $ cd $rtla_src + $ make + $ sudo make install + +For further information, please refer to the rtla man page. diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c new file mode 100644 index 0000000000..245e934493 --- /dev/null +++ b/tools/tracing/rtla/src/osnoise.c @@ -0,0 +1,1142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> + +#include "osnoise.h" +#include "utils.h" + +/* + * osnoise_get_cpus - return the original "osnoise/cpus" content + * + * It also saves the value to be restored. + */ +char *osnoise_get_cpus(struct osnoise_context *context) +{ + if (context->curr_cpus) + return context->curr_cpus; + + if (context->orig_cpus) + return context->orig_cpus; + + context->orig_cpus = tracefs_instance_file_read(NULL, "osnoise/cpus", NULL); + + /* + * The error value (NULL) is the same for tracefs_instance_file_read() + * and this functions, so: + */ + return context->orig_cpus; +} + +/* + * osnoise_set_cpus - configure osnoise to run on *cpus + * + * "osnoise/cpus" file is used to set the cpus in which osnoise/timerlat + * will run. This function opens this file, saves the current value, + * and set the cpus passed as argument. + */ +int osnoise_set_cpus(struct osnoise_context *context, char *cpus) +{ + char *orig_cpus = osnoise_get_cpus(context); + char buffer[1024]; + int retval; + + if (!orig_cpus) + return -1; + + context->curr_cpus = strdup(cpus); + if (!context->curr_cpus) + return -1; + + snprintf(buffer, 1024, "%s\n", cpus); + + debug_msg("setting cpus to %s from %s", cpus, context->orig_cpus); + + retval = tracefs_instance_file_write(NULL, "osnoise/cpus", buffer); + if (retval < 0) { + free(context->curr_cpus); + context->curr_cpus = NULL; + return -1; + } + + return 0; +} + +/* + * osnoise_restore_cpus - restore the original "osnoise/cpus" + * + * osnoise_set_cpus() saves the original data for the "osnoise/cpus" + * file. This function restore the original config it was previously + * modified. + */ +void osnoise_restore_cpus(struct osnoise_context *context) +{ + int retval; + + if (!context->orig_cpus) + return; + + if (!context->curr_cpus) + return; + + /* nothing to do? */ + if (!strcmp(context->orig_cpus, context->curr_cpus)) + goto out_done; + + debug_msg("restoring cpus to %s", context->orig_cpus); + + retval = tracefs_instance_file_write(NULL, "osnoise/cpus", context->orig_cpus); + if (retval < 0) + err_msg("could not restore original osnoise cpus\n"); + +out_done: + free(context->curr_cpus); + context->curr_cpus = NULL; +} + +/* + * osnoise_put_cpus - restore cpus config and cleanup data + */ +void osnoise_put_cpus(struct osnoise_context *context) +{ + osnoise_restore_cpus(context); + + if (!context->orig_cpus) + return; + + free(context->orig_cpus); + context->orig_cpus = NULL; +} + +/* + * osnoise_read_ll_config - read a long long value from a config + * + * returns -1 on error. + */ +static long long osnoise_read_ll_config(char *rel_path) +{ + long long retval; + char *buffer; + + buffer = tracefs_instance_file_read(NULL, rel_path, NULL); + if (!buffer) + return -1; + + /* get_llong_from_str returns -1 on error */ + retval = get_llong_from_str(buffer); + + debug_msg("reading %s returned %lld\n", rel_path, retval); + + free(buffer); + + return retval; +} + +/* + * osnoise_write_ll_config - write a long long value to a config in rel_path + * + * returns -1 on error. + */ +static long long osnoise_write_ll_config(char *rel_path, long long value) +{ + char buffer[BUFF_U64_STR_SIZE]; + long long retval; + + snprintf(buffer, sizeof(buffer), "%lld\n", value); + + debug_msg("setting %s to %lld\n", rel_path, value); + + retval = tracefs_instance_file_write(NULL, rel_path, buffer); + return retval; +} + +/* + * osnoise_get_runtime - return the original "osnoise/runtime_us" value + * + * It also saves the value to be restored. + */ +unsigned long long osnoise_get_runtime(struct osnoise_context *context) +{ + long long runtime_us; + + if (context->runtime_us != OSNOISE_TIME_INIT_VAL) + return context->runtime_us; + + if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL) + return context->orig_runtime_us; + + runtime_us = osnoise_read_ll_config("osnoise/runtime_us"); + if (runtime_us < 0) + goto out_err; + + context->orig_runtime_us = runtime_us; + return runtime_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_period - return the original "osnoise/period_us" value + * + * It also saves the value to be restored. + */ +unsigned long long osnoise_get_period(struct osnoise_context *context) +{ + long long period_us; + + if (context->period_us != OSNOISE_TIME_INIT_VAL) + return context->period_us; + + if (context->orig_period_us != OSNOISE_TIME_INIT_VAL) + return context->orig_period_us; + + period_us = osnoise_read_ll_config("osnoise/period_us"); + if (period_us < 0) + goto out_err; + + context->orig_period_us = period_us; + return period_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +static int __osnoise_write_runtime(struct osnoise_context *context, + unsigned long long runtime) +{ + int retval; + + if (context->orig_runtime_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/runtime_us", runtime); + if (retval < 0) + return -1; + + context->runtime_us = runtime; + return 0; +} + +static int __osnoise_write_period(struct osnoise_context *context, + unsigned long long period) +{ + int retval; + + if (context->orig_period_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/period_us", period); + if (retval < 0) + return -1; + + context->period_us = period; + return 0; +} + +/* + * osnoise_set_runtime_period - set osnoise runtime and period + * + * Osnoise's runtime and period are related as runtime <= period. + * Thus, this function saves the original values, and then tries + * to set the runtime and period if they are != 0. + */ +int osnoise_set_runtime_period(struct osnoise_context *context, + unsigned long long runtime, + unsigned long long period) +{ + unsigned long long curr_runtime_us; + unsigned long long curr_period_us; + int retval; + + if (!period && !runtime) + return 0; + + curr_runtime_us = osnoise_get_runtime(context); + curr_period_us = osnoise_get_period(context); + + /* error getting any value? */ + if (curr_period_us == OSNOISE_TIME_INIT_VAL || curr_runtime_us == OSNOISE_TIME_INIT_VAL) + return -1; + + if (!period) { + if (runtime > curr_period_us) + return -1; + return __osnoise_write_runtime(context, runtime); + } else if (!runtime) { + if (period < curr_runtime_us) + return -1; + return __osnoise_write_period(context, period); + } + + if (runtime > curr_period_us) { + retval = __osnoise_write_period(context, period); + if (retval) + return -1; + retval = __osnoise_write_runtime(context, runtime); + if (retval) + return -1; + } else { + retval = __osnoise_write_runtime(context, runtime); + if (retval) + return -1; + retval = __osnoise_write_period(context, period); + if (retval) + return -1; + } + + return 0; +} + +/* + * osnoise_restore_runtime_period - restore the original runtime and period + */ +void osnoise_restore_runtime_period(struct osnoise_context *context) +{ + unsigned long long orig_runtime = context->orig_runtime_us; + unsigned long long orig_period = context->orig_period_us; + unsigned long long curr_runtime = context->runtime_us; + unsigned long long curr_period = context->period_us; + int retval; + + if ((orig_runtime == OSNOISE_TIME_INIT_VAL) && (orig_period == OSNOISE_TIME_INIT_VAL)) + return; + + if ((orig_period == curr_period) && (orig_runtime == curr_runtime)) + goto out_done; + + retval = osnoise_set_runtime_period(context, orig_runtime, orig_period); + if (retval) + err_msg("Could not restore original osnoise runtime/period\n"); + +out_done: + context->runtime_us = OSNOISE_TIME_INIT_VAL; + context->period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_put_runtime_period - restore original values and cleanup data + */ +void osnoise_put_runtime_period(struct osnoise_context *context) +{ + osnoise_restore_runtime_period(context); + + if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL) + context->orig_runtime_us = OSNOISE_TIME_INIT_VAL; + + if (context->orig_period_us != OSNOISE_TIME_INIT_VAL) + context->orig_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_timerlat_period_us - read and save the original "timerlat_period_us" + */ +static long long +osnoise_get_timerlat_period_us(struct osnoise_context *context) +{ + long long timerlat_period_us; + + if (context->timerlat_period_us != OSNOISE_TIME_INIT_VAL) + return context->timerlat_period_us; + + if (context->orig_timerlat_period_us != OSNOISE_TIME_INIT_VAL) + return context->orig_timerlat_period_us; + + timerlat_period_us = osnoise_read_ll_config("osnoise/timerlat_period_us"); + if (timerlat_period_us < 0) + goto out_err; + + context->orig_timerlat_period_us = timerlat_period_us; + return timerlat_period_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_set_timerlat_period_us - set "timerlat_period_us" + */ +int osnoise_set_timerlat_period_us(struct osnoise_context *context, long long timerlat_period_us) +{ + long long curr_timerlat_period_us = osnoise_get_timerlat_period_us(context); + int retval; + + if (curr_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/timerlat_period_us", timerlat_period_us); + if (retval < 0) + return -1; + + context->timerlat_period_us = timerlat_period_us; + + return 0; +} + +/* + * osnoise_restore_timerlat_period_us - restore "timerlat_period_us" + */ +void osnoise_restore_timerlat_period_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return; + + if (context->orig_timerlat_period_us == context->timerlat_period_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/timerlat_period_us", context->orig_timerlat_period_us); + if (retval < 0) + err_msg("Could not restore original osnoise timerlat_period_us\n"); + +out_done: + context->timerlat_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_put_timerlat_period_us - restore original values and cleanup data + */ +void osnoise_put_timerlat_period_us(struct osnoise_context *context) +{ + osnoise_restore_timerlat_period_us(context); + + if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return; + + context->orig_timerlat_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_stop_us - read and save the original "stop_tracing_us" + */ +static long long +osnoise_get_stop_us(struct osnoise_context *context) +{ + long long stop_us; + + if (context->stop_us != OSNOISE_OPTION_INIT_VAL) + return context->stop_us; + + if (context->orig_stop_us != OSNOISE_OPTION_INIT_VAL) + return context->orig_stop_us; + + stop_us = osnoise_read_ll_config("osnoise/stop_tracing_us"); + if (stop_us < 0) + goto out_err; + + context->orig_stop_us = stop_us; + return stop_us; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_stop_us - set "stop_tracing_us" + */ +int osnoise_set_stop_us(struct osnoise_context *context, long long stop_us) +{ + long long curr_stop_us = osnoise_get_stop_us(context); + int retval; + + if (curr_stop_us == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_us", stop_us); + if (retval < 0) + return -1; + + context->stop_us = stop_us; + + return 0; +} + +/* + * osnoise_restore_stop_us - restore the original "stop_tracing_us" + */ +void osnoise_restore_stop_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_stop_us == context->stop_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_us", context->orig_stop_us); + if (retval < 0) + err_msg("Could not restore original osnoise stop_us\n"); + +out_done: + context->stop_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_stop_us - restore original values and cleanup data + */ +void osnoise_put_stop_us(struct osnoise_context *context) +{ + osnoise_restore_stop_us(context); + + if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_stop_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_get_stop_total_us - read and save the original "stop_tracing_total_us" + */ +static long long +osnoise_get_stop_total_us(struct osnoise_context *context) +{ + long long stop_total_us; + + if (context->stop_total_us != OSNOISE_OPTION_INIT_VAL) + return context->stop_total_us; + + if (context->orig_stop_total_us != OSNOISE_OPTION_INIT_VAL) + return context->orig_stop_total_us; + + stop_total_us = osnoise_read_ll_config("osnoise/stop_tracing_total_us"); + if (stop_total_us < 0) + goto out_err; + + context->orig_stop_total_us = stop_total_us; + return stop_total_us; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_stop_total_us - set "stop_tracing_total_us" + */ +int osnoise_set_stop_total_us(struct osnoise_context *context, long long stop_total_us) +{ + long long curr_stop_total_us = osnoise_get_stop_total_us(context); + int retval; + + if (curr_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us", stop_total_us); + if (retval < 0) + return -1; + + context->stop_total_us = stop_total_us; + + return 0; +} + +/* + * osnoise_restore_stop_total_us - restore the original "stop_tracing_total_us" + */ +void osnoise_restore_stop_total_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_stop_total_us == context->stop_total_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us", + context->orig_stop_total_us); + if (retval < 0) + err_msg("Could not restore original osnoise stop_total_us\n"); + +out_done: + context->stop_total_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_stop_total_us - restore original values and cleanup data + */ +void osnoise_put_stop_total_us(struct osnoise_context *context) +{ + osnoise_restore_stop_total_us(context); + + if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_get_print_stack - read and save the original "print_stack" + */ +static long long +osnoise_get_print_stack(struct osnoise_context *context) +{ + long long print_stack; + + if (context->print_stack != OSNOISE_OPTION_INIT_VAL) + return context->print_stack; + + if (context->orig_print_stack != OSNOISE_OPTION_INIT_VAL) + return context->orig_print_stack; + + print_stack = osnoise_read_ll_config("osnoise/print_stack"); + if (print_stack < 0) + goto out_err; + + context->orig_print_stack = print_stack; + return print_stack; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_print_stack - set "print_stack" + */ +int osnoise_set_print_stack(struct osnoise_context *context, long long print_stack) +{ + long long curr_print_stack = osnoise_get_print_stack(context); + int retval; + + if (curr_print_stack == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/print_stack", print_stack); + if (retval < 0) + return -1; + + context->print_stack = print_stack; + + return 0; +} + +/* + * osnoise_restore_print_stack - restore the original "print_stack" + */ +void osnoise_restore_print_stack(struct osnoise_context *context) +{ + int retval; + + if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_print_stack == context->print_stack) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/print_stack", context->orig_print_stack); + if (retval < 0) + err_msg("Could not restore original osnoise print_stack\n"); + +out_done: + context->print_stack = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_print_stack - restore original values and cleanup data + */ +void osnoise_put_print_stack(struct osnoise_context *context) +{ + osnoise_restore_print_stack(context); + + if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_print_stack = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_get_tracing_thresh - read and save the original "tracing_thresh" + */ +static long long +osnoise_get_tracing_thresh(struct osnoise_context *context) +{ + long long tracing_thresh; + + if (context->tracing_thresh != OSNOISE_OPTION_INIT_VAL) + return context->tracing_thresh; + + if (context->orig_tracing_thresh != OSNOISE_OPTION_INIT_VAL) + return context->orig_tracing_thresh; + + tracing_thresh = osnoise_read_ll_config("tracing_thresh"); + if (tracing_thresh < 0) + goto out_err; + + context->orig_tracing_thresh = tracing_thresh; + return tracing_thresh; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_tracing_thresh - set "tracing_thresh" + */ +int osnoise_set_tracing_thresh(struct osnoise_context *context, long long tracing_thresh) +{ + long long curr_tracing_thresh = osnoise_get_tracing_thresh(context); + int retval; + + if (curr_tracing_thresh == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("tracing_thresh", tracing_thresh); + if (retval < 0) + return -1; + + context->tracing_thresh = tracing_thresh; + + return 0; +} + +/* + * osnoise_restore_tracing_thresh - restore the original "tracing_thresh" + */ +void osnoise_restore_tracing_thresh(struct osnoise_context *context) +{ + int retval; + + if (context->orig_tracing_thresh == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_tracing_thresh == context->tracing_thresh) + goto out_done; + + retval = osnoise_write_ll_config("tracing_thresh", context->orig_tracing_thresh); + if (retval < 0) + err_msg("Could not restore original tracing_thresh\n"); + +out_done: + context->tracing_thresh = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_tracing_thresh - restore original values and cleanup data + */ +void osnoise_put_tracing_thresh(struct osnoise_context *context) +{ + osnoise_restore_tracing_thresh(context); + + if (context->orig_tracing_thresh == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_tracing_thresh = OSNOISE_OPTION_INIT_VAL; +} + +static int osnoise_options_get_option(char *option) +{ + char *options = tracefs_instance_file_read(NULL, "osnoise/options", NULL); + char no_option[128]; + int retval = 0; + char *opt; + + if (!options) + return OSNOISE_OPTION_INIT_VAL; + + /* + * Check first if the option is disabled. + */ + snprintf(no_option, sizeof(no_option), "NO_%s", option); + + opt = strstr(options, no_option); + if (opt) + goto out_free; + + /* + * Now that it is not disabled, if the string is there, it is + * enabled. If the string is not there, the option does not exist. + */ + opt = strstr(options, option); + if (opt) + retval = 1; + else + retval = OSNOISE_OPTION_INIT_VAL; + +out_free: + free(options); + return retval; +} + +static int osnoise_options_set_option(char *option, bool onoff) +{ + char no_option[128]; + + if (onoff) + return tracefs_instance_file_write(NULL, "osnoise/options", option); + + snprintf(no_option, sizeof(no_option), "NO_%s", option); + + return tracefs_instance_file_write(NULL, "osnoise/options", no_option); +} + +static int osnoise_get_irq_disable(struct osnoise_context *context) +{ + if (context->opt_irq_disable != OSNOISE_OPTION_INIT_VAL) + return context->opt_irq_disable; + + if (context->orig_opt_irq_disable != OSNOISE_OPTION_INIT_VAL) + return context->orig_opt_irq_disable; + + context->orig_opt_irq_disable = osnoise_options_get_option("OSNOISE_IRQ_DISABLE"); + + return context->orig_opt_irq_disable; +} + +int osnoise_set_irq_disable(struct osnoise_context *context, bool onoff) +{ + int opt_irq_disable = osnoise_get_irq_disable(context); + int retval; + + if (opt_irq_disable == OSNOISE_OPTION_INIT_VAL) + return -1; + + if (opt_irq_disable == onoff) + return 0; + + retval = osnoise_options_set_option("OSNOISE_IRQ_DISABLE", onoff); + if (retval < 0) + return -1; + + context->opt_irq_disable = onoff; + + return 0; +} + +static void osnoise_restore_irq_disable(struct osnoise_context *context) +{ + int retval; + + if (context->orig_opt_irq_disable == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_opt_irq_disable == context->opt_irq_disable) + goto out_done; + + retval = osnoise_options_set_option("OSNOISE_IRQ_DISABLE", context->orig_opt_irq_disable); + if (retval < 0) + err_msg("Could not restore original OSNOISE_IRQ_DISABLE option\n"); + +out_done: + context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL; +} + +static void osnoise_put_irq_disable(struct osnoise_context *context) +{ + osnoise_restore_irq_disable(context); + + if (context->orig_opt_irq_disable == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL; +} + +static int osnoise_get_workload(struct osnoise_context *context) +{ + if (context->opt_workload != OSNOISE_OPTION_INIT_VAL) + return context->opt_workload; + + if (context->orig_opt_workload != OSNOISE_OPTION_INIT_VAL) + return context->orig_opt_workload; + + context->orig_opt_workload = osnoise_options_get_option("OSNOISE_WORKLOAD"); + + return context->orig_opt_workload; +} + +int osnoise_set_workload(struct osnoise_context *context, bool onoff) +{ + int opt_workload = osnoise_get_workload(context); + int retval; + + if (opt_workload == OSNOISE_OPTION_INIT_VAL) + return -1; + + if (opt_workload == onoff) + return 0; + + retval = osnoise_options_set_option("OSNOISE_WORKLOAD", onoff); + if (retval < 0) + return -1; + + context->opt_workload = onoff; + + return 0; +} + +static void osnoise_restore_workload(struct osnoise_context *context) +{ + int retval; + + if (context->orig_opt_workload == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_opt_workload == context->opt_workload) + goto out_done; + + retval = osnoise_options_set_option("OSNOISE_WORKLOAD", context->orig_opt_workload); + if (retval < 0) + err_msg("Could not restore original OSNOISE_WORKLOAD option\n"); + +out_done: + context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL; +} + +static void osnoise_put_workload(struct osnoise_context *context) +{ + osnoise_restore_workload(context); + + if (context->orig_opt_workload == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL; +} + +/* + * enable_osnoise - enable osnoise tracer in the trace_instance + */ +int enable_osnoise(struct trace_instance *trace) +{ + return enable_tracer_by_name(trace->inst, "osnoise"); +} + +/* + * enable_timerlat - enable timerlat tracer in the trace_instance + */ +int enable_timerlat(struct trace_instance *trace) +{ + return enable_tracer_by_name(trace->inst, "timerlat"); +} + +enum { + FLAG_CONTEXT_NEWLY_CREATED = (1 << 0), + FLAG_CONTEXT_DELETED = (1 << 1), +}; + +/* + * osnoise_get_context - increase the usage of a context and return it + */ +int osnoise_get_context(struct osnoise_context *context) +{ + int ret; + + if (context->flags & FLAG_CONTEXT_DELETED) { + ret = -1; + } else { + context->ref++; + ret = 0; + } + + return ret; +} + +/* + * osnoise_context_alloc - alloc an osnoise_context + * + * The osnoise context contains the information of the "osnoise/" configs. + * It is used to set and restore the config. + */ +struct osnoise_context *osnoise_context_alloc(void) +{ + struct osnoise_context *context; + + context = calloc(1, sizeof(*context)); + if (!context) + return NULL; + + context->orig_stop_us = OSNOISE_OPTION_INIT_VAL; + context->stop_us = OSNOISE_OPTION_INIT_VAL; + + context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL; + context->stop_total_us = OSNOISE_OPTION_INIT_VAL; + + context->orig_print_stack = OSNOISE_OPTION_INIT_VAL; + context->print_stack = OSNOISE_OPTION_INIT_VAL; + + context->orig_tracing_thresh = OSNOISE_OPTION_INIT_VAL; + context->tracing_thresh = OSNOISE_OPTION_INIT_VAL; + + context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL; + context->opt_irq_disable = OSNOISE_OPTION_INIT_VAL; + + context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL; + context->opt_workload = OSNOISE_OPTION_INIT_VAL; + + osnoise_get_context(context); + + return context; +} + +/* + * osnoise_put_context - put the osnoise_put_context + * + * If there is no other user for the context, the original data + * is restored. + */ +void osnoise_put_context(struct osnoise_context *context) +{ + if (--context->ref < 1) + context->flags |= FLAG_CONTEXT_DELETED; + + if (!(context->flags & FLAG_CONTEXT_DELETED)) + return; + + osnoise_put_cpus(context); + osnoise_put_runtime_period(context); + osnoise_put_stop_us(context); + osnoise_put_stop_total_us(context); + osnoise_put_timerlat_period_us(context); + osnoise_put_print_stack(context); + osnoise_put_tracing_thresh(context); + osnoise_put_irq_disable(context); + osnoise_put_workload(context); + + free(context); +} + +/* + * osnoise_destroy_tool - disable trace, restore configs and free data + */ +void osnoise_destroy_tool(struct osnoise_tool *top) +{ + if (!top) + return; + + trace_instance_destroy(&top->trace); + + if (top->context) + osnoise_put_context(top->context); + + free(top); +} + +/* + * osnoise_init_tool - init an osnoise tool + * + * It allocs data, create a context to store data and + * creates a new trace instance for the tool. + */ +struct osnoise_tool *osnoise_init_tool(char *tool_name) +{ + struct osnoise_tool *top; + int retval; + + top = calloc(1, sizeof(*top)); + if (!top) + return NULL; + + top->context = osnoise_context_alloc(); + if (!top->context) + goto out_err; + + retval = trace_instance_init(&top->trace, tool_name); + if (retval) + goto out_err; + + return top; +out_err: + osnoise_destroy_tool(top); + return NULL; +} + +/* + * osnoise_init_trace_tool - init a tracer instance to trace osnoise events + */ +struct osnoise_tool *osnoise_init_trace_tool(char *tracer) +{ + struct osnoise_tool *trace; + int retval; + + trace = osnoise_init_tool("osnoise_trace"); + if (!trace) + return NULL; + + retval = tracefs_event_enable(trace->trace.inst, "osnoise", NULL); + if (retval < 0 && !errno) { + err_msg("Could not find osnoise events\n"); + goto out_err; + } + + retval = enable_tracer_by_name(trace->trace.inst, tracer); + if (retval) { + err_msg("Could not enable %s tracer for tracing\n", tracer); + goto out_err; + } + + return trace; +out_err: + osnoise_destroy_tool(trace); + return NULL; +} + +static void osnoise_usage(int err) +{ + int i; + + static const char *msg[] = { + "", + "osnoise version " VERSION, + "", + " usage: [rtla] osnoise [MODE] ...", + "", + " modes:", + " top - prints the summary from osnoise tracer", + " hist - prints a histogram of osnoise samples", + "", + "if no MODE is given, the top mode is called, passing the arguments", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(err); +} + +int osnoise_main(int argc, char *argv[]) +{ + if (argc == 0) + goto usage; + + /* + * if osnoise was called without any argument, run the + * default cmdline. + */ + if (argc == 1) { + osnoise_top_main(argc, argv); + exit(0); + } + + if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { + osnoise_usage(0); + } else if (strncmp(argv[1], "-", 1) == 0) { + /* the user skipped the tool, call the default one */ + osnoise_top_main(argc, argv); + exit(0); + } else if (strcmp(argv[1], "top") == 0) { + osnoise_top_main(argc-1, &argv[1]); + exit(0); + } else if (strcmp(argv[1], "hist") == 0) { + osnoise_hist_main(argc-1, &argv[1]); + exit(0); + } + +usage: + osnoise_usage(1); + exit(1); +} + +int hwnoise_main(int argc, char *argv[]) +{ + osnoise_top_main(argc, argv); + exit(0); +} diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h new file mode 100644 index 0000000000..555f4f4903 --- /dev/null +++ b/tools/tracing/rtla/src/osnoise.h @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "trace.h" + +/* + * osnoise_context - read, store, write, restore osnoise configs. + */ +struct osnoise_context { + int flags; + int ref; + + char *curr_cpus; + char *orig_cpus; + + /* 0 as init value */ + unsigned long long orig_runtime_us; + unsigned long long runtime_us; + + /* 0 as init value */ + unsigned long long orig_period_us; + unsigned long long period_us; + + /* 0 as init value */ + long long orig_timerlat_period_us; + long long timerlat_period_us; + + /* 0 as init value */ + long long orig_tracing_thresh; + long long tracing_thresh; + + /* -1 as init value because 0 is disabled */ + long long orig_stop_us; + long long stop_us; + + /* -1 as init value because 0 is disabled */ + long long orig_stop_total_us; + long long stop_total_us; + + /* -1 as init value because 0 is disabled */ + long long orig_print_stack; + long long print_stack; + + /* -1 as init value because 0 is off */ + int orig_opt_irq_disable; + int opt_irq_disable; + + /* -1 as init value because 0 is off */ + int orig_opt_workload; + int opt_workload; +}; + +/* + * *_INIT_VALs are also invalid values, they are used to + * communicate errors. + */ +#define OSNOISE_OPTION_INIT_VAL (-1) +#define OSNOISE_TIME_INIT_VAL (0) + +struct osnoise_context *osnoise_context_alloc(void); +int osnoise_get_context(struct osnoise_context *context); +void osnoise_put_context(struct osnoise_context *context); + +int osnoise_set_cpus(struct osnoise_context *context, char *cpus); +void osnoise_restore_cpus(struct osnoise_context *context); + +int osnoise_set_runtime_period(struct osnoise_context *context, + unsigned long long runtime, + unsigned long long period); +void osnoise_restore_runtime_period(struct osnoise_context *context); + +int osnoise_set_stop_us(struct osnoise_context *context, + long long stop_us); +void osnoise_restore_stop_us(struct osnoise_context *context); + +int osnoise_set_stop_total_us(struct osnoise_context *context, + long long stop_total_us); +void osnoise_restore_stop_total_us(struct osnoise_context *context); + +int osnoise_set_timerlat_period_us(struct osnoise_context *context, + long long timerlat_period_us); +void osnoise_restore_timerlat_period_us(struct osnoise_context *context); + +int osnoise_set_tracing_thresh(struct osnoise_context *context, + long long tracing_thresh); +void osnoise_restore_tracing_thresh(struct osnoise_context *context); + +void osnoise_restore_print_stack(struct osnoise_context *context); +int osnoise_set_print_stack(struct osnoise_context *context, + long long print_stack); + +int osnoise_set_irq_disable(struct osnoise_context *context, bool onoff); +int osnoise_set_workload(struct osnoise_context *context, bool onoff); + +/* + * osnoise_tool - osnoise based tool definition. + */ +struct osnoise_tool { + struct trace_instance trace; + struct osnoise_context *context; + void *data; + void *params; + time_t start_time; +}; + +void osnoise_destroy_tool(struct osnoise_tool *top); +struct osnoise_tool *osnoise_init_tool(char *tool_name); +struct osnoise_tool *osnoise_init_trace_tool(char *tracer); + +int osnoise_hist_main(int argc, char *argv[]); +int osnoise_top_main(int argc, char **argv); +int osnoise_main(int argc, char **argv); +int hwnoise_main(int argc, char **argv); diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c new file mode 100644 index 0000000000..8f81fa0073 --- /dev/null +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -0,0 +1,945 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#define _GNU_SOURCE +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <time.h> +#include <sched.h> + +#include "utils.h" +#include "osnoise.h" + +struct osnoise_hist_params { + char *cpus; + cpu_set_t monitored_cpus; + char *trace_output; + char *cgroup_name; + unsigned long long runtime; + unsigned long long period; + long long threshold; + long long stop_us; + long long stop_total_us; + int sleep_time; + int duration; + int set_sched; + int output_divisor; + int cgroup; + int hk_cpus; + cpu_set_t hk_cpu_set; + struct sched_attr sched_param; + struct trace_events *events; + + char no_header; + char no_summary; + char no_index; + char with_zeros; + int bucket_size; + int entries; +}; + +struct osnoise_hist_cpu { + int *samples; + int count; + + unsigned long long min_sample; + unsigned long long sum_sample; + unsigned long long max_sample; + +}; + +struct osnoise_hist_data { + struct tracefs_hist *trace_hist; + struct osnoise_hist_cpu *hist; + int entries; + int bucket_size; + int nr_cpus; +}; + +/* + * osnoise_free_histogram - free runtime data + */ +static void +osnoise_free_histogram(struct osnoise_hist_data *data) +{ + int cpu; + + /* one histogram for IRQ and one for thread, per CPU */ + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (data->hist[cpu].samples) + free(data->hist[cpu].samples); + } + + /* one set of histograms per CPU */ + if (data->hist) + free(data->hist); + + free(data); +} + +/* + * osnoise_alloc_histogram - alloc runtime data + */ +static struct osnoise_hist_data +*osnoise_alloc_histogram(int nr_cpus, int entries, int bucket_size) +{ + struct osnoise_hist_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->entries = entries; + data->bucket_size = bucket_size; + data->nr_cpus = nr_cpus; + + data->hist = calloc(1, sizeof(*data->hist) * nr_cpus); + if (!data->hist) + goto cleanup; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].samples = calloc(1, sizeof(*data->hist->samples) * (entries + 1)); + if (!data->hist[cpu].samples) + goto cleanup; + } + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) + data->hist[cpu].min_sample = ~0; + + return data; + +cleanup: + osnoise_free_histogram(data); + return NULL; +} + +static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu, + unsigned long long duration, int count) +{ + struct osnoise_hist_params *params = tool->params; + struct osnoise_hist_data *data = tool->data; + unsigned long long total_duration; + int entries = data->entries; + int bucket; + int *hist; + + if (params->output_divisor) + duration = duration / params->output_divisor; + + if (data->bucket_size) + bucket = duration / data->bucket_size; + + total_duration = duration * count; + + hist = data->hist[cpu].samples; + data->hist[cpu].count += count; + update_min(&data->hist[cpu].min_sample, &duration); + update_sum(&data->hist[cpu].sum_sample, &total_duration); + update_max(&data->hist[cpu].max_sample, &duration); + + if (bucket < entries) + hist[bucket] += count; + else + hist[entries] += count; +} + +/* + * osnoise_destroy_trace_hist - disable events used to collect histogram + */ +static void osnoise_destroy_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + + tracefs_hist_pause(tool->trace.inst, data->trace_hist); + tracefs_hist_destroy(tool->trace.inst, data->trace_hist); +} + +/* + * osnoise_init_trace_hist - enable events used to collect histogram + */ +static int osnoise_init_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_params *params = tool->params; + struct osnoise_hist_data *data = tool->data; + int bucket_size; + char buff[128]; + int retval = 0; + + /* + * Set the size of the bucket. + */ + bucket_size = params->output_divisor * params->bucket_size; + snprintf(buff, sizeof(buff), "duration.buckets=%d", bucket_size); + + data->trace_hist = tracefs_hist_alloc(tool->trace.tep, "osnoise", "sample_threshold", + buff, TRACEFS_HIST_KEY_NORMAL); + if (!data->trace_hist) + return 1; + + retval = tracefs_hist_add_key(data->trace_hist, "cpu", 0); + if (retval) + goto out_err; + + retval = tracefs_hist_start(tool->trace.inst, data->trace_hist); + if (retval) + goto out_err; + + return 0; + +out_err: + osnoise_destroy_trace_hist(tool); + return 1; +} + +/* + * osnoise_read_trace_hist - parse histogram file and file osnoise histogram + */ +static void osnoise_read_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + long long cpu, counter, duration; + char *content, *position; + + tracefs_hist_pause(tool->trace.inst, data->trace_hist); + + content = tracefs_event_file_read(tool->trace.inst, "osnoise", + "sample_threshold", + "hist", NULL); + if (!content) + return; + + position = content; + while (true) { + position = strstr(position, "duration: ~"); + if (!position) + break; + position += strlen("duration: ~"); + duration = get_llong_from_str(position); + if (duration == -1) + err_msg("error reading duration from histogram\n"); + + position = strstr(position, "cpu:"); + if (!position) + break; + position += strlen("cpu: "); + cpu = get_llong_from_str(position); + if (cpu == -1) + err_msg("error reading cpu from histogram\n"); + + position = strstr(position, "hitcount:"); + if (!position) + break; + position += strlen("hitcount: "); + counter = get_llong_from_str(position); + if (counter == -1) + err_msg("error reading counter from histogram\n"); + + osnoise_hist_update_multiple(tool, cpu, duration, counter); + } + free(content); +} + +/* + * osnoise_hist_header - print the header of the tracer to the output + */ +static void osnoise_hist_header(struct osnoise_tool *tool) +{ + struct osnoise_hist_params *params = tool->params; + struct osnoise_hist_data *data = tool->data; + struct trace_seq *s = tool->trace.seq; + char duration[26]; + int cpu; + + if (params->no_header) + return; + + get_duration(tool->start_time, duration, sizeof(duration)); + trace_seq_printf(s, "# RTLA osnoise histogram\n"); + trace_seq_printf(s, "# Time unit is %s (%s)\n", + params->output_divisor == 1 ? "nanoseconds" : "microseconds", + params->output_divisor == 1 ? "ns" : "us"); + + trace_seq_printf(s, "# Duration: %s\n", duration); + + if (!params->no_index) + trace_seq_printf(s, "Index"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(s, " CPU-%03d", cpu); + } + trace_seq_printf(s, "\n"); + + trace_seq_do_printf(s); + trace_seq_reset(s); +} + +/* + * osnoise_print_summary - print the summary of the hist data to the output + */ +static void +osnoise_print_summary(struct osnoise_hist_params *params, + struct trace_instance *trace, + struct osnoise_hist_data *data) +{ + int cpu; + + if (params->no_summary) + return; + + if (!params->no_index) + trace_seq_printf(trace->seq, "count:"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].count); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "min: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].min_sample); + + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "avg: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) + continue; + + if (!data->hist[cpu].count) + continue; + + if (data->hist[cpu].count) + trace_seq_printf(trace->seq, "%9.2f ", + ((double) data->hist[cpu].sum_sample) / data->hist[cpu].count); + else + trace_seq_printf(trace->seq, " - "); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "max: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].max_sample); + + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * osnoise_print_stats - print data for all CPUs + */ +static void +osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + struct trace_instance *trace = &tool->trace; + int bucket, cpu; + int total; + + osnoise_hist_header(tool); + + for (bucket = 0; bucket < data->entries; bucket++) { + total = 0; + + if (!params->no_index) + trace_seq_printf(trace->seq, "%-6d", + bucket * data->bucket_size); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) + continue; + + if (!data->hist[cpu].count) + continue; + + total += data->hist[cpu].samples[bucket]; + trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].samples[bucket]); + } + + if (total == 0 && !params->with_zeros) { + trace_seq_reset(trace->seq); + continue; + } + + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + } + + if (!params->no_index) + trace_seq_printf(trace->seq, "over: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].samples[data->entries]); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + + osnoise_print_summary(params, trace, data); +} + +/* + * osnoise_hist_usage - prints osnoise hist usage message + */ +static void osnoise_hist_usage(char *usage) +{ + int i; + + static const char * const msg[] = { + "", + " usage: rtla osnoise hist [-h] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\", + " [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\", + " [-c cpu-list] [-H cpu-list] [-P priority] [-b N] [-E N] [--no-header] [--no-summary] \\", + " [--no-index] [--with-zeros] [-C[=cgroup_name]]", + "", + " -h/--help: print this menu", + " -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit", + " -p/--period us: osnoise period in us", + " -r/--runtime us: osnoise runtime in us", + " -s/--stop us: stop trace if a single sample is higher than the argument in us", + " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", + " -T/--threshold us: the minimum delta to be considered a noise", + " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", + " -d/--duration time[s|m|h|d]: duration of the session", + " -D/--debug: print debug info", + " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", + " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", + " --filter <filter>: enable a trace event filter to the previous -e event", + " --trigger <trigger>: enable a trace event trigger to the previous -e event", + " -b/--bucket-size N: set the histogram bucket size (default 1)", + " -E/--entries N: set the number of entries of the histogram (default 256)", + " --no-header: do not print header", + " --no-summary: do not print summary", + " --no-index: do not print index", + " --with-zeros: print zero only entries", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period: set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla osnoise hist: a per-cpu histogram of the OS noise (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * osnoise_hist_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct osnoise_hist_params +*osnoise_hist_parse_args(int argc, char *argv[]) +{ + struct osnoise_hist_params *params; + struct trace_events *tevent; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + /* display data in microseconds */ + params->output_divisor = 1000; + params->bucket_size = 1; + params->entries = 256; + + while (1) { + static struct option long_options[] = { + {"auto", required_argument, 0, 'a'}, + {"bucket-size", required_argument, 0, 'b'}, + {"entries", required_argument, 0, 'E'}, + {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"house-keeping", required_argument, 0, 'H'}, + {"help", no_argument, 0, 'h'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"runtime", required_argument, 0, 'r'}, + {"stop", required_argument, 0, 's'}, + {"stop-total", required_argument, 0, 'S'}, + {"trace", optional_argument, 0, 't'}, + {"event", required_argument, 0, 'e'}, + {"threshold", required_argument, 0, 'T'}, + {"no-header", no_argument, 0, '0'}, + {"no-summary", no_argument, 0, '1'}, + {"no-index", no_argument, 0, '2'}, + {"with-zeros", no_argument, 0, '3'}, + {"trigger", required_argument, 0, '4'}, + {"filter", required_argument, 0, '5'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:p:P:r:s:S:t::T:01234:5:", + long_options, &option_index); + + /* detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'a': + /* set sample stop to auto_thresh */ + params->stop_us = get_llong_from_str(optarg); + + /* set sample threshold to 1 */ + params->threshold = 1; + + /* set trace */ + params->trace_output = "osnoise_trace.txt"; + + break; + case 'b': + params->bucket_size = get_llong_from_str(optarg); + if ((params->bucket_size == 0) || (params->bucket_size >= 1000000)) + osnoise_hist_usage("Bucket size needs to be > 0 and <= 1000000\n"); + break; + case 'c': + retval = parse_cpu_set(optarg, ¶ms->monitored_cpus); + if (retval) + osnoise_hist_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'C': + params->cgroup = 1; + if (!optarg) { + /* will inherit this cgroup */ + params->cgroup_name = NULL; + } else if (*optarg == '=') { + /* skip the = */ + params->cgroup_name = ++optarg; + } + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + osnoise_hist_usage("Invalid -D duration\n"); + break; + case 'e': + tevent = trace_event_alloc(optarg); + if (!tevent) { + err_msg("Error alloc trace event"); + exit(EXIT_FAILURE); + } + + if (params->events) + tevent->next = params->events; + + params->events = tevent; + break; + case 'E': + params->entries = get_llong_from_str(optarg); + if ((params->entries < 10) || (params->entries > 9999999)) + osnoise_hist_usage("Entries must be > 10 and < 9999999\n"); + break; + case 'h': + case '?': + osnoise_hist_usage(NULL); + break; + case 'H': + params->hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->hk_cpu_set); + if (retval) { + err_msg("Error parsing house keeping CPUs\n"); + exit(EXIT_FAILURE); + } + break; + case 'p': + params->period = get_llong_from_str(optarg); + if (params->period > 10000000) + osnoise_hist_usage("Period longer than 10 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + osnoise_hist_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 'r': + params->runtime = get_llong_from_str(optarg); + if (params->runtime < 100) + osnoise_hist_usage("Runtime shorter than 100 us\n"); + break; + case 's': + params->stop_us = get_llong_from_str(optarg); + break; + case 'S': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 'T': + params->threshold = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "osnoise_trace.txt"; + break; + case '0': /* no header */ + params->no_header = 1; + break; + case '1': /* no summary */ + params->no_summary = 1; + break; + case '2': /* no index */ + params->no_index = 1; + break; + case '3': /* with zeros */ + params->with_zeros = 1; + break; + case '4': /* trigger */ + if (params->events) { + retval = trace_event_add_trigger(params->events, optarg); + if (retval) { + err_msg("Error adding trigger %s\n", optarg); + exit(EXIT_FAILURE); + } + } else { + osnoise_hist_usage("--trigger requires a previous -e\n"); + } + break; + case '5': /* filter */ + if (params->events) { + retval = trace_event_add_filter(params->events, optarg); + if (retval) { + err_msg("Error adding filter %s\n", optarg); + exit(EXIT_FAILURE); + } + } else { + osnoise_hist_usage("--filter requires a previous -e\n"); + } + break; + default: + osnoise_hist_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("rtla needs root permission\n"); + exit(EXIT_FAILURE); + } + + if (params->no_index && !params->with_zeros) + osnoise_hist_usage("no-index set and with-zeros not set - it does not make sense"); + + return params; +} + +/* + * osnoise_hist_apply_config - apply the hist configs to the initialized tool + */ +static int +osnoise_hist_apply_config(struct osnoise_tool *tool, struct osnoise_hist_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(tool->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->runtime || params->period) { + retval = osnoise_set_runtime_period(tool->context, + params->runtime, + params->period); + if (retval) { + err_msg("Failed to set runtime and/or period\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + if (params->threshold) { + retval = osnoise_set_tracing_thresh(tool->context, params->threshold); + if (retval) { + err_msg("Failed to set tracing_thresh\n"); + goto out_err; + } + } + + if (params->hk_cpus) { + retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set), + ¶ms->hk_cpu_set); + if (retval == -1) { + err_msg("Failed to set rtla to the house keeping CPUs\n"); + goto out_err; + } + } else if (params->cpus) { + /* + * Even if the user do not set a house-keeping CPU, try to + * move rtla to a CPU set different to the one where the user + * set the workload to run. + * + * No need to check results as this is an automatic attempt. + */ + auto_house_keeping(¶ms->monitored_cpus); + } + + return 0; + +out_err: + return -1; +} + +/* + * osnoise_init_hist - initialize a osnoise hist tool with parameters + */ +static struct osnoise_tool +*osnoise_init_hist(struct osnoise_hist_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("osnoise_hist"); + if (!tool) + return NULL; + + tool->data = osnoise_alloc_histogram(nr_cpus, params->entries, params->bucket_size); + if (!tool->data) + goto out_err; + + tool->params = params; + + return tool; + +out_err: + osnoise_destroy_tool(tool); + return NULL; +} + +static int stop_tracing; +static void stop_hist(int sig) +{ + stop_tracing = 1; +} + +/* + * osnoise_hist_set_signals - handles the signal to stop the tool + */ +static void +osnoise_hist_set_signals(struct osnoise_hist_params *params) +{ + signal(SIGINT, stop_hist); + if (params->duration) { + signal(SIGALRM, stop_hist); + alarm(params->duration); + } +} + +int osnoise_hist_main(int argc, char *argv[]) +{ + struct osnoise_hist_params *params; + struct osnoise_tool *record = NULL; + struct osnoise_tool *tool = NULL; + struct trace_instance *trace; + int return_value = 1; + int retval; + + params = osnoise_hist_parse_args(argc, argv); + if (!params) + exit(1); + + tool = osnoise_init_hist(params); + if (!tool) { + err_msg("Could not init osnoise hist\n"); + goto out_exit; + } + + retval = osnoise_hist_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_destroy; + } + + trace = &tool->trace; + + retval = enable_osnoise(trace); + if (retval) { + err_msg("Failed to enable osnoise tracer\n"); + goto out_destroy; + } + + retval = osnoise_init_trace_hist(tool); + if (retval) + goto out_destroy; + + if (params->set_sched) { + retval = set_comm_sched_attr("osnoise/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_free; + } + } + + if (params->cgroup) { + retval = set_comm_cgroup("timerlat/", params->cgroup_name); + if (!retval) { + err_msg("Failed to move threads to cgroup\n"); + goto out_free; + } + } + + if (params->trace_output) { + record = osnoise_init_trace_tool("osnoise"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_free; + } + + if (params->events) { + retval = trace_events_enable(&record->trace, params->events); + if (retval) + goto out_hist; + } + + } + + /* + * Start the tracer here, after having set all instances. + * + * Let the trace instance start first for the case of hitting a stop + * tracing while enabling other instances. The trace instance is the + * one with most valuable information. + */ + if (params->trace_output) + trace_instance_start(&record->trace); + trace_instance_start(trace); + + tool->start_time = time(NULL); + osnoise_hist_set_signals(params); + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_hist; + } + + if (trace_is_off(&tool->trace, &record->trace)) + break; + } + + osnoise_read_trace_hist(tool); + + osnoise_print_stats(params, tool); + + return_value = 0; + + if (trace_is_off(&tool->trace, &record->trace)) { + printf("rtla osnoise hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_hist: + trace_events_destroy(&record->trace, params->events); + params->events = NULL; +out_free: + osnoise_free_histogram(tool->data); +out_destroy: + osnoise_destroy_tool(record); + osnoise_destroy_tool(tool); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c new file mode 100644 index 0000000000..f7c959be86 --- /dev/null +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -0,0 +1,778 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#define _GNU_SOURCE +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> +#include <sched.h> + +#include "osnoise.h" +#include "utils.h" + +enum osnoise_mode { + MODE_OSNOISE = 0, + MODE_HWNOISE +}; + +/* + * osnoise top parameters + */ +struct osnoise_top_params { + char *cpus; + cpu_set_t monitored_cpus; + char *trace_output; + char *cgroup_name; + unsigned long long runtime; + unsigned long long period; + long long threshold; + long long stop_us; + long long stop_total_us; + int sleep_time; + int duration; + int quiet; + int set_sched; + int cgroup; + int hk_cpus; + cpu_set_t hk_cpu_set; + struct sched_attr sched_param; + struct trace_events *events; + enum osnoise_mode mode; +}; + +struct osnoise_top_cpu { + unsigned long long sum_runtime; + unsigned long long sum_noise; + unsigned long long max_noise; + unsigned long long max_sample; + + unsigned long long hw_count; + unsigned long long nmi_count; + unsigned long long irq_count; + unsigned long long softirq_count; + unsigned long long thread_count; + + int sum_cycles; +}; + +struct osnoise_top_data { + struct osnoise_top_cpu *cpu_data; + int nr_cpus; +}; + +/* + * osnoise_free_top - free runtime data + */ +static void +osnoise_free_top(struct osnoise_top_data *data) +{ + free(data->cpu_data); + free(data); +} + +/* + * osnoise_alloc_histogram - alloc runtime data + */ +static struct osnoise_top_data *osnoise_alloc_top(int nr_cpus) +{ + struct osnoise_top_data *data; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus); + if (!data->cpu_data) + goto cleanup; + + return data; + +cleanup: + osnoise_free_top(data); + return NULL; +} + +/* + * osnoise_top_handler - this is the handler for osnoise tracer events + */ +static int +osnoise_top_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct trace_instance *trace = context; + struct osnoise_tool *tool; + unsigned long long val; + struct osnoise_top_cpu *cpu_data; + struct osnoise_top_data *data; + int cpu = record->cpu; + + tool = container_of(trace, struct osnoise_tool, trace); + + data = tool->data; + cpu_data = &data->cpu_data[cpu]; + + cpu_data->sum_cycles++; + + tep_get_field_val(s, event, "runtime", record, &val, 1); + update_sum(&cpu_data->sum_runtime, &val); + + tep_get_field_val(s, event, "noise", record, &val, 1); + update_max(&cpu_data->max_noise, &val); + update_sum(&cpu_data->sum_noise, &val); + + tep_get_field_val(s, event, "max_sample", record, &val, 1); + update_max(&cpu_data->max_sample, &val); + + tep_get_field_val(s, event, "hw_count", record, &val, 1); + update_sum(&cpu_data->hw_count, &val); + + tep_get_field_val(s, event, "nmi_count", record, &val, 1); + update_sum(&cpu_data->nmi_count, &val); + + tep_get_field_val(s, event, "irq_count", record, &val, 1); + update_sum(&cpu_data->irq_count, &val); + + tep_get_field_val(s, event, "softirq_count", record, &val, 1); + update_sum(&cpu_data->softirq_count, &val); + + tep_get_field_val(s, event, "thread_count", record, &val, 1); + update_sum(&cpu_data->thread_count, &val); + + return 0; +} + +/* + * osnoise_top_header - print the header of the tool output + */ +static void osnoise_top_header(struct osnoise_tool *top) +{ + struct osnoise_top_params *params = top->params; + struct trace_seq *s = top->trace.seq; + char duration[26]; + + get_duration(top->start_time, duration, sizeof(duration)); + + trace_seq_printf(s, "\033[2;37;40m"); + trace_seq_printf(s, " "); + + if (params->mode == MODE_OSNOISE) { + trace_seq_printf(s, "Operating System Noise"); + trace_seq_printf(s, " "); + } else if (params->mode == MODE_HWNOISE) { + trace_seq_printf(s, "Hardware-related Noise"); + } + + trace_seq_printf(s, " "); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); + + trace_seq_printf(s, "duration: %9s | time is in us\n", duration); + + trace_seq_printf(s, "\033[2;30;47m"); + trace_seq_printf(s, "CPU Period Runtime "); + trace_seq_printf(s, " Noise "); + trace_seq_printf(s, " %% CPU Aval "); + trace_seq_printf(s, " Max Noise Max Single "); + trace_seq_printf(s, " HW NMI"); + + if (params->mode == MODE_HWNOISE) + goto eol; + + trace_seq_printf(s, " IRQ Softirq Thread"); + +eol: + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); +} + +/* + * clear_terminal - clears the output terminal + */ +static void clear_terminal(struct trace_seq *seq) +{ + if (!config_debug) + trace_seq_printf(seq, "\033c"); +} + +/* + * osnoise_top_print - prints the output of a given CPU + */ +static void osnoise_top_print(struct osnoise_tool *tool, int cpu) +{ + struct osnoise_top_params *params = tool->params; + struct trace_seq *s = tool->trace.seq; + struct osnoise_top_cpu *cpu_data; + struct osnoise_top_data *data; + int percentage; + int decimal; + + data = tool->data; + cpu_data = &data->cpu_data[cpu]; + + if (!cpu_data->sum_runtime) + return; + + percentage = ((cpu_data->sum_runtime - cpu_data->sum_noise) * 10000000) + / cpu_data->sum_runtime; + decimal = percentage % 100000; + percentage = percentage / 100000; + + trace_seq_printf(s, "%3d #%-6d %12llu ", cpu, cpu_data->sum_cycles, cpu_data->sum_runtime); + trace_seq_printf(s, "%12llu ", cpu_data->sum_noise); + trace_seq_printf(s, " %3d.%05d", percentage, decimal); + trace_seq_printf(s, "%12llu %12llu", cpu_data->max_noise, cpu_data->max_sample); + + trace_seq_printf(s, "%12llu ", cpu_data->hw_count); + trace_seq_printf(s, "%12llu ", cpu_data->nmi_count); + + if (params->mode == MODE_HWNOISE) { + trace_seq_printf(s, "\n"); + return; + } + + trace_seq_printf(s, "%12llu ", cpu_data->irq_count); + trace_seq_printf(s, "%12llu ", cpu_data->softirq_count); + trace_seq_printf(s, "%12llu\n", cpu_data->thread_count); +} + +/* + * osnoise_print_stats - print data for all cpus + */ +static void +osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top) +{ + struct trace_instance *trace = &top->trace; + static int nr_cpus = -1; + int i; + + if (nr_cpus == -1) + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + if (!params->quiet) + clear_terminal(trace->seq); + + osnoise_top_header(top); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) + continue; + osnoise_top_print(top, i); + } + + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * osnoise_top_usage - prints osnoise top usage message + */ +static void osnoise_top_usage(struct osnoise_top_params *params, char *usage) +{ + int i; + + static const char * const msg[] = { + " [-h] [-q] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\", + " [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\", + " [-c cpu-list] [-H cpu-list] [-P priority] [-C[=cgroup_name]]", + "", + " -h/--help: print this menu", + " -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit", + " -p/--period us: osnoise period in us", + " -r/--runtime us: osnoise runtime in us", + " -s/--stop us: stop trace if a single sample is higher than the argument in us", + " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", + " -T/--threshold us: the minimum delta to be considered a noise", + " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", + " -d/--duration time[s|m|h|d]: duration of the session", + " -D/--debug: print debug info", + " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", + " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", + " --filter <filter>: enable a trace event filter to the previous -e event", + " --trigger <trigger>: enable a trace event trigger to the previous -e event", + " -q/--quiet print only a summary at the end", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + if (params->mode == MODE_OSNOISE) { + fprintf(stderr, + "rtla osnoise top: a per-cpu summary of the OS noise (version %s)\n", + VERSION); + + fprintf(stderr, " usage: rtla osnoise [top]"); + } + + if (params->mode == MODE_HWNOISE) { + fprintf(stderr, + "rtla hwnoise: a summary of hardware-related noise (version %s)\n", + VERSION); + + fprintf(stderr, " usage: rtla hwnoise"); + } + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * osnoise_top_parse_args - allocs, parse and fill the cmd line parameters + */ +struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) +{ + struct osnoise_top_params *params; + struct trace_events *tevent; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + if (strcmp(argv[0], "hwnoise") == 0) { + params->mode = MODE_HWNOISE; + /* + * Reduce CPU usage for 75% to avoid killing the system. + */ + params->runtime = 750000; + params->period = 1000000; + } + + while (1) { + static struct option long_options[] = { + {"auto", required_argument, 0, 'a'}, + {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"event", required_argument, 0, 'e'}, + {"house-keeping", required_argument, 0, 'H'}, + {"help", no_argument, 0, 'h'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"quiet", no_argument, 0, 'q'}, + {"runtime", required_argument, 0, 'r'}, + {"stop", required_argument, 0, 's'}, + {"stop-total", required_argument, 0, 'S'}, + {"threshold", required_argument, 0, 'T'}, + {"trace", optional_argument, 0, 't'}, + {"trigger", required_argument, 0, '0'}, + {"filter", required_argument, 0, '1'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "a:c:C::d:De:hH:p:P:qr:s:S:t::T:0:1:", + long_options, &option_index); + + /* Detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'a': + /* set sample stop to auto_thresh */ + params->stop_us = get_llong_from_str(optarg); + + /* set sample threshold to 1 */ + params->threshold = 1; + + /* set trace */ + params->trace_output = "osnoise_trace.txt"; + + break; + case 'c': + retval = parse_cpu_set(optarg, ¶ms->monitored_cpus); + if (retval) + osnoise_top_usage(params, "\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'C': + params->cgroup = 1; + if (!optarg) { + /* will inherit this cgroup */ + params->cgroup_name = NULL; + } else if (*optarg == '=') { + /* skip the = */ + params->cgroup_name = ++optarg; + } + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + osnoise_top_usage(params, "Invalid -D duration\n"); + break; + case 'e': + tevent = trace_event_alloc(optarg); + if (!tevent) { + err_msg("Error alloc trace event"); + exit(EXIT_FAILURE); + } + + if (params->events) + tevent->next = params->events; + params->events = tevent; + + break; + case 'h': + case '?': + osnoise_top_usage(params, NULL); + break; + case 'H': + params->hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->hk_cpu_set); + if (retval) { + err_msg("Error parsing house keeping CPUs\n"); + exit(EXIT_FAILURE); + } + break; + case 'p': + params->period = get_llong_from_str(optarg); + if (params->period > 10000000) + osnoise_top_usage(params, "Period longer than 10 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + osnoise_top_usage(params, "Invalid -P priority"); + params->set_sched = 1; + break; + case 'q': + params->quiet = 1; + break; + case 'r': + params->runtime = get_llong_from_str(optarg); + if (params->runtime < 100) + osnoise_top_usage(params, "Runtime shorter than 100 us\n"); + break; + case 's': + params->stop_us = get_llong_from_str(optarg); + break; + case 'S': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "osnoise_trace.txt"; + break; + case 'T': + params->threshold = get_llong_from_str(optarg); + break; + case '0': /* trigger */ + if (params->events) { + retval = trace_event_add_trigger(params->events, optarg); + if (retval) { + err_msg("Error adding trigger %s\n", optarg); + exit(EXIT_FAILURE); + } + } else { + osnoise_top_usage(params, "--trigger requires a previous -e\n"); + } + break; + case '1': /* filter */ + if (params->events) { + retval = trace_event_add_filter(params->events, optarg); + if (retval) { + err_msg("Error adding filter %s\n", optarg); + exit(EXIT_FAILURE); + } + } else { + osnoise_top_usage(params, "--filter requires a previous -e\n"); + } + break; + default: + osnoise_top_usage(params, "Invalid option"); + } + } + + if (geteuid()) { + err_msg("osnoise needs root permission\n"); + exit(EXIT_FAILURE); + } + + return params; +} + +/* + * osnoise_top_apply_config - apply the top configs to the initialized tool + */ +static int +osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(tool->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->runtime || params->period) { + retval = osnoise_set_runtime_period(tool->context, + params->runtime, + params->period); + if (retval) { + err_msg("Failed to set runtime and/or period\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + if (params->threshold) { + retval = osnoise_set_tracing_thresh(tool->context, params->threshold); + if (retval) { + err_msg("Failed to set tracing_thresh\n"); + goto out_err; + } + } + + if (params->mode == MODE_HWNOISE) { + retval = osnoise_set_irq_disable(tool->context, 1); + if (retval) { + err_msg("Failed to set OSNOISE_IRQ_DISABLE option\n"); + goto out_err; + } + } + + if (params->hk_cpus) { + retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set), + ¶ms->hk_cpu_set); + if (retval == -1) { + err_msg("Failed to set rtla to the house keeping CPUs\n"); + goto out_err; + } + } else if (params->cpus) { + /* + * Even if the user do not set a house-keeping CPU, try to + * move rtla to a CPU set different to the one where the user + * set the workload to run. + * + * No need to check results as this is an automatic attempt. + */ + auto_house_keeping(¶ms->monitored_cpus); + } + + return 0; + +out_err: + return -1; +} + +/* + * osnoise_init_top - initialize a osnoise top tool with parameters + */ +struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("osnoise_top"); + if (!tool) + return NULL; + + tool->data = osnoise_alloc_top(nr_cpus); + if (!tool->data) + goto out_err; + + tool->params = params; + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "osnoise", + osnoise_top_handler, NULL); + + return tool; + +out_err: + osnoise_free_top(tool->data); + osnoise_destroy_tool(tool); + return NULL; +} + +static int stop_tracing; +static void stop_top(int sig) +{ + stop_tracing = 1; +} + +/* + * osnoise_top_set_signals - handles the signal to stop the tool + */ +static void osnoise_top_set_signals(struct osnoise_top_params *params) +{ + signal(SIGINT, stop_top); + if (params->duration) { + signal(SIGALRM, stop_top); + alarm(params->duration); + } +} + +int osnoise_top_main(int argc, char **argv) +{ + struct osnoise_top_params *params; + struct osnoise_tool *record = NULL; + struct osnoise_tool *tool = NULL; + struct trace_instance *trace; + int return_value = 1; + int retval; + + params = osnoise_top_parse_args(argc, argv); + if (!params) + exit(1); + + tool = osnoise_init_top(params); + if (!tool) { + err_msg("Could not init osnoise top\n"); + goto out_exit; + } + + retval = osnoise_top_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_free; + } + + trace = &tool->trace; + + retval = enable_osnoise(trace); + if (retval) { + err_msg("Failed to enable osnoise tracer\n"); + goto out_free; + } + + if (params->set_sched) { + retval = set_comm_sched_attr("osnoise/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_free; + } + } + + if (params->cgroup) { + retval = set_comm_cgroup("osnoise/", params->cgroup_name); + if (!retval) { + err_msg("Failed to move threads to cgroup\n"); + goto out_free; + } + } + + if (params->trace_output) { + record = osnoise_init_trace_tool("osnoise"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_free; + } + + if (params->events) { + retval = trace_events_enable(&record->trace, params->events); + if (retval) + goto out_top; + } + } + + /* + * Start the tracer here, after having set all instances. + * + * Let the trace instance start first for the case of hitting a stop + * tracing while enabling other instances. The trace instance is the + * one with most valuable information. + */ + if (params->trace_output) + trace_instance_start(&record->trace); + trace_instance_start(trace); + + tool->start_time = time(NULL); + osnoise_top_set_signals(params); + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_top; + } + + if (!params->quiet) + osnoise_print_stats(params, tool); + + if (trace_is_off(&tool->trace, &record->trace)) + break; + + } + + osnoise_print_stats(params, tool); + + return_value = 0; + + if (trace_is_off(&tool->trace, &record->trace)) { + printf("osnoise hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_top: + trace_events_destroy(&record->trace, params->events); + params->events = NULL; +out_free: + osnoise_free_top(tool->data); + osnoise_destroy_tool(record); + osnoise_destroy_tool(tool); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/rtla.c b/tools/tracing/rtla/src/rtla.c new file mode 100644 index 0000000000..7635c70123 --- /dev/null +++ b/tools/tracing/rtla/src/rtla.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include "osnoise.h" +#include "timerlat.h" + +/* + * rtla_usage - print rtla usage + */ +static void rtla_usage(int err) +{ + int i; + + static const char *msg[] = { + "", + "rtla version " VERSION, + "", + " usage: rtla COMMAND ...", + "", + " commands:", + " osnoise - gives information about the operating system noise (osnoise)", + " hwnoise - gives information about hardware-related noise", + " timerlat - measures the timer irq and thread latency", + "", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(err); +} + +/* + * run_command - try to run a rtla tool command + * + * It returns 0 if it fails. The tool's main will generally not + * return as they should call exit(). + */ +int run_command(int argc, char **argv, int start_position) +{ + if (strcmp(argv[start_position], "osnoise") == 0) { + osnoise_main(argc-start_position, &argv[start_position]); + goto ran; + } else if (strcmp(argv[start_position], "hwnoise") == 0) { + hwnoise_main(argc-start_position, &argv[start_position]); + goto ran; + } else if (strcmp(argv[start_position], "timerlat") == 0) { + timerlat_main(argc-start_position, &argv[start_position]); + goto ran; + } + + return 0; +ran: + return 1; +} + +int main(int argc, char *argv[]) +{ + int retval; + + /* is it an alias? */ + retval = run_command(argc, argv, 0); + if (retval) + exit(0); + + if (argc < 2) + goto usage; + + if (strcmp(argv[1], "-h") == 0) { + rtla_usage(0); + } else if (strcmp(argv[1], "--help") == 0) { + rtla_usage(0); + } + + retval = run_command(argc, argv, 1); + if (retval) + exit(0); + +usage: + rtla_usage(1); + exit(1); +} diff --git a/tools/tracing/rtla/src/timerlat.c b/tools/tracing/rtla/src/timerlat.c new file mode 100644 index 0000000000..21cdcc5c4a --- /dev/null +++ b/tools/tracing/rtla/src/timerlat.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ +#include <sys/types.h> +#include <sys/stat.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> + +#include "timerlat.h" + +static void timerlat_usage(int err) +{ + int i; + + static const char * const msg[] = { + "", + "timerlat version " VERSION, + "", + " usage: [rtla] timerlat [MODE] ...", + "", + " modes:", + " top - prints the summary from timerlat tracer", + " hist - prints a histogram of timer latencies", + "", + "if no MODE is given, the top mode is called, passing the arguments", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(err); +} + +int timerlat_main(int argc, char *argv[]) +{ + if (argc == 0) + goto usage; + + /* + * if timerlat was called without any argument, run the + * default cmdline. + */ + if (argc == 1) { + timerlat_top_main(argc, argv); + exit(0); + } + + if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { + timerlat_usage(0); + } else if (strncmp(argv[1], "-", 1) == 0) { + /* the user skipped the tool, call the default one */ + timerlat_top_main(argc, argv); + exit(0); + } else if (strcmp(argv[1], "top") == 0) { + timerlat_top_main(argc-1, &argv[1]); + exit(0); + } else if (strcmp(argv[1], "hist") == 0) { + timerlat_hist_main(argc-1, &argv[1]); + exit(0); + } + +usage: + timerlat_usage(1); + exit(1); +} diff --git a/tools/tracing/rtla/src/timerlat.h b/tools/tracing/rtla/src/timerlat.h new file mode 100644 index 0000000000..88561bfd14 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat.h @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +int timerlat_hist_main(int argc, char *argv[]); +int timerlat_top_main(int argc, char *argv[]); +int timerlat_main(int argc, char *argv[]); diff --git a/tools/tracing/rtla/src/timerlat_aa.c b/tools/tracing/rtla/src/timerlat_aa.c new file mode 100644 index 0000000000..7093fd5333 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_aa.c @@ -0,0 +1,1041 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <stdlib.h> +#include <errno.h> +#include "utils.h" +#include "osnoise.h" +#include "timerlat.h" +#include <unistd.h> + +enum timelat_state { + TIMERLAT_INIT = 0, + TIMERLAT_WAITING_IRQ, + TIMERLAT_WAITING_THREAD, +}; + +#define MAX_COMM 24 + +/* + * Per-cpu data statistics and data. + */ +struct timerlat_aa_data { + /* Current CPU state */ + int curr_state; + + /* timerlat IRQ latency */ + unsigned long long tlat_irq_seqnum; + unsigned long long tlat_irq_latency; + unsigned long long tlat_irq_timstamp; + + /* timerlat Thread latency */ + unsigned long long tlat_thread_seqnum; + unsigned long long tlat_thread_latency; + unsigned long long tlat_thread_timstamp; + + /* + * Information about the thread running when the IRQ + * arrived. + * + * This can be blocking or interference, depending on the + * priority of the thread. Assuming timerlat is the highest + * prio, it is blocking. If timerlat has a lower prio, it is + * interference. + * note: "unsigned long long" because they are fetch using tep_get_field_val(); + */ + unsigned long long run_thread_pid; + char run_thread_comm[MAX_COMM]; + unsigned long long thread_blocking_duration; + unsigned long long max_exit_idle_latency; + + /* Information about the timerlat timer irq */ + unsigned long long timer_irq_start_time; + unsigned long long timer_irq_start_delay; + unsigned long long timer_irq_duration; + unsigned long long timer_exit_from_idle; + + /* + * Information about the last IRQ before the timerlat irq + * arrived. + * + * If now - timestamp is <= latency, it might have influenced + * in the timerlat irq latency. Otherwise, ignore it. + */ + unsigned long long prev_irq_duration; + unsigned long long prev_irq_timstamp; + + /* + * Interference sum. + */ + unsigned long long thread_nmi_sum; + unsigned long long thread_irq_sum; + unsigned long long thread_softirq_sum; + unsigned long long thread_thread_sum; + + /* + * Interference task information. + */ + struct trace_seq *prev_irqs_seq; + struct trace_seq *nmi_seq; + struct trace_seq *irqs_seq; + struct trace_seq *softirqs_seq; + struct trace_seq *threads_seq; + struct trace_seq *stack_seq; + + /* + * Current thread. + */ + char current_comm[MAX_COMM]; + unsigned long long current_pid; + + /* + * Is the system running a kworker? + */ + unsigned long long kworker; + unsigned long long kworker_func; +}; + +/* + * The analysis context and system wide view + */ +struct timerlat_aa_context { + int nr_cpus; + int dump_tasks; + + /* per CPU data */ + struct timerlat_aa_data *taa_data; + + /* + * required to translate function names and register + * events. + */ + struct osnoise_tool *tool; +}; + +/* + * The data is stored as a local variable, but accessed via a helper function. + * + * It could be stored inside the trace context. But every access would + * require container_of() + a series of pointers. Do we need it? Not sure. + * + * For now keep it simple. If needed, store it in the tool, add the *context + * as a parameter in timerlat_aa_get_ctx() and do the magic there. + */ +static struct timerlat_aa_context *__timerlat_aa_ctx; + +static struct timerlat_aa_context *timerlat_aa_get_ctx(void) +{ + return __timerlat_aa_ctx; +} + +/* + * timerlat_aa_get_data - Get the per-cpu data from the timerlat context + */ +static struct timerlat_aa_data +*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu) +{ + return &taa_ctx->taa_data[cpu]; +} + +/* + * timerlat_aa_irq_latency - Handles timerlat IRQ event + */ +static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data, + struct trace_seq *s, struct tep_record *record, + struct tep_event *event) +{ + /* + * For interference, we start now looking for things that can delay + * the thread. + */ + taa_data->curr_state = TIMERLAT_WAITING_THREAD; + taa_data->tlat_irq_timstamp = record->ts; + + /* + * Zero values. + */ + taa_data->thread_nmi_sum = 0; + taa_data->thread_irq_sum = 0; + taa_data->thread_softirq_sum = 0; + taa_data->thread_thread_sum = 0; + taa_data->thread_blocking_duration = 0; + taa_data->timer_irq_start_time = 0; + taa_data->timer_irq_duration = 0; + taa_data->timer_exit_from_idle = 0; + + /* + * Zero interference tasks. + */ + trace_seq_reset(taa_data->nmi_seq); + trace_seq_reset(taa_data->irqs_seq); + trace_seq_reset(taa_data->softirqs_seq); + trace_seq_reset(taa_data->threads_seq); + + /* IRQ latency values */ + tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1); + tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1); + + /* The thread that can cause blocking */ + tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1); + + /* + * Get exit from idle case. + * + * If it is not idle thread: + */ + if (taa_data->run_thread_pid) + return 0; + + /* + * if the latency is shorter than the known exit from idle: + */ + if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency) + return 0; + + /* + * To be safe, ignore the cases in which an IRQ/NMI could have + * interfered with the timerlat IRQ. + */ + if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency + < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) + return 0; + + taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency; + + return 0; +} + +/* + * timerlat_aa_thread_latency - Handles timerlat thread event + */ +static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data, + struct trace_seq *s, struct tep_record *record, + struct tep_event *event) +{ + /* + * For interference, we start now looking for things that can delay + * the IRQ of the next cycle. + */ + taa_data->curr_state = TIMERLAT_WAITING_IRQ; + taa_data->tlat_thread_timstamp = record->ts; + + /* Thread latency values */ + tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1); + tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1); + + return 0; +} + +/* + * timerlat_aa_handler - Handle timerlat events + * + * This function is called to handle timerlat events recording statistics. + * + * Returns 0 on success, -1 otherwise. + */ +static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long long thread; + + if (!taa_data) + return -1; + + tep_get_field_val(s, event, "context", record, &thread, 1); + if (!thread) + return timerlat_aa_irq_latency(taa_data, s, record, event); + else + return timerlat_aa_thread_latency(taa_data, s, record, event); +} + +/* + * timerlat_aa_nmi_handler - Handles NMI noise + * + * It is used to collect information about interferences from NMI. It is + * hooked to the osnoise:nmi_noise event. + */ +static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long long duration; + unsigned long long start; + + tep_get_field_val(s, event, "duration", record, &duration, 1); + tep_get_field_val(s, event, "start", record, &start, 1); + + if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) { + taa_data->prev_irq_duration = duration; + taa_data->prev_irq_timstamp = start; + + trace_seq_reset(taa_data->prev_irqs_seq); + trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s \t\t\t%9.2f us\n", + "nmi", ns_to_usf(duration)); + return 0; + } + + taa_data->thread_nmi_sum += duration; + trace_seq_printf(taa_data->nmi_seq, " %24s \t\t\t%9.2f us\n", + "nmi", ns_to_usf(duration)); + + return 0; +} + +/* + * timerlat_aa_irq_handler - Handles IRQ noise + * + * It is used to collect information about interferences from IRQ. It is + * hooked to the osnoise:irq_noise event. + * + * It is a little bit more complex than the other because it measures: + * - The IRQs that can delay the timer IRQ before it happened. + * - The Timerlat IRQ handler + * - The IRQs that happened between the timerlat IRQ and the timerlat thread + * (IRQ interference). + */ +static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long long expected_start; + unsigned long long duration; + unsigned long long vector; + unsigned long long start; + char *desc; + int val; + + tep_get_field_val(s, event, "duration", record, &duration, 1); + tep_get_field_val(s, event, "start", record, &start, 1); + tep_get_field_val(s, event, "vector", record, &vector, 1); + desc = tep_get_field_raw(s, event, "desc", record, &val, 1); + + /* + * Before the timerlat IRQ. + */ + if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) { + taa_data->prev_irq_duration = duration; + taa_data->prev_irq_timstamp = start; + + trace_seq_reset(taa_data->prev_irqs_seq); + trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu \t\t%9.2f us\n", + desc, vector, ns_to_usf(duration)); + return 0; + } + + /* + * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at + * the timerlat irq handler. + */ + if (!taa_data->timer_irq_start_time) { + expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency; + + taa_data->timer_irq_start_time = start; + taa_data->timer_irq_duration = duration; + + /* + * We are dealing with two different clock sources: the + * external clock source that timerlat uses as a reference + * and the clock used by the tracer. There are also two + * moments: the time reading the clock and the timer in + * which the event is placed in the buffer (the trace + * event timestamp). If the processor is slow or there + * is some hardware noise, the difference between the + * timestamp and the external clock read can be longer + * than the IRQ handler delay, resulting in a negative + * time. If so, set IRQ start delay as 0. In the end, + * it is less relevant than the noise. + */ + if (expected_start < taa_data->timer_irq_start_time) + taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start; + else + taa_data->timer_irq_start_delay = 0; + + /* + * not exit from idle. + */ + if (taa_data->run_thread_pid) + return 0; + + if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) + taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay; + + return 0; + } + + /* + * IRQ interference. + */ + taa_data->thread_irq_sum += duration; + trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu \t %9.2f us\n", + desc, vector, ns_to_usf(duration)); + + return 0; +} + +static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", + "IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" }; + + +/* + * timerlat_aa_softirq_handler - Handles Softirq noise + * + * It is used to collect information about interferences from Softirq. It is + * hooked to the osnoise:softirq_noise event. + * + * It is only printed in the non-rt kernel, as softirqs become thread on RT. + */ +static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long long duration; + unsigned long long vector; + unsigned long long start; + + if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) + return 0; + + tep_get_field_val(s, event, "duration", record, &duration, 1); + tep_get_field_val(s, event, "start", record, &start, 1); + tep_get_field_val(s, event, "vector", record, &vector, 1); + + taa_data->thread_softirq_sum += duration; + + trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu \t %9.2f us\n", + softirq_name[vector], vector, ns_to_usf(duration)); + return 0; +} + +/* + * timerlat_aa_softirq_handler - Handles thread noise + * + * It is used to collect information about interferences from threads. It is + * hooked to the osnoise:thread_noise event. + * + * Note: if you see thread noise, your timerlat thread was not the highest prio one. + */ +static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long long duration; + unsigned long long start; + unsigned long long pid; + const char *comm; + int val; + + if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) + return 0; + + tep_get_field_val(s, event, "duration", record, &duration, 1); + tep_get_field_val(s, event, "start", record, &start, 1); + + tep_get_common_field_val(s, event, "common_pid", record, &pid, 1); + comm = tep_get_field_raw(s, event, "comm", record, &val, 1); + + if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) { + taa_data->thread_blocking_duration = duration; + + if (comm) + strncpy(taa_data->run_thread_comm, comm, MAX_COMM); + else + sprintf(taa_data->run_thread_comm, "<...>"); + + } else { + taa_data->thread_thread_sum += duration; + + trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu \t\t%9.2f us\n", + comm, pid, ns_to_usf(duration)); + } + + return 0; +} + +/* + * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace + * + * Saves and parse the stack trace generated by the timerlat IRQ. + */ +static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long *caller; + const char *function; + int val, i; + + trace_seq_reset(taa_data->stack_seq); + + trace_seq_printf(taa_data->stack_seq, " Blocking thread stack trace\n"); + caller = tep_get_field_raw(s, event, "caller", record, &val, 1); + if (caller) { + for (i = 0; ; i++) { + function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]); + if (!function) + break; + trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function); + } + } + return 0; +} + +/* + * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU + * + * Handles the sched:sched_switch event to trace the current thread running on the + * CPU. It is used to display the threads running on the other CPUs when the trace + * stops. + */ +static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + const char *comm; + int val; + + tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1); + comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1); + + strncpy(taa_data->current_comm, comm, MAX_COMM); + + /* + * If this was a kworker, clean the last kworkers that ran. + */ + taa_data->kworker = 0; + taa_data->kworker_func = 0; + + return 0; +} + +/* + * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU + * + * Handles workqueue:workqueue_execute_start event, keeping track of + * the job that a kworker could be doing in the CPU. + * + * We already catch problems of hardware related latencies caused by work queues + * running driver code that causes hardware stall. For example, with DRM drivers. + */ +static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + + tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1); + tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1); + return 0; +} + +/* + * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing + * + * This is the core of the analysis. + */ +static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, + int irq_thresh, int thread_thresh) +{ + long long exp_irq_ts; + int total; + int irq; + + /* + * IRQ latency or Thread latency? + */ + if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) { + irq = 1; + total = taa_data->tlat_irq_latency; + } else { + irq = 0; + total = taa_data->tlat_thread_latency; + } + + /* + * Expected IRQ arrival time using the trace clock as the base. + * + * TODO: Add a list of previous IRQ, and then run the list backwards. + */ + exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay; + if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) { + if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time) + printf(" Previous IRQ interference: \t\t up to %9.2f us\n", + ns_to_usf(taa_data->prev_irq_duration)); + } + + /* + * The delay that the IRQ suffered before starting. + */ + printf(" IRQ handler delay: %16s %9.2f us (%.2f %%)\n", + (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "", + ns_to_usf(taa_data->timer_irq_start_delay), + ns_to_per(total, taa_data->timer_irq_start_delay)); + + /* + * Timerlat IRQ. + */ + printf(" IRQ latency: \t\t\t\t %9.2f us\n", + ns_to_usf(taa_data->tlat_irq_latency)); + + if (irq) { + /* + * If the trace stopped due to IRQ, the other events will not happen + * because... the trace stopped :-). + * + * That is all folks, the stack trace was printed before the stop, + * so it will be displayed, it is the key. + */ + printf(" Blocking thread:\n"); + printf(" %24s:%-9llu\n", + taa_data->run_thread_comm, taa_data->run_thread_pid); + } else { + /* + * The duration of the IRQ handler that handled the timerlat IRQ. + */ + printf(" Timerlat IRQ duration: \t\t %9.2f us (%.2f %%)\n", + ns_to_usf(taa_data->timer_irq_duration), + ns_to_per(total, taa_data->timer_irq_duration)); + + /* + * The amount of time that the current thread postponed the scheduler. + * + * Recalling that it is net from NMI/IRQ/Softirq interference, so there + * is no need to compute values here. + */ + printf(" Blocking thread: \t\t\t %9.2f us (%.2f %%)\n", + ns_to_usf(taa_data->thread_blocking_duration), + ns_to_per(total, taa_data->thread_blocking_duration)); + + printf(" %24s:%-9llu %9.2f us\n", + taa_data->run_thread_comm, taa_data->run_thread_pid, + ns_to_usf(taa_data->thread_blocking_duration)); + } + + /* + * Print the stack trace! + */ + trace_seq_do_printf(taa_data->stack_seq); + + /* + * NMIs can happen during the IRQ, so they are always possible. + */ + if (taa_data->thread_nmi_sum) + printf(" NMI interference \t\t\t %9.2f us (%.2f %%)\n", + ns_to_usf(taa_data->thread_nmi_sum), + ns_to_per(total, taa_data->thread_nmi_sum)); + + /* + * If it is an IRQ latency, the other factors can be skipped. + */ + if (irq) + goto print_total; + + /* + * Prints the interference caused by IRQs to the thread latency. + */ + if (taa_data->thread_irq_sum) { + printf(" IRQ interference \t\t\t %9.2f us (%.2f %%)\n", + ns_to_usf(taa_data->thread_irq_sum), + ns_to_per(total, taa_data->thread_irq_sum)); + + trace_seq_do_printf(taa_data->irqs_seq); + } + + /* + * Prints the interference caused by Softirqs to the thread latency. + */ + if (taa_data->thread_softirq_sum) { + printf(" Softirq interference \t\t\t %9.2f us (%.2f %%)\n", + ns_to_usf(taa_data->thread_softirq_sum), + ns_to_per(total, taa_data->thread_softirq_sum)); + + trace_seq_do_printf(taa_data->softirqs_seq); + } + + /* + * Prints the interference caused by other threads to the thread latency. + * + * If this happens, your timerlat is not the highest prio. OK, migration + * thread can happen. But otherwise, you are not measuring the "scheduling + * latency" only, and here is the difference from scheduling latency and + * timer handling latency. + */ + if (taa_data->thread_thread_sum) { + printf(" Thread interference \t\t\t %9.2f us (%.2f %%)\n", + ns_to_usf(taa_data->thread_thread_sum), + ns_to_per(total, taa_data->thread_thread_sum)); + + trace_seq_do_printf(taa_data->threads_seq); + } + + /* + * Done. + */ +print_total: + printf("------------------------------------------------------------------------\n"); + printf(" %s latency: \t\t\t %9.2f us (100%%)\n", irq ? "IRQ" : "Thread", + ns_to_usf(total)); +} + +static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx) +{ + struct trace_instance *trace = &taa_ctx->tool->trace; + int retval; + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + return 0; + } + + return 1; +} + +/** + * timerlat_auto_analysis - Analyze the collected data + */ +void timerlat_auto_analysis(int irq_thresh, int thread_thresh) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + unsigned long long max_exit_from_idle = 0; + struct timerlat_aa_data *taa_data; + int max_exit_from_idle_cpu; + struct tep_handle *tep; + int cpu; + + timerlat_auto_analysis_collect_trace(taa_ctx); + + /* bring stop tracing to the ns scale */ + irq_thresh = irq_thresh * 1000; + thread_thresh = thread_thresh * 1000; + + for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) { + taa_data = timerlat_aa_get_data(taa_ctx, cpu); + + if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) { + printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu); + timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh); + } else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) { + printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu); + timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh); + } + + if (taa_data->max_exit_idle_latency > max_exit_from_idle) { + max_exit_from_idle = taa_data->max_exit_idle_latency; + max_exit_from_idle_cpu = cpu; + } + + } + + if (max_exit_from_idle) { + printf("\n"); + printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n", + ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu); + } + if (!taa_ctx->dump_tasks) + return; + + printf("\n"); + printf("Printing CPU tasks:\n"); + for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) { + taa_data = timerlat_aa_get_data(taa_ctx, cpu); + tep = taa_ctx->tool->trace.tep; + + printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid); + + if (taa_data->kworker_func) + printf(" kworker:%s:%s", + tep_find_function(tep, taa_data->kworker) ? : "<...>", + tep_find_function(tep, taa_data->kworker_func)); + printf("\n"); + } + +} + +/* + * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data + */ +static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx) +{ + struct timerlat_aa_data *taa_data; + int i; + + if (!taa_ctx->taa_data) + return; + + for (i = 0; i < taa_ctx->nr_cpus; i++) { + taa_data = timerlat_aa_get_data(taa_ctx, i); + + if (taa_data->prev_irqs_seq) { + trace_seq_destroy(taa_data->prev_irqs_seq); + free(taa_data->prev_irqs_seq); + } + + if (taa_data->nmi_seq) { + trace_seq_destroy(taa_data->nmi_seq); + free(taa_data->nmi_seq); + } + + if (taa_data->irqs_seq) { + trace_seq_destroy(taa_data->irqs_seq); + free(taa_data->irqs_seq); + } + + if (taa_data->softirqs_seq) { + trace_seq_destroy(taa_data->softirqs_seq); + free(taa_data->softirqs_seq); + } + + if (taa_data->threads_seq) { + trace_seq_destroy(taa_data->threads_seq); + free(taa_data->threads_seq); + } + + if (taa_data->stack_seq) { + trace_seq_destroy(taa_data->stack_seq); + free(taa_data->stack_seq); + } + } +} + +/* + * timerlat_aa_init_seqs - Init seq files used to store parsed information + * + * Instead of keeping data structures to store raw data, use seq files to + * store parsed data. + * + * Allocates and initialize seq files. + * + * Returns 0 on success, -1 otherwise. + */ +static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx) +{ + struct timerlat_aa_data *taa_data; + int i; + + for (i = 0; i < taa_ctx->nr_cpus; i++) { + + taa_data = timerlat_aa_get_data(taa_ctx, i); + + taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq)); + if (!taa_data->prev_irqs_seq) + goto out_err; + + trace_seq_init(taa_data->prev_irqs_seq); + + taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq)); + if (!taa_data->nmi_seq) + goto out_err; + + trace_seq_init(taa_data->nmi_seq); + + taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq)); + if (!taa_data->irqs_seq) + goto out_err; + + trace_seq_init(taa_data->irqs_seq); + + taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq)); + if (!taa_data->softirqs_seq) + goto out_err; + + trace_seq_init(taa_data->softirqs_seq); + + taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq)); + if (!taa_data->threads_seq) + goto out_err; + + trace_seq_init(taa_data->threads_seq); + + taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq)); + if (!taa_data->stack_seq) + goto out_err; + + trace_seq_init(taa_data->stack_seq); + } + + return 0; + +out_err: + timerlat_aa_destroy_seqs(taa_ctx); + return -1; +} + +/* + * timerlat_aa_unregister_events - Unregister events used in the auto-analysis + */ +static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks) +{ + + tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", + timerlat_aa_handler, tool); + + tracefs_event_disable(tool->trace.inst, "osnoise", NULL); + + tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise", + timerlat_aa_nmi_handler, tool); + + tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise", + timerlat_aa_irq_handler, tool); + + tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise", + timerlat_aa_softirq_handler, tool); + + tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise", + timerlat_aa_thread_handler, tool); + + tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack", + timerlat_aa_stack_handler, tool); + if (!dump_tasks) + return; + + tracefs_event_disable(tool->trace.inst, "sched", "sched_switch"); + tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch", + timerlat_aa_sched_switch_handler, tool); + + tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start"); + tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start", + timerlat_aa_kworker_start_handler, tool); +} + +/* + * timerlat_aa_register_events - Register events used in the auto-analysis + * + * Returns 0 on success, -1 otherwise. + */ +static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks) +{ + int retval; + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", + timerlat_aa_handler, tool); + + + /* + * register auto-analysis handlers. + */ + retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL); + if (retval < 0 && !errno) { + err_msg("Could not find osnoise events\n"); + goto out_err; + } + + tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise", + timerlat_aa_nmi_handler, tool); + + tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise", + timerlat_aa_irq_handler, tool); + + tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise", + timerlat_aa_softirq_handler, tool); + + tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise", + timerlat_aa_thread_handler, tool); + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack", + timerlat_aa_stack_handler, tool); + + if (!dump_tasks) + return 0; + + /* + * Dump task events. + */ + retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch"); + if (retval < 0 && !errno) { + err_msg("Could not find sched_switch\n"); + goto out_err; + } + + tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch", + timerlat_aa_sched_switch_handler, tool); + + retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start"); + if (retval < 0 && !errno) { + err_msg("Could not find workqueue_execute_start\n"); + goto out_err; + } + + tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start", + timerlat_aa_kworker_start_handler, tool); + + return 0; + +out_err: + timerlat_aa_unregister_events(tool, dump_tasks); + return -1; +} + +/** + * timerlat_aa_destroy - Destroy timerlat auto-analysis + */ +void timerlat_aa_destroy(void) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + + if (!taa_ctx) + return; + + if (!taa_ctx->taa_data) + goto out_ctx; + + timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks); + timerlat_aa_destroy_seqs(taa_ctx); + free(taa_ctx->taa_data); +out_ctx: + free(taa_ctx); +} + +/** + * timerlat_aa_init - Initialize timerlat auto-analysis + * + * Returns 0 on success, -1 otherwise. + */ +int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks) +{ + int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + struct timerlat_aa_context *taa_ctx; + int retval; + + taa_ctx = calloc(1, sizeof(*taa_ctx)); + if (!taa_ctx) + return -1; + + __timerlat_aa_ctx = taa_ctx; + + taa_ctx->nr_cpus = nr_cpus; + taa_ctx->tool = tool; + taa_ctx->dump_tasks = dump_tasks; + + taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data)); + if (!taa_ctx->taa_data) + goto out_err; + + retval = timerlat_aa_init_seqs(taa_ctx); + if (retval) + goto out_err; + + retval = timerlat_aa_register_events(tool, dump_tasks); + if (retval) + goto out_err; + + return 0; + +out_err: + timerlat_aa_destroy(); + return -1; +} diff --git a/tools/tracing/rtla/src/timerlat_aa.h b/tools/tracing/rtla/src/timerlat_aa.h new file mode 100644 index 0000000000..cea4bb1531 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_aa.h @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +int timerlat_aa_init(struct osnoise_tool *tool, int dump_task); +void timerlat_aa_destroy(void); + +void timerlat_auto_analysis(int irq_thresh, int thread_thresh); diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c new file mode 100644 index 0000000000..47d3d8b53c --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -0,0 +1,1128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#define _GNU_SOURCE +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> +#include <sched.h> +#include <pthread.h> + +#include "utils.h" +#include "osnoise.h" +#include "timerlat.h" +#include "timerlat_aa.h" +#include "timerlat_u.h" + +struct timerlat_hist_params { + char *cpus; + cpu_set_t monitored_cpus; + char *trace_output; + char *cgroup_name; + unsigned long long runtime; + long long stop_us; + long long stop_total_us; + long long timerlat_period_us; + long long print_stack; + int sleep_time; + int output_divisor; + int duration; + int set_sched; + int dma_latency; + int cgroup; + int hk_cpus; + int no_aa; + int dump_tasks; + int user_hist; + cpu_set_t hk_cpu_set; + struct sched_attr sched_param; + struct trace_events *events; + char no_irq; + char no_thread; + char no_header; + char no_summary; + char no_index; + char with_zeros; + int bucket_size; + int entries; +}; + +struct timerlat_hist_cpu { + int *irq; + int *thread; + int *user; + + int irq_count; + int thread_count; + int user_count; + + unsigned long long min_irq; + unsigned long long sum_irq; + unsigned long long max_irq; + + unsigned long long min_thread; + unsigned long long sum_thread; + unsigned long long max_thread; + + unsigned long long min_user; + unsigned long long sum_user; + unsigned long long max_user; +}; + +struct timerlat_hist_data { + struct timerlat_hist_cpu *hist; + int entries; + int bucket_size; + int nr_cpus; +}; + +/* + * timerlat_free_histogram - free runtime data + */ +static void +timerlat_free_histogram(struct timerlat_hist_data *data) +{ + int cpu; + + /* one histogram for IRQ and one for thread, per CPU */ + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (data->hist[cpu].irq) + free(data->hist[cpu].irq); + + if (data->hist[cpu].thread) + free(data->hist[cpu].thread); + + if (data->hist[cpu].user) + free(data->hist[cpu].user); + + } + + /* one set of histograms per CPU */ + if (data->hist) + free(data->hist); + + free(data); +} + +/* + * timerlat_alloc_histogram - alloc runtime data + */ +static struct timerlat_hist_data +*timerlat_alloc_histogram(int nr_cpus, int entries, int bucket_size) +{ + struct timerlat_hist_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->entries = entries; + data->bucket_size = bucket_size; + data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->hist = calloc(1, sizeof(*data->hist) * nr_cpus); + if (!data->hist) + goto cleanup; + + /* one histogram for IRQ and one for thread, per cpu */ + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].irq = calloc(1, sizeof(*data->hist->irq) * (entries + 1)); + if (!data->hist[cpu].irq) + goto cleanup; + + data->hist[cpu].thread = calloc(1, sizeof(*data->hist->thread) * (entries + 1)); + if (!data->hist[cpu].thread) + goto cleanup; + + data->hist[cpu].user = calloc(1, sizeof(*data->hist->user) * (entries + 1)); + if (!data->hist[cpu].user) + goto cleanup; + } + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].min_irq = ~0; + data->hist[cpu].min_thread = ~0; + data->hist[cpu].min_user = ~0; + } + + return data; + +cleanup: + timerlat_free_histogram(data); + return NULL; +} + +/* + * timerlat_hist_update - record a new timerlat occurent on cpu, updating data + */ +static void +timerlat_hist_update(struct osnoise_tool *tool, int cpu, + unsigned long long context, + unsigned long long latency) +{ + struct timerlat_hist_params *params = tool->params; + struct timerlat_hist_data *data = tool->data; + int entries = data->entries; + int bucket; + int *hist; + + if (params->output_divisor) + latency = latency / params->output_divisor; + + if (data->bucket_size) + bucket = latency / data->bucket_size; + + if (!context) { + hist = data->hist[cpu].irq; + data->hist[cpu].irq_count++; + update_min(&data->hist[cpu].min_irq, &latency); + update_sum(&data->hist[cpu].sum_irq, &latency); + update_max(&data->hist[cpu].max_irq, &latency); + } else if (context == 1) { + hist = data->hist[cpu].thread; + data->hist[cpu].thread_count++; + update_min(&data->hist[cpu].min_thread, &latency); + update_sum(&data->hist[cpu].sum_thread, &latency); + update_max(&data->hist[cpu].max_thread, &latency); + } else { /* user */ + hist = data->hist[cpu].user; + data->hist[cpu].user_count++; + update_min(&data->hist[cpu].min_user, &latency); + update_sum(&data->hist[cpu].sum_user, &latency); + update_max(&data->hist[cpu].max_user, &latency); + } + + if (bucket < entries) + hist[bucket]++; + else + hist[entries]++; +} + +/* + * timerlat_hist_handler - this is the handler for timerlat tracer events + */ +static int +timerlat_hist_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *data) +{ + struct trace_instance *trace = data; + unsigned long long context, latency; + struct osnoise_tool *tool; + int cpu = record->cpu; + + tool = container_of(trace, struct osnoise_tool, trace); + + tep_get_field_val(s, event, "context", record, &context, 1); + tep_get_field_val(s, event, "timer_latency", record, &latency, 1); + + timerlat_hist_update(tool, cpu, context, latency); + + return 0; +} + +/* + * timerlat_hist_header - print the header of the tracer to the output + */ +static void timerlat_hist_header(struct osnoise_tool *tool) +{ + struct timerlat_hist_params *params = tool->params; + struct timerlat_hist_data *data = tool->data; + struct trace_seq *s = tool->trace.seq; + char duration[26]; + int cpu; + + if (params->no_header) + return; + + get_duration(tool->start_time, duration, sizeof(duration)); + trace_seq_printf(s, "# RTLA timerlat histogram\n"); + trace_seq_printf(s, "# Time unit is %s (%s)\n", + params->output_divisor == 1 ? "nanoseconds" : "microseconds", + params->output_divisor == 1 ? "ns" : "us"); + + trace_seq_printf(s, "# Duration: %s\n", duration); + + if (!params->no_index) + trace_seq_printf(s, "Index"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(s, " IRQ-%03d", cpu); + + if (!params->no_thread) + trace_seq_printf(s, " Thr-%03d", cpu); + + if (params->user_hist) + trace_seq_printf(s, " Usr-%03d", cpu); + } + trace_seq_printf(s, "\n"); + + + trace_seq_do_printf(s); + trace_seq_reset(s); +} + +/* + * timerlat_print_summary - print the summary of the hist data to the output + */ +static void +timerlat_print_summary(struct timerlat_hist_params *params, + struct trace_instance *trace, + struct timerlat_hist_data *data) +{ + int cpu; + + if (params->no_summary) + return; + + if (!params->no_index) + trace_seq_printf(trace->seq, "count:"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].irq_count); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].thread_count); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].user_count); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "min: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].min_irq); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].min_thread); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].min_user); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "avg: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) { + if (data->hist[cpu].irq_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_irq / data->hist[cpu].irq_count); + else + trace_seq_printf(trace->seq, " - "); + } + + if (!params->no_thread) { + if (data->hist[cpu].thread_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_thread / data->hist[cpu].thread_count); + else + trace_seq_printf(trace->seq, " - "); + } + + if (params->user_hist) { + if (data->hist[cpu].user_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_user / data->hist[cpu].user_count); + else + trace_seq_printf(trace->seq, " - "); + } + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "max: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_irq); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_thread); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_user); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * timerlat_print_stats - print data for all CPUs + */ +static void +timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *tool) +{ + struct timerlat_hist_data *data = tool->data; + struct trace_instance *trace = &tool->trace; + int bucket, cpu; + int total; + + timerlat_hist_header(tool); + + for (bucket = 0; bucket < data->entries; bucket++) { + total = 0; + + if (!params->no_index) + trace_seq_printf(trace->seq, "%-6d", + bucket * data->bucket_size); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) { + total += data->hist[cpu].irq[bucket]; + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].irq[bucket]); + } + + if (!params->no_thread) { + total += data->hist[cpu].thread[bucket]; + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].thread[bucket]); + } + + if (params->user_hist) { + total += data->hist[cpu].user[bucket]; + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].user[bucket]); + } + + } + + if (total == 0 && !params->with_zeros) { + trace_seq_reset(trace->seq); + continue; + } + + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + } + + if (!params->no_index) + trace_seq_printf(trace->seq, "over: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].irq[data->entries]); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].thread[data->entries]); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].user[data->entries]); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + + timerlat_print_summary(params, trace, data); +} + +/* + * timerlat_hist_usage - prints timerlat top usage message + */ +static void timerlat_hist_usage(char *usage) +{ + int i; + + char *msg[] = { + "", + " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-a us] [-p us] [-i us] [-T us] [-s us] \\", + " [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", + " [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\", + " [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task] [-u]", + "", + " -h/--help: print this menu", + " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", + " -p/--period us: timerlat period in us", + " -i/--irq us: stop trace if the irq latency is higher than the argument in us", + " -T/--thread us: stop trace if the thread latency is higher than the argument in us", + " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", + " -c/--cpus cpus: run the tracer only on the given cpus", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", + " -d/--duration time[m|h|d]: duration of the session in seconds", + " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)", + " -D/--debug: print debug info", + " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", + " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", + " --filter <filter>: enable a trace event filter to the previous -e event", + " --trigger <trigger>: enable a trace event trigger to the previous -e event", + " -n/--nano: display data in nanoseconds", + " --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage", + " -b/--bucket-size N: set the histogram bucket size (default 1)", + " -E/--entries N: set the number of entries of the histogram (default 256)", + " --no-irq: ignore IRQ latencies", + " --no-thread: ignore thread latencies", + " --no-header: do not print header", + " --no-summary: do not print summary", + " --no-index: do not print index", + " --with-zeros: print zero only entries", + " --dma-latency us: set /dev/cpu_dma_latency latency <us> to reduce exit from idle latency", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + " -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla timerlat hist: a per-cpu histogram of the timer latency (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * timerlat_hist_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct timerlat_hist_params +*timerlat_hist_parse_args(int argc, char *argv[]) +{ + struct timerlat_hist_params *params; + struct trace_events *tevent; + int auto_thresh; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + /* disabled by default */ + params->dma_latency = -1; + + /* display data in microseconds */ + params->output_divisor = 1000; + params->bucket_size = 1; + params->entries = 256; + + while (1) { + static struct option long_options[] = { + {"auto", required_argument, 0, 'a'}, + {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, + {"bucket-size", required_argument, 0, 'b'}, + {"debug", no_argument, 0, 'D'}, + {"entries", required_argument, 0, 'E'}, + {"duration", required_argument, 0, 'd'}, + {"house-keeping", required_argument, 0, 'H'}, + {"help", no_argument, 0, 'h'}, + {"irq", required_argument, 0, 'i'}, + {"nano", no_argument, 0, 'n'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"stack", required_argument, 0, 's'}, + {"thread", required_argument, 0, 'T'}, + {"trace", optional_argument, 0, 't'}, + {"user-threads", no_argument, 0, 'u'}, + {"event", required_argument, 0, 'e'}, + {"no-irq", no_argument, 0, '0'}, + {"no-thread", no_argument, 0, '1'}, + {"no-header", no_argument, 0, '2'}, + {"no-summary", no_argument, 0, '3'}, + {"no-index", no_argument, 0, '4'}, + {"with-zeros", no_argument, 0, '5'}, + {"trigger", required_argument, 0, '6'}, + {"filter", required_argument, 0, '7'}, + {"dma-latency", required_argument, 0, '8'}, + {"no-aa", no_argument, 0, '9'}, + {"dump-task", no_argument, 0, '\1'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:np:P:s:t::T:u0123456:7:8:9\1", + long_options, &option_index); + + /* detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'a': + auto_thresh = get_llong_from_str(optarg); + + /* set thread stop to auto_thresh */ + params->stop_total_us = auto_thresh; + params->stop_us = auto_thresh; + + /* get stack trace */ + params->print_stack = auto_thresh; + + /* set trace */ + params->trace_output = "timerlat_trace.txt"; + + break; + case 'c': + retval = parse_cpu_set(optarg, ¶ms->monitored_cpus); + if (retval) + timerlat_hist_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'C': + params->cgroup = 1; + if (!optarg) { + /* will inherit this cgroup */ + params->cgroup_name = NULL; + } else if (*optarg == '=') { + /* skip the = */ + params->cgroup_name = ++optarg; + } + break; + case 'b': + params->bucket_size = get_llong_from_str(optarg); + if ((params->bucket_size == 0) || (params->bucket_size >= 1000000)) + timerlat_hist_usage("Bucket size needs to be > 0 and <= 1000000\n"); + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + timerlat_hist_usage("Invalid -D duration\n"); + break; + case 'e': + tevent = trace_event_alloc(optarg); + if (!tevent) { + err_msg("Error alloc trace event"); + exit(EXIT_FAILURE); + } + + if (params->events) + tevent->next = params->events; + + params->events = tevent; + break; + case 'E': + params->entries = get_llong_from_str(optarg); + if ((params->entries < 10) || (params->entries > 9999999)) + timerlat_hist_usage("Entries must be > 10 and < 9999999\n"); + break; + case 'h': + case '?': + timerlat_hist_usage(NULL); + break; + case 'H': + params->hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->hk_cpu_set); + if (retval) { + err_msg("Error parsing house keeping CPUs\n"); + exit(EXIT_FAILURE); + } + break; + case 'i': + params->stop_us = get_llong_from_str(optarg); + break; + case 'n': + params->output_divisor = 1; + break; + case 'p': + params->timerlat_period_us = get_llong_from_str(optarg); + if (params->timerlat_period_us > 1000000) + timerlat_hist_usage("Period longer than 1 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + timerlat_hist_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 's': + params->print_stack = get_llong_from_str(optarg); + break; + case 'T': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "timerlat_trace.txt"; + break; + case 'u': + params->user_hist = 1; + break; + case '0': /* no irq */ + params->no_irq = 1; + break; + case '1': /* no thread */ + params->no_thread = 1; + break; + case '2': /* no header */ + params->no_header = 1; + break; + case '3': /* no summary */ + params->no_summary = 1; + break; + case '4': /* no index */ + params->no_index = 1; + break; + case '5': /* with zeros */ + params->with_zeros = 1; + break; + case '6': /* trigger */ + if (params->events) { + retval = trace_event_add_trigger(params->events, optarg); + if (retval) { + err_msg("Error adding trigger %s\n", optarg); + exit(EXIT_FAILURE); + } + } else { + timerlat_hist_usage("--trigger requires a previous -e\n"); + } + break; + case '7': /* filter */ + if (params->events) { + retval = trace_event_add_filter(params->events, optarg); + if (retval) { + err_msg("Error adding filter %s\n", optarg); + exit(EXIT_FAILURE); + } + } else { + timerlat_hist_usage("--filter requires a previous -e\n"); + } + break; + case '8': + params->dma_latency = get_llong_from_str(optarg); + if (params->dma_latency < 0 || params->dma_latency > 10000) { + err_msg("--dma-latency needs to be >= 0 and < 10000"); + exit(EXIT_FAILURE); + } + break; + case '9': + params->no_aa = 1; + break; + case '\1': + params->dump_tasks = 1; + break; + default: + timerlat_hist_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("rtla needs root permission\n"); + exit(EXIT_FAILURE); + } + + if (params->no_irq && params->no_thread) + timerlat_hist_usage("no-irq and no-thread set, there is nothing to do here"); + + if (params->no_index && !params->with_zeros) + timerlat_hist_usage("no-index set with with-zeros is not set - it does not make sense"); + + /* + * Auto analysis only happens if stop tracing, thus: + */ + if (!params->stop_us && !params->stop_total_us) + params->no_aa = 1; + + return params; +} + +/* + * timerlat_hist_apply_config - apply the hist configs to the initialized tool + */ +static int +timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_params *params) +{ + int retval, i; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(tool->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } else { + for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++) + CPU_SET(i, ¶ms->monitored_cpus); + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + if (params->timerlat_period_us) { + retval = osnoise_set_timerlat_period_us(tool->context, params->timerlat_period_us); + if (retval) { + err_msg("Failed to set timerlat period\n"); + goto out_err; + } + } + + if (params->print_stack) { + retval = osnoise_set_print_stack(tool->context, params->print_stack); + if (retval) { + err_msg("Failed to set print stack\n"); + goto out_err; + } + } + + if (params->hk_cpus) { + retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set), + ¶ms->hk_cpu_set); + if (retval == -1) { + err_msg("Failed to set rtla to the house keeping CPUs\n"); + goto out_err; + } + } else if (params->cpus) { + /* + * Even if the user do not set a house-keeping CPU, try to + * move rtla to a CPU set different to the one where the user + * set the workload to run. + * + * No need to check results as this is an automatic attempt. + */ + auto_house_keeping(¶ms->monitored_cpus); + } + + if (params->user_hist) { + retval = osnoise_set_workload(tool->context, 0); + if (retval) { + err_msg("Failed to set OSNOISE_WORKLOAD option\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * timerlat_init_hist - initialize a timerlat hist tool with parameters + */ +static struct osnoise_tool +*timerlat_init_hist(struct timerlat_hist_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("timerlat_hist"); + if (!tool) + return NULL; + + tool->data = timerlat_alloc_histogram(nr_cpus, params->entries, params->bucket_size); + if (!tool->data) + goto out_err; + + tool->params = params; + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", + timerlat_hist_handler, tool); + + return tool; + +out_err: + osnoise_destroy_tool(tool); + return NULL; +} + +static int stop_tracing; +static void stop_hist(int sig) +{ + stop_tracing = 1; +} + +/* + * timerlat_hist_set_signals - handles the signal to stop the tool + */ +static void +timerlat_hist_set_signals(struct timerlat_hist_params *params) +{ + signal(SIGINT, stop_hist); + if (params->duration) { + signal(SIGALRM, stop_hist); + alarm(params->duration); + } +} + +int timerlat_hist_main(int argc, char *argv[]) +{ + struct timerlat_hist_params *params; + struct osnoise_tool *record = NULL; + struct timerlat_u_params params_u; + struct osnoise_tool *tool = NULL; + struct osnoise_tool *aa = NULL; + struct trace_instance *trace; + int dma_latency_fd = -1; + int return_value = 1; + pthread_t timerlat_u; + int retval; + + params = timerlat_hist_parse_args(argc, argv); + if (!params) + exit(1); + + tool = timerlat_init_hist(params); + if (!tool) { + err_msg("Could not init osnoise hist\n"); + goto out_exit; + } + + retval = timerlat_hist_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_free; + } + + trace = &tool->trace; + + retval = enable_timerlat(trace); + if (retval) { + err_msg("Failed to enable timerlat tracer\n"); + goto out_free; + } + + if (params->set_sched) { + retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_free; + } + } + + if (params->cgroup && !params->user_hist) { + retval = set_comm_cgroup("timerlat/", params->cgroup_name); + if (!retval) { + err_msg("Failed to move threads to cgroup\n"); + goto out_free; + } + } + + if (params->dma_latency >= 0) { + dma_latency_fd = set_cpu_dma_latency(params->dma_latency); + if (dma_latency_fd < 0) { + err_msg("Could not set /dev/cpu_dma_latency.\n"); + goto out_free; + } + } + + if (params->trace_output) { + record = osnoise_init_trace_tool("timerlat"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_free; + } + + if (params->events) { + retval = trace_events_enable(&record->trace, params->events); + if (retval) + goto out_hist; + } + } + + if (!params->no_aa) { + aa = osnoise_init_tool("timerlat_aa"); + if (!aa) + goto out_hist; + + retval = timerlat_aa_init(aa, params->dump_tasks); + if (retval) { + err_msg("Failed to enable the auto analysis instance\n"); + goto out_hist; + } + + retval = enable_timerlat(&aa->trace); + if (retval) { + err_msg("Failed to enable timerlat tracer\n"); + goto out_hist; + } + } + + /* + * Start the tracers here, after having set all instances. + * + * Let the trace instance start first for the case of hitting a stop + * tracing while enabling other instances. The trace instance is the + * one with most valuable information. + */ + if (params->trace_output) + trace_instance_start(&record->trace); + if (!params->no_aa) + trace_instance_start(&aa->trace); + trace_instance_start(trace); + + tool->start_time = time(NULL); + timerlat_hist_set_signals(params); + + if (params->user_hist) { + /* rtla asked to stop */ + params_u.should_run = 1; + /* all threads left */ + params_u.stopped_running = 0; + + params_u.set = ¶ms->monitored_cpus; + if (params->set_sched) + params_u.sched_param = ¶ms->sched_param; + else + params_u.sched_param = NULL; + + params_u.cgroup_name = params->cgroup_name; + + retval = pthread_create(&timerlat_u, NULL, timerlat_u_dispatcher, ¶ms_u); + if (retval) + err_msg("Error creating timerlat user-space threads\n"); + } + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_hist; + } + + if (trace_is_off(&tool->trace, &record->trace)) + break; + + /* is there still any user-threads ? */ + if (params->user_hist) { + if (params_u.stopped_running) { + debug_msg("timerlat user-space threads stopped!\n"); + break; + } + } + } + if (params->user_hist && !params_u.stopped_running) { + params_u.should_run = 0; + sleep(1); + } + + timerlat_print_stats(params, tool); + + return_value = 0; + + if (trace_is_off(&tool->trace, &record->trace)) { + printf("rtla timerlat hit stop tracing\n"); + + if (!params->no_aa) + timerlat_auto_analysis(params->stop_us, params->stop_total_us); + + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_hist: + timerlat_aa_destroy(); + if (dma_latency_fd >= 0) + close(dma_latency_fd); + trace_events_destroy(&record->trace, params->events); + params->events = NULL; +out_free: + timerlat_free_histogram(tool->data); + osnoise_destroy_tool(aa); + osnoise_destroy_tool(record); + osnoise_destroy_tool(tool); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c new file mode 100644 index 0000000000..1640f121ba --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -0,0 +1,965 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#define _GNU_SOURCE +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> +#include <errno.h> +#include <sched.h> +#include <pthread.h> + +#include "utils.h" +#include "osnoise.h" +#include "timerlat.h" +#include "timerlat_aa.h" +#include "timerlat_u.h" + +struct timerlat_top_params { + char *cpus; + cpu_set_t monitored_cpus; + char *trace_output; + char *cgroup_name; + unsigned long long runtime; + long long stop_us; + long long stop_total_us; + long long timerlat_period_us; + long long print_stack; + int sleep_time; + int output_divisor; + int duration; + int quiet; + int set_sched; + int dma_latency; + int no_aa; + int aa_only; + int dump_tasks; + int cgroup; + int hk_cpus; + int user_top; + cpu_set_t hk_cpu_set; + struct sched_attr sched_param; + struct trace_events *events; +}; + +struct timerlat_top_cpu { + int irq_count; + int thread_count; + int user_count; + + unsigned long long cur_irq; + unsigned long long min_irq; + unsigned long long sum_irq; + unsigned long long max_irq; + + unsigned long long cur_thread; + unsigned long long min_thread; + unsigned long long sum_thread; + unsigned long long max_thread; + + unsigned long long cur_user; + unsigned long long min_user; + unsigned long long sum_user; + unsigned long long max_user; +}; + +struct timerlat_top_data { + struct timerlat_top_cpu *cpu_data; + int nr_cpus; +}; + +/* + * timerlat_free_top - free runtime data + */ +static void +timerlat_free_top(struct timerlat_top_data *data) +{ + free(data->cpu_data); + free(data); +} + +/* + * timerlat_alloc_histogram - alloc runtime data + */ +static struct timerlat_top_data *timerlat_alloc_top(int nr_cpus) +{ + struct timerlat_top_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus); + if (!data->cpu_data) + goto cleanup; + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->cpu_data[cpu].min_irq = ~0; + data->cpu_data[cpu].min_thread = ~0; + data->cpu_data[cpu].min_user = ~0; + } + + return data; + +cleanup: + timerlat_free_top(data); + return NULL; +} + +/* + * timerlat_hist_update - record a new timerlat occurent on cpu, updating data + */ +static void +timerlat_top_update(struct osnoise_tool *tool, int cpu, + unsigned long long thread, + unsigned long long latency) +{ + struct timerlat_top_data *data = tool->data; + struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; + + if (!thread) { + cpu_data->irq_count++; + cpu_data->cur_irq = latency; + update_min(&cpu_data->min_irq, &latency); + update_sum(&cpu_data->sum_irq, &latency); + update_max(&cpu_data->max_irq, &latency); + } else if (thread == 1) { + cpu_data->thread_count++; + cpu_data->cur_thread = latency; + update_min(&cpu_data->min_thread, &latency); + update_sum(&cpu_data->sum_thread, &latency); + update_max(&cpu_data->max_thread, &latency); + } else { + cpu_data->user_count++; + cpu_data->cur_user = latency; + update_min(&cpu_data->min_user, &latency); + update_sum(&cpu_data->sum_user, &latency); + update_max(&cpu_data->max_user, &latency); + } +} + +/* + * timerlat_top_handler - this is the handler for timerlat tracer events + */ +static int +timerlat_top_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct trace_instance *trace = context; + struct timerlat_top_params *params; + unsigned long long latency, thread; + struct osnoise_tool *top; + int cpu = record->cpu; + + top = container_of(trace, struct osnoise_tool, trace); + params = top->params; + + if (!params->aa_only) { + tep_get_field_val(s, event, "context", record, &thread, 1); + tep_get_field_val(s, event, "timer_latency", record, &latency, 1); + + timerlat_top_update(top, cpu, thread, latency); + } + + return 0; +} + +/* + * timerlat_top_header - print the header of the tool output + */ +static void timerlat_top_header(struct osnoise_tool *top) +{ + struct timerlat_top_params *params = top->params; + struct trace_seq *s = top->trace.seq; + char duration[26]; + + get_duration(top->start_time, duration, sizeof(duration)); + + trace_seq_printf(s, "\033[2;37;40m"); + trace_seq_printf(s, " Timer Latency "); + if (params->user_top) + trace_seq_printf(s, " "); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); + + trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)", duration, + params->output_divisor == 1 ? "ns" : "us", + params->output_divisor == 1 ? "ns" : "us"); + + if (params->user_top) { + trace_seq_printf(s, " | Ret user Timer Latency (%s)", + params->output_divisor == 1 ? "ns" : "us"); + } + + trace_seq_printf(s, "\n"); + trace_seq_printf(s, "\033[2;30;47m"); + trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max"); + if (params->user_top) + trace_seq_printf(s, " | cur min avg max"); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); +} + +/* + * timerlat_top_print - prints the output of a given CPU + */ +static void timerlat_top_print(struct osnoise_tool *top, int cpu) +{ + + struct timerlat_top_params *params = top->params; + struct timerlat_top_data *data = top->data; + struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; + int divisor = params->output_divisor; + struct trace_seq *s = top->trace.seq; + + if (divisor == 0) + return; + + /* + * Skip if no data is available: is this cpu offline? + */ + if (!cpu_data->irq_count && !cpu_data->thread_count) + return; + + /* + * Unless trace is being lost, IRQ counter is always the max. + */ + trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count); + + if (!cpu_data->irq_count) { + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - |"); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_irq / params->output_divisor); + trace_seq_printf(s, "%9llu ", cpu_data->min_irq / params->output_divisor); + trace_seq_printf(s, "%9llu ", (cpu_data->sum_irq / cpu_data->irq_count) / divisor); + trace_seq_printf(s, "%9llu |", cpu_data->max_irq / divisor); + } + + if (!cpu_data->thread_count) { + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " -\n"); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_thread / divisor); + trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor); + trace_seq_printf(s, "%9llu ", + (cpu_data->sum_thread / cpu_data->thread_count) / divisor); + trace_seq_printf(s, "%9llu", cpu_data->max_thread / divisor); + } + + if (!params->user_top) { + trace_seq_printf(s, "\n"); + return; + } + + trace_seq_printf(s, " |"); + + if (!cpu_data->user_count) { + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " -\n"); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_user / divisor); + trace_seq_printf(s, "%9llu ", cpu_data->min_user / divisor); + trace_seq_printf(s, "%9llu ", + (cpu_data->sum_user / cpu_data->user_count) / divisor); + trace_seq_printf(s, "%9llu\n", cpu_data->max_user / divisor); + } +} + +/* + * clear_terminal - clears the output terminal + */ +static void clear_terminal(struct trace_seq *seq) +{ + if (!config_debug) + trace_seq_printf(seq, "\033c"); +} + +/* + * timerlat_print_stats - print data for all cpus + */ +static void +timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *top) +{ + struct trace_instance *trace = &top->trace; + static int nr_cpus = -1; + int i; + + if (params->aa_only) + return; + + if (nr_cpus == -1) + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + if (!params->quiet) + clear_terminal(trace->seq); + + timerlat_top_header(top); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) + continue; + timerlat_top_print(top, i); + } + + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * timerlat_top_usage - prints timerlat top usage message + */ +static void timerlat_top_usage(char *usage) +{ + int i; + + static const char *const msg[] = { + "", + " usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\", + " [[-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", + " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u]", + "", + " -h/--help: print this menu", + " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", + " --aa-only us: stop if <us> latency is hit, only printing the auto analysis (reduces CPU usage)", + " -p/--period us: timerlat period in us", + " -i/--irq us: stop trace if the irq latency is higher than the argument in us", + " -T/--thread us: stop trace if the thread latency is higher than the argument in us", + " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", + " -c/--cpus cpus: run the tracer only on the given cpus", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", + " -d/--duration time[m|h|d]: duration of the session in seconds", + " -D/--debug: print debug info", + " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)", + " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", + " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", + " --filter <command>: enable a trace event filter to the previous -e event", + " --trigger <command>: enable a trace event trigger to the previous -e event", + " -n/--nano: display data in nanoseconds", + " --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage", + " -q/--quiet print only a summary at the end", + " --dma-latency us: set /dev/cpu_dma_latency latency <us> to reduce exit from idle latency", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + " -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla timerlat top: a per-cpu summary of the timer latency (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * timerlat_top_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct timerlat_top_params +*timerlat_top_parse_args(int argc, char **argv) +{ + struct timerlat_top_params *params; + struct trace_events *tevent; + long long auto_thresh; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + /* disabled by default */ + params->dma_latency = -1; + + /* display data in microseconds */ + params->output_divisor = 1000; + + while (1) { + static struct option long_options[] = { + {"auto", required_argument, 0, 'a'}, + {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"event", required_argument, 0, 'e'}, + {"help", no_argument, 0, 'h'}, + {"house-keeping", required_argument, 0, 'H'}, + {"irq", required_argument, 0, 'i'}, + {"nano", no_argument, 0, 'n'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"quiet", no_argument, 0, 'q'}, + {"stack", required_argument, 0, 's'}, + {"thread", required_argument, 0, 'T'}, + {"trace", optional_argument, 0, 't'}, + {"user-threads", no_argument, 0, 'u'}, + {"trigger", required_argument, 0, '0'}, + {"filter", required_argument, 0, '1'}, + {"dma-latency", required_argument, 0, '2'}, + {"no-aa", no_argument, 0, '3'}, + {"dump-tasks", no_argument, 0, '4'}, + {"aa-only", required_argument, 0, '5'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:np:P:qs:t::T:u0:1:2:345:", + long_options, &option_index); + + /* detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'a': + auto_thresh = get_llong_from_str(optarg); + + /* set thread stop to auto_thresh */ + params->stop_total_us = auto_thresh; + params->stop_us = auto_thresh; + + /* get stack trace */ + params->print_stack = auto_thresh; + + /* set trace */ + params->trace_output = "timerlat_trace.txt"; + break; + case '5': + /* it is here because it is similar to -a */ + auto_thresh = get_llong_from_str(optarg); + + /* set thread stop to auto_thresh */ + params->stop_total_us = auto_thresh; + params->stop_us = auto_thresh; + + /* get stack trace */ + params->print_stack = auto_thresh; + + /* set aa_only to avoid parsing the trace */ + params->aa_only = 1; + break; + case 'c': + retval = parse_cpu_set(optarg, ¶ms->monitored_cpus); + if (retval) + timerlat_top_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'C': + params->cgroup = 1; + if (!optarg) { + /* will inherit this cgroup */ + params->cgroup_name = NULL; + } else if (*optarg == '=') { + /* skip the = */ + params->cgroup_name = ++optarg; + } + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + timerlat_top_usage("Invalid -D duration\n"); + break; + case 'e': + tevent = trace_event_alloc(optarg); + if (!tevent) { + err_msg("Error alloc trace event"); + exit(EXIT_FAILURE); + } + + if (params->events) + tevent->next = params->events; + params->events = tevent; + break; + case 'h': + case '?': + timerlat_top_usage(NULL); + break; + case 'H': + params->hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->hk_cpu_set); + if (retval) { + err_msg("Error parsing house keeping CPUs\n"); + exit(EXIT_FAILURE); + } + break; + case 'i': + params->stop_us = get_llong_from_str(optarg); + break; + case 'n': + params->output_divisor = 1; + break; + case 'p': + params->timerlat_period_us = get_llong_from_str(optarg); + if (params->timerlat_period_us > 1000000) + timerlat_top_usage("Period longer than 1 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + timerlat_top_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 'q': + params->quiet = 1; + break; + case 's': + params->print_stack = get_llong_from_str(optarg); + break; + case 'T': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "timerlat_trace.txt"; + + break; + case 'u': + params->user_top = true; + break; + case '0': /* trigger */ + if (params->events) { + retval = trace_event_add_trigger(params->events, optarg); + if (retval) { + err_msg("Error adding trigger %s\n", optarg); + exit(EXIT_FAILURE); + } + } else { + timerlat_top_usage("--trigger requires a previous -e\n"); + } + break; + case '1': /* filter */ + if (params->events) { + retval = trace_event_add_filter(params->events, optarg); + if (retval) { + err_msg("Error adding filter %s\n", optarg); + exit(EXIT_FAILURE); + } + } else { + timerlat_top_usage("--filter requires a previous -e\n"); + } + break; + case '2': /* dma-latency */ + params->dma_latency = get_llong_from_str(optarg); + if (params->dma_latency < 0 || params->dma_latency > 10000) { + err_msg("--dma-latency needs to be >= 0 and < 10000"); + exit(EXIT_FAILURE); + } + break; + case '3': /* no-aa */ + params->no_aa = 1; + break; + case '4': + params->dump_tasks = 1; + break; + default: + timerlat_top_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("rtla needs root permission\n"); + exit(EXIT_FAILURE); + } + + /* + * Auto analysis only happens if stop tracing, thus: + */ + if (!params->stop_us && !params->stop_total_us) + params->no_aa = 1; + + if (params->no_aa && params->aa_only) + timerlat_top_usage("--no-aa and --aa-only are mutually exclusive!"); + + return params; +} + +/* + * timerlat_top_apply_config - apply the top configs to the initialized tool + */ +static int +timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *params) +{ + int retval; + int i; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(top->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } else { + for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++) + CPU_SET(i, ¶ms->monitored_cpus); + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(top->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(top->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + + if (params->timerlat_period_us) { + retval = osnoise_set_timerlat_period_us(top->context, params->timerlat_period_us); + if (retval) { + err_msg("Failed to set timerlat period\n"); + goto out_err; + } + } + + + if (params->print_stack) { + retval = osnoise_set_print_stack(top->context, params->print_stack); + if (retval) { + err_msg("Failed to set print stack\n"); + goto out_err; + } + } + + if (params->hk_cpus) { + retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set), + ¶ms->hk_cpu_set); + if (retval == -1) { + err_msg("Failed to set rtla to the house keeping CPUs\n"); + goto out_err; + } + } else if (params->cpus) { + /* + * Even if the user do not set a house-keeping CPU, try to + * move rtla to a CPU set different to the one where the user + * set the workload to run. + * + * No need to check results as this is an automatic attempt. + */ + auto_house_keeping(¶ms->monitored_cpus); + } + + if (params->user_top) { + retval = osnoise_set_workload(top->context, 0); + if (retval) { + err_msg("Failed to set OSNOISE_WORKLOAD option\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * timerlat_init_top - initialize a timerlat top tool with parameters + */ +static struct osnoise_tool +*timerlat_init_top(struct timerlat_top_params *params) +{ + struct osnoise_tool *top; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + top = osnoise_init_tool("timerlat_top"); + if (!top) + return NULL; + + top->data = timerlat_alloc_top(nr_cpus); + if (!top->data) + goto out_err; + + top->params = params; + + tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat", + timerlat_top_handler, top); + + return top; + +out_err: + osnoise_destroy_tool(top); + return NULL; +} + +static int stop_tracing; +static void stop_top(int sig) +{ + stop_tracing = 1; +} + +/* + * timerlat_top_set_signals - handles the signal to stop the tool + */ +static void +timerlat_top_set_signals(struct timerlat_top_params *params) +{ + signal(SIGINT, stop_top); + if (params->duration) { + signal(SIGALRM, stop_top); + alarm(params->duration); + } +} + +int timerlat_top_main(int argc, char *argv[]) +{ + struct timerlat_top_params *params; + struct osnoise_tool *record = NULL; + struct timerlat_u_params params_u; + struct osnoise_tool *top = NULL; + struct osnoise_tool *aa = NULL; + struct trace_instance *trace; + int dma_latency_fd = -1; + pthread_t timerlat_u; + int return_value = 1; + char *max_lat; + int retval; + + params = timerlat_top_parse_args(argc, argv); + if (!params) + exit(1); + + top = timerlat_init_top(params); + if (!top) { + err_msg("Could not init osnoise top\n"); + goto out_exit; + } + + retval = timerlat_top_apply_config(top, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_free; + } + + trace = &top->trace; + + retval = enable_timerlat(trace); + if (retval) { + err_msg("Failed to enable timerlat tracer\n"); + goto out_free; + } + + if (params->set_sched) { + retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_free; + } + } + + if (params->cgroup && !params->user_top) { + retval = set_comm_cgroup("timerlat/", params->cgroup_name); + if (!retval) { + err_msg("Failed to move threads to cgroup\n"); + goto out_free; + } + } + + if (params->dma_latency >= 0) { + dma_latency_fd = set_cpu_dma_latency(params->dma_latency); + if (dma_latency_fd < 0) { + err_msg("Could not set /dev/cpu_dma_latency.\n"); + goto out_free; + } + } + + if (params->trace_output) { + record = osnoise_init_trace_tool("timerlat"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_free; + } + + if (params->events) { + retval = trace_events_enable(&record->trace, params->events); + if (retval) + goto out_top; + } + } + + if (!params->no_aa) { + if (params->aa_only) { + /* as top is not used for display, use it for aa */ + aa = top; + } else { + /* otherwise, a new instance is needed */ + aa = osnoise_init_tool("timerlat_aa"); + if (!aa) + goto out_top; + } + + retval = timerlat_aa_init(aa, params->dump_tasks); + if (retval) { + err_msg("Failed to enable the auto analysis instance\n"); + goto out_top; + } + + /* if it is re-using the main instance, there is no need to start it */ + if (aa != top) { + retval = enable_timerlat(&aa->trace); + if (retval) { + err_msg("Failed to enable timerlat tracer\n"); + goto out_top; + } + } + } + + /* + * Start the tracers here, after having set all instances. + * + * Let the trace instance start first for the case of hitting a stop + * tracing while enabling other instances. The trace instance is the + * one with most valuable information. + */ + if (params->trace_output) + trace_instance_start(&record->trace); + if (!params->no_aa && aa != top) + trace_instance_start(&aa->trace); + trace_instance_start(trace); + + top->start_time = time(NULL); + timerlat_top_set_signals(params); + + if (params->user_top) { + /* rtla asked to stop */ + params_u.should_run = 1; + /* all threads left */ + params_u.stopped_running = 0; + + params_u.set = ¶ms->monitored_cpus; + if (params->set_sched) + params_u.sched_param = ¶ms->sched_param; + else + params_u.sched_param = NULL; + + params_u.cgroup_name = params->cgroup_name; + + retval = pthread_create(&timerlat_u, NULL, timerlat_u_dispatcher, ¶ms_u); + if (retval) + err_msg("Error creating timerlat user-space threads\n"); + } + + while (!stop_tracing) { + sleep(params->sleep_time); + + if (params->aa_only && !trace_is_off(&top->trace, &record->trace)) + continue; + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_top; + } + + if (!params->quiet) + timerlat_print_stats(params, top); + + if (trace_is_off(&top->trace, &record->trace)) + break; + + /* is there still any user-threads ? */ + if (params->user_top) { + if (params_u.stopped_running) { + debug_msg("timerlat user space threads stopped!\n"); + break; + } + } + } + + if (params->user_top && !params_u.stopped_running) { + params_u.should_run = 0; + sleep(1); + } + + timerlat_print_stats(params, top); + + return_value = 0; + + if (trace_is_off(&top->trace, &record->trace)) { + printf("rtla timerlat hit stop tracing\n"); + + if (!params->no_aa) + timerlat_auto_analysis(params->stop_us, params->stop_total_us); + + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } else if (params->aa_only) { + /* + * If the trace did not stop with --aa-only, at least print the + * max known latency. + */ + max_lat = tracefs_instance_file_read(trace->inst, "tracing_max_latency", NULL); + if (max_lat) { + printf(" Max latency was %s\n", max_lat); + free(max_lat); + } + } + +out_top: + timerlat_aa_destroy(); + if (dma_latency_fd >= 0) + close(dma_latency_fd); + trace_events_destroy(&record->trace, params->events); + params->events = NULL; +out_free: + timerlat_free_top(top->data); + if (aa && aa != top) + osnoise_destroy_tool(aa); + osnoise_destroy_tool(record); + osnoise_destroy_tool(top); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/timerlat_u.c b/tools/tracing/rtla/src/timerlat_u.c new file mode 100644 index 0000000000..01dbf9a6b5 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_u.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#define _GNU_SOURCE +#include <sched.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <tracefs.h> +#include <pthread.h> +#include <sys/wait.h> +#include <sys/prctl.h> + +#include "utils.h" +#include "timerlat_u.h" + +/* + * This is the user-space main for the tool timerlatu/ threads. + * + * It is as simple as this: + * - set affinity + * - set priority + * - open tracer fd + * - spin + * - close + */ +static int timerlat_u_main(int cpu, struct timerlat_u_params *params) +{ + struct sched_param sp = { .sched_priority = 95 }; + char buffer[1024]; + int timerlat_fd; + cpu_set_t set; + int retval; + + /* + * This all is only setting up the tool. + */ + CPU_ZERO(&set); + CPU_SET(cpu, &set); + + retval = sched_setaffinity(gettid(), sizeof(set), &set); + if (retval == -1) { + debug_msg("Error setting user thread affinity %d, is the CPU online?\n", cpu); + exit(1); + } + + if (!params->sched_param) { + retval = sched_setscheduler(0, SCHED_FIFO, &sp); + if (retval < 0) { + err_msg("Error setting timerlat u default priority: %s\n", strerror(errno)); + exit(1); + } + } else { + retval = __set_sched_attr(getpid(), params->sched_param); + if (retval) { + /* __set_sched_attr prints an error message, so */ + exit(0); + } + } + + if (params->cgroup_name) { + retval = set_pid_cgroup(gettid(), params->cgroup_name); + if (!retval) { + err_msg("Error setting timerlat u cgroup pid\n"); + pthread_exit(&retval); + } + } + + /* + * This is the tool's loop. If you want to use as base for your own tool... + * go ahead. + */ + snprintf(buffer, sizeof(buffer), "osnoise/per_cpu/cpu%d/timerlat_fd", cpu); + + timerlat_fd = tracefs_instance_file_open(NULL, buffer, O_RDONLY); + if (timerlat_fd < 0) { + err_msg("Error opening %s:%s\n", buffer, strerror(errno)); + exit(1); + } + + debug_msg("User-space timerlat pid %d on cpu %d\n", gettid(), cpu); + + /* add should continue with a signal handler */ + while (true) { + retval = read(timerlat_fd, buffer, 1024); + if (retval < 0) + break; + } + + close(timerlat_fd); + + debug_msg("Leaving timerlat pid %d on cpu %d\n", gettid(), cpu); + exit(0); +} + +/* + * timerlat_u_send_kill - send a kill signal for all processes + * + * Return the number of processes that received the kill. + */ +static int timerlat_u_send_kill(pid_t *procs, int nr_cpus) +{ + int killed = 0; + int i, retval; + + for (i = 0; i < nr_cpus; i++) { + if (!procs[i]) + continue; + retval = kill(procs[i], SIGKILL); + if (!retval) + killed++; + else + err_msg("Error killing child process %d\n", procs[i]); + } + + return killed; +} + +/** + * timerlat_u_dispatcher - dispatch one timerlatu/ process per monitored CPU + * + * This is a thread main that will fork one new process for each monitored + * CPU. It will wait for: + * + * - rtla to tell to kill the child processes + * - some child process to die, and the cleanup all the processes + * + * whichever comes first. + * + */ +void *timerlat_u_dispatcher(void *data) +{ + int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + struct timerlat_u_params *params = data; + char proc_name[128]; + int procs_count = 0; + int retval = 1; + pid_t *procs; + int wstatus; + pid_t pid; + int i; + + debug_msg("Dispatching timerlat u procs\n"); + + procs = calloc(nr_cpus, sizeof(pid_t)); + if (!procs) + pthread_exit(&retval); + + for (i = 0; i < nr_cpus; i++) { + if (params->set && !CPU_ISSET(i, params->set)) + continue; + + pid = fork(); + + /* child */ + if (!pid) { + + /* + * rename the process + */ + snprintf(proc_name, sizeof(proc_name), "timerlatu/%d", i); + pthread_setname_np(pthread_self(), proc_name); + prctl(PR_SET_NAME, (unsigned long)proc_name, 0, 0, 0); + + timerlat_u_main(i, params); + /* timerlat_u_main should exit()! Anyways... */ + pthread_exit(&retval); + } + + /* parent */ + if (pid == -1) { + timerlat_u_send_kill(procs, nr_cpus); + debug_msg("Failed to create child processes"); + pthread_exit(&retval); + } + + procs_count++; + procs[i] = pid; + } + + while (params->should_run) { + /* check if processes died */ + pid = waitpid(-1, &wstatus, WNOHANG); + if (pid != 0) { + for (i = 0; i < nr_cpus; i++) { + if (procs[i] == pid) { + procs[i] = 0; + procs_count--; + } + } + + if (!procs_count) + break; + } + + sleep(1); + } + + timerlat_u_send_kill(procs, nr_cpus); + + while (procs_count) { + pid = waitpid(-1, &wstatus, 0); + if (pid == -1) { + err_msg("Failed to monitor child processes"); + pthread_exit(&retval); + } + for (i = 0; i < nr_cpus; i++) { + if (procs[i] == pid) { + procs[i] = 0; + procs_count--; + } + } + } + + params->stopped_running = 1; + + free(procs); + retval = 0; + pthread_exit(&retval); + +} diff --git a/tools/tracing/rtla/src/timerlat_u.h b/tools/tracing/rtla/src/timerlat_u.h new file mode 100644 index 0000000000..6615119089 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_u.h @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +struct timerlat_u_params { + /* timerlat -> timerlat_u: user-space threads can keep running */ + int should_run; + /* timerlat_u -> timerlat: all timerlat_u threads left, no reason to continue */ + int stopped_running; + + /* threads config */ + cpu_set_t *set; + char *cgroup_name; + struct sched_attr *sched_param; +}; + +void *timerlat_u_dispatcher(void *data); diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c new file mode 100644 index 0000000000..e1ba6d9f42 --- /dev/null +++ b/tools/tracing/rtla/src/trace.c @@ -0,0 +1,542 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sys/sendfile.h> +#include <tracefs.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> + +#include "trace.h" +#include "utils.h" + +/* + * enable_tracer_by_name - enable a tracer on the given instance + */ +int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name) +{ + enum tracefs_tracers tracer; + int retval; + + tracer = TRACEFS_TRACER_CUSTOM; + + debug_msg("Enabling %s tracer\n", tracer_name); + + retval = tracefs_tracer_set(inst, tracer, tracer_name); + if (retval < 0) { + if (errno == ENODEV) + err_msg("Tracer %s not found!\n", tracer_name); + + err_msg("Failed to enable the %s tracer\n", tracer_name); + return -1; + } + + return 0; +} + +/* + * disable_tracer - set nop tracer to the insta + */ +void disable_tracer(struct tracefs_instance *inst) +{ + enum tracefs_tracers t = TRACEFS_TRACER_NOP; + int retval; + + retval = tracefs_tracer_set(inst, t); + if (retval < 0) + err_msg("Oops, error disabling tracer\n"); +} + +/* + * create_instance - create a trace instance with *instance_name + */ +struct tracefs_instance *create_instance(char *instance_name) +{ + return tracefs_instance_create(instance_name); +} + +/* + * destroy_instance - remove a trace instance and free the data + */ +void destroy_instance(struct tracefs_instance *inst) +{ + tracefs_instance_destroy(inst); + tracefs_instance_free(inst); +} + +/* + * save_trace_to_file - save the trace output of the instance to the file + */ +int save_trace_to_file(struct tracefs_instance *inst, const char *filename) +{ + const char *file = "trace"; + mode_t mode = 0644; + char buffer[4096]; + int out_fd, in_fd; + int retval = -1; + + in_fd = tracefs_instance_file_open(inst, file, O_RDONLY); + if (in_fd < 0) { + err_msg("Failed to open trace file\n"); + return -1; + } + + out_fd = creat(filename, mode); + if (out_fd < 0) { + err_msg("Failed to create output file %s\n", filename); + goto out_close_in; + } + + do { + retval = read(in_fd, buffer, sizeof(buffer)); + if (retval <= 0) + goto out_close; + + retval = write(out_fd, buffer, retval); + if (retval < 0) + goto out_close; + } while (retval > 0); + + retval = 0; +out_close: + close(out_fd); +out_close_in: + close(in_fd); + return retval; +} + +/* + * collect_registered_events - call the existing callback function for the event + * + * If an event has a registered callback function, call it. + * Otherwise, ignore the event. + */ +int +collect_registered_events(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct trace_instance *trace = context; + struct trace_seq *s = trace->seq; + + if (!event->handler) + return 0; + + event->handler(s, record, event, context); + + return 0; +} + +/* + * trace_instance_destroy - destroy and free a rtla trace instance + */ +void trace_instance_destroy(struct trace_instance *trace) +{ + if (trace->inst) { + disable_tracer(trace->inst); + destroy_instance(trace->inst); + trace->inst = NULL; + } + + if (trace->seq) { + free(trace->seq); + trace->seq = NULL; + } + + if (trace->tep) { + tep_free(trace->tep); + trace->tep = NULL; + } +} + +/* + * trace_instance_init - create an rtla trace instance + * + * It is more than the tracefs instance, as it contains other + * things required for the tracing, such as the local events and + * a seq file. + * + * Note that the trace instance is returned disabled. This allows + * the tool to apply some other configs, like setting priority + * to the kernel threads, before starting generating trace entries. + */ +int trace_instance_init(struct trace_instance *trace, char *tool_name) +{ + trace->seq = calloc(1, sizeof(*trace->seq)); + if (!trace->seq) + goto out_err; + + trace_seq_init(trace->seq); + + trace->inst = create_instance(tool_name); + if (!trace->inst) + goto out_err; + + trace->tep = tracefs_local_events(NULL); + if (!trace->tep) + goto out_err; + + /* + * Let the main enable the record after setting some other + * things such as the priority of the tracer's threads. + */ + tracefs_trace_off(trace->inst); + + return 0; + +out_err: + trace_instance_destroy(trace); + return 1; +} + +/* + * trace_instance_start - start tracing a given rtla instance + */ +int trace_instance_start(struct trace_instance *trace) +{ + return tracefs_trace_on(trace->inst); +} + +/* + * trace_events_free - free a list of trace events + */ +static void trace_events_free(struct trace_events *events) +{ + struct trace_events *tevent = events; + struct trace_events *free_event; + + while (tevent) { + free_event = tevent; + + tevent = tevent->next; + + if (free_event->filter) + free(free_event->filter); + if (free_event->trigger) + free(free_event->trigger); + free(free_event->system); + free(free_event); + } +} + +/* + * trace_event_alloc - alloc and parse a single trace event + */ +struct trace_events *trace_event_alloc(const char *event_string) +{ + struct trace_events *tevent; + + tevent = calloc(1, sizeof(*tevent)); + if (!tevent) + return NULL; + + tevent->system = strdup(event_string); + if (!tevent->system) { + free(tevent); + return NULL; + } + + tevent->event = strstr(tevent->system, ":"); + if (tevent->event) { + *tevent->event = '\0'; + tevent->event = &tevent->event[1]; + } + + return tevent; +} + +/* + * trace_event_add_filter - record an event filter + */ +int trace_event_add_filter(struct trace_events *event, char *filter) +{ + if (event->filter) + free(event->filter); + + event->filter = strdup(filter); + if (!event->filter) + return 1; + + return 0; +} + +/* + * trace_event_add_trigger - record an event trigger action + */ +int trace_event_add_trigger(struct trace_events *event, char *trigger) +{ + if (event->trigger) + free(event->trigger); + + event->trigger = strdup(trigger); + if (!event->trigger) + return 1; + + return 0; +} + +/* + * trace_event_disable_filter - disable an event filter + */ +static void trace_event_disable_filter(struct trace_instance *instance, + struct trace_events *tevent) +{ + char filter[1024]; + int retval; + + if (!tevent->filter) + return; + + if (!tevent->filter_enabled) + return; + + debug_msg("Disabling %s:%s filter %s\n", tevent->system, + tevent->event ? : "*", tevent->filter); + + snprintf(filter, 1024, "!%s\n", tevent->filter); + + retval = tracefs_event_file_write(instance->inst, tevent->system, + tevent->event, "filter", filter); + if (retval < 0) + err_msg("Error disabling %s:%s filter %s\n", tevent->system, + tevent->event ? : "*", tevent->filter); +} + +/* + * trace_event_save_hist - save the content of an event hist + * + * If the trigger is a hist: one, save the content of the hist file. + */ +static void trace_event_save_hist(struct trace_instance *instance, + struct trace_events *tevent) +{ + int retval, index, out_fd; + mode_t mode = 0644; + char path[1024]; + char *hist; + + if (!tevent) + return; + + /* trigger enables hist */ + if (!tevent->trigger) + return; + + /* is this a hist: trigger? */ + retval = strncmp(tevent->trigger, "hist:", strlen("hist:")); + if (retval) + return; + + snprintf(path, 1024, "%s_%s_hist.txt", tevent->system, tevent->event); + + printf(" Saving event %s:%s hist to %s\n", tevent->system, tevent->event, path); + + out_fd = creat(path, mode); + if (out_fd < 0) { + err_msg(" Failed to create %s output file\n", path); + return; + } + + hist = tracefs_event_file_read(instance->inst, tevent->system, tevent->event, "hist", 0); + if (!hist) { + err_msg(" Failed to read %s:%s hist file\n", tevent->system, tevent->event); + goto out_close; + } + + index = 0; + do { + index += write(out_fd, &hist[index], strlen(hist) - index); + } while (index < strlen(hist)); + + free(hist); +out_close: + close(out_fd); +} + +/* + * trace_event_disable_trigger - disable an event trigger + */ +static void trace_event_disable_trigger(struct trace_instance *instance, + struct trace_events *tevent) +{ + char trigger[1024]; + int retval; + + if (!tevent->trigger) + return; + + if (!tevent->trigger_enabled) + return; + + debug_msg("Disabling %s:%s trigger %s\n", tevent->system, + tevent->event ? : "*", tevent->trigger); + + trace_event_save_hist(instance, tevent); + + snprintf(trigger, 1024, "!%s\n", tevent->trigger); + + retval = tracefs_event_file_write(instance->inst, tevent->system, + tevent->event, "trigger", trigger); + if (retval < 0) + err_msg("Error disabling %s:%s trigger %s\n", tevent->system, + tevent->event ? : "*", tevent->trigger); +} + +/* + * trace_events_disable - disable all trace events + */ +void trace_events_disable(struct trace_instance *instance, + struct trace_events *events) +{ + struct trace_events *tevent = events; + + if (!events) + return; + + while (tevent) { + debug_msg("Disabling event %s:%s\n", tevent->system, tevent->event ? : "*"); + if (tevent->enabled) { + trace_event_disable_filter(instance, tevent); + trace_event_disable_trigger(instance, tevent); + tracefs_event_disable(instance->inst, tevent->system, tevent->event); + } + + tevent->enabled = 0; + tevent = tevent->next; + } +} + +/* + * trace_event_enable_filter - enable an event filter associated with an event + */ +static int trace_event_enable_filter(struct trace_instance *instance, + struct trace_events *tevent) +{ + char filter[1024]; + int retval; + + if (!tevent->filter) + return 0; + + if (!tevent->event) { + err_msg("Filter %s applies only for single events, not for all %s:* events\n", + tevent->filter, tevent->system); + return 1; + } + + snprintf(filter, 1024, "%s\n", tevent->filter); + + debug_msg("Enabling %s:%s filter %s\n", tevent->system, + tevent->event ? : "*", tevent->filter); + + retval = tracefs_event_file_write(instance->inst, tevent->system, + tevent->event, "filter", filter); + if (retval < 0) { + err_msg("Error enabling %s:%s filter %s\n", tevent->system, + tevent->event ? : "*", tevent->filter); + return 1; + } + + tevent->filter_enabled = 1; + return 0; +} + +/* + * trace_event_enable_trigger - enable an event trigger associated with an event + */ +static int trace_event_enable_trigger(struct trace_instance *instance, + struct trace_events *tevent) +{ + char trigger[1024]; + int retval; + + if (!tevent->trigger) + return 0; + + if (!tevent->event) { + err_msg("Trigger %s applies only for single events, not for all %s:* events\n", + tevent->trigger, tevent->system); + return 1; + } + + snprintf(trigger, 1024, "%s\n", tevent->trigger); + + debug_msg("Enabling %s:%s trigger %s\n", tevent->system, + tevent->event ? : "*", tevent->trigger); + + retval = tracefs_event_file_write(instance->inst, tevent->system, + tevent->event, "trigger", trigger); + if (retval < 0) { + err_msg("Error enabling %s:%s trigger %s\n", tevent->system, + tevent->event ? : "*", tevent->trigger); + return 1; + } + + tevent->trigger_enabled = 1; + + return 0; +} + +/* + * trace_events_enable - enable all events + */ +int trace_events_enable(struct trace_instance *instance, + struct trace_events *events) +{ + struct trace_events *tevent = events; + int retval; + + while (tevent) { + debug_msg("Enabling event %s:%s\n", tevent->system, tevent->event ? : "*"); + retval = tracefs_event_enable(instance->inst, tevent->system, tevent->event); + if (retval < 0) { + err_msg("Error enabling event %s:%s\n", tevent->system, + tevent->event ? : "*"); + return 1; + } + + retval = trace_event_enable_filter(instance, tevent); + if (retval) + return 1; + + retval = trace_event_enable_trigger(instance, tevent); + if (retval) + return 1; + + tevent->enabled = 1; + tevent = tevent->next; + } + + return 0; +} + +/* + * trace_events_destroy - disable and free all trace events + */ +void trace_events_destroy(struct trace_instance *instance, + struct trace_events *events) +{ + if (!events) + return; + + trace_events_disable(instance, events); + trace_events_free(events); +} + +int trace_is_off(struct trace_instance *tool, struct trace_instance *trace) +{ + /* + * The tool instance is always present, it is the one used to collect + * data. + */ + if (!tracefs_trace_is_on(tool->inst)) + return 1; + + /* + * The trace instance is only enabled when -t is set. IOW, when the system + * is tracing. + */ + if (trace && !tracefs_trace_is_on(trace->inst)) + return 1; + + return 0; +} diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h new file mode 100644 index 0000000000..2e9a89a256 --- /dev/null +++ b/tools/tracing/rtla/src/trace.h @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <tracefs.h> +#include <stddef.h> + +struct trace_events { + struct trace_events *next; + char *system; + char *event; + char *filter; + char *trigger; + char enabled; + char filter_enabled; + char trigger_enabled; +}; + +struct trace_instance { + struct tracefs_instance *inst; + struct tep_handle *tep; + struct trace_seq *seq; +}; + +int trace_instance_init(struct trace_instance *trace, char *tool_name); +int trace_instance_start(struct trace_instance *trace); +void trace_instance_destroy(struct trace_instance *trace); + +struct trace_seq *get_trace_seq(void); +int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name); +void disable_tracer(struct tracefs_instance *inst); + +int enable_osnoise(struct trace_instance *trace); +int enable_timerlat(struct trace_instance *trace); + +struct tracefs_instance *create_instance(char *instance_name); +void destroy_instance(struct tracefs_instance *inst); + +int save_trace_to_file(struct tracefs_instance *inst, const char *filename); +int collect_registered_events(struct tep_event *tep, struct tep_record *record, + int cpu, void *context); + +struct trace_events *trace_event_alloc(const char *event_string); +void trace_events_disable(struct trace_instance *instance, + struct trace_events *events); +void trace_events_destroy(struct trace_instance *instance, + struct trace_events *events); +int trace_events_enable(struct trace_instance *instance, + struct trace_events *events); + +int trace_event_add_filter(struct trace_events *event, char *filter); +int trace_event_add_trigger(struct trace_events *event, char *trigger); +int trace_is_off(struct trace_instance *tool, struct trace_instance *trace); diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c new file mode 100644 index 0000000000..c769d7b384 --- /dev/null +++ b/tools/tracing/rtla/src/utils.c @@ -0,0 +1,819 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#define _GNU_SOURCE +#include <dirent.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> + +#include "utils.h" + +#define MAX_MSG_LENGTH 1024 +int config_debug; + +/* + * err_msg - print an error message to the stderr + */ +void err_msg(const char *fmt, ...) +{ + char message[MAX_MSG_LENGTH]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(message, sizeof(message), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s", message); +} + +/* + * debug_msg - print a debug message to stderr if debug is set + */ +void debug_msg(const char *fmt, ...) +{ + char message[MAX_MSG_LENGTH]; + va_list ap; + + if (!config_debug) + return; + + va_start(ap, fmt); + vsnprintf(message, sizeof(message), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s", message); +} + +/* + * get_llong_from_str - get a long long int from a string + */ +long long get_llong_from_str(char *start) +{ + long long value; + char *end; + + errno = 0; + value = strtoll(start, &end, 10); + if (errno || start == end) + return -1; + + return value; +} + +/* + * get_duration - fill output with a human readable duration since start_time + */ +void get_duration(time_t start_time, char *output, int output_size) +{ + time_t now = time(NULL); + struct tm *tm_info; + time_t duration; + + duration = difftime(now, start_time); + tm_info = gmtime(&duration); + + snprintf(output, output_size, "%3d %02d:%02d:%02d", + tm_info->tm_yday, + tm_info->tm_hour, + tm_info->tm_min, + tm_info->tm_sec); +} + +/* + * parse_cpu_set - parse a cpu_list filling cpu_set_t argument + * + * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set + * filling cpu_set_t argument. + * + * Returns 1 on success, 0 otherwise. + */ +int parse_cpu_set(char *cpu_list, cpu_set_t *set) +{ + const char *p; + int end_cpu; + int nr_cpus; + int cpu; + int i; + + CPU_ZERO(set); + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + for (p = cpu_list; *p; ) { + cpu = atoi(p); + if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus) + goto err; + + while (isdigit(*p)) + p++; + if (*p == '-') { + p++; + end_cpu = atoi(p); + if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus) + goto err; + while (isdigit(*p)) + p++; + } else + end_cpu = cpu; + + if (cpu == end_cpu) { + debug_msg("cpu_set: adding cpu %d\n", cpu); + CPU_SET(cpu, set); + } else { + for (i = cpu; i <= end_cpu; i++) { + debug_msg("cpu_set: adding cpu %d\n", i); + CPU_SET(i, set); + } + } + + if (*p == ',') + p++; + } + + return 0; +err: + debug_msg("Error parsing the cpu set %s\n", cpu_list); + return 1; +} + +/* + * parse_duration - parse duration with s/m/h/d suffix converting it to seconds + */ +long parse_seconds_duration(char *val) +{ + char *end; + long t; + + t = strtol(val, &end, 10); + + if (end) { + switch (*end) { + case 's': + case 'S': + break; + case 'm': + case 'M': + t *= 60; + break; + case 'h': + case 'H': + t *= 60 * 60; + break; + + case 'd': + case 'D': + t *= 24 * 60 * 60; + break; + } + } + + return t; +} + +/* + * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds + */ +long parse_ns_duration(char *val) +{ + char *end; + long t; + + t = strtol(val, &end, 10); + + if (end) { + if (!strncmp(end, "ns", 2)) { + return t; + } else if (!strncmp(end, "us", 2)) { + t *= 1000; + return t; + } else if (!strncmp(end, "ms", 2)) { + t *= 1000 * 1000; + return t; + } else if (!strncmp(end, "s", 1)) { + t *= 1000 * 1000 * 1000; + return t; + } + return -1; + } + + return t; +} + +/* + * This is a set of helper functions to use SCHED_DEADLINE. + */ +#ifdef __x86_64__ +# define __NR_sched_setattr 314 +# define __NR_sched_getattr 315 +#elif __i386__ +# define __NR_sched_setattr 351 +# define __NR_sched_getattr 352 +#elif __arm__ +# define __NR_sched_setattr 380 +# define __NR_sched_getattr 381 +#elif __aarch64__ || __riscv +# define __NR_sched_setattr 274 +# define __NR_sched_getattr 275 +#elif __powerpc__ +# define __NR_sched_setattr 355 +# define __NR_sched_getattr 356 +#elif __s390x__ +# define __NR_sched_setattr 345 +# define __NR_sched_getattr 346 +#endif + +#define SCHED_DEADLINE 6 + +static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, + unsigned int flags) { + return syscall(__NR_sched_setattr, pid, attr, flags); +} + +static inline int sched_getattr(pid_t pid, struct sched_attr *attr, + unsigned int size, unsigned int flags) +{ + return syscall(__NR_sched_getattr, pid, attr, size, flags); +} + +int __set_sched_attr(int pid, struct sched_attr *attr) +{ + int flags = 0; + int retval; + + retval = sched_setattr(pid, attr, flags); + if (retval < 0) { + err_msg("Failed to set sched attributes to the pid %d: %s\n", + pid, strerror(errno)); + return 1; + } + + return 0; +} + +/* + * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm + * + * Check if the procfs entry is a directory of a process, and then check if the + * process has a comm with the prefix set in char *comm_prefix. As the + * current users of this function only check for kernel threads, there is no + * need to check for the threads for the process. + * + * Return: True if the proc_entry contains a comm file with comm_prefix*. + * Otherwise returns false. + */ +static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry) +{ + char buffer[MAX_PATH]; + int comm_fd, retval; + char *t_name; + + if (proc_entry->d_type != DT_DIR) + return 0; + + if (*proc_entry->d_name == '.') + return 0; + + /* check if the string is a pid */ + for (t_name = proc_entry->d_name; t_name; t_name++) { + if (!isdigit(*t_name)) + break; + } + + if (*t_name != '\0') + return 0; + + snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name); + comm_fd = open(buffer, O_RDONLY); + if (comm_fd < 0) + return 0; + + memset(buffer, 0, MAX_PATH); + retval = read(comm_fd, buffer, MAX_PATH); + + close(comm_fd); + + if (retval <= 0) + return 0; + + retval = strncmp(comm_prefix, buffer, strlen(comm_prefix)); + if (retval) + return 0; + + /* comm already have \n */ + debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer); + + return 1; +} + +/* + * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix + * + * This function uses procfs to list the currently running threads and then set the + * sched_attr *attr to the threads that start with char *comm_prefix. It is + * mainly used to set the priority to the kernel threads created by the + * tracers. + */ +int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr) +{ + struct dirent *proc_entry; + DIR *procfs; + int retval; + + if (strlen(comm_prefix) >= MAX_PATH) { + err_msg("Command prefix is too long: %d < strlen(%s)\n", + MAX_PATH, comm_prefix); + return 1; + } + + procfs = opendir("/proc"); + if (!procfs) { + err_msg("Could not open procfs\n"); + return 1; + } + + while ((proc_entry = readdir(procfs))) { + + retval = procfs_is_workload_pid(comm_prefix, proc_entry); + if (!retval) + continue; + + /* procfs_is_workload_pid confirmed it is a pid */ + retval = __set_sched_attr(atoi(proc_entry->d_name), attr); + if (retval) { + err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name); + goto out_err; + } + + debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name); + } + return 0; + +out_err: + closedir(procfs); + return 1; +} + +#define INVALID_VAL (~0L) +static long get_long_ns_after_colon(char *start) +{ + long val = INVALID_VAL; + + /* find the ":" */ + start = strstr(start, ":"); + if (!start) + return -1; + + /* skip ":" */ + start++; + val = parse_ns_duration(start); + + return val; +} + +static long get_long_after_colon(char *start) +{ + long val = INVALID_VAL; + + /* find the ":" */ + start = strstr(start, ":"); + if (!start) + return -1; + + /* skip ":" */ + start++; + val = get_llong_from_str(start); + + return val; +} + +/* + * parse priority in the format: + * SCHED_OTHER: + * o:<prio> + * O:<prio> + * SCHED_RR: + * r:<prio> + * R:<prio> + * SCHED_FIFO: + * f:<prio> + * F:<prio> + * SCHED_DEADLINE: + * d:runtime:period + * D:runtime:period + */ +int parse_prio(char *arg, struct sched_attr *sched_param) +{ + long prio; + long runtime; + long period; + + memset(sched_param, 0, sizeof(*sched_param)); + sched_param->size = sizeof(*sched_param); + + switch (arg[0]) { + case 'd': + case 'D': + /* d:runtime:period */ + if (strlen(arg) < 4) + return -1; + + runtime = get_long_ns_after_colon(arg); + if (runtime == INVALID_VAL) + return -1; + + period = get_long_ns_after_colon(&arg[2]); + if (period == INVALID_VAL) + return -1; + + if (runtime > period) + return -1; + + sched_param->sched_policy = SCHED_DEADLINE; + sched_param->sched_runtime = runtime; + sched_param->sched_deadline = period; + sched_param->sched_period = period; + break; + case 'f': + case 'F': + /* f:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_FIFO)) + return -1; + if (prio > sched_get_priority_max(SCHED_FIFO)) + return -1; + + sched_param->sched_policy = SCHED_FIFO; + sched_param->sched_priority = prio; + break; + case 'r': + case 'R': + /* r:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_RR)) + return -1; + if (prio > sched_get_priority_max(SCHED_RR)) + return -1; + + sched_param->sched_policy = SCHED_RR; + sched_param->sched_priority = prio; + break; + case 'o': + case 'O': + /* o:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_OTHER)) + return -1; + if (prio > sched_get_priority_max(SCHED_OTHER)) + return -1; + + sched_param->sched_policy = SCHED_OTHER; + sched_param->sched_priority = prio; + break; + default: + return -1; + } + return 0; +} + +/* + * set_cpu_dma_latency - set the /dev/cpu_dma_latecy + * + * This is used to reduce the exit from idle latency. The value + * will be reset once the file descriptor of /dev/cpu_dma_latecy + * is closed. + * + * Return: the /dev/cpu_dma_latecy file descriptor + */ +int set_cpu_dma_latency(int32_t latency) +{ + int retval; + int fd; + + fd = open("/dev/cpu_dma_latency", O_RDWR); + if (fd < 0) { + err_msg("Error opening /dev/cpu_dma_latency\n"); + return -1; + } + + retval = write(fd, &latency, 4); + if (retval < 1) { + err_msg("Error setting /dev/cpu_dma_latency\n"); + close(fd); + return -1; + } + + debug_msg("Set /dev/cpu_dma_latency to %d\n", latency); + + return fd; +} + +#define _STR(x) #x +#define STR(x) _STR(x) + +/* + * find_mount - find a the mount point of a given fs + * + * Returns 0 if mount is not found, otherwise return 1 and fill mp + * with the mount point. + */ +static const int find_mount(const char *fs, char *mp, int sizeof_mp) +{ + char mount_point[MAX_PATH]; + char type[100]; + int found = 0; + FILE *fp; + + fp = fopen("/proc/mounts", "r"); + if (!fp) + return 0; + + while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) { + if (strcmp(type, fs) == 0) { + found = 1; + break; + } + } + fclose(fp); + + if (!found) + return 0; + + memset(mp, 0, sizeof_mp); + strncpy(mp, mount_point, sizeof_mp - 1); + + debug_msg("Fs %s found at %s\n", fs, mp); + return 1; +} + +/* + * get_self_cgroup - get the current thread cgroup path + * + * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse: + * + * 0::/user.slice/user-0.slice/session-3.scope'\n' + * + * This function is interested in the content after the second : and before the '\n'. + * + * Returns 1 if a string was found, 0 otherwise. + */ +static int get_self_cgroup(char *self_cg, int sizeof_self_cg) +{ + char path[MAX_PATH], *start; + int fd, retval; + + snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid()); + + fd = open(path, O_RDONLY); + if (fd < 0) + return 0; + + retval = read(fd, path, MAX_PATH); + + close(fd); + + if (retval <= 0) + return 0; + + start = path; + + start = strstr(start, ":"); + if (!start) + return 0; + + /* skip ":" */ + start++; + + start = strstr(start, ":"); + if (!start) + return 0; + + /* skip ":" */ + start++; + + if (strlen(start) >= sizeof_self_cg) + return 0; + + snprintf(self_cg, sizeof_self_cg, "%s", start); + + /* Swap '\n' with '\0' */ + start = strstr(self_cg, "\n"); + + /* there must be '\n' */ + if (!start) + return 0; + + /* ok, it found a string after the second : and before the \n */ + *start = '\0'; + + return 1; +} + +/* + * set_comm_cgroup - Set cgroup to pid_t pid + * + * If cgroup argument is not NULL, the threads will move to the given cgroup. + * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. + * + * Supports cgroup v2. + * + * Returns 1 on success, 0 otherwise. + */ +int set_pid_cgroup(pid_t pid, const char *cgroup) +{ + char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; + char cgroup_procs[MAX_PATH]; + char pid_str[24]; + int retval; + int cg_fd; + + retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); + if (!retval) { + err_msg("Did not find cgroupv2 mount point\n"); + return 0; + } + + if (!cgroup) { + retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path)); + if (!retval) { + err_msg("Did not find self cgroup\n"); + return 0; + } + } else { + snprintf(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); + } + + snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); + + debug_msg("Using cgroup path at: %s\n", cgroup_procs); + + cg_fd = open(cgroup_procs, O_RDWR); + if (cg_fd < 0) + return 0; + + snprintf(pid_str, sizeof(pid_str), "%d\n", pid); + + retval = write(cg_fd, pid_str, strlen(pid_str)); + if (retval < 0) + err_msg("Error setting cgroup attributes for pid:%s - %s\n", + pid_str, strerror(errno)); + else + debug_msg("Set cgroup attributes for pid:%s\n", pid_str); + + close(cg_fd); + + return (retval >= 0); +} + +/** + * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix + * + * If cgroup argument is not NULL, the threads will move to the given cgroup. + * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. + * + * Supports cgroup v2. + * + * Returns 1 on success, 0 otherwise. + */ +int set_comm_cgroup(const char *comm_prefix, const char *cgroup) +{ + char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; + char cgroup_procs[MAX_PATH]; + struct dirent *proc_entry; + DIR *procfs; + int retval; + int cg_fd; + + if (strlen(comm_prefix) >= MAX_PATH) { + err_msg("Command prefix is too long: %d < strlen(%s)\n", + MAX_PATH, comm_prefix); + return 0; + } + + retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); + if (!retval) { + err_msg("Did not find cgroupv2 mount point\n"); + return 0; + } + + if (!cgroup) { + retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path)); + if (!retval) { + err_msg("Did not find self cgroup\n"); + return 0; + } + } else { + snprintf(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); + } + + snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); + + debug_msg("Using cgroup path at: %s\n", cgroup_procs); + + cg_fd = open(cgroup_procs, O_RDWR); + if (cg_fd < 0) + return 0; + + procfs = opendir("/proc"); + if (!procfs) { + err_msg("Could not open procfs\n"); + goto out_cg; + } + + while ((proc_entry = readdir(procfs))) { + + retval = procfs_is_workload_pid(comm_prefix, proc_entry); + if (!retval) + continue; + + retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name)); + if (retval < 0) { + err_msg("Error setting cgroup attributes for pid:%s - %s\n", + proc_entry->d_name, strerror(errno)); + goto out_procfs; + } + + debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name); + } + + closedir(procfs); + close(cg_fd); + return 1; + +out_procfs: + closedir(procfs); +out_cg: + close(cg_fd); + return 0; +} + +/** + * auto_house_keeping - Automatically move rtla out of measurement threads + * + * Try to move rtla away from the tracer, if possible. + * + * Returns 1 on success, 0 otherwise. + */ +int auto_house_keeping(cpu_set_t *monitored_cpus) +{ + cpu_set_t rtla_cpus, house_keeping_cpus; + int retval; + + /* first get the CPUs in which rtla can actually run. */ + retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus); + if (retval == -1) { + debug_msg("Could not get rtla affinity, rtla might run with the threads!\n"); + return 0; + } + + /* then check if the existing setup is already good. */ + CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus); + if (!CPU_COUNT(&house_keeping_cpus)) { + debug_msg("rtla and the monitored CPUs do not share CPUs."); + debug_msg("Skipping auto house-keeping\n"); + return 1; + } + + /* remove the intersection */ + CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus); + + /* get only those that rtla can run */ + CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus); + + /* is there any cpu left? */ + if (!CPU_COUNT(&house_keeping_cpus)) { + debug_msg("Could not find any CPU for auto house-keeping\n"); + return 0; + } + + retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus); + if (retval == -1) { + debug_msg("Could not set affinity for auto house-keeping\n"); + return 0; + } + + debug_msg("rtla automatically moved to an auto house-keeping cpu set\n"); + + return 1; +} diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h new file mode 100644 index 0000000000..04ed1e6504 --- /dev/null +++ b/tools/tracing/rtla/src/utils.h @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdint.h> +#include <time.h> +#include <sched.h> + +/* + * '18446744073709551615\0' + */ +#define BUFF_U64_STR_SIZE 24 +#define MAX_PATH 1024 + +#define container_of(ptr, type, member)({ \ + const typeof(((type *)0)->member) *__mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)) ; }) + +extern int config_debug; +void debug_msg(const char *fmt, ...); +void err_msg(const char *fmt, ...); + +long parse_seconds_duration(char *val); +void get_duration(time_t start_time, char *output, int output_size); + +int parse_cpu_list(char *cpu_list, char **monitored_cpus); +long long get_llong_from_str(char *start); + +static inline void +update_min(unsigned long long *a, unsigned long long *b) +{ + if (*a > *b) + *a = *b; +} + +static inline void +update_max(unsigned long long *a, unsigned long long *b) +{ + if (*a < *b) + *a = *b; +} + +static inline void +update_sum(unsigned long long *a, unsigned long long *b) +{ + *a += *b; +} + +struct sched_attr { + uint32_t size; + uint32_t sched_policy; + uint64_t sched_flags; + int32_t sched_nice; + uint32_t sched_priority; + uint64_t sched_runtime; + uint64_t sched_deadline; + uint64_t sched_period; +}; + +int parse_prio(char *arg, struct sched_attr *sched_param); +int parse_cpu_set(char *cpu_list, cpu_set_t *set); +int __set_sched_attr(int pid, struct sched_attr *attr); +int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr); +int set_comm_cgroup(const char *comm_prefix, const char *cgroup); +int set_pid_cgroup(pid_t pid, const char *cgroup); +int set_cpu_dma_latency(int32_t latency); +int auto_house_keeping(cpu_set_t *monitored_cpus); + +#define ns_to_usf(x) (((double)x/1000)) +#define ns_to_per(total, part) ((part * 100) / (double)total) |