diff options
Diffstat (limited to 'tools/lib')
103 files changed, 24465 insertions, 0 deletions
diff --git a/tools/lib/api/Build b/tools/lib/api/Build new file mode 100644 index 000000000..6e2373db5 --- /dev/null +++ b/tools/lib/api/Build @@ -0,0 +1,9 @@ +libapi-y += fd/ +libapi-y += fs/ +libapi-y += cpu.o +libapi-y += debug.o +libapi-y += str_error_r.o + +$(OUTPUT)str_error_r.o: ../str_error_r.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile new file mode 100644 index 000000000..a13e9c7f1 --- /dev/null +++ b/tools/lib/api/Makefile @@ -0,0 +1,67 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../../scripts/Makefile.include +include ../../scripts/utilities.mak # QUIET_CLEAN + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +CC ?= $(CROSS_COMPILE)gcc +AR ?= $(CROSS_COMPILE)ar +LD ?= $(CROSS_COMPILE)ld + +MAKEFLAGS += --no-print-directory + +LIBFILE = $(OUTPUT)libapi.a + +CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -fPIC + +ifeq ($(DEBUG),0) +ifeq ($(CC_NO_CLANG), 0) + CFLAGS += -O3 +else + CFLAGS += -O6 +endif +endif + +ifeq ($(DEBUG),0) + CFLAGS += -D_FORTIFY_SOURCE +endif + +# Treat warnings as errors unless directed not to +ifneq ($(WERROR),0) + CFLAGS += -Werror +endif + +CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +CFLAGS += -I$(srctree)/tools/lib/api +CFLAGS += -I$(srctree)/tools/include + +RM = rm -f + +API_IN := $(OUTPUT)libapi-in.o + +all: + +export srctree OUTPUT CC LD CFLAGS V +include $(srctree)/tools/build/Makefile.include + +all: fixdep $(LIBFILE) + +$(API_IN): FORCE + @$(MAKE) $(build)=libapi + +$(LIBFILE): $(API_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN) + +clean: + $(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \ + find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) + +FORCE: + +.PHONY: clean FORCE diff --git a/tools/lib/api/cpu.c b/tools/lib/api/cpu.c new file mode 100644 index 000000000..4af6d4b7a --- /dev/null +++ b/tools/lib/api/cpu.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> + +#include "cpu.h" +#include "fs/fs.h" + +int cpu__get_max_freq(unsigned long long *freq) +{ + char entry[PATH_MAX]; + int cpu; + + if (sysfs__read_int("devices/system/cpu/online", &cpu) < 0) + return -1; + + snprintf(entry, sizeof(entry), + "devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpu); + + return sysfs__read_ull(entry, freq); +} diff --git a/tools/lib/api/cpu.h b/tools/lib/api/cpu.h new file mode 100644 index 000000000..90a102fb2 --- /dev/null +++ b/tools/lib/api/cpu.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __API_CPU__ +#define __API_CPU__ + +int cpu__get_max_freq(unsigned long long *freq); + +#endif /* __API_CPU__ */ diff --git a/tools/lib/api/debug-internal.h b/tools/lib/api/debug-internal.h new file mode 100644 index 000000000..80c783497 --- /dev/null +++ b/tools/lib/api/debug-internal.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __API_DEBUG_INTERNAL_H__ +#define __API_DEBUG_INTERNAL_H__ + +#include "debug.h" + +#define __pr(func, fmt, ...) \ +do { \ + if ((func)) \ + (func)("libapi: " fmt, ##__VA_ARGS__); \ +} while (0) + +extern libapi_print_fn_t __pr_warning; +extern libapi_print_fn_t __pr_info; +extern libapi_print_fn_t __pr_debug; + +#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__) +#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__) +#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__) + +#endif /* __API_DEBUG_INTERNAL_H__ */ diff --git a/tools/lib/api/debug.c b/tools/lib/api/debug.c new file mode 100644 index 000000000..69b1ba3d1 --- /dev/null +++ b/tools/lib/api/debug.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <stdarg.h> +#include "debug.h" +#include "debug-internal.h" + +static int __base_pr(const char *format, ...) +{ + va_list args; + int err; + + va_start(args, format); + err = vfprintf(stderr, format, args); + va_end(args); + return err; +} + +libapi_print_fn_t __pr_warning = __base_pr; +libapi_print_fn_t __pr_info = __base_pr; +libapi_print_fn_t __pr_debug; + +void libapi_set_print(libapi_print_fn_t warn, + libapi_print_fn_t info, + libapi_print_fn_t debug) +{ + __pr_warning = warn; + __pr_info = info; + __pr_debug = debug; +} diff --git a/tools/lib/api/debug.h b/tools/lib/api/debug.h new file mode 100644 index 000000000..3684dd6e0 --- /dev/null +++ b/tools/lib/api/debug.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __API_DEBUG_H__ +#define __API_DEBUG_H__ + +typedef int (*libapi_print_fn_t)(const char *, ...); + +void libapi_set_print(libapi_print_fn_t warn, + libapi_print_fn_t info, + libapi_print_fn_t debug); + +#endif /* __API_DEBUG_H__ */ diff --git a/tools/lib/api/fd/Build b/tools/lib/api/fd/Build new file mode 100644 index 000000000..605d99f6d --- /dev/null +++ b/tools/lib/api/fd/Build @@ -0,0 +1 @@ +libapi-y += array.o diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c new file mode 100644 index 000000000..b0a035fc8 --- /dev/null +++ b/tools/lib/api/fd/array.c @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2014, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + * + * Released under the GPL v2. (and only v2, not any later version) + */ +#include "array.h" +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <stdlib.h> +#include <unistd.h> + +void fdarray__init(struct fdarray *fda, int nr_autogrow) +{ + fda->entries = NULL; + fda->priv = NULL; + fda->nr = fda->nr_alloc = 0; + fda->nr_autogrow = nr_autogrow; +} + +int fdarray__grow(struct fdarray *fda, int nr) +{ + void *priv; + int nr_alloc = fda->nr_alloc + nr; + size_t psize = sizeof(fda->priv[0]) * nr_alloc; + size_t size = sizeof(struct pollfd) * nr_alloc; + struct pollfd *entries = realloc(fda->entries, size); + + if (entries == NULL) + return -ENOMEM; + + priv = realloc(fda->priv, psize); + if (priv == NULL) { + free(entries); + return -ENOMEM; + } + + fda->nr_alloc = nr_alloc; + fda->entries = entries; + fda->priv = priv; + return 0; +} + +struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow) +{ + struct fdarray *fda = calloc(1, sizeof(*fda)); + + if (fda != NULL) { + if (fdarray__grow(fda, nr_alloc)) { + free(fda); + fda = NULL; + } else { + fda->nr_autogrow = nr_autogrow; + } + } + + return fda; +} + +void fdarray__exit(struct fdarray *fda) +{ + free(fda->entries); + free(fda->priv); + fdarray__init(fda, 0); +} + +void fdarray__delete(struct fdarray *fda) +{ + fdarray__exit(fda); + free(fda); +} + +int fdarray__add(struct fdarray *fda, int fd, short revents) +{ + int pos = fda->nr; + + if (fda->nr == fda->nr_alloc && + fdarray__grow(fda, fda->nr_autogrow) < 0) + return -ENOMEM; + + fda->entries[fda->nr].fd = fd; + fda->entries[fda->nr].events = revents; + fda->nr++; + return pos; +} + +int fdarray__filter(struct fdarray *fda, short revents, + void (*entry_destructor)(struct fdarray *fda, int fd, void *arg), + void *arg) +{ + int fd, nr = 0; + + if (fda->nr == 0) + return 0; + + for (fd = 0; fd < fda->nr; ++fd) { + if (fda->entries[fd].revents & revents) { + if (entry_destructor) + entry_destructor(fda, fd, arg); + + continue; + } + + if (fd != nr) { + fda->entries[nr] = fda->entries[fd]; + fda->priv[nr] = fda->priv[fd]; + } + + ++nr; + } + + return fda->nr = nr; +} + +int fdarray__poll(struct fdarray *fda, int timeout) +{ + return poll(fda->entries, fda->nr, timeout); +} + +int fdarray__fprintf(struct fdarray *fda, FILE *fp) +{ + int fd, printed = fprintf(fp, "%d [ ", fda->nr); + + for (fd = 0; fd < fda->nr; ++fd) + printed += fprintf(fp, "%s%d", fd ? ", " : "", fda->entries[fd].fd); + + return printed + fprintf(fp, " ]"); +} diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h new file mode 100644 index 000000000..b39557d1a --- /dev/null +++ b/tools/lib/api/fd/array.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __API_FD_ARRAY__ +#define __API_FD_ARRAY__ + +#include <stdio.h> + +struct pollfd; + +/** + * struct fdarray: Array of file descriptors + * + * @priv: Per array entry priv area, users should access just its contents, + * not set it to anything, as it is kept in synch with @entries, being + * realloc'ed, * for instance, in fdarray__{grow,filter}. + * + * I.e. using 'fda->priv[N].idx = * value' where N < fda->nr is ok, + * but doing 'fda->priv = malloc(M)' is not allowed. + */ +struct fdarray { + int nr; + int nr_alloc; + int nr_autogrow; + struct pollfd *entries; + union { + int idx; + void *ptr; + } *priv; +}; + +void fdarray__init(struct fdarray *fda, int nr_autogrow); +void fdarray__exit(struct fdarray *fda); + +struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow); +void fdarray__delete(struct fdarray *fda); + +int fdarray__add(struct fdarray *fda, int fd, short revents); +int fdarray__poll(struct fdarray *fda, int timeout); +int fdarray__filter(struct fdarray *fda, short revents, + void (*entry_destructor)(struct fdarray *fda, int fd, void *arg), + void *arg); +int fdarray__grow(struct fdarray *fda, int extra); +int fdarray__fprintf(struct fdarray *fda, FILE *fp); + +static inline int fdarray__available_entries(struct fdarray *fda) +{ + return fda->nr_alloc - fda->nr; +} + +#endif /* __API_FD_ARRAY__ */ diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build new file mode 100644 index 000000000..f4ed9629a --- /dev/null +++ b/tools/lib/api/fs/Build @@ -0,0 +1,2 @@ +libapi-y += fs.o +libapi-y += tracing_path.o diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c new file mode 100644 index 000000000..4cc69675c --- /dev/null +++ b/tools/lib/api/fs/fs.c @@ -0,0 +1,558 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/vfs.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/mount.h> + +#include "fs.h" +#include "debug-internal.h" + +#define _STR(x) #x +#define STR(x) _STR(x) + +#ifndef SYSFS_MAGIC +#define SYSFS_MAGIC 0x62656572 +#endif + +#ifndef PROC_SUPER_MAGIC +#define PROC_SUPER_MAGIC 0x9fa0 +#endif + +#ifndef DEBUGFS_MAGIC +#define DEBUGFS_MAGIC 0x64626720 +#endif + +#ifndef TRACEFS_MAGIC +#define TRACEFS_MAGIC 0x74726163 +#endif + +#ifndef HUGETLBFS_MAGIC +#define HUGETLBFS_MAGIC 0x958458f6 +#endif + +#ifndef BPF_FS_MAGIC +#define BPF_FS_MAGIC 0xcafe4a11 +#endif + +static const char * const sysfs__fs_known_mountpoints[] = { + "/sys", + 0, +}; + +static const char * const procfs__known_mountpoints[] = { + "/proc", + 0, +}; + +#ifndef DEBUGFS_DEFAULT_PATH +#define DEBUGFS_DEFAULT_PATH "/sys/kernel/debug" +#endif + +static const char * const debugfs__known_mountpoints[] = { + DEBUGFS_DEFAULT_PATH, + "/debug", + 0, +}; + + +#ifndef TRACEFS_DEFAULT_PATH +#define TRACEFS_DEFAULT_PATH "/sys/kernel/tracing" +#endif + +static const char * const tracefs__known_mountpoints[] = { + TRACEFS_DEFAULT_PATH, + "/sys/kernel/debug/tracing", + "/tracing", + "/trace", + 0, +}; + +static const char * const hugetlbfs__known_mountpoints[] = { + 0, +}; + +static const char * const bpf_fs__known_mountpoints[] = { + "/sys/fs/bpf", + 0, +}; + +struct fs { + const char *name; + const char * const *mounts; + char path[PATH_MAX]; + bool found; + bool checked; + long magic; +}; + +enum { + FS__SYSFS = 0, + FS__PROCFS = 1, + FS__DEBUGFS = 2, + FS__TRACEFS = 3, + FS__HUGETLBFS = 4, + FS__BPF_FS = 5, +}; + +#ifndef TRACEFS_MAGIC +#define TRACEFS_MAGIC 0x74726163 +#endif + +static struct fs fs__entries[] = { + [FS__SYSFS] = { + .name = "sysfs", + .mounts = sysfs__fs_known_mountpoints, + .magic = SYSFS_MAGIC, + .checked = false, + }, + [FS__PROCFS] = { + .name = "proc", + .mounts = procfs__known_mountpoints, + .magic = PROC_SUPER_MAGIC, + .checked = false, + }, + [FS__DEBUGFS] = { + .name = "debugfs", + .mounts = debugfs__known_mountpoints, + .magic = DEBUGFS_MAGIC, + .checked = false, + }, + [FS__TRACEFS] = { + .name = "tracefs", + .mounts = tracefs__known_mountpoints, + .magic = TRACEFS_MAGIC, + .checked = false, + }, + [FS__HUGETLBFS] = { + .name = "hugetlbfs", + .mounts = hugetlbfs__known_mountpoints, + .magic = HUGETLBFS_MAGIC, + .checked = false, + }, + [FS__BPF_FS] = { + .name = "bpf", + .mounts = bpf_fs__known_mountpoints, + .magic = BPF_FS_MAGIC, + .checked = false, + }, +}; + +static bool fs__read_mounts(struct fs *fs) +{ + bool found = false; + char type[100]; + FILE *fp; + + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + return NULL; + + while (!found && + fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", + fs->path, type) == 2) { + + if (strcmp(type, fs->name) == 0) + found = true; + } + + fclose(fp); + fs->checked = true; + return fs->found = found; +} + +static int fs__valid_mount(const char *fs, long magic) +{ + struct statfs st_fs; + + if (statfs(fs, &st_fs) < 0) + return -ENOENT; + else if ((long)st_fs.f_type != magic) + return -ENOENT; + + return 0; +} + +static bool fs__check_mounts(struct fs *fs) +{ + const char * const *ptr; + + ptr = fs->mounts; + while (*ptr) { + if (fs__valid_mount(*ptr, fs->magic) == 0) { + fs->found = true; + strcpy(fs->path, *ptr); + return true; + } + ptr++; + } + + return false; +} + +static void mem_toupper(char *f, size_t len) +{ + while (len) { + *f = toupper(*f); + f++; + len--; + } +} + +/* + * Check for "NAME_PATH" environment variable to override fs location (for + * testing). This matches the recommendation in Documentation/admin-guide/sysfs-rules.rst + * for SYSFS_PATH. + */ +static bool fs__env_override(struct fs *fs) +{ + char *override_path; + size_t name_len = strlen(fs->name); + /* name + "_PATH" + '\0' */ + char upper_name[name_len + 5 + 1]; + + memcpy(upper_name, fs->name, name_len); + mem_toupper(upper_name, name_len); + strcpy(&upper_name[name_len], "_PATH"); + + override_path = getenv(upper_name); + if (!override_path) + return false; + + fs->found = true; + fs->checked = true; + strncpy(fs->path, override_path, sizeof(fs->path) - 1); + fs->path[sizeof(fs->path) - 1] = '\0'; + return true; +} + +static const char *fs__get_mountpoint(struct fs *fs) +{ + if (fs__env_override(fs)) + return fs->path; + + if (fs__check_mounts(fs)) + return fs->path; + + if (fs__read_mounts(fs)) + return fs->path; + + return NULL; +} + +static const char *fs__mountpoint(int idx) +{ + struct fs *fs = &fs__entries[idx]; + + if (fs->found) + return (const char *)fs->path; + + /* the mount point was already checked for the mount point + * but and did not exist, so return NULL to avoid scanning again. + * This makes the found and not found paths cost equivalent + * in case of multiple calls. + */ + if (fs->checked) + return NULL; + + return fs__get_mountpoint(fs); +} + +static const char *mount_overload(struct fs *fs) +{ + size_t name_len = strlen(fs->name); + /* "PERF_" + name + "_ENVIRONMENT" + '\0' */ + char upper_name[5 + name_len + 12 + 1]; + + snprintf(upper_name, name_len, "PERF_%s_ENVIRONMENT", fs->name); + mem_toupper(upper_name, name_len); + + return getenv(upper_name) ?: *fs->mounts; +} + +static const char *fs__mount(int idx) +{ + struct fs *fs = &fs__entries[idx]; + const char *mountpoint; + + if (fs__mountpoint(idx)) + return (const char *)fs->path; + + mountpoint = mount_overload(fs); + + if (mount(NULL, mountpoint, fs->name, 0, NULL) < 0) + return NULL; + + return fs__check_mounts(fs) ? fs->path : NULL; +} + +#define FS(name, idx) \ +const char *name##__mountpoint(void) \ +{ \ + return fs__mountpoint(idx); \ +} \ + \ +const char *name##__mount(void) \ +{ \ + return fs__mount(idx); \ +} \ + \ +bool name##__configured(void) \ +{ \ + return name##__mountpoint() != NULL; \ +} + +FS(sysfs, FS__SYSFS); +FS(procfs, FS__PROCFS); +FS(debugfs, FS__DEBUGFS); +FS(tracefs, FS__TRACEFS); +FS(hugetlbfs, FS__HUGETLBFS); +FS(bpf_fs, FS__BPF_FS); + +int filename__read_int(const char *filename, int *value) +{ + char line[64]; + int fd = open(filename, O_RDONLY), err = -1; + + if (fd < 0) + return -1; + + if (read(fd, line, sizeof(line)) > 0) { + *value = atoi(line); + err = 0; + } + + close(fd); + return err; +} + +static int filename__read_ull_base(const char *filename, + unsigned long long *value, int base) +{ + char line[64]; + int fd = open(filename, O_RDONLY), err = -1; + + if (fd < 0) + return -1; + + if (read(fd, line, sizeof(line)) > 0) { + *value = strtoull(line, NULL, base); + if (*value != ULLONG_MAX) + err = 0; + } + + close(fd); + return err; +} + +/* + * Parses @value out of @filename with strtoull. + * By using 16 for base to treat the number as hex. + */ +int filename__read_xll(const char *filename, unsigned long long *value) +{ + return filename__read_ull_base(filename, value, 16); +} + +/* + * Parses @value out of @filename with strtoull. + * By using 0 for base, the strtoull detects the + * base automatically (see man strtoull). + */ +int filename__read_ull(const char *filename, unsigned long long *value) +{ + return filename__read_ull_base(filename, value, 0); +} + +#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */ + +int filename__read_str(const char *filename, char **buf, size_t *sizep) +{ + size_t size = 0, alloc_size = 0; + void *bf = NULL, *nbf; + int fd, n, err = 0; + char sbuf[STRERR_BUFSIZE]; + + fd = open(filename, O_RDONLY); + if (fd < 0) + return -errno; + + do { + if (size == alloc_size) { + alloc_size += BUFSIZ; + nbf = realloc(bf, alloc_size); + if (!nbf) { + err = -ENOMEM; + break; + } + + bf = nbf; + } + + n = read(fd, bf + size, alloc_size - size); + if (n < 0) { + if (size) { + pr_warning("read failed %d: %s\n", errno, + strerror_r(errno, sbuf, sizeof(sbuf))); + err = 0; + } else + err = -errno; + + break; + } + + size += n; + } while (n > 0); + + if (!err) { + *sizep = size; + *buf = bf; + } else + free(bf); + + close(fd); + return err; +} + +int filename__write_int(const char *filename, int value) +{ + int fd = open(filename, O_WRONLY), err = -1; + char buf[64]; + + if (fd < 0) + return err; + + sprintf(buf, "%d", value); + if (write(fd, buf, sizeof(buf)) == sizeof(buf)) + err = 0; + + close(fd); + return err; +} + +int procfs__read_str(const char *entry, char **buf, size_t *sizep) +{ + char path[PATH_MAX]; + const char *procfs = procfs__mountpoint(); + + if (!procfs) + return -1; + + snprintf(path, sizeof(path), "%s/%s", procfs, entry); + + return filename__read_str(path, buf, sizep); +} + +static int sysfs__read_ull_base(const char *entry, + unsigned long long *value, int base) +{ + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return -1; + + snprintf(path, sizeof(path), "%s/%s", sysfs, entry); + + return filename__read_ull_base(path, value, base); +} + +int sysfs__read_xll(const char *entry, unsigned long long *value) +{ + return sysfs__read_ull_base(entry, value, 16); +} + +int sysfs__read_ull(const char *entry, unsigned long long *value) +{ + return sysfs__read_ull_base(entry, value, 0); +} + +int sysfs__read_int(const char *entry, int *value) +{ + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return -1; + + snprintf(path, sizeof(path), "%s/%s", sysfs, entry); + + return filename__read_int(path, value); +} + +int sysfs__read_str(const char *entry, char **buf, size_t *sizep) +{ + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return -1; + + snprintf(path, sizeof(path), "%s/%s", sysfs, entry); + + return filename__read_str(path, buf, sizep); +} + +int sysfs__read_bool(const char *entry, bool *value) +{ + char *buf; + size_t size; + int ret; + + ret = sysfs__read_str(entry, &buf, &size); + if (ret < 0) + return ret; + + switch (buf[0]) { + case '1': + case 'y': + case 'Y': + *value = true; + break; + case '0': + case 'n': + case 'N': + *value = false; + break; + default: + ret = -1; + } + + free(buf); + + return ret; +} +int sysctl__read_int(const char *sysctl, int *value) +{ + char path[PATH_MAX]; + const char *procfs = procfs__mountpoint(); + + if (!procfs) + return -1; + + snprintf(path, sizeof(path), "%s/sys/%s", procfs, sysctl); + + return filename__read_int(path, value); +} + +int sysfs__write_int(const char *entry, int value) +{ + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return -1; + + if (snprintf(path, sizeof(path), "%s/%s", sysfs, entry) >= PATH_MAX) + return -1; + + return filename__write_int(path, value); +} diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h new file mode 100644 index 000000000..3b70003e7 --- /dev/null +++ b/tools/lib/api/fs/fs.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __API_FS__ +#define __API_FS__ + +#include <stdbool.h> +#include <unistd.h> + +/* + * On most systems <limits.h> would have given us this, but not on some systems + * (e.g. GNU/Hurd). + */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +#define FS(name) \ + const char *name##__mountpoint(void); \ + const char *name##__mount(void); \ + bool name##__configured(void); \ + +/* + * The xxxx__mountpoint() entry points find the first match mount point for each + * filesystems listed below, where xxxx is the filesystem type. + * + * The interface is as follows: + * + * - If a mount point is found on first call, it is cached and used for all + * subsequent calls. + * + * - If a mount point is not found, NULL is returned on first call and all + * subsequent calls. + */ +FS(sysfs) +FS(procfs) +FS(debugfs) +FS(tracefs) +FS(hugetlbfs) +FS(bpf_fs) + +#undef FS + + +int filename__read_int(const char *filename, int *value); +int filename__read_ull(const char *filename, unsigned long long *value); +int filename__read_xll(const char *filename, unsigned long long *value); +int filename__read_str(const char *filename, char **buf, size_t *sizep); + +int filename__write_int(const char *filename, int value); + +int procfs__read_str(const char *entry, char **buf, size_t *sizep); + +int sysctl__read_int(const char *sysctl, int *value); +int sysfs__read_int(const char *entry, int *value); +int sysfs__read_ull(const char *entry, unsigned long long *value); +int sysfs__read_xll(const char *entry, unsigned long long *value); +int sysfs__read_str(const char *entry, char **buf, size_t *sizep); +int sysfs__read_bool(const char *entry, bool *value); + +int sysfs__write_int(const char *entry, int value); +#endif /* __API_FS__ */ diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c new file mode 100644 index 000000000..5afb11b30 --- /dev/null +++ b/tools/lib/api/fs/tracing_path.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <linux/string.h> +#include <errno.h> +#include <unistd.h> +#include "fs.h" + +#include "tracing_path.h" + +static char tracing_mnt[PATH_MAX] = "/sys/kernel/debug"; +static char tracing_path[PATH_MAX] = "/sys/kernel/debug/tracing"; +static char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events"; + +static void __tracing_path_set(const char *tracing, const char *mountpoint) +{ + snprintf(tracing_mnt, sizeof(tracing_mnt), "%s", mountpoint); + snprintf(tracing_path, sizeof(tracing_path), "%s/%s", + mountpoint, tracing); + snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", + mountpoint, tracing, "events"); +} + +static const char *tracing_path_tracefs_mount(void) +{ + const char *mnt; + + mnt = tracefs__mount(); + if (!mnt) + return NULL; + + __tracing_path_set("", mnt); + + return tracing_path; +} + +static const char *tracing_path_debugfs_mount(void) +{ + const char *mnt; + + mnt = debugfs__mount(); + if (!mnt) + return NULL; + + __tracing_path_set("tracing/", mnt); + + return tracing_path; +} + +const char *tracing_path_mount(void) +{ + const char *mnt; + + mnt = tracing_path_tracefs_mount(); + if (mnt) + return mnt; + + mnt = tracing_path_debugfs_mount(); + + return mnt; +} + +void tracing_path_set(const char *mntpt) +{ + __tracing_path_set("tracing/", mntpt); +} + +char *get_tracing_file(const char *name) +{ + char *file; + + if (asprintf(&file, "%s/%s", tracing_path_mount(), name) < 0) + return NULL; + + return file; +} + +void put_tracing_file(char *file) +{ + free(file); +} + +char *get_events_file(const char *name) +{ + char *file; + + if (asprintf(&file, "%s/events/%s", tracing_path_mount(), name) < 0) + return NULL; + + return file; +} + +void put_events_file(char *file) +{ + free(file); +} + +DIR *tracing_events__opendir(void) +{ + DIR *dir = NULL; + char *path = get_tracing_file("events"); + + if (path) { + dir = opendir(path); + put_events_file(path); + } + + return dir; +} + +int tracing_path__strerror_open_tp(int err, char *buf, size_t size, + const char *sys, const char *name) +{ + char sbuf[128]; + char filename[PATH_MAX]; + + snprintf(filename, PATH_MAX, "%s/%s", sys, name ?: "*"); + + switch (err) { + case ENOENT: + /* + * We will get here if we can't find the tracepoint, but one of + * debugfs or tracefs is configured, which means you probably + * want some tracepoint which wasn't compiled in your kernel. + * - jirka + */ + if (debugfs__configured() || tracefs__configured()) { + /* sdt markers */ + if (!strncmp(filename, "sdt_", 4)) { + snprintf(buf, size, + "Error:\tFile %s/%s not found.\n" + "Hint:\tSDT event cannot be directly recorded on.\n" + "\tPlease first use 'perf probe %s:%s' before recording it.\n", + tracing_events_path, filename, sys, name); + } else { + snprintf(buf, size, + "Error:\tFile %s/%s not found.\n" + "Hint:\tPerhaps this kernel misses some CONFIG_ setting to enable this feature?.\n", + tracing_events_path, filename); + } + break; + } + snprintf(buf, size, "%s", + "Error:\tUnable to find debugfs/tracefs\n" + "Hint:\tWas your kernel compiled with debugfs/tracefs support?\n" + "Hint:\tIs the debugfs/tracefs filesystem mounted?\n" + "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); + break; + case EACCES: { + snprintf(buf, size, + "Error:\tNo permissions to read %s/%s\n" + "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", + tracing_events_path, filename, tracing_path_mount()); + } + break; + default: + snprintf(buf, size, "%s", str_error_r(err, sbuf, sizeof(sbuf))); + break; + } + + return 0; +} diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h new file mode 100644 index 000000000..a19136b08 --- /dev/null +++ b/tools/lib/api/fs/tracing_path.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __API_FS_TRACING_PATH_H +#define __API_FS_TRACING_PATH_H + +#include <linux/types.h> +#include <dirent.h> + +DIR *tracing_events__opendir(void); + +void tracing_path_set(const char *mountpoint); +const char *tracing_path_mount(void); + +char *get_tracing_file(const char *name); +void put_tracing_file(char *file); + +char *get_events_file(const char *name); +void put_events_file(char *file); + +#define zput_events_file(ptr) ({ free(*ptr); *ptr = NULL; }) + +int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name); +#endif /* __API_FS_TRACING_PATH_H */ diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c new file mode 100644 index 000000000..38748b0e3 --- /dev/null +++ b/tools/lib/bitmap.c @@ -0,0 +1,75 @@ +/* + * From lib/bitmap.c + * Helper functions for bitmap.h. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ +#include <linux/bitmap.h> + +int __bitmap_weight(const unsigned long *bitmap, int bits) +{ + int k, w = 0, lim = bits/BITS_PER_LONG; + + for (k = 0; k < lim; k++) + w += hweight_long(bitmap[k]); + + if (bits % BITS_PER_LONG) + w += hweight_long(bitmap[k] & BITMAP_LAST_WORD_MASK(bits)); + + return w; +} + +void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] | bitmap2[k]; +} + +size_t bitmap_scnprintf(unsigned long *bitmap, int nbits, + char *buf, size_t size) +{ + /* current bit is 'cur', most recently seen range is [rbot, rtop] */ + int cur, rbot, rtop; + bool first = true; + size_t ret = 0; + + rbot = cur = find_first_bit(bitmap, nbits); + while (cur < nbits) { + rtop = cur; + cur = find_next_bit(bitmap, nbits, cur + 1); + if (cur < nbits && cur <= rtop + 1) + continue; + + if (!first) + ret += scnprintf(buf + ret, size - ret, ","); + + first = false; + + ret += scnprintf(buf + ret, size - ret, "%d", rbot); + if (rbot < rtop) + ret += scnprintf(buf + ret, size - ret, "-%d", rtop); + + rbot = cur; + } + return ret; +} + +int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) +{ + unsigned int k; + unsigned int lim = bits/BITS_PER_LONG; + unsigned long result = 0; + + for (k = 0; k < lim; k++) + result |= (dst[k] = bitmap1[k] & bitmap2[k]); + if (bits % BITS_PER_LONG) + result |= (dst[k] = bitmap1[k] & bitmap2[k] & + BITMAP_LAST_WORD_MASK(bits)); + return result != 0; +} diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore new file mode 100644 index 000000000..f81e549dd --- /dev/null +++ b/tools/lib/bpf/.gitignore @@ -0,0 +1,2 @@ +libbpf_version.h +FEATURE-DUMP.libbpf diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build new file mode 100644 index 000000000..6eb9bacd1 --- /dev/null +++ b/tools/lib/bpf/Build @@ -0,0 +1 @@ +libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile new file mode 100644 index 000000000..6f57d3844 --- /dev/null +++ b/tools/lib/bpf/Makefile @@ -0,0 +1,221 @@ +# SPDX-License-Identifier: GPL-2.0 +# Most of this file is copied from tools/lib/traceevent/Makefile + +BPF_VERSION = 0 +BPF_PATCHLEVEL = 0 +BPF_EXTRAVERSION = 1 + +MAKEFLAGS += --no-print-directory + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) + +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +include $(srctree)/tools/scripts/Makefile.arch + +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= /usr/local +libdir = $(prefix)/$(libdir_relative) +man_dir = $(prefix)/share/man +man_dir_SQ = '$(subst ','\'',$(man_dir))' + +export man_dir man_dir_SQ INSTALL +export DESTDIR DESTDIR_SQ + +include ../../scripts/Makefile.include + +# copy a bit from Linux kbuild + +ifeq ("$(origin V)", "command line") + VERBOSE = $(V) +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +FEATURE_USER = .libbpf +FEATURE_TESTS = libelf libelf-mmap bpf reallocarray +FEATURE_DISPLAY = libelf bpf + +INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi -I$(srctree)/tools/perf +FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) + +check_feat := 1 +NON_CHECK_FEAT_TARGETS := clean TAGS tags cscope help +ifdef MAKECMDGOALS +ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) + check_feat := 0 +endif +endif + +ifeq ($(check_feat),1) +ifeq ($(FEATURES_DUMP),) +include $(srctree)/tools/build/Makefile.feature +else +include $(FEATURES_DUMP) +endif +endif + +export prefix libdir src obj + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) +libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) + +LIB_FILE = libbpf.a libbpf.so + +VERSION = $(BPF_VERSION) +PATCHLEVEL = $(BPF_PATCHLEVEL) +EXTRAVERSION = $(BPF_EXTRAVERSION) + +OBJ = $@ +N = + +LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION) + +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif + +ifeq ($(feature-libelf-mmap), 1) + override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT +endif + +ifeq ($(feature-reallocarray), 0) + override CFLAGS += -DCOMPAT_NEED_REALLOCARRAY +endif + +# Append required CFLAGS +override CFLAGS += $(EXTRA_WARNINGS) +override CFLAGS += -Werror -Wall +override CFLAGS += -fPIC +override CFLAGS += $(INCLUDES) + +ifeq ($(VERBOSE),1) + Q = +else + Q = @ +endif + +# Disable command line variables (CFLAGS) override from top +# level Makefile (perf), otherwise build Makefile will get +# the same command line setup. +MAKEOVERRIDES= + +all: + +export srctree OUTPUT CC LD CFLAGS V +include $(srctree)/tools/build/Makefile.include + +BPF_IN := $(OUTPUT)libbpf-in.o +LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) + +CMD_TARGETS = $(LIB_FILE) + +TARGETS = $(CMD_TARGETS) + +all: fixdep + $(Q)$(MAKE) all_cmd + +all_cmd: $(CMD_TARGETS) + +$(BPF_IN): force elfdep bpfdep + @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ + (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ + echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true + @(test -f ../../include/uapi/linux/bpf_common.h -a -f ../../../include/uapi/linux/bpf_common.h && ( \ + (diff -B ../../include/uapi/linux/bpf_common.h ../../../include/uapi/linux/bpf_common.h >/dev/null) || \ + echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf_common.h' differs from latest version at 'include/uapi/linux/bpf_common.h'" >&2 )) || true + @(test -f ../../include/uapi/linux/netlink.h -a -f ../../../include/uapi/linux/netlink.h && ( \ + (diff -B ../../include/uapi/linux/netlink.h ../../../include/uapi/linux/netlink.h >/dev/null) || \ + echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/netlink.h' differs from latest version at 'include/uapi/linux/netlink.h'" >&2 )) || true + @(test -f ../../include/uapi/linux/if_link.h -a -f ../../../include/uapi/linux/if_link.h && ( \ + (diff -B ../../include/uapi/linux/if_link.h ../../../include/uapi/linux/if_link.h >/dev/null) || \ + echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'" >&2 )) || true + $(Q)$(MAKE) $(build)=libbpf + +$(OUTPUT)libbpf.so: $(BPF_IN) + $(QUIET_LINK)$(CC) --shared $^ -o $@ + +$(OUTPUT)libbpf.a: $(BPF_IN) + $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ + +define do_install + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' +endef + +install_lib: all_cmd + $(call QUIET_INSTALL, $(LIB_FILE)) \ + $(call do_install,$(LIB_FILE),$(libdir_SQ)) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ + $(call do_install,libbpf.h,$(prefix)/include/bpf,644); + $(call do_install,btf.h,$(prefix)/include/bpf,644); + +install: install_lib + +### Cleaning rules + +config-clean: + $(call QUIET_CLEAN, config) + $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null + +clean: + $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so .*.d .*.cmd \ + $(RM) LIBBPF-CFLAGS + $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf + + + +PHONY += force elfdep bpfdep +force: + +elfdep: + @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi + +bpfdep: + @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi + +# Declare the contents of the .PHONY variable as phony. We keep that +# information in a variable so we can use it in if_changed and friends. +.PHONY: $(PHONY) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c new file mode 100644 index 000000000..f28ae6a68 --- /dev/null +++ b/tools/lib/bpf/bpf.c @@ -0,0 +1,691 @@ +// SPDX-License-Identifier: LGPL-2.1 + +/* + * common eBPF ELF operations. + * + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + */ + +#include <stdlib.h> +#include <memory.h> +#include <unistd.h> +#include <asm/unistd.h> +#include <linux/bpf.h> +#include "bpf.h" +#include "libbpf.h" +#include "nlattr.h" +#include <linux/rtnetlink.h> +#include <linux/if_link.h> +#include <sys/socket.h> +#include <errno.h> + +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + +/* + * When building perf, unistd.h is overridden. __NR_bpf is + * required to be defined explicitly. + */ +#ifndef __NR_bpf +# if defined(__i386__) +# define __NR_bpf 357 +# elif defined(__x86_64__) +# define __NR_bpf 321 +# elif defined(__aarch64__) +# define __NR_bpf 280 +# elif defined(__sparc__) +# define __NR_bpf 349 +# elif defined(__s390__) +# define __NR_bpf 351 +# elif defined(__arc__) +# define __NR_bpf 280 +# else +# error __NR_bpf not defined. libbpf does not support your arch. +# endif +#endif + +#ifndef min +#define min(x, y) ((x) < (y) ? (x) : (y)) +#endif + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + +static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size) +{ + return syscall(__NR_bpf, cmd, attr, size); +} + +static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) +{ + int fd; + + do { + fd = sys_bpf(BPF_PROG_LOAD, attr, size); + } while (fd < 0 && errno == EAGAIN); + + return fd; +} + +int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) +{ + __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0; + union bpf_attr attr; + int ret; + + memset(&attr, '\0', sizeof(attr)); + + attr.map_type = create_attr->map_type; + attr.key_size = create_attr->key_size; + attr.value_size = create_attr->value_size; + attr.max_entries = create_attr->max_entries; + attr.map_flags = create_attr->map_flags; + memcpy(attr.map_name, create_attr->name, + min(name_len, BPF_OBJ_NAME_LEN - 1)); + attr.numa_node = create_attr->numa_node; + attr.btf_fd = create_attr->btf_fd; + attr.btf_key_type_id = create_attr->btf_key_type_id; + attr.btf_value_type_id = create_attr->btf_value_type_id; + attr.map_ifindex = create_attr->map_ifindex; + attr.inner_map_fd = create_attr->inner_map_fd; + + ret = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + if (ret < 0 && errno == EINVAL && create_attr->name) { + /* Retry the same syscall, but without the name. + * Pre v4.14 kernels don't support map names. + */ + memset(attr.map_name, 0, sizeof(attr.map_name)); + return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + } + return ret; +} + +int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags, int node) +{ + struct bpf_create_map_attr map_attr = {}; + + map_attr.name = name; + map_attr.map_type = map_type; + map_attr.map_flags = map_flags; + map_attr.key_size = key_size; + map_attr.value_size = value_size; + map_attr.max_entries = max_entries; + if (node >= 0) { + map_attr.numa_node = node; + map_attr.map_flags |= BPF_F_NUMA_NODE; + } + + return bpf_create_map_xattr(&map_attr); +} + +int bpf_create_map(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries, __u32 map_flags) +{ + struct bpf_create_map_attr map_attr = {}; + + map_attr.map_type = map_type; + map_attr.map_flags = map_flags; + map_attr.key_size = key_size; + map_attr.value_size = value_size; + map_attr.max_entries = max_entries; + + return bpf_create_map_xattr(&map_attr); +} + +int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags) +{ + struct bpf_create_map_attr map_attr = {}; + + map_attr.name = name; + map_attr.map_type = map_type; + map_attr.map_flags = map_flags; + map_attr.key_size = key_size; + map_attr.value_size = value_size; + map_attr.max_entries = max_entries; + + return bpf_create_map_xattr(&map_attr); +} + +int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags, int node) +{ + __u32 name_len = name ? strlen(name) : 0; + union bpf_attr attr; + + memset(&attr, '\0', sizeof(attr)); + + attr.map_type = map_type; + attr.key_size = key_size; + attr.value_size = 4; + attr.inner_map_fd = inner_map_fd; + attr.max_entries = max_entries; + attr.map_flags = map_flags; + memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); + + if (node >= 0) { + attr.map_flags |= BPF_F_NUMA_NODE; + attr.numa_node = node; + } + + return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); +} + +int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags) +{ + return bpf_create_map_in_map_node(map_type, name, key_size, + inner_map_fd, max_entries, map_flags, + -1); +} + +int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, + char *log_buf, size_t log_buf_sz) +{ + union bpf_attr attr; + __u32 name_len; + int fd; + + if (!load_attr) + return -EINVAL; + + name_len = load_attr->name ? strlen(load_attr->name) : 0; + + bzero(&attr, sizeof(attr)); + attr.prog_type = load_attr->prog_type; + attr.expected_attach_type = load_attr->expected_attach_type; + attr.insn_cnt = (__u32)load_attr->insns_cnt; + attr.insns = ptr_to_u64(load_attr->insns); + attr.license = ptr_to_u64(load_attr->license); + attr.log_buf = ptr_to_u64(NULL); + attr.log_size = 0; + attr.log_level = 0; + attr.kern_version = load_attr->kern_version; + attr.prog_ifindex = load_attr->prog_ifindex; + memcpy(attr.prog_name, load_attr->name, + min(name_len, BPF_OBJ_NAME_LEN - 1)); + + fd = sys_bpf_prog_load(&attr, sizeof(attr)); + if (fd >= 0 || !log_buf || !log_buf_sz) + return fd; + + /* Try again with log */ + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_buf_sz; + attr.log_level = 1; + log_buf[0] = 0; + return sys_bpf_prog_load(&attr, sizeof(attr)); +} + +int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz) +{ + struct bpf_load_program_attr load_attr; + + memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); + load_attr.prog_type = type; + load_attr.expected_attach_type = 0; + load_attr.name = NULL; + load_attr.insns = insns; + load_attr.insns_cnt = insns_cnt; + load_attr.license = license; + load_attr.kern_version = kern_version; + + return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz); +} + +int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, __u32 prog_flags, const char *license, + __u32 kern_version, char *log_buf, size_t log_buf_sz, + int log_level) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.prog_type = type; + attr.insn_cnt = (__u32)insns_cnt; + attr.insns = ptr_to_u64(insns); + attr.license = ptr_to_u64(license); + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_buf_sz; + attr.log_level = log_level; + log_buf[0] = 0; + attr.kern_version = kern_version; + attr.prog_flags = prog_flags; + + return sys_bpf_prog_load(&attr, sizeof(attr)); +} + +int bpf_map_update_elem(int fd, const void *key, const void *value, + __u64 flags) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.value = ptr_to_u64(value); + attr.flags = flags; + + return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); +} + +int bpf_map_lookup_elem(int fd, const void *key, void *value) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.value = ptr_to_u64(value); + + return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); +} + +int bpf_map_delete_elem(int fd, const void *key) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + + return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); +} + +int bpf_map_get_next_key(int fd, const void *key, void *next_key) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.next_key = ptr_to_u64(next_key); + + return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); +} + +int bpf_obj_pin(int fd, const char *pathname) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.pathname = ptr_to_u64((void *)pathname); + attr.bpf_fd = fd; + + return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); +} + +int bpf_obj_get(const char *pathname) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.pathname = ptr_to_u64((void *)pathname); + + return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); +} + +int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, + unsigned int flags) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + attr.attach_flags = flags; + + return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); +} + +int bpf_prog_detach(int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_type = type; + + return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); +} + +int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + + return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); +} + +int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, + __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt) +{ + union bpf_attr attr; + int ret; + + bzero(&attr, sizeof(attr)); + attr.query.target_fd = target_fd; + attr.query.attach_type = type; + attr.query.query_flags = query_flags; + attr.query.prog_cnt = *prog_cnt; + attr.query.prog_ids = ptr_to_u64(prog_ids); + + ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr)); + if (attach_flags) + *attach_flags = attr.query.attach_flags; + *prog_cnt = attr.query.prog_cnt; + return ret; +} + +int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, + void *data_out, __u32 *size_out, __u32 *retval, + __u32 *duration) +{ + union bpf_attr attr; + int ret; + + bzero(&attr, sizeof(attr)); + attr.test.prog_fd = prog_fd; + attr.test.data_in = ptr_to_u64(data); + attr.test.data_out = ptr_to_u64(data_out); + attr.test.data_size_in = size; + attr.test.repeat = repeat; + + ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + if (size_out) + *size_out = attr.test.data_size_out; + if (retval) + *retval = attr.test.retval; + if (duration) + *duration = attr.test.duration; + return ret; +} + +int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) +{ + union bpf_attr attr; + int err; + + bzero(&attr, sizeof(attr)); + attr.start_id = start_id; + + err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr)); + if (!err) + *next_id = attr.next_id; + + return err; +} + +int bpf_map_get_next_id(__u32 start_id, __u32 *next_id) +{ + union bpf_attr attr; + int err; + + bzero(&attr, sizeof(attr)); + attr.start_id = start_id; + + err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr)); + if (!err) + *next_id = attr.next_id; + + return err; +} + +int bpf_prog_get_fd_by_id(__u32 id) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.prog_id = id; + + return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); +} + +int bpf_map_get_fd_by_id(__u32 id) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.map_id = id; + + return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); +} + +int bpf_btf_get_fd_by_id(__u32 id) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.btf_id = id; + + return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); +} + +int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) +{ + union bpf_attr attr; + int err; + + bzero(&attr, sizeof(attr)); + attr.info.bpf_fd = prog_fd; + attr.info.info_len = *info_len; + attr.info.info = ptr_to_u64(info); + + err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)); + if (!err) + *info_len = attr.info.info_len; + + return err; +} + +int bpf_raw_tracepoint_open(const char *name, int prog_fd) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.raw_tracepoint.name = ptr_to_u64(name); + attr.raw_tracepoint.prog_fd = prog_fd; + + return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); +} + +int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) +{ + struct sockaddr_nl sa; + int sock, seq = 0, len, ret = -1; + char buf[4096]; + struct nlattr *nla, *nla_xdp; + struct { + struct nlmsghdr nh; + struct ifinfomsg ifinfo; + char attrbuf[64]; + } req; + struct nlmsghdr *nh; + struct nlmsgerr *err; + socklen_t addrlen; + int one = 1; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + return -errno; + } + + if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)) < 0) { + fprintf(stderr, "Netlink error reporting not supported\n"); + } + + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + ret = -errno; + goto cleanup; + } + + addrlen = sizeof(sa); + if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) { + ret = -errno; + goto cleanup; + } + + if (addrlen != sizeof(sa)) { + ret = -LIBBPF_ERRNO__INTERNAL; + goto cleanup; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_SETLINK; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = ifindex; + + /* started nested attribute for XDP */ + nla = (struct nlattr *)(((char *)&req) + + NLMSG_ALIGN(req.nh.nlmsg_len)); + nla->nla_type = NLA_F_NESTED | IFLA_XDP; + nla->nla_len = NLA_HDRLEN; + + /* add XDP fd */ + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = IFLA_XDP_FD; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); + memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); + nla->nla_len += nla_xdp->nla_len; + + /* if user passed in any flags, add those too */ + if (flags) { + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = IFLA_XDP_FLAGS; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); + memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); + nla->nla_len += nla_xdp->nla_len; + } + + req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + ret = -errno; + goto cleanup; + } + + len = recv(sock, buf, sizeof(buf), 0); + if (len < 0) { + ret = -errno; + goto cleanup; + } + + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + nh = NLMSG_NEXT(nh, len)) { + if (nh->nlmsg_pid != sa.nl_pid) { + ret = -LIBBPF_ERRNO__WRNGPID; + goto cleanup; + } + if (nh->nlmsg_seq != seq) { + ret = -LIBBPF_ERRNO__INVSEQ; + goto cleanup; + } + switch (nh->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(nh); + if (!err->error) + continue; + ret = err->error; + nla_dump_errormsg(nh); + goto cleanup; + case NLMSG_DONE: + break; + default: + break; + } + } + + ret = 0; + +cleanup: + close(sock); + return ret; +} + +int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, + bool do_log) +{ + union bpf_attr attr = {}; + int fd; + + attr.btf = ptr_to_u64(btf); + attr.btf_size = btf_size; + +retry: + if (do_log && log_buf && log_buf_size) { + attr.btf_log_level = 1; + attr.btf_log_size = log_buf_size; + attr.btf_log_buf = ptr_to_u64(log_buf); + } + + fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr)); + if (fd == -1 && !do_log && log_buf && log_buf_size) { + do_log = true; + goto retry; + } + + return fd; +} + +int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, + __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, + __u64 *probe_addr) +{ + union bpf_attr attr = {}; + int err; + + attr.task_fd_query.pid = pid; + attr.task_fd_query.fd = fd; + attr.task_fd_query.flags = flags; + attr.task_fd_query.buf = ptr_to_u64(buf); + attr.task_fd_query.buf_len = *buf_len; + + err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr)); + *buf_len = attr.task_fd_query.buf_len; + *prog_id = attr.task_fd_query.prog_id; + *fd_type = attr.task_fd_query.fd_type; + *probe_offset = attr.task_fd_query.probe_offset; + *probe_addr = attr.task_fd_query.probe_addr; + + return err; +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h new file mode 100644 index 000000000..7f2e947d9 --- /dev/null +++ b/tools/lib/bpf/bpf.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ + +/* + * common eBPF ELF operations. + * + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + */ +#ifndef __BPF_BPF_H +#define __BPF_BPF_H + +#include <linux/bpf.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +struct bpf_create_map_attr { + const char *name; + enum bpf_map_type map_type; + __u32 map_flags; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 numa_node; + __u32 btf_fd; + __u32 btf_key_type_id; + __u32 btf_value_type_id; + __u32 map_ifindex; + __u32 inner_map_fd; +}; + +int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); +int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags, int node); +int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags); +int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, + int max_entries, __u32 map_flags); +int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags, int node); +int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags); + +struct bpf_load_program_attr { + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + const char *name; + const struct bpf_insn *insns; + size_t insns_cnt; + const char *license; + __u32 kern_version; + __u32 prog_ifindex; +}; + +/* Recommend log buffer size */ +#define BPF_LOG_BUF_SIZE (256 * 1024) +int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, + char *log_buf, size_t log_buf_sz); +int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz); +int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, __u32 prog_flags, + const char *license, __u32 kern_version, + char *log_buf, size_t log_buf_sz, int log_level); + +int bpf_map_update_elem(int fd, const void *key, const void *value, + __u64 flags); + +int bpf_map_lookup_elem(int fd, const void *key, void *value); +int bpf_map_delete_elem(int fd, const void *key); +int bpf_map_get_next_key(int fd, const void *key, void *next_key); +int bpf_obj_pin(int fd, const char *pathname); +int bpf_obj_get(const char *pathname); +int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, + unsigned int flags); +int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); +int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); +int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, + void *data_out, __u32 *size_out, __u32 *retval, + __u32 *duration); +int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); +int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); +int bpf_prog_get_fd_by_id(__u32 id); +int bpf_map_get_fd_by_id(__u32 id); +int bpf_btf_get_fd_by_id(__u32 id); +int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); +int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, + __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); +int bpf_raw_tracepoint_open(const char *name, int prog_fd); +int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, + bool do_log); +int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, + __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, + __u64 *probe_addr); +#endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c new file mode 100644 index 000000000..cf94b0770 --- /dev/null +++ b/tools/lib/bpf/btf.c @@ -0,0 +1,395 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* Copyright (c) 2018 Facebook */ + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <linux/err.h> +#include <linux/btf.h> +#include "btf.h" +#include "bpf.h" + +#define elog(fmt, ...) { if (err_log) err_log(fmt, ##__VA_ARGS__); } +#define max(a, b) ((a) > (b) ? (a) : (b)) +#define min(a, b) ((a) < (b) ? (a) : (b)) + +#define BTF_MAX_NR_TYPES 65535 + +#define IS_MODIFIER(k) (((k) == BTF_KIND_TYPEDEF) || \ + ((k) == BTF_KIND_VOLATILE) || \ + ((k) == BTF_KIND_CONST) || \ + ((k) == BTF_KIND_RESTRICT)) + +static struct btf_type btf_void; + +struct btf { + union { + struct btf_header *hdr; + void *data; + }; + struct btf_type **types; + const char *strings; + void *nohdr_data; + __u32 nr_types; + __u32 types_size; + __u32 data_size; + int fd; +}; + +static int btf_add_type(struct btf *btf, struct btf_type *t) +{ + if (btf->types_size - btf->nr_types < 2) { + struct btf_type **new_types; + __u32 expand_by, new_size; + + if (btf->types_size == BTF_MAX_NR_TYPES) + return -E2BIG; + + expand_by = max(btf->types_size >> 2, 16); + new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by); + + new_types = realloc(btf->types, sizeof(*new_types) * new_size); + if (!new_types) + return -ENOMEM; + + if (btf->nr_types == 0) + new_types[0] = &btf_void; + + btf->types = new_types; + btf->types_size = new_size; + } + + btf->types[++(btf->nr_types)] = t; + + return 0; +} + +static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log) +{ + const struct btf_header *hdr = btf->hdr; + __u32 meta_left; + + if (btf->data_size < sizeof(struct btf_header)) { + elog("BTF header not found\n"); + return -EINVAL; + } + + if (hdr->magic != BTF_MAGIC) { + elog("Invalid BTF magic:%x\n", hdr->magic); + return -EINVAL; + } + + if (hdr->version != BTF_VERSION) { + elog("Unsupported BTF version:%u\n", hdr->version); + return -ENOTSUP; + } + + if (hdr->flags) { + elog("Unsupported BTF flags:%x\n", hdr->flags); + return -ENOTSUP; + } + + meta_left = btf->data_size - sizeof(*hdr); + if (!meta_left) { + elog("BTF has no data\n"); + return -EINVAL; + } + + if (meta_left < hdr->type_off) { + elog("Invalid BTF type section offset:%u\n", hdr->type_off); + return -EINVAL; + } + + if (meta_left < hdr->str_off) { + elog("Invalid BTF string section offset:%u\n", hdr->str_off); + return -EINVAL; + } + + if (hdr->type_off >= hdr->str_off) { + elog("BTF type section offset >= string section offset. No type?\n"); + return -EINVAL; + } + + if (hdr->type_off & 0x02) { + elog("BTF type section is not aligned to 4 bytes\n"); + return -EINVAL; + } + + btf->nohdr_data = btf->hdr + 1; + + return 0; +} + +static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log) +{ + const struct btf_header *hdr = btf->hdr; + const char *start = btf->nohdr_data + hdr->str_off; + const char *end = start + btf->hdr->str_len; + + if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || + start[0] || end[-1]) { + elog("Invalid BTF string section\n"); + return -EINVAL; + } + + btf->strings = start; + + return 0; +} + +static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) +{ + struct btf_header *hdr = btf->hdr; + void *nohdr_data = btf->nohdr_data; + void *next_type = nohdr_data + hdr->type_off; + void *end_type = nohdr_data + hdr->str_off; + + while (next_type < end_type) { + struct btf_type *t = next_type; + __u16 vlen = BTF_INFO_VLEN(t->info); + int err; + + next_type += sizeof(*t); + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + next_type += sizeof(int); + break; + case BTF_KIND_ARRAY: + next_type += sizeof(struct btf_array); + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + next_type += vlen * sizeof(struct btf_member); + break; + case BTF_KIND_ENUM: + next_type += vlen * sizeof(struct btf_enum); + break; + case BTF_KIND_TYPEDEF: + case BTF_KIND_PTR: + case BTF_KIND_FWD: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + break; + default: + elog("Unsupported BTF_KIND:%u\n", + BTF_INFO_KIND(t->info)); + return -EINVAL; + } + + err = btf_add_type(btf, t); + if (err) + return err; + } + + return 0; +} + +const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) +{ + if (type_id > btf->nr_types) + return NULL; + + return btf->types[type_id]; +} + +static bool btf_type_is_void(const struct btf_type *t) +{ + return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD; +} + +static bool btf_type_is_void_or_null(const struct btf_type *t) +{ + return !t || btf_type_is_void(t); +} + +static __s64 btf_type_size(const struct btf_type *t) +{ + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + return t->size; + case BTF_KIND_PTR: + return sizeof(void *); + default: + return -EINVAL; + } +} + +#define MAX_RESOLVE_DEPTH 32 + +__s64 btf__resolve_size(const struct btf *btf, __u32 type_id) +{ + const struct btf_array *array; + const struct btf_type *t; + __u32 nelems = 1; + __s64 size = -1; + int i; + + t = btf__type_by_id(btf, type_id); + for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); + i++) { + size = btf_type_size(t); + if (size >= 0) + break; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + type_id = t->type; + break; + case BTF_KIND_ARRAY: + array = (const struct btf_array *)(t + 1); + if (nelems && array->nelems > UINT32_MAX / nelems) + return -E2BIG; + nelems *= array->nelems; + type_id = array->type; + break; + default: + return -EINVAL; + } + + t = btf__type_by_id(btf, type_id); + } + + if (size < 0) + return -EINVAL; + + if (nelems && size > UINT32_MAX / nelems) + return -E2BIG; + + return nelems * size; +} + +int btf__resolve_type(const struct btf *btf, __u32 type_id) +{ + const struct btf_type *t; + int depth = 0; + + t = btf__type_by_id(btf, type_id); + while (depth < MAX_RESOLVE_DEPTH && + !btf_type_is_void_or_null(t) && + IS_MODIFIER(BTF_INFO_KIND(t->info))) { + type_id = t->type; + t = btf__type_by_id(btf, type_id); + depth++; + } + + if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t)) + return -EINVAL; + + return type_id; +} + +__s32 btf__find_by_name(const struct btf *btf, const char *type_name) +{ + __u32 i; + + if (!strcmp(type_name, "void")) + return 0; + + for (i = 1; i <= btf->nr_types; i++) { + const struct btf_type *t = btf->types[i]; + const char *name = btf__name_by_offset(btf, t->name_off); + + if (name && !strcmp(type_name, name)) + return i; + } + + return -ENOENT; +} + +void btf__free(struct btf *btf) +{ + if (!btf) + return; + + if (btf->fd != -1) + close(btf->fd); + + free(btf->data); + free(btf->types); + free(btf); +} + +struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log) +{ + __u32 log_buf_size = 0; + char *log_buf = NULL; + struct btf *btf; + int err; + + btf = calloc(1, sizeof(struct btf)); + if (!btf) + return ERR_PTR(-ENOMEM); + + btf->fd = -1; + + if (err_log) { + log_buf = malloc(BPF_LOG_BUF_SIZE); + if (!log_buf) { + err = -ENOMEM; + goto done; + } + *log_buf = 0; + log_buf_size = BPF_LOG_BUF_SIZE; + } + + btf->data = malloc(size); + if (!btf->data) { + err = -ENOMEM; + goto done; + } + + memcpy(btf->data, data, size); + btf->data_size = size; + + btf->fd = bpf_load_btf(btf->data, btf->data_size, + log_buf, log_buf_size, false); + + if (btf->fd == -1) { + err = -errno; + elog("Error loading BTF: %s(%d)\n", strerror(errno), errno); + if (log_buf && *log_buf) + elog("%s\n", log_buf); + goto done; + } + + err = btf_parse_hdr(btf, err_log); + if (err) + goto done; + + err = btf_parse_str_sec(btf, err_log); + if (err) + goto done; + + err = btf_parse_type_sec(btf, err_log); + +done: + free(log_buf); + + if (err) { + btf__free(btf); + return ERR_PTR(err); + } + + return btf; +} + +int btf__fd(const struct btf *btf) +{ + return btf->fd; +} + +const char *btf__name_by_offset(const struct btf *btf, __u32 offset) +{ + if (offset < btf->hdr->str_len) + return &btf->strings[offset]; + else + return NULL; +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h new file mode 100644 index 000000000..4897e0724 --- /dev/null +++ b/tools/lib/bpf/btf.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +/* Copyright (c) 2018 Facebook */ + +#ifndef __BPF_BTF_H +#define __BPF_BTF_H + +#include <linux/types.h> + +#define BTF_ELF_SEC ".BTF" + +struct btf; +struct btf_type; + +typedef int (*btf_print_fn_t)(const char *, ...) + __attribute__((format(printf, 1, 2))); + +void btf__free(struct btf *btf); +struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log); +__s32 btf__find_by_name(const struct btf *btf, const char *type_name); +const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); +__s64 btf__resolve_size(const struct btf *btf, __u32 type_id); +int btf__resolve_type(const struct btf *btf, __u32 type_id); +int btf__fd(const struct btf *btf); +const char *btf__name_by_offset(const struct btf *btf, __u32 offset); + +#endif diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c new file mode 100644 index 000000000..249fa8d73 --- /dev/null +++ b/tools/lib/bpf/libbpf.c @@ -0,0 +1,2431 @@ +// SPDX-License-Identifier: LGPL-2.1 + +/* + * Common eBPF ELF object loading operations. + * + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + * Copyright (C) 2017 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <libgen.h> +#include <inttypes.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> +#include <perf-sys.h> +#include <asm/unistd.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/list.h> +#include <linux/limits.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/vfs.h> +#include <tools/libc_compat.h> +#include <libelf.h> +#include <gelf.h> + +#include "libbpf.h" +#include "bpf.h" +#include "btf.h" +#include "str_error.h" + +#ifndef EM_BPF +#define EM_BPF 247 +#endif + +#ifndef BPF_FS_MAGIC +#define BPF_FS_MAGIC 0xcafe4a11 +#endif + +#define __printf(a, b) __attribute__((format(printf, a, b))) + +__printf(1, 2) +static int __base_pr(const char *format, ...) +{ + va_list args; + int err; + + va_start(args, format); + err = vfprintf(stderr, format, args); + va_end(args); + return err; +} + +static __printf(1, 2) libbpf_print_fn_t __pr_warning = __base_pr; +static __printf(1, 2) libbpf_print_fn_t __pr_info = __base_pr; +static __printf(1, 2) libbpf_print_fn_t __pr_debug; + +#define __pr(func, fmt, ...) \ +do { \ + if ((func)) \ + (func)("libbpf: " fmt, ##__VA_ARGS__); \ +} while (0) + +#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__) +#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__) +#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__) + +void libbpf_set_print(libbpf_print_fn_t warn, + libbpf_print_fn_t info, + libbpf_print_fn_t debug) +{ + __pr_warning = warn; + __pr_info = info; + __pr_debug = debug; +} + +#define STRERR_BUFSIZE 128 + +#define CHECK_ERR(action, err, out) do { \ + err = action; \ + if (err) \ + goto out; \ +} while(0) + + +/* Copied from tools/perf/util/util.h */ +#ifndef zfree +# define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) +#endif + +#ifndef zclose +# define zclose(fd) ({ \ + int ___err = 0; \ + if ((fd) >= 0) \ + ___err = close((fd)); \ + fd = -1; \ + ___err; }) +#endif + +#ifdef HAVE_LIBELF_MMAP_SUPPORT +# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP +#else +# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ +#endif + +/* + * bpf_prog should be a better name but it has been used in + * linux/filter.h. + */ +struct bpf_program { + /* Index in elf obj file, for relocation use. */ + int idx; + char *name; + int prog_ifindex; + char *section_name; + struct bpf_insn *insns; + size_t insns_cnt, main_prog_cnt; + enum bpf_prog_type type; + + struct reloc_desc { + enum { + RELO_LD64, + RELO_CALL, + } type; + int insn_idx; + union { + int map_idx; + int text_off; + }; + } *reloc_desc; + int nr_reloc; + + struct { + int nr; + int *fds; + } instances; + bpf_program_prep_t preprocessor; + + struct bpf_object *obj; + void *priv; + bpf_program_clear_priv_t clear_priv; + + enum bpf_attach_type expected_attach_type; +}; + +struct bpf_map { + int fd; + char *name; + size_t offset; + int map_ifindex; + struct bpf_map_def def; + __u32 btf_key_type_id; + __u32 btf_value_type_id; + void *priv; + bpf_map_clear_priv_t clear_priv; +}; + +static LIST_HEAD(bpf_objects_list); + +struct bpf_object { + char license[64]; + u32 kern_version; + + struct bpf_program *programs; + size_t nr_programs; + struct bpf_map *maps; + size_t nr_maps; + + bool loaded; + bool has_pseudo_calls; + + /* + * Information when doing elf related work. Only valid if fd + * is valid. + */ + struct { + int fd; + void *obj_buf; + size_t obj_buf_sz; + Elf *elf; + GElf_Ehdr ehdr; + Elf_Data *symbols; + size_t strtabidx; + struct { + GElf_Shdr shdr; + Elf_Data *data; + } *reloc; + int nr_reloc; + int maps_shndx; + int text_shndx; + } efile; + /* + * All loaded bpf_object is linked in a list, which is + * hidden to caller. bpf_objects__<func> handlers deal with + * all objects. + */ + struct list_head list; + + struct btf *btf; + + void *priv; + bpf_object_clear_priv_t clear_priv; + + char path[]; +}; +#define obj_elf_valid(o) ((o)->efile.elf) + +static void bpf_program__unload(struct bpf_program *prog) +{ + int i; + + if (!prog) + return; + + /* + * If the object is opened but the program was never loaded, + * it is possible that prog->instances.nr == -1. + */ + if (prog->instances.nr > 0) { + for (i = 0; i < prog->instances.nr; i++) + zclose(prog->instances.fds[i]); + } else if (prog->instances.nr != -1) { + pr_warning("Internal error: instances.nr is %d\n", + prog->instances.nr); + } + + prog->instances.nr = -1; + zfree(&prog->instances.fds); +} + +static void bpf_program__exit(struct bpf_program *prog) +{ + if (!prog) + return; + + if (prog->clear_priv) + prog->clear_priv(prog, prog->priv); + + prog->priv = NULL; + prog->clear_priv = NULL; + + bpf_program__unload(prog); + zfree(&prog->name); + zfree(&prog->section_name); + zfree(&prog->insns); + zfree(&prog->reloc_desc); + + prog->nr_reloc = 0; + prog->insns_cnt = 0; + prog->idx = -1; +} + +static int +bpf_program__init(void *data, size_t size, char *section_name, int idx, + struct bpf_program *prog) +{ + if (size < sizeof(struct bpf_insn)) { + pr_warning("corrupted section '%s'\n", section_name); + return -EINVAL; + } + + bzero(prog, sizeof(*prog)); + + prog->section_name = strdup(section_name); + if (!prog->section_name) { + pr_warning("failed to alloc name for prog under section(%d) %s\n", + idx, section_name); + goto errout; + } + + prog->insns = malloc(size); + if (!prog->insns) { + pr_warning("failed to alloc insns for prog under section %s\n", + section_name); + goto errout; + } + prog->insns_cnt = size / sizeof(struct bpf_insn); + memcpy(prog->insns, data, + prog->insns_cnt * sizeof(struct bpf_insn)); + prog->idx = idx; + prog->instances.fds = NULL; + prog->instances.nr = -1; + prog->type = BPF_PROG_TYPE_KPROBE; + + return 0; +errout: + bpf_program__exit(prog); + return -ENOMEM; +} + +static int +bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, + char *section_name, int idx) +{ + struct bpf_program prog, *progs; + int nr_progs, err; + + err = bpf_program__init(data, size, section_name, idx, &prog); + if (err) + return err; + + progs = obj->programs; + nr_progs = obj->nr_programs; + + progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0])); + if (!progs) { + /* + * In this case the original obj->programs + * is still valid, so don't need special treat for + * bpf_close_object(). + */ + pr_warning("failed to alloc a new program under section '%s'\n", + section_name); + bpf_program__exit(&prog); + return -ENOMEM; + } + + pr_debug("found program %s\n", prog.section_name); + obj->programs = progs; + obj->nr_programs = nr_progs + 1; + prog.obj = obj; + progs[nr_progs] = prog; + return 0; +} + +static int +bpf_object__init_prog_names(struct bpf_object *obj) +{ + Elf_Data *symbols = obj->efile.symbols; + struct bpf_program *prog; + size_t pi, si; + + for (pi = 0; pi < obj->nr_programs; pi++) { + const char *name = NULL; + + prog = &obj->programs[pi]; + + for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; + si++) { + GElf_Sym sym; + + if (!gelf_getsym(symbols, si, &sym)) + continue; + if (sym.st_shndx != prog->idx) + continue; + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL) + continue; + + name = elf_strptr(obj->efile.elf, + obj->efile.strtabidx, + sym.st_name); + if (!name) { + pr_warning("failed to get sym name string for prog %s\n", + prog->section_name); + return -LIBBPF_ERRNO__LIBELF; + } + } + + if (!name && prog->idx == obj->efile.text_shndx) + name = ".text"; + + if (!name) { + pr_warning("failed to find sym for prog %s\n", + prog->section_name); + return -EINVAL; + } + + prog->name = strdup(name); + if (!prog->name) { + pr_warning("failed to allocate memory for prog sym %s\n", + name); + return -ENOMEM; + } + } + + return 0; +} + +static struct bpf_object *bpf_object__new(const char *path, + void *obj_buf, + size_t obj_buf_sz) +{ + struct bpf_object *obj; + + obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1); + if (!obj) { + pr_warning("alloc memory failed for %s\n", path); + return ERR_PTR(-ENOMEM); + } + + strcpy(obj->path, path); + obj->efile.fd = -1; + + /* + * Caller of this function should also calls + * bpf_object__elf_finish() after data collection to return + * obj_buf to user. If not, we should duplicate the buffer to + * avoid user freeing them before elf finish. + */ + obj->efile.obj_buf = obj_buf; + obj->efile.obj_buf_sz = obj_buf_sz; + obj->efile.maps_shndx = -1; + + obj->loaded = false; + + INIT_LIST_HEAD(&obj->list); + list_add(&obj->list, &bpf_objects_list); + return obj; +} + +static void bpf_object__elf_finish(struct bpf_object *obj) +{ + if (!obj_elf_valid(obj)) + return; + + if (obj->efile.elf) { + elf_end(obj->efile.elf); + obj->efile.elf = NULL; + } + obj->efile.symbols = NULL; + + zfree(&obj->efile.reloc); + obj->efile.nr_reloc = 0; + zclose(obj->efile.fd); + obj->efile.obj_buf = NULL; + obj->efile.obj_buf_sz = 0; +} + +static int bpf_object__elf_init(struct bpf_object *obj) +{ + int err = 0; + GElf_Ehdr *ep; + + if (obj_elf_valid(obj)) { + pr_warning("elf init: internal error\n"); + return -LIBBPF_ERRNO__LIBELF; + } + + if (obj->efile.obj_buf_sz > 0) { + /* + * obj_buf should have been validated by + * bpf_object__open_buffer(). + */ + obj->efile.elf = elf_memory(obj->efile.obj_buf, + obj->efile.obj_buf_sz); + } else { + obj->efile.fd = open(obj->path, O_RDONLY); + if (obj->efile.fd < 0) { + char errmsg[STRERR_BUFSIZE]; + char *cp = str_error(errno, errmsg, sizeof(errmsg)); + + pr_warning("failed to open %s: %s\n", obj->path, cp); + return -errno; + } + + obj->efile.elf = elf_begin(obj->efile.fd, + LIBBPF_ELF_C_READ_MMAP, + NULL); + } + + if (!obj->efile.elf) { + pr_warning("failed to open %s as ELF file\n", + obj->path); + err = -LIBBPF_ERRNO__LIBELF; + goto errout; + } + + if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { + pr_warning("failed to get EHDR from %s\n", + obj->path); + err = -LIBBPF_ERRNO__FORMAT; + goto errout; + } + ep = &obj->efile.ehdr; + + /* Old LLVM set e_machine to EM_NONE */ + if ((ep->e_type != ET_REL) || (ep->e_machine && (ep->e_machine != EM_BPF))) { + pr_warning("%s is not an eBPF object file\n", + obj->path); + err = -LIBBPF_ERRNO__FORMAT; + goto errout; + } + + return 0; +errout: + bpf_object__elf_finish(obj); + return err; +} + +static int +bpf_object__check_endianness(struct bpf_object *obj) +{ + static unsigned int const endian = 1; + + switch (obj->efile.ehdr.e_ident[EI_DATA]) { + case ELFDATA2LSB: + /* We are big endian, BPF obj is little endian. */ + if (*(unsigned char const *)&endian != 1) + goto mismatch; + break; + + case ELFDATA2MSB: + /* We are little endian, BPF obj is big endian. */ + if (*(unsigned char const *)&endian != 0) + goto mismatch; + break; + default: + return -LIBBPF_ERRNO__ENDIAN; + } + + return 0; + +mismatch: + pr_warning("Error: endianness mismatch.\n"); + return -LIBBPF_ERRNO__ENDIAN; +} + +static int +bpf_object__init_license(struct bpf_object *obj, + void *data, size_t size) +{ + memcpy(obj->license, data, + min(size, sizeof(obj->license) - 1)); + pr_debug("license of %s is %s\n", obj->path, obj->license); + return 0; +} + +static int +bpf_object__init_kversion(struct bpf_object *obj, + void *data, size_t size) +{ + u32 kver; + + if (size != sizeof(kver)) { + pr_warning("invalid kver section in %s\n", obj->path); + return -LIBBPF_ERRNO__FORMAT; + } + memcpy(&kver, data, sizeof(kver)); + obj->kern_version = kver; + pr_debug("kernel version of %s is %x\n", obj->path, + obj->kern_version); + return 0; +} + +static int compare_bpf_map(const void *_a, const void *_b) +{ + const struct bpf_map *a = _a; + const struct bpf_map *b = _b; + + return a->offset - b->offset; +} + +static int +bpf_object__init_maps(struct bpf_object *obj) +{ + int i, map_idx, map_def_sz, nr_maps = 0; + Elf_Scn *scn; + Elf_Data *data; + Elf_Data *symbols = obj->efile.symbols; + + if (obj->efile.maps_shndx < 0) + return -EINVAL; + if (!symbols) + return -EINVAL; + + scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx); + if (scn) + data = elf_getdata(scn, NULL); + if (!scn || !data) { + pr_warning("failed to get Elf_Data from map section %d\n", + obj->efile.maps_shndx); + return -EINVAL; + } + + /* + * Count number of maps. Each map has a name. + * Array of maps is not supported: only the first element is + * considered. + * + * TODO: Detect array of map and report error. + */ + for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { + GElf_Sym sym; + + if (!gelf_getsym(symbols, i, &sym)) + continue; + if (sym.st_shndx != obj->efile.maps_shndx) + continue; + nr_maps++; + } + + /* Alloc obj->maps and fill nr_maps. */ + pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path, + nr_maps, data->d_size); + + if (!nr_maps) + return 0; + + /* Assume equally sized map definitions */ + map_def_sz = data->d_size / nr_maps; + if (!data->d_size || (data->d_size % nr_maps) != 0) { + pr_warning("unable to determine map definition size " + "section %s, %d maps in %zd bytes\n", + obj->path, nr_maps, data->d_size); + return -EINVAL; + } + + obj->maps = calloc(nr_maps, sizeof(obj->maps[0])); + if (!obj->maps) { + pr_warning("alloc maps for object failed\n"); + return -ENOMEM; + } + obj->nr_maps = nr_maps; + + /* + * fill all fd with -1 so won't close incorrect + * fd (fd=0 is stdin) when failure (zclose won't close + * negative fd)). + */ + for (i = 0; i < nr_maps; i++) + obj->maps[i].fd = -1; + + /* + * Fill obj->maps using data in "maps" section. + */ + for (i = 0, map_idx = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { + GElf_Sym sym; + const char *map_name; + struct bpf_map_def *def; + + if (!gelf_getsym(symbols, i, &sym)) + continue; + if (sym.st_shndx != obj->efile.maps_shndx) + continue; + + map_name = elf_strptr(obj->efile.elf, + obj->efile.strtabidx, + sym.st_name); + obj->maps[map_idx].offset = sym.st_value; + if (sym.st_value + map_def_sz > data->d_size) { + pr_warning("corrupted maps section in %s: last map \"%s\" too small\n", + obj->path, map_name); + return -EINVAL; + } + + obj->maps[map_idx].name = strdup(map_name); + if (!obj->maps[map_idx].name) { + pr_warning("failed to alloc map name\n"); + return -ENOMEM; + } + pr_debug("map %d is \"%s\"\n", map_idx, + obj->maps[map_idx].name); + def = (struct bpf_map_def *)(data->d_buf + sym.st_value); + /* + * If the definition of the map in the object file fits in + * bpf_map_def, copy it. Any extra fields in our version + * of bpf_map_def will default to zero as a result of the + * calloc above. + */ + if (map_def_sz <= sizeof(struct bpf_map_def)) { + memcpy(&obj->maps[map_idx].def, def, map_def_sz); + } else { + /* + * Here the map structure being read is bigger than what + * we expect, truncate if the excess bits are all zero. + * If they are not zero, reject this map as + * incompatible. + */ + char *b; + for (b = ((char *)def) + sizeof(struct bpf_map_def); + b < ((char *)def) + map_def_sz; b++) { + if (*b != 0) { + pr_warning("maps section in %s: \"%s\" " + "has unrecognized, non-zero " + "options\n", + obj->path, map_name); + return -EINVAL; + } + } + memcpy(&obj->maps[map_idx].def, def, + sizeof(struct bpf_map_def)); + } + map_idx++; + } + + qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), compare_bpf_map); + return 0; +} + +static bool section_have_execinstr(struct bpf_object *obj, int idx) +{ + Elf_Scn *scn; + GElf_Shdr sh; + + scn = elf_getscn(obj->efile.elf, idx); + if (!scn) + return false; + + if (gelf_getshdr(scn, &sh) != &sh) + return false; + + if (sh.sh_flags & SHF_EXECINSTR) + return true; + + return false; +} + +static int bpf_object__elf_collect(struct bpf_object *obj) +{ + Elf *elf = obj->efile.elf; + GElf_Ehdr *ep = &obj->efile.ehdr; + Elf_Scn *scn = NULL; + int idx = 0, err = 0; + + /* Elf is corrupted/truncated, avoid calling elf_strptr. */ + if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) { + pr_warning("failed to get e_shstrndx from %s\n", + obj->path); + return -LIBBPF_ERRNO__FORMAT; + } + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + char *name; + GElf_Shdr sh; + Elf_Data *data; + + idx++; + if (gelf_getshdr(scn, &sh) != &sh) { + pr_warning("failed to get section(%d) header from %s\n", + idx, obj->path); + err = -LIBBPF_ERRNO__FORMAT; + goto out; + } + + name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); + if (!name) { + pr_warning("failed to get section(%d) name from %s\n", + idx, obj->path); + err = -LIBBPF_ERRNO__FORMAT; + goto out; + } + + data = elf_getdata(scn, 0); + if (!data) { + pr_warning("failed to get section(%d) data from %s(%s)\n", + idx, name, obj->path); + err = -LIBBPF_ERRNO__FORMAT; + goto out; + } + pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", + idx, name, (unsigned long)data->d_size, + (int)sh.sh_link, (unsigned long)sh.sh_flags, + (int)sh.sh_type); + + if (strcmp(name, "license") == 0) + err = bpf_object__init_license(obj, + data->d_buf, + data->d_size); + else if (strcmp(name, "version") == 0) + err = bpf_object__init_kversion(obj, + data->d_buf, + data->d_size); + else if (strcmp(name, "maps") == 0) + obj->efile.maps_shndx = idx; + else if (strcmp(name, BTF_ELF_SEC) == 0) { + obj->btf = btf__new(data->d_buf, data->d_size, + __pr_debug); + if (IS_ERR(obj->btf)) { + pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", + BTF_ELF_SEC, PTR_ERR(obj->btf)); + obj->btf = NULL; + } + } else if (sh.sh_type == SHT_SYMTAB) { + if (obj->efile.symbols) { + pr_warning("bpf: multiple SYMTAB in %s\n", + obj->path); + err = -LIBBPF_ERRNO__FORMAT; + } else { + obj->efile.symbols = data; + obj->efile.strtabidx = sh.sh_link; + } + } else if ((sh.sh_type == SHT_PROGBITS) && + (sh.sh_flags & SHF_EXECINSTR) && + (data->d_size > 0)) { + if (strcmp(name, ".text") == 0) + obj->efile.text_shndx = idx; + err = bpf_object__add_program(obj, data->d_buf, + data->d_size, name, idx); + if (err) { + char errmsg[STRERR_BUFSIZE]; + char *cp = str_error(-err, errmsg, sizeof(errmsg)); + + pr_warning("failed to alloc program %s (%s): %s", + name, obj->path, cp); + } + } else if (sh.sh_type == SHT_REL) { + void *reloc = obj->efile.reloc; + int nr_reloc = obj->efile.nr_reloc + 1; + int sec = sh.sh_info; /* points to other section */ + + /* Only do relo for section with exec instructions */ + if (!section_have_execinstr(obj, sec)) { + pr_debug("skip relo %s(%d) for section(%d)\n", + name, idx, sec); + continue; + } + + reloc = reallocarray(reloc, nr_reloc, + sizeof(*obj->efile.reloc)); + if (!reloc) { + pr_warning("realloc failed\n"); + err = -ENOMEM; + } else { + int n = nr_reloc - 1; + + obj->efile.reloc = reloc; + obj->efile.nr_reloc = nr_reloc; + + obj->efile.reloc[n].shdr = sh; + obj->efile.reloc[n].data = data; + } + } else { + pr_debug("skip section(%d) %s\n", idx, name); + } + if (err) + goto out; + } + + if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) { + pr_warning("Corrupted ELF file: index of strtab invalid\n"); + return LIBBPF_ERRNO__FORMAT; + } + if (obj->efile.maps_shndx >= 0) { + err = bpf_object__init_maps(obj); + if (err) + goto out; + } + err = bpf_object__init_prog_names(obj); +out: + return err; +} + +static struct bpf_program * +bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) +{ + struct bpf_program *prog; + size_t i; + + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + if (prog->idx == idx) + return prog; + } + return NULL; +} + +struct bpf_program * +bpf_object__find_program_by_title(struct bpf_object *obj, const char *title) +{ + struct bpf_program *pos; + + bpf_object__for_each_program(pos, obj) { + if (pos->section_name && !strcmp(pos->section_name, title)) + return pos; + } + return NULL; +} + +static int +bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, + Elf_Data *data, struct bpf_object *obj) +{ + Elf_Data *symbols = obj->efile.symbols; + int text_shndx = obj->efile.text_shndx; + int maps_shndx = obj->efile.maps_shndx; + struct bpf_map *maps = obj->maps; + size_t nr_maps = obj->nr_maps; + int i, nrels; + + pr_debug("collecting relocating info for: '%s'\n", + prog->section_name); + nrels = shdr->sh_size / shdr->sh_entsize; + + prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels); + if (!prog->reloc_desc) { + pr_warning("failed to alloc memory in relocation\n"); + return -ENOMEM; + } + prog->nr_reloc = nrels; + + for (i = 0; i < nrels; i++) { + GElf_Sym sym; + GElf_Rel rel; + unsigned int insn_idx; + struct bpf_insn *insns = prog->insns; + size_t map_idx; + + if (!gelf_getrel(data, i, &rel)) { + pr_warning("relocation: failed to get %d reloc\n", i); + return -LIBBPF_ERRNO__FORMAT; + } + + if (!gelf_getsym(symbols, + GELF_R_SYM(rel.r_info), + &sym)) { + pr_warning("relocation: symbol %"PRIx64" not found\n", + GELF_R_SYM(rel.r_info)); + return -LIBBPF_ERRNO__FORMAT; + } + pr_debug("relo for %lld value %lld name %d\n", + (long long) (rel.r_info >> 32), + (long long) sym.st_value, sym.st_name); + + if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) { + pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n", + prog->section_name, sym.st_shndx); + return -LIBBPF_ERRNO__RELOC; + } + + insn_idx = rel.r_offset / sizeof(struct bpf_insn); + pr_debug("relocation: insn_idx=%u\n", insn_idx); + + if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) { + if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) { + pr_warning("incorrect bpf_call opcode\n"); + return -LIBBPF_ERRNO__RELOC; + } + prog->reloc_desc[i].type = RELO_CALL; + prog->reloc_desc[i].insn_idx = insn_idx; + prog->reloc_desc[i].text_off = sym.st_value; + obj->has_pseudo_calls = true; + continue; + } + + if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { + pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n", + insn_idx, insns[insn_idx].code); + return -LIBBPF_ERRNO__RELOC; + } + + /* TODO: 'maps' is sorted. We can use bsearch to make it faster. */ + for (map_idx = 0; map_idx < nr_maps; map_idx++) { + if (maps[map_idx].offset == sym.st_value) { + pr_debug("relocation: find map %zd (%s) for insn %u\n", + map_idx, maps[map_idx].name, insn_idx); + break; + } + } + + if (map_idx >= nr_maps) { + pr_warning("bpf relocation: map_idx %d large than %d\n", + (int)map_idx, (int)nr_maps - 1); + return -LIBBPF_ERRNO__RELOC; + } + + prog->reloc_desc[i].type = RELO_LD64; + prog->reloc_desc[i].insn_idx = insn_idx; + prog->reloc_desc[i].map_idx = map_idx; + } + return 0; +} + +static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) +{ + const struct btf_type *container_type; + const struct btf_member *key, *value; + struct bpf_map_def *def = &map->def; + const size_t max_name = 256; + char container_name[max_name]; + __s64 key_size, value_size; + __s32 container_id; + + if (snprintf(container_name, max_name, "____btf_map_%s", map->name) == + max_name) { + pr_warning("map:%s length of '____btf_map_%s' is too long\n", + map->name, map->name); + return -EINVAL; + } + + container_id = btf__find_by_name(btf, container_name); + if (container_id < 0) { + pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n", + map->name, container_name); + return container_id; + } + + container_type = btf__type_by_id(btf, container_id); + if (!container_type) { + pr_warning("map:%s cannot find BTF type for container_id:%u\n", + map->name, container_id); + return -EINVAL; + } + + if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT || + BTF_INFO_VLEN(container_type->info) < 2) { + pr_warning("map:%s container_name:%s is an invalid container struct\n", + map->name, container_name); + return -EINVAL; + } + + key = (struct btf_member *)(container_type + 1); + value = key + 1; + + key_size = btf__resolve_size(btf, key->type); + if (key_size < 0) { + pr_warning("map:%s invalid BTF key_type_size\n", + map->name); + return key_size; + } + + if (def->key_size != key_size) { + pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", + map->name, (__u32)key_size, def->key_size); + return -EINVAL; + } + + value_size = btf__resolve_size(btf, value->type); + if (value_size < 0) { + pr_warning("map:%s invalid BTF value_type_size\n", map->name); + return value_size; + } + + if (def->value_size != value_size) { + pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", + map->name, (__u32)value_size, def->value_size); + return -EINVAL; + } + + map->btf_key_type_id = key->type; + map->btf_value_type_id = value->type; + + return 0; +} + +int bpf_map__reuse_fd(struct bpf_map *map, int fd) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + int new_fd, err; + char *new_name; + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) + return err; + + new_name = strdup(info.name); + if (!new_name) + return -errno; + + new_fd = open("/", O_RDONLY | O_CLOEXEC); + if (new_fd < 0) { + err = -errno; + goto err_free_new_name; + } + + new_fd = dup3(fd, new_fd, O_CLOEXEC); + if (new_fd < 0) { + err = -errno; + goto err_close_new_fd; + } + + err = zclose(map->fd); + if (err) { + err = -errno; + goto err_close_new_fd; + } + free(map->name); + + map->fd = new_fd; + map->name = new_name; + map->def.type = info.type; + map->def.key_size = info.key_size; + map->def.value_size = info.value_size; + map->def.max_entries = info.max_entries; + map->def.map_flags = info.map_flags; + map->btf_key_type_id = info.btf_key_type_id; + map->btf_value_type_id = info.btf_value_type_id; + + return 0; + +err_close_new_fd: + close(new_fd); +err_free_new_name: + free(new_name); + return err; +} + +static int +bpf_object__create_maps(struct bpf_object *obj) +{ + struct bpf_create_map_attr create_attr = {}; + unsigned int i; + int err; + + for (i = 0; i < obj->nr_maps; i++) { + struct bpf_map *map = &obj->maps[i]; + struct bpf_map_def *def = &map->def; + char *cp, errmsg[STRERR_BUFSIZE]; + int *pfd = &map->fd; + + if (map->fd >= 0) { + pr_debug("skip map create (preset) %s: fd=%d\n", + map->name, map->fd); + continue; + } + + create_attr.name = map->name; + create_attr.map_ifindex = map->map_ifindex; + create_attr.map_type = def->type; + create_attr.map_flags = def->map_flags; + create_attr.key_size = def->key_size; + create_attr.value_size = def->value_size; + create_attr.max_entries = def->max_entries; + create_attr.btf_fd = 0; + create_attr.btf_key_type_id = 0; + create_attr.btf_value_type_id = 0; + + if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) { + create_attr.btf_fd = btf__fd(obj->btf); + create_attr.btf_key_type_id = map->btf_key_type_id; + create_attr.btf_value_type_id = map->btf_value_type_id; + } + + *pfd = bpf_create_map_xattr(&create_attr); + if (*pfd < 0 && create_attr.btf_key_type_id) { + cp = str_error(errno, errmsg, sizeof(errmsg)); + pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", + map->name, cp, errno); + create_attr.btf_fd = 0; + create_attr.btf_key_type_id = 0; + create_attr.btf_value_type_id = 0; + map->btf_key_type_id = 0; + map->btf_value_type_id = 0; + *pfd = bpf_create_map_xattr(&create_attr); + } + + if (*pfd < 0) { + size_t j; + + err = *pfd; + cp = str_error(errno, errmsg, sizeof(errmsg)); + pr_warning("failed to create map (name: '%s'): %s\n", + map->name, cp); + for (j = 0; j < i; j++) + zclose(obj->maps[j].fd); + return err; + } + pr_debug("create map %s: fd=%d\n", map->name, *pfd); + } + + return 0; +} + +static int +bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, + struct reloc_desc *relo) +{ + struct bpf_insn *insn, *new_insn; + struct bpf_program *text; + size_t new_cnt; + + if (relo->type != RELO_CALL) + return -LIBBPF_ERRNO__RELOC; + + if (prog->idx == obj->efile.text_shndx) { + pr_warning("relo in .text insn %d into off %d\n", + relo->insn_idx, relo->text_off); + return -LIBBPF_ERRNO__RELOC; + } + + if (prog->main_prog_cnt == 0) { + text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); + if (!text) { + pr_warning("no .text section found yet relo into text exist\n"); + return -LIBBPF_ERRNO__RELOC; + } + new_cnt = prog->insns_cnt + text->insns_cnt; + new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn)); + if (!new_insn) { + pr_warning("oom in prog realloc\n"); + return -ENOMEM; + } + memcpy(new_insn + prog->insns_cnt, text->insns, + text->insns_cnt * sizeof(*insn)); + prog->insns = new_insn; + prog->main_prog_cnt = prog->insns_cnt; + prog->insns_cnt = new_cnt; + pr_debug("added %zd insn from %s to prog %s\n", + text->insns_cnt, text->section_name, + prog->section_name); + } + insn = &prog->insns[relo->insn_idx]; + insn->imm += prog->main_prog_cnt - relo->insn_idx; + return 0; +} + +static int +bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) +{ + int i, err; + + if (!prog || !prog->reloc_desc) + return 0; + + for (i = 0; i < prog->nr_reloc; i++) { + if (prog->reloc_desc[i].type == RELO_LD64) { + struct bpf_insn *insns = prog->insns; + int insn_idx, map_idx; + + insn_idx = prog->reloc_desc[i].insn_idx; + map_idx = prog->reloc_desc[i].map_idx; + + if (insn_idx >= (int)prog->insns_cnt) { + pr_warning("relocation out of range: '%s'\n", + prog->section_name); + return -LIBBPF_ERRNO__RELOC; + } + insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + insns[insn_idx].imm = obj->maps[map_idx].fd; + } else { + err = bpf_program__reloc_text(prog, obj, + &prog->reloc_desc[i]); + if (err) + return err; + } + } + + zfree(&prog->reloc_desc); + prog->nr_reloc = 0; + return 0; +} + + +static int +bpf_object__relocate(struct bpf_object *obj) +{ + struct bpf_program *prog; + size_t i; + int err; + + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + + err = bpf_program__relocate(prog, obj); + if (err) { + pr_warning("failed to relocate '%s'\n", + prog->section_name); + return err; + } + } + return 0; +} + +static int bpf_object__collect_reloc(struct bpf_object *obj) +{ + int i, err; + + if (!obj_elf_valid(obj)) { + pr_warning("Internal error: elf object is closed\n"); + return -LIBBPF_ERRNO__INTERNAL; + } + + for (i = 0; i < obj->efile.nr_reloc; i++) { + GElf_Shdr *shdr = &obj->efile.reloc[i].shdr; + Elf_Data *data = obj->efile.reloc[i].data; + int idx = shdr->sh_info; + struct bpf_program *prog; + + if (shdr->sh_type != SHT_REL) { + pr_warning("internal error at %d\n", __LINE__); + return -LIBBPF_ERRNO__INTERNAL; + } + + prog = bpf_object__find_prog_by_idx(obj, idx); + if (!prog) { + pr_warning("relocation failed: no section(%d)\n", idx); + return -LIBBPF_ERRNO__RELOC; + } + + err = bpf_program__collect_reloc(prog, + shdr, data, + obj); + if (err) + return err; + } + return 0; +} + +static int +load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, + const char *name, struct bpf_insn *insns, int insns_cnt, + char *license, u32 kern_version, int *pfd, int prog_ifindex) +{ + struct bpf_load_program_attr load_attr; + char *cp, errmsg[STRERR_BUFSIZE]; + char *log_buf; + int ret; + + memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); + load_attr.prog_type = type; + load_attr.expected_attach_type = expected_attach_type; + load_attr.name = name; + load_attr.insns = insns; + load_attr.insns_cnt = insns_cnt; + load_attr.license = license; + load_attr.kern_version = kern_version; + load_attr.prog_ifindex = prog_ifindex; + + if (!load_attr.insns || !load_attr.insns_cnt) + return -EINVAL; + + log_buf = malloc(BPF_LOG_BUF_SIZE); + if (!log_buf) + pr_warning("Alloc log buffer for bpf loader error, continue without log\n"); + + ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE); + + if (ret >= 0) { + *pfd = ret; + ret = 0; + goto out; + } + + ret = -LIBBPF_ERRNO__LOAD; + cp = str_error(errno, errmsg, sizeof(errmsg)); + pr_warning("load bpf program failed: %s\n", cp); + + if (log_buf && log_buf[0] != '\0') { + ret = -LIBBPF_ERRNO__VERIFY; + pr_warning("-- BEGIN DUMP LOG ---\n"); + pr_warning("\n%s\n", log_buf); + pr_warning("-- END LOG --\n"); + } else if (load_attr.insns_cnt >= BPF_MAXINSNS) { + pr_warning("Program too large (%zu insns), at most %d insns\n", + load_attr.insns_cnt, BPF_MAXINSNS); + ret = -LIBBPF_ERRNO__PROG2BIG; + } else { + /* Wrong program type? */ + if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) { + int fd; + + load_attr.prog_type = BPF_PROG_TYPE_KPROBE; + load_attr.expected_attach_type = 0; + fd = bpf_load_program_xattr(&load_attr, NULL, 0); + if (fd >= 0) { + close(fd); + ret = -LIBBPF_ERRNO__PROGTYPE; + goto out; + } + } + + if (log_buf) + ret = -LIBBPF_ERRNO__KVER; + } + +out: + free(log_buf); + return ret; +} + +static int +bpf_program__load(struct bpf_program *prog, + char *license, u32 kern_version) +{ + int err = 0, fd, i; + + if (prog->instances.nr < 0 || !prog->instances.fds) { + if (prog->preprocessor) { + pr_warning("Internal error: can't load program '%s'\n", + prog->section_name); + return -LIBBPF_ERRNO__INTERNAL; + } + + prog->instances.fds = malloc(sizeof(int)); + if (!prog->instances.fds) { + pr_warning("Not enough memory for BPF fds\n"); + return -ENOMEM; + } + prog->instances.nr = 1; + prog->instances.fds[0] = -1; + } + + if (!prog->preprocessor) { + if (prog->instances.nr != 1) { + pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", + prog->section_name, prog->instances.nr); + } + err = load_program(prog->type, prog->expected_attach_type, + prog->name, prog->insns, prog->insns_cnt, + license, kern_version, &fd, + prog->prog_ifindex); + if (!err) + prog->instances.fds[0] = fd; + goto out; + } + + for (i = 0; i < prog->instances.nr; i++) { + struct bpf_prog_prep_result result; + bpf_program_prep_t preprocessor = prog->preprocessor; + + bzero(&result, sizeof(result)); + err = preprocessor(prog, i, prog->insns, + prog->insns_cnt, &result); + if (err) { + pr_warning("Preprocessing the %dth instance of program '%s' failed\n", + i, prog->section_name); + goto out; + } + + if (!result.new_insn_ptr || !result.new_insn_cnt) { + pr_debug("Skip loading the %dth instance of program '%s'\n", + i, prog->section_name); + prog->instances.fds[i] = -1; + if (result.pfd) + *result.pfd = -1; + continue; + } + + err = load_program(prog->type, prog->expected_attach_type, + prog->name, result.new_insn_ptr, + result.new_insn_cnt, + license, kern_version, &fd, + prog->prog_ifindex); + + if (err) { + pr_warning("Loading the %dth instance of program '%s' failed\n", + i, prog->section_name); + goto out; + } + + if (result.pfd) + *result.pfd = fd; + prog->instances.fds[i] = fd; + } +out: + if (err) + pr_warning("failed to load program '%s'\n", + prog->section_name); + zfree(&prog->insns); + prog->insns_cnt = 0; + return err; +} + +static bool bpf_program__is_function_storage(struct bpf_program *prog, + struct bpf_object *obj) +{ + return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls; +} + +static int +bpf_object__load_progs(struct bpf_object *obj) +{ + size_t i; + int err; + + for (i = 0; i < obj->nr_programs; i++) { + if (bpf_program__is_function_storage(&obj->programs[i], obj)) + continue; + err = bpf_program__load(&obj->programs[i], + obj->license, + obj->kern_version); + if (err) + return err; + } + return 0; +} + +static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) +{ + switch (type) { + case BPF_PROG_TYPE_SOCKET_FILTER: + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: + case BPF_PROG_TYPE_XDP: + case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_LWT_IN: + case BPF_PROG_TYPE_LWT_OUT: + case BPF_PROG_TYPE_LWT_XMIT: + case BPF_PROG_TYPE_LWT_SEG6LOCAL: + case BPF_PROG_TYPE_SOCK_OPS: + case BPF_PROG_TYPE_SK_SKB: + case BPF_PROG_TYPE_CGROUP_DEVICE: + case BPF_PROG_TYPE_SK_MSG: + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: + case BPF_PROG_TYPE_LIRC_MODE2: + case BPF_PROG_TYPE_SK_REUSEPORT: + return false; + case BPF_PROG_TYPE_UNSPEC: + case BPF_PROG_TYPE_KPROBE: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_RAW_TRACEPOINT: + default: + return true; + } +} + +static int bpf_object__validate(struct bpf_object *obj, bool needs_kver) +{ + if (needs_kver && obj->kern_version == 0) { + pr_warning("%s doesn't provide kernel version\n", + obj->path); + return -LIBBPF_ERRNO__KVERSION; + } + return 0; +} + +static struct bpf_object * +__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz, + bool needs_kver) +{ + struct bpf_object *obj; + int err; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warning("failed to init libelf for %s\n", path); + return ERR_PTR(-LIBBPF_ERRNO__LIBELF); + } + + obj = bpf_object__new(path, obj_buf, obj_buf_sz); + if (IS_ERR(obj)) + return obj; + + CHECK_ERR(bpf_object__elf_init(obj), err, out); + CHECK_ERR(bpf_object__check_endianness(obj), err, out); + CHECK_ERR(bpf_object__elf_collect(obj), err, out); + CHECK_ERR(bpf_object__collect_reloc(obj), err, out); + CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out); + + bpf_object__elf_finish(obj); + return obj; +out: + bpf_object__close(obj); + return ERR_PTR(err); +} + +struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) +{ + /* param validation */ + if (!attr->file) + return NULL; + + pr_debug("loading %s\n", attr->file); + + return __bpf_object__open(attr->file, NULL, 0, + bpf_prog_type__needs_kver(attr->prog_type)); +} + +struct bpf_object *bpf_object__open(const char *path) +{ + struct bpf_object_open_attr attr = { + .file = path, + .prog_type = BPF_PROG_TYPE_UNSPEC, + }; + + return bpf_object__open_xattr(&attr); +} + +struct bpf_object *bpf_object__open_buffer(void *obj_buf, + size_t obj_buf_sz, + const char *name) +{ + char tmp_name[64]; + + /* param validation */ + if (!obj_buf || obj_buf_sz <= 0) + return NULL; + + if (!name) { + snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", + (unsigned long)obj_buf, + (unsigned long)obj_buf_sz); + tmp_name[sizeof(tmp_name) - 1] = '\0'; + name = tmp_name; + } + pr_debug("loading object '%s' from buffer\n", + name); + + return __bpf_object__open(name, obj_buf, obj_buf_sz, true); +} + +int bpf_object__unload(struct bpf_object *obj) +{ + size_t i; + + if (!obj) + return -EINVAL; + + for (i = 0; i < obj->nr_maps; i++) + zclose(obj->maps[i].fd); + + for (i = 0; i < obj->nr_programs; i++) + bpf_program__unload(&obj->programs[i]); + + return 0; +} + +int bpf_object__load(struct bpf_object *obj) +{ + int err; + + if (!obj) + return -EINVAL; + + if (obj->loaded) { + pr_warning("object should not be loaded twice\n"); + return -EINVAL; + } + + obj->loaded = true; + + CHECK_ERR(bpf_object__create_maps(obj), err, out); + CHECK_ERR(bpf_object__relocate(obj), err, out); + CHECK_ERR(bpf_object__load_progs(obj), err, out); + + return 0; +out: + bpf_object__unload(obj); + pr_warning("failed to load object '%s'\n", obj->path); + return err; +} + +static int check_path(const char *path) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct statfs st_fs; + char *dname, *dir; + int err = 0; + + if (path == NULL) + return -EINVAL; + + dname = strdup(path); + if (dname == NULL) + return -ENOMEM; + + dir = dirname(dname); + if (statfs(dir, &st_fs)) { + cp = str_error(errno, errmsg, sizeof(errmsg)); + pr_warning("failed to statfs %s: %s\n", dir, cp); + err = -errno; + } + free(dname); + + if (!err && st_fs.f_type != BPF_FS_MAGIC) { + pr_warning("specified path %s is not on BPF FS\n", path); + err = -EINVAL; + } + + return err; +} + +int bpf_program__pin_instance(struct bpf_program *prog, const char *path, + int instance) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + int err; + + err = check_path(path); + if (err) + return err; + + if (prog == NULL) { + pr_warning("invalid program pointer\n"); + return -EINVAL; + } + + if (instance < 0 || instance >= prog->instances.nr) { + pr_warning("invalid prog instance %d of prog %s (max %d)\n", + instance, prog->section_name, prog->instances.nr); + return -EINVAL; + } + + if (bpf_obj_pin(prog->instances.fds[instance], path)) { + cp = str_error(errno, errmsg, sizeof(errmsg)); + pr_warning("failed to pin program: %s\n", cp); + return -errno; + } + pr_debug("pinned program '%s'\n", path); + + return 0; +} + +static int make_dir(const char *path) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + int err = 0; + + if (mkdir(path, 0700) && errno != EEXIST) + err = -errno; + + if (err) { + cp = str_error(-err, errmsg, sizeof(errmsg)); + pr_warning("failed to mkdir %s: %s\n", path, cp); + } + return err; +} + +int bpf_program__pin(struct bpf_program *prog, const char *path) +{ + int i, err; + + err = check_path(path); + if (err) + return err; + + if (prog == NULL) { + pr_warning("invalid program pointer\n"); + return -EINVAL; + } + + if (prog->instances.nr <= 0) { + pr_warning("no instances of prog %s to pin\n", + prog->section_name); + return -EINVAL; + } + + err = make_dir(path); + if (err) + return err; + + for (i = 0; i < prog->instances.nr; i++) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%d", path, i); + if (len < 0) + return -EINVAL; + else if (len >= PATH_MAX) + return -ENAMETOOLONG; + + err = bpf_program__pin_instance(prog, buf, i); + if (err) + return err; + } + + return 0; +} + +int bpf_map__pin(struct bpf_map *map, const char *path) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + int err; + + err = check_path(path); + if (err) + return err; + + if (map == NULL) { + pr_warning("invalid map pointer\n"); + return -EINVAL; + } + + if (bpf_obj_pin(map->fd, path)) { + cp = str_error(errno, errmsg, sizeof(errmsg)); + pr_warning("failed to pin map: %s\n", cp); + return -errno; + } + + pr_debug("pinned map '%s'\n", path); + return 0; +} + +int bpf_object__pin(struct bpf_object *obj, const char *path) +{ + struct bpf_program *prog; + struct bpf_map *map; + int err; + + if (!obj) + return -ENOENT; + + if (!obj->loaded) { + pr_warning("object not yet loaded; load it first\n"); + return -ENOENT; + } + + err = make_dir(path); + if (err) + return err; + + bpf_map__for_each(map, obj) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + bpf_map__name(map)); + if (len < 0) + return -EINVAL; + else if (len >= PATH_MAX) + return -ENAMETOOLONG; + + err = bpf_map__pin(map, buf); + if (err) + return err; + } + + bpf_object__for_each_program(prog, obj) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + prog->section_name); + if (len < 0) + return -EINVAL; + else if (len >= PATH_MAX) + return -ENAMETOOLONG; + + err = bpf_program__pin(prog, buf); + if (err) + return err; + } + + return 0; +} + +void bpf_object__close(struct bpf_object *obj) +{ + size_t i; + + if (!obj) + return; + + if (obj->clear_priv) + obj->clear_priv(obj, obj->priv); + + bpf_object__elf_finish(obj); + bpf_object__unload(obj); + btf__free(obj->btf); + + for (i = 0; i < obj->nr_maps; i++) { + zfree(&obj->maps[i].name); + if (obj->maps[i].clear_priv) + obj->maps[i].clear_priv(&obj->maps[i], + obj->maps[i].priv); + obj->maps[i].priv = NULL; + obj->maps[i].clear_priv = NULL; + } + zfree(&obj->maps); + obj->nr_maps = 0; + + if (obj->programs && obj->nr_programs) { + for (i = 0; i < obj->nr_programs; i++) + bpf_program__exit(&obj->programs[i]); + } + zfree(&obj->programs); + + list_del(&obj->list); + free(obj); +} + +struct bpf_object * +bpf_object__next(struct bpf_object *prev) +{ + struct bpf_object *next; + + if (!prev) + next = list_first_entry(&bpf_objects_list, + struct bpf_object, + list); + else + next = list_next_entry(prev, list); + + /* Empty list is noticed here so don't need checking on entry. */ + if (&next->list == &bpf_objects_list) + return NULL; + + return next; +} + +const char *bpf_object__name(struct bpf_object *obj) +{ + return obj ? obj->path : ERR_PTR(-EINVAL); +} + +unsigned int bpf_object__kversion(struct bpf_object *obj) +{ + return obj ? obj->kern_version : 0; +} + +int bpf_object__btf_fd(const struct bpf_object *obj) +{ + return obj->btf ? btf__fd(obj->btf) : -1; +} + +int bpf_object__set_priv(struct bpf_object *obj, void *priv, + bpf_object_clear_priv_t clear_priv) +{ + if (obj->priv && obj->clear_priv) + obj->clear_priv(obj, obj->priv); + + obj->priv = priv; + obj->clear_priv = clear_priv; + return 0; +} + +void *bpf_object__priv(struct bpf_object *obj) +{ + return obj ? obj->priv : ERR_PTR(-EINVAL); +} + +static struct bpf_program * +__bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) +{ + size_t idx; + + if (!obj->programs) + return NULL; + /* First handler */ + if (prev == NULL) + return &obj->programs[0]; + + if (prev->obj != obj) { + pr_warning("error: program handler doesn't match object\n"); + return NULL; + } + + idx = (prev - obj->programs) + 1; + if (idx >= obj->nr_programs) + return NULL; + return &obj->programs[idx]; +} + +struct bpf_program * +bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) +{ + struct bpf_program *prog = prev; + + do { + prog = __bpf_program__next(prog, obj); + } while (prog && bpf_program__is_function_storage(prog, obj)); + + return prog; +} + +int bpf_program__set_priv(struct bpf_program *prog, void *priv, + bpf_program_clear_priv_t clear_priv) +{ + if (prog->priv && prog->clear_priv) + prog->clear_priv(prog, prog->priv); + + prog->priv = priv; + prog->clear_priv = clear_priv; + return 0; +} + +void *bpf_program__priv(struct bpf_program *prog) +{ + return prog ? prog->priv : ERR_PTR(-EINVAL); +} + +void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) +{ + prog->prog_ifindex = ifindex; +} + +const char *bpf_program__title(struct bpf_program *prog, bool needs_copy) +{ + const char *title; + + title = prog->section_name; + if (needs_copy) { + title = strdup(title); + if (!title) { + pr_warning("failed to strdup program title\n"); + return ERR_PTR(-ENOMEM); + } + } + + return title; +} + +int bpf_program__fd(struct bpf_program *prog) +{ + return bpf_program__nth_fd(prog, 0); +} + +int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, + bpf_program_prep_t prep) +{ + int *instances_fds; + + if (nr_instances <= 0 || !prep) + return -EINVAL; + + if (prog->instances.nr > 0 || prog->instances.fds) { + pr_warning("Can't set pre-processor after loading\n"); + return -EINVAL; + } + + instances_fds = malloc(sizeof(int) * nr_instances); + if (!instances_fds) { + pr_warning("alloc memory failed for fds\n"); + return -ENOMEM; + } + + /* fill all fd with -1 */ + memset(instances_fds, -1, sizeof(int) * nr_instances); + + prog->instances.nr = nr_instances; + prog->instances.fds = instances_fds; + prog->preprocessor = prep; + return 0; +} + +int bpf_program__nth_fd(struct bpf_program *prog, int n) +{ + int fd; + + if (!prog) + return -EINVAL; + + if (n >= prog->instances.nr || n < 0) { + pr_warning("Can't get the %dth fd from program %s: only %d instances\n", + n, prog->section_name, prog->instances.nr); + return -EINVAL; + } + + fd = prog->instances.fds[n]; + if (fd < 0) { + pr_warning("%dth instance of program '%s' is invalid\n", + n, prog->section_name); + return -ENOENT; + } + + return fd; +} + +void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) +{ + prog->type = type; +} + +static bool bpf_program__is_type(struct bpf_program *prog, + enum bpf_prog_type type) +{ + return prog ? (prog->type == type) : false; +} + +#define BPF_PROG_TYPE_FNS(NAME, TYPE) \ +int bpf_program__set_##NAME(struct bpf_program *prog) \ +{ \ + if (!prog) \ + return -EINVAL; \ + bpf_program__set_type(prog, TYPE); \ + return 0; \ +} \ + \ +bool bpf_program__is_##NAME(struct bpf_program *prog) \ +{ \ + return bpf_program__is_type(prog, TYPE); \ +} \ + +BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER); +BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE); +BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS); +BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT); +BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); +BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT); +BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); +BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); + +void bpf_program__set_expected_attach_type(struct bpf_program *prog, + enum bpf_attach_type type) +{ + prog->expected_attach_type = type; +} + +#define BPF_PROG_SEC_FULL(string, ptype, atype) \ + { string, sizeof(string) - 1, ptype, atype } + +#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0) + +#define BPF_S_PROG_SEC(string, ptype) \ + BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK, ptype) + +#define BPF_SA_PROG_SEC(string, ptype) \ + BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype) + +static const struct { + const char *sec; + size_t len; + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; +} section_names[] = { + BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), + BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), + BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), + BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), + BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT), + BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), + BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), + BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), + BPF_PROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK), + BPF_PROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE), + BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), + BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT), + BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT), + BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL), + BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS), + BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB), + BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG), + BPF_PROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2), + BPF_SA_PROG_SEC("cgroup/bind4", BPF_CGROUP_INET4_BIND), + BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND), + BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT), + BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT), + BPF_SA_PROG_SEC("cgroup/sendmsg4", BPF_CGROUP_UDP4_SENDMSG), + BPF_SA_PROG_SEC("cgroup/sendmsg6", BPF_CGROUP_UDP6_SENDMSG), + BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND), + BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND), +}; + +#undef BPF_PROG_SEC +#undef BPF_PROG_SEC_FULL +#undef BPF_S_PROG_SEC +#undef BPF_SA_PROG_SEC + +int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, + enum bpf_attach_type *expected_attach_type) +{ + int i; + + if (!name) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(section_names); i++) { + if (strncmp(name, section_names[i].sec, section_names[i].len)) + continue; + *prog_type = section_names[i].prog_type; + *expected_attach_type = section_names[i].expected_attach_type; + return 0; + } + return -EINVAL; +} + +static int +bpf_program__identify_section(struct bpf_program *prog, + enum bpf_prog_type *prog_type, + enum bpf_attach_type *expected_attach_type) +{ + return libbpf_prog_type_by_name(prog->section_name, prog_type, + expected_attach_type); +} + +int bpf_map__fd(struct bpf_map *map) +{ + return map ? map->fd : -EINVAL; +} + +const struct bpf_map_def *bpf_map__def(struct bpf_map *map) +{ + return map ? &map->def : ERR_PTR(-EINVAL); +} + +const char *bpf_map__name(struct bpf_map *map) +{ + return map ? map->name : NULL; +} + +__u32 bpf_map__btf_key_type_id(const struct bpf_map *map) +{ + return map ? map->btf_key_type_id : 0; +} + +__u32 bpf_map__btf_value_type_id(const struct bpf_map *map) +{ + return map ? map->btf_value_type_id : 0; +} + +int bpf_map__set_priv(struct bpf_map *map, void *priv, + bpf_map_clear_priv_t clear_priv) +{ + if (!map) + return -EINVAL; + + if (map->priv) { + if (map->clear_priv) + map->clear_priv(map, map->priv); + } + + map->priv = priv; + map->clear_priv = clear_priv; + return 0; +} + +void *bpf_map__priv(struct bpf_map *map) +{ + return map ? map->priv : ERR_PTR(-EINVAL); +} + +bool bpf_map__is_offload_neutral(struct bpf_map *map) +{ + return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; +} + +void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) +{ + map->map_ifindex = ifindex; +} + +struct bpf_map * +bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) +{ + size_t idx; + struct bpf_map *s, *e; + + if (!obj || !obj->maps) + return NULL; + + s = obj->maps; + e = obj->maps + obj->nr_maps; + + if (prev == NULL) + return s; + + if ((prev < s) || (prev >= e)) { + pr_warning("error in %s: map handler doesn't belong to object\n", + __func__); + return NULL; + } + + idx = (prev - obj->maps) + 1; + if (idx >= obj->nr_maps) + return NULL; + return &obj->maps[idx]; +} + +struct bpf_map * +bpf_object__find_map_by_name(struct bpf_object *obj, const char *name) +{ + struct bpf_map *pos; + + bpf_map__for_each(pos, obj) { + if (pos->name && !strcmp(pos->name, name)) + return pos; + } + return NULL; +} + +struct bpf_map * +bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) +{ + int i; + + for (i = 0; i < obj->nr_maps; i++) { + if (obj->maps[i].offset == offset) + return &obj->maps[i]; + } + return ERR_PTR(-ENOENT); +} + +long libbpf_get_error(const void *ptr) +{ + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + return 0; +} + +int bpf_prog_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd) +{ + struct bpf_prog_load_attr attr; + + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); + attr.file = file; + attr.prog_type = type; + attr.expected_attach_type = 0; + + return bpf_prog_load_xattr(&attr, pobj, prog_fd); +} + +int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, + struct bpf_object **pobj, int *prog_fd) +{ + struct bpf_object_open_attr open_attr = {}; + struct bpf_program *prog, *first_prog = NULL; + enum bpf_attach_type expected_attach_type; + enum bpf_prog_type prog_type; + struct bpf_object *obj; + struct bpf_map *map; + int err; + + if (!attr) + return -EINVAL; + if (!attr->file) + return -EINVAL; + + open_attr.file = attr->file; + open_attr.prog_type = attr->prog_type; + + obj = bpf_object__open_xattr(&open_attr); + if (IS_ERR_OR_NULL(obj)) + return -ENOENT; + + bpf_object__for_each_program(prog, obj) { + /* + * If type is not specified, try to guess it based on + * section name. + */ + prog_type = attr->prog_type; + prog->prog_ifindex = attr->ifindex; + expected_attach_type = attr->expected_attach_type; + if (prog_type == BPF_PROG_TYPE_UNSPEC) { + err = bpf_program__identify_section(prog, &prog_type, + &expected_attach_type); + if (err < 0) { + pr_warning("failed to guess program type based on section name %s\n", + prog->section_name); + bpf_object__close(obj); + return -EINVAL; + } + } + + bpf_program__set_type(prog, prog_type); + bpf_program__set_expected_attach_type(prog, + expected_attach_type); + + if (!bpf_program__is_function_storage(prog, obj) && !first_prog) + first_prog = prog; + } + + bpf_map__for_each(map, obj) { + if (!bpf_map__is_offload_neutral(map)) + map->map_ifindex = attr->ifindex; + } + + if (!first_prog) { + pr_warning("object file doesn't contain bpf program\n"); + bpf_object__close(obj); + return -ENOENT; + } + + err = bpf_object__load(obj); + if (err) { + bpf_object__close(obj); + return -EINVAL; + } + + *pobj = obj; + *prog_fd = bpf_program__fd(first_prog); + return 0; +} + +enum bpf_perf_event_ret +bpf_perf_event_read_simple(void *mem, unsigned long size, + unsigned long page_size, void **buf, size_t *buf_len, + bpf_perf_event_print_t fn, void *priv) +{ + volatile struct perf_event_mmap_page *header = mem; + __u64 data_tail = header->data_tail; + __u64 data_head = header->data_head; + int ret = LIBBPF_PERF_EVENT_ERROR; + void *base, *begin, *end; + + asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ + if (data_head == data_tail) + return LIBBPF_PERF_EVENT_CONT; + + base = ((char *)header) + page_size; + + begin = base + data_tail % size; + end = base + data_head % size; + + while (begin != end) { + struct perf_event_header *ehdr; + + ehdr = begin; + if (begin + ehdr->size > base + size) { + long len = base + size - begin; + + if (*buf_len < ehdr->size) { + free(*buf); + *buf = malloc(ehdr->size); + if (!*buf) { + ret = LIBBPF_PERF_EVENT_ERROR; + break; + } + *buf_len = ehdr->size; + } + + memcpy(*buf, begin, len); + memcpy(*buf + len, base, ehdr->size - len); + ehdr = (void *)*buf; + begin = base + ehdr->size - len; + } else if (begin + ehdr->size == base + size) { + begin = base; + } else { + begin += ehdr->size; + } + + ret = fn(ehdr, priv); + if (ret != LIBBPF_PERF_EVENT_CONT) + break; + + data_tail += ehdr->size; + } + + __sync_synchronize(); /* smp_mb() */ + header->data_tail = data_tail; + + return ret; +} diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h new file mode 100644 index 000000000..96c55fac5 --- /dev/null +++ b/tools/lib/bpf/libbpf.h @@ -0,0 +1,300 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ + +/* + * Common eBPF ELF object loading operations. + * + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + */ +#ifndef __BPF_LIBBPF_H +#define __BPF_LIBBPF_H + +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <sys/types.h> // for size_t +#include <linux/bpf.h> + +enum libbpf_errno { + __LIBBPF_ERRNO__START = 4000, + + /* Something wrong in libelf */ + LIBBPF_ERRNO__LIBELF = __LIBBPF_ERRNO__START, + LIBBPF_ERRNO__FORMAT, /* BPF object format invalid */ + LIBBPF_ERRNO__KVERSION, /* Incorrect or no 'version' section */ + LIBBPF_ERRNO__ENDIAN, /* Endian mismatch */ + LIBBPF_ERRNO__INTERNAL, /* Internal error in libbpf */ + LIBBPF_ERRNO__RELOC, /* Relocation failed */ + LIBBPF_ERRNO__LOAD, /* Load program failure for unknown reason */ + LIBBPF_ERRNO__VERIFY, /* Kernel verifier blocks program loading */ + LIBBPF_ERRNO__PROG2BIG, /* Program too big */ + LIBBPF_ERRNO__KVER, /* Incorrect kernel version */ + LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */ + LIBBPF_ERRNO__WRNGPID, /* Wrong pid in netlink message */ + LIBBPF_ERRNO__INVSEQ, /* Invalid netlink sequence */ + __LIBBPF_ERRNO__END, +}; + +int libbpf_strerror(int err, char *buf, size_t size); + +/* + * __printf is defined in include/linux/compiler-gcc.h. However, + * it would be better if libbpf.h didn't depend on Linux header files. + * So instead of __printf, here we use gcc attribute directly. + */ +typedef int (*libbpf_print_fn_t)(const char *, ...) + __attribute__((format(printf, 1, 2))); + +void libbpf_set_print(libbpf_print_fn_t warn, + libbpf_print_fn_t info, + libbpf_print_fn_t debug); + +/* Hide internal to user */ +struct bpf_object; + +struct bpf_object_open_attr { + const char *file; + enum bpf_prog_type prog_type; +}; + +struct bpf_object *bpf_object__open(const char *path); +struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr); +struct bpf_object *bpf_object__open_buffer(void *obj_buf, + size_t obj_buf_sz, + const char *name); +int bpf_object__pin(struct bpf_object *object, const char *path); +void bpf_object__close(struct bpf_object *object); + +/* Load/unload object into/from kernel */ +int bpf_object__load(struct bpf_object *obj); +int bpf_object__unload(struct bpf_object *obj); +const char *bpf_object__name(struct bpf_object *obj); +unsigned int bpf_object__kversion(struct bpf_object *obj); +int bpf_object__btf_fd(const struct bpf_object *obj); + +struct bpf_program * +bpf_object__find_program_by_title(struct bpf_object *obj, const char *title); + +struct bpf_object *bpf_object__next(struct bpf_object *prev); +#define bpf_object__for_each_safe(pos, tmp) \ + for ((pos) = bpf_object__next(NULL), \ + (tmp) = bpf_object__next(pos); \ + (pos) != NULL; \ + (pos) = (tmp), (tmp) = bpf_object__next(tmp)) + +typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *); +int bpf_object__set_priv(struct bpf_object *obj, void *priv, + bpf_object_clear_priv_t clear_priv); +void *bpf_object__priv(struct bpf_object *prog); + +int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, + enum bpf_attach_type *expected_attach_type); + +/* Accessors of bpf_program */ +struct bpf_program; +struct bpf_program *bpf_program__next(struct bpf_program *prog, + struct bpf_object *obj); + +#define bpf_object__for_each_program(pos, obj) \ + for ((pos) = bpf_program__next(NULL, (obj)); \ + (pos) != NULL; \ + (pos) = bpf_program__next((pos), (obj))) + +typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, + void *); + +int bpf_program__set_priv(struct bpf_program *prog, void *priv, + bpf_program_clear_priv_t clear_priv); + +void *bpf_program__priv(struct bpf_program *prog); +void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex); + +const char *bpf_program__title(struct bpf_program *prog, bool needs_copy); + +int bpf_program__fd(struct bpf_program *prog); +int bpf_program__pin_instance(struct bpf_program *prog, const char *path, + int instance); +int bpf_program__pin(struct bpf_program *prog, const char *path); + +struct bpf_insn; + +/* + * Libbpf allows callers to adjust BPF programs before being loaded + * into kernel. One program in an object file can be transformed into + * multiple variants to be attached to different hooks. + * + * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd + * form an API for this purpose. + * + * - bpf_program_prep_t: + * Defines a 'preprocessor', which is a caller defined function + * passed to libbpf through bpf_program__set_prep(), and will be + * called before program is loaded. The processor should adjust + * the program one time for each instance according to the instance id + * passed to it. + * + * - bpf_program__set_prep: + * Attaches a preprocessor to a BPF program. The number of instances + * that should be created is also passed through this function. + * + * - bpf_program__nth_fd: + * After the program is loaded, get resulting FD of a given instance + * of the BPF program. + * + * If bpf_program__set_prep() is not used, the program would be loaded + * without adjustment during bpf_object__load(). The program has only + * one instance. In this case bpf_program__fd(prog) is equal to + * bpf_program__nth_fd(prog, 0). + */ + +struct bpf_prog_prep_result { + /* + * If not NULL, load new instruction array. + * If set to NULL, don't load this instance. + */ + struct bpf_insn *new_insn_ptr; + int new_insn_cnt; + + /* If not NULL, result FD is written to it. */ + int *pfd; +}; + +/* + * Parameters of bpf_program_prep_t: + * - prog: The bpf_program being loaded. + * - n: Index of instance being generated. + * - insns: BPF instructions array. + * - insns_cnt:Number of instructions in insns. + * - res: Output parameter, result of transformation. + * + * Return value: + * - Zero: pre-processing success. + * - Non-zero: pre-processing error, stop loading. + */ +typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n, + struct bpf_insn *insns, int insns_cnt, + struct bpf_prog_prep_result *res); + +int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, + bpf_program_prep_t prep); + +int bpf_program__nth_fd(struct bpf_program *prog, int n); + +/* + * Adjust type of BPF program. Default is kprobe. + */ +int bpf_program__set_socket_filter(struct bpf_program *prog); +int bpf_program__set_tracepoint(struct bpf_program *prog); +int bpf_program__set_raw_tracepoint(struct bpf_program *prog); +int bpf_program__set_kprobe(struct bpf_program *prog); +int bpf_program__set_sched_cls(struct bpf_program *prog); +int bpf_program__set_sched_act(struct bpf_program *prog); +int bpf_program__set_xdp(struct bpf_program *prog); +int bpf_program__set_perf_event(struct bpf_program *prog); +void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type); +void bpf_program__set_expected_attach_type(struct bpf_program *prog, + enum bpf_attach_type type); + +bool bpf_program__is_socket_filter(struct bpf_program *prog); +bool bpf_program__is_tracepoint(struct bpf_program *prog); +bool bpf_program__is_raw_tracepoint(struct bpf_program *prog); +bool bpf_program__is_kprobe(struct bpf_program *prog); +bool bpf_program__is_sched_cls(struct bpf_program *prog); +bool bpf_program__is_sched_act(struct bpf_program *prog); +bool bpf_program__is_xdp(struct bpf_program *prog); +bool bpf_program__is_perf_event(struct bpf_program *prog); + +/* + * No need for __attribute__((packed)), all members of 'bpf_map_def' + * are all aligned. In addition, using __attribute__((packed)) + * would trigger a -Wpacked warning message, and lead to an error + * if -Werror is set. + */ +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; +}; + +/* + * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel, + * so no need to worry about a name clash. + */ +struct bpf_map; +struct bpf_map * +bpf_object__find_map_by_name(struct bpf_object *obj, const char *name); + +/* + * Get bpf_map through the offset of corresponding struct bpf_map_def + * in the BPF object file. + */ +struct bpf_map * +bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); + +struct bpf_map * +bpf_map__next(struct bpf_map *map, struct bpf_object *obj); +#define bpf_map__for_each(pos, obj) \ + for ((pos) = bpf_map__next(NULL, (obj)); \ + (pos) != NULL; \ + (pos) = bpf_map__next((pos), (obj))) + +int bpf_map__fd(struct bpf_map *map); +const struct bpf_map_def *bpf_map__def(struct bpf_map *map); +const char *bpf_map__name(struct bpf_map *map); +__u32 bpf_map__btf_key_type_id(const struct bpf_map *map); +__u32 bpf_map__btf_value_type_id(const struct bpf_map *map); + +typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); +int bpf_map__set_priv(struct bpf_map *map, void *priv, + bpf_map_clear_priv_t clear_priv); +void *bpf_map__priv(struct bpf_map *map); +int bpf_map__reuse_fd(struct bpf_map *map, int fd); +bool bpf_map__is_offload_neutral(struct bpf_map *map); +void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); +int bpf_map__pin(struct bpf_map *map, const char *path); + +long libbpf_get_error(const void *ptr); + +struct bpf_prog_load_attr { + const char *file; + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + int ifindex; +}; + +int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, + struct bpf_object **pobj, int *prog_fd); +int bpf_prog_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd); + +int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); + +enum bpf_perf_event_ret { + LIBBPF_PERF_EVENT_DONE = 0, + LIBBPF_PERF_EVENT_ERROR = -1, + LIBBPF_PERF_EVENT_CONT = -2, +}; + +typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(void *event, + void *priv); +int bpf_perf_event_read_simple(void *mem, unsigned long size, + unsigned long page_size, + void **buf, size_t *buf_len, + bpf_perf_event_print_t fn, void *priv); +#endif diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c new file mode 100644 index 000000000..d2d17226d --- /dev/null +++ b/tools/lib/bpf/libbpf_errno.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: LGPL-2.1 + +/* + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + * Copyright (C) 2017 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + */ + +#undef _GNU_SOURCE +#include <stdio.h> +#include <string.h> + +#include "libbpf.h" + +#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START) +#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c) +#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START) + +static const char *libbpf_strerror_table[NR_ERRNO] = { + [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf", + [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid", + [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost", + [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch", + [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf", + [ERRCODE_OFFSET(RELOC)] = "Relocation failed", + [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading", + [ERRCODE_OFFSET(PROG2BIG)] = "Program too big", + [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version", + [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type", + [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message", + [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence", +}; + +int libbpf_strerror(int err, char *buf, size_t size) +{ + if (!buf || !size) + return -1; + + err = err > 0 ? err : -err; + + if (err < __LIBBPF_ERRNO__START) { + int ret; + + ret = strerror_r(err, buf, size); + buf[size - 1] = '\0'; + return ret; + } + + if (err < __LIBBPF_ERRNO__END) { + const char *msg; + + msg = libbpf_strerror_table[ERRNO_OFFSET(err)]; + snprintf(buf, size, "%s", msg); + buf[size - 1] = '\0'; + return 0; + } + + snprintf(buf, size, "Unknown libbpf error %d", err); + buf[size - 1] = '\0'; + return -1; +} diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c new file mode 100644 index 000000000..471943427 --- /dev/null +++ b/tools/lib/bpf/nlattr.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: LGPL-2.1 + +/* + * NETLINK Netlink attributes + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation version 2.1 + * of the License. + * + * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch> + */ + +#include <errno.h> +#include "nlattr.h" +#include <linux/rtnetlink.h> +#include <string.h> +#include <stdio.h> + +static uint16_t nla_attr_minlen[NLA_TYPE_MAX+1] = { + [NLA_U8] = sizeof(uint8_t), + [NLA_U16] = sizeof(uint16_t), + [NLA_U32] = sizeof(uint32_t), + [NLA_U64] = sizeof(uint64_t), + [NLA_STRING] = 1, + [NLA_FLAG] = 0, +}; + +static int nla_len(const struct nlattr *nla) +{ + return nla->nla_len - NLA_HDRLEN; +} + +static struct nlattr *nla_next(const struct nlattr *nla, int *remaining) +{ + int totlen = NLA_ALIGN(nla->nla_len); + + *remaining -= totlen; + return (struct nlattr *) ((char *) nla + totlen); +} + +static int nla_ok(const struct nlattr *nla, int remaining) +{ + return remaining >= sizeof(*nla) && + nla->nla_len >= sizeof(*nla) && + nla->nla_len <= remaining; +} + +static void *nla_data(const struct nlattr *nla) +{ + return (char *) nla + NLA_HDRLEN; +} + +static int nla_type(const struct nlattr *nla) +{ + return nla->nla_type & NLA_TYPE_MASK; +} + +static int validate_nla(struct nlattr *nla, int maxtype, + struct nla_policy *policy) +{ + struct nla_policy *pt; + unsigned int minlen = 0; + int type = nla_type(nla); + + if (type < 0 || type > maxtype) + return 0; + + pt = &policy[type]; + + if (pt->type > NLA_TYPE_MAX) + return 0; + + if (pt->minlen) + minlen = pt->minlen; + else if (pt->type != NLA_UNSPEC) + minlen = nla_attr_minlen[pt->type]; + + if (nla_len(nla) < minlen) + return -1; + + if (pt->maxlen && nla_len(nla) > pt->maxlen) + return -1; + + if (pt->type == NLA_STRING) { + char *data = nla_data(nla); + if (data[nla_len(nla) - 1] != '\0') + return -1; + } + + return 0; +} + +static inline int nlmsg_len(const struct nlmsghdr *nlh) +{ + return nlh->nlmsg_len - NLMSG_HDRLEN; +} + +/** + * Create attribute index based on a stream of attributes. + * @arg tb Index array to be filled (maxtype+1 elements). + * @arg maxtype Maximum attribute type expected and accepted. + * @arg head Head of attribute stream. + * @arg len Length of attribute stream. + * @arg policy Attribute validation policy. + * + * Iterates over the stream of attributes and stores a pointer to each + * attribute in the index array using the attribute type as index to + * the array. Attribute with a type greater than the maximum type + * specified will be silently ignored in order to maintain backwards + * compatibility. If \a policy is not NULL, the attribute will be + * validated using the specified policy. + * + * @see nla_validate + * @return 0 on success or a negative error code. + */ +static int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, + struct nla_policy *policy) +{ + struct nlattr *nla; + int rem, err; + + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + + nla_for_each_attr(nla, head, len, rem) { + int type = nla_type(nla); + + if (type > maxtype) + continue; + + if (policy) { + err = validate_nla(nla, maxtype, policy); + if (err < 0) + goto errout; + } + + if (tb[type]) + fprintf(stderr, "Attribute of type %#x found multiple times in message, " + "previous attribute is being ignored.\n", type); + + tb[type] = nla; + } + + err = 0; +errout: + return err; +} + +/* dump netlink extended ack error message */ +int nla_dump_errormsg(struct nlmsghdr *nlh) +{ + struct nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = { + [NLMSGERR_ATTR_MSG] = { .type = NLA_STRING }, + [NLMSGERR_ATTR_OFFS] = { .type = NLA_U32 }, + }; + struct nlattr *tb[NLMSGERR_ATTR_MAX + 1], *attr; + struct nlmsgerr *err; + char *errmsg = NULL; + int hlen, alen; + + /* no TLVs, nothing to do here */ + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + return 0; + + err = (struct nlmsgerr *)NLMSG_DATA(nlh); + hlen = sizeof(*err); + + /* if NLM_F_CAPPED is set then the inner err msg was capped */ + if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) + hlen += nlmsg_len(&err->msg); + + attr = (struct nlattr *) ((void *) err + hlen); + alen = nlh->nlmsg_len - hlen; + + if (nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, extack_policy) != 0) { + fprintf(stderr, + "Failed to parse extended error attributes\n"); + return 0; + } + + if (tb[NLMSGERR_ATTR_MSG]) + errmsg = (char *) nla_data(tb[NLMSGERR_ATTR_MSG]); + + fprintf(stderr, "Kernel error message: %s\n", errmsg); + + return 0; +} diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h new file mode 100644 index 000000000..931a71f68 --- /dev/null +++ b/tools/lib/bpf/nlattr.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ + +/* + * NETLINK Netlink attributes + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation version 2.1 + * of the License. + * + * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch> + */ + +#ifndef __NLATTR_H +#define __NLATTR_H + +#include <stdint.h> +#include <linux/netlink.h> +/* avoid multiple definition of netlink features */ +#define __LINUX_NETLINK_H + +/** + * Standard attribute types to specify validation policy + */ +enum { + NLA_UNSPEC, /**< Unspecified type, binary data chunk */ + NLA_U8, /**< 8 bit integer */ + NLA_U16, /**< 16 bit integer */ + NLA_U32, /**< 32 bit integer */ + NLA_U64, /**< 64 bit integer */ + NLA_STRING, /**< NUL terminated character string */ + NLA_FLAG, /**< Flag */ + NLA_MSECS, /**< Micro seconds (64bit) */ + NLA_NESTED, /**< Nested attributes */ + __NLA_TYPE_MAX, +}; + +#define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1) + +/** + * @ingroup attr + * Attribute validation policy. + * + * See section @core_doc{core_attr_parse,Attribute Parsing} for more details. + */ +struct nla_policy { + /** Type of attribute or NLA_UNSPEC */ + uint16_t type; + + /** Minimal length of payload required */ + uint16_t minlen; + + /** Maximal length of payload allowed */ + uint16_t maxlen; +}; + +/** + * @ingroup attr + * Iterate over a stream of attributes + * @arg pos loop counter, set to current attribute + * @arg head head of attribute stream + * @arg len length of attribute stream + * @arg rem initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_attr(pos, head, len, rem) \ + for (pos = head, rem = len; \ + nla_ok(pos, rem); \ + pos = nla_next(pos, &(rem))) + +int nla_dump_errormsg(struct nlmsghdr *nlh); + +#endif /* __NLATTR_H */ diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c new file mode 100644 index 000000000..b8798114a --- /dev/null +++ b/tools/lib/bpf/str_error.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: LGPL-2.1 +#undef _GNU_SOURCE +#include <string.h> +#include <stdio.h> +#include "str_error.h" + +/* + * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl + * libc, while checking strerror_r() return to avoid having to check this in + * all places calling it. + */ +char *str_error(int err, char *dst, int len) +{ + int ret = strerror_r(err, dst, len); + if (ret) + snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret); + return dst; +} diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h new file mode 100644 index 000000000..355b1db57 --- /dev/null +++ b/tools/lib/bpf/str_error.h @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: LGPL-2.1 +#ifndef BPF_STR_ERROR +#define BPF_STR_ERROR + +char *str_error(int err, char *dst, int len); +#endif // BPF_STR_ERROR diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c new file mode 100644 index 000000000..a88bd5070 --- /dev/null +++ b/tools/lib/find_bit.c @@ -0,0 +1,128 @@ +/* bit search implementation + * + * Copied from lib/find_bit.c to tools/lib/find_bit.c + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * Copyright (C) 2008 IBM Corporation + * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au> + * (Inspired by David Howell's find_next_bit implementation) + * + * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease + * size and improve performance, 2015. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/bitops.h> +#include <linux/bitmap.h> +#include <linux/kernel.h> + +#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \ + !defined(find_next_and_bit) + +/* + * This is a common helper function for find_next_bit, find_next_zero_bit, and + * find_next_and_bit. The differences are: + * - The "invert" argument, which is XORed with each fetched word before + * searching it for one bits. + * - The optional "addr2", which is anded with "addr1" if present. + */ +static inline unsigned long _find_next_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert) +{ + unsigned long tmp; + + if (unlikely(start >= nbits)) + return nbits; + + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; + + /* Handle 1st word. */ + tmp &= BITMAP_FIRST_WORD_MASK(start); + start = round_down(start, BITS_PER_LONG); + + while (!tmp) { + start += BITS_PER_LONG; + if (start >= nbits) + return nbits; + + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; + } + + return min(start + __ffs(tmp), nbits); +} +#endif + +#ifndef find_next_bit +/* + * Find the next set bit in a memory region. + */ +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr, NULL, size, offset, 0UL); +} +#endif + +#ifndef find_first_bit +/* + * Find the first set bit in a memory region. + */ +unsigned long find_first_bit(const unsigned long *addr, unsigned long size) +{ + unsigned long idx; + + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + if (addr[idx]) + return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size); + } + + return size; +} +#endif + +#ifndef find_first_zero_bit +/* + * Find the first cleared bit in a memory region. + */ +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) +{ + unsigned long idx; + + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + if (addr[idx] != ~0UL) + return min(idx * BITS_PER_LONG + ffz(addr[idx]), size); + } + + return size; +} +#endif + +#ifndef find_next_zero_bit +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr, NULL, size, offset, ~0UL); +} +#endif + +#ifndef find_next_and_bit +unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr1, addr2, size, offset, 0UL); +} +#endif diff --git a/tools/lib/hweight.c b/tools/lib/hweight.c new file mode 100644 index 000000000..a16ebf515 --- /dev/null +++ b/tools/lib/hweight.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bitops.h> +#include <asm/types.h> + +/** + * hweightN - returns the hamming weight of a N-bit word + * @x: the word to weigh + * + * The Hamming Weight of a number is the total number of bits set in it. + */ + +unsigned int __sw_hweight32(unsigned int w) +{ +#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER + w -= (w >> 1) & 0x55555555; + w = (w & 0x33333333) + ((w >> 2) & 0x33333333); + w = (w + (w >> 4)) & 0x0f0f0f0f; + return (w * 0x01010101) >> 24; +#else + unsigned int res = w - ((w >> 1) & 0x55555555); + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); + res = (res + (res >> 4)) & 0x0F0F0F0F; + res = res + (res >> 8); + return (res + (res >> 16)) & 0x000000FF; +#endif +} + +unsigned int __sw_hweight16(unsigned int w) +{ + unsigned int res = w - ((w >> 1) & 0x5555); + res = (res & 0x3333) + ((res >> 2) & 0x3333); + res = (res + (res >> 4)) & 0x0F0F; + return (res + (res >> 8)) & 0x00FF; +} + +unsigned int __sw_hweight8(unsigned int w) +{ + unsigned int res = w - ((w >> 1) & 0x55); + res = (res & 0x33) + ((res >> 2) & 0x33); + return (res + (res >> 4)) & 0x0F; +} + +unsigned long __sw_hweight64(__u64 w) +{ +#if BITS_PER_LONG == 32 + return __sw_hweight32((unsigned int)(w >> 32)) + + __sw_hweight32((unsigned int)w); +#elif BITS_PER_LONG == 64 +#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER + w -= (w >> 1) & 0x5555555555555555ul; + w = (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul); + w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful; + return (w * 0x0101010101010101ul) >> 56; +#else + __u64 res = w - ((w >> 1) & 0x5555555555555555ul); + res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul); + res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful; + res = res + (res >> 8); + res = res + (res >> 16); + return (res + (res >> 32)) & 0x00000000000000FFul; +#endif +#endif +} diff --git a/tools/lib/lockdep/.gitignore b/tools/lib/lockdep/.gitignore new file mode 100644 index 000000000..cc0e7a9f9 --- /dev/null +++ b/tools/lib/lockdep/.gitignore @@ -0,0 +1 @@ +liblockdep.so.* diff --git a/tools/lib/lockdep/Build b/tools/lib/lockdep/Build new file mode 100644 index 000000000..6f667355b --- /dev/null +++ b/tools/lib/lockdep/Build @@ -0,0 +1 @@ +liblockdep-y += common.o lockdep.o preload.o rbtree.o diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile new file mode 100644 index 000000000..9dafb8cb7 --- /dev/null +++ b/tools/lib/lockdep/Makefile @@ -0,0 +1,162 @@ +# SPDX-License-Identifier: GPL-2.0 +# file format version +FILE_VERSION = 1 + +LIBLOCKDEP_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion) + +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,LD,$(CROSS_COMPILE)ld) + +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +prefix ?= /usr/local +libdir_relative = lib +libdir = $(prefix)/$(libdir_relative) +bindir_relative = bin +bindir = $(prefix)/$(bindir_relative) + +export DESTDIR DESTDIR_SQ INSTALL + +MAKEFLAGS += --no-print-directory + +include ../../scripts/Makefile.include + +# copy a bit from Linux kbuild + +ifeq ("$(origin V)", "command line") + VERBOSE = $(V) +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) +bindir_SQ = $(subst ','\'',$(bindir)) + +LIB_IN := $(OUTPUT)liblockdep-in.o + +BIN_FILE = lockdep +LIB_FILE = $(OUTPUT)liblockdep.a $(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION) + +CONFIG_INCLUDES = +CONFIG_LIBS = +CONFIG_FLAGS = + +OBJ = $@ +N = + +export Q VERBOSE + +INCLUDES = -I. -I./uinclude -I./include -I../../include $(CONFIG_INCLUDES) + +# Set compile option CFLAGS if not set elsewhere +CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g +CFLAGS += -fPIC +CFLAGS += -Wall + +override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) + +ifeq ($(VERBOSE),1) + Q = + print_shared_lib_compile = + print_install = +else + Q = @ + print_shared_lib_compile = echo ' LD '$(OBJ); + print_static_lib_build = echo ' LD '$(OBJ); + print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; +endif + +all: + +export srctree OUTPUT CC LD CFLAGS V +include $(srctree)/tools/build/Makefile.include + +do_compile_shared_library = \ + ($(print_shared_lib_compile) \ + $(CC) $(LDFLAGS) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='$(@F)';$(shell ln -sf $(@F) $(@D)/liblockdep.so)) + +do_build_static_lib = \ + ($(print_static_lib_build) \ + $(RM) $@; $(AR) rcs $@ $^) + +CMD_TARGETS = $(LIB_FILE) + +TARGETS = $(CMD_TARGETS) + + +all: fixdep all_cmd + +all_cmd: $(CMD_TARGETS) + +$(LIB_IN): force + $(Q)$(MAKE) $(build)=liblockdep + +$(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN) + $(Q)$(do_compile_shared_library) + +$(OUTPUT)liblockdep.a: $(LIB_IN) + $(Q)$(do_build_static_lib) + +tags: force + $(RM) tags + find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ + --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/' + +TAGS: force + $(RM) TAGS + find . -name '*.[ch]' | xargs etags \ + --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/' + +define do_install + $(print_install) \ + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 '$(DESTDIR_SQ)$2' +endef + +install_lib: all_cmd + $(Q)$(call do_install,$(LIB_FILE),$(libdir_SQ)) + $(Q)$(call do_install,$(BIN_FILE),$(bindir_SQ)) + +install: install_lib + +clean: + $(RM) $(OUTPUT)*.o *~ $(TARGETS) $(OUTPUT)*.a $(OUTPUT)*liblockdep*.so* $(VERSION_FILES) $(OUTPUT).*.d $(OUTPUT).*.cmd + $(RM) tags TAGS + +PHONY += force +force: + +# Declare the contents of the .PHONY variable as phony. We keep that +# information in a variable so we can use it in if_changed and friends. +.PHONY: $(PHONY) diff --git a/tools/lib/lockdep/common.c b/tools/lib/lockdep/common.c new file mode 100644 index 000000000..5c3b58cce --- /dev/null +++ b/tools/lib/lockdep/common.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stddef.h> +#include <stdbool.h> +#include <linux/compiler.h> +#include <linux/lockdep.h> +#include <unistd.h> +#include <sys/syscall.h> + +static __thread struct task_struct current_obj; + +/* lockdep wants these */ +bool debug_locks = true; +bool debug_locks_silent; + +__attribute__((destructor)) static void liblockdep_exit(void) +{ + debug_check_no_locks_held(); +} + +struct task_struct *__curr(void) +{ + if (current_obj.pid == 0) { + /* Makes lockdep output pretty */ + prctl(PR_GET_NAME, current_obj.comm); + current_obj.pid = syscall(__NR_gettid); + } + + return ¤t_obj; +} diff --git a/tools/lib/lockdep/include/liblockdep/common.h b/tools/lib/lockdep/include/liblockdep/common.h new file mode 100644 index 000000000..8862da809 --- /dev/null +++ b/tools/lib/lockdep/include/liblockdep/common.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LIBLOCKDEP_COMMON_H +#define _LIBLOCKDEP_COMMON_H + +#include <pthread.h> + +#define NR_LOCKDEP_CACHING_CLASSES 2 +#define MAX_LOCKDEP_SUBCLASSES 8UL + +#ifndef CALLER_ADDR0 +#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +#endif + +#ifndef _RET_IP_ +#define _RET_IP_ CALLER_ADDR0 +#endif + +#ifndef _THIS_IP_ +#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) +#endif + +struct lockdep_subclass_key { + char __one_byte; +}; + +struct lock_class_key { + struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; +}; + +struct lockdep_map { + struct lock_class_key *key; + struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; + const char *name; +#ifdef CONFIG_LOCK_STAT + int cpu; + unsigned long ip; +#endif +}; + +void lockdep_init_map(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass); +void lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *nest_lock, unsigned long ip); +void lock_release(struct lockdep_map *lock, int nested, + unsigned long ip); +extern void debug_check_no_locks_freed(const void *from, unsigned long len); + +#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ + { .name = (_name), .key = (void *)(_key), } + +#endif diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h b/tools/lib/lockdep/include/liblockdep/mutex.h new file mode 100644 index 000000000..a80ac39f9 --- /dev/null +++ b/tools/lib/lockdep/include/liblockdep/mutex.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LIBLOCKDEP_MUTEX_H +#define _LIBLOCKDEP_MUTEX_H + +#include <pthread.h> +#include "common.h" + +struct liblockdep_pthread_mutex { + pthread_mutex_t mutex; + struct lockdep_map dep_map; +}; + +typedef struct liblockdep_pthread_mutex liblockdep_pthread_mutex_t; + +#define LIBLOCKDEP_PTHREAD_MUTEX_INITIALIZER(mtx) \ + (const struct liblockdep_pthread_mutex) { \ + .mutex = PTHREAD_MUTEX_INITIALIZER, \ + .dep_map = STATIC_LOCKDEP_MAP_INIT(#mtx, &((&(mtx))->dep_map)), \ +} + +static inline int __mutex_init(liblockdep_pthread_mutex_t *lock, + const char *name, + struct lock_class_key *key, + const pthread_mutexattr_t *__mutexattr) +{ + lockdep_init_map(&lock->dep_map, name, key, 0); + return pthread_mutex_init(&lock->mutex, __mutexattr); +} + +#define liblockdep_pthread_mutex_init(mutex, mutexattr) \ +({ \ + static struct lock_class_key __key; \ + \ + __mutex_init((mutex), #mutex, &__key, (mutexattr)); \ +}) + +static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t *lock) +{ + lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); + return pthread_mutex_lock(&lock->mutex); +} + +static inline int liblockdep_pthread_mutex_unlock(liblockdep_pthread_mutex_t *lock) +{ + lock_release(&lock->dep_map, 0, (unsigned long)_RET_IP_); + return pthread_mutex_unlock(&lock->mutex); +} + +static inline int liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *lock) +{ + lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); + return pthread_mutex_trylock(&lock->mutex) == 0 ? 1 : 0; +} + +static inline int liblockdep_pthread_mutex_destroy(liblockdep_pthread_mutex_t *lock) +{ + return pthread_mutex_destroy(&lock->mutex); +} + +#ifdef __USE_LIBLOCKDEP + +#define pthread_mutex_t liblockdep_pthread_mutex_t +#define pthread_mutex_init liblockdep_pthread_mutex_init +#define pthread_mutex_lock liblockdep_pthread_mutex_lock +#define pthread_mutex_unlock liblockdep_pthread_mutex_unlock +#define pthread_mutex_trylock liblockdep_pthread_mutex_trylock +#define pthread_mutex_destroy liblockdep_pthread_mutex_destroy + +#endif + +#endif diff --git a/tools/lib/lockdep/include/liblockdep/rwlock.h b/tools/lib/lockdep/include/liblockdep/rwlock.h new file mode 100644 index 000000000..a96c3bf0f --- /dev/null +++ b/tools/lib/lockdep/include/liblockdep/rwlock.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LIBLOCKDEP_RWLOCK_H +#define _LIBLOCKDEP_RWLOCK_H + +#include <pthread.h> +#include "common.h" + +struct liblockdep_pthread_rwlock { + pthread_rwlock_t rwlock; + struct lockdep_map dep_map; +}; + +typedef struct liblockdep_pthread_rwlock liblockdep_pthread_rwlock_t; + +#define LIBLOCKDEP_PTHREAD_RWLOCK_INITIALIZER(rwl) \ + (struct liblockdep_pthread_rwlock) { \ + .rwlock = PTHREAD_RWLOCK_INITIALIZER, \ + .dep_map = STATIC_LOCKDEP_MAP_INIT(#rwl, &((&(rwl))->dep_map)), \ +} + +static inline int __rwlock_init(liblockdep_pthread_rwlock_t *lock, + const char *name, + struct lock_class_key *key, + const pthread_rwlockattr_t *attr) +{ + lockdep_init_map(&lock->dep_map, name, key, 0); + + return pthread_rwlock_init(&lock->rwlock, attr); +} + +#define liblockdep_pthread_rwlock_init(lock, attr) \ +({ \ + static struct lock_class_key __key; \ + \ + __rwlock_init((lock), #lock, &__key, (attr)); \ +}) + +static inline int liblockdep_pthread_rwlock_rdlock(liblockdep_pthread_rwlock_t *lock) +{ + lock_acquire(&lock->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_); + return pthread_rwlock_rdlock(&lock->rwlock); + +} + +static inline int liblockdep_pthread_rwlock_unlock(liblockdep_pthread_rwlock_t *lock) +{ + lock_release(&lock->dep_map, 0, (unsigned long)_RET_IP_); + return pthread_rwlock_unlock(&lock->rwlock); +} + +static inline int liblockdep_pthread_rwlock_wrlock(liblockdep_pthread_rwlock_t *lock) +{ + lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); + return pthread_rwlock_wrlock(&lock->rwlock); +} + +static inline int liblockdep_pthread_rwlock_tryrdlock(liblockdep_pthread_rwlock_t *lock) +{ + lock_acquire(&lock->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_); + return pthread_rwlock_tryrdlock(&lock->rwlock) == 0 ? 1 : 0; +} + +static inline int liblockdep_pthread_rwlock_trywlock(liblockdep_pthread_rwlock_t *lock) +{ + lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); + return pthread_rwlock_trywlock(&lock->rwlock) == 0 ? 1 : 0; +} + +static inline int liblockdep_rwlock_destroy(liblockdep_pthread_rwlock_t *lock) +{ + return pthread_rwlock_destroy(&lock->rwlock); +} + +#ifdef __USE_LIBLOCKDEP + +#define pthread_rwlock_t liblockdep_pthread_rwlock_t +#define pthread_rwlock_init liblockdep_pthread_rwlock_init +#define pthread_rwlock_rdlock liblockdep_pthread_rwlock_rdlock +#define pthread_rwlock_unlock liblockdep_pthread_rwlock_unlock +#define pthread_rwlock_wrlock liblockdep_pthread_rwlock_wrlock +#define pthread_rwlock_tryrdlock liblockdep_pthread_rwlock_tryrdlock +#define pthread_rwlock_trywlock liblockdep_pthread_rwlock_trywlock +#define pthread_rwlock_destroy liblockdep_rwlock_destroy + +#endif + +#endif diff --git a/tools/lib/lockdep/lockdep b/tools/lib/lockdep/lockdep new file mode 100755 index 000000000..49af9fe19 --- /dev/null +++ b/tools/lib/lockdep/lockdep @@ -0,0 +1,3 @@ +#!/bin/bash + +LD_PRELOAD="./liblockdep.so $LD_PRELOAD" "$@" diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c new file mode 100644 index 000000000..6002fcf2f --- /dev/null +++ b/tools/lib/lockdep/lockdep.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/lockdep.h> +#include <stdlib.h> + +/* Trivial API wrappers, we don't (yet) have RCU in user-space: */ +#define hlist_for_each_entry_rcu hlist_for_each_entry +#define hlist_add_head_rcu hlist_add_head +#define hlist_del_rcu hlist_del +#define list_for_each_entry_rcu list_for_each_entry +#define list_add_tail_rcu list_add_tail + +u32 prandom_u32(void) +{ + /* Used only by lock_pin_lock() which is dead code */ + abort(); +} + +static struct new_utsname *init_utsname(void) +{ + static struct new_utsname n = (struct new_utsname) { + .release = "liblockdep", + .version = LIBLOCKDEP_VERSION, + }; + + return &n; +} + +#include "../../../kernel/locking/lockdep.c" diff --git a/tools/lib/lockdep/lockdep_internals.h b/tools/lib/lockdep/lockdep_internals.h new file mode 100644 index 000000000..29d0c954c --- /dev/null +++ b/tools/lib/lockdep/lockdep_internals.h @@ -0,0 +1 @@ +#include "../../../kernel/locking/lockdep_internals.h" diff --git a/tools/lib/lockdep/lockdep_states.h b/tools/lib/lockdep/lockdep_states.h new file mode 100644 index 000000000..248d235ef --- /dev/null +++ b/tools/lib/lockdep/lockdep_states.h @@ -0,0 +1 @@ +#include "../../../kernel/locking/lockdep_states.h" diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c new file mode 100644 index 000000000..76245d161 --- /dev/null +++ b/tools/lib/lockdep/preload.c @@ -0,0 +1,443 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <pthread.h> +#include <stdio.h> +#include <dlfcn.h> +#include <stdlib.h> +#include <sysexits.h> +#include <unistd.h> +#include "include/liblockdep/mutex.h" +#include "../../include/linux/rbtree.h" + +/** + * struct lock_lookup - liblockdep's view of a single unique lock + * @orig: pointer to the original pthread lock, used for lookups + * @dep_map: lockdep's dep_map structure + * @key: lockdep's key structure + * @node: rb-tree node used to store the lock in a global tree + * @name: a unique name for the lock + */ +struct lock_lookup { + void *orig; /* Original pthread lock, used for lookups */ + struct lockdep_map dep_map; /* Since all locks are dynamic, we need + * a dep_map and a key for each lock */ + /* + * Wait, there's no support for key classes? Yup :( + * Most big projects wrap the pthread api with their own calls to + * be compatible with different locking methods. This means that + * "classes" will be brokes since the function that creates all + * locks will point to a generic locking function instead of the + * actual code that wants to do the locking. + */ + struct lock_class_key key; + struct rb_node node; +#define LIBLOCKDEP_MAX_LOCK_NAME 22 + char name[LIBLOCKDEP_MAX_LOCK_NAME]; +}; + +/* This is where we store our locks */ +static struct rb_root locks = RB_ROOT; +static pthread_rwlock_t locks_rwlock = PTHREAD_RWLOCK_INITIALIZER; + +/* pthread mutex API */ + +#ifdef __GLIBC__ +extern int __pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr); +extern int __pthread_mutex_lock(pthread_mutex_t *mutex); +extern int __pthread_mutex_trylock(pthread_mutex_t *mutex); +extern int __pthread_mutex_unlock(pthread_mutex_t *mutex); +extern int __pthread_mutex_destroy(pthread_mutex_t *mutex); +#else +#define __pthread_mutex_init NULL +#define __pthread_mutex_lock NULL +#define __pthread_mutex_trylock NULL +#define __pthread_mutex_unlock NULL +#define __pthread_mutex_destroy NULL +#endif +static int (*ll_pthread_mutex_init)(pthread_mutex_t *mutex, + const pthread_mutexattr_t *attr) = __pthread_mutex_init; +static int (*ll_pthread_mutex_lock)(pthread_mutex_t *mutex) = __pthread_mutex_lock; +static int (*ll_pthread_mutex_trylock)(pthread_mutex_t *mutex) = __pthread_mutex_trylock; +static int (*ll_pthread_mutex_unlock)(pthread_mutex_t *mutex) = __pthread_mutex_unlock; +static int (*ll_pthread_mutex_destroy)(pthread_mutex_t *mutex) = __pthread_mutex_destroy; + +/* pthread rwlock API */ + +#ifdef __GLIBC__ +extern int __pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr); +extern int __pthread_rwlock_destroy(pthread_rwlock_t *rwlock); +extern int __pthread_rwlock_wrlock(pthread_rwlock_t *rwlock); +extern int __pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock); +extern int __pthread_rwlock_rdlock(pthread_rwlock_t *rwlock); +extern int __pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock); +extern int __pthread_rwlock_unlock(pthread_rwlock_t *rwlock); +#else +#define __pthread_rwlock_init NULL +#define __pthread_rwlock_destroy NULL +#define __pthread_rwlock_wrlock NULL +#define __pthread_rwlock_trywrlock NULL +#define __pthread_rwlock_rdlock NULL +#define __pthread_rwlock_tryrdlock NULL +#define __pthread_rwlock_unlock NULL +#endif + +static int (*ll_pthread_rwlock_init)(pthread_rwlock_t *rwlock, + const pthread_rwlockattr_t *attr) = __pthread_rwlock_init; +static int (*ll_pthread_rwlock_destroy)(pthread_rwlock_t *rwlock) = __pthread_rwlock_destroy; +static int (*ll_pthread_rwlock_rdlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_rdlock; +static int (*ll_pthread_rwlock_tryrdlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_tryrdlock; +static int (*ll_pthread_rwlock_trywrlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_trywrlock; +static int (*ll_pthread_rwlock_wrlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_wrlock; +static int (*ll_pthread_rwlock_unlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_unlock; + +enum { none, prepare, done, } __init_state; +static void init_preload(void); +static void try_init_preload(void) +{ + if (__init_state != done) + init_preload(); +} + +static struct rb_node **__get_lock_node(void *lock, struct rb_node **parent) +{ + struct rb_node **node = &locks.rb_node; + struct lock_lookup *l; + + *parent = NULL; + + while (*node) { + l = rb_entry(*node, struct lock_lookup, node); + + *parent = *node; + if (lock < l->orig) + node = &l->node.rb_left; + else if (lock > l->orig) + node = &l->node.rb_right; + else + return node; + } + + return node; +} + +#ifndef LIBLOCKDEP_STATIC_ENTRIES +#define LIBLOCKDEP_STATIC_ENTRIES 1024 +#endif + +static struct lock_lookup __locks[LIBLOCKDEP_STATIC_ENTRIES]; +static int __locks_nr; + +static inline bool is_static_lock(struct lock_lookup *lock) +{ + return lock >= __locks && lock < __locks + ARRAY_SIZE(__locks); +} + +static struct lock_lookup *alloc_lock(void) +{ + if (__init_state != done) { + /* + * Some programs attempt to initialize and use locks in their + * allocation path. This means that a call to malloc() would + * result in locks being initialized and locked. + * + * Why is it an issue for us? dlsym() below will try allocating + * to give us the original function. Since this allocation will + * result in a locking operations, we have to let pthread deal + * with it, but we can't! we don't have the pointer to the + * original API since we're inside dlsym() trying to get it + */ + + int idx = __locks_nr++; + if (idx >= ARRAY_SIZE(__locks)) { + dprintf(STDERR_FILENO, + "LOCKDEP error: insufficient LIBLOCKDEP_STATIC_ENTRIES\n"); + exit(EX_UNAVAILABLE); + } + return __locks + idx; + } + + return malloc(sizeof(struct lock_lookup)); +} + +static inline void free_lock(struct lock_lookup *lock) +{ + if (likely(!is_static_lock(lock))) + free(lock); +} + +/** + * __get_lock - find or create a lock instance + * @lock: pointer to a pthread lock function + * + * Try to find an existing lock in the rbtree using the provided pointer. If + * one wasn't found - create it. + */ +static struct lock_lookup *__get_lock(void *lock) +{ + struct rb_node **node, *parent; + struct lock_lookup *l; + + ll_pthread_rwlock_rdlock(&locks_rwlock); + node = __get_lock_node(lock, &parent); + ll_pthread_rwlock_unlock(&locks_rwlock); + if (*node) { + return rb_entry(*node, struct lock_lookup, node); + } + + /* We didn't find the lock, let's create it */ + l = alloc_lock(); + if (l == NULL) + return NULL; + + l->orig = lock; + /* + * Currently the name of the lock is the ptr value of the pthread lock, + * while not optimal, it makes debugging a bit easier. + * + * TODO: Get the real name of the lock using libdwarf + */ + sprintf(l->name, "%p", lock); + lockdep_init_map(&l->dep_map, l->name, &l->key, 0); + + ll_pthread_rwlock_wrlock(&locks_rwlock); + /* This might have changed since the last time we fetched it */ + node = __get_lock_node(lock, &parent); + rb_link_node(&l->node, parent, node); + rb_insert_color(&l->node, &locks); + ll_pthread_rwlock_unlock(&locks_rwlock); + + return l; +} + +static void __del_lock(struct lock_lookup *lock) +{ + ll_pthread_rwlock_wrlock(&locks_rwlock); + rb_erase(&lock->node, &locks); + ll_pthread_rwlock_unlock(&locks_rwlock); + free_lock(lock); +} + +int pthread_mutex_init(pthread_mutex_t *mutex, + const pthread_mutexattr_t *attr) +{ + int r; + + /* + * We keep trying to init our preload module because there might be + * code in init sections that tries to touch locks before we are + * initialized, in that case we'll need to manually call preload + * to get us going. + * + * Funny enough, kernel's lockdep had the same issue, and used + * (almost) the same solution. See look_up_lock_class() in + * kernel/locking/lockdep.c for details. + */ + try_init_preload(); + + r = ll_pthread_mutex_init(mutex, attr); + if (r == 0) + /* + * We do a dummy initialization here so that lockdep could + * warn us if something fishy is going on - such as + * initializing a held lock. + */ + __get_lock(mutex); + + return r; +} + +int pthread_mutex_lock(pthread_mutex_t *mutex) +{ + int r; + + try_init_preload(); + + lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL, + (unsigned long)_RET_IP_); + /* + * Here's the thing with pthread mutexes: unlike the kernel variant, + * they can fail. + * + * This means that the behaviour here is a bit different from what's + * going on in the kernel: there we just tell lockdep that we took the + * lock before actually taking it, but here we must deal with the case + * that locking failed. + * + * To do that we'll "release" the lock if locking failed - this way + * we'll get lockdep doing the correct checks when we try to take + * the lock, and if that fails - we'll be back to the correct + * state by releasing it. + */ + r = ll_pthread_mutex_lock(mutex); + if (r) + lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_); + + return r; +} + +int pthread_mutex_trylock(pthread_mutex_t *mutex) +{ + int r; + + try_init_preload(); + + lock_acquire(&__get_lock(mutex)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); + r = ll_pthread_mutex_trylock(mutex); + if (r) + lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_); + + return r; +} + +int pthread_mutex_unlock(pthread_mutex_t *mutex) +{ + int r; + + try_init_preload(); + + lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_); + /* + * Just like taking a lock, only in reverse! + * + * If we fail releasing the lock, tell lockdep we're holding it again. + */ + r = ll_pthread_mutex_unlock(mutex); + if (r) + lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); + + return r; +} + +int pthread_mutex_destroy(pthread_mutex_t *mutex) +{ + try_init_preload(); + + /* + * Let's see if we're releasing a lock that's held. + * + * TODO: Hook into free() and add that check there as well. + */ + debug_check_no_locks_freed(mutex, sizeof(*mutex)); + __del_lock(__get_lock(mutex)); + return ll_pthread_mutex_destroy(mutex); +} + +/* This is the rwlock part, very similar to what happened with mutex above */ +int pthread_rwlock_init(pthread_rwlock_t *rwlock, + const pthread_rwlockattr_t *attr) +{ + int r; + + try_init_preload(); + + r = ll_pthread_rwlock_init(rwlock, attr); + if (r == 0) + __get_lock(rwlock); + + return r; +} + +int pthread_rwlock_destroy(pthread_rwlock_t *rwlock) +{ + try_init_preload(); + + debug_check_no_locks_freed(rwlock, sizeof(*rwlock)); + __del_lock(__get_lock(rwlock)); + return ll_pthread_rwlock_destroy(rwlock); +} + +int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock) +{ + int r; + + init_preload(); + + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_); + r = ll_pthread_rwlock_rdlock(rwlock); + if (r) + lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); + + return r; +} + +int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock) +{ + int r; + + init_preload(); + + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_); + r = ll_pthread_rwlock_tryrdlock(rwlock); + if (r) + lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); + + return r; +} + +int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock) +{ + int r; + + init_preload(); + + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); + r = ll_pthread_rwlock_trywrlock(rwlock); + if (r) + lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); + + return r; +} + +int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock) +{ + int r; + + init_preload(); + + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); + r = ll_pthread_rwlock_wrlock(rwlock); + if (r) + lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); + + return r; +} + +int pthread_rwlock_unlock(pthread_rwlock_t *rwlock) +{ + int r; + + init_preload(); + + lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); + r = ll_pthread_rwlock_unlock(rwlock); + if (r) + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); + + return r; +} + +__attribute__((constructor)) static void init_preload(void) +{ + if (__init_state == done) + return; + +#ifndef __GLIBC__ + __init_state = prepare; + + ll_pthread_mutex_init = dlsym(RTLD_NEXT, "pthread_mutex_init"); + ll_pthread_mutex_lock = dlsym(RTLD_NEXT, "pthread_mutex_lock"); + ll_pthread_mutex_trylock = dlsym(RTLD_NEXT, "pthread_mutex_trylock"); + ll_pthread_mutex_unlock = dlsym(RTLD_NEXT, "pthread_mutex_unlock"); + ll_pthread_mutex_destroy = dlsym(RTLD_NEXT, "pthread_mutex_destroy"); + + ll_pthread_rwlock_init = dlsym(RTLD_NEXT, "pthread_rwlock_init"); + ll_pthread_rwlock_destroy = dlsym(RTLD_NEXT, "pthread_rwlock_destroy"); + ll_pthread_rwlock_rdlock = dlsym(RTLD_NEXT, "pthread_rwlock_rdlock"); + ll_pthread_rwlock_tryrdlock = dlsym(RTLD_NEXT, "pthread_rwlock_tryrdlock"); + ll_pthread_rwlock_wrlock = dlsym(RTLD_NEXT, "pthread_rwlock_wrlock"); + ll_pthread_rwlock_trywrlock = dlsym(RTLD_NEXT, "pthread_rwlock_trywrlock"); + ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock"); +#endif + + __init_state = done; +} diff --git a/tools/lib/lockdep/rbtree.c b/tools/lib/lockdep/rbtree.c new file mode 100644 index 000000000..297c30457 --- /dev/null +++ b/tools/lib/lockdep/rbtree.c @@ -0,0 +1 @@ +#include "../../lib/rbtree.c" diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh new file mode 100755 index 000000000..2e570a188 --- /dev/null +++ b/tools/lib/lockdep/run_tests.sh @@ -0,0 +1,32 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 + +make &> /dev/null + +for i in `ls tests/*.c`; do + testname=$(basename "$i" .c) + gcc -o tests/$testname -pthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null + echo -ne "$testname... " + if [ $(timeout 1 ./tests/$testname 2>&1 | wc -l) -gt 0 ]; then + echo "PASSED!" + else + echo "FAILED!" + fi + if [ -f "tests/$testname" ]; then + rm tests/$testname + fi +done + +for i in `ls tests/*.c`; do + testname=$(basename "$i" .c) + gcc -o tests/$testname -pthread -Iinclude $i &> /dev/null + echo -ne "(PRELOAD) $testname... " + if [ $(timeout 1 ./lockdep ./tests/$testname 2>&1 | wc -l) -gt 0 ]; then + echo "PASSED!" + else + echo "FAILED!" + fi + if [ -f "tests/$testname" ]; then + rm tests/$testname + fi +done diff --git a/tools/lib/lockdep/tests/AA.c b/tools/lib/lockdep/tests/AA.c new file mode 100644 index 000000000..63c7ce97b --- /dev/null +++ b/tools/lib/lockdep/tests/AA.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <liblockdep/mutex.h> + +int main(void) +{ + pthread_mutex_t a; + + pthread_mutex_init(&a, NULL); + + pthread_mutex_lock(&a); + pthread_mutex_lock(&a); + + return 0; +} diff --git a/tools/lib/lockdep/tests/ABA.c b/tools/lib/lockdep/tests/ABA.c new file mode 100644 index 000000000..efa39b23f --- /dev/null +++ b/tools/lib/lockdep/tests/ABA.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <liblockdep/mutex.h> + +void main(void) +{ + pthread_mutex_t a, b; + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + + pthread_mutex_lock(&a); + pthread_mutex_lock(&b); + pthread_mutex_lock(&a); +} diff --git a/tools/lib/lockdep/tests/ABBA.c b/tools/lib/lockdep/tests/ABBA.c new file mode 100644 index 000000000..1460afd33 --- /dev/null +++ b/tools/lib/lockdep/tests/ABBA.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <liblockdep/mutex.h> +#include "common.h" + +void main(void) +{ + pthread_mutex_t a, b; + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + + LOCK_UNLOCK_2(a, b); + LOCK_UNLOCK_2(b, a); +} diff --git a/tools/lib/lockdep/tests/ABBA_2threads.c b/tools/lib/lockdep/tests/ABBA_2threads.c new file mode 100644 index 000000000..39325ef8a --- /dev/null +++ b/tools/lib/lockdep/tests/ABBA_2threads.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <pthread.h> + +pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER; +pthread_barrier_t bar; + +void *ba_lock(void *arg) +{ + int ret, i; + + pthread_mutex_lock(&b); + + if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD) + pthread_barrier_destroy(&bar); + + pthread_mutex_lock(&a); + + pthread_mutex_unlock(&a); + pthread_mutex_unlock(&b); +} + +int main(void) +{ + pthread_t t; + + pthread_barrier_init(&bar, NULL, 2); + + if (pthread_create(&t, NULL, ba_lock, NULL)) { + fprintf(stderr, "pthread_create() failed\n"); + return 1; + } + pthread_mutex_lock(&a); + + if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD) + pthread_barrier_destroy(&bar); + + pthread_mutex_lock(&b); + + pthread_mutex_unlock(&b); + pthread_mutex_unlock(&a); + + pthread_join(t, NULL); + + return 0; +} diff --git a/tools/lib/lockdep/tests/ABBCCA.c b/tools/lib/lockdep/tests/ABBCCA.c new file mode 100644 index 000000000..a54c1b2af --- /dev/null +++ b/tools/lib/lockdep/tests/ABBCCA.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <liblockdep/mutex.h> +#include "common.h" + +void main(void) +{ + pthread_mutex_t a, b, c; + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + pthread_mutex_init(&c, NULL); + + LOCK_UNLOCK_2(a, b); + LOCK_UNLOCK_2(b, c); + LOCK_UNLOCK_2(c, a); +} diff --git a/tools/lib/lockdep/tests/ABBCCDDA.c b/tools/lib/lockdep/tests/ABBCCDDA.c new file mode 100644 index 000000000..aa5d194e8 --- /dev/null +++ b/tools/lib/lockdep/tests/ABBCCDDA.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <liblockdep/mutex.h> +#include "common.h" + +void main(void) +{ + pthread_mutex_t a, b, c, d; + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + pthread_mutex_init(&c, NULL); + pthread_mutex_init(&d, NULL); + + LOCK_UNLOCK_2(a, b); + LOCK_UNLOCK_2(b, c); + LOCK_UNLOCK_2(c, d); + LOCK_UNLOCK_2(d, a); +} diff --git a/tools/lib/lockdep/tests/ABCABC.c b/tools/lib/lockdep/tests/ABCABC.c new file mode 100644 index 000000000..b54a08e60 --- /dev/null +++ b/tools/lib/lockdep/tests/ABCABC.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <liblockdep/mutex.h> +#include "common.h" + +void main(void) +{ + pthread_mutex_t a, b, c; + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + pthread_mutex_init(&c, NULL); + + LOCK_UNLOCK_2(a, b); + LOCK_UNLOCK_2(c, a); + LOCK_UNLOCK_2(b, c); +} diff --git a/tools/lib/lockdep/tests/ABCDBCDA.c b/tools/lib/lockdep/tests/ABCDBCDA.c new file mode 100644 index 000000000..a56742250 --- /dev/null +++ b/tools/lib/lockdep/tests/ABCDBCDA.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <liblockdep/mutex.h> +#include "common.h" + +void main(void) +{ + pthread_mutex_t a, b, c, d; + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + pthread_mutex_init(&c, NULL); + pthread_mutex_init(&d, NULL); + + LOCK_UNLOCK_2(a, b); + LOCK_UNLOCK_2(c, d); + LOCK_UNLOCK_2(b, c); + LOCK_UNLOCK_2(d, a); +} diff --git a/tools/lib/lockdep/tests/ABCDBDDA.c b/tools/lib/lockdep/tests/ABCDBDDA.c new file mode 100644 index 000000000..238a3353f --- /dev/null +++ b/tools/lib/lockdep/tests/ABCDBDDA.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <liblockdep/mutex.h> +#include "common.h" + +void main(void) +{ + pthread_mutex_t a, b, c, d; + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + pthread_mutex_init(&c, NULL); + pthread_mutex_init(&d, NULL); + + LOCK_UNLOCK_2(a, b); + LOCK_UNLOCK_2(c, d); + LOCK_UNLOCK_2(b, d); + LOCK_UNLOCK_2(d, a); +} diff --git a/tools/lib/lockdep/tests/WW.c b/tools/lib/lockdep/tests/WW.c new file mode 100644 index 000000000..eee88df7f --- /dev/null +++ b/tools/lib/lockdep/tests/WW.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <liblockdep/rwlock.h> + +void main(void) +{ + pthread_rwlock_t a, b; + + pthread_rwlock_init(&a, NULL); + pthread_rwlock_init(&b, NULL); + + pthread_rwlock_wrlock(&a); + pthread_rwlock_rdlock(&b); + pthread_rwlock_wrlock(&a); +} diff --git a/tools/lib/lockdep/tests/common.h b/tools/lib/lockdep/tests/common.h new file mode 100644 index 000000000..3026c29cc --- /dev/null +++ b/tools/lib/lockdep/tests/common.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LIBLOCKDEP_TEST_COMMON_H +#define _LIBLOCKDEP_TEST_COMMON_H + +#define LOCK_UNLOCK_2(a, b) \ + do { \ + pthread_mutex_lock(&(a)); \ + pthread_mutex_lock(&(b)); \ + pthread_mutex_unlock(&(b)); \ + pthread_mutex_unlock(&(a)); \ + } while(0) + +#endif diff --git a/tools/lib/lockdep/tests/unlock_balance.c b/tools/lib/lockdep/tests/unlock_balance.c new file mode 100644 index 000000000..34cf32f68 --- /dev/null +++ b/tools/lib/lockdep/tests/unlock_balance.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <liblockdep/mutex.h> + +void main(void) +{ + pthread_mutex_t a; + + pthread_mutex_init(&a, NULL); + + pthread_mutex_lock(&a); + pthread_mutex_unlock(&a); + pthread_mutex_unlock(&a); +} diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c new file mode 100644 index 000000000..17c2b596f --- /dev/null +++ b/tools/lib/rbtree.c @@ -0,0 +1,548 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli <andrea@suse.de> + (C) 2002 David Woodhouse <dwmw2@infradead.org> + (C) 2012 Michel Lespinasse <walken@google.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/lib/rbtree.c +*/ + +#include <linux/rbtree_augmented.h> + +/* + * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree + * + * 1) A node is either red or black + * 2) The root is black + * 3) All leaves (NULL) are black + * 4) Both children of every red node are black + * 5) Every simple path from root to leaves contains the same number + * of black nodes. + * + * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two + * consecutive red nodes in a path and every red node is therefore followed by + * a black. So if B is the number of black nodes on every simple path (as per + * 5), then the longest possible path due to 4 is 2B. + * + * We shall indicate color with case, where black nodes are uppercase and red + * nodes will be lowercase. Unknown color nodes shall be drawn as red within + * parentheses and have some accompanying text comment. + */ + +static inline void rb_set_black(struct rb_node *rb) +{ + rb->__rb_parent_color |= RB_BLACK; +} + +static inline struct rb_node *rb_red_parent(struct rb_node *red) +{ + return (struct rb_node *)red->__rb_parent_color; +} + +/* + * Helper function for rotations: + * - old's parent and color get assigned to new + * - old gets assigned new as a parent and 'color' as a color. + */ +static inline void +__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new, + struct rb_root *root, int color) +{ + struct rb_node *parent = rb_parent(old); + new->__rb_parent_color = old->__rb_parent_color; + rb_set_parent_color(old, new, color); + __rb_change_child(old, new, parent, root); +} + +static __always_inline void +__rb_insert(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *parent = rb_red_parent(node), *gparent, *tmp; + + while (true) { + /* + * Loop invariant: node is red + * + * If there is a black parent, we are done. + * Otherwise, take some corrective action as we don't + * want a red root or two consecutive red nodes. + */ + if (!parent) { + rb_set_parent_color(node, NULL, RB_BLACK); + break; + } else if (rb_is_black(parent)) + break; + + gparent = rb_red_parent(parent); + + tmp = gparent->rb_right; + if (parent != tmp) { /* parent == gparent->rb_left */ + if (tmp && rb_is_red(tmp)) { + /* + * Case 1 - color flips + * + * G g + * / \ / \ + * p u --> P U + * / / + * n n + * + * However, since g's parent might be red, and + * 4) does not allow this, we need to recurse + * at g. + */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_right; + if (node == tmp) { + /* + * Case 2 - left rotate at parent + * + * G G + * / \ / \ + * p U --> n U + * \ / + * n p + * + * This still leaves us in violation of 4), the + * continuation into Case 3 will fix that. + */ + parent->rb_right = tmp = node->rb_left; + node->rb_left = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_right; + } + + /* + * Case 3 - right rotate at gparent + * + * G P + * / \ / \ + * p U --> n g + * / \ + * n U + */ + gparent->rb_left = tmp; /* == parent->rb_right */ + parent->rb_right = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } else { + tmp = gparent->rb_left; + if (tmp && rb_is_red(tmp)) { + /* Case 1 - color flips */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_left; + if (node == tmp) { + /* Case 2 - right rotate at parent */ + parent->rb_left = tmp = node->rb_right; + node->rb_right = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_left; + } + + /* Case 3 - left rotate at gparent */ + gparent->rb_right = tmp; /* == parent->rb_left */ + parent->rb_left = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } + } +} + +/* + * Inline version for rb_erase() use - we want to be able to inline + * and eliminate the dummy_rotate callback there + */ +static __always_inline void +____rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *node = NULL, *sibling, *tmp1, *tmp2; + + while (true) { + /* + * Loop invariants: + * - node is black (or NULL on first iteration) + * - node is not the root (parent is not NULL) + * - All leaf paths going through parent and node have a + * black node count that is 1 lower than other leaf paths. + */ + sibling = parent->rb_right; + if (node != sibling) { /* node == parent->rb_left */ + if (rb_is_red(sibling)) { + /* + * Case 1 - left rotate at parent + * + * P S + * / \ / \ + * N s --> p Sr + * / \ / \ + * Sl Sr N Sl + */ + parent->rb_right = tmp1 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_right; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_left; + if (!tmp2 || rb_is_black(tmp2)) { + /* + * Case 2 - sibling color flip + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N s + * / \ / \ + * Sl Sr Sl Sr + * + * This leaves us violating 5) which + * can be fixed by flipping p to black + * if it was red, or by recursing at p. + * p is red when coming from Case 1. + */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* + * Case 3 - right rotate at sibling + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N Sl + * / \ \ + * sl Sr s + * \ + * Sr + */ + sibling->rb_left = tmp1 = tmp2->rb_right; + tmp2->rb_right = sibling; + parent->rb_right = tmp2; + if (tmp1) + rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* + * Case 4 - left rotate at parent + color flips + * (p and sl could be either color here. + * After rotation, p becomes black, s acquires + * p's color, and sl keeps its color) + * + * (p) (s) + * / \ / \ + * N S --> P Sr + * / \ / \ + * (sl) sr N (sl) + */ + parent->rb_right = tmp2 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } else { + sibling = parent->rb_left; + if (rb_is_red(sibling)) { + /* Case 1 - right rotate at parent */ + parent->rb_left = tmp1 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_left; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_right; + if (!tmp2 || rb_is_black(tmp2)) { + /* Case 2 - sibling color flip */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* Case 3 - right rotate at sibling */ + sibling->rb_right = tmp1 = tmp2->rb_left; + tmp2->rb_left = sibling; + parent->rb_left = tmp2; + if (tmp1) + rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* Case 4 - left rotate at parent + color flips */ + parent->rb_left = tmp2 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } + } +} + +/* Non-inline version for rb_erase_augmented() use */ +void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + ____rb_erase_color(parent, root, augment_rotate); +} + +/* + * Non-augmented rbtree manipulation functions. + * + * We use dummy augmented callbacks here, and have the compiler optimize them + * out of the rb_insert_color() and rb_erase() function definitions. + */ + +static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {} +static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {} +static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {} + +static const struct rb_augment_callbacks dummy_callbacks = { + dummy_propagate, dummy_copy, dummy_rotate +}; + +void rb_insert_color(struct rb_node *node, struct rb_root *root) +{ + __rb_insert(node, root, dummy_rotate); +} + +void rb_erase(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *rebalance; + rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); + if (rebalance) + ____rb_erase_color(rebalance, root, dummy_rotate); +} + +/* + * Augmented rbtree manipulation functions. + * + * This instantiates the same __always_inline functions as in the non-augmented + * case, but this time with user-defined callbacks. + */ + +void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + __rb_insert(node, root, augment_rotate); +} + +/* + * This function returns the first node (in sort order) of the tree. + */ +struct rb_node *rb_first(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_left) + n = n->rb_left; + return n; +} + +struct rb_node *rb_last(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_right) + n = n->rb_right; + return n; +} + +struct rb_node *rb_next(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a right-hand child, go down and then left as far + * as we can. + */ + if (node->rb_right) { + node = node->rb_right; + while (node->rb_left) + node=node->rb_left; + return (struct rb_node *)node; + } + + /* + * No right-hand children. Everything down and left is smaller than us, + * so any 'next' node must be in the general direction of our parent. + * Go up the tree; any time the ancestor is a right-hand child of its + * parent, keep going up. First time it's a left-hand child of its + * parent, said parent is our 'next' node. + */ + while ((parent = rb_parent(node)) && node == parent->rb_right) + node = parent; + + return parent; +} + +struct rb_node *rb_prev(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a left-hand child, go down and then right as far + * as we can. + */ + if (node->rb_left) { + node = node->rb_left; + while (node->rb_right) + node=node->rb_right; + return (struct rb_node *)node; + } + + /* + * No left-hand children. Go up till we find an ancestor which + * is a right-hand child of its parent. + */ + while ((parent = rb_parent(node)) && node == parent->rb_left) + node = parent; + + return parent; +} + +void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root) +{ + struct rb_node *parent = rb_parent(victim); + + /* Set the surrounding nodes to point to the replacement */ + __rb_change_child(victim, new, parent, root); + if (victim->rb_left) + rb_set_parent(victim->rb_left, new); + if (victim->rb_right) + rb_set_parent(victim->rb_right, new); + + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; +} + +static struct rb_node *rb_left_deepest_node(const struct rb_node *node) +{ + for (;;) { + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + else + return (struct rb_node *)node; + } +} + +struct rb_node *rb_next_postorder(const struct rb_node *node) +{ + const struct rb_node *parent; + if (!node) + return NULL; + parent = rb_parent(node); + + /* If we're sitting on node, we've already seen our children */ + if (parent && node == parent->rb_left && parent->rb_right) { + /* If we are the parent's left node, go to the parent's right + * node then all the way down to the left */ + return rb_left_deepest_node(parent->rb_right); + } else + /* Otherwise we are the parent's right node, and the parent + * should be next */ + return (struct rb_node *)parent; +} + +struct rb_node *rb_first_postorder(const struct rb_root *root) +{ + if (!root->rb_node) + return NULL; + + return rb_left_deepest_node(root->rb_node); +} diff --git a/tools/lib/str_error_r.c b/tools/lib/str_error_r.c new file mode 100644 index 000000000..6aad8308a --- /dev/null +++ b/tools/lib/str_error_r.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +#undef _GNU_SOURCE +#include <string.h> +#include <stdio.h> +#include <linux/string.h> + +/* + * The tools so far have been using the strerror_r() GNU variant, that returns + * a string, be it the buffer passed or something else. + * + * But that, besides being tricky in cases where we expect that the function + * using strerror_r() returns the error formatted in a provided buffer (we have + * to check if it returned something else and copy that instead), breaks the + * build on systems not using glibc, like Alpine Linux, where musl libc is + * used. + * + * So, introduce yet another wrapper, str_error_r(), that has the GNU + * interface, but uses the portable XSI variant of strerror_r(), so that users + * rest asured that the provided buffer is used and it is what is returned. + */ +char *str_error_r(int errnum, char *buf, size_t buflen) +{ + int err = strerror_r(errnum, buf, buflen); + if (err) + snprintf(buf, buflen, "INTERNAL ERROR: strerror_r(%d, [buf], %zd)=%d", errnum, buflen, err); + return buf; +} diff --git a/tools/lib/string.c b/tools/lib/string.c new file mode 100644 index 000000000..ee0afcbdd --- /dev/null +++ b/tools/lib/string.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/tools/lib/string.c + * + * Copied from linux/lib/string.c, where it is: + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * More specifically, the first copied function was strtobool, which + * was introduced by: + * + * d0f1fed29e6e ("Add a strtobool function matching semantics of existing in kernel equivalents") + * Author: Jonathan Cameron <jic23@cam.ac.uk> + */ + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <linux/string.h> +#include <linux/compiler.h> + +/** + * memdup - duplicate region of memory + * + * @src: memory region to duplicate + * @len: memory region length + */ +void *memdup(const void *src, size_t len) +{ + void *p = malloc(len); + + if (p) + memcpy(p, src, len); + + return p; +} + +/** + * strtobool - convert common user inputs into boolean values + * @s: input string + * @res: result + * + * This routine returns 0 iff the first character is one of 'Yy1Nn0', or + * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value + * pointed to by res is updated upon finding a match. + */ +int strtobool(const char *s, bool *res) +{ + if (!s) + return -EINVAL; + + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + return 0; + case 'n': + case 'N': + case '0': + *res = false; + return 0; + case 'o': + case 'O': + switch (s[1]) { + case 'n': + case 'N': + *res = true; + return 0; + case 'f': + case 'F': + *res = false; + return 0; + default: + break; + } + default: + break; + } + + return -EINVAL; +} + +/** + * strlcpy - Copy a C-string into a sized buffer + * @dest: Where to copy the string to + * @src: Where to copy the string from + * @size: size of destination buffer + * + * Compatible with *BSD: the result is always a valid + * NUL-terminated string that fits in the buffer (unless, + * of course, the buffer size is zero). It does not pad + * out the result like strncpy() does. + * + * If libc has strlcpy() then that version will override this + * implementation: + */ +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wignored-attributes" +#endif +size_t __weak strlcpy(char *dest, const char *src, size_t size) +{ + size_t ret = strlen(src); + + if (size) { + size_t len = (ret >= size) ? size - 1 : ret; + memcpy(dest, src, len); + dest[len] = '\0'; + } + return ret; +} +#ifdef __clang__ +#pragma clang diagnostic pop +#endif diff --git a/tools/lib/subcmd/Build b/tools/lib/subcmd/Build new file mode 100644 index 000000000..ee3128878 --- /dev/null +++ b/tools/lib/subcmd/Build @@ -0,0 +1,7 @@ +libsubcmd-y += exec-cmd.o +libsubcmd-y += help.o +libsubcmd-y += pager.o +libsubcmd-y += parse-options.o +libsubcmd-y += run-command.o +libsubcmd-y += sigchain.o +libsubcmd-y += subcmd-config.o diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile new file mode 100644 index 000000000..5dbb0dde2 --- /dev/null +++ b/tools/lib/subcmd/Makefile @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../../scripts/Makefile.include +include ../../scripts/utilities.mak # QUIET_CLEAN + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +CC ?= $(CROSS_COMPILE)gcc +LD ?= $(CROSS_COMPILE)ld +AR ?= $(CROSS_COMPILE)ar + +RM = rm -f + +MAKEFLAGS += --no-print-directory + +LIBFILE = $(OUTPUT)libsubcmd.a + +CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -fPIC + +ifeq ($(DEBUG),0) + ifeq ($(feature-fortify-source), 1) + CFLAGS += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 + endif +endif + +ifeq ($(DEBUG),1) + CFLAGS += -O0 +else ifeq ($(CC_NO_CLANG), 0) + CFLAGS += -O3 +else + CFLAGS += -O6 +endif + +# Treat warnings as errors unless directed not to +ifneq ($(WERROR),0) + CFLAGS += -Werror +endif + +CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE + +CFLAGS += -I$(srctree)/tools/include/ + +SUBCMD_IN := $(OUTPUT)libsubcmd-in.o + +all: + +export srctree OUTPUT CC LD CFLAGS V +include $(srctree)/tools/build/Makefile.include + +all: fixdep $(LIBFILE) + +$(SUBCMD_IN): FORCE + @$(MAKE) $(build)=libsubcmd + +$(LIBFILE): $(SUBCMD_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SUBCMD_IN) + +clean: + $(call QUIET_CLEAN, libsubcmd) $(RM) $(LIBFILE); \ + find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) + +FORCE: + +.PHONY: clean FORCE diff --git a/tools/lib/subcmd/exec-cmd.c b/tools/lib/subcmd/exec-cmd.c new file mode 100644 index 000000000..33e94fb83 --- /dev/null +++ b/tools/lib/subcmd/exec-cmd.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/compiler.h> +#include <linux/string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include "subcmd-util.h" +#include "exec-cmd.h" +#include "subcmd-config.h" + +#define MAX_ARGS 32 +#define PATH_MAX 4096 + +static const char *argv_exec_path; +static const char *argv0_path; + +void exec_cmd_init(const char *exec_name, const char *prefix, + const char *exec_path, const char *exec_path_env) +{ + subcmd_config.exec_name = exec_name; + subcmd_config.prefix = prefix; + subcmd_config.exec_path = exec_path; + subcmd_config.exec_path_env = exec_path_env; +} + +#define is_dir_sep(c) ((c) == '/') + +static int is_absolute_path(const char *path) +{ + return path[0] == '/'; +} + +static const char *get_pwd_cwd(void) +{ + static char cwd[PATH_MAX + 1]; + char *pwd; + struct stat cwd_stat, pwd_stat; + if (getcwd(cwd, PATH_MAX) == NULL) + return NULL; + pwd = getenv("PWD"); + if (pwd && strcmp(pwd, cwd)) { + stat(cwd, &cwd_stat); + if (!stat(pwd, &pwd_stat) && + pwd_stat.st_dev == cwd_stat.st_dev && + pwd_stat.st_ino == cwd_stat.st_ino) { + strlcpy(cwd, pwd, PATH_MAX); + } + } + return cwd; +} + +static const char *make_nonrelative_path(const char *path) +{ + static char buf[PATH_MAX + 1]; + + if (is_absolute_path(path)) { + if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) + die("Too long path: %.*s", 60, path); + } else { + const char *cwd = get_pwd_cwd(); + if (!cwd) + die("Cannot determine the current working directory"); + if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX) + die("Too long path: %.*s", 60, path); + } + return buf; +} + +char *system_path(const char *path) +{ + char *buf = NULL; + + if (is_absolute_path(path)) + return strdup(path); + + astrcatf(&buf, "%s/%s", subcmd_config.prefix, path); + + return buf; +} + +const char *extract_argv0_path(const char *argv0) +{ + const char *slash; + + if (!argv0 || !*argv0) + return NULL; + slash = argv0 + strlen(argv0); + + while (argv0 <= slash && !is_dir_sep(*slash)) + slash--; + + if (slash >= argv0) { + argv0_path = strndup(argv0, slash - argv0); + return argv0_path ? slash + 1 : NULL; + } + + return argv0; +} + +void set_argv_exec_path(const char *exec_path) +{ + argv_exec_path = exec_path; + /* + * Propagate this setting to external programs. + */ + setenv(subcmd_config.exec_path_env, exec_path, 1); +} + + +/* Returns the highest-priority location to look for subprograms. */ +char *get_argv_exec_path(void) +{ + char *env; + + if (argv_exec_path) + return strdup(argv_exec_path); + + env = getenv(subcmd_config.exec_path_env); + if (env && *env) + return strdup(env); + + return system_path(subcmd_config.exec_path); +} + +static void add_path(char **out, const char *path) +{ + if (path && *path) { + if (is_absolute_path(path)) + astrcat(out, path); + else + astrcat(out, make_nonrelative_path(path)); + + astrcat(out, ":"); + } +} + +void setup_path(void) +{ + const char *old_path = getenv("PATH"); + char *new_path = NULL; + char *tmp = get_argv_exec_path(); + + add_path(&new_path, tmp); + add_path(&new_path, argv0_path); + free(tmp); + + if (old_path) + astrcat(&new_path, old_path); + else + astrcat(&new_path, "/usr/local/bin:/usr/bin:/bin"); + + setenv("PATH", new_path, 1); + + free(new_path); +} + +static const char **prepare_exec_cmd(const char **argv) +{ + int argc; + const char **nargv; + + for (argc = 0; argv[argc]; argc++) + ; /* just counting */ + nargv = malloc(sizeof(*nargv) * (argc + 2)); + + nargv[0] = subcmd_config.exec_name; + for (argc = 0; argv[argc]; argc++) + nargv[argc + 1] = argv[argc]; + nargv[argc + 1] = NULL; + return nargv; +} + +int execv_cmd(const char **argv) { + const char **nargv = prepare_exec_cmd(argv); + + /* execvp() can only ever return if it fails */ + execvp(subcmd_config.exec_name, (char **)nargv); + + free(nargv); + return -1; +} + + +int execl_cmd(const char *cmd,...) +{ + int argc; + const char *argv[MAX_ARGS + 1]; + const char *arg; + va_list param; + + va_start(param, cmd); + argv[0] = cmd; + argc = 1; + while (argc < MAX_ARGS) { + arg = argv[argc++] = va_arg(param, char *); + if (!arg) + break; + } + va_end(param); + if (MAX_ARGS <= argc) { + fprintf(stderr, " Error: too many args to run %s\n", cmd); + return -1; + } + + argv[argc] = NULL; + return execv_cmd(argv); +} diff --git a/tools/lib/subcmd/exec-cmd.h b/tools/lib/subcmd/exec-cmd.h new file mode 100644 index 000000000..aba591b8d --- /dev/null +++ b/tools/lib/subcmd/exec-cmd.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SUBCMD_EXEC_CMD_H +#define __SUBCMD_EXEC_CMD_H + +extern void exec_cmd_init(const char *exec_name, const char *prefix, + const char *exec_path, const char *exec_path_env); + +extern void set_argv_exec_path(const char *exec_path); +extern const char *extract_argv0_path(const char *path); +extern void setup_path(void); +extern int execv_cmd(const char **argv); /* NULL terminated */ +extern int execl_cmd(const char *cmd, ...); +/* get_argv_exec_path and system_path return malloc'd string, caller must free it */ +extern char *get_argv_exec_path(void); +extern char *system_path(const char *path); + +#endif /* __SUBCMD_EXEC_CMD_H */ diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c new file mode 100644 index 000000000..2859f107a --- /dev/null +++ b/tools/lib/subcmd/help.c @@ -0,0 +1,270 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <linux/string.h> +#include <termios.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <dirent.h> +#include "subcmd-util.h" +#include "help.h" +#include "exec-cmd.h" + +void add_cmdname(struct cmdnames *cmds, const char *name, size_t len) +{ + struct cmdname *ent = malloc(sizeof(*ent) + len + 1); + + ent->len = len; + memcpy(ent->name, name, len); + ent->name[len] = 0; + + ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc); + cmds->names[cmds->cnt++] = ent; +} + +void clean_cmdnames(struct cmdnames *cmds) +{ + unsigned int i; + + for (i = 0; i < cmds->cnt; ++i) + zfree(&cmds->names[i]); + zfree(&cmds->names); + cmds->cnt = 0; + cmds->alloc = 0; +} + +int cmdname_compare(const void *a_, const void *b_) +{ + struct cmdname *a = *(struct cmdname **)a_; + struct cmdname *b = *(struct cmdname **)b_; + return strcmp(a->name, b->name); +} + +void uniq(struct cmdnames *cmds) +{ + unsigned int i, j; + + if (!cmds->cnt) + return; + + for (i = j = 1; i < cmds->cnt; i++) + if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) + cmds->names[j++] = cmds->names[i]; + + cmds->cnt = j; +} + +void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) +{ + size_t ci, cj, ei; + int cmp; + + ci = cj = ei = 0; + while (ci < cmds->cnt && ei < excludes->cnt) { + cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name); + if (cmp < 0) + cmds->names[cj++] = cmds->names[ci++]; + else if (cmp == 0) + ci++, ei++; + else if (cmp > 0) + ei++; + } + + while (ci < cmds->cnt) + cmds->names[cj++] = cmds->names[ci++]; + + cmds->cnt = cj; +} + +static void get_term_dimensions(struct winsize *ws) +{ + char *s = getenv("LINES"); + + if (s != NULL) { + ws->ws_row = atoi(s); + s = getenv("COLUMNS"); + if (s != NULL) { + ws->ws_col = atoi(s); + if (ws->ws_row && ws->ws_col) + return; + } + } +#ifdef TIOCGWINSZ + if (ioctl(1, TIOCGWINSZ, ws) == 0 && + ws->ws_row && ws->ws_col) + return; +#endif + ws->ws_row = 25; + ws->ws_col = 80; +} + +static void pretty_print_string_list(struct cmdnames *cmds, int longest) +{ + int cols = 1, rows; + int space = longest + 1; /* min 1 SP between words */ + struct winsize win; + int max_cols; + int i, j; + + get_term_dimensions(&win); + max_cols = win.ws_col - 1; /* don't print *on* the edge */ + + if (space < max_cols) + cols = max_cols / space; + rows = (cmds->cnt + cols - 1) / cols; + + for (i = 0; i < rows; i++) { + printf(" "); + + for (j = 0; j < cols; j++) { + unsigned int n = j * rows + i; + unsigned int size = space; + + if (n >= cmds->cnt) + break; + if (j == cols-1 || n + rows >= cmds->cnt) + size = 1; + printf("%-*s", size, cmds->names[n]->name); + } + putchar('\n'); + } +} + +static int is_executable(const char *name) +{ + struct stat st; + + if (stat(name, &st) || /* stat, not lstat */ + !S_ISREG(st.st_mode)) + return 0; + + return st.st_mode & S_IXUSR; +} + +static int has_extension(const char *filename, const char *ext) +{ + size_t len = strlen(filename); + size_t extlen = strlen(ext); + + return len > extlen && !memcmp(filename + len - extlen, ext, extlen); +} + +static void list_commands_in_dir(struct cmdnames *cmds, + const char *path, + const char *prefix) +{ + int prefix_len; + DIR *dir = opendir(path); + struct dirent *de; + char *buf = NULL; + + if (!dir) + return; + if (!prefix) + prefix = "perf-"; + prefix_len = strlen(prefix); + + astrcatf(&buf, "%s/", path); + + while ((de = readdir(dir)) != NULL) { + int entlen; + + if (!strstarts(de->d_name, prefix)) + continue; + + astrcat(&buf, de->d_name); + if (!is_executable(buf)) + continue; + + entlen = strlen(de->d_name) - prefix_len; + if (has_extension(de->d_name, ".exe")) + entlen -= 4; + + add_cmdname(cmds, de->d_name + prefix_len, entlen); + } + closedir(dir); + free(buf); +} + +void load_command_list(const char *prefix, + struct cmdnames *main_cmds, + struct cmdnames *other_cmds) +{ + const char *env_path = getenv("PATH"); + char *exec_path = get_argv_exec_path(); + + if (exec_path) { + list_commands_in_dir(main_cmds, exec_path, prefix); + qsort(main_cmds->names, main_cmds->cnt, + sizeof(*main_cmds->names), cmdname_compare); + uniq(main_cmds); + } + + if (env_path) { + char *paths, *path, *colon; + path = paths = strdup(env_path); + while (1) { + if ((colon = strchr(path, ':'))) + *colon = 0; + if (!exec_path || strcmp(path, exec_path)) + list_commands_in_dir(other_cmds, path, prefix); + + if (!colon) + break; + path = colon + 1; + } + free(paths); + + qsort(other_cmds->names, other_cmds->cnt, + sizeof(*other_cmds->names), cmdname_compare); + uniq(other_cmds); + } + free(exec_path); + exclude_cmds(other_cmds, main_cmds); +} + +void list_commands(const char *title, struct cmdnames *main_cmds, + struct cmdnames *other_cmds) +{ + unsigned int i, longest = 0; + + for (i = 0; i < main_cmds->cnt; i++) + if (longest < main_cmds->names[i]->len) + longest = main_cmds->names[i]->len; + for (i = 0; i < other_cmds->cnt; i++) + if (longest < other_cmds->names[i]->len) + longest = other_cmds->names[i]->len; + + if (main_cmds->cnt) { + char *exec_path = get_argv_exec_path(); + printf("available %s in '%s'\n", title, exec_path); + printf("----------------"); + mput_char('-', strlen(title) + strlen(exec_path)); + putchar('\n'); + pretty_print_string_list(main_cmds, longest); + putchar('\n'); + free(exec_path); + } + + if (other_cmds->cnt) { + printf("%s available from elsewhere on your $PATH\n", title); + printf("---------------------------------------"); + mput_char('-', strlen(title)); + putchar('\n'); + pretty_print_string_list(other_cmds, longest); + putchar('\n'); + } +} + +int is_in_cmdlist(struct cmdnames *c, const char *s) +{ + unsigned int i; + + for (i = 0; i < c->cnt; i++) + if (!strcmp(s, c->names[i]->name)) + return 1; + return 0; +} diff --git a/tools/lib/subcmd/help.h b/tools/lib/subcmd/help.h new file mode 100644 index 000000000..355c066c8 --- /dev/null +++ b/tools/lib/subcmd/help.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SUBCMD_HELP_H +#define __SUBCMD_HELP_H + +#include <sys/types.h> +#include <stdio.h> + +struct cmdnames { + size_t alloc; + size_t cnt; + struct cmdname { + size_t len; /* also used for similarity index in help.c */ + char name[]; + } **names; +}; + +static inline void mput_char(char c, unsigned int num) +{ + while(num--) + putchar(c); +} + +void load_command_list(const char *prefix, + struct cmdnames *main_cmds, + struct cmdnames *other_cmds); +void add_cmdname(struct cmdnames *cmds, const char *name, size_t len); +void clean_cmdnames(struct cmdnames *cmds); +int cmdname_compare(const void *a, const void *b); +void uniq(struct cmdnames *cmds); +/* Here we require that excludes is a sorted list. */ +void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes); +int is_in_cmdlist(struct cmdnames *c, const char *s); +void list_commands(const char *title, struct cmdnames *main_cmds, + struct cmdnames *other_cmds); + +#endif /* __SUBCMD_HELP_H */ diff --git a/tools/lib/subcmd/pager.c b/tools/lib/subcmd/pager.c new file mode 100644 index 000000000..9997a8805 --- /dev/null +++ b/tools/lib/subcmd/pager.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <sys/select.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <signal.h> +#include <sys/ioctl.h> +#include "pager.h" +#include "run-command.h" +#include "sigchain.h" +#include "subcmd-config.h" + +/* + * This is split up from the rest of git so that we can do + * something different on Windows. + */ + +static int spawned_pager; +static int pager_columns; + +void pager_init(const char *pager_env) +{ + subcmd_config.pager_env = pager_env; +} + +static void pager_preexec(void) +{ + /* + * Work around bug in "less" by not starting it until we + * have real input + */ + fd_set in; + fd_set exception; + + FD_ZERO(&in); + FD_ZERO(&exception); + FD_SET(0, &in); + FD_SET(0, &exception); + select(1, &in, NULL, &exception, NULL); + + setenv("LESS", "FRSX", 0); +} + +static const char *pager_argv[] = { "sh", "-c", NULL, NULL }; +static struct child_process pager_process; + +static void wait_for_pager(void) +{ + fflush(stdout); + fflush(stderr); + /* signal EOF to pager */ + close(1); + close(2); + finish_command(&pager_process); +} + +static void wait_for_pager_signal(int signo) +{ + wait_for_pager(); + sigchain_pop(signo); + raise(signo); +} + +void setup_pager(void) +{ + const char *pager = getenv(subcmd_config.pager_env); + struct winsize sz; + + if (!isatty(1)) + return; + if (ioctl(1, TIOCGWINSZ, &sz) == 0) + pager_columns = sz.ws_col; + if (!pager) + pager = getenv("PAGER"); + if (!(pager || access("/usr/bin/pager", X_OK))) + pager = "/usr/bin/pager"; + if (!(pager || access("/usr/bin/less", X_OK))) + pager = "/usr/bin/less"; + if (!pager) + pager = "cat"; + if (!*pager || !strcmp(pager, "cat")) + return; + + spawned_pager = 1; /* means we are emitting to terminal */ + + /* spawn the pager */ + pager_argv[2] = pager; + pager_process.argv = pager_argv; + pager_process.in = -1; + pager_process.preexec_cb = pager_preexec; + + if (start_command(&pager_process)) + return; + + /* original process continues, but writes to the pipe */ + dup2(pager_process.in, 1); + if (isatty(2)) + dup2(pager_process.in, 2); + close(pager_process.in); + + /* this makes sure that the parent terminates after the pager */ + sigchain_push_common(wait_for_pager_signal); + atexit(wait_for_pager); +} + +int pager_in_use(void) +{ + return spawned_pager; +} + +int pager_get_columns(void) +{ + char *s; + + s = getenv("COLUMNS"); + if (s) + return atoi(s); + + return (pager_columns ? pager_columns : 80) - 2; +} diff --git a/tools/lib/subcmd/pager.h b/tools/lib/subcmd/pager.h new file mode 100644 index 000000000..f1a53cf29 --- /dev/null +++ b/tools/lib/subcmd/pager.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SUBCMD_PAGER_H +#define __SUBCMD_PAGER_H + +extern void pager_init(const char *pager_env); + +extern void setup_pager(void); +extern int pager_in_use(void); +extern int pager_get_columns(void); + +#endif /* __SUBCMD_PAGER_H */ diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c new file mode 100644 index 000000000..cb7154ecc --- /dev/null +++ b/tools/lib/subcmd/parse-options.c @@ -0,0 +1,1006 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/compiler.h> +#include <linux/string.h> +#include <linux/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <ctype.h> +#include "subcmd-util.h" +#include "parse-options.h" +#include "subcmd-config.h" +#include "pager.h" + +#define OPT_SHORT 1 +#define OPT_UNSET 2 + +char *error_buf; + +static int opterror(const struct option *opt, const char *reason, int flags) +{ + if (flags & OPT_SHORT) + fprintf(stderr, " Error: switch `%c' %s", opt->short_name, reason); + else if (flags & OPT_UNSET) + fprintf(stderr, " Error: option `no-%s' %s", opt->long_name, reason); + else + fprintf(stderr, " Error: option `%s' %s", opt->long_name, reason); + + return -1; +} + +static const char *skip_prefix(const char *str, const char *prefix) +{ + size_t len = strlen(prefix); + return strncmp(str, prefix, len) ? NULL : str + len; +} + +static void optwarning(const struct option *opt, const char *reason, int flags) +{ + if (flags & OPT_SHORT) + fprintf(stderr, " Warning: switch `%c' %s", opt->short_name, reason); + else if (flags & OPT_UNSET) + fprintf(stderr, " Warning: option `no-%s' %s", opt->long_name, reason); + else + fprintf(stderr, " Warning: option `%s' %s", opt->long_name, reason); +} + +static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt, + int flags, const char **arg) +{ + const char *res; + + if (p->opt) { + res = p->opt; + p->opt = NULL; + } else if ((opt->flags & PARSE_OPT_LASTARG_DEFAULT) && (p->argc == 1 || + **(p->argv + 1) == '-')) { + res = (const char *)opt->defval; + } else if (p->argc > 1) { + p->argc--; + res = *++p->argv; + } else + return opterror(opt, "requires a value", flags); + if (arg) + *arg = res; + return 0; +} + +static int get_value(struct parse_opt_ctx_t *p, + const struct option *opt, int flags) +{ + const char *s, *arg = NULL; + const int unset = flags & OPT_UNSET; + int err; + + if (unset && p->opt) + return opterror(opt, "takes no value", flags); + if (unset && (opt->flags & PARSE_OPT_NONEG)) + return opterror(opt, "isn't available", flags); + if (opt->flags & PARSE_OPT_DISABLED) + return opterror(opt, "is not usable", flags); + + if (opt->flags & PARSE_OPT_EXCLUSIVE) { + if (p->excl_opt && p->excl_opt != opt) { + char msg[128]; + + if (((flags & OPT_SHORT) && p->excl_opt->short_name) || + p->excl_opt->long_name == NULL) { + snprintf(msg, sizeof(msg), "cannot be used with switch `%c'", + p->excl_opt->short_name); + } else { + snprintf(msg, sizeof(msg), "cannot be used with %s", + p->excl_opt->long_name); + } + opterror(opt, msg, flags); + return -3; + } + p->excl_opt = opt; + } + if (!(flags & OPT_SHORT) && p->opt) { + switch (opt->type) { + case OPTION_CALLBACK: + if (!(opt->flags & PARSE_OPT_NOARG)) + break; + /* FALLTHROUGH */ + case OPTION_BOOLEAN: + case OPTION_INCR: + case OPTION_BIT: + case OPTION_SET_UINT: + case OPTION_SET_PTR: + return opterror(opt, "takes no value", flags); + case OPTION_END: + case OPTION_ARGUMENT: + case OPTION_GROUP: + case OPTION_STRING: + case OPTION_INTEGER: + case OPTION_UINTEGER: + case OPTION_LONG: + case OPTION_U64: + default: + break; + } + } + + if (opt->flags & PARSE_OPT_NOBUILD) { + char reason[128]; + bool noarg = false; + + err = snprintf(reason, sizeof(reason), + opt->flags & PARSE_OPT_CANSKIP ? + "is being ignored because %s " : + "is not available because %s", + opt->build_opt); + reason[sizeof(reason) - 1] = '\0'; + + if (err < 0) + strncpy(reason, opt->flags & PARSE_OPT_CANSKIP ? + "is being ignored" : + "is not available", + sizeof(reason)); + + if (!(opt->flags & PARSE_OPT_CANSKIP)) + return opterror(opt, reason, flags); + + err = 0; + if (unset) + noarg = true; + if (opt->flags & PARSE_OPT_NOARG) + noarg = true; + if (opt->flags & PARSE_OPT_OPTARG && !p->opt) + noarg = true; + + switch (opt->type) { + case OPTION_BOOLEAN: + case OPTION_INCR: + case OPTION_BIT: + case OPTION_SET_UINT: + case OPTION_SET_PTR: + case OPTION_END: + case OPTION_ARGUMENT: + case OPTION_GROUP: + noarg = true; + break; + case OPTION_CALLBACK: + case OPTION_STRING: + case OPTION_INTEGER: + case OPTION_UINTEGER: + case OPTION_LONG: + case OPTION_U64: + default: + break; + } + + if (!noarg) + err = get_arg(p, opt, flags, NULL); + if (err) + return err; + + optwarning(opt, reason, flags); + return 0; + } + + switch (opt->type) { + case OPTION_BIT: + if (unset) + *(int *)opt->value &= ~opt->defval; + else + *(int *)opt->value |= opt->defval; + return 0; + + case OPTION_BOOLEAN: + *(bool *)opt->value = unset ? false : true; + if (opt->set) + *(bool *)opt->set = true; + return 0; + + case OPTION_INCR: + *(int *)opt->value = unset ? 0 : *(int *)opt->value + 1; + return 0; + + case OPTION_SET_UINT: + *(unsigned int *)opt->value = unset ? 0 : opt->defval; + return 0; + + case OPTION_SET_PTR: + *(void **)opt->value = unset ? NULL : (void *)opt->defval; + return 0; + + case OPTION_STRING: + err = 0; + if (unset) + *(const char **)opt->value = NULL; + else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) + *(const char **)opt->value = (const char *)opt->defval; + else + err = get_arg(p, opt, flags, (const char **)opt->value); + + if (opt->set) + *(bool *)opt->set = true; + + /* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */ + if (opt->flags & PARSE_OPT_NOEMPTY) { + const char *val = *(const char **)opt->value; + + if (!val) + return err; + + /* Similar to unset if we are given an empty string. */ + if (val[0] == '\0') { + *(const char **)opt->value = NULL; + return 0; + } + } + + return err; + + case OPTION_CALLBACK: + if (unset) + return (*opt->callback)(opt, NULL, 1) ? (-1) : 0; + if (opt->flags & PARSE_OPT_NOARG) + return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; + if (opt->flags & PARSE_OPT_OPTARG && !p->opt) + return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; + if (get_arg(p, opt, flags, &arg)) + return -1; + return (*opt->callback)(opt, arg, 0) ? (-1) : 0; + + case OPTION_INTEGER: + if (unset) { + *(int *)opt->value = 0; + return 0; + } + if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { + *(int *)opt->value = opt->defval; + return 0; + } + if (get_arg(p, opt, flags, &arg)) + return -1; + *(int *)opt->value = strtol(arg, (char **)&s, 10); + if (*s) + return opterror(opt, "expects a numerical value", flags); + return 0; + + case OPTION_UINTEGER: + if (unset) { + *(unsigned int *)opt->value = 0; + return 0; + } + if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { + *(unsigned int *)opt->value = opt->defval; + return 0; + } + if (get_arg(p, opt, flags, &arg)) + return -1; + if (arg[0] == '-') + return opterror(opt, "expects an unsigned numerical value", flags); + *(unsigned int *)opt->value = strtol(arg, (char **)&s, 10); + if (*s) + return opterror(opt, "expects a numerical value", flags); + return 0; + + case OPTION_LONG: + if (unset) { + *(long *)opt->value = 0; + return 0; + } + if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { + *(long *)opt->value = opt->defval; + return 0; + } + if (get_arg(p, opt, flags, &arg)) + return -1; + *(long *)opt->value = strtol(arg, (char **)&s, 10); + if (*s) + return opterror(opt, "expects a numerical value", flags); + return 0; + + case OPTION_U64: + if (unset) { + *(u64 *)opt->value = 0; + return 0; + } + if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { + *(u64 *)opt->value = opt->defval; + return 0; + } + if (get_arg(p, opt, flags, &arg)) + return -1; + if (arg[0] == '-') + return opterror(opt, "expects an unsigned numerical value", flags); + *(u64 *)opt->value = strtoull(arg, (char **)&s, 10); + if (*s) + return opterror(opt, "expects a numerical value", flags); + return 0; + + case OPTION_END: + case OPTION_ARGUMENT: + case OPTION_GROUP: + default: + die("should not happen, someone must be hit on the forehead"); + } +} + +static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options) +{ +retry: + for (; options->type != OPTION_END; options++) { + if (options->short_name == *p->opt) { + p->opt = p->opt[1] ? p->opt + 1 : NULL; + return get_value(p, options, OPT_SHORT); + } + } + + if (options->parent) { + options = options->parent; + goto retry; + } + + return -2; +} + +static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg, + const struct option *options) +{ + const char *arg_end = strchr(arg, '='); + const struct option *abbrev_option = NULL, *ambiguous_option = NULL; + int abbrev_flags = 0, ambiguous_flags = 0; + + if (!arg_end) + arg_end = arg + strlen(arg); + +retry: + for (; options->type != OPTION_END; options++) { + const char *rest; + int flags = 0; + + if (!options->long_name) + continue; + + rest = skip_prefix(arg, options->long_name); + if (options->type == OPTION_ARGUMENT) { + if (!rest) + continue; + if (*rest == '=') + return opterror(options, "takes no value", flags); + if (*rest) + continue; + p->out[p->cpidx++] = arg - 2; + return 0; + } + if (!rest) { + if (strstarts(options->long_name, "no-")) { + /* + * The long name itself starts with "no-", so + * accept the option without "no-" so that users + * do not have to enter "no-no-" to get the + * negation. + */ + rest = skip_prefix(arg, options->long_name + 3); + if (rest) { + flags |= OPT_UNSET; + goto match; + } + /* Abbreviated case */ + if (strstarts(options->long_name + 3, arg)) { + flags |= OPT_UNSET; + goto is_abbreviated; + } + } + /* abbreviated? */ + if (!strncmp(options->long_name, arg, arg_end - arg)) { +is_abbreviated: + if (abbrev_option) { + /* + * If this is abbreviated, it is + * ambiguous. So when there is no + * exact match later, we need to + * error out. + */ + ambiguous_option = abbrev_option; + ambiguous_flags = abbrev_flags; + } + if (!(flags & OPT_UNSET) && *arg_end) + p->opt = arg_end + 1; + abbrev_option = options; + abbrev_flags = flags; + continue; + } + /* negated and abbreviated very much? */ + if (strstarts("no-", arg)) { + flags |= OPT_UNSET; + goto is_abbreviated; + } + /* negated? */ + if (strncmp(arg, "no-", 3)) + continue; + flags |= OPT_UNSET; + rest = skip_prefix(arg + 3, options->long_name); + /* abbreviated and negated? */ + if (!rest && strstarts(options->long_name, arg + 3)) + goto is_abbreviated; + if (!rest) + continue; + } +match: + if (*rest) { + if (*rest != '=') + continue; + p->opt = rest + 1; + } + return get_value(p, options, flags); + } + + if (ambiguous_option) { + fprintf(stderr, + " Error: Ambiguous option: %s (could be --%s%s or --%s%s)\n", + arg, + (ambiguous_flags & OPT_UNSET) ? "no-" : "", + ambiguous_option->long_name, + (abbrev_flags & OPT_UNSET) ? "no-" : "", + abbrev_option->long_name); + return -1; + } + if (abbrev_option) + return get_value(p, abbrev_option, abbrev_flags); + + if (options->parent) { + options = options->parent; + goto retry; + } + + return -2; +} + +static void check_typos(const char *arg, const struct option *options) +{ + if (strlen(arg) < 3) + return; + + if (strstarts(arg, "no-")) { + fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg); + exit(129); + } + + for (; options->type != OPTION_END; options++) { + if (!options->long_name) + continue; + if (strstarts(options->long_name, arg)) { + fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg); + exit(129); + } + } +} + +static void parse_options_start(struct parse_opt_ctx_t *ctx, + int argc, const char **argv, int flags) +{ + memset(ctx, 0, sizeof(*ctx)); + ctx->argc = argc - 1; + ctx->argv = argv + 1; + ctx->out = argv; + ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0); + ctx->flags = flags; + if ((flags & PARSE_OPT_KEEP_UNKNOWN) && + (flags & PARSE_OPT_STOP_AT_NON_OPTION)) + die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together"); +} + +static int usage_with_options_internal(const char * const *, + const struct option *, int, + struct parse_opt_ctx_t *); + +static int parse_options_step(struct parse_opt_ctx_t *ctx, + const struct option *options, + const char * const usagestr[]) +{ + int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP); + int excl_short_opt = 1; + const char *arg; + + /* we must reset ->opt, unknown short option leave it dangling */ + ctx->opt = NULL; + + for (; ctx->argc; ctx->argc--, ctx->argv++) { + arg = ctx->argv[0]; + if (*arg != '-' || !arg[1]) { + if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION) + break; + ctx->out[ctx->cpidx++] = ctx->argv[0]; + continue; + } + + if (arg[1] != '-') { + ctx->opt = ++arg; + if (internal_help && *ctx->opt == 'h') { + return usage_with_options_internal(usagestr, options, 0, ctx); + } + switch (parse_short_opt(ctx, options)) { + case -1: + return parse_options_usage(usagestr, options, arg, 1); + case -2: + goto unknown; + case -3: + goto exclusive; + default: + break; + } + if (ctx->opt) + check_typos(arg, options); + while (ctx->opt) { + if (internal_help && *ctx->opt == 'h') + return usage_with_options_internal(usagestr, options, 0, ctx); + arg = ctx->opt; + switch (parse_short_opt(ctx, options)) { + case -1: + return parse_options_usage(usagestr, options, arg, 1); + case -2: + /* fake a short option thing to hide the fact that we may have + * started to parse aggregated stuff + * + * This is leaky, too bad. + */ + ctx->argv[0] = strdup(ctx->opt - 1); + *(char *)ctx->argv[0] = '-'; + goto unknown; + case -3: + goto exclusive; + default: + break; + } + } + continue; + } + + if (!arg[2]) { /* "--" */ + if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) { + ctx->argc--; + ctx->argv++; + } + break; + } + + arg += 2; + if (internal_help && !strcmp(arg, "help-all")) + return usage_with_options_internal(usagestr, options, 1, ctx); + if (internal_help && !strcmp(arg, "help")) + return usage_with_options_internal(usagestr, options, 0, ctx); + if (!strcmp(arg, "list-opts")) + return PARSE_OPT_LIST_OPTS; + if (!strcmp(arg, "list-cmds")) + return PARSE_OPT_LIST_SUBCMDS; + switch (parse_long_opt(ctx, arg, options)) { + case -1: + return parse_options_usage(usagestr, options, arg, 0); + case -2: + goto unknown; + case -3: + excl_short_opt = 0; + goto exclusive; + default: + break; + } + continue; +unknown: + if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN)) + return PARSE_OPT_UNKNOWN; + ctx->out[ctx->cpidx++] = ctx->argv[0]; + ctx->opt = NULL; + } + return PARSE_OPT_DONE; + +exclusive: + parse_options_usage(usagestr, options, arg, excl_short_opt); + if ((excl_short_opt && ctx->excl_opt->short_name) || + ctx->excl_opt->long_name == NULL) { + char opt = ctx->excl_opt->short_name; + parse_options_usage(NULL, options, &opt, 1); + } else { + parse_options_usage(NULL, options, ctx->excl_opt->long_name, 0); + } + return PARSE_OPT_HELP; +} + +static int parse_options_end(struct parse_opt_ctx_t *ctx) +{ + memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out)); + ctx->out[ctx->cpidx + ctx->argc] = NULL; + return ctx->cpidx + ctx->argc; +} + +int parse_options_subcommand(int argc, const char **argv, const struct option *options, + const char *const subcommands[], const char *usagestr[], int flags) +{ + struct parse_opt_ctx_t ctx; + + /* build usage string if it's not provided */ + if (subcommands && !usagestr[0]) { + char *buf = NULL; + + astrcatf(&buf, "%s %s [<options>] {", subcmd_config.exec_name, argv[0]); + + for (int i = 0; subcommands[i]; i++) { + if (i) + astrcat(&buf, "|"); + astrcat(&buf, subcommands[i]); + } + astrcat(&buf, "}"); + + usagestr[0] = buf; + } + + parse_options_start(&ctx, argc, argv, flags); + switch (parse_options_step(&ctx, options, usagestr)) { + case PARSE_OPT_HELP: + exit(129); + case PARSE_OPT_DONE: + break; + case PARSE_OPT_LIST_OPTS: + while (options->type != OPTION_END) { + if (options->long_name) + printf("--%s ", options->long_name); + options++; + } + putchar('\n'); + exit(130); + case PARSE_OPT_LIST_SUBCMDS: + if (subcommands) { + for (int i = 0; subcommands[i]; i++) + printf("%s ", subcommands[i]); + } + putchar('\n'); + exit(130); + default: /* PARSE_OPT_UNKNOWN */ + if (ctx.argv[0][1] == '-') + astrcatf(&error_buf, "unknown option `%s'", + ctx.argv[0] + 2); + else + astrcatf(&error_buf, "unknown switch `%c'", *ctx.opt); + usage_with_options(usagestr, options); + } + + return parse_options_end(&ctx); +} + +int parse_options(int argc, const char **argv, const struct option *options, + const char * const usagestr[], int flags) +{ + return parse_options_subcommand(argc, argv, options, NULL, + (const char **) usagestr, flags); +} + +#define USAGE_OPTS_WIDTH 24 +#define USAGE_GAP 2 + +static void print_option_help(const struct option *opts, int full) +{ + size_t pos; + int pad; + + if (opts->type == OPTION_GROUP) { + fputc('\n', stderr); + if (*opts->help) + fprintf(stderr, "%s\n", opts->help); + return; + } + if (!full && (opts->flags & PARSE_OPT_HIDDEN)) + return; + if (opts->flags & PARSE_OPT_DISABLED) + return; + + pos = fprintf(stderr, " "); + if (opts->short_name) + pos += fprintf(stderr, "-%c", opts->short_name); + else + pos += fprintf(stderr, " "); + + if (opts->long_name && opts->short_name) + pos += fprintf(stderr, ", "); + if (opts->long_name) + pos += fprintf(stderr, "--%s", opts->long_name); + + switch (opts->type) { + case OPTION_ARGUMENT: + break; + case OPTION_LONG: + case OPTION_U64: + case OPTION_INTEGER: + case OPTION_UINTEGER: + if (opts->flags & PARSE_OPT_OPTARG) + if (opts->long_name) + pos += fprintf(stderr, "[=<n>]"); + else + pos += fprintf(stderr, "[<n>]"); + else + pos += fprintf(stderr, " <n>"); + break; + case OPTION_CALLBACK: + if (opts->flags & PARSE_OPT_NOARG) + break; + /* FALLTHROUGH */ + case OPTION_STRING: + if (opts->argh) { + if (opts->flags & PARSE_OPT_OPTARG) + if (opts->long_name) + pos += fprintf(stderr, "[=<%s>]", opts->argh); + else + pos += fprintf(stderr, "[<%s>]", opts->argh); + else + pos += fprintf(stderr, " <%s>", opts->argh); + } else { + if (opts->flags & PARSE_OPT_OPTARG) + if (opts->long_name) + pos += fprintf(stderr, "[=...]"); + else + pos += fprintf(stderr, "[...]"); + else + pos += fprintf(stderr, " ..."); + } + break; + default: /* OPTION_{BIT,BOOLEAN,SET_UINT,SET_PTR} */ + case OPTION_END: + case OPTION_GROUP: + case OPTION_BIT: + case OPTION_BOOLEAN: + case OPTION_INCR: + case OPTION_SET_UINT: + case OPTION_SET_PTR: + break; + } + + if (pos <= USAGE_OPTS_WIDTH) + pad = USAGE_OPTS_WIDTH - pos; + else { + fputc('\n', stderr); + pad = USAGE_OPTS_WIDTH; + } + fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help); + if (opts->flags & PARSE_OPT_NOBUILD) + fprintf(stderr, "%*s(not built-in because %s)\n", + USAGE_OPTS_WIDTH + USAGE_GAP, "", + opts->build_opt); +} + +static int option__cmp(const void *va, const void *vb) +{ + const struct option *a = va, *b = vb; + int sa = tolower(a->short_name), sb = tolower(b->short_name), ret; + + if (sa == 0) + sa = 'z' + 1; + if (sb == 0) + sb = 'z' + 1; + + ret = sa - sb; + + if (ret == 0) { + const char *la = a->long_name ?: "", + *lb = b->long_name ?: ""; + ret = strcmp(la, lb); + } + + return ret; +} + +static struct option *options__order(const struct option *opts) +{ + int nr_opts = 0, len; + const struct option *o = opts; + struct option *ordered; + + for (o = opts; o->type != OPTION_END; o++) + ++nr_opts; + + len = sizeof(*o) * (nr_opts + 1); + ordered = malloc(len); + if (!ordered) + goto out; + memcpy(ordered, opts, len); + + qsort(ordered, nr_opts, sizeof(*o), option__cmp); +out: + return ordered; +} + +static bool option__in_argv(const struct option *opt, const struct parse_opt_ctx_t *ctx) +{ + int i; + + for (i = 1; i < ctx->argc; ++i) { + const char *arg = ctx->argv[i]; + + if (arg[0] != '-') { + if (arg[1] == '\0') { + if (arg[0] == opt->short_name) + return true; + continue; + } + + if (opt->long_name && strcmp(opt->long_name, arg) == 0) + return true; + + if (opt->help && strcasestr(opt->help, arg) != NULL) + return true; + + continue; + } + + if (arg[1] == opt->short_name || + (arg[1] == '-' && opt->long_name && strcmp(opt->long_name, arg + 2) == 0)) + return true; + } + + return false; +} + +static int usage_with_options_internal(const char * const *usagestr, + const struct option *opts, int full, + struct parse_opt_ctx_t *ctx) +{ + struct option *ordered; + + if (!usagestr) + return PARSE_OPT_HELP; + + setup_pager(); + + if (error_buf) { + fprintf(stderr, " Error: %s\n", error_buf); + zfree(&error_buf); + } + + fprintf(stderr, "\n Usage: %s\n", *usagestr++); + while (*usagestr && **usagestr) + fprintf(stderr, " or: %s\n", *usagestr++); + while (*usagestr) { + fprintf(stderr, "%s%s\n", + **usagestr ? " " : "", + *usagestr); + usagestr++; + } + + if (opts->type != OPTION_GROUP) + fputc('\n', stderr); + + ordered = options__order(opts); + if (ordered) + opts = ordered; + + for ( ; opts->type != OPTION_END; opts++) { + if (ctx && ctx->argc > 1 && !option__in_argv(opts, ctx)) + continue; + print_option_help(opts, full); + } + + fputc('\n', stderr); + + free(ordered); + + return PARSE_OPT_HELP; +} + +void usage_with_options(const char * const *usagestr, + const struct option *opts) +{ + usage_with_options_internal(usagestr, opts, 0, NULL); + exit(129); +} + +void usage_with_options_msg(const char * const *usagestr, + const struct option *opts, const char *fmt, ...) +{ + va_list ap; + char *tmp = error_buf; + + va_start(ap, fmt); + if (vasprintf(&error_buf, fmt, ap) == -1) + die("vasprintf failed"); + va_end(ap); + + free(tmp); + + usage_with_options_internal(usagestr, opts, 0, NULL); + exit(129); +} + +int parse_options_usage(const char * const *usagestr, + const struct option *opts, + const char *optstr, bool short_opt) +{ + if (!usagestr) + goto opt; + + fprintf(stderr, "\n Usage: %s\n", *usagestr++); + while (*usagestr && **usagestr) + fprintf(stderr, " or: %s\n", *usagestr++); + while (*usagestr) { + fprintf(stderr, "%s%s\n", + **usagestr ? " " : "", + *usagestr); + usagestr++; + } + fputc('\n', stderr); + +opt: + for ( ; opts->type != OPTION_END; opts++) { + if (short_opt) { + if (opts->short_name == *optstr) { + print_option_help(opts, 0); + break; + } + continue; + } + + if (opts->long_name == NULL) + continue; + + if (strstarts(opts->long_name, optstr)) + print_option_help(opts, 0); + if (strstarts("no-", optstr) && + strstarts(opts->long_name, optstr + 3)) + print_option_help(opts, 0); + } + + return PARSE_OPT_HELP; +} + + +int parse_opt_verbosity_cb(const struct option *opt, + const char *arg __maybe_unused, + int unset) +{ + int *target = opt->value; + + if (unset) + /* --no-quiet, --no-verbose */ + *target = 0; + else if (opt->short_name == 'v') { + if (*target >= 0) + (*target)++; + else + *target = 1; + } else { + if (*target <= 0) + (*target)--; + else + *target = -1; + } + return 0; +} + +static struct option * +find_option(struct option *opts, int shortopt, const char *longopt) +{ + for (; opts->type != OPTION_END; opts++) { + if ((shortopt && opts->short_name == shortopt) || + (opts->long_name && longopt && + !strcmp(opts->long_name, longopt))) + return opts; + } + return NULL; +} + +void set_option_flag(struct option *opts, int shortopt, const char *longopt, + int flag) +{ + struct option *opt = find_option(opts, shortopt, longopt); + + if (opt) + opt->flags |= flag; + return; +} + +void set_option_nobuild(struct option *opts, int shortopt, + const char *longopt, + const char *build_opt, + bool can_skip) +{ + struct option *opt = find_option(opts, shortopt, longopt); + + if (!opt) + return; + + opt->flags |= PARSE_OPT_NOBUILD; + opt->flags |= can_skip ? PARSE_OPT_CANSKIP : 0; + opt->build_opt = build_opt; +} diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h new file mode 100644 index 000000000..92fdbe151 --- /dev/null +++ b/tools/lib/subcmd/parse-options.h @@ -0,0 +1,242 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SUBCMD_PARSE_OPTIONS_H +#define __SUBCMD_PARSE_OPTIONS_H + +#include <linux/kernel.h> +#include <stdbool.h> +#include <stdint.h> + +#ifndef NORETURN +#define NORETURN __attribute__((__noreturn__)) +#endif + +enum parse_opt_type { + /* special types */ + OPTION_END, + OPTION_ARGUMENT, + OPTION_GROUP, + /* options with no arguments */ + OPTION_BIT, + OPTION_BOOLEAN, + OPTION_INCR, + OPTION_SET_UINT, + OPTION_SET_PTR, + /* options with arguments (usually) */ + OPTION_STRING, + OPTION_INTEGER, + OPTION_LONG, + OPTION_CALLBACK, + OPTION_U64, + OPTION_UINTEGER, +}; + +enum parse_opt_flags { + PARSE_OPT_KEEP_DASHDASH = 1, + PARSE_OPT_STOP_AT_NON_OPTION = 2, + PARSE_OPT_KEEP_ARGV0 = 4, + PARSE_OPT_KEEP_UNKNOWN = 8, + PARSE_OPT_NO_INTERNAL_HELP = 16, +}; + +enum parse_opt_option_flags { + PARSE_OPT_OPTARG = 1, + PARSE_OPT_NOARG = 2, + PARSE_OPT_NONEG = 4, + PARSE_OPT_HIDDEN = 8, + PARSE_OPT_LASTARG_DEFAULT = 16, + PARSE_OPT_DISABLED = 32, + PARSE_OPT_EXCLUSIVE = 64, + PARSE_OPT_NOEMPTY = 128, + PARSE_OPT_NOBUILD = 256, + PARSE_OPT_CANSKIP = 512, +}; + +struct option; +typedef int parse_opt_cb(const struct option *, const char *arg, int unset); + +/* + * `type`:: + * holds the type of the option, you must have an OPTION_END last in your + * array. + * + * `short_name`:: + * the character to use as a short option name, '\0' if none. + * + * `long_name`:: + * the long option name, without the leading dashes, NULL if none. + * + * `value`:: + * stores pointers to the values to be filled. + * + * `argh`:: + * token to explain the kind of argument this option wants. Keep it + * homogenous across the repository. + * + * `help`:: + * the short help associated to what the option does. + * Must never be NULL (except for OPTION_END). + * OPTION_GROUP uses this pointer to store the group header. + * + * `flags`:: + * mask of parse_opt_option_flags. + * PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs) + * PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs + * PARSE_OPT_NONEG: says that this option cannot be negated + * PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in + * the long one. + * + * `callback`:: + * pointer to the callback to use for OPTION_CALLBACK. + * + * `defval`:: + * default value to fill (*->value) with for PARSE_OPT_OPTARG. + * OPTION_{BIT,SET_UINT,SET_PTR} store the {mask,integer,pointer} to put in + * the value when met. + * CALLBACKS can use it like they want. + * + * `set`:: + * whether an option was set by the user + */ +struct option { + enum parse_opt_type type; + int short_name; + const char *long_name; + void *value; + const char *argh; + const char *help; + const char *build_opt; + + int flags; + parse_opt_cb *callback; + intptr_t defval; + bool *set; + void *data; + const struct option *parent; +}; + +#define check_vtype(v, type) ( BUILD_BUG_ON_ZERO(!__builtin_types_compatible_p(typeof(v), type)) + v ) + +#define OPT_END() { .type = OPTION_END } +#define OPT_PARENT(p) { .type = OPTION_END, .parent = (p) } +#define OPT_ARGUMENT(l, h) { .type = OPTION_ARGUMENT, .long_name = (l), .help = (h) } +#define OPT_GROUP(h) { .type = OPTION_GROUP, .help = (h) } +#define OPT_BIT(s, l, v, h, b) { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h), .defval = (b) } +#define OPT_BOOLEAN(s, l, v, h) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = check_vtype(v, bool *), .help = (h) } +#define OPT_BOOLEAN_FLAG(s, l, v, h, f) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = check_vtype(v, bool *), .help = (h), .flags = (f) } +#define OPT_BOOLEAN_SET(s, l, v, os, h) \ + { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), \ + .value = check_vtype(v, bool *), .help = (h), \ + .set = check_vtype(os, bool *)} +#define OPT_INCR(s, l, v, h) { .type = OPTION_INCR, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) } +#define OPT_SET_UINT(s, l, v, h, i) { .type = OPTION_SET_UINT, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h), .defval = (i) } +#define OPT_SET_PTR(s, l, v, h, p) { .type = OPTION_SET_PTR, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (p) } +#define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) } +#define OPT_UINTEGER(s, l, v, h) { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) } +#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) } +#define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) } +#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) } +#define OPT_STRING_OPTARG(s, l, v, a, h, d) \ + { .type = OPTION_STRING, .short_name = (s), .long_name = (l), \ + .value = check_vtype(v, const char **), .argh =(a), .help = (h), \ + .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) } +#define OPT_STRING_OPTARG_SET(s, l, v, os, a, h, d) \ + { .type = OPTION_STRING, .short_name = (s), .long_name = (l), \ + .value = check_vtype(v, const char **), .argh = (a), .help = (h), \ + .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d), \ + .set = check_vtype(os, bool *)} +#define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h), .flags = PARSE_OPT_NOEMPTY} +#define OPT_DATE(s, l, v, h) \ + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } +#define OPT_CALLBACK(s, l, v, a, h, f) \ + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f) } +#define OPT_CALLBACK_NOOPT(s, l, v, a, h, f) \ + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG } +#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \ + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT } +#define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \ + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l),\ + .value = (v), .arg = (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\ + .flags = PARSE_OPT_LASTARG_DEFAULT | PARSE_OPT_NOARG} +#define OPT_CALLBACK_OPTARG(s, l, v, d, a, h, f) \ + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), \ + .value = (v), .argh = (a), .help = (h), .callback = (f), \ + .flags = PARSE_OPT_OPTARG, .data = (d) } + +/* parse_options() will filter out the processed options and leave the + * non-option argments in argv[]. + * Returns the number of arguments left in argv[]. + * + * NOTE: parse_options() and parse_options_subcommand() may call exit() in the + * case of an error (or for 'special' options like --list-cmds or --list-opts). + */ +extern int parse_options(int argc, const char **argv, + const struct option *options, + const char * const usagestr[], int flags); + +extern int parse_options_subcommand(int argc, const char **argv, + const struct option *options, + const char *const subcommands[], + const char *usagestr[], int flags); + +extern NORETURN void usage_with_options(const char * const *usagestr, + const struct option *options); +extern NORETURN __attribute__((format(printf,3,4))) +void usage_with_options_msg(const char * const *usagestr, + const struct option *options, + const char *fmt, ...); + +/*----- incremantal advanced APIs -----*/ + +enum { + PARSE_OPT_HELP = -1, + PARSE_OPT_DONE, + PARSE_OPT_LIST_OPTS, + PARSE_OPT_LIST_SUBCMDS, + PARSE_OPT_UNKNOWN, +}; + +/* + * It's okay for the caller to consume argv/argc in the usual way. + * Other fields of that structure are private to parse-options and should not + * be modified in any way. + */ +struct parse_opt_ctx_t { + const char **argv; + const char **out; + int argc, cpidx; + const char *opt; + const struct option *excl_opt; + int flags; +}; + +extern int parse_options_usage(const char * const *usagestr, + const struct option *opts, + const char *optstr, + bool short_opt); + + +/*----- some often used options -----*/ +extern int parse_opt_abbrev_cb(const struct option *, const char *, int); +extern int parse_opt_approxidate_cb(const struct option *, const char *, int); +extern int parse_opt_verbosity_cb(const struct option *, const char *, int); + +#define OPT__VERBOSE(var) OPT_BOOLEAN('v', "verbose", (var), "be verbose") +#define OPT__QUIET(var) OPT_BOOLEAN('q', "quiet", (var), "be quiet") +#define OPT__VERBOSITY(var) \ + { OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \ + PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \ + { OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \ + PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 } +#define OPT__DRY_RUN(var) OPT_BOOLEAN('n', "dry-run", (var), "dry run") +#define OPT__ABBREV(var) \ + { OPTION_CALLBACK, 0, "abbrev", (var), "n", \ + "use <n> digits to display SHA-1s", \ + PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 } + +extern const char *parse_options_fix_filename(const char *prefix, const char *file); + +void set_option_flag(struct option *opts, int sopt, const char *lopt, int flag); +void set_option_nobuild(struct option *opts, int shortopt, const char *longopt, + const char *build_opt, bool can_skip); + +#endif /* __SUBCMD_PARSE_OPTIONS_H */ diff --git a/tools/lib/subcmd/run-command.c b/tools/lib/subcmd/run-command.c new file mode 100644 index 000000000..5cdac2162 --- /dev/null +++ b/tools/lib/subcmd/run-command.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <string.h> +#include <linux/string.h> +#include <errno.h> +#include <sys/wait.h> +#include "subcmd-util.h" +#include "run-command.h" +#include "exec-cmd.h" + +#define STRERR_BUFSIZE 128 + +static inline void close_pair(int fd[2]) +{ + close(fd[0]); + close(fd[1]); +} + +static inline void dup_devnull(int to) +{ + int fd = open("/dev/null", O_RDWR); + dup2(fd, to); + close(fd); +} + +int start_command(struct child_process *cmd) +{ + int need_in, need_out, need_err; + int fdin[2], fdout[2], fderr[2]; + char sbuf[STRERR_BUFSIZE]; + + /* + * In case of errors we must keep the promise to close FDs + * that have been passed in via ->in and ->out. + */ + + need_in = !cmd->no_stdin && cmd->in < 0; + if (need_in) { + if (pipe(fdin) < 0) { + if (cmd->out > 0) + close(cmd->out); + return -ERR_RUN_COMMAND_PIPE; + } + cmd->in = fdin[1]; + } + + need_out = !cmd->no_stdout + && !cmd->stdout_to_stderr + && cmd->out < 0; + if (need_out) { + if (pipe(fdout) < 0) { + if (need_in) + close_pair(fdin); + else if (cmd->in) + close(cmd->in); + return -ERR_RUN_COMMAND_PIPE; + } + cmd->out = fdout[0]; + } + + need_err = !cmd->no_stderr && cmd->err < 0; + if (need_err) { + if (pipe(fderr) < 0) { + if (need_in) + close_pair(fdin); + else if (cmd->in) + close(cmd->in); + if (need_out) + close_pair(fdout); + else if (cmd->out) + close(cmd->out); + return -ERR_RUN_COMMAND_PIPE; + } + cmd->err = fderr[0]; + } + + fflush(NULL); + cmd->pid = fork(); + if (!cmd->pid) { + if (cmd->no_stdin) + dup_devnull(0); + else if (need_in) { + dup2(fdin[0], 0); + close_pair(fdin); + } else if (cmd->in) { + dup2(cmd->in, 0); + close(cmd->in); + } + + if (cmd->no_stderr) + dup_devnull(2); + else if (need_err) { + dup2(fderr[1], 2); + close_pair(fderr); + } + + if (cmd->no_stdout) + dup_devnull(1); + else if (cmd->stdout_to_stderr) + dup2(2, 1); + else if (need_out) { + dup2(fdout[1], 1); + close_pair(fdout); + } else if (cmd->out > 1) { + dup2(cmd->out, 1); + close(cmd->out); + } + + if (cmd->dir && chdir(cmd->dir)) + die("exec %s: cd to %s failed (%s)", cmd->argv[0], + cmd->dir, str_error_r(errno, sbuf, sizeof(sbuf))); + if (cmd->env) { + for (; *cmd->env; cmd->env++) { + if (strchr(*cmd->env, '=')) + putenv((char*)*cmd->env); + else + unsetenv(*cmd->env); + } + } + if (cmd->preexec_cb) + cmd->preexec_cb(); + if (cmd->exec_cmd) { + execv_cmd(cmd->argv); + } else { + execvp(cmd->argv[0], (char *const*) cmd->argv); + } + exit(127); + } + + if (cmd->pid < 0) { + int err = errno; + if (need_in) + close_pair(fdin); + else if (cmd->in) + close(cmd->in); + if (need_out) + close_pair(fdout); + else if (cmd->out) + close(cmd->out); + if (need_err) + close_pair(fderr); + return err == ENOENT ? + -ERR_RUN_COMMAND_EXEC : + -ERR_RUN_COMMAND_FORK; + } + + if (need_in) + close(fdin[0]); + else if (cmd->in) + close(cmd->in); + + if (need_out) + close(fdout[1]); + else if (cmd->out) + close(cmd->out); + + if (need_err) + close(fderr[1]); + + return 0; +} + +static int wait_or_whine(pid_t pid) +{ + char sbuf[STRERR_BUFSIZE]; + + for (;;) { + int status, code; + pid_t waiting = waitpid(pid, &status, 0); + + if (waiting < 0) { + if (errno == EINTR) + continue; + fprintf(stderr, " Error: waitpid failed (%s)", + str_error_r(errno, sbuf, sizeof(sbuf))); + return -ERR_RUN_COMMAND_WAITPID; + } + if (waiting != pid) + return -ERR_RUN_COMMAND_WAITPID_WRONG_PID; + if (WIFSIGNALED(status)) + return -ERR_RUN_COMMAND_WAITPID_SIGNAL; + + if (!WIFEXITED(status)) + return -ERR_RUN_COMMAND_WAITPID_NOEXIT; + code = WEXITSTATUS(status); + switch (code) { + case 127: + return -ERR_RUN_COMMAND_EXEC; + case 0: + return 0; + default: + return -code; + } + } +} + +int finish_command(struct child_process *cmd) +{ + return wait_or_whine(cmd->pid); +} + +int run_command(struct child_process *cmd) +{ + int code = start_command(cmd); + if (code) + return code; + return finish_command(cmd); +} + +static void prepare_run_command_v_opt(struct child_process *cmd, + const char **argv, + int opt) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->argv = argv; + cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0; + cmd->exec_cmd = opt & RUN_EXEC_CMD ? 1 : 0; + cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0; +} + +int run_command_v_opt(const char **argv, int opt) +{ + struct child_process cmd; + prepare_run_command_v_opt(&cmd, argv, opt); + return run_command(&cmd); +} diff --git a/tools/lib/subcmd/run-command.h b/tools/lib/subcmd/run-command.h new file mode 100644 index 000000000..17d969c6a --- /dev/null +++ b/tools/lib/subcmd/run-command.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SUBCMD_RUN_COMMAND_H +#define __SUBCMD_RUN_COMMAND_H + +#include <unistd.h> + +enum { + ERR_RUN_COMMAND_FORK = 10000, + ERR_RUN_COMMAND_EXEC, + ERR_RUN_COMMAND_PIPE, + ERR_RUN_COMMAND_WAITPID, + ERR_RUN_COMMAND_WAITPID_WRONG_PID, + ERR_RUN_COMMAND_WAITPID_SIGNAL, + ERR_RUN_COMMAND_WAITPID_NOEXIT, +}; +#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK) + +struct child_process { + const char **argv; + pid_t pid; + /* + * Using .in, .out, .err: + * - Specify 0 for no redirections (child inherits stdin, stdout, + * stderr from parent). + * - Specify -1 to have a pipe allocated as follows: + * .in: returns the writable pipe end; parent writes to it, + * the readable pipe end becomes child's stdin + * .out, .err: returns the readable pipe end; parent reads from + * it, the writable pipe end becomes child's stdout/stderr + * The caller of start_command() must close the returned FDs + * after it has completed reading from/writing to it! + * - Specify > 0 to set a channel to a particular FD as follows: + * .in: a readable FD, becomes child's stdin + * .out: a writable FD, becomes child's stdout/stderr + * .err > 0 not supported + * The specified FD is closed by start_command(), even in case + * of errors! + */ + int in; + int out; + int err; + const char *dir; + const char *const *env; + unsigned no_stdin:1; + unsigned no_stdout:1; + unsigned no_stderr:1; + unsigned exec_cmd:1; /* if this is to be external sub-command */ + unsigned stdout_to_stderr:1; + void (*preexec_cb)(void); +}; + +int start_command(struct child_process *); +int finish_command(struct child_process *); +int run_command(struct child_process *); + +#define RUN_COMMAND_NO_STDIN 1 +#define RUN_EXEC_CMD 2 /*If this is to be external sub-command */ +#define RUN_COMMAND_STDOUT_TO_STDERR 4 +int run_command_v_opt(const char **argv, int opt); + +#endif /* __SUBCMD_RUN_COMMAND_H */ diff --git a/tools/lib/subcmd/sigchain.c b/tools/lib/subcmd/sigchain.c new file mode 100644 index 000000000..f0fe3dbef --- /dev/null +++ b/tools/lib/subcmd/sigchain.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <signal.h> +#include "subcmd-util.h" +#include "sigchain.h" + +#define SIGCHAIN_MAX_SIGNALS 32 + +struct sigchain_signal { + sigchain_fun *old; + int n; + int alloc; +}; +static struct sigchain_signal signals[SIGCHAIN_MAX_SIGNALS]; + +static void check_signum(int sig) +{ + if (sig < 1 || sig >= SIGCHAIN_MAX_SIGNALS) + die("BUG: signal out of range: %d", sig); +} + +static int sigchain_push(int sig, sigchain_fun f) +{ + struct sigchain_signal *s = signals + sig; + check_signum(sig); + + ALLOC_GROW(s->old, s->n + 1, s->alloc); + s->old[s->n] = signal(sig, f); + if (s->old[s->n] == SIG_ERR) + return -1; + s->n++; + return 0; +} + +int sigchain_pop(int sig) +{ + struct sigchain_signal *s = signals + sig; + check_signum(sig); + if (s->n < 1) + return 0; + + if (signal(sig, s->old[s->n - 1]) == SIG_ERR) + return -1; + s->n--; + return 0; +} + +void sigchain_push_common(sigchain_fun f) +{ + sigchain_push(SIGINT, f); + sigchain_push(SIGHUP, f); + sigchain_push(SIGTERM, f); + sigchain_push(SIGQUIT, f); + sigchain_push(SIGPIPE, f); +} diff --git a/tools/lib/subcmd/sigchain.h b/tools/lib/subcmd/sigchain.h new file mode 100644 index 000000000..1ec663af4 --- /dev/null +++ b/tools/lib/subcmd/sigchain.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SUBCMD_SIGCHAIN_H +#define __SUBCMD_SIGCHAIN_H + +typedef void (*sigchain_fun)(int); + +int sigchain_pop(int sig); + +void sigchain_push_common(sigchain_fun f); + +#endif /* __SUBCMD_SIGCHAIN_H */ diff --git a/tools/lib/subcmd/subcmd-config.c b/tools/lib/subcmd/subcmd-config.c new file mode 100644 index 000000000..84a7cf6c7 --- /dev/null +++ b/tools/lib/subcmd/subcmd-config.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "subcmd-config.h" + +#define UNDEFINED "SUBCMD_HAS_NOT_BEEN_INITIALIZED" + +struct subcmd_config subcmd_config = { + .exec_name = UNDEFINED, + .prefix = UNDEFINED, + .exec_path = UNDEFINED, + .exec_path_env = UNDEFINED, + .pager_env = UNDEFINED, +}; diff --git a/tools/lib/subcmd/subcmd-config.h b/tools/lib/subcmd/subcmd-config.h new file mode 100644 index 000000000..9024dc17d --- /dev/null +++ b/tools/lib/subcmd/subcmd-config.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_SUBCMD_CONFIG_H +#define __PERF_SUBCMD_CONFIG_H + +struct subcmd_config { + const char *exec_name; + const char *prefix; + const char *exec_path; + const char *exec_path_env; + const char *pager_env; +}; + +extern struct subcmd_config subcmd_config; + +#endif /* __PERF_SUBCMD_CONFIG_H */ diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h new file mode 100644 index 000000000..b2aec04fc --- /dev/null +++ b/tools/lib/subcmd/subcmd-util.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SUBCMD_UTIL_H +#define __SUBCMD_UTIL_H + +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> + +#define NORETURN __attribute__((__noreturn__)) + +static inline void report(const char *prefix, const char *err, va_list params) +{ + char msg[1024]; + vsnprintf(msg, sizeof(msg), err, params); + fprintf(stderr, " %s%s\n", prefix, msg); +} + +static NORETURN inline void die(const char *err, ...) +{ + va_list params; + + va_start(params, err); + report(" Fatal: ", err, params); + exit(128); + va_end(params); +} + +#define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) + +#define alloc_nr(x) (((x)+16)*3/2) + +/* + * Realloc the buffer pointed at by variable 'x' so that it can hold + * at least 'nr' entries; the number of entries currently allocated + * is 'alloc', using the standard growing factor alloc_nr() macro. + * + * DO NOT USE any expression with side-effect for 'x' or 'alloc'. + */ +#define ALLOC_GROW(x, nr, alloc) \ + do { \ + if ((nr) > alloc) { \ + if (alloc_nr(alloc) < (nr)) \ + alloc = (nr); \ + else \ + alloc = alloc_nr(alloc); \ + x = xrealloc((x), alloc * sizeof(*(x))); \ + } \ + } while(0) + +static inline void *xrealloc(void *ptr, size_t size) +{ + void *ret = realloc(ptr, size); + if (!ret) + die("Out of memory, realloc failed"); + return ret; +} + +#define astrcatf(out, fmt, ...) \ +({ \ + char *tmp = *(out); \ + if (asprintf((out), "%s" fmt, tmp ?: "", ## __VA_ARGS__) == -1) \ + die("asprintf failed"); \ + free(tmp); \ +}) + +static inline void astrcat(char **out, const char *add) +{ + char *tmp = *out; + + if (asprintf(out, "%s%s", tmp ?: "", add) == -1) + die("asprintf failed"); + + free(tmp); +} + +#endif /* __SUBCMD_UTIL_H */ diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c new file mode 100644 index 000000000..96d830545 --- /dev/null +++ b/tools/lib/symbol/kallsyms.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <ctype.h> +#include "symbol/kallsyms.h" +#include <stdio.h> +#include <stdlib.h> + +u8 kallsyms2elf_type(char type) +{ + type = tolower(type); + return (type == 't' || type == 'w') ? STT_FUNC : STT_OBJECT; +} + +bool kallsyms__is_function(char symbol_type) +{ + symbol_type = toupper(symbol_type); + return symbol_type == 'T' || symbol_type == 'W'; +} + +int kallsyms__parse(const char *filename, void *arg, + int (*process_symbol)(void *arg, const char *name, + char type, u64 start)) +{ + char *line = NULL; + size_t n; + int err = -1; + FILE *file = fopen(filename, "r"); + + if (file == NULL) + goto out_failure; + + err = 0; + + while (!feof(file)) { + u64 start; + int line_len, len; + char symbol_type; + char *symbol_name; + + line_len = getline(&line, &n, file); + if (line_len < 0 || !line) + break; + + line[--line_len] = '\0'; /* \n */ + + len = hex2u64(line, &start); + + /* Skip the line if we failed to parse the address. */ + if (!len) + continue; + + len++; + if (len + 2 >= line_len) + continue; + + symbol_type = line[len]; + len += 2; + symbol_name = line + len; + len = line_len - len; + + if (len >= KSYM_NAME_LEN) { + err = -1; + break; + } + + err = process_symbol(arg, symbol_name, symbol_type, start); + if (err) + break; + } + + free(line); + fclose(file); + return err; + +out_failure: + return -1; +} diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h new file mode 100644 index 000000000..72ab98704 --- /dev/null +++ b/tools/lib/symbol/kallsyms.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TOOLS_KALLSYMS_H_ +#define __TOOLS_KALLSYMS_H_ 1 + +#include <elf.h> +#include <linux/ctype.h> +#include <linux/types.h> + +#ifndef KSYM_NAME_LEN +#define KSYM_NAME_LEN 256 +#endif + +static inline u8 kallsyms2elf_binding(char type) +{ + if (type == 'W') + return STB_WEAK; + + return isupper(type) ? STB_GLOBAL : STB_LOCAL; +} + +u8 kallsyms2elf_type(char type); + +bool kallsyms__is_function(char symbol_type); + +int kallsyms__parse(const char *filename, void *arg, + int (*process_symbol)(void *arg, const char *name, + char type, u64 start)); + +#endif /* __TOOLS_KALLSYMS_H_ */ diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore new file mode 100644 index 000000000..9e9f25fb1 --- /dev/null +++ b/tools/lib/traceevent/.gitignore @@ -0,0 +1,3 @@ +TRACEEVENT-CFLAGS +libtraceevent-dynamic-list +libtraceevent.so.* diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build new file mode 100644 index 000000000..c681d0575 --- /dev/null +++ b/tools/lib/traceevent/Build @@ -0,0 +1,17 @@ +libtraceevent-y += event-parse.o +libtraceevent-y += event-plugin.o +libtraceevent-y += trace-seq.o +libtraceevent-y += parse-filter.o +libtraceevent-y += parse-utils.o +libtraceevent-y += kbuffer-parse.o + +plugin_jbd2-y += plugin_jbd2.o +plugin_hrtimer-y += plugin_hrtimer.o +plugin_kmem-y += plugin_kmem.o +plugin_kvm-y += plugin_kvm.o +plugin_mac80211-y += plugin_mac80211.o +plugin_sched_switch-y += plugin_sched_switch.o +plugin_function-y += plugin_function.o +plugin_xen-y += plugin_xen.o +plugin_scsi-y += plugin_scsi.o +plugin_cfg80211-y += plugin_cfg80211.o diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile new file mode 100644 index 000000000..b7f7e4e54 --- /dev/null +++ b/tools/lib/traceevent/Makefile @@ -0,0 +1,301 @@ +# SPDX-License-Identifier: GPL-2.0 +# trace-cmd version +EP_VERSION = 1 +EP_PATCHLEVEL = 1 +EP_EXTRAVERSION = 0 + +# file format version +FILE_VERSION = 6 + +MAKEFLAGS += --no-print-directory + + +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,NM,$(CROSS_COMPILE)nm) + +EXT = -std=gnu99 +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= /usr/local +libdir = $(prefix)/$(libdir_relative) +man_dir = $(prefix)/share/man +man_dir_SQ = '$(subst ','\'',$(man_dir))' + +export man_dir man_dir_SQ INSTALL +export DESTDIR DESTDIR_SQ + +set_plugin_dir := 1 + +# Set plugin_dir to preffered global plugin location +# If we install under $HOME directory we go under +# $(HOME)/.local/lib/traceevent/plugins +# +# We dont set PLUGIN_DIR in case we install under $HOME +# directory, because by default the code looks under: +# $(HOME)/.local/lib/traceevent/plugins by default. +# +ifeq ($(plugin_dir),) +ifeq ($(prefix),$(HOME)) +override plugin_dir = $(HOME)/.local/lib/traceevent/plugins +set_plugin_dir := 0 +else +override plugin_dir = $(libdir)/traceevent/plugins +endif +endif + +ifeq ($(set_plugin_dir),1) +PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)" +PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))' +endif + +include ../../scripts/Makefile.include + +# copy a bit from Linux kbuild + +ifeq ("$(origin V)", "command line") + VERBOSE = $(V) +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +export prefix libdir src obj + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) +libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) +plugin_dir_SQ = $(subst ','\'',$(plugin_dir)) + +CONFIG_INCLUDES = +CONFIG_LIBS = +CONFIG_FLAGS = + +VERSION = $(EP_VERSION) +PATCHLEVEL = $(EP_PATCHLEVEL) +EXTRAVERSION = $(EP_EXTRAVERSION) + +OBJ = $@ +N = + +EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) + +LIB_TARGET = libtraceevent.a libtraceevent.so.$(EVENT_PARSE_VERSION) +LIB_INSTALL = libtraceevent.a libtraceevent.so* +LIB_INSTALL := $(addprefix $(OUTPUT),$(LIB_INSTALL)) + +INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES) + +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif + +# Append required CFLAGS +override CFLAGS += -fPIC +override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) +override CFLAGS += $(udis86-flags) -D_GNU_SOURCE + +ifeq ($(VERBOSE),1) + Q = +else + Q = @ +endif + +# Disable command line variables (CFLAGS) override from top +# level Makefile (perf), otherwise build Makefile will get +# the same command line setup. +MAKEOVERRIDES= + +export srctree OUTPUT CC LD CFLAGS V +build := -f $(srctree)/tools/build/Makefile.build dir=. obj + +PLUGINS = plugin_jbd2.so +PLUGINS += plugin_hrtimer.so +PLUGINS += plugin_kmem.so +PLUGINS += plugin_kvm.so +PLUGINS += plugin_mac80211.so +PLUGINS += plugin_sched_switch.so +PLUGINS += plugin_function.so +PLUGINS += plugin_xen.so +PLUGINS += plugin_scsi.so +PLUGINS += plugin_cfg80211.so + +PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS)) +PLUGINS_IN := $(PLUGINS:.so=-in.o) + +TE_IN := $(OUTPUT)libtraceevent-in.o +LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) +DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list + +CMD_TARGETS = $(LIB_TARGET) $(PLUGINS) $(DYNAMIC_LIST_FILE) + +TARGETS = $(CMD_TARGETS) + +all: all_cmd + +all_cmd: $(CMD_TARGETS) + +$(TE_IN): force + $(Q)$(MAKE) $(build)=libtraceevent + +$(OUTPUT)libtraceevent.so.$(EVENT_PARSE_VERSION): $(TE_IN) + $(QUIET_LINK)$(CC) --shared $^ -Wl,-soname,libtraceevent.so.$(EP_VERSION) -o $@ + @ln -sf $(@F) $(OUTPUT)libtraceevent.so + @ln -sf $(@F) $(OUTPUT)libtraceevent.so.$(EP_VERSION) + +$(OUTPUT)libtraceevent.a: $(TE_IN) + $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ + +$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS) + $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@) + +plugins: $(PLUGINS) + +__plugin_obj = $(notdir $@) + plugin_obj = $(__plugin_obj:-in.o=) + +$(PLUGINS_IN): force + $(Q)$(MAKE) $(build)=$(plugin_obj) + +$(OUTPUT)%.so: $(OUTPUT)%-in.o + $(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $^ + +define make_version.h + (echo '/* This file is automatically generated. Do not modify. */'; \ + echo \#define VERSION_CODE $(shell \ + expr $(VERSION) \* 256 + $(PATCHLEVEL)); \ + echo '#define EXTRAVERSION ' $(EXTRAVERSION); \ + echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"'; \ + echo '#define FILE_VERSION '$(FILE_VERSION); \ + ) > $1 +endef + +define update_version.h + ($(call make_version.h, $@.tmp); \ + if [ -r $@ ] && cmp -s $@ $@.tmp; then \ + rm -f $@.tmp; \ + else \ + echo ' UPDATE $@'; \ + mv -f $@.tmp $@; \ + fi); +endef + +ep_version.h: force + $(Q)$(N)$(call update_version.h) + +VERSION_FILES = ep_version.h + +define update_dir + (echo $1 > $@.tmp; \ + if [ -r $@ ] && cmp -s $@ $@.tmp; then \ + rm -f $@.tmp; \ + else \ + echo ' UPDATE $@'; \ + mv -f $@.tmp $@; \ + fi); +endef + +tags: force + $(RM) tags + find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ + --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/' + +TAGS: force + $(RM) TAGS + find . -name '*.[ch]' | xargs etags \ + --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/' + +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + $(call do_install_mkdir,$2); \ + $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' +endef + +define do_install_plugins + for plugin in $1; do \ + $(call do_install,$$plugin,$(plugin_dir_SQ)); \ + done +endef + +define do_generate_dynamic_list_file + symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \ + xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\ + if [ "$$symbol_type" = "U W" ];then \ + (echo '{'; \ + $(NM) -u -D $1 | awk 'NF>1 {sub("@.*", "", $$2); print "\t"$$2";"}' | sort -u;\ + echo '};'; \ + ) > $2; \ + else \ + (echo Either missing one of [$1] or bad version of $(NM)) 1>&2;\ + fi +endef + +install_lib: all_cmd install_plugins + $(call QUIET_INSTALL, $(LIB_TARGET)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIB_INSTALL) $(DESTDIR)$(libdir_SQ) + +install_plugins: $(PLUGINS) + $(call QUIET_INSTALL, trace_plugins) \ + $(call do_install_plugins, $(PLUGINS)) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,event-parse.h,$(prefix)/include/traceevent,644); \ + $(call do_install,event-utils.h,$(prefix)/include/traceevent,644); \ + $(call do_install,kbuffer.h,$(prefix)/include/traceevent,644) + +install: install_lib + +clean: + $(call QUIET_CLEAN, libtraceevent) \ + $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd \ + $(RM) TRACEEVENT-CFLAGS tags TAGS + +PHONY += force plugins +force: + +# Declare the contents of the .PHONY variable as phony. We keep that +# information in a variable so we can use it in if_changed and friends. +.PHONY: $(PHONY) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c new file mode 100644 index 000000000..c0fcc8af2 --- /dev/null +++ b/tools/lib/traceevent/event-parse.c @@ -0,0 +1,6892 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * + * The parts for function graph printing was taken and modified from the + * Linux Kernel that were written by + * - Copyright (C) 2009 Frederic Weisbecker, + * Frederic Weisbecker gave his permission to relicense the code to + * the Lesser General Public License. + */ +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <ctype.h> +#include <errno.h> +#include <stdint.h> +#include <limits.h> +#include <linux/string.h> +#include <linux/time64.h> + +#include <netinet/in.h> +#include "event-parse.h" +#include "event-utils.h" + +static const char *input_buf; +static unsigned long long input_buf_ptr; +static unsigned long long input_buf_siz; + +static int is_flag_field; +static int is_symbolic_field; + +static int show_warning = 1; + +#define do_warning(fmt, ...) \ + do { \ + if (show_warning) \ + warning(fmt, ##__VA_ARGS__); \ + } while (0) + +#define do_warning_event(event, fmt, ...) \ + do { \ + if (!show_warning) \ + continue; \ + \ + if (event) \ + warning("[%s:%s] " fmt, event->system, \ + event->name, ##__VA_ARGS__); \ + else \ + warning(fmt, ##__VA_ARGS__); \ + } while (0) + +static void init_input_buf(const char *buf, unsigned long long size) +{ + input_buf = buf; + input_buf_siz = size; + input_buf_ptr = 0; +} + +const char *tep_get_input_buf(void) +{ + return input_buf; +} + +unsigned long long tep_get_input_buf_ptr(void) +{ + return input_buf_ptr; +} + +struct event_handler { + struct event_handler *next; + int id; + const char *sys_name; + const char *event_name; + tep_event_handler_func func; + void *context; +}; + +struct func_params { + struct func_params *next; + enum tep_func_arg_type type; +}; + +struct tep_function_handler { + struct tep_function_handler *next; + enum tep_func_arg_type ret_type; + char *name; + tep_func_handler func; + struct func_params *params; + int nr_args; +}; + +static unsigned long long +process_defined_func(struct trace_seq *s, void *data, int size, + struct event_format *event, struct print_arg *arg); + +static void free_func_handle(struct tep_function_handler *func); + +/** + * tep_buffer_init - init buffer for parsing + * @buf: buffer to parse + * @size: the size of the buffer + * + * For use with tep_read_token(), this initializes the internal + * buffer that tep_read_token() will parse. + */ +void tep_buffer_init(const char *buf, unsigned long long size) +{ + init_input_buf(buf, size); +} + +void breakpoint(void) +{ + static int x; + x++; +} + +struct print_arg *alloc_arg(void) +{ + return calloc(1, sizeof(struct print_arg)); +} + +struct cmdline { + char *comm; + int pid; +}; + +static int cmdline_cmp(const void *a, const void *b) +{ + const struct cmdline *ca = a; + const struct cmdline *cb = b; + + if (ca->pid < cb->pid) + return -1; + if (ca->pid > cb->pid) + return 1; + + return 0; +} + +struct cmdline_list { + struct cmdline_list *next; + char *comm; + int pid; +}; + +static int cmdline_init(struct tep_handle *pevent) +{ + struct cmdline_list *cmdlist = pevent->cmdlist; + struct cmdline_list *item; + struct cmdline *cmdlines; + int i; + + cmdlines = malloc(sizeof(*cmdlines) * pevent->cmdline_count); + if (!cmdlines) + return -1; + + i = 0; + while (cmdlist) { + cmdlines[i].pid = cmdlist->pid; + cmdlines[i].comm = cmdlist->comm; + i++; + item = cmdlist; + cmdlist = cmdlist->next; + free(item); + } + + qsort(cmdlines, pevent->cmdline_count, sizeof(*cmdlines), cmdline_cmp); + + pevent->cmdlines = cmdlines; + pevent->cmdlist = NULL; + + return 0; +} + +static const char *find_cmdline(struct tep_handle *pevent, int pid) +{ + const struct cmdline *comm; + struct cmdline key; + + if (!pid) + return "<idle>"; + + if (!pevent->cmdlines && cmdline_init(pevent)) + return "<not enough memory for cmdlines!>"; + + key.pid = pid; + + comm = bsearch(&key, pevent->cmdlines, pevent->cmdline_count, + sizeof(*pevent->cmdlines), cmdline_cmp); + + if (comm) + return comm->comm; + return "<...>"; +} + +/** + * tep_pid_is_registered - return if a pid has a cmdline registered + * @pevent: handle for the pevent + * @pid: The pid to check if it has a cmdline registered with. + * + * Returns 1 if the pid has a cmdline mapped to it + * 0 otherwise. + */ +int tep_pid_is_registered(struct tep_handle *pevent, int pid) +{ + const struct cmdline *comm; + struct cmdline key; + + if (!pid) + return 1; + + if (!pevent->cmdlines && cmdline_init(pevent)) + return 0; + + key.pid = pid; + + comm = bsearch(&key, pevent->cmdlines, pevent->cmdline_count, + sizeof(*pevent->cmdlines), cmdline_cmp); + + if (comm) + return 1; + return 0; +} + +/* + * If the command lines have been converted to an array, then + * we must add this pid. This is much slower than when cmdlines + * are added before the array is initialized. + */ +static int add_new_comm(struct tep_handle *pevent, const char *comm, int pid) +{ + struct cmdline *cmdlines = pevent->cmdlines; + const struct cmdline *cmdline; + struct cmdline key; + + if (!pid) + return 0; + + /* avoid duplicates */ + key.pid = pid; + + cmdline = bsearch(&key, pevent->cmdlines, pevent->cmdline_count, + sizeof(*pevent->cmdlines), cmdline_cmp); + if (cmdline) { + errno = EEXIST; + return -1; + } + + cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (pevent->cmdline_count + 1)); + if (!cmdlines) { + errno = ENOMEM; + return -1; + } + pevent->cmdlines = cmdlines; + + cmdlines[pevent->cmdline_count].comm = strdup(comm); + if (!cmdlines[pevent->cmdline_count].comm) { + errno = ENOMEM; + return -1; + } + + cmdlines[pevent->cmdline_count].pid = pid; + + if (cmdlines[pevent->cmdline_count].comm) + pevent->cmdline_count++; + + qsort(cmdlines, pevent->cmdline_count, sizeof(*cmdlines), cmdline_cmp); + + return 0; +} + +/** + * tep_register_comm - register a pid / comm mapping + * @pevent: handle for the pevent + * @comm: the command line to register + * @pid: the pid to map the command line to + * + * This adds a mapping to search for command line names with + * a given pid. The comm is duplicated. + */ +int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid) +{ + struct cmdline_list *item; + + if (pevent->cmdlines) + return add_new_comm(pevent, comm, pid); + + item = malloc(sizeof(*item)); + if (!item) + return -1; + + if (comm) + item->comm = strdup(comm); + else + item->comm = strdup("<...>"); + if (!item->comm) { + free(item); + return -1; + } + item->pid = pid; + item->next = pevent->cmdlist; + + pevent->cmdlist = item; + pevent->cmdline_count++; + + return 0; +} + +int tep_register_trace_clock(struct tep_handle *pevent, const char *trace_clock) +{ + pevent->trace_clock = strdup(trace_clock); + if (!pevent->trace_clock) { + errno = ENOMEM; + return -1; + } + return 0; +} + +struct func_map { + unsigned long long addr; + char *func; + char *mod; +}; + +struct func_list { + struct func_list *next; + unsigned long long addr; + char *func; + char *mod; +}; + +static int func_cmp(const void *a, const void *b) +{ + const struct func_map *fa = a; + const struct func_map *fb = b; + + if (fa->addr < fb->addr) + return -1; + if (fa->addr > fb->addr) + return 1; + + return 0; +} + +/* + * We are searching for a record in between, not an exact + * match. + */ +static int func_bcmp(const void *a, const void *b) +{ + const struct func_map *fa = a; + const struct func_map *fb = b; + + if ((fa->addr == fb->addr) || + + (fa->addr > fb->addr && + fa->addr < (fb+1)->addr)) + return 0; + + if (fa->addr < fb->addr) + return -1; + + return 1; +} + +static int func_map_init(struct tep_handle *pevent) +{ + struct func_list *funclist; + struct func_list *item; + struct func_map *func_map; + int i; + + func_map = malloc(sizeof(*func_map) * (pevent->func_count + 1)); + if (!func_map) + return -1; + + funclist = pevent->funclist; + + i = 0; + while (funclist) { + func_map[i].func = funclist->func; + func_map[i].addr = funclist->addr; + func_map[i].mod = funclist->mod; + i++; + item = funclist; + funclist = funclist->next; + free(item); + } + + qsort(func_map, pevent->func_count, sizeof(*func_map), func_cmp); + + /* + * Add a special record at the end. + */ + func_map[pevent->func_count].func = NULL; + func_map[pevent->func_count].addr = 0; + func_map[pevent->func_count].mod = NULL; + + pevent->func_map = func_map; + pevent->funclist = NULL; + + return 0; +} + +static struct func_map * +__find_func(struct tep_handle *pevent, unsigned long long addr) +{ + struct func_map *func; + struct func_map key; + + if (!pevent->func_map) + func_map_init(pevent); + + key.addr = addr; + + func = bsearch(&key, pevent->func_map, pevent->func_count, + sizeof(*pevent->func_map), func_bcmp); + + return func; +} + +struct func_resolver { + tep_func_resolver_t *func; + void *priv; + struct func_map map; +}; + +/** + * tep_set_function_resolver - set an alternative function resolver + * @pevent: handle for the pevent + * @resolver: function to be used + * @priv: resolver function private state. + * + * Some tools may have already a way to resolve kernel functions, allow them to + * keep using it instead of duplicating all the entries inside + * pevent->funclist. + */ +int tep_set_function_resolver(struct tep_handle *pevent, + tep_func_resolver_t *func, void *priv) +{ + struct func_resolver *resolver = malloc(sizeof(*resolver)); + + if (resolver == NULL) + return -1; + + resolver->func = func; + resolver->priv = priv; + + free(pevent->func_resolver); + pevent->func_resolver = resolver; + + return 0; +} + +/** + * tep_reset_function_resolver - reset alternative function resolver + * @pevent: handle for the pevent + * + * Stop using whatever alternative resolver was set, use the default + * one instead. + */ +void tep_reset_function_resolver(struct tep_handle *pevent) +{ + free(pevent->func_resolver); + pevent->func_resolver = NULL; +} + +static struct func_map * +find_func(struct tep_handle *pevent, unsigned long long addr) +{ + struct func_map *map; + + if (!pevent->func_resolver) + return __find_func(pevent, addr); + + map = &pevent->func_resolver->map; + map->mod = NULL; + map->addr = addr; + map->func = pevent->func_resolver->func(pevent->func_resolver->priv, + &map->addr, &map->mod); + if (map->func == NULL) + return NULL; + + return map; +} + +/** + * tep_find_function - find a function by a given address + * @pevent: handle for the pevent + * @addr: the address to find the function with + * + * Returns a pointer to the function stored that has the given + * address. Note, the address does not have to be exact, it + * will select the function that would contain the address. + */ +const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr) +{ + struct func_map *map; + + map = find_func(pevent, addr); + if (!map) + return NULL; + + return map->func; +} + +/** + * tep_find_function_address - find a function address by a given address + * @pevent: handle for the pevent + * @addr: the address to find the function with + * + * Returns the address the function starts at. This can be used in + * conjunction with tep_find_function to print both the function + * name and the function offset. + */ +unsigned long long +tep_find_function_address(struct tep_handle *pevent, unsigned long long addr) +{ + struct func_map *map; + + map = find_func(pevent, addr); + if (!map) + return 0; + + return map->addr; +} + +/** + * tep_register_function - register a function with a given address + * @pevent: handle for the pevent + * @function: the function name to register + * @addr: the address the function starts at + * @mod: the kernel module the function may be in (NULL for none) + * + * This registers a function name with an address and module. + * The @func passed in is duplicated. + */ +int tep_register_function(struct tep_handle *pevent, char *func, + unsigned long long addr, char *mod) +{ + struct func_list *item = malloc(sizeof(*item)); + + if (!item) + return -1; + + item->next = pevent->funclist; + item->func = strdup(func); + if (!item->func) + goto out_free; + + if (mod) { + item->mod = strdup(mod); + if (!item->mod) + goto out_free_func; + } else + item->mod = NULL; + item->addr = addr; + + pevent->funclist = item; + pevent->func_count++; + + return 0; + +out_free_func: + free(item->func); + item->func = NULL; +out_free: + free(item); + errno = ENOMEM; + return -1; +} + +/** + * tep_print_funcs - print out the stored functions + * @pevent: handle for the pevent + * + * This prints out the stored functions. + */ +void tep_print_funcs(struct tep_handle *pevent) +{ + int i; + + if (!pevent->func_map) + func_map_init(pevent); + + for (i = 0; i < (int)pevent->func_count; i++) { + printf("%016llx %s", + pevent->func_map[i].addr, + pevent->func_map[i].func); + if (pevent->func_map[i].mod) + printf(" [%s]\n", pevent->func_map[i].mod); + else + printf("\n"); + } +} + +struct printk_map { + unsigned long long addr; + char *printk; +}; + +struct printk_list { + struct printk_list *next; + unsigned long long addr; + char *printk; +}; + +static int printk_cmp(const void *a, const void *b) +{ + const struct printk_map *pa = a; + const struct printk_map *pb = b; + + if (pa->addr < pb->addr) + return -1; + if (pa->addr > pb->addr) + return 1; + + return 0; +} + +static int printk_map_init(struct tep_handle *pevent) +{ + struct printk_list *printklist; + struct printk_list *item; + struct printk_map *printk_map; + int i; + + printk_map = malloc(sizeof(*printk_map) * (pevent->printk_count + 1)); + if (!printk_map) + return -1; + + printklist = pevent->printklist; + + i = 0; + while (printklist) { + printk_map[i].printk = printklist->printk; + printk_map[i].addr = printklist->addr; + i++; + item = printklist; + printklist = printklist->next; + free(item); + } + + qsort(printk_map, pevent->printk_count, sizeof(*printk_map), printk_cmp); + + pevent->printk_map = printk_map; + pevent->printklist = NULL; + + return 0; +} + +static struct printk_map * +find_printk(struct tep_handle *pevent, unsigned long long addr) +{ + struct printk_map *printk; + struct printk_map key; + + if (!pevent->printk_map && printk_map_init(pevent)) + return NULL; + + key.addr = addr; + + printk = bsearch(&key, pevent->printk_map, pevent->printk_count, + sizeof(*pevent->printk_map), printk_cmp); + + return printk; +} + +/** + * tep_register_print_string - register a string by its address + * @pevent: handle for the pevent + * @fmt: the string format to register + * @addr: the address the string was located at + * + * This registers a string by the address it was stored in the kernel. + * The @fmt passed in is duplicated. + */ +int tep_register_print_string(struct tep_handle *pevent, const char *fmt, + unsigned long long addr) +{ + struct printk_list *item = malloc(sizeof(*item)); + char *p; + + if (!item) + return -1; + + item->next = pevent->printklist; + item->addr = addr; + + /* Strip off quotes and '\n' from the end */ + if (fmt[0] == '"') + fmt++; + item->printk = strdup(fmt); + if (!item->printk) + goto out_free; + + p = item->printk + strlen(item->printk) - 1; + if (*p == '"') + *p = 0; + + p -= 2; + if (strcmp(p, "\\n") == 0) + *p = 0; + + pevent->printklist = item; + pevent->printk_count++; + + return 0; + +out_free: + free(item); + errno = ENOMEM; + return -1; +} + +/** + * tep_print_printk - print out the stored strings + * @pevent: handle for the pevent + * + * This prints the string formats that were stored. + */ +void tep_print_printk(struct tep_handle *pevent) +{ + int i; + + if (!pevent->printk_map) + printk_map_init(pevent); + + for (i = 0; i < (int)pevent->printk_count; i++) { + printf("%016llx %s\n", + pevent->printk_map[i].addr, + pevent->printk_map[i].printk); + } +} + +static struct event_format *alloc_event(void) +{ + return calloc(1, sizeof(struct event_format)); +} + +static int add_event(struct tep_handle *pevent, struct event_format *event) +{ + int i; + struct event_format **events = realloc(pevent->events, sizeof(event) * + (pevent->nr_events + 1)); + if (!events) + return -1; + + pevent->events = events; + + for (i = 0; i < pevent->nr_events; i++) { + if (pevent->events[i]->id > event->id) + break; + } + if (i < pevent->nr_events) + memmove(&pevent->events[i + 1], + &pevent->events[i], + sizeof(event) * (pevent->nr_events - i)); + + pevent->events[i] = event; + pevent->nr_events++; + + event->pevent = pevent; + + return 0; +} + +static int event_item_type(enum event_type type) +{ + switch (type) { + case EVENT_ITEM ... EVENT_SQUOTE: + return 1; + case EVENT_ERROR ... EVENT_DELIM: + default: + return 0; + } +} + +static void free_flag_sym(struct print_flag_sym *fsym) +{ + struct print_flag_sym *next; + + while (fsym) { + next = fsym->next; + free(fsym->value); + free(fsym->str); + free(fsym); + fsym = next; + } +} + +static void free_arg(struct print_arg *arg) +{ + struct print_arg *farg; + + if (!arg) + return; + + switch (arg->type) { + case PRINT_ATOM: + free(arg->atom.atom); + break; + case PRINT_FIELD: + free(arg->field.name); + break; + case PRINT_FLAGS: + free_arg(arg->flags.field); + free(arg->flags.delim); + free_flag_sym(arg->flags.flags); + break; + case PRINT_SYMBOL: + free_arg(arg->symbol.field); + free_flag_sym(arg->symbol.symbols); + break; + case PRINT_HEX: + case PRINT_HEX_STR: + free_arg(arg->hex.field); + free_arg(arg->hex.size); + break; + case PRINT_INT_ARRAY: + free_arg(arg->int_array.field); + free_arg(arg->int_array.count); + free_arg(arg->int_array.el_size); + break; + case PRINT_TYPE: + free(arg->typecast.type); + free_arg(arg->typecast.item); + break; + case PRINT_STRING: + case PRINT_BSTRING: + free(arg->string.string); + break; + case PRINT_BITMASK: + free(arg->bitmask.bitmask); + break; + case PRINT_DYNAMIC_ARRAY: + case PRINT_DYNAMIC_ARRAY_LEN: + free(arg->dynarray.index); + break; + case PRINT_OP: + free(arg->op.op); + free_arg(arg->op.left); + free_arg(arg->op.right); + break; + case PRINT_FUNC: + while (arg->func.args) { + farg = arg->func.args; + arg->func.args = farg->next; + free_arg(farg); + } + break; + + case PRINT_NULL: + default: + break; + } + + free(arg); +} + +static enum event_type get_type(int ch) +{ + if (ch == '\n') + return EVENT_NEWLINE; + if (isspace(ch)) + return EVENT_SPACE; + if (isalnum(ch) || ch == '_') + return EVENT_ITEM; + if (ch == '\'') + return EVENT_SQUOTE; + if (ch == '"') + return EVENT_DQUOTE; + if (!isprint(ch)) + return EVENT_NONE; + if (ch == '(' || ch == ')' || ch == ',') + return EVENT_DELIM; + + return EVENT_OP; +} + +static int __read_char(void) +{ + if (input_buf_ptr >= input_buf_siz) + return -1; + + return input_buf[input_buf_ptr++]; +} + +static int __peek_char(void) +{ + if (input_buf_ptr >= input_buf_siz) + return -1; + + return input_buf[input_buf_ptr]; +} + +/** + * tep_peek_char - peek at the next character that will be read + * + * Returns the next character read, or -1 if end of buffer. + */ +int tep_peek_char(void) +{ + return __peek_char(); +} + +static int extend_token(char **tok, char *buf, int size) +{ + char *newtok = realloc(*tok, size); + + if (!newtok) { + free(*tok); + *tok = NULL; + return -1; + } + + if (!*tok) + strcpy(newtok, buf); + else + strcat(newtok, buf); + *tok = newtok; + + return 0; +} + +static enum event_type force_token(const char *str, char **tok); + +static enum event_type __read_token(char **tok) +{ + char buf[BUFSIZ]; + int ch, last_ch, quote_ch, next_ch; + int i = 0; + int tok_size = 0; + enum event_type type; + + *tok = NULL; + + + ch = __read_char(); + if (ch < 0) + return EVENT_NONE; + + type = get_type(ch); + if (type == EVENT_NONE) + return type; + + buf[i++] = ch; + + switch (type) { + case EVENT_NEWLINE: + case EVENT_DELIM: + if (asprintf(tok, "%c", ch) < 0) + return EVENT_ERROR; + + return type; + + case EVENT_OP: + switch (ch) { + case '-': + next_ch = __peek_char(); + if (next_ch == '>') { + buf[i++] = __read_char(); + break; + } + /* fall through */ + case '+': + case '|': + case '&': + case '>': + case '<': + last_ch = ch; + ch = __peek_char(); + if (ch != last_ch) + goto test_equal; + buf[i++] = __read_char(); + switch (last_ch) { + case '>': + case '<': + goto test_equal; + default: + break; + } + break; + case '!': + case '=': + goto test_equal; + default: /* what should we do instead? */ + break; + } + buf[i] = 0; + *tok = strdup(buf); + return type; + + test_equal: + ch = __peek_char(); + if (ch == '=') + buf[i++] = __read_char(); + goto out; + + case EVENT_DQUOTE: + case EVENT_SQUOTE: + /* don't keep quotes */ + i--; + quote_ch = ch; + last_ch = 0; + concat: + do { + if (i == (BUFSIZ - 1)) { + buf[i] = 0; + tok_size += BUFSIZ; + + if (extend_token(tok, buf, tok_size) < 0) + return EVENT_NONE; + i = 0; + } + last_ch = ch; + ch = __read_char(); + buf[i++] = ch; + /* the '\' '\' will cancel itself */ + if (ch == '\\' && last_ch == '\\') + last_ch = 0; + } while (ch != quote_ch || last_ch == '\\'); + /* remove the last quote */ + i--; + + /* + * For strings (double quotes) check the next token. + * If it is another string, concatinate the two. + */ + if (type == EVENT_DQUOTE) { + unsigned long long save_input_buf_ptr = input_buf_ptr; + + do { + ch = __read_char(); + } while (isspace(ch)); + if (ch == '"') + goto concat; + input_buf_ptr = save_input_buf_ptr; + } + + goto out; + + case EVENT_ERROR ... EVENT_SPACE: + case EVENT_ITEM: + default: + break; + } + + while (get_type(__peek_char()) == type) { + if (i == (BUFSIZ - 1)) { + buf[i] = 0; + tok_size += BUFSIZ; + + if (extend_token(tok, buf, tok_size) < 0) + return EVENT_NONE; + i = 0; + } + ch = __read_char(); + buf[i++] = ch; + } + + out: + buf[i] = 0; + if (extend_token(tok, buf, tok_size + i + 1) < 0) + return EVENT_NONE; + + if (type == EVENT_ITEM) { + /* + * Older versions of the kernel has a bug that + * creates invalid symbols and will break the mac80211 + * parsing. This is a work around to that bug. + * + * See Linux kernel commit: + * 811cb50baf63461ce0bdb234927046131fc7fa8b + */ + if (strcmp(*tok, "LOCAL_PR_FMT") == 0) { + free(*tok); + *tok = NULL; + return force_token("\"%s\" ", tok); + } else if (strcmp(*tok, "STA_PR_FMT") == 0) { + free(*tok); + *tok = NULL; + return force_token("\" sta:%pM\" ", tok); + } else if (strcmp(*tok, "VIF_PR_FMT") == 0) { + free(*tok); + *tok = NULL; + return force_token("\" vif:%p(%d)\" ", tok); + } + } + + return type; +} + +static enum event_type force_token(const char *str, char **tok) +{ + const char *save_input_buf; + unsigned long long save_input_buf_ptr; + unsigned long long save_input_buf_siz; + enum event_type type; + + /* save off the current input pointers */ + save_input_buf = input_buf; + save_input_buf_ptr = input_buf_ptr; + save_input_buf_siz = input_buf_siz; + + init_input_buf(str, strlen(str)); + + type = __read_token(tok); + + /* reset back to original token */ + input_buf = save_input_buf; + input_buf_ptr = save_input_buf_ptr; + input_buf_siz = save_input_buf_siz; + + return type; +} + +static void free_token(char *tok) +{ + if (tok) + free(tok); +} + +static enum event_type read_token(char **tok) +{ + enum event_type type; + + for (;;) { + type = __read_token(tok); + if (type != EVENT_SPACE) + return type; + + free_token(*tok); + } + + /* not reached */ + *tok = NULL; + return EVENT_NONE; +} + +/** + * tep_read_token - access to utilites to use the pevent parser + * @tok: The token to return + * + * This will parse tokens from the string given by + * tep_init_data(). + * + * Returns the token type. + */ +enum event_type tep_read_token(char **tok) +{ + return read_token(tok); +} + +/** + * tep_free_token - free a token returned by tep_read_token + * @token: the token to free + */ +void tep_free_token(char *token) +{ + free_token(token); +} + +/* no newline */ +static enum event_type read_token_item(char **tok) +{ + enum event_type type; + + for (;;) { + type = __read_token(tok); + if (type != EVENT_SPACE && type != EVENT_NEWLINE) + return type; + free_token(*tok); + *tok = NULL; + } + + /* not reached */ + *tok = NULL; + return EVENT_NONE; +} + +static int test_type(enum event_type type, enum event_type expect) +{ + if (type != expect) { + do_warning("Error: expected type %d but read %d", + expect, type); + return -1; + } + return 0; +} + +static int test_type_token(enum event_type type, const char *token, + enum event_type expect, const char *expect_tok) +{ + if (type != expect) { + do_warning("Error: expected type %d but read %d", + expect, type); + return -1; + } + + if (strcmp(token, expect_tok) != 0) { + do_warning("Error: expected '%s' but read '%s'", + expect_tok, token); + return -1; + } + return 0; +} + +static int __read_expect_type(enum event_type expect, char **tok, int newline_ok) +{ + enum event_type type; + + if (newline_ok) + type = read_token(tok); + else + type = read_token_item(tok); + return test_type(type, expect); +} + +static int read_expect_type(enum event_type expect, char **tok) +{ + return __read_expect_type(expect, tok, 1); +} + +static int __read_expected(enum event_type expect, const char *str, + int newline_ok) +{ + enum event_type type; + char *token; + int ret; + + if (newline_ok) + type = read_token(&token); + else + type = read_token_item(&token); + + ret = test_type_token(type, token, expect, str); + + free_token(token); + + return ret; +} + +static int read_expected(enum event_type expect, const char *str) +{ + return __read_expected(expect, str, 1); +} + +static int read_expected_item(enum event_type expect, const char *str) +{ + return __read_expected(expect, str, 0); +} + +static char *event_read_name(void) +{ + char *token; + + if (read_expected(EVENT_ITEM, "name") < 0) + return NULL; + + if (read_expected(EVENT_OP, ":") < 0) + return NULL; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + + return token; + + fail: + free_token(token); + return NULL; +} + +static int event_read_id(void) +{ + char *token; + int id; + + if (read_expected_item(EVENT_ITEM, "ID") < 0) + return -1; + + if (read_expected(EVENT_OP, ":") < 0) + return -1; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + + id = strtoul(token, NULL, 0); + free_token(token); + return id; + + fail: + free_token(token); + return -1; +} + +static int field_is_string(struct format_field *field) +{ + if ((field->flags & FIELD_IS_ARRAY) && + (strstr(field->type, "char") || strstr(field->type, "u8") || + strstr(field->type, "s8"))) + return 1; + + return 0; +} + +static int field_is_dynamic(struct format_field *field) +{ + if (strncmp(field->type, "__data_loc", 10) == 0) + return 1; + + return 0; +} + +static int field_is_long(struct format_field *field) +{ + /* includes long long */ + if (strstr(field->type, "long")) + return 1; + + return 0; +} + +static unsigned int type_size(const char *name) +{ + /* This covers all FIELD_IS_STRING types. */ + static struct { + const char *type; + unsigned int size; + } table[] = { + { "u8", 1 }, + { "u16", 2 }, + { "u32", 4 }, + { "u64", 8 }, + { "s8", 1 }, + { "s16", 2 }, + { "s32", 4 }, + { "s64", 8 }, + { "char", 1 }, + { }, + }; + int i; + + for (i = 0; table[i].type; i++) { + if (!strcmp(table[i].type, name)) + return table[i].size; + } + + return 0; +} + +static int event_read_fields(struct event_format *event, struct format_field **fields) +{ + struct format_field *field = NULL; + enum event_type type; + char *token; + char *last_token; + int count = 0; + + do { + unsigned int size_dynamic = 0; + + type = read_token(&token); + if (type == EVENT_NEWLINE) { + free_token(token); + return count; + } + + count++; + + if (test_type_token(type, token, EVENT_ITEM, "field")) + goto fail; + free_token(token); + + type = read_token(&token); + /* + * The ftrace fields may still use the "special" name. + * Just ignore it. + */ + if (event->flags & EVENT_FL_ISFTRACE && + type == EVENT_ITEM && strcmp(token, "special") == 0) { + free_token(token); + type = read_token(&token); + } + + if (test_type_token(type, token, EVENT_OP, ":") < 0) + goto fail; + + free_token(token); + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + + last_token = token; + + field = calloc(1, sizeof(*field)); + if (!field) + goto fail; + + field->event = event; + + /* read the rest of the type */ + for (;;) { + type = read_token(&token); + if (type == EVENT_ITEM || + (type == EVENT_OP && strcmp(token, "*") == 0) || + /* + * Some of the ftrace fields are broken and have + * an illegal "." in them. + */ + (event->flags & EVENT_FL_ISFTRACE && + type == EVENT_OP && strcmp(token, ".") == 0)) { + + if (strcmp(token, "*") == 0) + field->flags |= FIELD_IS_POINTER; + + if (field->type) { + char *new_type; + new_type = realloc(field->type, + strlen(field->type) + + strlen(last_token) + 2); + if (!new_type) { + free(last_token); + goto fail; + } + field->type = new_type; + strcat(field->type, " "); + strcat(field->type, last_token); + free(last_token); + } else + field->type = last_token; + last_token = token; + continue; + } + + break; + } + + if (!field->type) { + do_warning_event(event, "%s: no type found", __func__); + goto fail; + } + field->name = field->alias = last_token; + + if (test_type(type, EVENT_OP)) + goto fail; + + if (strcmp(token, "[") == 0) { + enum event_type last_type = type; + char *brackets = token; + char *new_brackets; + int len; + + field->flags |= FIELD_IS_ARRAY; + + type = read_token(&token); + + if (type == EVENT_ITEM) + field->arraylen = strtoul(token, NULL, 0); + else + field->arraylen = 0; + + while (strcmp(token, "]") != 0) { + if (last_type == EVENT_ITEM && + type == EVENT_ITEM) + len = 2; + else + len = 1; + last_type = type; + + new_brackets = realloc(brackets, + strlen(brackets) + + strlen(token) + len); + if (!new_brackets) { + free(brackets); + goto fail; + } + brackets = new_brackets; + if (len == 2) + strcat(brackets, " "); + strcat(brackets, token); + /* We only care about the last token */ + field->arraylen = strtoul(token, NULL, 0); + free_token(token); + type = read_token(&token); + if (type == EVENT_NONE) { + do_warning_event(event, "failed to find token"); + goto fail; + } + } + + free_token(token); + + new_brackets = realloc(brackets, strlen(brackets) + 2); + if (!new_brackets) { + free(brackets); + goto fail; + } + brackets = new_brackets; + strcat(brackets, "]"); + + /* add brackets to type */ + + type = read_token(&token); + /* + * If the next token is not an OP, then it is of + * the format: type [] item; + */ + if (type == EVENT_ITEM) { + char *new_type; + new_type = realloc(field->type, + strlen(field->type) + + strlen(field->name) + + strlen(brackets) + 2); + if (!new_type) { + free(brackets); + goto fail; + } + field->type = new_type; + strcat(field->type, " "); + strcat(field->type, field->name); + size_dynamic = type_size(field->name); + free_token(field->name); + strcat(field->type, brackets); + field->name = field->alias = token; + type = read_token(&token); + } else { + char *new_type; + new_type = realloc(field->type, + strlen(field->type) + + strlen(brackets) + 1); + if (!new_type) { + free(brackets); + goto fail; + } + field->type = new_type; + strcat(field->type, brackets); + } + free(brackets); + } + + if (field_is_string(field)) + field->flags |= FIELD_IS_STRING; + if (field_is_dynamic(field)) + field->flags |= FIELD_IS_DYNAMIC; + if (field_is_long(field)) + field->flags |= FIELD_IS_LONG; + + if (test_type_token(type, token, EVENT_OP, ";")) + goto fail; + free_token(token); + + if (read_expected(EVENT_ITEM, "offset") < 0) + goto fail_expect; + + if (read_expected(EVENT_OP, ":") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + field->offset = strtoul(token, NULL, 0); + free_token(token); + + if (read_expected(EVENT_OP, ";") < 0) + goto fail_expect; + + if (read_expected(EVENT_ITEM, "size") < 0) + goto fail_expect; + + if (read_expected(EVENT_OP, ":") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + field->size = strtoul(token, NULL, 0); + free_token(token); + + if (read_expected(EVENT_OP, ";") < 0) + goto fail_expect; + + type = read_token(&token); + if (type != EVENT_NEWLINE) { + /* newer versions of the kernel have a "signed" type */ + if (test_type_token(type, token, EVENT_ITEM, "signed")) + goto fail; + + free_token(token); + + if (read_expected(EVENT_OP, ":") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + + if (strtoul(token, NULL, 0)) + field->flags |= FIELD_IS_SIGNED; + + free_token(token); + if (read_expected(EVENT_OP, ";") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_NEWLINE, &token)) + goto fail; + } + + free_token(token); + + if (field->flags & FIELD_IS_ARRAY) { + if (field->arraylen) + field->elementsize = field->size / field->arraylen; + else if (field->flags & FIELD_IS_DYNAMIC) + field->elementsize = size_dynamic; + else if (field->flags & FIELD_IS_STRING) + field->elementsize = 1; + else if (field->flags & FIELD_IS_LONG) + field->elementsize = event->pevent ? + event->pevent->long_size : + sizeof(long); + } else + field->elementsize = field->size; + + *fields = field; + fields = &field->next; + + } while (1); + + return 0; + +fail: + free_token(token); +fail_expect: + if (field) { + free(field->type); + free(field->name); + free(field); + } + return -1; +} + +static int event_read_format(struct event_format *event) +{ + char *token; + int ret; + + if (read_expected_item(EVENT_ITEM, "format") < 0) + return -1; + + if (read_expected(EVENT_OP, ":") < 0) + return -1; + + if (read_expect_type(EVENT_NEWLINE, &token)) + goto fail; + free_token(token); + + ret = event_read_fields(event, &event->format.common_fields); + if (ret < 0) + return ret; + event->format.nr_common = ret; + + ret = event_read_fields(event, &event->format.fields); + if (ret < 0) + return ret; + event->format.nr_fields = ret; + + return 0; + + fail: + free_token(token); + return -1; +} + +static enum event_type +process_arg_token(struct event_format *event, struct print_arg *arg, + char **tok, enum event_type type); + +static enum event_type +process_arg(struct event_format *event, struct print_arg *arg, char **tok) +{ + enum event_type type; + char *token; + + type = read_token(&token); + *tok = token; + + return process_arg_token(event, arg, tok, type); +} + +static enum event_type +process_op(struct event_format *event, struct print_arg *arg, char **tok); + +/* + * For __print_symbolic() and __print_flags, we need to completely + * evaluate the first argument, which defines what to print next. + */ +static enum event_type +process_field_arg(struct event_format *event, struct print_arg *arg, char **tok) +{ + enum event_type type; + + type = process_arg(event, arg, tok); + + while (type == EVENT_OP) { + type = process_op(event, arg, tok); + } + + return type; +} + +static enum event_type +process_cond(struct event_format *event, struct print_arg *top, char **tok) +{ + struct print_arg *arg, *left, *right; + enum event_type type; + char *token = NULL; + + arg = alloc_arg(); + left = alloc_arg(); + right = alloc_arg(); + + if (!arg || !left || !right) { + do_warning_event(event, "%s: not enough memory!", __func__); + /* arg will be freed at out_free */ + free_arg(left); + free_arg(right); + goto out_free; + } + + arg->type = PRINT_OP; + arg->op.left = left; + arg->op.right = right; + + *tok = NULL; + type = process_arg(event, left, &token); + + again: + if (type == EVENT_ERROR) + goto out_free; + + /* Handle other operations in the arguments */ + if (type == EVENT_OP && strcmp(token, ":") != 0) { + type = process_op(event, left, &token); + goto again; + } + + if (test_type_token(type, token, EVENT_OP, ":")) + goto out_free; + + arg->op.op = token; + + type = process_arg(event, right, &token); + + top->op.right = arg; + + *tok = token; + return type; + +out_free: + /* Top may point to itself */ + top->op.right = NULL; + free_token(token); + free_arg(arg); + return EVENT_ERROR; +} + +static enum event_type +process_array(struct event_format *event, struct print_arg *top, char **tok) +{ + struct print_arg *arg; + enum event_type type; + char *token = NULL; + + arg = alloc_arg(); + if (!arg) { + do_warning_event(event, "%s: not enough memory!", __func__); + /* '*tok' is set to top->op.op. No need to free. */ + *tok = NULL; + return EVENT_ERROR; + } + + *tok = NULL; + type = process_arg(event, arg, &token); + if (test_type_token(type, token, EVENT_OP, "]")) + goto out_free; + + top->op.right = arg; + + free_token(token); + type = read_token_item(&token); + *tok = token; + + return type; + +out_free: + free_token(token); + free_arg(arg); + return EVENT_ERROR; +} + +static int get_op_prio(char *op) +{ + if (!op[1]) { + switch (op[0]) { + case '~': + case '!': + return 4; + case '*': + case '/': + case '%': + return 6; + case '+': + case '-': + return 7; + /* '>>' and '<<' are 8 */ + case '<': + case '>': + return 9; + /* '==' and '!=' are 10 */ + case '&': + return 11; + case '^': + return 12; + case '|': + return 13; + case '?': + return 16; + default: + do_warning("unknown op '%c'", op[0]); + return -1; + } + } else { + if (strcmp(op, "++") == 0 || + strcmp(op, "--") == 0) { + return 3; + } else if (strcmp(op, ">>") == 0 || + strcmp(op, "<<") == 0) { + return 8; + } else if (strcmp(op, ">=") == 0 || + strcmp(op, "<=") == 0) { + return 9; + } else if (strcmp(op, "==") == 0 || + strcmp(op, "!=") == 0) { + return 10; + } else if (strcmp(op, "&&") == 0) { + return 14; + } else if (strcmp(op, "||") == 0) { + return 15; + } else { + do_warning("unknown op '%s'", op); + return -1; + } + } +} + +static int set_op_prio(struct print_arg *arg) +{ + + /* single ops are the greatest */ + if (!arg->op.left || arg->op.left->type == PRINT_NULL) + arg->op.prio = 0; + else + arg->op.prio = get_op_prio(arg->op.op); + + return arg->op.prio; +} + +/* Note, *tok does not get freed, but will most likely be saved */ +static enum event_type +process_op(struct event_format *event, struct print_arg *arg, char **tok) +{ + struct print_arg *left, *right = NULL; + enum event_type type; + char *token; + + /* the op is passed in via tok */ + token = *tok; + + if (arg->type == PRINT_OP && !arg->op.left) { + /* handle single op */ + if (token[1]) { + do_warning_event(event, "bad op token %s", token); + goto out_free; + } + switch (token[0]) { + case '~': + case '!': + case '+': + case '-': + break; + default: + do_warning_event(event, "bad op token %s", token); + goto out_free; + + } + + /* make an empty left */ + left = alloc_arg(); + if (!left) + goto out_warn_free; + + left->type = PRINT_NULL; + arg->op.left = left; + + right = alloc_arg(); + if (!right) + goto out_warn_free; + + arg->op.right = right; + + /* do not free the token, it belongs to an op */ + *tok = NULL; + type = process_arg(event, right, tok); + + } else if (strcmp(token, "?") == 0) { + + left = alloc_arg(); + if (!left) + goto out_warn_free; + + /* copy the top arg to the left */ + *left = *arg; + + arg->type = PRINT_OP; + arg->op.op = token; + arg->op.left = left; + arg->op.prio = 0; + + /* it will set arg->op.right */ + type = process_cond(event, arg, tok); + + } else if (strcmp(token, ">>") == 0 || + strcmp(token, "<<") == 0 || + strcmp(token, "&") == 0 || + strcmp(token, "|") == 0 || + strcmp(token, "&&") == 0 || + strcmp(token, "||") == 0 || + strcmp(token, "-") == 0 || + strcmp(token, "+") == 0 || + strcmp(token, "*") == 0 || + strcmp(token, "^") == 0 || + strcmp(token, "/") == 0 || + strcmp(token, "%") == 0 || + strcmp(token, "<") == 0 || + strcmp(token, ">") == 0 || + strcmp(token, "<=") == 0 || + strcmp(token, ">=") == 0 || + strcmp(token, "==") == 0 || + strcmp(token, "!=") == 0) { + + left = alloc_arg(); + if (!left) + goto out_warn_free; + + /* copy the top arg to the left */ + *left = *arg; + + arg->type = PRINT_OP; + arg->op.op = token; + arg->op.left = left; + arg->op.right = NULL; + + if (set_op_prio(arg) == -1) { + event->flags |= EVENT_FL_FAILED; + /* arg->op.op (= token) will be freed at out_free */ + arg->op.op = NULL; + goto out_free; + } + + type = read_token_item(&token); + *tok = token; + + /* could just be a type pointer */ + if ((strcmp(arg->op.op, "*") == 0) && + type == EVENT_DELIM && (strcmp(token, ")") == 0)) { + char *new_atom; + + if (left->type != PRINT_ATOM) { + do_warning_event(event, "bad pointer type"); + goto out_free; + } + new_atom = realloc(left->atom.atom, + strlen(left->atom.atom) + 3); + if (!new_atom) + goto out_warn_free; + + left->atom.atom = new_atom; + strcat(left->atom.atom, " *"); + free(arg->op.op); + *arg = *left; + free(left); + + return type; + } + + right = alloc_arg(); + if (!right) + goto out_warn_free; + + type = process_arg_token(event, right, tok, type); + if (type == EVENT_ERROR) { + free_arg(right); + /* token was freed in process_arg_token() via *tok */ + token = NULL; + goto out_free; + } + + if (right->type == PRINT_OP && + get_op_prio(arg->op.op) < get_op_prio(right->op.op)) { + struct print_arg tmp; + + /* rotate ops according to the priority */ + arg->op.right = right->op.left; + + tmp = *arg; + *arg = *right; + *right = tmp; + + arg->op.left = right; + } else { + arg->op.right = right; + } + + } else if (strcmp(token, "[") == 0) { + + left = alloc_arg(); + if (!left) + goto out_warn_free; + + *left = *arg; + + arg->type = PRINT_OP; + arg->op.op = token; + arg->op.left = left; + + arg->op.prio = 0; + + /* it will set arg->op.right */ + type = process_array(event, arg, tok); + + } else { + do_warning_event(event, "unknown op '%s'", token); + event->flags |= EVENT_FL_FAILED; + /* the arg is now the left side */ + goto out_free; + } + + if (type == EVENT_OP && strcmp(*tok, ":") != 0) { + int prio; + + /* higher prios need to be closer to the root */ + prio = get_op_prio(*tok); + + if (prio > arg->op.prio) + return process_op(event, arg, tok); + + return process_op(event, right, tok); + } + + return type; + +out_warn_free: + do_warning_event(event, "%s: not enough memory!", __func__); +out_free: + free_token(token); + *tok = NULL; + return EVENT_ERROR; +} + +static enum event_type +process_entry(struct event_format *event __maybe_unused, struct print_arg *arg, + char **tok) +{ + enum event_type type; + char *field; + char *token; + + if (read_expected(EVENT_OP, "->") < 0) + goto out_err; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto out_free; + field = token; + + arg->type = PRINT_FIELD; + arg->field.name = field; + + if (is_flag_field) { + arg->field.field = tep_find_any_field(event, arg->field.name); + arg->field.field->flags |= FIELD_IS_FLAG; + is_flag_field = 0; + } else if (is_symbolic_field) { + arg->field.field = tep_find_any_field(event, arg->field.name); + arg->field.field->flags |= FIELD_IS_SYMBOLIC; + is_symbolic_field = 0; + } + + type = read_token(&token); + *tok = token; + + return type; + + out_free: + free_token(token); + out_err: + *tok = NULL; + return EVENT_ERROR; +} + +static int alloc_and_process_delim(struct event_format *event, char *next_token, + struct print_arg **print_arg) +{ + struct print_arg *field; + enum event_type type; + char *token; + int ret = 0; + + field = alloc_arg(); + if (!field) { + do_warning_event(event, "%s: not enough memory!", __func__); + errno = ENOMEM; + return -1; + } + + type = process_arg(event, field, &token); + + if (test_type_token(type, token, EVENT_DELIM, next_token)) { + errno = EINVAL; + ret = -1; + free_arg(field); + goto out_free_token; + } + + *print_arg = field; + +out_free_token: + free_token(token); + + return ret; +} + +static char *arg_eval (struct print_arg *arg); + +static unsigned long long +eval_type_str(unsigned long long val, const char *type, int pointer) +{ + int sign = 0; + char *ref; + int len; + + len = strlen(type); + + if (pointer) { + + if (type[len-1] != '*') { + do_warning("pointer expected with non pointer type"); + return val; + } + + ref = malloc(len); + if (!ref) { + do_warning("%s: not enough memory!", __func__); + return val; + } + memcpy(ref, type, len); + + /* chop off the " *" */ + ref[len - 2] = 0; + + val = eval_type_str(val, ref, 0); + free(ref); + return val; + } + + /* check if this is a pointer */ + if (type[len - 1] == '*') + return val; + + /* Try to figure out the arg size*/ + if (strncmp(type, "struct", 6) == 0) + /* all bets off */ + return val; + + if (strcmp(type, "u8") == 0) + return val & 0xff; + + if (strcmp(type, "u16") == 0) + return val & 0xffff; + + if (strcmp(type, "u32") == 0) + return val & 0xffffffff; + + if (strcmp(type, "u64") == 0 || + strcmp(type, "s64") == 0) + return val; + + if (strcmp(type, "s8") == 0) + return (unsigned long long)(char)val & 0xff; + + if (strcmp(type, "s16") == 0) + return (unsigned long long)(short)val & 0xffff; + + if (strcmp(type, "s32") == 0) + return (unsigned long long)(int)val & 0xffffffff; + + if (strncmp(type, "unsigned ", 9) == 0) { + sign = 0; + type += 9; + } + + if (strcmp(type, "char") == 0) { + if (sign) + return (unsigned long long)(char)val & 0xff; + else + return val & 0xff; + } + + if (strcmp(type, "short") == 0) { + if (sign) + return (unsigned long long)(short)val & 0xffff; + else + return val & 0xffff; + } + + if (strcmp(type, "int") == 0) { + if (sign) + return (unsigned long long)(int)val & 0xffffffff; + else + return val & 0xffffffff; + } + + return val; +} + +/* + * Try to figure out the type. + */ +static unsigned long long +eval_type(unsigned long long val, struct print_arg *arg, int pointer) +{ + if (arg->type != PRINT_TYPE) { + do_warning("expected type argument"); + return 0; + } + + return eval_type_str(val, arg->typecast.type, pointer); +} + +static int arg_num_eval(struct print_arg *arg, long long *val) +{ + long long left, right; + int ret = 1; + + switch (arg->type) { + case PRINT_ATOM: + *val = strtoll(arg->atom.atom, NULL, 0); + break; + case PRINT_TYPE: + ret = arg_num_eval(arg->typecast.item, val); + if (!ret) + break; + *val = eval_type(*val, arg, 0); + break; + case PRINT_OP: + switch (arg->op.op[0]) { + case '|': + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + if (arg->op.op[1]) + *val = left || right; + else + *val = left | right; + break; + case '&': + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + if (arg->op.op[1]) + *val = left && right; + else + *val = left & right; + break; + case '<': + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + switch (arg->op.op[1]) { + case 0: + *val = left < right; + break; + case '<': + *val = left << right; + break; + case '=': + *val = left <= right; + break; + default: + do_warning("unknown op '%s'", arg->op.op); + ret = 0; + } + break; + case '>': + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + switch (arg->op.op[1]) { + case 0: + *val = left > right; + break; + case '>': + *val = left >> right; + break; + case '=': + *val = left >= right; + break; + default: + do_warning("unknown op '%s'", arg->op.op); + ret = 0; + } + break; + case '=': + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + + if (arg->op.op[1] != '=') { + do_warning("unknown op '%s'", arg->op.op); + ret = 0; + } else + *val = left == right; + break; + case '!': + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + + switch (arg->op.op[1]) { + case '=': + *val = left != right; + break; + default: + do_warning("unknown op '%s'", arg->op.op); + ret = 0; + } + break; + case '-': + /* check for negative */ + if (arg->op.left->type == PRINT_NULL) + left = 0; + else + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + *val = left - right; + break; + case '+': + if (arg->op.left->type == PRINT_NULL) + left = 0; + else + ret = arg_num_eval(arg->op.left, &left); + if (!ret) + break; + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + *val = left + right; + break; + case '~': + ret = arg_num_eval(arg->op.right, &right); + if (!ret) + break; + *val = ~right; + break; + default: + do_warning("unknown op '%s'", arg->op.op); + ret = 0; + } + break; + + case PRINT_NULL: + case PRINT_FIELD ... PRINT_SYMBOL: + case PRINT_STRING: + case PRINT_BSTRING: + case PRINT_BITMASK: + default: + do_warning("invalid eval type %d", arg->type); + ret = 0; + + } + return ret; +} + +static char *arg_eval (struct print_arg *arg) +{ + long long val; + static char buf[24]; + + switch (arg->type) { + case PRINT_ATOM: + return arg->atom.atom; + case PRINT_TYPE: + return arg_eval(arg->typecast.item); + case PRINT_OP: + if (!arg_num_eval(arg, &val)) + break; + sprintf(buf, "%lld", val); + return buf; + + case PRINT_NULL: + case PRINT_FIELD ... PRINT_SYMBOL: + case PRINT_STRING: + case PRINT_BSTRING: + case PRINT_BITMASK: + default: + do_warning("invalid eval type %d", arg->type); + break; + } + + return NULL; +} + +static enum event_type +process_fields(struct event_format *event, struct print_flag_sym **list, char **tok) +{ + enum event_type type; + struct print_arg *arg = NULL; + struct print_flag_sym *field; + char *token = *tok; + char *value; + + do { + free_token(token); + type = read_token_item(&token); + if (test_type_token(type, token, EVENT_OP, "{")) + break; + + arg = alloc_arg(); + if (!arg) + goto out_free; + + free_token(token); + type = process_arg(event, arg, &token); + + if (type == EVENT_OP) + type = process_op(event, arg, &token); + + if (type == EVENT_ERROR) + goto out_free; + + if (test_type_token(type, token, EVENT_DELIM, ",")) + goto out_free; + + field = calloc(1, sizeof(*field)); + if (!field) + goto out_free; + + value = arg_eval(arg); + if (value == NULL) + goto out_free_field; + field->value = strdup(value); + if (field->value == NULL) + goto out_free_field; + + free_arg(arg); + arg = alloc_arg(); + if (!arg) + goto out_free; + + free_token(token); + type = process_arg(event, arg, &token); + if (test_type_token(type, token, EVENT_OP, "}")) + goto out_free_field; + + value = arg_eval(arg); + if (value == NULL) + goto out_free_field; + field->str = strdup(value); + if (field->str == NULL) + goto out_free_field; + free_arg(arg); + arg = NULL; + + *list = field; + list = &field->next; + + free_token(token); + type = read_token_item(&token); + } while (type == EVENT_DELIM && strcmp(token, ",") == 0); + + *tok = token; + return type; + +out_free_field: + free_flag_sym(field); +out_free: + free_arg(arg); + free_token(token); + *tok = NULL; + + return EVENT_ERROR; +} + +static enum event_type +process_flags(struct event_format *event, struct print_arg *arg, char **tok) +{ + struct print_arg *field; + enum event_type type; + char *token = NULL; + + memset(arg, 0, sizeof(*arg)); + arg->type = PRINT_FLAGS; + + field = alloc_arg(); + if (!field) { + do_warning_event(event, "%s: not enough memory!", __func__); + goto out_free; + } + + type = process_field_arg(event, field, &token); + + /* Handle operations in the first argument */ + while (type == EVENT_OP) + type = process_op(event, field, &token); + + if (test_type_token(type, token, EVENT_DELIM, ",")) + goto out_free_field; + free_token(token); + + arg->flags.field = field; + + type = read_token_item(&token); + if (event_item_type(type)) { + arg->flags.delim = token; + type = read_token_item(&token); + } + + if (test_type_token(type, token, EVENT_DELIM, ",")) + goto out_free; + + type = process_fields(event, &arg->flags.flags, &token); + if (test_type_token(type, token, EVENT_DELIM, ")")) + goto out_free; + + free_token(token); + type = read_token_item(tok); + return type; + +out_free_field: + free_arg(field); +out_free: + free_token(token); + *tok = NULL; + return EVENT_ERROR; +} + +static enum event_type +process_symbols(struct event_format *event, struct print_arg *arg, char **tok) +{ + struct print_arg *field; + enum event_type type; + char *token = NULL; + + memset(arg, 0, sizeof(*arg)); + arg->type = PRINT_SYMBOL; + + field = alloc_arg(); + if (!field) { + do_warning_event(event, "%s: not enough memory!", __func__); + goto out_free; + } + + type = process_field_arg(event, field, &token); + + if (test_type_token(type, token, EVENT_DELIM, ",")) + goto out_free_field; + + arg->symbol.field = field; + + type = process_fields(event, &arg->symbol.symbols, &token); + if (test_type_token(type, token, EVENT_DELIM, ")")) + goto out_free; + + free_token(token); + type = read_token_item(tok); + return type; + +out_free_field: + free_arg(field); +out_free: + free_token(token); + *tok = NULL; + return EVENT_ERROR; +} + +static enum event_type +process_hex_common(struct event_format *event, struct print_arg *arg, + char **tok, enum print_arg_type type) +{ + memset(arg, 0, sizeof(*arg)); + arg->type = type; + + if (alloc_and_process_delim(event, ",", &arg->hex.field)) + goto out; + + if (alloc_and_process_delim(event, ")", &arg->hex.size)) + goto free_field; + + return read_token_item(tok); + +free_field: + free_arg(arg->hex.field); + arg->hex.field = NULL; +out: + *tok = NULL; + return EVENT_ERROR; +} + +static enum event_type +process_hex(struct event_format *event, struct print_arg *arg, char **tok) +{ + return process_hex_common(event, arg, tok, PRINT_HEX); +} + +static enum event_type +process_hex_str(struct event_format *event, struct print_arg *arg, + char **tok) +{ + return process_hex_common(event, arg, tok, PRINT_HEX_STR); +} + +static enum event_type +process_int_array(struct event_format *event, struct print_arg *arg, char **tok) +{ + memset(arg, 0, sizeof(*arg)); + arg->type = PRINT_INT_ARRAY; + + if (alloc_and_process_delim(event, ",", &arg->int_array.field)) + goto out; + + if (alloc_and_process_delim(event, ",", &arg->int_array.count)) + goto free_field; + + if (alloc_and_process_delim(event, ")", &arg->int_array.el_size)) + goto free_size; + + return read_token_item(tok); + +free_size: + free_arg(arg->int_array.count); + arg->int_array.count = NULL; +free_field: + free_arg(arg->int_array.field); + arg->int_array.field = NULL; +out: + *tok = NULL; + return EVENT_ERROR; +} + +static enum event_type +process_dynamic_array(struct event_format *event, struct print_arg *arg, char **tok) +{ + struct format_field *field; + enum event_type type; + char *token; + + memset(arg, 0, sizeof(*arg)); + arg->type = PRINT_DYNAMIC_ARRAY; + + /* + * The item within the parenthesis is another field that holds + * the index into where the array starts. + */ + type = read_token(&token); + *tok = token; + if (type != EVENT_ITEM) + goto out_free; + + /* Find the field */ + + field = tep_find_field(event, token); + if (!field) + goto out_free; + + arg->dynarray.field = field; + arg->dynarray.index = 0; + + if (read_expected(EVENT_DELIM, ")") < 0) + goto out_free; + + free_token(token); + type = read_token_item(&token); + *tok = token; + if (type != EVENT_OP || strcmp(token, "[") != 0) + return type; + + free_token(token); + arg = alloc_arg(); + if (!arg) { + do_warning_event(event, "%s: not enough memory!", __func__); + *tok = NULL; + return EVENT_ERROR; + } + + type = process_arg(event, arg, &token); + if (type == EVENT_ERROR) + goto out_free_arg; + + if (!test_type_token(type, token, EVENT_OP, "]")) + goto out_free_arg; + + free_token(token); + type = read_token_item(tok); + return type; + + out_free_arg: + free_arg(arg); + out_free: + free_token(token); + *tok = NULL; + return EVENT_ERROR; +} + +static enum event_type +process_dynamic_array_len(struct event_format *event, struct print_arg *arg, + char **tok) +{ + struct format_field *field; + enum event_type type; + char *token; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto out_free; + + arg->type = PRINT_DYNAMIC_ARRAY_LEN; + + /* Find the field */ + field = tep_find_field(event, token); + if (!field) + goto out_free; + + arg->dynarray.field = field; + arg->dynarray.index = 0; + + if (read_expected(EVENT_DELIM, ")") < 0) + goto out_err; + + free_token(token); + type = read_token(&token); + *tok = token; + + return type; + + out_free: + free_token(token); + out_err: + *tok = NULL; + return EVENT_ERROR; +} + +static enum event_type +process_paren(struct event_format *event, struct print_arg *arg, char **tok) +{ + struct print_arg *item_arg; + enum event_type type; + char *token; + + type = process_arg(event, arg, &token); + + if (type == EVENT_ERROR) + goto out_free; + + if (type == EVENT_OP) + type = process_op(event, arg, &token); + + if (type == EVENT_ERROR) + goto out_free; + + if (test_type_token(type, token, EVENT_DELIM, ")")) + goto out_free; + + free_token(token); + type = read_token_item(&token); + + /* + * If the next token is an item or another open paren, then + * this was a typecast. + */ + if (event_item_type(type) || + (type == EVENT_DELIM && strcmp(token, "(") == 0)) { + + /* make this a typecast and contine */ + + /* prevous must be an atom */ + if (arg->type != PRINT_ATOM) { + do_warning_event(event, "previous needed to be PRINT_ATOM"); + goto out_free; + } + + item_arg = alloc_arg(); + if (!item_arg) { + do_warning_event(event, "%s: not enough memory!", + __func__); + goto out_free; + } + + arg->type = PRINT_TYPE; + arg->typecast.type = arg->atom.atom; + arg->typecast.item = item_arg; + type = process_arg_token(event, item_arg, &token, type); + + } + + *tok = token; + return type; + + out_free: + free_token(token); + *tok = NULL; + return EVENT_ERROR; +} + + +static enum event_type +process_str(struct event_format *event __maybe_unused, struct print_arg *arg, + char **tok) +{ + enum event_type type; + char *token; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto out_free; + + arg->type = PRINT_STRING; + arg->string.string = token; + arg->string.offset = -1; + + if (read_expected(EVENT_DELIM, ")") < 0) + goto out_err; + + type = read_token(&token); + *tok = token; + + return type; + + out_free: + free_token(token); + out_err: + *tok = NULL; + return EVENT_ERROR; +} + +static enum event_type +process_bitmask(struct event_format *event __maybe_unused, struct print_arg *arg, + char **tok) +{ + enum event_type type; + char *token; + + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto out_free; + + arg->type = PRINT_BITMASK; + arg->bitmask.bitmask = token; + arg->bitmask.offset = -1; + + if (read_expected(EVENT_DELIM, ")") < 0) + goto out_err; + + type = read_token(&token); + *tok = token; + + return type; + + out_free: + free_token(token); + out_err: + *tok = NULL; + return EVENT_ERROR; +} + +static struct tep_function_handler * +find_func_handler(struct tep_handle *pevent, char *func_name) +{ + struct tep_function_handler *func; + + if (!pevent) + return NULL; + + for (func = pevent->func_handlers; func; func = func->next) { + if (strcmp(func->name, func_name) == 0) + break; + } + + return func; +} + +static void remove_func_handler(struct tep_handle *pevent, char *func_name) +{ + struct tep_function_handler *func; + struct tep_function_handler **next; + + next = &pevent->func_handlers; + while ((func = *next)) { + if (strcmp(func->name, func_name) == 0) { + *next = func->next; + free_func_handle(func); + break; + } + next = &func->next; + } +} + +static enum event_type +process_func_handler(struct event_format *event, struct tep_function_handler *func, + struct print_arg *arg, char **tok) +{ + struct print_arg **next_arg; + struct print_arg *farg; + enum event_type type; + char *token; + int i; + + arg->type = PRINT_FUNC; + arg->func.func = func; + + *tok = NULL; + + next_arg = &(arg->func.args); + for (i = 0; i < func->nr_args; i++) { + farg = alloc_arg(); + if (!farg) { + do_warning_event(event, "%s: not enough memory!", + __func__); + return EVENT_ERROR; + } + + type = process_arg(event, farg, &token); + if (i < (func->nr_args - 1)) { + if (type != EVENT_DELIM || strcmp(token, ",") != 0) { + do_warning_event(event, + "Error: function '%s()' expects %d arguments but event %s only uses %d", + func->name, func->nr_args, + event->name, i + 1); + goto err; + } + } else { + if (type != EVENT_DELIM || strcmp(token, ")") != 0) { + do_warning_event(event, + "Error: function '%s()' only expects %d arguments but event %s has more", + func->name, func->nr_args, event->name); + goto err; + } + } + + *next_arg = farg; + next_arg = &(farg->next); + free_token(token); + } + + type = read_token(&token); + *tok = token; + + return type; + +err: + free_arg(farg); + free_token(token); + return EVENT_ERROR; +} + +static enum event_type +process_function(struct event_format *event, struct print_arg *arg, + char *token, char **tok) +{ + struct tep_function_handler *func; + + if (strcmp(token, "__print_flags") == 0) { + free_token(token); + is_flag_field = 1; + return process_flags(event, arg, tok); + } + if (strcmp(token, "__print_symbolic") == 0) { + free_token(token); + is_symbolic_field = 1; + return process_symbols(event, arg, tok); + } + if (strcmp(token, "__print_hex") == 0) { + free_token(token); + return process_hex(event, arg, tok); + } + if (strcmp(token, "__print_hex_str") == 0) { + free_token(token); + return process_hex_str(event, arg, tok); + } + if (strcmp(token, "__print_array") == 0) { + free_token(token); + return process_int_array(event, arg, tok); + } + if (strcmp(token, "__get_str") == 0) { + free_token(token); + return process_str(event, arg, tok); + } + if (strcmp(token, "__get_bitmask") == 0) { + free_token(token); + return process_bitmask(event, arg, tok); + } + if (strcmp(token, "__get_dynamic_array") == 0) { + free_token(token); + return process_dynamic_array(event, arg, tok); + } + if (strcmp(token, "__get_dynamic_array_len") == 0) { + free_token(token); + return process_dynamic_array_len(event, arg, tok); + } + + func = find_func_handler(event->pevent, token); + if (func) { + free_token(token); + return process_func_handler(event, func, arg, tok); + } + + do_warning_event(event, "function %s not defined", token); + free_token(token); + return EVENT_ERROR; +} + +static enum event_type +process_arg_token(struct event_format *event, struct print_arg *arg, + char **tok, enum event_type type) +{ + char *token; + char *atom; + + token = *tok; + + switch (type) { + case EVENT_ITEM: + if (strcmp(token, "REC") == 0) { + free_token(token); + type = process_entry(event, arg, &token); + break; + } + atom = token; + /* test the next token */ + type = read_token_item(&token); + + /* + * If the next token is a parenthesis, then this + * is a function. + */ + if (type == EVENT_DELIM && strcmp(token, "(") == 0) { + free_token(token); + token = NULL; + /* this will free atom. */ + type = process_function(event, arg, atom, &token); + break; + } + /* atoms can be more than one token long */ + while (type == EVENT_ITEM) { + char *new_atom; + new_atom = realloc(atom, + strlen(atom) + strlen(token) + 2); + if (!new_atom) { + free(atom); + *tok = NULL; + free_token(token); + return EVENT_ERROR; + } + atom = new_atom; + strcat(atom, " "); + strcat(atom, token); + free_token(token); + type = read_token_item(&token); + } + + arg->type = PRINT_ATOM; + arg->atom.atom = atom; + break; + + case EVENT_DQUOTE: + case EVENT_SQUOTE: + arg->type = PRINT_ATOM; + arg->atom.atom = token; + type = read_token_item(&token); + break; + case EVENT_DELIM: + if (strcmp(token, "(") == 0) { + free_token(token); + type = process_paren(event, arg, &token); + break; + } + case EVENT_OP: + /* handle single ops */ + arg->type = PRINT_OP; + arg->op.op = token; + arg->op.left = NULL; + type = process_op(event, arg, &token); + + /* On error, the op is freed */ + if (type == EVENT_ERROR) + arg->op.op = NULL; + + /* return error type if errored */ + break; + + case EVENT_ERROR ... EVENT_NEWLINE: + default: + do_warning_event(event, "unexpected type %d", type); + return EVENT_ERROR; + } + *tok = token; + + return type; +} + +static int event_read_print_args(struct event_format *event, struct print_arg **list) +{ + enum event_type type = EVENT_ERROR; + struct print_arg *arg; + char *token; + int args = 0; + + do { + if (type == EVENT_NEWLINE) { + type = read_token_item(&token); + continue; + } + + arg = alloc_arg(); + if (!arg) { + do_warning_event(event, "%s: not enough memory!", + __func__); + return -1; + } + + type = process_arg(event, arg, &token); + + if (type == EVENT_ERROR) { + free_token(token); + free_arg(arg); + return -1; + } + + *list = arg; + args++; + + if (type == EVENT_OP) { + type = process_op(event, arg, &token); + free_token(token); + if (type == EVENT_ERROR) { + *list = NULL; + free_arg(arg); + return -1; + } + list = &arg->next; + continue; + } + + if (type == EVENT_DELIM && strcmp(token, ",") == 0) { + free_token(token); + *list = arg; + list = &arg->next; + continue; + } + break; + } while (type != EVENT_NONE); + + if (type != EVENT_NONE && type != EVENT_ERROR) + free_token(token); + + return args; +} + +static int event_read_print(struct event_format *event) +{ + enum event_type type; + char *token; + int ret; + + if (read_expected_item(EVENT_ITEM, "print") < 0) + return -1; + + if (read_expected(EVENT_ITEM, "fmt") < 0) + return -1; + + if (read_expected(EVENT_OP, ":") < 0) + return -1; + + if (read_expect_type(EVENT_DQUOTE, &token) < 0) + goto fail; + + concat: + event->print_fmt.format = token; + event->print_fmt.args = NULL; + + /* ok to have no arg */ + type = read_token_item(&token); + + if (type == EVENT_NONE) + return 0; + + /* Handle concatenation of print lines */ + if (type == EVENT_DQUOTE) { + char *cat; + + if (asprintf(&cat, "%s%s", event->print_fmt.format, token) < 0) + goto fail; + free_token(token); + free_token(event->print_fmt.format); + event->print_fmt.format = NULL; + token = cat; + goto concat; + } + + if (test_type_token(type, token, EVENT_DELIM, ",")) + goto fail; + + free_token(token); + + ret = event_read_print_args(event, &event->print_fmt.args); + if (ret < 0) + return -1; + + return ret; + + fail: + free_token(token); + return -1; +} + +/** + * tep_find_common_field - return a common field by event + * @event: handle for the event + * @name: the name of the common field to return + * + * Returns a common field from the event by the given @name. + * This only searchs the common fields and not all field. + */ +struct format_field * +tep_find_common_field(struct event_format *event, const char *name) +{ + struct format_field *format; + + for (format = event->format.common_fields; + format; format = format->next) { + if (strcmp(format->name, name) == 0) + break; + } + + return format; +} + +/** + * tep_find_field - find a non-common field + * @event: handle for the event + * @name: the name of the non-common field + * + * Returns a non-common field by the given @name. + * This does not search common fields. + */ +struct format_field * +tep_find_field(struct event_format *event, const char *name) +{ + struct format_field *format; + + for (format = event->format.fields; + format; format = format->next) { + if (strcmp(format->name, name) == 0) + break; + } + + return format; +} + +/** + * tep_find_any_field - find any field by name + * @event: handle for the event + * @name: the name of the field + * + * Returns a field by the given @name. + * This searchs the common field names first, then + * the non-common ones if a common one was not found. + */ +struct format_field * +tep_find_any_field(struct event_format *event, const char *name) +{ + struct format_field *format; + + format = tep_find_common_field(event, name); + if (format) + return format; + return tep_find_field(event, name); +} + +/** + * tep_read_number - read a number from data + * @pevent: handle for the pevent + * @ptr: the raw data + * @size: the size of the data that holds the number + * + * Returns the number (converted to host) from the + * raw data. + */ +unsigned long long tep_read_number(struct tep_handle *pevent, + const void *ptr, int size) +{ + switch (size) { + case 1: + return *(unsigned char *)ptr; + case 2: + return data2host2(pevent, ptr); + case 4: + return data2host4(pevent, ptr); + case 8: + return data2host8(pevent, ptr); + default: + /* BUG! */ + return 0; + } +} + +/** + * tep_read_number_field - read a number from data + * @field: a handle to the field + * @data: the raw data to read + * @value: the value to place the number in + * + * Reads raw data according to a field offset and size, + * and translates it into @value. + * + * Returns 0 on success, -1 otherwise. + */ +int tep_read_number_field(struct format_field *field, const void *data, + unsigned long long *value) +{ + if (!field) + return -1; + switch (field->size) { + case 1: + case 2: + case 4: + case 8: + *value = tep_read_number(field->event->pevent, + data + field->offset, field->size); + return 0; + default: + return -1; + } +} + +static int get_common_info(struct tep_handle *pevent, + const char *type, int *offset, int *size) +{ + struct event_format *event; + struct format_field *field; + + /* + * All events should have the same common elements. + * Pick any event to find where the type is; + */ + if (!pevent->events) { + do_warning("no event_list!"); + return -1; + } + + event = pevent->events[0]; + field = tep_find_common_field(event, type); + if (!field) + return -1; + + *offset = field->offset; + *size = field->size; + + return 0; +} + +static int __parse_common(struct tep_handle *pevent, void *data, + int *size, int *offset, const char *name) +{ + int ret; + + if (!*size) { + ret = get_common_info(pevent, name, offset, size); + if (ret < 0) + return ret; + } + return tep_read_number(pevent, data + *offset, *size); +} + +static int trace_parse_common_type(struct tep_handle *pevent, void *data) +{ + return __parse_common(pevent, data, + &pevent->type_size, &pevent->type_offset, + "common_type"); +} + +static int parse_common_pid(struct tep_handle *pevent, void *data) +{ + return __parse_common(pevent, data, + &pevent->pid_size, &pevent->pid_offset, + "common_pid"); +} + +static int parse_common_pc(struct tep_handle *pevent, void *data) +{ + return __parse_common(pevent, data, + &pevent->pc_size, &pevent->pc_offset, + "common_preempt_count"); +} + +static int parse_common_flags(struct tep_handle *pevent, void *data) +{ + return __parse_common(pevent, data, + &pevent->flags_size, &pevent->flags_offset, + "common_flags"); +} + +static int parse_common_lock_depth(struct tep_handle *pevent, void *data) +{ + return __parse_common(pevent, data, + &pevent->ld_size, &pevent->ld_offset, + "common_lock_depth"); +} + +static int parse_common_migrate_disable(struct tep_handle *pevent, void *data) +{ + return __parse_common(pevent, data, + &pevent->ld_size, &pevent->ld_offset, + "common_migrate_disable"); +} + +static int events_id_cmp(const void *a, const void *b); + +/** + * tep_find_event - find an event by given id + * @pevent: a handle to the pevent + * @id: the id of the event + * + * Returns an event that has a given @id. + */ +struct event_format *tep_find_event(struct tep_handle *pevent, int id) +{ + struct event_format **eventptr; + struct event_format key; + struct event_format *pkey = &key; + + /* Check cache first */ + if (pevent->last_event && pevent->last_event->id == id) + return pevent->last_event; + + key.id = id; + + eventptr = bsearch(&pkey, pevent->events, pevent->nr_events, + sizeof(*pevent->events), events_id_cmp); + + if (eventptr) { + pevent->last_event = *eventptr; + return *eventptr; + } + + return NULL; +} + +/** + * tep_find_event_by_name - find an event by given name + * @pevent: a handle to the pevent + * @sys: the system name to search for + * @name: the name of the event to search for + * + * This returns an event with a given @name and under the system + * @sys. If @sys is NULL the first event with @name is returned. + */ +struct event_format * +tep_find_event_by_name(struct tep_handle *pevent, + const char *sys, const char *name) +{ + struct event_format *event; + int i; + + if (pevent->last_event && + strcmp(pevent->last_event->name, name) == 0 && + (!sys || strcmp(pevent->last_event->system, sys) == 0)) + return pevent->last_event; + + for (i = 0; i < pevent->nr_events; i++) { + event = pevent->events[i]; + if (strcmp(event->name, name) == 0) { + if (!sys) + break; + if (strcmp(event->system, sys) == 0) + break; + } + } + if (i == pevent->nr_events) + event = NULL; + + pevent->last_event = event; + return event; +} + +static unsigned long long +eval_num_arg(void *data, int size, struct event_format *event, struct print_arg *arg) +{ + struct tep_handle *pevent = event->pevent; + unsigned long long val = 0; + unsigned long long left, right; + struct print_arg *typearg = NULL; + struct print_arg *larg; + unsigned long offset; + unsigned int field_size; + + switch (arg->type) { + case PRINT_NULL: + /* ?? */ + return 0; + case PRINT_ATOM: + return strtoull(arg->atom.atom, NULL, 0); + case PRINT_FIELD: + if (!arg->field.field) { + arg->field.field = tep_find_any_field(event, arg->field.name); + if (!arg->field.field) + goto out_warning_field; + + } + /* must be a number */ + val = tep_read_number(pevent, data + arg->field.field->offset, + arg->field.field->size); + break; + case PRINT_FLAGS: + case PRINT_SYMBOL: + case PRINT_INT_ARRAY: + case PRINT_HEX: + case PRINT_HEX_STR: + break; + case PRINT_TYPE: + val = eval_num_arg(data, size, event, arg->typecast.item); + return eval_type(val, arg, 0); + case PRINT_STRING: + case PRINT_BSTRING: + case PRINT_BITMASK: + return 0; + case PRINT_FUNC: { + struct trace_seq s; + trace_seq_init(&s); + val = process_defined_func(&s, data, size, event, arg); + trace_seq_destroy(&s); + return val; + } + case PRINT_OP: + if (strcmp(arg->op.op, "[") == 0) { + /* + * Arrays are special, since we don't want + * to read the arg as is. + */ + right = eval_num_arg(data, size, event, arg->op.right); + + /* handle typecasts */ + larg = arg->op.left; + while (larg->type == PRINT_TYPE) { + if (!typearg) + typearg = larg; + larg = larg->typecast.item; + } + + /* Default to long size */ + field_size = pevent->long_size; + + switch (larg->type) { + case PRINT_DYNAMIC_ARRAY: + offset = tep_read_number(pevent, + data + larg->dynarray.field->offset, + larg->dynarray.field->size); + if (larg->dynarray.field->elementsize) + field_size = larg->dynarray.field->elementsize; + /* + * The actual length of the dynamic array is stored + * in the top half of the field, and the offset + * is in the bottom half of the 32 bit field. + */ + offset &= 0xffff; + offset += right; + break; + case PRINT_FIELD: + if (!larg->field.field) { + larg->field.field = + tep_find_any_field(event, larg->field.name); + if (!larg->field.field) { + arg = larg; + goto out_warning_field; + } + } + field_size = larg->field.field->elementsize; + offset = larg->field.field->offset + + right * larg->field.field->elementsize; + break; + default: + goto default_op; /* oops, all bets off */ + } + val = tep_read_number(pevent, + data + offset, field_size); + if (typearg) + val = eval_type(val, typearg, 1); + break; + } else if (strcmp(arg->op.op, "?") == 0) { + left = eval_num_arg(data, size, event, arg->op.left); + arg = arg->op.right; + if (left) + val = eval_num_arg(data, size, event, arg->op.left); + else + val = eval_num_arg(data, size, event, arg->op.right); + break; + } + default_op: + left = eval_num_arg(data, size, event, arg->op.left); + right = eval_num_arg(data, size, event, arg->op.right); + switch (arg->op.op[0]) { + case '!': + switch (arg->op.op[1]) { + case 0: + val = !right; + break; + case '=': + val = left != right; + break; + default: + goto out_warning_op; + } + break; + case '~': + val = ~right; + break; + case '|': + if (arg->op.op[1]) + val = left || right; + else + val = left | right; + break; + case '&': + if (arg->op.op[1]) + val = left && right; + else + val = left & right; + break; + case '<': + switch (arg->op.op[1]) { + case 0: + val = left < right; + break; + case '<': + val = left << right; + break; + case '=': + val = left <= right; + break; + default: + goto out_warning_op; + } + break; + case '>': + switch (arg->op.op[1]) { + case 0: + val = left > right; + break; + case '>': + val = left >> right; + break; + case '=': + val = left >= right; + break; + default: + goto out_warning_op; + } + break; + case '=': + if (arg->op.op[1] != '=') + goto out_warning_op; + + val = left == right; + break; + case '-': + val = left - right; + break; + case '+': + val = left + right; + break; + case '/': + val = left / right; + break; + case '%': + val = left % right; + break; + case '*': + val = left * right; + break; + default: + goto out_warning_op; + } + break; + case PRINT_DYNAMIC_ARRAY_LEN: + offset = tep_read_number(pevent, + data + arg->dynarray.field->offset, + arg->dynarray.field->size); + /* + * The total allocated length of the dynamic array is + * stored in the top half of the field, and the offset + * is in the bottom half of the 32 bit field. + */ + val = (unsigned long long)(offset >> 16); + break; + case PRINT_DYNAMIC_ARRAY: + /* Without [], we pass the address to the dynamic data */ + offset = tep_read_number(pevent, + data + arg->dynarray.field->offset, + arg->dynarray.field->size); + /* + * The total allocated length of the dynamic array is + * stored in the top half of the field, and the offset + * is in the bottom half of the 32 bit field. + */ + offset &= 0xffff; + val = (unsigned long long)((unsigned long)data + offset); + break; + default: /* not sure what to do there */ + return 0; + } + return val; + +out_warning_op: + do_warning_event(event, "%s: unknown op '%s'", __func__, arg->op.op); + return 0; + +out_warning_field: + do_warning_event(event, "%s: field %s not found", + __func__, arg->field.name); + return 0; +} + +struct flag { + const char *name; + unsigned long long value; +}; + +static const struct flag flags[] = { + { "HI_SOFTIRQ", 0 }, + { "TIMER_SOFTIRQ", 1 }, + { "NET_TX_SOFTIRQ", 2 }, + { "NET_RX_SOFTIRQ", 3 }, + { "BLOCK_SOFTIRQ", 4 }, + { "IRQ_POLL_SOFTIRQ", 5 }, + { "TASKLET_SOFTIRQ", 6 }, + { "SCHED_SOFTIRQ", 7 }, + { "HRTIMER_SOFTIRQ", 8 }, + { "RCU_SOFTIRQ", 9 }, + + { "HRTIMER_NORESTART", 0 }, + { "HRTIMER_RESTART", 1 }, +}; + +static long long eval_flag(const char *flag) +{ + int i; + + /* + * Some flags in the format files do not get converted. + * If the flag is not numeric, see if it is something that + * we already know about. + */ + if (isdigit(flag[0])) + return strtoull(flag, NULL, 0); + + for (i = 0; i < (int)(sizeof(flags)/sizeof(flags[0])); i++) + if (strcmp(flags[i].name, flag) == 0) + return flags[i].value; + + return -1LL; +} + +static void print_str_to_seq(struct trace_seq *s, const char *format, + int len_arg, const char *str) +{ + if (len_arg >= 0) + trace_seq_printf(s, format, len_arg, str); + else + trace_seq_printf(s, format, str); +} + +static void print_bitmask_to_seq(struct tep_handle *pevent, + struct trace_seq *s, const char *format, + int len_arg, const void *data, int size) +{ + int nr_bits = size * 8; + int str_size = (nr_bits + 3) / 4; + int len = 0; + char buf[3]; + char *str; + int index; + int i; + + /* + * The kernel likes to put in commas every 32 bits, we + * can do the same. + */ + str_size += (nr_bits - 1) / 32; + + str = malloc(str_size + 1); + if (!str) { + do_warning("%s: not enough memory!", __func__); + return; + } + str[str_size] = 0; + + /* Start out with -2 for the two chars per byte */ + for (i = str_size - 2; i >= 0; i -= 2) { + /* + * data points to a bit mask of size bytes. + * In the kernel, this is an array of long words, thus + * endianess is very important. + */ + if (pevent->file_bigendian) + index = size - (len + 1); + else + index = len; + + snprintf(buf, 3, "%02x", *((unsigned char *)data + index)); + memcpy(str + i, buf, 2); + len++; + if (!(len & 3) && i > 0) { + i--; + str[i] = ','; + } + } + + if (len_arg >= 0) + trace_seq_printf(s, format, len_arg, str); + else + trace_seq_printf(s, format, str); + + free(str); +} + +static void print_str_arg(struct trace_seq *s, void *data, int size, + struct event_format *event, const char *format, + int len_arg, struct print_arg *arg) +{ + struct tep_handle *pevent = event->pevent; + struct print_flag_sym *flag; + struct format_field *field; + struct printk_map *printk; + long long val, fval; + unsigned long long addr; + char *str; + unsigned char *hex; + int print; + int i, len; + + switch (arg->type) { + case PRINT_NULL: + /* ?? */ + return; + case PRINT_ATOM: + print_str_to_seq(s, format, len_arg, arg->atom.atom); + return; + case PRINT_FIELD: + field = arg->field.field; + if (!field) { + field = tep_find_any_field(event, arg->field.name); + if (!field) { + str = arg->field.name; + goto out_warning_field; + } + arg->field.field = field; + } + /* Zero sized fields, mean the rest of the data */ + len = field->size ? : size - field->offset; + + /* + * Some events pass in pointers. If this is not an array + * and the size is the same as long_size, assume that it + * is a pointer. + */ + if (!(field->flags & FIELD_IS_ARRAY) && + field->size == pevent->long_size) { + + /* Handle heterogeneous recording and processing + * architectures + * + * CASE I: + * Traces recorded on 32-bit devices (32-bit + * addressing) and processed on 64-bit devices: + * In this case, only 32 bits should be read. + * + * CASE II: + * Traces recorded on 64 bit devices and processed + * on 32-bit devices: + * In this case, 64 bits must be read. + */ + addr = (pevent->long_size == 8) ? + *(unsigned long long *)(data + field->offset) : + (unsigned long long)*(unsigned int *)(data + field->offset); + + /* Check if it matches a print format */ + printk = find_printk(pevent, addr); + if (printk) + trace_seq_puts(s, printk->printk); + else + trace_seq_printf(s, "%llx", addr); + break; + } + str = malloc(len + 1); + if (!str) { + do_warning_event(event, "%s: not enough memory!", + __func__); + return; + } + memcpy(str, data + field->offset, len); + str[len] = 0; + print_str_to_seq(s, format, len_arg, str); + free(str); + break; + case PRINT_FLAGS: + val = eval_num_arg(data, size, event, arg->flags.field); + print = 0; + for (flag = arg->flags.flags; flag; flag = flag->next) { + fval = eval_flag(flag->value); + if (!val && fval < 0) { + print_str_to_seq(s, format, len_arg, flag->str); + break; + } + if (fval > 0 && (val & fval) == fval) { + if (print && arg->flags.delim) + trace_seq_puts(s, arg->flags.delim); + print_str_to_seq(s, format, len_arg, flag->str); + print = 1; + val &= ~fval; + } + } + if (val) { + if (print && arg->flags.delim) + trace_seq_puts(s, arg->flags.delim); + trace_seq_printf(s, "0x%llx", val); + } + break; + case PRINT_SYMBOL: + val = eval_num_arg(data, size, event, arg->symbol.field); + for (flag = arg->symbol.symbols; flag; flag = flag->next) { + fval = eval_flag(flag->value); + if (val == fval) { + print_str_to_seq(s, format, len_arg, flag->str); + break; + } + } + if (!flag) + trace_seq_printf(s, "0x%llx", val); + break; + case PRINT_HEX: + case PRINT_HEX_STR: + if (arg->hex.field->type == PRINT_DYNAMIC_ARRAY) { + unsigned long offset; + offset = tep_read_number(pevent, + data + arg->hex.field->dynarray.field->offset, + arg->hex.field->dynarray.field->size); + hex = data + (offset & 0xffff); + } else { + field = arg->hex.field->field.field; + if (!field) { + str = arg->hex.field->field.name; + field = tep_find_any_field(event, str); + if (!field) + goto out_warning_field; + arg->hex.field->field.field = field; + } + hex = data + field->offset; + } + len = eval_num_arg(data, size, event, arg->hex.size); + for (i = 0; i < len; i++) { + if (i && arg->type == PRINT_HEX) + trace_seq_putc(s, ' '); + trace_seq_printf(s, "%02x", hex[i]); + } + break; + + case PRINT_INT_ARRAY: { + void *num; + int el_size; + + if (arg->int_array.field->type == PRINT_DYNAMIC_ARRAY) { + unsigned long offset; + struct format_field *field = + arg->int_array.field->dynarray.field; + offset = tep_read_number(pevent, + data + field->offset, + field->size); + num = data + (offset & 0xffff); + } else { + field = arg->int_array.field->field.field; + if (!field) { + str = arg->int_array.field->field.name; + field = tep_find_any_field(event, str); + if (!field) + goto out_warning_field; + arg->int_array.field->field.field = field; + } + num = data + field->offset; + } + len = eval_num_arg(data, size, event, arg->int_array.count); + el_size = eval_num_arg(data, size, event, + arg->int_array.el_size); + for (i = 0; i < len; i++) { + if (i) + trace_seq_putc(s, ' '); + + if (el_size == 1) { + trace_seq_printf(s, "%u", *(uint8_t *)num); + } else if (el_size == 2) { + trace_seq_printf(s, "%u", *(uint16_t *)num); + } else if (el_size == 4) { + trace_seq_printf(s, "%u", *(uint32_t *)num); + } else if (el_size == 8) { + trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num); + } else { + trace_seq_printf(s, "BAD SIZE:%d 0x%x", + el_size, *(uint8_t *)num); + el_size = 1; + } + + num += el_size; + } + break; + } + case PRINT_TYPE: + break; + case PRINT_STRING: { + int str_offset; + + if (arg->string.offset == -1) { + struct format_field *f; + + f = tep_find_any_field(event, arg->string.string); + arg->string.offset = f->offset; + } + str_offset = data2host4(pevent, data + arg->string.offset); + str_offset &= 0xffff; + print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset); + break; + } + case PRINT_BSTRING: + print_str_to_seq(s, format, len_arg, arg->string.string); + break; + case PRINT_BITMASK: { + int bitmask_offset; + int bitmask_size; + + if (arg->bitmask.offset == -1) { + struct format_field *f; + + f = tep_find_any_field(event, arg->bitmask.bitmask); + arg->bitmask.offset = f->offset; + } + bitmask_offset = data2host4(pevent, data + arg->bitmask.offset); + bitmask_size = bitmask_offset >> 16; + bitmask_offset &= 0xffff; + print_bitmask_to_seq(pevent, s, format, len_arg, + data + bitmask_offset, bitmask_size); + break; + } + case PRINT_OP: + /* + * The only op for string should be ? : + */ + if (arg->op.op[0] != '?') + return; + val = eval_num_arg(data, size, event, arg->op.left); + if (val) + print_str_arg(s, data, size, event, + format, len_arg, arg->op.right->op.left); + else + print_str_arg(s, data, size, event, + format, len_arg, arg->op.right->op.right); + break; + case PRINT_FUNC: + process_defined_func(s, data, size, event, arg); + break; + default: + /* well... */ + break; + } + + return; + +out_warning_field: + do_warning_event(event, "%s: field %s not found", + __func__, arg->field.name); +} + +static unsigned long long +process_defined_func(struct trace_seq *s, void *data, int size, + struct event_format *event, struct print_arg *arg) +{ + struct tep_function_handler *func_handle = arg->func.func; + struct func_params *param; + unsigned long long *args; + unsigned long long ret; + struct print_arg *farg; + struct trace_seq str; + struct save_str { + struct save_str *next; + char *str; + } *strings = NULL, *string; + int i; + + if (!func_handle->nr_args) { + ret = (*func_handle->func)(s, NULL); + goto out; + } + + farg = arg->func.args; + param = func_handle->params; + + ret = ULLONG_MAX; + args = malloc(sizeof(*args) * func_handle->nr_args); + if (!args) + goto out; + + for (i = 0; i < func_handle->nr_args; i++) { + switch (param->type) { + case TEP_FUNC_ARG_INT: + case TEP_FUNC_ARG_LONG: + case TEP_FUNC_ARG_PTR: + args[i] = eval_num_arg(data, size, event, farg); + break; + case TEP_FUNC_ARG_STRING: + trace_seq_init(&str); + print_str_arg(&str, data, size, event, "%s", -1, farg); + trace_seq_terminate(&str); + string = malloc(sizeof(*string)); + if (!string) { + do_warning_event(event, "%s(%d): malloc str", + __func__, __LINE__); + goto out_free; + } + string->next = strings; + string->str = strdup(str.buffer); + if (!string->str) { + free(string); + do_warning_event(event, "%s(%d): malloc str", + __func__, __LINE__); + goto out_free; + } + args[i] = (uintptr_t)string->str; + strings = string; + trace_seq_destroy(&str); + break; + default: + /* + * Something went totally wrong, this is not + * an input error, something in this code broke. + */ + do_warning_event(event, "Unexpected end of arguments\n"); + goto out_free; + } + farg = farg->next; + param = param->next; + } + + ret = (*func_handle->func)(s, args); +out_free: + free(args); + while (strings) { + string = strings; + strings = string->next; + free(string->str); + free(string); + } + + out: + /* TBD : handle return type here */ + return ret; +} + +static void free_args(struct print_arg *args) +{ + struct print_arg *next; + + while (args) { + next = args->next; + + free_arg(args); + args = next; + } +} + +static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struct event_format *event) +{ + struct tep_handle *pevent = event->pevent; + struct format_field *field, *ip_field; + struct print_arg *args, *arg, **next; + unsigned long long ip, val; + char *ptr; + void *bptr; + int vsize; + + field = pevent->bprint_buf_field; + ip_field = pevent->bprint_ip_field; + + if (!field) { + field = tep_find_field(event, "buf"); + if (!field) { + do_warning_event(event, "can't find buffer field for binary printk"); + return NULL; + } + ip_field = tep_find_field(event, "ip"); + if (!ip_field) { + do_warning_event(event, "can't find ip field for binary printk"); + return NULL; + } + pevent->bprint_buf_field = field; + pevent->bprint_ip_field = ip_field; + } + + ip = tep_read_number(pevent, data + ip_field->offset, ip_field->size); + + /* + * The first arg is the IP pointer. + */ + args = alloc_arg(); + if (!args) { + do_warning_event(event, "%s(%d): not enough memory!", + __func__, __LINE__); + return NULL; + } + arg = args; + arg->next = NULL; + next = &arg->next; + + arg->type = PRINT_ATOM; + + if (asprintf(&arg->atom.atom, "%lld", ip) < 0) + goto out_free; + + /* skip the first "%ps: " */ + for (ptr = fmt + 5, bptr = data + field->offset; + bptr < data + size && *ptr; ptr++) { + int ls = 0; + + if (*ptr == '%') { + process_again: + ptr++; + switch (*ptr) { + case '%': + break; + case 'l': + ls++; + goto process_again; + case 'L': + ls = 2; + goto process_again; + case '0' ... '9': + goto process_again; + case '.': + goto process_again; + case 'z': + case 'Z': + ls = 1; + goto process_again; + case 'p': + ls = 1; + if (isalnum(ptr[1])) { + ptr++; + /* Check for special pointers */ + switch (*ptr) { + case 's': + case 'S': + case 'f': + case 'F': + break; + default: + /* + * Older kernels do not process + * dereferenced pointers. + * Only process if the pointer + * value is a printable. + */ + if (isprint(*(char *)bptr)) + goto process_string; + } + } + /* fall through */ + case 'd': + case 'u': + case 'x': + case 'i': + switch (ls) { + case 0: + vsize = 4; + break; + case 1: + vsize = pevent->long_size; + break; + case 2: + vsize = 8; + break; + default: + vsize = ls; /* ? */ + break; + } + /* fall through */ + case '*': + if (*ptr == '*') + vsize = 4; + + /* the pointers are always 4 bytes aligned */ + bptr = (void *)(((unsigned long)bptr + 3) & + ~3); + val = tep_read_number(pevent, bptr, vsize); + bptr += vsize; + arg = alloc_arg(); + if (!arg) { + do_warning_event(event, "%s(%d): not enough memory!", + __func__, __LINE__); + goto out_free; + } + arg->next = NULL; + arg->type = PRINT_ATOM; + if (asprintf(&arg->atom.atom, "%lld", val) < 0) { + free(arg); + goto out_free; + } + *next = arg; + next = &arg->next; + /* + * The '*' case means that an arg is used as the length. + * We need to continue to figure out for what. + */ + if (*ptr == '*') + goto process_again; + + break; + case 's': + process_string: + arg = alloc_arg(); + if (!arg) { + do_warning_event(event, "%s(%d): not enough memory!", + __func__, __LINE__); + goto out_free; + } + arg->next = NULL; + arg->type = PRINT_BSTRING; + arg->string.string = strdup(bptr); + if (!arg->string.string) + goto out_free; + bptr += strlen(bptr) + 1; + *next = arg; + next = &arg->next; + default: + break; + } + } + } + + return args; + +out_free: + free_args(args); + return NULL; +} + +static char * +get_bprint_format(void *data, int size __maybe_unused, + struct event_format *event) +{ + struct tep_handle *pevent = event->pevent; + unsigned long long addr; + struct format_field *field; + struct printk_map *printk; + char *format; + + field = pevent->bprint_fmt_field; + + if (!field) { + field = tep_find_field(event, "fmt"); + if (!field) { + do_warning_event(event, "can't find format field for binary printk"); + return NULL; + } + pevent->bprint_fmt_field = field; + } + + addr = tep_read_number(pevent, data + field->offset, field->size); + + printk = find_printk(pevent, addr); + if (!printk) { + if (asprintf(&format, "%%pf: (NO FORMAT FOUND at %llx)\n", addr) < 0) + return NULL; + return format; + } + + if (asprintf(&format, "%s: %s", "%pf", printk->printk) < 0) + return NULL; + + return format; +} + +static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size, + struct event_format *event, struct print_arg *arg) +{ + unsigned char *buf; + const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x"; + + if (arg->type == PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return; + } + + if (arg->type != PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", + arg->type); + return; + } + + if (mac == 'm') + fmt = "%.2x%.2x%.2x%.2x%.2x%.2x"; + if (!arg->field.field) { + arg->field.field = + tep_find_any_field(event, arg->field.name); + if (!arg->field.field) { + do_warning_event(event, "%s: field %s not found", + __func__, arg->field.name); + return; + } + } + if (arg->field.field->size != 6) { + trace_seq_printf(s, "INVALIDMAC"); + return; + } + buf = data + arg->field.field->offset; + trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); +} + +static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf) +{ + const char *fmt; + + if (i == 'i') + fmt = "%03d.%03d.%03d.%03d"; + else + fmt = "%d.%d.%d.%d"; + + trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]); +} + +static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) +{ + return ((unsigned long)(a->s6_addr32[0] | a->s6_addr32[1]) | + (unsigned long)(a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0UL; +} + +static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr) +{ + return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); +} + +static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr) +{ + int i, j, range; + unsigned char zerolength[8]; + int longest = 1; + int colonpos = -1; + uint16_t word; + uint8_t hi, lo; + bool needcolon = false; + bool useIPv4; + struct in6_addr in6; + + memcpy(&in6, addr, sizeof(struct in6_addr)); + + useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6); + + memset(zerolength, 0, sizeof(zerolength)); + + if (useIPv4) + range = 6; + else + range = 8; + + /* find position of longest 0 run */ + for (i = 0; i < range; i++) { + for (j = i; j < range; j++) { + if (in6.s6_addr16[j] != 0) + break; + zerolength[i]++; + } + } + for (i = 0; i < range; i++) { + if (zerolength[i] > longest) { + longest = zerolength[i]; + colonpos = i; + } + } + if (longest == 1) /* don't compress a single 0 */ + colonpos = -1; + + /* emit address */ + for (i = 0; i < range; i++) { + if (i == colonpos) { + if (needcolon || i == 0) + trace_seq_printf(s, ":"); + trace_seq_printf(s, ":"); + needcolon = false; + i += longest - 1; + continue; + } + if (needcolon) { + trace_seq_printf(s, ":"); + needcolon = false; + } + /* hex u16 without leading 0s */ + word = ntohs(in6.s6_addr16[i]); + hi = word >> 8; + lo = word & 0xff; + if (hi) + trace_seq_printf(s, "%x%02x", hi, lo); + else + trace_seq_printf(s, "%x", lo); + + needcolon = true; + } + + if (useIPv4) { + if (needcolon) + trace_seq_printf(s, ":"); + print_ip4_addr(s, 'I', &in6.s6_addr[12]); + } + + return; +} + +static void print_ip6_addr(struct trace_seq *s, char i, unsigned char *buf) +{ + int j; + + for (j = 0; j < 16; j += 2) { + trace_seq_printf(s, "%02x%02x", buf[j], buf[j+1]); + if (i == 'I' && j < 14) + trace_seq_printf(s, ":"); + } +} + +/* + * %pi4 print an IPv4 address with leading zeros + * %pI4 print an IPv4 address without leading zeros + * %pi6 print an IPv6 address without colons + * %pI6 print an IPv6 address with colons + * %pI6c print an IPv6 address in compressed form with colons + * %pISpc print an IP address based on sockaddr; p adds port. + */ +static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + unsigned char *buf; + + if (arg->type == PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return 0; + } + + if (arg->type != PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return 0; + } + + if (!arg->field.field) { + arg->field.field = + tep_find_any_field(event, arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return 0; + } + } + + buf = data + arg->field.field->offset; + + if (arg->field.field->size != 4) { + trace_seq_printf(s, "INVALIDIPv4"); + return 0; + } + print_ip4_addr(s, i, buf); + + return 0; +} + +static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + char have_c = 0; + unsigned char *buf; + int rc = 0; + + /* pI6c */ + if (i == 'I' && *ptr == 'c') { + have_c = 1; + ptr++; + rc++; + } + + if (arg->type == PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return rc; + } + + if (arg->type != PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return rc; + } + + if (!arg->field.field) { + arg->field.field = + tep_find_any_field(event, arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return rc; + } + } + + buf = data + arg->field.field->offset; + + if (arg->field.field->size != 16) { + trace_seq_printf(s, "INVALIDIPv6"); + return rc; + } + + if (have_c) + print_ip6c_addr(s, buf); + else + print_ip6_addr(s, i, buf); + + return rc; +} + +static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + char have_c = 0, have_p = 0; + unsigned char *buf; + struct sockaddr_storage *sa; + int rc = 0; + + /* pISpc */ + if (i == 'I') { + if (*ptr == 'p') { + have_p = 1; + ptr++; + rc++; + } + if (*ptr == 'c') { + have_c = 1; + ptr++; + rc++; + } + } + + if (arg->type == PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return rc; + } + + if (arg->type != PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return rc; + } + + if (!arg->field.field) { + arg->field.field = + tep_find_any_field(event, arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return rc; + } + } + + sa = (struct sockaddr_storage *) (data + arg->field.field->offset); + + if (sa->ss_family == AF_INET) { + struct sockaddr_in *sa4 = (struct sockaddr_in *) sa; + + if (arg->field.field->size < sizeof(struct sockaddr_in)) { + trace_seq_printf(s, "INVALIDIPv4"); + return rc; + } + + print_ip4_addr(s, i, (unsigned char *) &sa4->sin_addr); + if (have_p) + trace_seq_printf(s, ":%d", ntohs(sa4->sin_port)); + + + } else if (sa->ss_family == AF_INET6) { + struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) sa; + + if (arg->field.field->size < sizeof(struct sockaddr_in6)) { + trace_seq_printf(s, "INVALIDIPv6"); + return rc; + } + + if (have_p) + trace_seq_printf(s, "["); + + buf = (unsigned char *) &sa6->sin6_addr; + if (have_c) + print_ip6c_addr(s, buf); + else + print_ip6_addr(s, i, buf); + + if (have_p) + trace_seq_printf(s, "]:%d", ntohs(sa6->sin6_port)); + } + + return rc; +} + +static int print_ip_arg(struct trace_seq *s, const char *ptr, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + char i = *ptr; /* 'i' or 'I' */ + char ver; + int rc = 0; + + ptr++; + rc++; + + ver = *ptr; + ptr++; + rc++; + + switch (ver) { + case '4': + rc += print_ipv4_arg(s, ptr, i, data, size, event, arg); + break; + case '6': + rc += print_ipv6_arg(s, ptr, i, data, size, event, arg); + break; + case 'S': + rc += print_ipsa_arg(s, ptr, i, data, size, event, arg); + break; + default: + return 0; + } + + return rc; +} + +static int is_printable_array(char *p, unsigned int len) +{ + unsigned int i; + + for (i = 0; i < len && p[i]; i++) + if (!isprint(p[i]) && !isspace(p[i])) + return 0; + return 1; +} + +void tep_print_field(struct trace_seq *s, void *data, + struct format_field *field) +{ + unsigned long long val; + unsigned int offset, len, i; + struct tep_handle *pevent = field->event->pevent; + + if (field->flags & FIELD_IS_ARRAY) { + offset = field->offset; + len = field->size; + if (field->flags & FIELD_IS_DYNAMIC) { + val = tep_read_number(pevent, data + offset, len); + offset = val; + len = offset >> 16; + offset &= 0xffff; + } + if (field->flags & FIELD_IS_STRING && + is_printable_array(data + offset, len)) { + trace_seq_printf(s, "%s", (char *)data + offset); + } else { + trace_seq_puts(s, "ARRAY["); + for (i = 0; i < len; i++) { + if (i) + trace_seq_puts(s, ", "); + trace_seq_printf(s, "%02x", + *((unsigned char *)data + offset + i)); + } + trace_seq_putc(s, ']'); + field->flags &= ~FIELD_IS_STRING; + } + } else { + val = tep_read_number(pevent, data + field->offset, + field->size); + if (field->flags & FIELD_IS_POINTER) { + trace_seq_printf(s, "0x%llx", val); + } else if (field->flags & FIELD_IS_SIGNED) { + switch (field->size) { + case 4: + /* + * If field is long then print it in hex. + * A long usually stores pointers. + */ + if (field->flags & FIELD_IS_LONG) + trace_seq_printf(s, "0x%x", (int)val); + else + trace_seq_printf(s, "%d", (int)val); + break; + case 2: + trace_seq_printf(s, "%2d", (short)val); + break; + case 1: + trace_seq_printf(s, "%1d", (char)val); + break; + default: + trace_seq_printf(s, "%lld", val); + } + } else { + if (field->flags & FIELD_IS_LONG) + trace_seq_printf(s, "0x%llx", val); + else + trace_seq_printf(s, "%llu", val); + } + } +} + +void tep_print_fields(struct trace_seq *s, void *data, + int size __maybe_unused, struct event_format *event) +{ + struct format_field *field; + + field = event->format.fields; + while (field) { + trace_seq_printf(s, " %s=", field->name); + tep_print_field(s, data, field); + field = field->next; + } +} + +static void pretty_print(struct trace_seq *s, void *data, int size, struct event_format *event) +{ + struct tep_handle *pevent = event->pevent; + struct print_fmt *print_fmt = &event->print_fmt; + struct print_arg *arg = print_fmt->args; + struct print_arg *args = NULL; + const char *ptr = print_fmt->format; + unsigned long long val; + struct func_map *func; + const char *saveptr; + struct trace_seq p; + char *bprint_fmt = NULL; + char format[32]; + int show_func; + int len_as_arg; + int len_arg; + int len; + int ls; + + if (event->flags & EVENT_FL_FAILED) { + trace_seq_printf(s, "[FAILED TO PARSE]"); + tep_print_fields(s, data, size, event); + return; + } + + if (event->flags & EVENT_FL_ISBPRINT) { + bprint_fmt = get_bprint_format(data, size, event); + args = make_bprint_args(bprint_fmt, data, size, event); + arg = args; + ptr = bprint_fmt; + } + + for (; *ptr; ptr++) { + ls = 0; + if (*ptr == '\\') { + ptr++; + switch (*ptr) { + case 'n': + trace_seq_putc(s, '\n'); + break; + case 't': + trace_seq_putc(s, '\t'); + break; + case 'r': + trace_seq_putc(s, '\r'); + break; + case '\\': + trace_seq_putc(s, '\\'); + break; + default: + trace_seq_putc(s, *ptr); + break; + } + + } else if (*ptr == '%') { + saveptr = ptr; + show_func = 0; + len_as_arg = 0; + cont_process: + ptr++; + switch (*ptr) { + case '%': + trace_seq_putc(s, '%'); + break; + case '#': + /* FIXME: need to handle properly */ + goto cont_process; + case 'h': + ls--; + goto cont_process; + case 'l': + ls++; + goto cont_process; + case 'L': + ls = 2; + goto cont_process; + case '*': + /* The argument is the length. */ + if (!arg) { + do_warning_event(event, "no argument match"); + event->flags |= EVENT_FL_FAILED; + goto out_failed; + } + len_arg = eval_num_arg(data, size, event, arg); + len_as_arg = 1; + arg = arg->next; + goto cont_process; + case '.': + case 'z': + case 'Z': + case '0' ... '9': + case '-': + goto cont_process; + case 'p': + if (pevent->long_size == 4) + ls = 1; + else + ls = 2; + + if (isalnum(ptr[1])) + ptr++; + + if (arg->type == PRINT_BSTRING) { + trace_seq_puts(s, arg->string.string); + arg = arg->next; + break; + } + + if (*ptr == 'F' || *ptr == 'f' || + *ptr == 'S' || *ptr == 's') { + show_func = *ptr; + } else if (*ptr == 'M' || *ptr == 'm') { + print_mac_arg(s, *ptr, data, size, event, arg); + arg = arg->next; + break; + } else if (*ptr == 'I' || *ptr == 'i') { + int n; + + n = print_ip_arg(s, ptr, data, size, event, arg); + if (n > 0) { + ptr += n - 1; + arg = arg->next; + break; + } + } + + /* fall through */ + case 'd': + case 'i': + case 'x': + case 'X': + case 'u': + if (!arg) { + do_warning_event(event, "no argument match"); + event->flags |= EVENT_FL_FAILED; + goto out_failed; + } + + len = ((unsigned long)ptr + 1) - + (unsigned long)saveptr; + + /* should never happen */ + if (len > 31) { + do_warning_event(event, "bad format!"); + event->flags |= EVENT_FL_FAILED; + len = 31; + } + + memcpy(format, saveptr, len); + format[len] = 0; + + val = eval_num_arg(data, size, event, arg); + arg = arg->next; + + if (show_func) { + func = find_func(pevent, val); + if (func) { + trace_seq_puts(s, func->func); + if (show_func == 'F') + trace_seq_printf(s, + "+0x%llx", + val - func->addr); + break; + } + } + if (pevent->long_size == 8 && ls == 1 && + sizeof(long) != 8) { + char *p; + + /* make %l into %ll */ + if (ls == 1 && (p = strchr(format, 'l'))) + memmove(p+1, p, strlen(p)+1); + else if (strcmp(format, "%p") == 0) + strcpy(format, "0x%llx"); + ls = 2; + } + switch (ls) { + case -2: + if (len_as_arg) + trace_seq_printf(s, format, len_arg, (char)val); + else + trace_seq_printf(s, format, (char)val); + break; + case -1: + if (len_as_arg) + trace_seq_printf(s, format, len_arg, (short)val); + else + trace_seq_printf(s, format, (short)val); + break; + case 0: + if (len_as_arg) + trace_seq_printf(s, format, len_arg, (int)val); + else + trace_seq_printf(s, format, (int)val); + break; + case 1: + if (len_as_arg) + trace_seq_printf(s, format, len_arg, (long)val); + else + trace_seq_printf(s, format, (long)val); + break; + case 2: + if (len_as_arg) + trace_seq_printf(s, format, len_arg, + (long long)val); + else + trace_seq_printf(s, format, (long long)val); + break; + default: + do_warning_event(event, "bad count (%d)", ls); + event->flags |= EVENT_FL_FAILED; + } + break; + case 's': + if (!arg) { + do_warning_event(event, "no matching argument"); + event->flags |= EVENT_FL_FAILED; + goto out_failed; + } + + len = ((unsigned long)ptr + 1) - + (unsigned long)saveptr; + + /* should never happen */ + if (len > 31) { + do_warning_event(event, "bad format!"); + event->flags |= EVENT_FL_FAILED; + len = 31; + } + + memcpy(format, saveptr, len); + format[len] = 0; + if (!len_as_arg) + len_arg = -1; + /* Use helper trace_seq */ + trace_seq_init(&p); + print_str_arg(&p, data, size, event, + format, len_arg, arg); + trace_seq_terminate(&p); + trace_seq_puts(s, p.buffer); + trace_seq_destroy(&p); + arg = arg->next; + break; + default: + trace_seq_printf(s, ">%c<", *ptr); + + } + } else + trace_seq_putc(s, *ptr); + } + + if (event->flags & EVENT_FL_FAILED) { +out_failed: + trace_seq_printf(s, "[FAILED TO PARSE]"); + } + + if (args) { + free_args(args); + free(bprint_fmt); + } +} + +/** + * tep_data_lat_fmt - parse the data for the latency format + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @record: the record to read from + * + * This parses out the Latency format (interrupts disabled, + * need rescheduling, in hard/soft interrupt, preempt count + * and lock depth) and places it into the trace_seq. + */ +void tep_data_lat_fmt(struct tep_handle *pevent, + struct trace_seq *s, struct tep_record *record) +{ + static int check_lock_depth = 1; + static int check_migrate_disable = 1; + static int lock_depth_exists; + static int migrate_disable_exists; + unsigned int lat_flags; + unsigned int pc; + int lock_depth; + int migrate_disable; + int hardirq; + int softirq; + void *data = record->data; + + lat_flags = parse_common_flags(pevent, data); + pc = parse_common_pc(pevent, data); + /* lock_depth may not always exist */ + if (lock_depth_exists) + lock_depth = parse_common_lock_depth(pevent, data); + else if (check_lock_depth) { + lock_depth = parse_common_lock_depth(pevent, data); + if (lock_depth < 0) + check_lock_depth = 0; + else + lock_depth_exists = 1; + } + + /* migrate_disable may not always exist */ + if (migrate_disable_exists) + migrate_disable = parse_common_migrate_disable(pevent, data); + else if (check_migrate_disable) { + migrate_disable = parse_common_migrate_disable(pevent, data); + if (migrate_disable < 0) + check_migrate_disable = 0; + else + migrate_disable_exists = 1; + } + + hardirq = lat_flags & TRACE_FLAG_HARDIRQ; + softirq = lat_flags & TRACE_FLAG_SOFTIRQ; + + trace_seq_printf(s, "%c%c%c", + (lat_flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + (lat_flags & TRACE_FLAG_IRQS_NOSUPPORT) ? + 'X' : '.', + (lat_flags & TRACE_FLAG_NEED_RESCHED) ? + 'N' : '.', + (hardirq && softirq) ? 'H' : + hardirq ? 'h' : softirq ? 's' : '.'); + + if (pc) + trace_seq_printf(s, "%x", pc); + else + trace_seq_putc(s, '.'); + + if (migrate_disable_exists) { + if (migrate_disable < 0) + trace_seq_putc(s, '.'); + else + trace_seq_printf(s, "%d", migrate_disable); + } + + if (lock_depth_exists) { + if (lock_depth < 0) + trace_seq_putc(s, '.'); + else + trace_seq_printf(s, "%d", lock_depth); + } + + trace_seq_terminate(s); +} + +/** + * tep_data_type - parse out the given event type + * @pevent: a handle to the pevent + * @rec: the record to read from + * + * This returns the event id from the @rec. + */ +int tep_data_type(struct tep_handle *pevent, struct tep_record *rec) +{ + return trace_parse_common_type(pevent, rec->data); +} + +/** + * tep_data_event_from_type - find the event by a given type + * @pevent: a handle to the pevent + * @type: the type of the event. + * + * This returns the event form a given @type; + */ +struct event_format *tep_data_event_from_type(struct tep_handle *pevent, int type) +{ + return tep_find_event(pevent, type); +} + +/** + * tep_data_pid - parse the PID from record + * @pevent: a handle to the pevent + * @rec: the record to parse + * + * This returns the PID from a record. + */ +int tep_data_pid(struct tep_handle *pevent, struct tep_record *rec) +{ + return parse_common_pid(pevent, rec->data); +} + +/** + * tep_data_preempt_count - parse the preempt count from the record + * @pevent: a handle to the pevent + * @rec: the record to parse + * + * This returns the preempt count from a record. + */ +int tep_data_preempt_count(struct tep_handle *pevent, struct tep_record *rec) +{ + return parse_common_pc(pevent, rec->data); +} + +/** + * tep_data_flags - parse the latency flags from the record + * @pevent: a handle to the pevent + * @rec: the record to parse + * + * This returns the latency flags from a record. + * + * Use trace_flag_type enum for the flags (see event-parse.h). + */ +int tep_data_flags(struct tep_handle *pevent, struct tep_record *rec) +{ + return parse_common_flags(pevent, rec->data); +} + +/** + * tep_data_comm_from_pid - return the command line from PID + * @pevent: a handle to the pevent + * @pid: the PID of the task to search for + * + * This returns a pointer to the command line that has the given + * @pid. + */ +const char *tep_data_comm_from_pid(struct tep_handle *pevent, int pid) +{ + const char *comm; + + comm = find_cmdline(pevent, pid); + return comm; +} + +static struct cmdline * +pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct cmdline *next) +{ + struct cmdline_list *cmdlist = (struct cmdline_list *)next; + + if (cmdlist) + cmdlist = cmdlist->next; + else + cmdlist = pevent->cmdlist; + + while (cmdlist && strcmp(cmdlist->comm, comm) != 0) + cmdlist = cmdlist->next; + + return (struct cmdline *)cmdlist; +} + +/** + * tep_data_pid_from_comm - return the pid from a given comm + * @pevent: a handle to the pevent + * @comm: the cmdline to find the pid from + * @next: the cmdline structure to find the next comm + * + * This returns the cmdline structure that holds a pid for a given + * comm, or NULL if none found. As there may be more than one pid for + * a given comm, the result of this call can be passed back into + * a recurring call in the @next paramater, and then it will find the + * next pid. + * Also, it does a linear seach, so it may be slow. + */ +struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm, + struct cmdline *next) +{ + struct cmdline *cmdline; + + /* + * If the cmdlines have not been converted yet, then use + * the list. + */ + if (!pevent->cmdlines) + return pid_from_cmdlist(pevent, comm, next); + + if (next) { + /* + * The next pointer could have been still from + * a previous call before cmdlines were created + */ + if (next < pevent->cmdlines || + next >= pevent->cmdlines + pevent->cmdline_count) + next = NULL; + else + cmdline = next++; + } + + if (!next) + cmdline = pevent->cmdlines; + + while (cmdline < pevent->cmdlines + pevent->cmdline_count) { + if (strcmp(cmdline->comm, comm) == 0) + return cmdline; + cmdline++; + } + return NULL; +} + +/** + * tep_cmdline_pid - return the pid associated to a given cmdline + * @cmdline: The cmdline structure to get the pid from + * + * Returns the pid for a give cmdline. If @cmdline is NULL, then + * -1 is returned. + */ +int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline) +{ + struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline; + + if (!cmdline) + return -1; + + /* + * If cmdlines have not been created yet, or cmdline is + * not part of the array, then treat it as a cmdlist instead. + */ + if (!pevent->cmdlines || + cmdline < pevent->cmdlines || + cmdline >= pevent->cmdlines + pevent->cmdline_count) + return cmdlist->pid; + + return cmdline->pid; +} + +/** + * tep_event_info - parse the data into the print format + * @s: the trace_seq to write to + * @event: the handle to the event + * @record: the record to read from + * + * This parses the raw @data using the given @event information and + * writes the print format into the trace_seq. + */ +void tep_event_info(struct trace_seq *s, struct event_format *event, + struct tep_record *record) +{ + int print_pretty = 1; + + if (event->pevent->print_raw || (event->flags & EVENT_FL_PRINTRAW)) + tep_print_fields(s, record->data, record->size, event); + else { + + if (event->handler && !(event->flags & EVENT_FL_NOHANDLE)) + print_pretty = event->handler(s, record, event, + event->context); + + if (print_pretty) + pretty_print(s, record->data, record->size, event); + } + + trace_seq_terminate(s); +} + +static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock) +{ + if (!use_trace_clock) + return true; + + if (!strcmp(trace_clock, "local") || !strcmp(trace_clock, "global") + || !strcmp(trace_clock, "uptime") || !strcmp(trace_clock, "perf")) + return true; + + /* trace_clock is setting in tsc or counter mode */ + return false; +} + +/** + * tep_find_event_by_record - return the event from a given record + * @pevent: a handle to the pevent + * @record: The record to get the event from + * + * Returns the associated event for a given record, or NULL if non is + * is found. + */ +struct event_format * +tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record) +{ + int type; + + if (record->size < 0) { + do_warning("ug! negative record size %d", record->size); + return NULL; + } + + type = trace_parse_common_type(pevent, record->data); + + return tep_find_event(pevent, type); +} + +/** + * tep_print_event_task - Write the event task comm, pid and CPU + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @event: the handle to the record's event + * @record: The record to get the event from + * + * Writes the tasks comm, pid and CPU to @s. + */ +void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s, + struct event_format *event, + struct tep_record *record) +{ + void *data = record->data; + const char *comm; + int pid; + + pid = parse_common_pid(pevent, data); + comm = find_cmdline(pevent, pid); + + if (pevent->latency_format) { + trace_seq_printf(s, "%8.8s-%-5d %3d", + comm, pid, record->cpu); + } else + trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu); +} + +/** + * tep_print_event_time - Write the event timestamp + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @event: the handle to the record's event + * @record: The record to get the event from + * @use_trace_clock: Set to parse according to the @pevent->trace_clock + * + * Writes the timestamp of the record into @s. + */ +void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s, + struct event_format *event, + struct tep_record *record, + bool use_trace_clock) +{ + unsigned long secs; + unsigned long usecs; + unsigned long nsecs; + int p; + bool use_usec_format; + + use_usec_format = is_timestamp_in_us(pevent->trace_clock, + use_trace_clock); + if (use_usec_format) { + secs = record->ts / NSEC_PER_SEC; + nsecs = record->ts - secs * NSEC_PER_SEC; + } + + if (pevent->latency_format) { + tep_data_lat_fmt(pevent, s, record); + } + + if (use_usec_format) { + if (pevent->flags & TEP_NSEC_OUTPUT) { + usecs = nsecs; + p = 9; + } else { + usecs = (nsecs + 500) / NSEC_PER_USEC; + /* To avoid usecs larger than 1 sec */ + if (usecs >= USEC_PER_SEC) { + usecs -= USEC_PER_SEC; + secs++; + } + p = 6; + } + + trace_seq_printf(s, " %5lu.%0*lu:", secs, p, usecs); + } else + trace_seq_printf(s, " %12llu:", record->ts); +} + +/** + * tep_print_event_data - Write the event data section + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @event: the handle to the record's event + * @record: The record to get the event from + * + * Writes the parsing of the record's data to @s. + */ +void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s, + struct event_format *event, + struct tep_record *record) +{ + static const char *spaces = " "; /* 20 spaces */ + int len; + + trace_seq_printf(s, " %s: ", event->name); + + /* Space out the event names evenly. */ + len = strlen(event->name); + if (len < 20) + trace_seq_printf(s, "%.*s", 20 - len, spaces); + + tep_event_info(s, event, record); +} + +void tep_print_event(struct tep_handle *pevent, struct trace_seq *s, + struct tep_record *record, bool use_trace_clock) +{ + struct event_format *event; + + event = tep_find_event_by_record(pevent, record); + if (!event) { + int i; + int type = trace_parse_common_type(pevent, record->data); + + do_warning("ug! no event found for type %d", type); + trace_seq_printf(s, "[UNKNOWN TYPE %d]", type); + for (i = 0; i < record->size; i++) + trace_seq_printf(s, " %02x", + ((unsigned char *)record->data)[i]); + return; + } + + tep_print_event_task(pevent, s, event, record); + tep_print_event_time(pevent, s, event, record, use_trace_clock); + tep_print_event_data(pevent, s, event, record); +} + +static int events_id_cmp(const void *a, const void *b) +{ + struct event_format * const * ea = a; + struct event_format * const * eb = b; + + if ((*ea)->id < (*eb)->id) + return -1; + + if ((*ea)->id > (*eb)->id) + return 1; + + return 0; +} + +static int events_name_cmp(const void *a, const void *b) +{ + struct event_format * const * ea = a; + struct event_format * const * eb = b; + int res; + + res = strcmp((*ea)->name, (*eb)->name); + if (res) + return res; + + res = strcmp((*ea)->system, (*eb)->system); + if (res) + return res; + + return events_id_cmp(a, b); +} + +static int events_system_cmp(const void *a, const void *b) +{ + struct event_format * const * ea = a; + struct event_format * const * eb = b; + int res; + + res = strcmp((*ea)->system, (*eb)->system); + if (res) + return res; + + res = strcmp((*ea)->name, (*eb)->name); + if (res) + return res; + + return events_id_cmp(a, b); +} + +struct event_format **tep_list_events(struct tep_handle *pevent, enum event_sort_type sort_type) +{ + struct event_format **events; + int (*sort)(const void *a, const void *b); + + events = pevent->sort_events; + + if (events && pevent->last_type == sort_type) + return events; + + if (!events) { + events = malloc(sizeof(*events) * (pevent->nr_events + 1)); + if (!events) + return NULL; + + memcpy(events, pevent->events, sizeof(*events) * pevent->nr_events); + events[pevent->nr_events] = NULL; + + pevent->sort_events = events; + + /* the internal events are sorted by id */ + if (sort_type == EVENT_SORT_ID) { + pevent->last_type = sort_type; + return events; + } + } + + switch (sort_type) { + case EVENT_SORT_ID: + sort = events_id_cmp; + break; + case EVENT_SORT_NAME: + sort = events_name_cmp; + break; + case EVENT_SORT_SYSTEM: + sort = events_system_cmp; + break; + default: + return events; + } + + qsort(events, pevent->nr_events, sizeof(*events), sort); + pevent->last_type = sort_type; + + return events; +} + +static struct format_field ** +get_event_fields(const char *type, const char *name, + int count, struct format_field *list) +{ + struct format_field **fields; + struct format_field *field; + int i = 0; + + fields = malloc(sizeof(*fields) * (count + 1)); + if (!fields) + return NULL; + + for (field = list; field; field = field->next) { + fields[i++] = field; + if (i == count + 1) { + do_warning("event %s has more %s fields than specified", + name, type); + i--; + break; + } + } + + if (i != count) + do_warning("event %s has less %s fields than specified", + name, type); + + fields[i] = NULL; + + return fields; +} + +/** + * tep_event_common_fields - return a list of common fields for an event + * @event: the event to return the common fields of. + * + * Returns an allocated array of fields. The last item in the array is NULL. + * The array must be freed with free(). + */ +struct format_field **tep_event_common_fields(struct event_format *event) +{ + return get_event_fields("common", event->name, + event->format.nr_common, + event->format.common_fields); +} + +/** + * tep_event_fields - return a list of event specific fields for an event + * @event: the event to return the fields of. + * + * Returns an allocated array of fields. The last item in the array is NULL. + * The array must be freed with free(). + */ +struct format_field **tep_event_fields(struct event_format *event) +{ + return get_event_fields("event", event->name, + event->format.nr_fields, + event->format.fields); +} + +static void print_fields(struct trace_seq *s, struct print_flag_sym *field) +{ + trace_seq_printf(s, "{ %s, %s }", field->value, field->str); + if (field->next) { + trace_seq_puts(s, ", "); + print_fields(s, field->next); + } +} + +/* for debugging */ +static void print_args(struct print_arg *args) +{ + int print_paren = 1; + struct trace_seq s; + + switch (args->type) { + case PRINT_NULL: + printf("null"); + break; + case PRINT_ATOM: + printf("%s", args->atom.atom); + break; + case PRINT_FIELD: + printf("REC->%s", args->field.name); + break; + case PRINT_FLAGS: + printf("__print_flags("); + print_args(args->flags.field); + printf(", %s, ", args->flags.delim); + trace_seq_init(&s); + print_fields(&s, args->flags.flags); + trace_seq_do_printf(&s); + trace_seq_destroy(&s); + printf(")"); + break; + case PRINT_SYMBOL: + printf("__print_symbolic("); + print_args(args->symbol.field); + printf(", "); + trace_seq_init(&s); + print_fields(&s, args->symbol.symbols); + trace_seq_do_printf(&s); + trace_seq_destroy(&s); + printf(")"); + break; + case PRINT_HEX: + printf("__print_hex("); + print_args(args->hex.field); + printf(", "); + print_args(args->hex.size); + printf(")"); + break; + case PRINT_HEX_STR: + printf("__print_hex_str("); + print_args(args->hex.field); + printf(", "); + print_args(args->hex.size); + printf(")"); + break; + case PRINT_INT_ARRAY: + printf("__print_array("); + print_args(args->int_array.field); + printf(", "); + print_args(args->int_array.count); + printf(", "); + print_args(args->int_array.el_size); + printf(")"); + break; + case PRINT_STRING: + case PRINT_BSTRING: + printf("__get_str(%s)", args->string.string); + break; + case PRINT_BITMASK: + printf("__get_bitmask(%s)", args->bitmask.bitmask); + break; + case PRINT_TYPE: + printf("(%s)", args->typecast.type); + print_args(args->typecast.item); + break; + case PRINT_OP: + if (strcmp(args->op.op, ":") == 0) + print_paren = 0; + if (print_paren) + printf("("); + print_args(args->op.left); + printf(" %s ", args->op.op); + print_args(args->op.right); + if (print_paren) + printf(")"); + break; + default: + /* we should warn... */ + return; + } + if (args->next) { + printf("\n"); + print_args(args->next); + } +} + +static void parse_header_field(const char *field, + int *offset, int *size, int mandatory) +{ + unsigned long long save_input_buf_ptr; + unsigned long long save_input_buf_siz; + char *token; + int type; + + save_input_buf_ptr = input_buf_ptr; + save_input_buf_siz = input_buf_siz; + + if (read_expected(EVENT_ITEM, "field") < 0) + return; + if (read_expected(EVENT_OP, ":") < 0) + return; + + /* type */ + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + free_token(token); + + /* + * If this is not a mandatory field, then test it first. + */ + if (mandatory) { + if (read_expected(EVENT_ITEM, field) < 0) + return; + } else { + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + if (strcmp(token, field) != 0) + goto discard; + free_token(token); + } + + if (read_expected(EVENT_OP, ";") < 0) + return; + if (read_expected(EVENT_ITEM, "offset") < 0) + return; + if (read_expected(EVENT_OP, ":") < 0) + return; + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + *offset = atoi(token); + free_token(token); + if (read_expected(EVENT_OP, ";") < 0) + return; + if (read_expected(EVENT_ITEM, "size") < 0) + return; + if (read_expected(EVENT_OP, ":") < 0) + return; + if (read_expect_type(EVENT_ITEM, &token) < 0) + goto fail; + *size = atoi(token); + free_token(token); + if (read_expected(EVENT_OP, ";") < 0) + return; + type = read_token(&token); + if (type != EVENT_NEWLINE) { + /* newer versions of the kernel have a "signed" type */ + if (type != EVENT_ITEM) + goto fail; + + if (strcmp(token, "signed") != 0) + goto fail; + + free_token(token); + + if (read_expected(EVENT_OP, ":") < 0) + return; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + + free_token(token); + if (read_expected(EVENT_OP, ";") < 0) + return; + + if (read_expect_type(EVENT_NEWLINE, &token)) + goto fail; + } + fail: + free_token(token); + return; + + discard: + input_buf_ptr = save_input_buf_ptr; + input_buf_siz = save_input_buf_siz; + *offset = 0; + *size = 0; + free_token(token); +} + +/** + * tep_parse_header_page - parse the data stored in the header page + * @pevent: the handle to the pevent + * @buf: the buffer storing the header page format string + * @size: the size of @buf + * @long_size: the long size to use if there is no header + * + * This parses the header page format for information on the + * ring buffer used. The @buf should be copied from + * + * /sys/kernel/debug/tracing/events/header_page + */ +int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long size, + int long_size) +{ + int ignore; + + if (!size) { + /* + * Old kernels did not have header page info. + * Sorry but we just use what we find here in user space. + */ + pevent->header_page_ts_size = sizeof(long long); + pevent->header_page_size_size = long_size; + pevent->header_page_data_offset = sizeof(long long) + long_size; + pevent->old_format = 1; + return -1; + } + init_input_buf(buf, size); + + parse_header_field("timestamp", &pevent->header_page_ts_offset, + &pevent->header_page_ts_size, 1); + parse_header_field("commit", &pevent->header_page_size_offset, + &pevent->header_page_size_size, 1); + parse_header_field("overwrite", &pevent->header_page_overwrite, + &ignore, 0); + parse_header_field("data", &pevent->header_page_data_offset, + &pevent->header_page_data_size, 1); + + return 0; +} + +static int event_matches(struct event_format *event, + int id, const char *sys_name, + const char *event_name) +{ + if (id >= 0 && id != event->id) + return 0; + + if (event_name && (strcmp(event_name, event->name) != 0)) + return 0; + + if (sys_name && (strcmp(sys_name, event->system) != 0)) + return 0; + + return 1; +} + +static void free_handler(struct event_handler *handle) +{ + free((void *)handle->sys_name); + free((void *)handle->event_name); + free(handle); +} + +static int find_event_handle(struct tep_handle *pevent, struct event_format *event) +{ + struct event_handler *handle, **next; + + for (next = &pevent->handlers; *next; + next = &(*next)->next) { + handle = *next; + if (event_matches(event, handle->id, + handle->sys_name, + handle->event_name)) + break; + } + + if (!(*next)) + return 0; + + pr_stat("overriding event (%d) %s:%s with new print handler", + event->id, event->system, event->name); + + event->handler = handle->func; + event->context = handle->context; + + *next = handle->next; + free_handler(handle); + + return 1; +} + +/** + * __tep_parse_format - parse the event format + * @buf: the buffer storing the event format string + * @size: the size of @buf + * @sys: the system the event belongs to + * + * This parses the event format and creates an event structure + * to quickly parse raw data for a given event. + * + * These files currently come from: + * + * /sys/kernel/debug/tracing/events/.../.../format + */ +enum tep_errno __tep_parse_format(struct event_format **eventp, + struct tep_handle *pevent, const char *buf, + unsigned long size, const char *sys) +{ + struct event_format *event; + int ret; + + init_input_buf(buf, size); + + *eventp = event = alloc_event(); + if (!event) + return TEP_ERRNO__MEM_ALLOC_FAILED; + + event->name = event_read_name(); + if (!event->name) { + /* Bad event? */ + ret = TEP_ERRNO__MEM_ALLOC_FAILED; + goto event_alloc_failed; + } + + if (strcmp(sys, "ftrace") == 0) { + event->flags |= EVENT_FL_ISFTRACE; + + if (strcmp(event->name, "bprint") == 0) + event->flags |= EVENT_FL_ISBPRINT; + } + + event->id = event_read_id(); + if (event->id < 0) { + ret = TEP_ERRNO__READ_ID_FAILED; + /* + * This isn't an allocation error actually. + * But as the ID is critical, just bail out. + */ + goto event_alloc_failed; + } + + event->system = strdup(sys); + if (!event->system) { + ret = TEP_ERRNO__MEM_ALLOC_FAILED; + goto event_alloc_failed; + } + + /* Add pevent to event so that it can be referenced */ + event->pevent = pevent; + + ret = event_read_format(event); + if (ret < 0) { + ret = TEP_ERRNO__READ_FORMAT_FAILED; + goto event_parse_failed; + } + + /* + * If the event has an override, don't print warnings if the event + * print format fails to parse. + */ + if (pevent && find_event_handle(pevent, event)) + show_warning = 0; + + ret = event_read_print(event); + show_warning = 1; + + if (ret < 0) { + ret = TEP_ERRNO__READ_PRINT_FAILED; + goto event_parse_failed; + } + + if (!ret && (event->flags & EVENT_FL_ISFTRACE)) { + struct format_field *field; + struct print_arg *arg, **list; + + /* old ftrace had no args */ + list = &event->print_fmt.args; + for (field = event->format.fields; field; field = field->next) { + arg = alloc_arg(); + if (!arg) { + event->flags |= EVENT_FL_FAILED; + return TEP_ERRNO__OLD_FTRACE_ARG_FAILED; + } + arg->type = PRINT_FIELD; + arg->field.name = strdup(field->name); + if (!arg->field.name) { + event->flags |= EVENT_FL_FAILED; + free_arg(arg); + return TEP_ERRNO__OLD_FTRACE_ARG_FAILED; + } + arg->field.field = field; + *list = arg; + list = &arg->next; + } + return 0; + } + + return 0; + + event_parse_failed: + event->flags |= EVENT_FL_FAILED; + return ret; + + event_alloc_failed: + free(event->system); + free(event->name); + free(event); + *eventp = NULL; + return ret; +} + +static enum tep_errno +__parse_event(struct tep_handle *pevent, + struct event_format **eventp, + const char *buf, unsigned long size, + const char *sys) +{ + int ret = __tep_parse_format(eventp, pevent, buf, size, sys); + struct event_format *event = *eventp; + + if (event == NULL) + return ret; + + if (pevent && add_event(pevent, event)) { + ret = TEP_ERRNO__MEM_ALLOC_FAILED; + goto event_add_failed; + } + +#define PRINT_ARGS 0 + if (PRINT_ARGS && event->print_fmt.args) + print_args(event->print_fmt.args); + + return 0; + +event_add_failed: + tep_free_format(event); + return ret; +} + +/** + * tep_parse_format - parse the event format + * @pevent: the handle to the pevent + * @eventp: returned format + * @buf: the buffer storing the event format string + * @size: the size of @buf + * @sys: the system the event belongs to + * + * This parses the event format and creates an event structure + * to quickly parse raw data for a given event. + * + * These files currently come from: + * + * /sys/kernel/debug/tracing/events/.../.../format + */ +enum tep_errno tep_parse_format(struct tep_handle *pevent, + struct event_format **eventp, + const char *buf, + unsigned long size, const char *sys) +{ + return __parse_event(pevent, eventp, buf, size, sys); +} + +/** + * tep_parse_event - parse the event format + * @pevent: the handle to the pevent + * @buf: the buffer storing the event format string + * @size: the size of @buf + * @sys: the system the event belongs to + * + * This parses the event format and creates an event structure + * to quickly parse raw data for a given event. + * + * These files currently come from: + * + * /sys/kernel/debug/tracing/events/.../.../format + */ +enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf, + unsigned long size, const char *sys) +{ + struct event_format *event = NULL; + return __parse_event(pevent, &event, buf, size, sys); +} + +#undef _PE +#define _PE(code, str) str +static const char * const tep_error_str[] = { + TEP_ERRORS +}; +#undef _PE + +int tep_strerror(struct tep_handle *pevent __maybe_unused, + enum tep_errno errnum, char *buf, size_t buflen) +{ + int idx; + const char *msg; + + if (errnum >= 0) { + str_error_r(errnum, buf, buflen); + return 0; + } + + if (errnum <= __TEP_ERRNO__START || + errnum >= __TEP_ERRNO__END) + return -1; + + idx = errnum - __TEP_ERRNO__START - 1; + msg = tep_error_str[idx]; + snprintf(buf, buflen, "%s", msg); + + return 0; +} + +int get_field_val(struct trace_seq *s, struct format_field *field, + const char *name, struct tep_record *record, + unsigned long long *val, int err) +{ + if (!field) { + if (err) + trace_seq_printf(s, "<CANT FIND FIELD %s>", name); + return -1; + } + + if (tep_read_number_field(field, record->data, val)) { + if (err) + trace_seq_printf(s, " %s=INVALID", name); + return -1; + } + + return 0; +} + +/** + * tep_get_field_raw - return the raw pointer into the data field + * @s: The seq to print to on error + * @event: the event that the field is for + * @name: The name of the field + * @record: The record with the field name. + * @len: place to store the field length. + * @err: print default error if failed. + * + * Returns a pointer into record->data of the field and places + * the length of the field in @len. + * + * On failure, it returns NULL. + */ +void *tep_get_field_raw(struct trace_seq *s, struct event_format *event, + const char *name, struct tep_record *record, + int *len, int err) +{ + struct format_field *field; + void *data = record->data; + unsigned offset; + int dummy; + + if (!event) + return NULL; + + field = tep_find_field(event, name); + + if (!field) { + if (err) + trace_seq_printf(s, "<CANT FIND FIELD %s>", name); + return NULL; + } + + /* Allow @len to be NULL */ + if (!len) + len = &dummy; + + offset = field->offset; + if (field->flags & FIELD_IS_DYNAMIC) { + offset = tep_read_number(event->pevent, + data + offset, field->size); + *len = offset >> 16; + offset &= 0xffff; + } else + *len = field->size; + + return data + offset; +} + +/** + * tep_get_field_val - find a field and return its value + * @s: The seq to print to on error + * @event: the event that the field is for + * @name: The name of the field + * @record: The record with the field name. + * @val: place to store the value of the field. + * @err: print default error if failed. + * + * Returns 0 on success -1 on field not found. + */ +int tep_get_field_val(struct trace_seq *s, struct event_format *event, + const char *name, struct tep_record *record, + unsigned long long *val, int err) +{ + struct format_field *field; + + if (!event) + return -1; + + field = tep_find_field(event, name); + + return get_field_val(s, field, name, record, val, err); +} + +/** + * tep_get_common_field_val - find a common field and return its value + * @s: The seq to print to on error + * @event: the event that the field is for + * @name: The name of the field + * @record: The record with the field name. + * @val: place to store the value of the field. + * @err: print default error if failed. + * + * Returns 0 on success -1 on field not found. + */ +int tep_get_common_field_val(struct trace_seq *s, struct event_format *event, + const char *name, struct tep_record *record, + unsigned long long *val, int err) +{ + struct format_field *field; + + if (!event) + return -1; + + field = tep_find_common_field(event, name); + + return get_field_val(s, field, name, record, val, err); +} + +/** + * tep_get_any_field_val - find a any field and return its value + * @s: The seq to print to on error + * @event: the event that the field is for + * @name: The name of the field + * @record: The record with the field name. + * @val: place to store the value of the field. + * @err: print default error if failed. + * + * Returns 0 on success -1 on field not found. + */ +int tep_get_any_field_val(struct trace_seq *s, struct event_format *event, + const char *name, struct tep_record *record, + unsigned long long *val, int err) +{ + struct format_field *field; + + if (!event) + return -1; + + field = tep_find_any_field(event, name); + + return get_field_val(s, field, name, record, val, err); +} + +/** + * tep_print_num_field - print a field and a format + * @s: The seq to print to + * @fmt: The printf format to print the field with. + * @event: the event that the field is for + * @name: The name of the field + * @record: The record with the field name. + * @err: print default error if failed. + * + * Returns: 0 on success, -1 field not found, or 1 if buffer is full. + */ +int tep_print_num_field(struct trace_seq *s, const char *fmt, + struct event_format *event, const char *name, + struct tep_record *record, int err) +{ + struct format_field *field = tep_find_field(event, name); + unsigned long long val; + + if (!field) + goto failed; + + if (tep_read_number_field(field, record->data, &val)) + goto failed; + + return trace_seq_printf(s, fmt, val); + + failed: + if (err) + trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name); + return -1; +} + +/** + * tep_print_func_field - print a field and a format for function pointers + * @s: The seq to print to + * @fmt: The printf format to print the field with. + * @event: the event that the field is for + * @name: The name of the field + * @record: The record with the field name. + * @err: print default error if failed. + * + * Returns: 0 on success, -1 field not found, or 1 if buffer is full. + */ +int tep_print_func_field(struct trace_seq *s, const char *fmt, + struct event_format *event, const char *name, + struct tep_record *record, int err) +{ + struct format_field *field = tep_find_field(event, name); + struct tep_handle *pevent = event->pevent; + unsigned long long val; + struct func_map *func; + char tmp[128]; + + if (!field) + goto failed; + + if (tep_read_number_field(field, record->data, &val)) + goto failed; + + func = find_func(pevent, val); + + if (func) + snprintf(tmp, 128, "%s/0x%llx", func->func, func->addr - val); + else + sprintf(tmp, "0x%08llx", val); + + return trace_seq_printf(s, fmt, tmp); + + failed: + if (err) + trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name); + return -1; +} + +static void free_func_handle(struct tep_function_handler *func) +{ + struct func_params *params; + + free(func->name); + + while (func->params) { + params = func->params; + func->params = params->next; + free(params); + } + + free(func); +} + +/** + * tep_register_print_function - register a helper function + * @pevent: the handle to the pevent + * @func: the function to process the helper function + * @ret_type: the return type of the helper function + * @name: the name of the helper function + * @parameters: A list of enum tep_func_arg_type + * + * Some events may have helper functions in the print format arguments. + * This allows a plugin to dynamically create a way to process one + * of these functions. + * + * The @parameters is a variable list of tep_func_arg_type enums that + * must end with TEP_FUNC_ARG_VOID. + */ +int tep_register_print_function(struct tep_handle *pevent, + tep_func_handler func, + enum tep_func_arg_type ret_type, + char *name, ...) +{ + struct tep_function_handler *func_handle; + struct func_params **next_param; + struct func_params *param; + enum tep_func_arg_type type; + va_list ap; + int ret; + + func_handle = find_func_handler(pevent, name); + if (func_handle) { + /* + * This is most like caused by the users own + * plugins updating the function. This overrides the + * system defaults. + */ + pr_stat("override of function helper '%s'", name); + remove_func_handler(pevent, name); + } + + func_handle = calloc(1, sizeof(*func_handle)); + if (!func_handle) { + do_warning("Failed to allocate function handler"); + return TEP_ERRNO__MEM_ALLOC_FAILED; + } + + func_handle->ret_type = ret_type; + func_handle->name = strdup(name); + func_handle->func = func; + if (!func_handle->name) { + do_warning("Failed to allocate function name"); + free(func_handle); + return TEP_ERRNO__MEM_ALLOC_FAILED; + } + + next_param = &(func_handle->params); + va_start(ap, name); + for (;;) { + type = va_arg(ap, enum tep_func_arg_type); + if (type == TEP_FUNC_ARG_VOID) + break; + + if (type >= TEP_FUNC_ARG_MAX_TYPES) { + do_warning("Invalid argument type %d", type); + ret = TEP_ERRNO__INVALID_ARG_TYPE; + goto out_free; + } + + param = malloc(sizeof(*param)); + if (!param) { + do_warning("Failed to allocate function param"); + ret = TEP_ERRNO__MEM_ALLOC_FAILED; + goto out_free; + } + param->type = type; + param->next = NULL; + + *next_param = param; + next_param = &(param->next); + + func_handle->nr_args++; + } + va_end(ap); + + func_handle->next = pevent->func_handlers; + pevent->func_handlers = func_handle; + + return 0; + out_free: + va_end(ap); + free_func_handle(func_handle); + return ret; +} + +/** + * tep_unregister_print_function - unregister a helper function + * @pevent: the handle to the pevent + * @func: the function to process the helper function + * @name: the name of the helper function + * + * This function removes existing print handler for function @name. + * + * Returns 0 if the handler was removed successully, -1 otherwise. + */ +int tep_unregister_print_function(struct tep_handle *pevent, + tep_func_handler func, char *name) +{ + struct tep_function_handler *func_handle; + + func_handle = find_func_handler(pevent, name); + if (func_handle && func_handle->func == func) { + remove_func_handler(pevent, name); + return 0; + } + return -1; +} + +static struct event_format *search_event(struct tep_handle *pevent, int id, + const char *sys_name, + const char *event_name) +{ + struct event_format *event; + + if (id >= 0) { + /* search by id */ + event = tep_find_event(pevent, id); + if (!event) + return NULL; + if (event_name && (strcmp(event_name, event->name) != 0)) + return NULL; + if (sys_name && (strcmp(sys_name, event->system) != 0)) + return NULL; + } else { + event = tep_find_event_by_name(pevent, sys_name, event_name); + if (!event) + return NULL; + } + return event; +} + +/** + * tep_register_event_handler - register a way to parse an event + * @pevent: the handle to the pevent + * @id: the id of the event to register + * @sys_name: the system name the event belongs to + * @event_name: the name of the event + * @func: the function to call to parse the event information + * @context: the data to be passed to @func + * + * This function allows a developer to override the parsing of + * a given event. If for some reason the default print format + * is not sufficient, this function will register a function + * for an event to be used to parse the data instead. + * + * If @id is >= 0, then it is used to find the event. + * else @sys_name and @event_name are used. + */ +int tep_register_event_handler(struct tep_handle *pevent, int id, + const char *sys_name, const char *event_name, + tep_event_handler_func func, void *context) +{ + struct event_format *event; + struct event_handler *handle; + + event = search_event(pevent, id, sys_name, event_name); + if (event == NULL) + goto not_found; + + pr_stat("overriding event (%d) %s:%s with new print handler", + event->id, event->system, event->name); + + event->handler = func; + event->context = context; + return 0; + + not_found: + /* Save for later use. */ + handle = calloc(1, sizeof(*handle)); + if (!handle) { + do_warning("Failed to allocate event handler"); + return TEP_ERRNO__MEM_ALLOC_FAILED; + } + + handle->id = id; + if (event_name) + handle->event_name = strdup(event_name); + if (sys_name) + handle->sys_name = strdup(sys_name); + + if ((event_name && !handle->event_name) || + (sys_name && !handle->sys_name)) { + do_warning("Failed to allocate event/sys name"); + free((void *)handle->event_name); + free((void *)handle->sys_name); + free(handle); + return TEP_ERRNO__MEM_ALLOC_FAILED; + } + + handle->func = func; + handle->next = pevent->handlers; + pevent->handlers = handle; + handle->context = context; + + return -1; +} + +static int handle_matches(struct event_handler *handler, int id, + const char *sys_name, const char *event_name, + tep_event_handler_func func, void *context) +{ + if (id >= 0 && id != handler->id) + return 0; + + if (event_name && (strcmp(event_name, handler->event_name) != 0)) + return 0; + + if (sys_name && (strcmp(sys_name, handler->sys_name) != 0)) + return 0; + + if (func != handler->func || context != handler->context) + return 0; + + return 1; +} + +/** + * tep_unregister_event_handler - unregister an existing event handler + * @pevent: the handle to the pevent + * @id: the id of the event to unregister + * @sys_name: the system name the handler belongs to + * @event_name: the name of the event handler + * @func: the function to call to parse the event information + * @context: the data to be passed to @func + * + * This function removes existing event handler (parser). + * + * If @id is >= 0, then it is used to find the event. + * else @sys_name and @event_name are used. + * + * Returns 0 if handler was removed successfully, -1 if event was not found. + */ +int tep_unregister_event_handler(struct tep_handle *pevent, int id, + const char *sys_name, const char *event_name, + tep_event_handler_func func, void *context) +{ + struct event_format *event; + struct event_handler *handle; + struct event_handler **next; + + event = search_event(pevent, id, sys_name, event_name); + if (event == NULL) + goto not_found; + + if (event->handler == func && event->context == context) { + pr_stat("removing override handler for event (%d) %s:%s. Going back to default handler.", + event->id, event->system, event->name); + + event->handler = NULL; + event->context = NULL; + return 0; + } + +not_found: + for (next = &pevent->handlers; *next; next = &(*next)->next) { + handle = *next; + if (handle_matches(handle, id, sys_name, event_name, + func, context)) + break; + } + + if (!(*next)) + return -1; + + *next = handle->next; + free_handler(handle); + + return 0; +} + +/** + * tep_alloc - create a pevent handle + */ +struct tep_handle *tep_alloc(void) +{ + struct tep_handle *pevent = calloc(1, sizeof(*pevent)); + + if (pevent) + pevent->ref_count = 1; + + return pevent; +} + +void tep_ref(struct tep_handle *pevent) +{ + pevent->ref_count++; +} + +void tep_free_format_field(struct format_field *field) +{ + free(field->type); + if (field->alias != field->name) + free(field->alias); + free(field->name); + free(field); +} + +static void free_format_fields(struct format_field *field) +{ + struct format_field *next; + + while (field) { + next = field->next; + tep_free_format_field(field); + field = next; + } +} + +static void free_formats(struct format *format) +{ + free_format_fields(format->common_fields); + free_format_fields(format->fields); +} + +void tep_free_format(struct event_format *event) +{ + free(event->name); + free(event->system); + + free_formats(&event->format); + + free(event->print_fmt.format); + free_args(event->print_fmt.args); + + free(event); +} + +/** + * tep_free - free a pevent handle + * @pevent: the pevent handle to free + */ +void tep_free(struct tep_handle *pevent) +{ + struct cmdline_list *cmdlist, *cmdnext; + struct func_list *funclist, *funcnext; + struct printk_list *printklist, *printknext; + struct tep_function_handler *func_handler; + struct event_handler *handle; + int i; + + if (!pevent) + return; + + cmdlist = pevent->cmdlist; + funclist = pevent->funclist; + printklist = pevent->printklist; + + pevent->ref_count--; + if (pevent->ref_count) + return; + + if (pevent->cmdlines) { + for (i = 0; i < pevent->cmdline_count; i++) + free(pevent->cmdlines[i].comm); + free(pevent->cmdlines); + } + + while (cmdlist) { + cmdnext = cmdlist->next; + free(cmdlist->comm); + free(cmdlist); + cmdlist = cmdnext; + } + + if (pevent->func_map) { + for (i = 0; i < (int)pevent->func_count; i++) { + free(pevent->func_map[i].func); + free(pevent->func_map[i].mod); + } + free(pevent->func_map); + } + + while (funclist) { + funcnext = funclist->next; + free(funclist->func); + free(funclist->mod); + free(funclist); + funclist = funcnext; + } + + while (pevent->func_handlers) { + func_handler = pevent->func_handlers; + pevent->func_handlers = func_handler->next; + free_func_handle(func_handler); + } + + if (pevent->printk_map) { + for (i = 0; i < (int)pevent->printk_count; i++) + free(pevent->printk_map[i].printk); + free(pevent->printk_map); + } + + while (printklist) { + printknext = printklist->next; + free(printklist->printk); + free(printklist); + printklist = printknext; + } + + for (i = 0; i < pevent->nr_events; i++) + tep_free_format(pevent->events[i]); + + while (pevent->handlers) { + handle = pevent->handlers; + pevent->handlers = handle->next; + free_handler(handle); + } + + free(pevent->trace_clock); + free(pevent->events); + free(pevent->sort_events); + free(pevent->func_resolver); + + free(pevent); +} + +void tep_unref(struct tep_handle *pevent) +{ + tep_free(pevent); +} diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h new file mode 100644 index 000000000..44b7c2d41 --- /dev/null +++ b/tools/lib/traceevent/event-parse.h @@ -0,0 +1,991 @@ +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#ifndef _PARSE_EVENTS_H +#define _PARSE_EVENTS_H + +#include <stdbool.h> +#include <stdarg.h> +#include <stdio.h> +#include <regex.h> +#include <string.h> + +#ifndef __maybe_unused +#define __maybe_unused __attribute__((unused)) +#endif + +/* ----------------------- trace_seq ----------------------- */ + + +#ifndef TRACE_SEQ_BUF_SIZE +#define TRACE_SEQ_BUF_SIZE 4096 +#endif + +#ifndef DEBUG_RECORD +#define DEBUG_RECORD 0 +#endif + +struct tep_record { + unsigned long long ts; + unsigned long long offset; + long long missed_events; /* buffer dropped events before */ + int record_size; /* size of binary record */ + int size; /* size of data */ + void *data; + int cpu; + int ref_count; + int locked; /* Do not free, even if ref_count is zero */ + void *priv; +#if DEBUG_RECORD + struct tep_record *prev; + struct tep_record *next; + long alloc_addr; +#endif +}; + +enum trace_seq_fail { + TRACE_SEQ__GOOD, + TRACE_SEQ__BUFFER_POISONED, + TRACE_SEQ__MEM_ALLOC_FAILED, +}; + +/* + * Trace sequences are used to allow a function to call several other functions + * to create a string of data to use (up to a max of PAGE_SIZE). + */ + +struct trace_seq { + char *buffer; + unsigned int buffer_size; + unsigned int len; + unsigned int readpos; + enum trace_seq_fail state; +}; + +void trace_seq_init(struct trace_seq *s); +void trace_seq_reset(struct trace_seq *s); +void trace_seq_destroy(struct trace_seq *s); + +extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) + __attribute__ ((format (printf, 2, 0))); + +extern int trace_seq_puts(struct trace_seq *s, const char *str); +extern int trace_seq_putc(struct trace_seq *s, unsigned char c); + +extern void trace_seq_terminate(struct trace_seq *s); + +extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp); +extern int trace_seq_do_printf(struct trace_seq *s); + + +/* ----------------------- pevent ----------------------- */ + +struct tep_handle; +struct event_format; + +typedef int (*tep_event_handler_func)(struct trace_seq *s, + struct tep_record *record, + struct event_format *event, + void *context); + +typedef int (*tep_plugin_load_func)(struct tep_handle *pevent); +typedef int (*tep_plugin_unload_func)(struct tep_handle *pevent); + +struct tep_plugin_option { + struct tep_plugin_option *next; + void *handle; + char *file; + char *name; + char *plugin_alias; + char *description; + const char *value; + void *priv; + int set; +}; + +/* + * Plugin hooks that can be called: + * + * TEP_PLUGIN_LOADER: (required) + * The function name to initialized the plugin. + * + * int TEP_PLUGIN_LOADER(struct tep_handle *pevent) + * + * TEP_PLUGIN_UNLOADER: (optional) + * The function called just before unloading + * + * int TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) + * + * TEP_PLUGIN_OPTIONS: (optional) + * Plugin options that can be set before loading + * + * struct tep_plugin_option TEP_PLUGIN_OPTIONS[] = { + * { + * .name = "option-name", + * .plugin_alias = "override-file-name", (optional) + * .description = "description of option to show users", + * }, + * { + * .name = NULL, + * }, + * }; + * + * Array must end with .name = NULL; + * + * + * .plugin_alias is used to give a shorter name to access + * the vairable. Useful if a plugin handles more than one event. + * + * If .value is not set, then it is considered a boolean and only + * .set will be processed. If .value is defined, then it is considered + * a string option and .set will be ignored. + * + * TEP_PLUGIN_ALIAS: (optional) + * The name to use for finding options (uses filename if not defined) + */ +#define TEP_PLUGIN_LOADER tep_plugin_loader +#define TEP_PLUGIN_UNLOADER tep_plugin_unloader +#define TEP_PLUGIN_OPTIONS tep_plugin_options +#define TEP_PLUGIN_ALIAS tep_plugin_alias +#define _MAKE_STR(x) #x +#define MAKE_STR(x) _MAKE_STR(x) +#define TEP_PLUGIN_LOADER_NAME MAKE_STR(TEP_PLUGIN_LOADER) +#define TEP_PLUGIN_UNLOADER_NAME MAKE_STR(TEP_PLUGIN_UNLOADER) +#define TEP_PLUGIN_OPTIONS_NAME MAKE_STR(TEP_PLUGIN_OPTIONS) +#define TEP_PLUGIN_ALIAS_NAME MAKE_STR(TEP_PLUGIN_ALIAS) + +enum format_flags { + FIELD_IS_ARRAY = 1, + FIELD_IS_POINTER = 2, + FIELD_IS_SIGNED = 4, + FIELD_IS_STRING = 8, + FIELD_IS_DYNAMIC = 16, + FIELD_IS_LONG = 32, + FIELD_IS_FLAG = 64, + FIELD_IS_SYMBOLIC = 128, +}; + +struct format_field { + struct format_field *next; + struct event_format *event; + char *type; + char *name; + char *alias; + int offset; + int size; + unsigned int arraylen; + unsigned int elementsize; + unsigned long flags; +}; + +struct format { + int nr_common; + int nr_fields; + struct format_field *common_fields; + struct format_field *fields; +}; + +struct print_arg_atom { + char *atom; +}; + +struct print_arg_string { + char *string; + int offset; +}; + +struct print_arg_bitmask { + char *bitmask; + int offset; +}; + +struct print_arg_field { + char *name; + struct format_field *field; +}; + +struct print_flag_sym { + struct print_flag_sym *next; + char *value; + char *str; +}; + +struct print_arg_typecast { + char *type; + struct print_arg *item; +}; + +struct print_arg_flags { + struct print_arg *field; + char *delim; + struct print_flag_sym *flags; +}; + +struct print_arg_symbol { + struct print_arg *field; + struct print_flag_sym *symbols; +}; + +struct print_arg_hex { + struct print_arg *field; + struct print_arg *size; +}; + +struct print_arg_int_array { + struct print_arg *field; + struct print_arg *count; + struct print_arg *el_size; +}; + +struct print_arg_dynarray { + struct format_field *field; + struct print_arg *index; +}; + +struct print_arg; + +struct print_arg_op { + char *op; + int prio; + struct print_arg *left; + struct print_arg *right; +}; + +struct tep_function_handler; + +struct print_arg_func { + struct tep_function_handler *func; + struct print_arg *args; +}; + +enum print_arg_type { + PRINT_NULL, + PRINT_ATOM, + PRINT_FIELD, + PRINT_FLAGS, + PRINT_SYMBOL, + PRINT_HEX, + PRINT_INT_ARRAY, + PRINT_TYPE, + PRINT_STRING, + PRINT_BSTRING, + PRINT_DYNAMIC_ARRAY, + PRINT_OP, + PRINT_FUNC, + PRINT_BITMASK, + PRINT_DYNAMIC_ARRAY_LEN, + PRINT_HEX_STR, +}; + +struct print_arg { + struct print_arg *next; + enum print_arg_type type; + union { + struct print_arg_atom atom; + struct print_arg_field field; + struct print_arg_typecast typecast; + struct print_arg_flags flags; + struct print_arg_symbol symbol; + struct print_arg_hex hex; + struct print_arg_int_array int_array; + struct print_arg_func func; + struct print_arg_string string; + struct print_arg_bitmask bitmask; + struct print_arg_op op; + struct print_arg_dynarray dynarray; + }; +}; + +struct print_fmt { + char *format; + struct print_arg *args; +}; + +struct event_format { + struct tep_handle *pevent; + char *name; + int id; + int flags; + struct format format; + struct print_fmt print_fmt; + char *system; + tep_event_handler_func handler; + void *context; +}; + +enum { + EVENT_FL_ISFTRACE = 0x01, + EVENT_FL_ISPRINT = 0x02, + EVENT_FL_ISBPRINT = 0x04, + EVENT_FL_ISFUNCENT = 0x10, + EVENT_FL_ISFUNCRET = 0x20, + EVENT_FL_NOHANDLE = 0x40, + EVENT_FL_PRINTRAW = 0x80, + + EVENT_FL_FAILED = 0x80000000 +}; + +enum event_sort_type { + EVENT_SORT_ID, + EVENT_SORT_NAME, + EVENT_SORT_SYSTEM, +}; + +enum event_type { + EVENT_ERROR, + EVENT_NONE, + EVENT_SPACE, + EVENT_NEWLINE, + EVENT_OP, + EVENT_DELIM, + EVENT_ITEM, + EVENT_DQUOTE, + EVENT_SQUOTE, +}; + +typedef unsigned long long (*tep_func_handler)(struct trace_seq *s, + unsigned long long *args); + +enum tep_func_arg_type { + TEP_FUNC_ARG_VOID, + TEP_FUNC_ARG_INT, + TEP_FUNC_ARG_LONG, + TEP_FUNC_ARG_STRING, + TEP_FUNC_ARG_PTR, + TEP_FUNC_ARG_MAX_TYPES +}; + +enum tep_flag { + TEP_NSEC_OUTPUT = 1, /* output in NSECS */ + TEP_DISABLE_SYS_PLUGINS = 1 << 1, + TEP_DISABLE_PLUGINS = 1 << 2, +}; + +#define TEP_ERRORS \ + _PE(MEM_ALLOC_FAILED, "failed to allocate memory"), \ + _PE(PARSE_EVENT_FAILED, "failed to parse event"), \ + _PE(READ_ID_FAILED, "failed to read event id"), \ + _PE(READ_FORMAT_FAILED, "failed to read event format"), \ + _PE(READ_PRINT_FAILED, "failed to read event print fmt"), \ + _PE(OLD_FTRACE_ARG_FAILED,"failed to allocate field name for ftrace"),\ + _PE(INVALID_ARG_TYPE, "invalid argument type"), \ + _PE(INVALID_EXP_TYPE, "invalid expression type"), \ + _PE(INVALID_OP_TYPE, "invalid operator type"), \ + _PE(INVALID_EVENT_NAME, "invalid event name"), \ + _PE(EVENT_NOT_FOUND, "no event found"), \ + _PE(SYNTAX_ERROR, "syntax error"), \ + _PE(ILLEGAL_RVALUE, "illegal rvalue"), \ + _PE(ILLEGAL_LVALUE, "illegal lvalue for string comparison"), \ + _PE(INVALID_REGEX, "regex did not compute"), \ + _PE(ILLEGAL_STRING_CMP, "illegal comparison for string"), \ + _PE(ILLEGAL_INTEGER_CMP,"illegal comparison for integer"), \ + _PE(REPARENT_NOT_OP, "cannot reparent other than OP"), \ + _PE(REPARENT_FAILED, "failed to reparent filter OP"), \ + _PE(BAD_FILTER_ARG, "bad arg in filter tree"), \ + _PE(UNEXPECTED_TYPE, "unexpected type (not a value)"), \ + _PE(ILLEGAL_TOKEN, "illegal token"), \ + _PE(INVALID_PAREN, "open parenthesis cannot come here"), \ + _PE(UNBALANCED_PAREN, "unbalanced number of parenthesis"), \ + _PE(UNKNOWN_TOKEN, "unknown token"), \ + _PE(FILTER_NOT_FOUND, "no filter found"), \ + _PE(NOT_A_NUMBER, "must have number field"), \ + _PE(NO_FILTER, "no filters exists"), \ + _PE(FILTER_MISS, "record does not match to filter") + +#undef _PE +#define _PE(__code, __str) TEP_ERRNO__ ## __code +enum tep_errno { + TEP_ERRNO__SUCCESS = 0, + TEP_ERRNO__FILTER_MATCH = TEP_ERRNO__SUCCESS, + + /* + * Choose an arbitrary negative big number not to clash with standard + * errno since SUS requires the errno has distinct positive values. + * See 'Issue 6' in the link below. + * + * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html + */ + __TEP_ERRNO__START = -100000, + + TEP_ERRORS, + + __TEP_ERRNO__END, +}; +#undef _PE + +struct plugin_list; + +#define INVALID_PLUGIN_LIST_OPTION ((char **)((unsigned long)-1)) + +struct plugin_list *tep_load_plugins(struct tep_handle *pevent); +void tep_unload_plugins(struct plugin_list *plugin_list, + struct tep_handle *pevent); +char **tep_plugin_list_options(void); +void tep_plugin_free_options_list(char **list); +int tep_plugin_add_options(const char *name, + struct tep_plugin_option *options); +void tep_plugin_remove_options(struct tep_plugin_option *options); +void tep_print_plugins(struct trace_seq *s, + const char *prefix, const char *suffix, + const struct plugin_list *list); + +struct cmdline; +struct cmdline_list; +struct func_map; +struct func_list; +struct event_handler; +struct func_resolver; + +typedef char *(tep_func_resolver_t)(void *priv, + unsigned long long *addrp, char **modp); + +struct tep_handle { + int ref_count; + + int header_page_ts_offset; + int header_page_ts_size; + int header_page_size_offset; + int header_page_size_size; + int header_page_data_offset; + int header_page_data_size; + int header_page_overwrite; + + int file_bigendian; + int host_bigendian; + + int latency_format; + + int old_format; + + int cpus; + int long_size; + int page_size; + + struct cmdline *cmdlines; + struct cmdline_list *cmdlist; + int cmdline_count; + + struct func_map *func_map; + struct func_resolver *func_resolver; + struct func_list *funclist; + unsigned int func_count; + + struct printk_map *printk_map; + struct printk_list *printklist; + unsigned int printk_count; + + + struct event_format **events; + int nr_events; + struct event_format **sort_events; + enum event_sort_type last_type; + + int type_offset; + int type_size; + + int pid_offset; + int pid_size; + + int pc_offset; + int pc_size; + + int flags_offset; + int flags_size; + + int ld_offset; + int ld_size; + + int print_raw; + + int test_filters; + + int flags; + + struct format_field *bprint_ip_field; + struct format_field *bprint_fmt_field; + struct format_field *bprint_buf_field; + + struct event_handler *handlers; + struct tep_function_handler *func_handlers; + + /* cache */ + struct event_format *last_event; + + char *trace_clock; +}; + +static inline void tep_set_flag(struct tep_handle *pevent, int flag) +{ + pevent->flags |= flag; +} + +static inline unsigned short +__data2host2(struct tep_handle *pevent, unsigned short data) +{ + unsigned short swap; + + if (pevent->host_bigendian == pevent->file_bigendian) + return data; + + swap = ((data & 0xffULL) << 8) | + ((data & (0xffULL << 8)) >> 8); + + return swap; +} + +static inline unsigned int +__data2host4(struct tep_handle *pevent, unsigned int data) +{ + unsigned int swap; + + if (pevent->host_bigendian == pevent->file_bigendian) + return data; + + swap = ((data & 0xffULL) << 24) | + ((data & (0xffULL << 8)) << 8) | + ((data & (0xffULL << 16)) >> 8) | + ((data & (0xffULL << 24)) >> 24); + + return swap; +} + +static inline unsigned long long +__data2host8(struct tep_handle *pevent, unsigned long long data) +{ + unsigned long long swap; + + if (pevent->host_bigendian == pevent->file_bigendian) + return data; + + swap = ((data & 0xffULL) << 56) | + ((data & (0xffULL << 8)) << 40) | + ((data & (0xffULL << 16)) << 24) | + ((data & (0xffULL << 24)) << 8) | + ((data & (0xffULL << 32)) >> 8) | + ((data & (0xffULL << 40)) >> 24) | + ((data & (0xffULL << 48)) >> 40) | + ((data & (0xffULL << 56)) >> 56); + + return swap; +} + +#define data2host2(pevent, ptr) __data2host2(pevent, *(unsigned short *)(ptr)) +#define data2host4(pevent, ptr) __data2host4(pevent, *(unsigned int *)(ptr)) +#define data2host8(pevent, ptr) \ +({ \ + unsigned long long __val; \ + \ + memcpy(&__val, (ptr), sizeof(unsigned long long)); \ + __data2host8(pevent, __val); \ +}) + +static inline int tep_host_bigendian(void) +{ + unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 }; + unsigned int val; + + memcpy(&val, str, 4); + return val == 0x01020304; +} + +/* taken from kernel/trace/trace.h */ +enum trace_flag_type { + TRACE_FLAG_IRQS_OFF = 0x01, + TRACE_FLAG_IRQS_NOSUPPORT = 0x02, + TRACE_FLAG_NEED_RESCHED = 0x04, + TRACE_FLAG_HARDIRQ = 0x08, + TRACE_FLAG_SOFTIRQ = 0x10, +}; + +int tep_set_function_resolver(struct tep_handle *pevent, + tep_func_resolver_t *func, void *priv); +void tep_reset_function_resolver(struct tep_handle *pevent); +int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid); +int tep_register_trace_clock(struct tep_handle *pevent, const char *trace_clock); +int tep_register_function(struct tep_handle *pevent, char *name, + unsigned long long addr, char *mod); +int tep_register_print_string(struct tep_handle *pevent, const char *fmt, + unsigned long long addr); +int tep_pid_is_registered(struct tep_handle *pevent, int pid); + +void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s, + struct event_format *event, + struct tep_record *record); +void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s, + struct event_format *event, + struct tep_record *record, + bool use_trace_clock); +void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s, + struct event_format *event, + struct tep_record *record); +void tep_print_event(struct tep_handle *pevent, struct trace_seq *s, + struct tep_record *record, bool use_trace_clock); + +int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long size, + int long_size); + +enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf, + unsigned long size, const char *sys); +enum tep_errno tep_parse_format(struct tep_handle *pevent, + struct event_format **eventp, + const char *buf, + unsigned long size, const char *sys); +void tep_free_format(struct event_format *event); +void tep_free_format_field(struct format_field *field); + +void *tep_get_field_raw(struct trace_seq *s, struct event_format *event, + const char *name, struct tep_record *record, + int *len, int err); + +int tep_get_field_val(struct trace_seq *s, struct event_format *event, + const char *name, struct tep_record *record, + unsigned long long *val, int err); +int tep_get_common_field_val(struct trace_seq *s, struct event_format *event, + const char *name, struct tep_record *record, + unsigned long long *val, int err); +int tep_get_any_field_val(struct trace_seq *s, struct event_format *event, + const char *name, struct tep_record *record, + unsigned long long *val, int err); + +int tep_print_num_field(struct trace_seq *s, const char *fmt, + struct event_format *event, const char *name, + struct tep_record *record, int err); + +int tep_print_func_field(struct trace_seq *s, const char *fmt, + struct event_format *event, const char *name, + struct tep_record *record, int err); + +int tep_register_event_handler(struct tep_handle *pevent, int id, + const char *sys_name, const char *event_name, + tep_event_handler_func func, void *context); +int tep_unregister_event_handler(struct tep_handle *pevent, int id, + const char *sys_name, const char *event_name, + tep_event_handler_func func, void *context); +int tep_register_print_function(struct tep_handle *pevent, + tep_func_handler func, + enum tep_func_arg_type ret_type, + char *name, ...); +int tep_unregister_print_function(struct tep_handle *pevent, + tep_func_handler func, char *name); + +struct format_field *tep_find_common_field(struct event_format *event, const char *name); +struct format_field *tep_find_field(struct event_format *event, const char *name); +struct format_field *tep_find_any_field(struct event_format *event, const char *name); + +const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr); +unsigned long long +tep_find_function_address(struct tep_handle *pevent, unsigned long long addr); +unsigned long long tep_read_number(struct tep_handle *pevent, const void *ptr, int size); +int tep_read_number_field(struct format_field *field, const void *data, + unsigned long long *value); + +struct event_format *tep_find_event(struct tep_handle *pevent, int id); + +struct event_format * +tep_find_event_by_name(struct tep_handle *pevent, const char *sys, const char *name); + +struct event_format * +tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record); + +void tep_data_lat_fmt(struct tep_handle *pevent, + struct trace_seq *s, struct tep_record *record); +int tep_data_type(struct tep_handle *pevent, struct tep_record *rec); +struct event_format *tep_data_event_from_type(struct tep_handle *pevent, int type); +int tep_data_pid(struct tep_handle *pevent, struct tep_record *rec); +int tep_data_preempt_count(struct tep_handle *pevent, struct tep_record *rec); +int tep_data_flags(struct tep_handle *pevent, struct tep_record *rec); +const char *tep_data_comm_from_pid(struct tep_handle *pevent, int pid); +struct cmdline; +struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm, + struct cmdline *next); +int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline); + +void tep_print_field(struct trace_seq *s, void *data, + struct format_field *field); +void tep_print_fields(struct trace_seq *s, void *data, + int size __maybe_unused, struct event_format *event); +void tep_event_info(struct trace_seq *s, struct event_format *event, + struct tep_record *record); +int tep_strerror(struct tep_handle *pevent, enum tep_errno errnum, + char *buf, size_t buflen); + +struct event_format **tep_list_events(struct tep_handle *pevent, enum event_sort_type); +struct format_field **tep_event_common_fields(struct event_format *event); +struct format_field **tep_event_fields(struct event_format *event); + +static inline int tep_get_cpus(struct tep_handle *pevent) +{ + return pevent->cpus; +} + +static inline void tep_set_cpus(struct tep_handle *pevent, int cpus) +{ + pevent->cpus = cpus; +} + +static inline int tep_get_long_size(struct tep_handle *pevent) +{ + return pevent->long_size; +} + +static inline void tep_set_long_size(struct tep_handle *pevent, int long_size) +{ + pevent->long_size = long_size; +} + +static inline int tep_get_page_size(struct tep_handle *pevent) +{ + return pevent->page_size; +} + +static inline void tep_set_page_size(struct tep_handle *pevent, int _page_size) +{ + pevent->page_size = _page_size; +} + +static inline int tep_is_file_bigendian(struct tep_handle *pevent) +{ + return pevent->file_bigendian; +} + +static inline void tep_set_file_bigendian(struct tep_handle *pevent, int endian) +{ + pevent->file_bigendian = endian; +} + +static inline int tep_is_host_bigendian(struct tep_handle *pevent) +{ + return pevent->host_bigendian; +} + +static inline void tep_set_host_bigendian(struct tep_handle *pevent, int endian) +{ + pevent->host_bigendian = endian; +} + +static inline int tep_is_latency_format(struct tep_handle *pevent) +{ + return pevent->latency_format; +} + +static inline void tep_set_latency_format(struct tep_handle *pevent, int lat) +{ + pevent->latency_format = lat; +} + +struct tep_handle *tep_alloc(void); +void tep_free(struct tep_handle *pevent); +void tep_ref(struct tep_handle *pevent); +void tep_unref(struct tep_handle *pevent); + +/* access to the internal parser */ +void tep_buffer_init(const char *buf, unsigned long long size); +enum event_type tep_read_token(char **tok); +void tep_free_token(char *token); +int tep_peek_char(void); +const char *tep_get_input_buf(void); +unsigned long long tep_get_input_buf_ptr(void); + +/* for debugging */ +void tep_print_funcs(struct tep_handle *pevent); +void tep_print_printk(struct tep_handle *pevent); + +/* ----------------------- filtering ----------------------- */ + +enum filter_boolean_type { + FILTER_FALSE, + FILTER_TRUE, +}; + +enum filter_op_type { + FILTER_OP_AND = 1, + FILTER_OP_OR, + FILTER_OP_NOT, +}; + +enum filter_cmp_type { + FILTER_CMP_NONE, + FILTER_CMP_EQ, + FILTER_CMP_NE, + FILTER_CMP_GT, + FILTER_CMP_LT, + FILTER_CMP_GE, + FILTER_CMP_LE, + FILTER_CMP_MATCH, + FILTER_CMP_NOT_MATCH, + FILTER_CMP_REGEX, + FILTER_CMP_NOT_REGEX, +}; + +enum filter_exp_type { + FILTER_EXP_NONE, + FILTER_EXP_ADD, + FILTER_EXP_SUB, + FILTER_EXP_MUL, + FILTER_EXP_DIV, + FILTER_EXP_MOD, + FILTER_EXP_RSHIFT, + FILTER_EXP_LSHIFT, + FILTER_EXP_AND, + FILTER_EXP_OR, + FILTER_EXP_XOR, + FILTER_EXP_NOT, +}; + +enum filter_arg_type { + FILTER_ARG_NONE, + FILTER_ARG_BOOLEAN, + FILTER_ARG_VALUE, + FILTER_ARG_FIELD, + FILTER_ARG_EXP, + FILTER_ARG_OP, + FILTER_ARG_NUM, + FILTER_ARG_STR, +}; + +enum filter_value_type { + FILTER_NUMBER, + FILTER_STRING, + FILTER_CHAR +}; + +struct fliter_arg; + +struct filter_arg_boolean { + enum filter_boolean_type value; +}; + +struct filter_arg_field { + struct format_field *field; +}; + +struct filter_arg_value { + enum filter_value_type type; + union { + char *str; + unsigned long long val; + }; +}; + +struct filter_arg_op { + enum filter_op_type type; + struct filter_arg *left; + struct filter_arg *right; +}; + +struct filter_arg_exp { + enum filter_exp_type type; + struct filter_arg *left; + struct filter_arg *right; +}; + +struct filter_arg_num { + enum filter_cmp_type type; + struct filter_arg *left; + struct filter_arg *right; +}; + +struct filter_arg_str { + enum filter_cmp_type type; + struct format_field *field; + char *val; + char *buffer; + regex_t reg; +}; + +struct filter_arg { + enum filter_arg_type type; + union { + struct filter_arg_boolean boolean; + struct filter_arg_field field; + struct filter_arg_value value; + struct filter_arg_op op; + struct filter_arg_exp exp; + struct filter_arg_num num; + struct filter_arg_str str; + }; +}; + +struct filter_type { + int event_id; + struct event_format *event; + struct filter_arg *filter; +}; + +#define TEP_FILTER_ERROR_BUFSZ 1024 + +struct event_filter { + struct tep_handle *pevent; + int filters; + struct filter_type *event_filters; + char error_buffer[TEP_FILTER_ERROR_BUFSZ]; +}; + +struct event_filter *tep_filter_alloc(struct tep_handle *pevent); + +/* for backward compatibility */ +#define FILTER_NONE TEP_ERRNO__NO_FILTER +#define FILTER_NOEXIST TEP_ERRNO__FILTER_NOT_FOUND +#define FILTER_MISS TEP_ERRNO__FILTER_MISS +#define FILTER_MATCH TEP_ERRNO__FILTER_MATCH + +enum filter_trivial_type { + FILTER_TRIVIAL_FALSE, + FILTER_TRIVIAL_TRUE, + FILTER_TRIVIAL_BOTH, +}; + +enum tep_errno tep_filter_add_filter_str(struct event_filter *filter, + const char *filter_str); + +enum tep_errno tep_filter_match(struct event_filter *filter, + struct tep_record *record); + +int tep_filter_strerror(struct event_filter *filter, enum tep_errno err, + char *buf, size_t buflen); + +int tep_event_filtered(struct event_filter *filter, + int event_id); + +void tep_filter_reset(struct event_filter *filter); + +int tep_filter_clear_trivial(struct event_filter *filter, + enum filter_trivial_type type); + +void tep_filter_free(struct event_filter *filter); + +char *tep_filter_make_string(struct event_filter *filter, int event_id); + +int tep_filter_remove_event(struct event_filter *filter, + int event_id); + +int tep_filter_event_has_trivial(struct event_filter *filter, + int event_id, + enum filter_trivial_type type); + +int tep_filter_copy(struct event_filter *dest, struct event_filter *source); + +int tep_update_trivial(struct event_filter *dest, struct event_filter *source, + enum filter_trivial_type type); + +int tep_filter_compare(struct event_filter *filter1, struct event_filter *filter2); + +#endif /* _PARSE_EVENTS_H */ diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c new file mode 100644 index 000000000..52874eb94 --- /dev/null +++ b/tools/lib/traceevent/event-plugin.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ + +#include <ctype.h> +#include <stdio.h> +#include <string.h> +#include <dlfcn.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <dirent.h> +#include "event-parse.h" +#include "event-utils.h" + +#define LOCAL_PLUGIN_DIR ".local/lib/traceevent/plugins/" + +static struct registered_plugin_options { + struct registered_plugin_options *next; + struct tep_plugin_option *options; +} *registered_options; + +static struct trace_plugin_options { + struct trace_plugin_options *next; + char *plugin; + char *option; + char *value; +} *trace_plugin_options; + +struct plugin_list { + struct plugin_list *next; + char *name; + void *handle; +}; + +static void lower_case(char *str) +{ + if (!str) + return; + for (; *str; str++) + *str = tolower(*str); +} + +static int update_option_value(struct tep_plugin_option *op, const char *val) +{ + char *op_val; + + if (!val) { + /* toggle, only if option is boolean */ + if (op->value) + /* Warn? */ + return 0; + op->set ^= 1; + return 0; + } + + /* + * If the option has a value then it takes a string + * otherwise the option is a boolean. + */ + if (op->value) { + op->value = val; + return 0; + } + + /* Option is boolean, must be either "1", "0", "true" or "false" */ + + op_val = strdup(val); + if (!op_val) + return -1; + lower_case(op_val); + + if (strcmp(val, "1") == 0 || strcmp(val, "true") == 0) + op->set = 1; + else if (strcmp(val, "0") == 0 || strcmp(val, "false") == 0) + op->set = 0; + free(op_val); + + return 0; +} + +/** + * tep_plugin_list_options - get list of plugin options + * + * Returns an array of char strings that list the currently registered + * plugin options in the format of <plugin>:<option>. This list can be + * used by toggling the option. + * + * Returns NULL if there's no options registered. On error it returns + * INVALID_PLUGIN_LIST_OPTION + * + * Must be freed with tep_plugin_free_options_list(). + */ +char **tep_plugin_list_options(void) +{ + struct registered_plugin_options *reg; + struct tep_plugin_option *op; + char **list = NULL; + char *name; + int count = 0; + + for (reg = registered_options; reg; reg = reg->next) { + for (op = reg->options; op->name; op++) { + char *alias = op->plugin_alias ? op->plugin_alias : op->file; + char **temp = list; + int ret; + + ret = asprintf(&name, "%s:%s", alias, op->name); + if (ret < 0) + goto err; + + list = realloc(list, count + 2); + if (!list) { + list = temp; + free(name); + goto err; + } + list[count++] = name; + list[count] = NULL; + } + } + return list; + + err: + while (--count >= 0) + free(list[count]); + free(list); + + return INVALID_PLUGIN_LIST_OPTION; +} + +void tep_plugin_free_options_list(char **list) +{ + int i; + + if (!list) + return; + + if (list == INVALID_PLUGIN_LIST_OPTION) + return; + + for (i = 0; list[i]; i++) + free(list[i]); + + free(list); +} + +static int +update_option(const char *file, struct tep_plugin_option *option) +{ + struct trace_plugin_options *op; + char *plugin; + int ret = 0; + + if (option->plugin_alias) { + plugin = strdup(option->plugin_alias); + if (!plugin) + return -1; + } else { + char *p; + plugin = strdup(file); + if (!plugin) + return -1; + p = strstr(plugin, "."); + if (p) + *p = '\0'; + } + + /* first look for named options */ + for (op = trace_plugin_options; op; op = op->next) { + if (!op->plugin) + continue; + if (strcmp(op->plugin, plugin) != 0) + continue; + if (strcmp(op->option, option->name) != 0) + continue; + + ret = update_option_value(option, op->value); + if (ret) + goto out; + break; + } + + /* first look for unnamed options */ + for (op = trace_plugin_options; op; op = op->next) { + if (op->plugin) + continue; + if (strcmp(op->option, option->name) != 0) + continue; + + ret = update_option_value(option, op->value); + break; + } + + out: + free(plugin); + return ret; +} + +/** + * tep_plugin_add_options - Add a set of options by a plugin + * @name: The name of the plugin adding the options + * @options: The set of options being loaded + * + * Sets the options with the values that have been added by user. + */ +int tep_plugin_add_options(const char *name, + struct tep_plugin_option *options) +{ + struct registered_plugin_options *reg; + + reg = malloc(sizeof(*reg)); + if (!reg) + return -1; + reg->next = registered_options; + reg->options = options; + registered_options = reg; + + while (options->name) { + update_option(name, options); + options++; + } + return 0; +} + +/** + * tep_plugin_remove_options - remove plugin options that were registered + * @options: Options to removed that were registered with tep_plugin_add_options + */ +void tep_plugin_remove_options(struct tep_plugin_option *options) +{ + struct registered_plugin_options **last; + struct registered_plugin_options *reg; + + for (last = ®istered_options; *last; last = &(*last)->next) { + if ((*last)->options == options) { + reg = *last; + *last = reg->next; + free(reg); + return; + } + } +} + +/** + * tep_print_plugins - print out the list of plugins loaded + * @s: the trace_seq descripter to write to + * @prefix: The prefix string to add before listing the option name + * @suffix: The suffix string ot append after the option name + * @list: The list of plugins (usually returned by tep_load_plugins() + * + * Writes to the trace_seq @s the list of plugins (files) that is + * returned by tep_load_plugins(). Use @prefix and @suffix for formating: + * @prefix = " ", @suffix = "\n". + */ +void tep_print_plugins(struct trace_seq *s, + const char *prefix, const char *suffix, + const struct plugin_list *list) +{ + while (list) { + trace_seq_printf(s, "%s%s%s", prefix, list->name, suffix); + list = list->next; + } +} + +static void +load_plugin(struct tep_handle *pevent, const char *path, + const char *file, void *data) +{ + struct plugin_list **plugin_list = data; + tep_plugin_load_func func; + struct plugin_list *list; + const char *alias; + char *plugin; + void *handle; + int ret; + + ret = asprintf(&plugin, "%s/%s", path, file); + if (ret < 0) { + warning("could not allocate plugin memory\n"); + return; + } + + handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL); + if (!handle) { + warning("could not load plugin '%s'\n%s\n", + plugin, dlerror()); + goto out_free; + } + + alias = dlsym(handle, TEP_PLUGIN_ALIAS_NAME); + if (!alias) + alias = file; + + func = dlsym(handle, TEP_PLUGIN_LOADER_NAME); + if (!func) { + warning("could not find func '%s' in plugin '%s'\n%s\n", + TEP_PLUGIN_LOADER_NAME, plugin, dlerror()); + goto out_free; + } + + list = malloc(sizeof(*list)); + if (!list) { + warning("could not allocate plugin memory\n"); + goto out_free; + } + + list->next = *plugin_list; + list->handle = handle; + list->name = plugin; + *plugin_list = list; + + pr_stat("registering plugin: %s", plugin); + func(pevent); + return; + + out_free: + free(plugin); +} + +static void +load_plugins_dir(struct tep_handle *pevent, const char *suffix, + const char *path, + void (*load_plugin)(struct tep_handle *pevent, + const char *path, + const char *name, + void *data), + void *data) +{ + struct dirent *dent; + struct stat st; + DIR *dir; + int ret; + + ret = stat(path, &st); + if (ret < 0) + return; + + if (!S_ISDIR(st.st_mode)) + return; + + dir = opendir(path); + if (!dir) + return; + + while ((dent = readdir(dir))) { + const char *name = dent->d_name; + + if (strcmp(name, ".") == 0 || + strcmp(name, "..") == 0) + continue; + + /* Only load plugins that end in suffix */ + if (strcmp(name + (strlen(name) - strlen(suffix)), suffix) != 0) + continue; + + load_plugin(pevent, path, name, data); + } + + closedir(dir); +} + +static void +load_plugins(struct tep_handle *pevent, const char *suffix, + void (*load_plugin)(struct tep_handle *pevent, + const char *path, + const char *name, + void *data), + void *data) +{ + char *home; + char *path; + char *envdir; + int ret; + + if (pevent->flags & TEP_DISABLE_PLUGINS) + return; + + /* + * If a system plugin directory was defined, + * check that first. + */ +#ifdef PLUGIN_DIR + if (!(pevent->flags & TEP_DISABLE_SYS_PLUGINS)) + load_plugins_dir(pevent, suffix, PLUGIN_DIR, + load_plugin, data); +#endif + + /* + * Next let the environment-set plugin directory + * override the system defaults. + */ + envdir = getenv("TRACEEVENT_PLUGIN_DIR"); + if (envdir) + load_plugins_dir(pevent, suffix, envdir, load_plugin, data); + + /* + * Now let the home directory override the environment + * or system defaults. + */ + home = getenv("HOME"); + if (!home) + return; + + ret = asprintf(&path, "%s/%s", home, LOCAL_PLUGIN_DIR); + if (ret < 0) { + warning("could not allocate plugin memory\n"); + return; + } + + load_plugins_dir(pevent, suffix, path, load_plugin, data); + + free(path); +} + +struct plugin_list* +tep_load_plugins(struct tep_handle *pevent) +{ + struct plugin_list *list = NULL; + + load_plugins(pevent, ".so", load_plugin, &list); + return list; +} + +void +tep_unload_plugins(struct plugin_list *plugin_list, struct tep_handle *pevent) +{ + tep_plugin_unload_func func; + struct plugin_list *list; + + while (plugin_list) { + list = plugin_list; + plugin_list = list->next; + func = dlsym(list->handle, TEP_PLUGIN_UNLOADER_NAME); + if (func) + func(pevent); + dlclose(list->handle); + free(list->name); + free(list); + } +} diff --git a/tools/lib/traceevent/event-utils.h b/tools/lib/traceevent/event-utils.h new file mode 100644 index 000000000..0560b96a3 --- /dev/null +++ b/tools/lib/traceevent/event-utils.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ +/* + * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#ifndef __UTIL_H +#define __UTIL_H + +#include <ctype.h> + +/* Can be overridden */ +void warning(const char *fmt, ...); +void pr_stat(const char *fmt, ...); +void vpr_stat(const char *fmt, va_list ap); + +/* Always available */ +void __warning(const char *fmt, ...); +void __pr_stat(const char *fmt, ...); + +void __vwarning(const char *fmt, ...); +void __vpr_stat(const char *fmt, ...); + +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + +static inline char *strim(char *string) +{ + char *ret; + + if (!string) + return NULL; + while (*string) { + if (!isspace(*string)) + break; + string++; + } + ret = string; + + string = ret + strlen(ret) - 1; + while (string > ret) { + if (!isspace(*string)) + break; + string--; + } + string[1] = 0; + + return ret; +} + +static inline int has_text(const char *text) +{ + if (!text) + return 0; + + while (*text) { + if (!isspace(*text)) + return 1; + text++; + } + + return 0; +} + +#endif diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c new file mode 100644 index 000000000..af2a1f3b7 --- /dev/null +++ b/tools/lib/traceevent/kbuffer-parse.c @@ -0,0 +1,729 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "kbuffer.h" + +#define MISSING_EVENTS (1UL << 31) +#define MISSING_STORED (1UL << 30) + +#define COMMIT_MASK ((1 << 27) - 1) + +enum { + KBUFFER_FL_HOST_BIG_ENDIAN = (1<<0), + KBUFFER_FL_BIG_ENDIAN = (1<<1), + KBUFFER_FL_LONG_8 = (1<<2), + KBUFFER_FL_OLD_FORMAT = (1<<3), +}; + +#define ENDIAN_MASK (KBUFFER_FL_HOST_BIG_ENDIAN | KBUFFER_FL_BIG_ENDIAN) + +/** kbuffer + * @timestamp - timestamp of current event + * @lost_events - # of lost events between this subbuffer and previous + * @flags - special flags of the kbuffer + * @subbuffer - pointer to the sub-buffer page + * @data - pointer to the start of data on the sub-buffer page + * @index - index from @data to the @curr event data + * @curr - offset from @data to the start of current event + * (includes metadata) + * @next - offset from @data to the start of next event + * @size - The size of data on @data + * @start - The offset from @subbuffer where @data lives + * + * @read_4 - Function to read 4 raw bytes (may swap) + * @read_8 - Function to read 8 raw bytes (may swap) + * @read_long - Function to read a long word (4 or 8 bytes with needed swap) + */ +struct kbuffer { + unsigned long long timestamp; + long long lost_events; + unsigned long flags; + void *subbuffer; + void *data; + unsigned int index; + unsigned int curr; + unsigned int next; + unsigned int size; + unsigned int start; + + unsigned int (*read_4)(void *ptr); + unsigned long long (*read_8)(void *ptr); + unsigned long long (*read_long)(struct kbuffer *kbuf, void *ptr); + int (*next_event)(struct kbuffer *kbuf); +}; + +static void *zmalloc(size_t size) +{ + return calloc(1, size); +} + +static int host_is_bigendian(void) +{ + unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 }; + unsigned int *ptr; + + ptr = (unsigned int *)str; + return *ptr == 0x01020304; +} + +static int do_swap(struct kbuffer *kbuf) +{ + return ((kbuf->flags & KBUFFER_FL_HOST_BIG_ENDIAN) + kbuf->flags) & + ENDIAN_MASK; +} + +static unsigned long long __read_8(void *ptr) +{ + unsigned long long data = *(unsigned long long *)ptr; + + return data; +} + +static unsigned long long __read_8_sw(void *ptr) +{ + unsigned long long data = *(unsigned long long *)ptr; + unsigned long long swap; + + swap = ((data & 0xffULL) << 56) | + ((data & (0xffULL << 8)) << 40) | + ((data & (0xffULL << 16)) << 24) | + ((data & (0xffULL << 24)) << 8) | + ((data & (0xffULL << 32)) >> 8) | + ((data & (0xffULL << 40)) >> 24) | + ((data & (0xffULL << 48)) >> 40) | + ((data & (0xffULL << 56)) >> 56); + + return swap; +} + +static unsigned int __read_4(void *ptr) +{ + unsigned int data = *(unsigned int *)ptr; + + return data; +} + +static unsigned int __read_4_sw(void *ptr) +{ + unsigned int data = *(unsigned int *)ptr; + unsigned int swap; + + swap = ((data & 0xffULL) << 24) | + ((data & (0xffULL << 8)) << 8) | + ((data & (0xffULL << 16)) >> 8) | + ((data & (0xffULL << 24)) >> 24); + + return swap; +} + +static unsigned long long read_8(struct kbuffer *kbuf, void *ptr) +{ + return kbuf->read_8(ptr); +} + +static unsigned int read_4(struct kbuffer *kbuf, void *ptr) +{ + return kbuf->read_4(ptr); +} + +static unsigned long long __read_long_8(struct kbuffer *kbuf, void *ptr) +{ + return kbuf->read_8(ptr); +} + +static unsigned long long __read_long_4(struct kbuffer *kbuf, void *ptr) +{ + return kbuf->read_4(ptr); +} + +static unsigned long long read_long(struct kbuffer *kbuf, void *ptr) +{ + return kbuf->read_long(kbuf, ptr); +} + +static int calc_index(struct kbuffer *kbuf, void *ptr) +{ + return (unsigned long)ptr - (unsigned long)kbuf->data; +} + +static int __next_event(struct kbuffer *kbuf); + +/** + * kbuffer_alloc - allocat a new kbuffer + * @size; enum to denote size of word + * @endian: enum to denote endianness + * + * Allocates and returns a new kbuffer. + */ +struct kbuffer * +kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian) +{ + struct kbuffer *kbuf; + int flags = 0; + + switch (size) { + case KBUFFER_LSIZE_4: + break; + case KBUFFER_LSIZE_8: + flags |= KBUFFER_FL_LONG_8; + break; + default: + return NULL; + } + + switch (endian) { + case KBUFFER_ENDIAN_LITTLE: + break; + case KBUFFER_ENDIAN_BIG: + flags |= KBUFFER_FL_BIG_ENDIAN; + break; + default: + return NULL; + } + + kbuf = zmalloc(sizeof(*kbuf)); + if (!kbuf) + return NULL; + + kbuf->flags = flags; + + if (host_is_bigendian()) + kbuf->flags |= KBUFFER_FL_HOST_BIG_ENDIAN; + + if (do_swap(kbuf)) { + kbuf->read_8 = __read_8_sw; + kbuf->read_4 = __read_4_sw; + } else { + kbuf->read_8 = __read_8; + kbuf->read_4 = __read_4; + } + + if (kbuf->flags & KBUFFER_FL_LONG_8) + kbuf->read_long = __read_long_8; + else + kbuf->read_long = __read_long_4; + + /* May be changed by kbuffer_set_old_format() */ + kbuf->next_event = __next_event; + + return kbuf; +} + +/** kbuffer_free - free an allocated kbuffer + * @kbuf: The kbuffer to free + * + * Can take NULL as a parameter. + */ +void kbuffer_free(struct kbuffer *kbuf) +{ + free(kbuf); +} + +static unsigned int type4host(struct kbuffer *kbuf, + unsigned int type_len_ts) +{ + if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN) + return (type_len_ts >> 29) & 3; + else + return type_len_ts & 3; +} + +static unsigned int len4host(struct kbuffer *kbuf, + unsigned int type_len_ts) +{ + if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN) + return (type_len_ts >> 27) & 7; + else + return (type_len_ts >> 2) & 7; +} + +static unsigned int type_len4host(struct kbuffer *kbuf, + unsigned int type_len_ts) +{ + if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN) + return (type_len_ts >> 27) & ((1 << 5) - 1); + else + return type_len_ts & ((1 << 5) - 1); +} + +static unsigned int ts4host(struct kbuffer *kbuf, + unsigned int type_len_ts) +{ + if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN) + return type_len_ts & ((1 << 27) - 1); + else + return type_len_ts >> 5; +} + +/* + * Linux 2.6.30 and earlier (not much ealier) had a different + * ring buffer format. It should be obsolete, but we handle it anyway. + */ +enum old_ring_buffer_type { + OLD_RINGBUF_TYPE_PADDING, + OLD_RINGBUF_TYPE_TIME_EXTEND, + OLD_RINGBUF_TYPE_TIME_STAMP, + OLD_RINGBUF_TYPE_DATA, +}; + +static unsigned int old_update_pointers(struct kbuffer *kbuf) +{ + unsigned long long extend; + unsigned int type_len_ts; + unsigned int type; + unsigned int len; + unsigned int delta; + unsigned int length; + void *ptr = kbuf->data + kbuf->curr; + + type_len_ts = read_4(kbuf, ptr); + ptr += 4; + + type = type4host(kbuf, type_len_ts); + len = len4host(kbuf, type_len_ts); + delta = ts4host(kbuf, type_len_ts); + + switch (type) { + case OLD_RINGBUF_TYPE_PADDING: + kbuf->next = kbuf->size; + return 0; + + case OLD_RINGBUF_TYPE_TIME_EXTEND: + extend = read_4(kbuf, ptr); + extend <<= TS_SHIFT; + extend += delta; + delta = extend; + ptr += 4; + length = 0; + break; + + case OLD_RINGBUF_TYPE_TIME_STAMP: + /* should never happen! */ + kbuf->curr = kbuf->size; + kbuf->next = kbuf->size; + kbuf->index = kbuf->size; + return -1; + default: + if (len) + length = len * 4; + else { + length = read_4(kbuf, ptr); + length -= 4; + ptr += 4; + } + break; + } + + kbuf->timestamp += delta; + kbuf->index = calc_index(kbuf, ptr); + kbuf->next = kbuf->index + length; + + return type; +} + +static int __old_next_event(struct kbuffer *kbuf) +{ + int type; + + do { + kbuf->curr = kbuf->next; + if (kbuf->next >= kbuf->size) + return -1; + type = old_update_pointers(kbuf); + } while (type == OLD_RINGBUF_TYPE_TIME_EXTEND || type == OLD_RINGBUF_TYPE_PADDING); + + return 0; +} + +static unsigned int +translate_data(struct kbuffer *kbuf, void *data, void **rptr, + unsigned long long *delta, int *length) +{ + unsigned long long extend; + unsigned int type_len_ts; + unsigned int type_len; + + type_len_ts = read_4(kbuf, data); + data += 4; + + type_len = type_len4host(kbuf, type_len_ts); + *delta = ts4host(kbuf, type_len_ts); + + switch (type_len) { + case KBUFFER_TYPE_PADDING: + *length = read_4(kbuf, data); + break; + + case KBUFFER_TYPE_TIME_EXTEND: + extend = read_4(kbuf, data); + data += 4; + extend <<= TS_SHIFT; + extend += *delta; + *delta = extend; + *length = 0; + break; + + case KBUFFER_TYPE_TIME_STAMP: + data += 12; + *length = 0; + break; + case 0: + *length = read_4(kbuf, data) - 4; + *length = (*length + 3) & ~3; + data += 4; + break; + default: + *length = type_len * 4; + break; + } + + *rptr = data; + + return type_len; +} + +static unsigned int update_pointers(struct kbuffer *kbuf) +{ + unsigned long long delta; + unsigned int type_len; + int length; + void *ptr = kbuf->data + kbuf->curr; + + type_len = translate_data(kbuf, ptr, &ptr, &delta, &length); + + kbuf->timestamp += delta; + kbuf->index = calc_index(kbuf, ptr); + kbuf->next = kbuf->index + length; + + return type_len; +} + +/** + * kbuffer_translate_data - read raw data to get a record + * @swap: Set to 1 if bytes in words need to be swapped when read + * @data: The raw data to read + * @size: Address to store the size of the event data. + * + * Returns a pointer to the event data. To determine the entire + * record size (record metadata + data) just add the difference between + * @data and the returned value to @size. + */ +void *kbuffer_translate_data(int swap, void *data, unsigned int *size) +{ + unsigned long long delta; + struct kbuffer kbuf; + int type_len; + int length; + void *ptr; + + if (swap) { + kbuf.read_8 = __read_8_sw; + kbuf.read_4 = __read_4_sw; + kbuf.flags = host_is_bigendian() ? 0 : KBUFFER_FL_BIG_ENDIAN; + } else { + kbuf.read_8 = __read_8; + kbuf.read_4 = __read_4; + kbuf.flags = host_is_bigendian() ? KBUFFER_FL_BIG_ENDIAN: 0; + } + + type_len = translate_data(&kbuf, data, &ptr, &delta, &length); + switch (type_len) { + case KBUFFER_TYPE_PADDING: + case KBUFFER_TYPE_TIME_EXTEND: + case KBUFFER_TYPE_TIME_STAMP: + return NULL; + }; + + *size = length; + + return ptr; +} + +static int __next_event(struct kbuffer *kbuf) +{ + int type; + + do { + kbuf->curr = kbuf->next; + if (kbuf->next >= kbuf->size) + return -1; + type = update_pointers(kbuf); + } while (type == KBUFFER_TYPE_TIME_EXTEND || type == KBUFFER_TYPE_PADDING); + + return 0; +} + +static int next_event(struct kbuffer *kbuf) +{ + return kbuf->next_event(kbuf); +} + +/** + * kbuffer_next_event - increment the current pointer + * @kbuf: The kbuffer to read + * @ts: Address to store the next record's timestamp (may be NULL to ignore) + * + * Increments the pointers into the subbuffer of the kbuffer to point to the + * next event so that the next kbuffer_read_event() will return a + * new event. + * + * Returns the data of the next event if a new event exists on the subbuffer, + * NULL otherwise. + */ +void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts) +{ + int ret; + + if (!kbuf || !kbuf->subbuffer) + return NULL; + + ret = next_event(kbuf); + if (ret < 0) + return NULL; + + if (ts) + *ts = kbuf->timestamp; + + return kbuf->data + kbuf->index; +} + +/** + * kbuffer_load_subbuffer - load a new subbuffer into the kbuffer + * @kbuf: The kbuffer to load + * @subbuffer: The subbuffer to load into @kbuf. + * + * Load a new subbuffer (page) into @kbuf. This will reset all + * the pointers and update the @kbuf timestamp. The next read will + * return the first event on @subbuffer. + * + * Returns 0 on succes, -1 otherwise. + */ +int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer) +{ + unsigned long long flags; + void *ptr = subbuffer; + + if (!kbuf || !subbuffer) + return -1; + + kbuf->subbuffer = subbuffer; + + kbuf->timestamp = read_8(kbuf, ptr); + ptr += 8; + + kbuf->curr = 0; + + if (kbuf->flags & KBUFFER_FL_LONG_8) + kbuf->start = 16; + else + kbuf->start = 12; + + kbuf->data = subbuffer + kbuf->start; + + flags = read_long(kbuf, ptr); + kbuf->size = (unsigned int)flags & COMMIT_MASK; + + if (flags & MISSING_EVENTS) { + if (flags & MISSING_STORED) { + ptr = kbuf->data + kbuf->size; + kbuf->lost_events = read_long(kbuf, ptr); + } else + kbuf->lost_events = -1; + } else + kbuf->lost_events = 0; + + kbuf->index = 0; + kbuf->next = 0; + + next_event(kbuf); + + return 0; +} + +/** + * kbuffer_read_event - read the next event in the kbuffer subbuffer + * @kbuf: The kbuffer to read from + * @ts: The address to store the timestamp of the event (may be NULL to ignore) + * + * Returns a pointer to the data part of the current event. + * NULL if no event is left on the subbuffer. + */ +void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts) +{ + if (!kbuf || !kbuf->subbuffer) + return NULL; + + if (kbuf->curr >= kbuf->size) + return NULL; + + if (ts) + *ts = kbuf->timestamp; + return kbuf->data + kbuf->index; +} + +/** + * kbuffer_timestamp - Return the timestamp of the current event + * @kbuf: The kbuffer to read from + * + * Returns the timestamp of the current (next) event. + */ +unsigned long long kbuffer_timestamp(struct kbuffer *kbuf) +{ + return kbuf->timestamp; +} + +/** + * kbuffer_read_at_offset - read the event that is at offset + * @kbuf: The kbuffer to read from + * @offset: The offset into the subbuffer + * @ts: The address to store the timestamp of the event (may be NULL to ignore) + * + * The @offset must be an index from the @kbuf subbuffer beginning. + * If @offset is bigger than the stored subbuffer, NULL will be returned. + * + * Returns the data of the record that is at @offset. Note, @offset does + * not need to be the start of the record, the offset just needs to be + * in the record (or beginning of it). + * + * Note, the kbuf timestamp and pointers are updated to the + * returned record. That is, kbuffer_read_event() will return the same + * data and timestamp, and kbuffer_next_event() will increment from + * this record. + */ +void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset, + unsigned long long *ts) +{ + void *data; + + if (offset < kbuf->start) + offset = 0; + else + offset -= kbuf->start; + + /* Reset the buffer */ + kbuffer_load_subbuffer(kbuf, kbuf->subbuffer); + data = kbuffer_read_event(kbuf, ts); + + while (kbuf->curr < offset) { + data = kbuffer_next_event(kbuf, ts); + if (!data) + break; + } + + return data; +} + +/** + * kbuffer_subbuffer_size - the size of the loaded subbuffer + * @kbuf: The kbuffer to read from + * + * Returns the size of the subbuffer. Note, this size is + * where the last event resides. The stored subbuffer may actually be + * bigger due to padding and such. + */ +int kbuffer_subbuffer_size(struct kbuffer *kbuf) +{ + return kbuf->size; +} + +/** + * kbuffer_curr_index - Return the index of the record + * @kbuf: The kbuffer to read from + * + * Returns the index from the start of the data part of + * the subbuffer to the current location. Note this is not + * from the start of the subbuffer. An index of zero will + * point to the first record. Use kbuffer_curr_offset() for + * the actually offset (that can be used by kbuffer_read_at_offset()) + */ +int kbuffer_curr_index(struct kbuffer *kbuf) +{ + return kbuf->curr; +} + +/** + * kbuffer_curr_offset - Return the offset of the record + * @kbuf: The kbuffer to read from + * + * Returns the offset from the start of the subbuffer to the + * current location. + */ +int kbuffer_curr_offset(struct kbuffer *kbuf) +{ + return kbuf->curr + kbuf->start; +} + +/** + * kbuffer_event_size - return the size of the event data + * @kbuf: The kbuffer to read + * + * Returns the size of the event data (the payload not counting + * the meta data of the record) of the current event. + */ +int kbuffer_event_size(struct kbuffer *kbuf) +{ + return kbuf->next - kbuf->index; +} + +/** + * kbuffer_curr_size - return the size of the entire record + * @kbuf: The kbuffer to read + * + * Returns the size of the entire record (meta data and payload) + * of the current event. + */ +int kbuffer_curr_size(struct kbuffer *kbuf) +{ + return kbuf->next - kbuf->curr; +} + +/** + * kbuffer_missed_events - return the # of missed events from last event. + * @kbuf: The kbuffer to read from + * + * Returns the # of missed events (if recorded) before the current + * event. Note, only events on the beginning of a subbuffer can + * have missed events, all other events within the buffer will be + * zero. + */ +int kbuffer_missed_events(struct kbuffer *kbuf) +{ + /* Only the first event can have missed events */ + if (kbuf->curr) + return 0; + + return kbuf->lost_events; +} + +/** + * kbuffer_set_old_forma - set the kbuffer to use the old format parsing + * @kbuf: The kbuffer to set + * + * This is obsolete (or should be). The first kernels to use the + * new ring buffer had a slightly different ring buffer format + * (2.6.30 and earlier). It is still somewhat supported by kbuffer, + * but should not be counted on in the future. + */ +void kbuffer_set_old_format(struct kbuffer *kbuf) +{ + kbuf->flags |= KBUFFER_FL_OLD_FORMAT; + + kbuf->next_event = __old_next_event; +} + +/** + * kbuffer_start_of_data - return offset of where data starts on subbuffer + * @kbuf: The kbuffer + * + * Returns the location on the subbuffer where the data starts. + */ +int kbuffer_start_of_data(struct kbuffer *kbuf) +{ + return kbuf->start; +} diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h new file mode 100644 index 000000000..03dce7575 --- /dev/null +++ b/tools/lib/traceevent/kbuffer.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2012 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#ifndef _KBUFFER_H +#define _KBUFFER_H + +#ifndef TS_SHIFT +#define TS_SHIFT 27 +#endif + +enum kbuffer_endian { + KBUFFER_ENDIAN_BIG, + KBUFFER_ENDIAN_LITTLE, +}; + +enum kbuffer_long_size { + KBUFFER_LSIZE_4, + KBUFFER_LSIZE_8, +}; + +enum { + KBUFFER_TYPE_PADDING = 29, + KBUFFER_TYPE_TIME_EXTEND = 30, + KBUFFER_TYPE_TIME_STAMP = 31, +}; + +struct kbuffer; + +struct kbuffer *kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian); +void kbuffer_free(struct kbuffer *kbuf); +int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer); +void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts); +void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts); +unsigned long long kbuffer_timestamp(struct kbuffer *kbuf); + +void *kbuffer_translate_data(int swap, void *data, unsigned int *size); + +void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset, unsigned long long *ts); + +int kbuffer_curr_index(struct kbuffer *kbuf); + +int kbuffer_curr_offset(struct kbuffer *kbuf); +int kbuffer_curr_size(struct kbuffer *kbuf); +int kbuffer_event_size(struct kbuffer *kbuf); +int kbuffer_missed_events(struct kbuffer *kbuf); +int kbuffer_subbuffer_size(struct kbuffer *kbuf); + +void kbuffer_set_old_format(struct kbuffer *kbuf); +int kbuffer_start_of_data(struct kbuffer *kbuf); + +#endif /* _K_BUFFER_H */ diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c new file mode 100644 index 000000000..27248a0aa --- /dev/null +++ b/tools/lib/traceevent/parse-filter.c @@ -0,0 +1,2445 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <errno.h> +#include <sys/types.h> + +#include "event-parse.h" +#include "event-utils.h" + +#define COMM "COMM" +#define CPU "CPU" + +static struct format_field comm = { + .name = "COMM", +}; + +static struct format_field cpu = { + .name = "CPU", +}; + +struct event_list { + struct event_list *next; + struct event_format *event; +}; + +static void show_error(char *error_buf, const char *fmt, ...) +{ + unsigned long long index; + const char *input; + va_list ap; + int len; + int i; + + input = tep_get_input_buf(); + index = tep_get_input_buf_ptr(); + len = input ? strlen(input) : 0; + + if (len) { + strcpy(error_buf, input); + error_buf[len] = '\n'; + for (i = 1; i < len && i < index; i++) + error_buf[len+i] = ' '; + error_buf[len + i] = '^'; + error_buf[len + i + 1] = '\n'; + len += i+2; + } + + va_start(ap, fmt); + vsnprintf(error_buf + len, TEP_FILTER_ERROR_BUFSZ - len, fmt, ap); + va_end(ap); +} + +static void free_token(char *token) +{ + tep_free_token(token); +} + +static enum event_type read_token(char **tok) +{ + enum event_type type; + char *token = NULL; + + do { + free_token(token); + type = tep_read_token(&token); + } while (type == EVENT_NEWLINE || type == EVENT_SPACE); + + /* If token is = or ! check to see if the next char is ~ */ + if (token && + (strcmp(token, "=") == 0 || strcmp(token, "!") == 0) && + tep_peek_char() == '~') { + /* append it */ + *tok = malloc(3); + if (*tok == NULL) { + free_token(token); + return EVENT_ERROR; + } + sprintf(*tok, "%c%c", *token, '~'); + free_token(token); + /* Now remove the '~' from the buffer */ + tep_read_token(&token); + free_token(token); + } else + *tok = token; + + return type; +} + +static int filter_cmp(const void *a, const void *b) +{ + const struct filter_type *ea = a; + const struct filter_type *eb = b; + + if (ea->event_id < eb->event_id) + return -1; + + if (ea->event_id > eb->event_id) + return 1; + + return 0; +} + +static struct filter_type * +find_filter_type(struct event_filter *filter, int id) +{ + struct filter_type *filter_type; + struct filter_type key; + + key.event_id = id; + + filter_type = bsearch(&key, filter->event_filters, + filter->filters, + sizeof(*filter->event_filters), + filter_cmp); + + return filter_type; +} + +static struct filter_type * +add_filter_type(struct event_filter *filter, int id) +{ + struct filter_type *filter_type; + int i; + + filter_type = find_filter_type(filter, id); + if (filter_type) + return filter_type; + + filter_type = realloc(filter->event_filters, + sizeof(*filter->event_filters) * + (filter->filters + 1)); + if (!filter_type) + return NULL; + + filter->event_filters = filter_type; + + for (i = 0; i < filter->filters; i++) { + if (filter->event_filters[i].event_id > id) + break; + } + + if (i < filter->filters) + memmove(&filter->event_filters[i+1], + &filter->event_filters[i], + sizeof(*filter->event_filters) * + (filter->filters - i)); + + filter_type = &filter->event_filters[i]; + filter_type->event_id = id; + filter_type->event = tep_find_event(filter->pevent, id); + filter_type->filter = NULL; + + filter->filters++; + + return filter_type; +} + +/** + * tep_filter_alloc - create a new event filter + * @pevent: The pevent that this filter is associated with + */ +struct event_filter *tep_filter_alloc(struct tep_handle *pevent) +{ + struct event_filter *filter; + + filter = malloc(sizeof(*filter)); + if (filter == NULL) + return NULL; + + memset(filter, 0, sizeof(*filter)); + filter->pevent = pevent; + tep_ref(pevent); + + return filter; +} + +static struct filter_arg *allocate_arg(void) +{ + return calloc(1, sizeof(struct filter_arg)); +} + +static void free_arg(struct filter_arg *arg) +{ + if (!arg) + return; + + switch (arg->type) { + case FILTER_ARG_NONE: + case FILTER_ARG_BOOLEAN: + break; + + case FILTER_ARG_NUM: + free_arg(arg->num.left); + free_arg(arg->num.right); + break; + + case FILTER_ARG_EXP: + free_arg(arg->exp.left); + free_arg(arg->exp.right); + break; + + case FILTER_ARG_STR: + free(arg->str.val); + regfree(&arg->str.reg); + free(arg->str.buffer); + break; + + case FILTER_ARG_VALUE: + if (arg->value.type == FILTER_STRING || + arg->value.type == FILTER_CHAR) + free(arg->value.str); + break; + + case FILTER_ARG_OP: + free_arg(arg->op.left); + free_arg(arg->op.right); + default: + break; + } + + free(arg); +} + +static int add_event(struct event_list **events, + struct event_format *event) +{ + struct event_list *list; + + list = malloc(sizeof(*list)); + if (list == NULL) + return -1; + + list->next = *events; + *events = list; + list->event = event; + return 0; +} + +static int event_match(struct event_format *event, + regex_t *sreg, regex_t *ereg) +{ + if (sreg) { + return !regexec(sreg, event->system, 0, NULL, 0) && + !regexec(ereg, event->name, 0, NULL, 0); + } + + return !regexec(ereg, event->system, 0, NULL, 0) || + !regexec(ereg, event->name, 0, NULL, 0); +} + +static enum tep_errno +find_event(struct tep_handle *pevent, struct event_list **events, + char *sys_name, char *event_name) +{ + struct event_format *event; + regex_t ereg; + regex_t sreg; + int match = 0; + int fail = 0; + char *reg; + int ret; + int i; + + if (!event_name) { + /* if no name is given, then swap sys and name */ + event_name = sys_name; + sys_name = NULL; + } + + ret = asprintf(®, "^%s$", event_name); + if (ret < 0) + return TEP_ERRNO__MEM_ALLOC_FAILED; + + ret = regcomp(&ereg, reg, REG_ICASE|REG_NOSUB); + free(reg); + + if (ret) + return TEP_ERRNO__INVALID_EVENT_NAME; + + if (sys_name) { + ret = asprintf(®, "^%s$", sys_name); + if (ret < 0) { + regfree(&ereg); + return TEP_ERRNO__MEM_ALLOC_FAILED; + } + + ret = regcomp(&sreg, reg, REG_ICASE|REG_NOSUB); + free(reg); + if (ret) { + regfree(&ereg); + return TEP_ERRNO__INVALID_EVENT_NAME; + } + } + + for (i = 0; i < pevent->nr_events; i++) { + event = pevent->events[i]; + if (event_match(event, sys_name ? &sreg : NULL, &ereg)) { + match = 1; + if (add_event(events, event) < 0) { + fail = 1; + break; + } + } + } + + regfree(&ereg); + if (sys_name) + regfree(&sreg); + + if (!match) + return TEP_ERRNO__EVENT_NOT_FOUND; + if (fail) + return TEP_ERRNO__MEM_ALLOC_FAILED; + + return 0; +} + +static void free_events(struct event_list *events) +{ + struct event_list *event; + + while (events) { + event = events; + events = events->next; + free(event); + } +} + +static enum tep_errno +create_arg_item(struct event_format *event, const char *token, + enum event_type type, struct filter_arg **parg, char *error_str) +{ + struct format_field *field; + struct filter_arg *arg; + + arg = allocate_arg(); + if (arg == NULL) { + show_error(error_str, "failed to allocate filter arg"); + return TEP_ERRNO__MEM_ALLOC_FAILED; + } + + switch (type) { + + case EVENT_SQUOTE: + case EVENT_DQUOTE: + arg->type = FILTER_ARG_VALUE; + arg->value.type = + type == EVENT_DQUOTE ? FILTER_STRING : FILTER_CHAR; + arg->value.str = strdup(token); + if (!arg->value.str) { + free_arg(arg); + show_error(error_str, "failed to allocate string filter arg"); + return TEP_ERRNO__MEM_ALLOC_FAILED; + } + break; + case EVENT_ITEM: + /* if it is a number, then convert it */ + if (isdigit(token[0])) { + arg->type = FILTER_ARG_VALUE; + arg->value.type = FILTER_NUMBER; + arg->value.val = strtoull(token, NULL, 0); + break; + } + /* Consider this a field */ + field = tep_find_any_field(event, token); + if (!field) { + /* If token is 'COMM' or 'CPU' then it is special */ + if (strcmp(token, COMM) == 0) { + field = &comm; + } else if (strcmp(token, CPU) == 0) { + field = &cpu; + } else { + /* not a field, Make it false */ + arg->type = FILTER_ARG_BOOLEAN; + arg->boolean.value = FILTER_FALSE; + break; + } + } + arg->type = FILTER_ARG_FIELD; + arg->field.field = field; + break; + default: + free_arg(arg); + show_error(error_str, "expected a value but found %s", token); + return TEP_ERRNO__UNEXPECTED_TYPE; + } + *parg = arg; + return 0; +} + +static struct filter_arg * +create_arg_op(enum filter_op_type btype) +{ + struct filter_arg *arg; + + arg = allocate_arg(); + if (!arg) + return NULL; + + arg->type = FILTER_ARG_OP; + arg->op.type = btype; + + return arg; +} + +static struct filter_arg * +create_arg_exp(enum filter_exp_type etype) +{ + struct filter_arg *arg; + + arg = allocate_arg(); + if (!arg) + return NULL; + + arg->type = FILTER_ARG_EXP; + arg->exp.type = etype; + + return arg; +} + +static struct filter_arg * +create_arg_cmp(enum filter_cmp_type ctype) +{ + struct filter_arg *arg; + + arg = allocate_arg(); + if (!arg) + return NULL; + + /* Use NUM and change if necessary */ + arg->type = FILTER_ARG_NUM; + arg->num.type = ctype; + + return arg; +} + +static enum tep_errno +add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str) +{ + struct filter_arg *left; + char *str; + int op_type; + int ret; + + switch (op->type) { + case FILTER_ARG_EXP: + if (op->exp.right) + goto out_fail; + op->exp.right = arg; + break; + + case FILTER_ARG_OP: + if (op->op.right) + goto out_fail; + op->op.right = arg; + break; + + case FILTER_ARG_NUM: + if (op->op.right) + goto out_fail; + /* + * The arg must be num, str, or field + */ + switch (arg->type) { + case FILTER_ARG_VALUE: + case FILTER_ARG_FIELD: + break; + default: + show_error(error_str, "Illegal rvalue"); + return TEP_ERRNO__ILLEGAL_RVALUE; + } + + /* + * Depending on the type, we may need to + * convert this to a string or regex. + */ + switch (arg->value.type) { + case FILTER_CHAR: + /* + * A char should be converted to number if + * the string is 1 byte, and the compare + * is not a REGEX. + */ + if (strlen(arg->value.str) == 1 && + op->num.type != FILTER_CMP_REGEX && + op->num.type != FILTER_CMP_NOT_REGEX) { + arg->value.type = FILTER_NUMBER; + goto do_int; + } + /* fall through */ + case FILTER_STRING: + + /* convert op to a string arg */ + op_type = op->num.type; + left = op->num.left; + str = arg->value.str; + + /* reset the op for the new field */ + memset(op, 0, sizeof(*op)); + + /* + * If left arg was a field not found then + * NULL the entire op. + */ + if (left->type == FILTER_ARG_BOOLEAN) { + free_arg(left); + free_arg(arg); + op->type = FILTER_ARG_BOOLEAN; + op->boolean.value = FILTER_FALSE; + break; + } + + /* Left arg must be a field */ + if (left->type != FILTER_ARG_FIELD) { + show_error(error_str, + "Illegal lvalue for string comparison"); + return TEP_ERRNO__ILLEGAL_LVALUE; + } + + /* Make sure this is a valid string compare */ + switch (op_type) { + case FILTER_CMP_EQ: + op_type = FILTER_CMP_MATCH; + break; + case FILTER_CMP_NE: + op_type = FILTER_CMP_NOT_MATCH; + break; + + case FILTER_CMP_REGEX: + case FILTER_CMP_NOT_REGEX: + ret = regcomp(&op->str.reg, str, REG_ICASE|REG_NOSUB); + if (ret) { + show_error(error_str, + "RegEx '%s' did not compute", + str); + return TEP_ERRNO__INVALID_REGEX; + } + break; + default: + show_error(error_str, + "Illegal comparison for string"); + return TEP_ERRNO__ILLEGAL_STRING_CMP; + } + + op->type = FILTER_ARG_STR; + op->str.type = op_type; + op->str.field = left->field.field; + op->str.val = strdup(str); + if (!op->str.val) { + show_error(error_str, "Failed to allocate string filter"); + return TEP_ERRNO__MEM_ALLOC_FAILED; + } + /* + * Need a buffer to copy data for tests + */ + op->str.buffer = malloc(op->str.field->size + 1); + if (!op->str.buffer) { + show_error(error_str, "Failed to allocate string filter"); + return TEP_ERRNO__MEM_ALLOC_FAILED; + } + /* Null terminate this buffer */ + op->str.buffer[op->str.field->size] = 0; + + /* We no longer have left or right args */ + free_arg(arg); + free_arg(left); + + break; + + case FILTER_NUMBER: + + do_int: + switch (op->num.type) { + case FILTER_CMP_REGEX: + case FILTER_CMP_NOT_REGEX: + show_error(error_str, + "Op not allowed with integers"); + return TEP_ERRNO__ILLEGAL_INTEGER_CMP; + + default: + break; + } + + /* numeric compare */ + op->num.right = arg; + break; + default: + goto out_fail; + } + break; + default: + goto out_fail; + } + + return 0; + + out_fail: + show_error(error_str, "Syntax error"); + return TEP_ERRNO__SYNTAX_ERROR; +} + +static struct filter_arg * +rotate_op_right(struct filter_arg *a, struct filter_arg *b) +{ + struct filter_arg *arg; + + arg = a->op.right; + a->op.right = b; + return arg; +} + +static enum tep_errno add_left(struct filter_arg *op, struct filter_arg *arg) +{ + switch (op->type) { + case FILTER_ARG_EXP: + if (arg->type == FILTER_ARG_OP) + arg = rotate_op_right(arg, op); + op->exp.left = arg; + break; + + case FILTER_ARG_OP: + op->op.left = arg; + break; + case FILTER_ARG_NUM: + if (arg->type == FILTER_ARG_OP) + arg = rotate_op_right(arg, op); + + /* left arg of compares must be a field */ + if (arg->type != FILTER_ARG_FIELD && + arg->type != FILTER_ARG_BOOLEAN) + return TEP_ERRNO__INVALID_ARG_TYPE; + op->num.left = arg; + break; + default: + return TEP_ERRNO__INVALID_ARG_TYPE; + } + return 0; +} + +enum op_type { + OP_NONE, + OP_BOOL, + OP_NOT, + OP_EXP, + OP_CMP, +}; + +static enum op_type process_op(const char *token, + enum filter_op_type *btype, + enum filter_cmp_type *ctype, + enum filter_exp_type *etype) +{ + *btype = FILTER_OP_NOT; + *etype = FILTER_EXP_NONE; + *ctype = FILTER_CMP_NONE; + + if (strcmp(token, "&&") == 0) + *btype = FILTER_OP_AND; + else if (strcmp(token, "||") == 0) + *btype = FILTER_OP_OR; + else if (strcmp(token, "!") == 0) + return OP_NOT; + + if (*btype != FILTER_OP_NOT) + return OP_BOOL; + + /* Check for value expressions */ + if (strcmp(token, "+") == 0) { + *etype = FILTER_EXP_ADD; + } else if (strcmp(token, "-") == 0) { + *etype = FILTER_EXP_SUB; + } else if (strcmp(token, "*") == 0) { + *etype = FILTER_EXP_MUL; + } else if (strcmp(token, "/") == 0) { + *etype = FILTER_EXP_DIV; + } else if (strcmp(token, "%") == 0) { + *etype = FILTER_EXP_MOD; + } else if (strcmp(token, ">>") == 0) { + *etype = FILTER_EXP_RSHIFT; + } else if (strcmp(token, "<<") == 0) { + *etype = FILTER_EXP_LSHIFT; + } else if (strcmp(token, "&") == 0) { + *etype = FILTER_EXP_AND; + } else if (strcmp(token, "|") == 0) { + *etype = FILTER_EXP_OR; + } else if (strcmp(token, "^") == 0) { + *etype = FILTER_EXP_XOR; + } else if (strcmp(token, "~") == 0) + *etype = FILTER_EXP_NOT; + + if (*etype != FILTER_EXP_NONE) + return OP_EXP; + + /* Check for compares */ + if (strcmp(token, "==") == 0) + *ctype = FILTER_CMP_EQ; + else if (strcmp(token, "!=") == 0) + *ctype = FILTER_CMP_NE; + else if (strcmp(token, "<") == 0) + *ctype = FILTER_CMP_LT; + else if (strcmp(token, ">") == 0) + *ctype = FILTER_CMP_GT; + else if (strcmp(token, "<=") == 0) + *ctype = FILTER_CMP_LE; + else if (strcmp(token, ">=") == 0) + *ctype = FILTER_CMP_GE; + else if (strcmp(token, "=~") == 0) + *ctype = FILTER_CMP_REGEX; + else if (strcmp(token, "!~") == 0) + *ctype = FILTER_CMP_NOT_REGEX; + else + return OP_NONE; + + return OP_CMP; +} + +static int check_op_done(struct filter_arg *arg) +{ + switch (arg->type) { + case FILTER_ARG_EXP: + return arg->exp.right != NULL; + + case FILTER_ARG_OP: + return arg->op.right != NULL; + + case FILTER_ARG_NUM: + return arg->num.right != NULL; + + case FILTER_ARG_STR: + /* A string conversion is always done */ + return 1; + + case FILTER_ARG_BOOLEAN: + /* field not found, is ok */ + return 1; + + default: + return 0; + } +} + +enum filter_vals { + FILTER_VAL_NORM, + FILTER_VAL_FALSE, + FILTER_VAL_TRUE, +}; + +static enum tep_errno +reparent_op_arg(struct filter_arg *parent, struct filter_arg *old_child, + struct filter_arg *arg, char *error_str) +{ + struct filter_arg *other_child; + struct filter_arg **ptr; + + if (parent->type != FILTER_ARG_OP && + arg->type != FILTER_ARG_OP) { + show_error(error_str, "can not reparent other than OP"); + return TEP_ERRNO__REPARENT_NOT_OP; + } + + /* Get the sibling */ + if (old_child->op.right == arg) { + ptr = &old_child->op.right; + other_child = old_child->op.left; + } else if (old_child->op.left == arg) { + ptr = &old_child->op.left; + other_child = old_child->op.right; + } else { + show_error(error_str, "Error in reparent op, find other child"); + return TEP_ERRNO__REPARENT_FAILED; + } + + /* Detach arg from old_child */ + *ptr = NULL; + + /* Check for root */ + if (parent == old_child) { + free_arg(other_child); + *parent = *arg; + /* Free arg without recussion */ + free(arg); + return 0; + } + + if (parent->op.right == old_child) + ptr = &parent->op.right; + else if (parent->op.left == old_child) + ptr = &parent->op.left; + else { + show_error(error_str, "Error in reparent op"); + return TEP_ERRNO__REPARENT_FAILED; + } + + *ptr = arg; + + free_arg(old_child); + return 0; +} + +/* Returns either filter_vals (success) or tep_errno (failfure) */ +static int test_arg(struct filter_arg *parent, struct filter_arg *arg, + char *error_str) +{ + int lval, rval; + + switch (arg->type) { + + /* bad case */ + case FILTER_ARG_BOOLEAN: + return FILTER_VAL_FALSE + arg->boolean.value; + + /* good cases: */ + case FILTER_ARG_STR: + case FILTER_ARG_VALUE: + case FILTER_ARG_FIELD: + return FILTER_VAL_NORM; + + case FILTER_ARG_EXP: + lval = test_arg(arg, arg->exp.left, error_str); + if (lval != FILTER_VAL_NORM) + return lval; + rval = test_arg(arg, arg->exp.right, error_str); + if (rval != FILTER_VAL_NORM) + return rval; + return FILTER_VAL_NORM; + + case FILTER_ARG_NUM: + lval = test_arg(arg, arg->num.left, error_str); + if (lval != FILTER_VAL_NORM) + return lval; + rval = test_arg(arg, arg->num.right, error_str); + if (rval != FILTER_VAL_NORM) + return rval; + return FILTER_VAL_NORM; + + case FILTER_ARG_OP: + if (arg->op.type != FILTER_OP_NOT) { + lval = test_arg(arg, arg->op.left, error_str); + switch (lval) { + case FILTER_VAL_NORM: + break; + case FILTER_VAL_TRUE: + if (arg->op.type == FILTER_OP_OR) + return FILTER_VAL_TRUE; + rval = test_arg(arg, arg->op.right, error_str); + if (rval != FILTER_VAL_NORM) + return rval; + + return reparent_op_arg(parent, arg, arg->op.right, + error_str); + + case FILTER_VAL_FALSE: + if (arg->op.type == FILTER_OP_AND) + return FILTER_VAL_FALSE; + rval = test_arg(arg, arg->op.right, error_str); + if (rval != FILTER_VAL_NORM) + return rval; + + return reparent_op_arg(parent, arg, arg->op.right, + error_str); + + default: + return lval; + } + } + + rval = test_arg(arg, arg->op.right, error_str); + switch (rval) { + case FILTER_VAL_NORM: + default: + break; + + case FILTER_VAL_TRUE: + if (arg->op.type == FILTER_OP_OR) + return FILTER_VAL_TRUE; + if (arg->op.type == FILTER_OP_NOT) + return FILTER_VAL_FALSE; + + return reparent_op_arg(parent, arg, arg->op.left, + error_str); + + case FILTER_VAL_FALSE: + if (arg->op.type == FILTER_OP_AND) + return FILTER_VAL_FALSE; + if (arg->op.type == FILTER_OP_NOT) + return FILTER_VAL_TRUE; + + return reparent_op_arg(parent, arg, arg->op.left, + error_str); + } + + return rval; + default: + show_error(error_str, "bad arg in filter tree"); + return TEP_ERRNO__BAD_FILTER_ARG; + } + return FILTER_VAL_NORM; +} + +/* Remove any unknown event fields */ +static int collapse_tree(struct filter_arg *arg, + struct filter_arg **arg_collapsed, char *error_str) +{ + int ret; + + ret = test_arg(arg, arg, error_str); + switch (ret) { + case FILTER_VAL_NORM: + break; + + case FILTER_VAL_TRUE: + case FILTER_VAL_FALSE: + free_arg(arg); + arg = allocate_arg(); + if (arg) { + arg->type = FILTER_ARG_BOOLEAN; + arg->boolean.value = ret == FILTER_VAL_TRUE; + } else { + show_error(error_str, "Failed to allocate filter arg"); + ret = TEP_ERRNO__MEM_ALLOC_FAILED; + } + break; + + default: + /* test_arg() already set the error_str */ + free_arg(arg); + arg = NULL; + break; + } + + *arg_collapsed = arg; + return ret; +} + +static enum tep_errno +process_filter(struct event_format *event, struct filter_arg **parg, + char *error_str, int not) +{ + enum event_type type; + char *token = NULL; + struct filter_arg *current_op = NULL; + struct filter_arg *current_exp = NULL; + struct filter_arg *left_item = NULL; + struct filter_arg *arg = NULL; + enum op_type op_type; + enum filter_op_type btype; + enum filter_exp_type etype; + enum filter_cmp_type ctype; + enum tep_errno ret; + + *parg = NULL; + + do { + free(token); + type = read_token(&token); + switch (type) { + case EVENT_SQUOTE: + case EVENT_DQUOTE: + case EVENT_ITEM: + ret = create_arg_item(event, token, type, &arg, error_str); + if (ret < 0) + goto fail; + if (!left_item) + left_item = arg; + else if (current_exp) { + ret = add_right(current_exp, arg, error_str); + if (ret < 0) + goto fail; + left_item = NULL; + /* Not's only one one expression */ + if (not) { + arg = NULL; + if (current_op) + goto fail_syntax; + free(token); + *parg = current_exp; + return 0; + } + } else + goto fail_syntax; + arg = NULL; + break; + + case EVENT_DELIM: + if (*token == ',') { + show_error(error_str, "Illegal token ','"); + ret = TEP_ERRNO__ILLEGAL_TOKEN; + goto fail; + } + + if (*token == '(') { + if (left_item) { + show_error(error_str, + "Open paren can not come after item"); + ret = TEP_ERRNO__INVALID_PAREN; + goto fail; + } + if (current_exp) { + show_error(error_str, + "Open paren can not come after expression"); + ret = TEP_ERRNO__INVALID_PAREN; + goto fail; + } + + ret = process_filter(event, &arg, error_str, 0); + if (ret != TEP_ERRNO__UNBALANCED_PAREN) { + if (ret == 0) { + show_error(error_str, + "Unbalanced number of '('"); + ret = TEP_ERRNO__UNBALANCED_PAREN; + } + goto fail; + } + ret = 0; + + /* A not wants just one expression */ + if (not) { + if (current_op) + goto fail_syntax; + *parg = arg; + return 0; + } + + if (current_op) + ret = add_right(current_op, arg, error_str); + else + current_exp = arg; + + if (ret < 0) + goto fail; + + } else { /* ')' */ + if (!current_op && !current_exp) + goto fail_syntax; + + /* Make sure everything is finished at this level */ + if (current_exp && !check_op_done(current_exp)) + goto fail_syntax; + if (current_op && !check_op_done(current_op)) + goto fail_syntax; + + if (current_op) + *parg = current_op; + else + *parg = current_exp; + free(token); + return TEP_ERRNO__UNBALANCED_PAREN; + } + break; + + case EVENT_OP: + op_type = process_op(token, &btype, &ctype, &etype); + + /* All expect a left arg except for NOT */ + switch (op_type) { + case OP_BOOL: + /* Logic ops need a left expression */ + if (!current_exp && !current_op) + goto fail_syntax; + /* fall through */ + case OP_NOT: + /* logic only processes ops and exp */ + if (left_item) + goto fail_syntax; + break; + case OP_EXP: + case OP_CMP: + if (!left_item) + goto fail_syntax; + break; + case OP_NONE: + show_error(error_str, + "Unknown op token %s", token); + ret = TEP_ERRNO__UNKNOWN_TOKEN; + goto fail; + } + + ret = 0; + switch (op_type) { + case OP_BOOL: + arg = create_arg_op(btype); + if (arg == NULL) + goto fail_alloc; + if (current_op) + ret = add_left(arg, current_op); + else + ret = add_left(arg, current_exp); + current_op = arg; + current_exp = NULL; + break; + + case OP_NOT: + arg = create_arg_op(btype); + if (arg == NULL) + goto fail_alloc; + if (current_op) + ret = add_right(current_op, arg, error_str); + if (ret < 0) + goto fail; + current_exp = arg; + ret = process_filter(event, &arg, error_str, 1); + if (ret < 0) + goto fail; + ret = add_right(current_exp, arg, error_str); + if (ret < 0) + goto fail; + break; + + case OP_EXP: + case OP_CMP: + if (op_type == OP_EXP) + arg = create_arg_exp(etype); + else + arg = create_arg_cmp(ctype); + if (arg == NULL) + goto fail_alloc; + + if (current_op) + ret = add_right(current_op, arg, error_str); + if (ret < 0) + goto fail; + ret = add_left(arg, left_item); + if (ret < 0) { + arg = NULL; + goto fail_syntax; + } + current_exp = arg; + break; + default: + break; + } + arg = NULL; + if (ret < 0) + goto fail_syntax; + break; + case EVENT_NONE: + break; + case EVENT_ERROR: + goto fail_alloc; + default: + goto fail_syntax; + } + } while (type != EVENT_NONE); + + if (!current_op && !current_exp) + goto fail_syntax; + + if (!current_op) + current_op = current_exp; + + ret = collapse_tree(current_op, parg, error_str); + /* collapse_tree() may free current_op, and updates parg accordingly */ + current_op = NULL; + if (ret < 0) + goto fail; + + free(token); + return 0; + + fail_alloc: + show_error(error_str, "failed to allocate filter arg"); + ret = TEP_ERRNO__MEM_ALLOC_FAILED; + goto fail; + fail_syntax: + show_error(error_str, "Syntax error"); + ret = TEP_ERRNO__SYNTAX_ERROR; + fail: + free_arg(current_op); + free_arg(current_exp); + free_arg(arg); + free(token); + return ret; +} + +static enum tep_errno +process_event(struct event_format *event, const char *filter_str, + struct filter_arg **parg, char *error_str) +{ + int ret; + + tep_buffer_init(filter_str, strlen(filter_str)); + + ret = process_filter(event, parg, error_str, 0); + if (ret < 0) + return ret; + + /* If parg is NULL, then make it into FALSE */ + if (!*parg) { + *parg = allocate_arg(); + if (*parg == NULL) + return TEP_ERRNO__MEM_ALLOC_FAILED; + + (*parg)->type = FILTER_ARG_BOOLEAN; + (*parg)->boolean.value = FILTER_FALSE; + } + + return 0; +} + +static enum tep_errno +filter_event(struct event_filter *filter, struct event_format *event, + const char *filter_str, char *error_str) +{ + struct filter_type *filter_type; + struct filter_arg *arg; + enum tep_errno ret; + + if (filter_str) { + ret = process_event(event, filter_str, &arg, error_str); + if (ret < 0) + return ret; + + } else { + /* just add a TRUE arg */ + arg = allocate_arg(); + if (arg == NULL) + return TEP_ERRNO__MEM_ALLOC_FAILED; + + arg->type = FILTER_ARG_BOOLEAN; + arg->boolean.value = FILTER_TRUE; + } + + filter_type = add_filter_type(filter, event->id); + if (filter_type == NULL) { + free_arg(arg); + return TEP_ERRNO__MEM_ALLOC_FAILED; + } + + if (filter_type->filter) + free_arg(filter_type->filter); + filter_type->filter = arg; + + return 0; +} + +static void filter_init_error_buf(struct event_filter *filter) +{ + /* clear buffer to reset show error */ + tep_buffer_init("", 0); + filter->error_buffer[0] = '\0'; +} + +/** + * tep_filter_add_filter_str - add a new filter + * @filter: the event filter to add to + * @filter_str: the filter string that contains the filter + * + * Returns 0 if the filter was successfully added or a + * negative error code. Use tep_filter_strerror() to see + * actual error message in case of error. + */ +enum tep_errno tep_filter_add_filter_str(struct event_filter *filter, + const char *filter_str) +{ + struct tep_handle *pevent = filter->pevent; + struct event_list *event; + struct event_list *events = NULL; + const char *filter_start; + const char *next_event; + char *this_event; + char *event_name = NULL; + char *sys_name = NULL; + char *sp; + enum tep_errno rtn = 0; /* TEP_ERRNO__SUCCESS */ + int len; + int ret; + + filter_init_error_buf(filter); + + filter_start = strchr(filter_str, ':'); + if (filter_start) + len = filter_start - filter_str; + else + len = strlen(filter_str); + + do { + next_event = strchr(filter_str, ','); + if (next_event && + (!filter_start || next_event < filter_start)) + len = next_event - filter_str; + else if (filter_start) + len = filter_start - filter_str; + else + len = strlen(filter_str); + + this_event = malloc(len + 1); + if (this_event == NULL) { + /* This can only happen when events is NULL, but still */ + free_events(events); + return TEP_ERRNO__MEM_ALLOC_FAILED; + } + memcpy(this_event, filter_str, len); + this_event[len] = 0; + + if (next_event) + next_event++; + + filter_str = next_event; + + sys_name = strtok_r(this_event, "/", &sp); + event_name = strtok_r(NULL, "/", &sp); + + if (!sys_name) { + /* This can only happen when events is NULL, but still */ + free_events(events); + free(this_event); + return TEP_ERRNO__FILTER_NOT_FOUND; + } + + /* Find this event */ + ret = find_event(pevent, &events, strim(sys_name), strim(event_name)); + if (ret < 0) { + free_events(events); + free(this_event); + return ret; + } + free(this_event); + } while (filter_str); + + /* Skip the ':' */ + if (filter_start) + filter_start++; + + /* filter starts here */ + for (event = events; event; event = event->next) { + ret = filter_event(filter, event->event, filter_start, + filter->error_buffer); + /* Failures are returned if a parse error happened */ + if (ret < 0) + rtn = ret; + + if (ret >= 0 && pevent->test_filters) { + char *test; + test = tep_filter_make_string(filter, event->event->id); + if (test) { + printf(" '%s: %s'\n", event->event->name, test); + free(test); + } + } + } + + free_events(events); + + if (rtn >= 0 && pevent->test_filters) + exit(0); + + return rtn; +} + +static void free_filter_type(struct filter_type *filter_type) +{ + free_arg(filter_type->filter); +} + +/** + * tep_filter_strerror - fill error message in a buffer + * @filter: the event filter contains error + * @err: the error code + * @buf: the buffer to be filled in + * @buflen: the size of the buffer + * + * Returns 0 if message was filled successfully, -1 if error + */ +int tep_filter_strerror(struct event_filter *filter, enum tep_errno err, + char *buf, size_t buflen) +{ + if (err <= __TEP_ERRNO__START || err >= __TEP_ERRNO__END) + return -1; + + if (strlen(filter->error_buffer) > 0) { + size_t len = snprintf(buf, buflen, "%s", filter->error_buffer); + + if (len > buflen) + return -1; + return 0; + } + + return tep_strerror(filter->pevent, err, buf, buflen); +} + +/** + * tep_filter_remove_event - remove a filter for an event + * @filter: the event filter to remove from + * @event_id: the event to remove a filter for + * + * Removes the filter saved for an event defined by @event_id + * from the @filter. + * + * Returns 1: if an event was removed + * 0: if the event was not found + */ +int tep_filter_remove_event(struct event_filter *filter, + int event_id) +{ + struct filter_type *filter_type; + unsigned long len; + + if (!filter->filters) + return 0; + + filter_type = find_filter_type(filter, event_id); + + if (!filter_type) + return 0; + + free_filter_type(filter_type); + + /* The filter_type points into the event_filters array */ + len = (unsigned long)(filter->event_filters + filter->filters) - + (unsigned long)(filter_type + 1); + + memmove(filter_type, filter_type + 1, len); + filter->filters--; + + memset(&filter->event_filters[filter->filters], 0, + sizeof(*filter_type)); + + return 1; +} + +/** + * tep_filter_reset - clear all filters in a filter + * @filter: the event filter to reset + * + * Removes all filters from a filter and resets it. + */ +void tep_filter_reset(struct event_filter *filter) +{ + int i; + + for (i = 0; i < filter->filters; i++) + free_filter_type(&filter->event_filters[i]); + + free(filter->event_filters); + filter->filters = 0; + filter->event_filters = NULL; +} + +void tep_filter_free(struct event_filter *filter) +{ + tep_unref(filter->pevent); + + tep_filter_reset(filter); + + free(filter); +} + +static char *arg_to_str(struct event_filter *filter, struct filter_arg *arg); + +static int copy_filter_type(struct event_filter *filter, + struct event_filter *source, + struct filter_type *filter_type) +{ + struct filter_arg *arg; + struct event_format *event; + const char *sys; + const char *name; + char *str; + + /* Can't assume that the pevent's are the same */ + sys = filter_type->event->system; + name = filter_type->event->name; + event = tep_find_event_by_name(filter->pevent, sys, name); + if (!event) + return -1; + + str = arg_to_str(source, filter_type->filter); + if (!str) + return -1; + + if (strcmp(str, "TRUE") == 0 || strcmp(str, "FALSE") == 0) { + /* Add trivial event */ + arg = allocate_arg(); + if (arg == NULL) { + free(str); + return -1; + } + + arg->type = FILTER_ARG_BOOLEAN; + if (strcmp(str, "TRUE") == 0) + arg->boolean.value = 1; + else + arg->boolean.value = 0; + + filter_type = add_filter_type(filter, event->id); + if (filter_type == NULL) { + free(str); + free_arg(arg); + return -1; + } + + filter_type->filter = arg; + + free(str); + return 0; + } + + filter_event(filter, event, str, NULL); + free(str); + + return 0; +} + +/** + * tep_filter_copy - copy a filter using another filter + * @dest - the filter to copy to + * @source - the filter to copy from + * + * Returns 0 on success and -1 if not all filters were copied + */ +int tep_filter_copy(struct event_filter *dest, struct event_filter *source) +{ + int ret = 0; + int i; + + tep_filter_reset(dest); + + for (i = 0; i < source->filters; i++) { + if (copy_filter_type(dest, source, &source->event_filters[i])) + ret = -1; + } + return ret; +} + + +/** + * tep_update_trivial - update the trivial filters with the given filter + * @dest - the filter to update + * @source - the filter as the source of the update + * @type - the type of trivial filter to update. + * + * Scan dest for trivial events matching @type to replace with the source. + * + * Returns 0 on success and -1 if there was a problem updating, but + * events may have still been updated on error. + */ +int tep_update_trivial(struct event_filter *dest, struct event_filter *source, + enum filter_trivial_type type) +{ + struct tep_handle *src_pevent; + struct tep_handle *dest_pevent; + struct event_format *event; + struct filter_type *filter_type; + struct filter_arg *arg; + char *str; + int i; + + src_pevent = source->pevent; + dest_pevent = dest->pevent; + + /* Do nothing if either of the filters has nothing to filter */ + if (!dest->filters || !source->filters) + return 0; + + for (i = 0; i < dest->filters; i++) { + filter_type = &dest->event_filters[i]; + arg = filter_type->filter; + if (arg->type != FILTER_ARG_BOOLEAN) + continue; + if ((arg->boolean.value && type == FILTER_TRIVIAL_FALSE) || + (!arg->boolean.value && type == FILTER_TRIVIAL_TRUE)) + continue; + + event = filter_type->event; + + if (src_pevent != dest_pevent) { + /* do a look up */ + event = tep_find_event_by_name(src_pevent, + event->system, + event->name); + if (!event) + return -1; + } + + str = tep_filter_make_string(source, event->id); + if (!str) + continue; + + /* Don't bother if the filter is trivial too */ + if (strcmp(str, "TRUE") != 0 && strcmp(str, "FALSE") != 0) + filter_event(dest, event, str, NULL); + free(str); + } + return 0; +} + +/** + * tep_filter_clear_trivial - clear TRUE and FALSE filters + * @filter: the filter to remove trivial filters from + * @type: remove only true, false, or both + * + * Removes filters that only contain a TRUE or FALES boolean arg. + * + * Returns 0 on success and -1 if there was a problem. + */ +int tep_filter_clear_trivial(struct event_filter *filter, + enum filter_trivial_type type) +{ + struct filter_type *filter_type; + int count = 0; + int *ids = NULL; + int i; + + if (!filter->filters) + return 0; + + /* + * Two steps, first get all ids with trivial filters. + * then remove those ids. + */ + for (i = 0; i < filter->filters; i++) { + int *new_ids; + + filter_type = &filter->event_filters[i]; + if (filter_type->filter->type != FILTER_ARG_BOOLEAN) + continue; + switch (type) { + case FILTER_TRIVIAL_FALSE: + if (filter_type->filter->boolean.value) + continue; + break; + case FILTER_TRIVIAL_TRUE: + if (!filter_type->filter->boolean.value) + continue; + default: + break; + } + + new_ids = realloc(ids, sizeof(*ids) * (count + 1)); + if (!new_ids) { + free(ids); + return -1; + } + + ids = new_ids; + ids[count++] = filter_type->event_id; + } + + if (!count) + return 0; + + for (i = 0; i < count; i++) + tep_filter_remove_event(filter, ids[i]); + + free(ids); + return 0; +} + +/** + * tep_filter_event_has_trivial - return true event contains trivial filter + * @filter: the filter with the information + * @event_id: the id of the event to test + * @type: trivial type to test for (TRUE, FALSE, EITHER) + * + * Returns 1 if the event contains a matching trivial type + * otherwise 0. + */ +int tep_filter_event_has_trivial(struct event_filter *filter, + int event_id, + enum filter_trivial_type type) +{ + struct filter_type *filter_type; + + if (!filter->filters) + return 0; + + filter_type = find_filter_type(filter, event_id); + + if (!filter_type) + return 0; + + if (filter_type->filter->type != FILTER_ARG_BOOLEAN) + return 0; + + switch (type) { + case FILTER_TRIVIAL_FALSE: + return !filter_type->filter->boolean.value; + + case FILTER_TRIVIAL_TRUE: + return filter_type->filter->boolean.value; + default: + return 1; + } +} + +static int test_filter(struct event_format *event, struct filter_arg *arg, + struct tep_record *record, enum tep_errno *err); + +static const char * +get_comm(struct event_format *event, struct tep_record *record) +{ + const char *comm; + int pid; + + pid = tep_data_pid(event->pevent, record); + comm = tep_data_comm_from_pid(event->pevent, pid); + return comm; +} + +static unsigned long long +get_value(struct event_format *event, + struct format_field *field, struct tep_record *record) +{ + unsigned long long val; + + /* Handle our dummy "comm" field */ + if (field == &comm) { + const char *name; + + name = get_comm(event, record); + return (unsigned long)name; + } + + /* Handle our dummy "cpu" field */ + if (field == &cpu) + return record->cpu; + + tep_read_number_field(field, record->data, &val); + + if (!(field->flags & FIELD_IS_SIGNED)) + return val; + + switch (field->size) { + case 1: + return (char)val; + case 2: + return (short)val; + case 4: + return (int)val; + case 8: + return (long long)val; + } + return val; +} + +static unsigned long long +get_arg_value(struct event_format *event, struct filter_arg *arg, + struct tep_record *record, enum tep_errno *err); + +static unsigned long long +get_exp_value(struct event_format *event, struct filter_arg *arg, + struct tep_record *record, enum tep_errno *err) +{ + unsigned long long lval, rval; + + lval = get_arg_value(event, arg->exp.left, record, err); + rval = get_arg_value(event, arg->exp.right, record, err); + + if (*err) { + /* + * There was an error, no need to process anymore. + */ + return 0; + } + + switch (arg->exp.type) { + case FILTER_EXP_ADD: + return lval + rval; + + case FILTER_EXP_SUB: + return lval - rval; + + case FILTER_EXP_MUL: + return lval * rval; + + case FILTER_EXP_DIV: + return lval / rval; + + case FILTER_EXP_MOD: + return lval % rval; + + case FILTER_EXP_RSHIFT: + return lval >> rval; + + case FILTER_EXP_LSHIFT: + return lval << rval; + + case FILTER_EXP_AND: + return lval & rval; + + case FILTER_EXP_OR: + return lval | rval; + + case FILTER_EXP_XOR: + return lval ^ rval; + + case FILTER_EXP_NOT: + default: + if (!*err) + *err = TEP_ERRNO__INVALID_EXP_TYPE; + } + return 0; +} + +static unsigned long long +get_arg_value(struct event_format *event, struct filter_arg *arg, + struct tep_record *record, enum tep_errno *err) +{ + switch (arg->type) { + case FILTER_ARG_FIELD: + return get_value(event, arg->field.field, record); + + case FILTER_ARG_VALUE: + if (arg->value.type != FILTER_NUMBER) { + if (!*err) + *err = TEP_ERRNO__NOT_A_NUMBER; + } + return arg->value.val; + + case FILTER_ARG_EXP: + return get_exp_value(event, arg, record, err); + + default: + if (!*err) + *err = TEP_ERRNO__INVALID_ARG_TYPE; + } + return 0; +} + +static int test_num(struct event_format *event, struct filter_arg *arg, + struct tep_record *record, enum tep_errno *err) +{ + unsigned long long lval, rval; + + lval = get_arg_value(event, arg->num.left, record, err); + rval = get_arg_value(event, arg->num.right, record, err); + + if (*err) { + /* + * There was an error, no need to process anymore. + */ + return 0; + } + + switch (arg->num.type) { + case FILTER_CMP_EQ: + return lval == rval; + + case FILTER_CMP_NE: + return lval != rval; + + case FILTER_CMP_GT: + return lval > rval; + + case FILTER_CMP_LT: + return lval < rval; + + case FILTER_CMP_GE: + return lval >= rval; + + case FILTER_CMP_LE: + return lval <= rval; + + default: + if (!*err) + *err = TEP_ERRNO__ILLEGAL_INTEGER_CMP; + return 0; + } +} + +static const char *get_field_str(struct filter_arg *arg, struct tep_record *record) +{ + struct event_format *event; + struct tep_handle *pevent; + unsigned long long addr; + const char *val = NULL; + unsigned int size; + char hex[64]; + + /* If the field is not a string convert it */ + if (arg->str.field->flags & FIELD_IS_STRING) { + val = record->data + arg->str.field->offset; + size = arg->str.field->size; + + if (arg->str.field->flags & FIELD_IS_DYNAMIC) { + addr = *(unsigned int *)val; + val = record->data + (addr & 0xffff); + size = addr >> 16; + } + + /* + * We need to copy the data since we can't be sure the field + * is null terminated. + */ + if (*(val + size - 1)) { + /* copy it */ + memcpy(arg->str.buffer, val, arg->str.field->size); + /* the buffer is already NULL terminated */ + val = arg->str.buffer; + } + + } else { + event = arg->str.field->event; + pevent = event->pevent; + addr = get_value(event, arg->str.field, record); + + if (arg->str.field->flags & (FIELD_IS_POINTER | FIELD_IS_LONG)) + /* convert to a kernel symbol */ + val = tep_find_function(pevent, addr); + + if (val == NULL) { + /* just use the hex of the string name */ + snprintf(hex, 64, "0x%llx", addr); + val = hex; + } + } + + return val; +} + +static int test_str(struct event_format *event, struct filter_arg *arg, + struct tep_record *record, enum tep_errno *err) +{ + const char *val; + + if (arg->str.field == &comm) + val = get_comm(event, record); + else + val = get_field_str(arg, record); + + switch (arg->str.type) { + case FILTER_CMP_MATCH: + return strcmp(val, arg->str.val) == 0; + + case FILTER_CMP_NOT_MATCH: + return strcmp(val, arg->str.val) != 0; + + case FILTER_CMP_REGEX: + /* Returns zero on match */ + return !regexec(&arg->str.reg, val, 0, NULL, 0); + + case FILTER_CMP_NOT_REGEX: + return regexec(&arg->str.reg, val, 0, NULL, 0); + + default: + if (!*err) + *err = TEP_ERRNO__ILLEGAL_STRING_CMP; + return 0; + } +} + +static int test_op(struct event_format *event, struct filter_arg *arg, + struct tep_record *record, enum tep_errno *err) +{ + switch (arg->op.type) { + case FILTER_OP_AND: + return test_filter(event, arg->op.left, record, err) && + test_filter(event, arg->op.right, record, err); + + case FILTER_OP_OR: + return test_filter(event, arg->op.left, record, err) || + test_filter(event, arg->op.right, record, err); + + case FILTER_OP_NOT: + return !test_filter(event, arg->op.right, record, err); + + default: + if (!*err) + *err = TEP_ERRNO__INVALID_OP_TYPE; + return 0; + } +} + +static int test_filter(struct event_format *event, struct filter_arg *arg, + struct tep_record *record, enum tep_errno *err) +{ + if (*err) { + /* + * There was an error, no need to process anymore. + */ + return 0; + } + + switch (arg->type) { + case FILTER_ARG_BOOLEAN: + /* easy case */ + return arg->boolean.value; + + case FILTER_ARG_OP: + return test_op(event, arg, record, err); + + case FILTER_ARG_NUM: + return test_num(event, arg, record, err); + + case FILTER_ARG_STR: + return test_str(event, arg, record, err); + + case FILTER_ARG_EXP: + case FILTER_ARG_VALUE: + case FILTER_ARG_FIELD: + /* + * Expressions, fields and values evaluate + * to true if they return non zero + */ + return !!get_arg_value(event, arg, record, err); + + default: + if (!*err) + *err = TEP_ERRNO__INVALID_ARG_TYPE; + return 0; + } +} + +/** + * tep_event_filtered - return true if event has filter + * @filter: filter struct with filter information + * @event_id: event id to test if filter exists + * + * Returns 1 if filter found for @event_id + * otherwise 0; + */ +int tep_event_filtered(struct event_filter *filter, int event_id) +{ + struct filter_type *filter_type; + + if (!filter->filters) + return 0; + + filter_type = find_filter_type(filter, event_id); + + return filter_type ? 1 : 0; +} + +/** + * tep_filter_match - test if a record matches a filter + * @filter: filter struct with filter information + * @record: the record to test against the filter + * + * Returns: match result or error code (prefixed with TEP_ERRNO__) + * FILTER_MATCH - filter found for event and @record matches + * FILTER_MISS - filter found for event and @record does not match + * FILTER_NOT_FOUND - no filter found for @record's event + * NO_FILTER - if no filters exist + * otherwise - error occurred during test + */ +enum tep_errno tep_filter_match(struct event_filter *filter, + struct tep_record *record) +{ + struct tep_handle *pevent = filter->pevent; + struct filter_type *filter_type; + int event_id; + int ret; + enum tep_errno err = 0; + + filter_init_error_buf(filter); + + if (!filter->filters) + return TEP_ERRNO__NO_FILTER; + + event_id = tep_data_type(pevent, record); + + filter_type = find_filter_type(filter, event_id); + if (!filter_type) + return TEP_ERRNO__FILTER_NOT_FOUND; + + ret = test_filter(filter_type->event, filter_type->filter, record, &err); + if (err) + return err; + + return ret ? TEP_ERRNO__FILTER_MATCH : TEP_ERRNO__FILTER_MISS; +} + +static char *op_to_str(struct event_filter *filter, struct filter_arg *arg) +{ + char *str = NULL; + char *left = NULL; + char *right = NULL; + char *op = NULL; + int left_val = -1; + int right_val = -1; + int val; + + switch (arg->op.type) { + case FILTER_OP_AND: + op = "&&"; + /* fall through */ + case FILTER_OP_OR: + if (!op) + op = "||"; + + left = arg_to_str(filter, arg->op.left); + right = arg_to_str(filter, arg->op.right); + if (!left || !right) + break; + + /* Try to consolidate boolean values */ + if (strcmp(left, "TRUE") == 0) + left_val = 1; + else if (strcmp(left, "FALSE") == 0) + left_val = 0; + + if (strcmp(right, "TRUE") == 0) + right_val = 1; + else if (strcmp(right, "FALSE") == 0) + right_val = 0; + + if (left_val >= 0) { + if ((arg->op.type == FILTER_OP_AND && !left_val) || + (arg->op.type == FILTER_OP_OR && left_val)) { + /* Just return left value */ + str = left; + left = NULL; + break; + } + if (right_val >= 0) { + /* just evaluate this. */ + val = 0; + switch (arg->op.type) { + case FILTER_OP_AND: + val = left_val && right_val; + break; + case FILTER_OP_OR: + val = left_val || right_val; + break; + default: + break; + } + asprintf(&str, val ? "TRUE" : "FALSE"); + break; + } + } + if (right_val >= 0) { + if ((arg->op.type == FILTER_OP_AND && !right_val) || + (arg->op.type == FILTER_OP_OR && right_val)) { + /* Just return right value */ + str = right; + right = NULL; + break; + } + /* The right value is meaningless */ + str = left; + left = NULL; + break; + } + + asprintf(&str, "(%s) %s (%s)", left, op, right); + break; + + case FILTER_OP_NOT: + op = "!"; + right = arg_to_str(filter, arg->op.right); + if (!right) + break; + + /* See if we can consolidate */ + if (strcmp(right, "TRUE") == 0) + right_val = 1; + else if (strcmp(right, "FALSE") == 0) + right_val = 0; + if (right_val >= 0) { + /* just return the opposite */ + asprintf(&str, right_val ? "FALSE" : "TRUE"); + break; + } + asprintf(&str, "%s(%s)", op, right); + break; + + default: + /* ?? */ + break; + } + free(left); + free(right); + return str; +} + +static char *val_to_str(struct event_filter *filter, struct filter_arg *arg) +{ + char *str = NULL; + + asprintf(&str, "%lld", arg->value.val); + + return str; +} + +static char *field_to_str(struct event_filter *filter, struct filter_arg *arg) +{ + return strdup(arg->field.field->name); +} + +static char *exp_to_str(struct event_filter *filter, struct filter_arg *arg) +{ + char *lstr; + char *rstr; + char *op; + char *str = NULL; + + lstr = arg_to_str(filter, arg->exp.left); + rstr = arg_to_str(filter, arg->exp.right); + if (!lstr || !rstr) + goto out; + + switch (arg->exp.type) { + case FILTER_EXP_ADD: + op = "+"; + break; + case FILTER_EXP_SUB: + op = "-"; + break; + case FILTER_EXP_MUL: + op = "*"; + break; + case FILTER_EXP_DIV: + op = "/"; + break; + case FILTER_EXP_MOD: + op = "%"; + break; + case FILTER_EXP_RSHIFT: + op = ">>"; + break; + case FILTER_EXP_LSHIFT: + op = "<<"; + break; + case FILTER_EXP_AND: + op = "&"; + break; + case FILTER_EXP_OR: + op = "|"; + break; + case FILTER_EXP_XOR: + op = "^"; + break; + default: + op = "[ERROR IN EXPRESSION TYPE]"; + break; + } + + asprintf(&str, "%s %s %s", lstr, op, rstr); +out: + free(lstr); + free(rstr); + + return str; +} + +static char *num_to_str(struct event_filter *filter, struct filter_arg *arg) +{ + char *lstr; + char *rstr; + char *str = NULL; + char *op = NULL; + + lstr = arg_to_str(filter, arg->num.left); + rstr = arg_to_str(filter, arg->num.right); + if (!lstr || !rstr) + goto out; + + switch (arg->num.type) { + case FILTER_CMP_EQ: + op = "=="; + /* fall through */ + case FILTER_CMP_NE: + if (!op) + op = "!="; + /* fall through */ + case FILTER_CMP_GT: + if (!op) + op = ">"; + /* fall through */ + case FILTER_CMP_LT: + if (!op) + op = "<"; + /* fall through */ + case FILTER_CMP_GE: + if (!op) + op = ">="; + /* fall through */ + case FILTER_CMP_LE: + if (!op) + op = "<="; + + asprintf(&str, "%s %s %s", lstr, op, rstr); + break; + + default: + /* ?? */ + break; + } + +out: + free(lstr); + free(rstr); + return str; +} + +static char *str_to_str(struct event_filter *filter, struct filter_arg *arg) +{ + char *str = NULL; + char *op = NULL; + + switch (arg->str.type) { + case FILTER_CMP_MATCH: + op = "=="; + /* fall through */ + case FILTER_CMP_NOT_MATCH: + if (!op) + op = "!="; + /* fall through */ + case FILTER_CMP_REGEX: + if (!op) + op = "=~"; + /* fall through */ + case FILTER_CMP_NOT_REGEX: + if (!op) + op = "!~"; + + asprintf(&str, "%s %s \"%s\"", + arg->str.field->name, op, arg->str.val); + break; + + default: + /* ?? */ + break; + } + return str; +} + +static char *arg_to_str(struct event_filter *filter, struct filter_arg *arg) +{ + char *str = NULL; + + switch (arg->type) { + case FILTER_ARG_BOOLEAN: + asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE"); + return str; + + case FILTER_ARG_OP: + return op_to_str(filter, arg); + + case FILTER_ARG_NUM: + return num_to_str(filter, arg); + + case FILTER_ARG_STR: + return str_to_str(filter, arg); + + case FILTER_ARG_VALUE: + return val_to_str(filter, arg); + + case FILTER_ARG_FIELD: + return field_to_str(filter, arg); + + case FILTER_ARG_EXP: + return exp_to_str(filter, arg); + + default: + /* ?? */ + return NULL; + } + +} + +/** + * tep_filter_make_string - return a string showing the filter + * @filter: filter struct with filter information + * @event_id: the event id to return the filter string with + * + * Returns a string that displays the filter contents. + * This string must be freed with free(str). + * NULL is returned if no filter is found or allocation failed. + */ +char * +tep_filter_make_string(struct event_filter *filter, int event_id) +{ + struct filter_type *filter_type; + + if (!filter->filters) + return NULL; + + filter_type = find_filter_type(filter, event_id); + + if (!filter_type) + return NULL; + + return arg_to_str(filter, filter_type->filter); +} + +/** + * tep_filter_compare - compare two filters and return if they are the same + * @filter1: Filter to compare with @filter2 + * @filter2: Filter to compare with @filter1 + * + * Returns: + * 1 if the two filters hold the same content. + * 0 if they do not. + */ +int tep_filter_compare(struct event_filter *filter1, struct event_filter *filter2) +{ + struct filter_type *filter_type1; + struct filter_type *filter_type2; + char *str1, *str2; + int result; + int i; + + /* Do the easy checks first */ + if (filter1->filters != filter2->filters) + return 0; + if (!filter1->filters && !filter2->filters) + return 1; + + /* + * Now take a look at each of the events to see if they have the same + * filters to them. + */ + for (i = 0; i < filter1->filters; i++) { + filter_type1 = &filter1->event_filters[i]; + filter_type2 = find_filter_type(filter2, filter_type1->event_id); + if (!filter_type2) + break; + if (filter_type1->filter->type != filter_type2->filter->type) + break; + switch (filter_type1->filter->type) { + case FILTER_TRIVIAL_FALSE: + case FILTER_TRIVIAL_TRUE: + /* trivial types just need the type compared */ + continue; + default: + break; + } + /* The best way to compare complex filters is with strings */ + str1 = arg_to_str(filter1, filter_type1->filter); + str2 = arg_to_str(filter2, filter_type2->filter); + if (str1 && str2) + result = strcmp(str1, str2) != 0; + else + /* bail out if allocation fails */ + result = 1; + + free(str1); + free(str2); + if (result) + break; + } + + if (i < filter1->filters) + return 0; + return 1; +} + diff --git a/tools/lib/traceevent/parse-utils.c b/tools/lib/traceevent/parse-utils.c new file mode 100644 index 000000000..77e4ec640 --- /dev/null +++ b/tools/lib/traceevent/parse-utils.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <errno.h> + +#define __weak __attribute__((weak)) + +void __vwarning(const char *fmt, va_list ap) +{ + if (errno) + perror("trace-cmd"); + errno = 0; + + fprintf(stderr, " "); + vfprintf(stderr, fmt, ap); + + fprintf(stderr, "\n"); +} + +void __warning(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + __vwarning(fmt, ap); + va_end(ap); +} + +void __weak warning(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + __vwarning(fmt, ap); + va_end(ap); +} + +void __vpr_stat(const char *fmt, va_list ap) +{ + vprintf(fmt, ap); + printf("\n"); +} + +void __pr_stat(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + __vpr_stat(fmt, ap); + va_end(ap); +} + +void __weak vpr_stat(const char *fmt, va_list ap) +{ + __vpr_stat(fmt, ap); +} + +void __weak pr_stat(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + __vpr_stat(fmt, ap); + va_end(ap); +} diff --git a/tools/lib/traceevent/plugin_cfg80211.c b/tools/lib/traceevent/plugin_cfg80211.c new file mode 100644 index 000000000..a51b366f4 --- /dev/null +++ b/tools/lib/traceevent/plugin_cfg80211.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <endian.h> +#include "event-parse.h" + +/* + * From glibc endian.h, for older systems where it is not present, e.g.: RHEL5, + * Fedora6. + */ +#ifndef le16toh +# if __BYTE_ORDER == __LITTLE_ENDIAN +# define le16toh(x) (x) +# else +# define le16toh(x) __bswap_16 (x) +# endif +#endif + + +static unsigned long long +process___le16_to_cpup(struct trace_seq *s, unsigned long long *args) +{ + uint16_t *val = (uint16_t *) (unsigned long) args[0]; + return val ? (long long) le16toh(*val) : 0; +} + +int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +{ + tep_register_print_function(pevent, + process___le16_to_cpup, + TEP_FUNC_ARG_INT, + "__le16_to_cpup", + TEP_FUNC_ARG_PTR, + TEP_FUNC_ARG_VOID); + return 0; +} + +void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +{ + tep_unregister_print_function(pevent, process___le16_to_cpup, + "__le16_to_cpup"); +} diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugin_function.c new file mode 100644 index 000000000..424747475 --- /dev/null +++ b/tools/lib/traceevent/plugin_function.c @@ -0,0 +1,194 @@ +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "event-parse.h" +#include "event-utils.h" + +static struct func_stack { + int size; + char **stack; +} *fstack; + +static int cpus = -1; + +#define STK_BLK 10 + +struct tep_plugin_option plugin_options[] = +{ + { + .name = "parent", + .plugin_alias = "ftrace", + .description = + "Print parent of functions for function events", + }, + { + .name = "indent", + .plugin_alias = "ftrace", + .description = + "Try to show function call indents, based on parents", + .set = 1, + }, + { + .name = NULL, + } +}; + +static struct tep_plugin_option *ftrace_parent = &plugin_options[0]; +static struct tep_plugin_option *ftrace_indent = &plugin_options[1]; + +static void add_child(struct func_stack *stack, const char *child, int pos) +{ + int i; + + if (!child) + return; + + if (pos < stack->size) + free(stack->stack[pos]); + else { + char **ptr; + + ptr = realloc(stack->stack, sizeof(char *) * + (stack->size + STK_BLK)); + if (!ptr) { + warning("could not allocate plugin memory\n"); + return; + } + + stack->stack = ptr; + + for (i = stack->size; i < stack->size + STK_BLK; i++) + stack->stack[i] = NULL; + stack->size += STK_BLK; + } + + stack->stack[pos] = strdup(child); +} + +static int add_and_get_index(const char *parent, const char *child, int cpu) +{ + int i; + + if (cpu < 0) + return 0; + + if (cpu > cpus) { + struct func_stack *ptr; + + ptr = realloc(fstack, sizeof(*fstack) * (cpu + 1)); + if (!ptr) { + warning("could not allocate plugin memory\n"); + return 0; + } + + fstack = ptr; + + /* Account for holes in the cpu count */ + for (i = cpus + 1; i <= cpu; i++) + memset(&fstack[i], 0, sizeof(fstack[i])); + cpus = cpu; + } + + for (i = 0; i < fstack[cpu].size && fstack[cpu].stack[i]; i++) { + if (strcmp(parent, fstack[cpu].stack[i]) == 0) { + add_child(&fstack[cpu], child, i+1); + return i; + } + } + + /* Not found */ + add_child(&fstack[cpu], parent, 0); + add_child(&fstack[cpu], child, 1); + return 0; +} + +static int function_handler(struct trace_seq *s, struct tep_record *record, + struct event_format *event, void *context) +{ + struct tep_handle *pevent = event->pevent; + unsigned long long function; + unsigned long long pfunction; + const char *func; + const char *parent; + int index = 0; + + if (tep_get_field_val(s, event, "ip", record, &function, 1)) + return trace_seq_putc(s, '!'); + + func = tep_find_function(pevent, function); + + if (tep_get_field_val(s, event, "parent_ip", record, &pfunction, 1)) + return trace_seq_putc(s, '!'); + + parent = tep_find_function(pevent, pfunction); + + if (parent && ftrace_indent->set) + index = add_and_get_index(parent, func, record->cpu); + + trace_seq_printf(s, "%*s", index*3, ""); + + if (func) + trace_seq_printf(s, "%s", func); + else + trace_seq_printf(s, "0x%llx", function); + + if (ftrace_parent->set) { + trace_seq_printf(s, " <-- "); + if (parent) + trace_seq_printf(s, "%s", parent); + else + trace_seq_printf(s, "0x%llx", pfunction); + } + + return 0; +} + +int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +{ + tep_register_event_handler(pevent, -1, "ftrace", "function", + function_handler, NULL); + + tep_plugin_add_options("ftrace", plugin_options); + + return 0; +} + +void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +{ + int i, x; + + tep_unregister_event_handler(pevent, -1, "ftrace", "function", + function_handler, NULL); + + for (i = 0; i <= cpus; i++) { + for (x = 0; x < fstack[i].size && fstack[i].stack[x]; x++) + free(fstack[i].stack[x]); + free(fstack[i].stack); + } + + tep_plugin_remove_options(plugin_options); + + free(fstack); + fstack = NULL; + cpus = -1; +} diff --git a/tools/lib/traceevent/plugin_hrtimer.c b/tools/lib/traceevent/plugin_hrtimer.c new file mode 100644 index 000000000..b43bfec56 --- /dev/null +++ b/tools/lib/traceevent/plugin_hrtimer.c @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "event-parse.h" + +static int timer_expire_handler(struct trace_seq *s, + struct tep_record *record, + struct event_format *event, void *context) +{ + trace_seq_printf(s, "hrtimer="); + + if (tep_print_num_field(s, "0x%llx", event, "timer", + record, 0) == -1) + tep_print_num_field(s, "0x%llx", event, "hrtimer", + record, 1); + + trace_seq_printf(s, " now="); + + tep_print_num_field(s, "%llu", event, "now", record, 1); + + tep_print_func_field(s, " function=%s", event, "function", + record, 0); + return 0; +} + +static int timer_start_handler(struct trace_seq *s, + struct tep_record *record, + struct event_format *event, void *context) +{ + trace_seq_printf(s, "hrtimer="); + + if (tep_print_num_field(s, "0x%llx", event, "timer", + record, 0) == -1) + tep_print_num_field(s, "0x%llx", event, "hrtimer", + record, 1); + + tep_print_func_field(s, " function=%s", event, "function", + record, 0); + + trace_seq_printf(s, " expires="); + tep_print_num_field(s, "%llu", event, "expires", record, 1); + + trace_seq_printf(s, " softexpires="); + tep_print_num_field(s, "%llu", event, "softexpires", record, 1); + return 0; +} + +int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +{ + tep_register_event_handler(pevent, -1, + "timer", "hrtimer_expire_entry", + timer_expire_handler, NULL); + + tep_register_event_handler(pevent, -1, "timer", "hrtimer_start", + timer_start_handler, NULL); + return 0; +} + +void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +{ + tep_unregister_event_handler(pevent, -1, + "timer", "hrtimer_expire_entry", + timer_expire_handler, NULL); + + tep_unregister_event_handler(pevent, -1, "timer", "hrtimer_start", + timer_start_handler, NULL); +} diff --git a/tools/lib/traceevent/plugin_jbd2.c b/tools/lib/traceevent/plugin_jbd2.c new file mode 100644 index 000000000..45a9acd19 --- /dev/null +++ b/tools/lib/traceevent/plugin_jbd2.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "event-parse.h" + +#define MINORBITS 20 +#define MINORMASK ((1U << MINORBITS) - 1) + +#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) +#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) + +static unsigned long long +process_jbd2_dev_to_name(struct trace_seq *s, unsigned long long *args) +{ + unsigned int dev = args[0]; + + trace_seq_printf(s, "%d:%d", MAJOR(dev), MINOR(dev)); + return 0; +} + +static unsigned long long +process_jiffies_to_msecs(struct trace_seq *s, unsigned long long *args) +{ + unsigned long long jiffies = args[0]; + + trace_seq_printf(s, "%lld", jiffies); + return jiffies; +} + +int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +{ + tep_register_print_function(pevent, + process_jbd2_dev_to_name, + TEP_FUNC_ARG_STRING, + "jbd2_dev_to_name", + TEP_FUNC_ARG_INT, + TEP_FUNC_ARG_VOID); + + tep_register_print_function(pevent, + process_jiffies_to_msecs, + TEP_FUNC_ARG_LONG, + "jiffies_to_msecs", + TEP_FUNC_ARG_LONG, + TEP_FUNC_ARG_VOID); + return 0; +} + +void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +{ + tep_unregister_print_function(pevent, process_jbd2_dev_to_name, + "jbd2_dev_to_name"); + + tep_unregister_print_function(pevent, process_jiffies_to_msecs, + "jiffies_to_msecs"); +} diff --git a/tools/lib/traceevent/plugin_kmem.c b/tools/lib/traceevent/plugin_kmem.c new file mode 100644 index 000000000..73966b05a --- /dev/null +++ b/tools/lib/traceevent/plugin_kmem.c @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "event-parse.h" + +static int call_site_handler(struct trace_seq *s, struct tep_record *record, + struct event_format *event, void *context) +{ + struct format_field *field; + unsigned long long val, addr; + void *data = record->data; + const char *func; + + field = tep_find_field(event, "call_site"); + if (!field) + return 1; + + if (tep_read_number_field(field, data, &val)) + return 1; + + func = tep_find_function(event->pevent, val); + if (!func) + return 1; + + addr = tep_find_function_address(event->pevent, val); + + trace_seq_printf(s, "(%s+0x%x) ", func, (int)(val - addr)); + return 1; +} + +int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +{ + tep_register_event_handler(pevent, -1, "kmem", "kfree", + call_site_handler, NULL); + + tep_register_event_handler(pevent, -1, "kmem", "kmalloc", + call_site_handler, NULL); + + tep_register_event_handler(pevent, -1, "kmem", "kmalloc_node", + call_site_handler, NULL); + + tep_register_event_handler(pevent, -1, "kmem", "kmem_cache_alloc", + call_site_handler, NULL); + + tep_register_event_handler(pevent, -1, "kmem", + "kmem_cache_alloc_node", + call_site_handler, NULL); + + tep_register_event_handler(pevent, -1, "kmem", "kmem_cache_free", + call_site_handler, NULL); + return 0; +} + +void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +{ + tep_unregister_event_handler(pevent, -1, "kmem", "kfree", + call_site_handler, NULL); + + tep_unregister_event_handler(pevent, -1, "kmem", "kmalloc", + call_site_handler, NULL); + + tep_unregister_event_handler(pevent, -1, "kmem", "kmalloc_node", + call_site_handler, NULL); + + tep_unregister_event_handler(pevent, -1, "kmem", "kmem_cache_alloc", + call_site_handler, NULL); + + tep_unregister_event_handler(pevent, -1, "kmem", + "kmem_cache_alloc_node", + call_site_handler, NULL); + + tep_unregister_event_handler(pevent, -1, "kmem", "kmem_cache_free", + call_site_handler, NULL); +} diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c new file mode 100644 index 000000000..1d0d15906 --- /dev/null +++ b/tools/lib/traceevent/plugin_kvm.c @@ -0,0 +1,522 @@ +/* + * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> + +#include "event-parse.h" + +#ifdef HAVE_UDIS86 + +#include <udis86.h> + +static ud_t ud; + +static void init_disassembler(void) +{ + ud_init(&ud); + ud_set_syntax(&ud, UD_SYN_ATT); +} + +static const char *disassemble(unsigned char *insn, int len, uint64_t rip, + int cr0_pe, int eflags_vm, + int cs_d, int cs_l) +{ + int mode; + + if (!cr0_pe) + mode = 16; + else if (eflags_vm) + mode = 16; + else if (cs_l) + mode = 64; + else if (cs_d) + mode = 32; + else + mode = 16; + + ud_set_pc(&ud, rip); + ud_set_mode(&ud, mode); + ud_set_input_buffer(&ud, insn, len); + ud_disassemble(&ud); + return ud_insn_asm(&ud); +} + +#else + +static void init_disassembler(void) +{ +} + +static const char *disassemble(unsigned char *insn, int len, uint64_t rip, + int cr0_pe, int eflags_vm, + int cs_d, int cs_l) +{ + static char out[15*3+1]; + int i; + + for (i = 0; i < len; ++i) + sprintf(out + i * 3, "%02x ", insn[i]); + out[len*3-1] = '\0'; + return out; +} + +#endif + + +#define VMX_EXIT_REASONS \ + _ER(EXCEPTION_NMI, 0) \ + _ER(EXTERNAL_INTERRUPT, 1) \ + _ER(TRIPLE_FAULT, 2) \ + _ER(PENDING_INTERRUPT, 7) \ + _ER(NMI_WINDOW, 8) \ + _ER(TASK_SWITCH, 9) \ + _ER(CPUID, 10) \ + _ER(HLT, 12) \ + _ER(INVD, 13) \ + _ER(INVLPG, 14) \ + _ER(RDPMC, 15) \ + _ER(RDTSC, 16) \ + _ER(VMCALL, 18) \ + _ER(VMCLEAR, 19) \ + _ER(VMLAUNCH, 20) \ + _ER(VMPTRLD, 21) \ + _ER(VMPTRST, 22) \ + _ER(VMREAD, 23) \ + _ER(VMRESUME, 24) \ + _ER(VMWRITE, 25) \ + _ER(VMOFF, 26) \ + _ER(VMON, 27) \ + _ER(CR_ACCESS, 28) \ + _ER(DR_ACCESS, 29) \ + _ER(IO_INSTRUCTION, 30) \ + _ER(MSR_READ, 31) \ + _ER(MSR_WRITE, 32) \ + _ER(MWAIT_INSTRUCTION, 36) \ + _ER(MONITOR_INSTRUCTION, 39) \ + _ER(PAUSE_INSTRUCTION, 40) \ + _ER(MCE_DURING_VMENTRY, 41) \ + _ER(TPR_BELOW_THRESHOLD, 43) \ + _ER(APIC_ACCESS, 44) \ + _ER(EOI_INDUCED, 45) \ + _ER(EPT_VIOLATION, 48) \ + _ER(EPT_MISCONFIG, 49) \ + _ER(INVEPT, 50) \ + _ER(PREEMPTION_TIMER, 52) \ + _ER(WBINVD, 54) \ + _ER(XSETBV, 55) \ + _ER(APIC_WRITE, 56) \ + _ER(INVPCID, 58) \ + _ER(PML_FULL, 62) \ + _ER(XSAVES, 63) \ + _ER(XRSTORS, 64) + +#define SVM_EXIT_REASONS \ + _ER(EXIT_READ_CR0, 0x000) \ + _ER(EXIT_READ_CR3, 0x003) \ + _ER(EXIT_READ_CR4, 0x004) \ + _ER(EXIT_READ_CR8, 0x008) \ + _ER(EXIT_WRITE_CR0, 0x010) \ + _ER(EXIT_WRITE_CR3, 0x013) \ + _ER(EXIT_WRITE_CR4, 0x014) \ + _ER(EXIT_WRITE_CR8, 0x018) \ + _ER(EXIT_READ_DR0, 0x020) \ + _ER(EXIT_READ_DR1, 0x021) \ + _ER(EXIT_READ_DR2, 0x022) \ + _ER(EXIT_READ_DR3, 0x023) \ + _ER(EXIT_READ_DR4, 0x024) \ + _ER(EXIT_READ_DR5, 0x025) \ + _ER(EXIT_READ_DR6, 0x026) \ + _ER(EXIT_READ_DR7, 0x027) \ + _ER(EXIT_WRITE_DR0, 0x030) \ + _ER(EXIT_WRITE_DR1, 0x031) \ + _ER(EXIT_WRITE_DR2, 0x032) \ + _ER(EXIT_WRITE_DR3, 0x033) \ + _ER(EXIT_WRITE_DR4, 0x034) \ + _ER(EXIT_WRITE_DR5, 0x035) \ + _ER(EXIT_WRITE_DR6, 0x036) \ + _ER(EXIT_WRITE_DR7, 0x037) \ + _ER(EXIT_EXCP_BASE, 0x040) \ + _ER(EXIT_INTR, 0x060) \ + _ER(EXIT_NMI, 0x061) \ + _ER(EXIT_SMI, 0x062) \ + _ER(EXIT_INIT, 0x063) \ + _ER(EXIT_VINTR, 0x064) \ + _ER(EXIT_CR0_SEL_WRITE, 0x065) \ + _ER(EXIT_IDTR_READ, 0x066) \ + _ER(EXIT_GDTR_READ, 0x067) \ + _ER(EXIT_LDTR_READ, 0x068) \ + _ER(EXIT_TR_READ, 0x069) \ + _ER(EXIT_IDTR_WRITE, 0x06a) \ + _ER(EXIT_GDTR_WRITE, 0x06b) \ + _ER(EXIT_LDTR_WRITE, 0x06c) \ + _ER(EXIT_TR_WRITE, 0x06d) \ + _ER(EXIT_RDTSC, 0x06e) \ + _ER(EXIT_RDPMC, 0x06f) \ + _ER(EXIT_PUSHF, 0x070) \ + _ER(EXIT_POPF, 0x071) \ + _ER(EXIT_CPUID, 0x072) \ + _ER(EXIT_RSM, 0x073) \ + _ER(EXIT_IRET, 0x074) \ + _ER(EXIT_SWINT, 0x075) \ + _ER(EXIT_INVD, 0x076) \ + _ER(EXIT_PAUSE, 0x077) \ + _ER(EXIT_HLT, 0x078) \ + _ER(EXIT_INVLPG, 0x079) \ + _ER(EXIT_INVLPGA, 0x07a) \ + _ER(EXIT_IOIO, 0x07b) \ + _ER(EXIT_MSR, 0x07c) \ + _ER(EXIT_TASK_SWITCH, 0x07d) \ + _ER(EXIT_FERR_FREEZE, 0x07e) \ + _ER(EXIT_SHUTDOWN, 0x07f) \ + _ER(EXIT_VMRUN, 0x080) \ + _ER(EXIT_VMMCALL, 0x081) \ + _ER(EXIT_VMLOAD, 0x082) \ + _ER(EXIT_VMSAVE, 0x083) \ + _ER(EXIT_STGI, 0x084) \ + _ER(EXIT_CLGI, 0x085) \ + _ER(EXIT_SKINIT, 0x086) \ + _ER(EXIT_RDTSCP, 0x087) \ + _ER(EXIT_ICEBP, 0x088) \ + _ER(EXIT_WBINVD, 0x089) \ + _ER(EXIT_MONITOR, 0x08a) \ + _ER(EXIT_MWAIT, 0x08b) \ + _ER(EXIT_MWAIT_COND, 0x08c) \ + _ER(EXIT_NPF, 0x400) \ + _ER(EXIT_ERR, -1) + +#define _ER(reason, val) { #reason, val }, +struct str_values { + const char *str; + int val; +}; + +static struct str_values vmx_exit_reasons[] = { + VMX_EXIT_REASONS + { NULL, -1} +}; + +static struct str_values svm_exit_reasons[] = { + SVM_EXIT_REASONS + { NULL, -1} +}; + +static struct isa_exit_reasons { + unsigned isa; + struct str_values *strings; +} isa_exit_reasons[] = { + { .isa = 1, .strings = vmx_exit_reasons }, + { .isa = 2, .strings = svm_exit_reasons }, + { } +}; + +static const char *find_exit_reason(unsigned isa, int val) +{ + struct str_values *strings = NULL; + int i; + + for (i = 0; isa_exit_reasons[i].strings; ++i) + if (isa_exit_reasons[i].isa == isa) { + strings = isa_exit_reasons[i].strings; + break; + } + if (!strings) + return "UNKNOWN-ISA"; + for (i = 0; strings[i].val >= 0; i++) + if (strings[i].val == val) + break; + + return strings[i].str; +} + +static int print_exit_reason(struct trace_seq *s, struct tep_record *record, + struct event_format *event, const char *field) +{ + unsigned long long isa; + unsigned long long val; + const char *reason; + + if (tep_get_field_val(s, event, field, record, &val, 1) < 0) + return -1; + + if (tep_get_field_val(s, event, "isa", record, &isa, 0) < 0) + isa = 1; + + reason = find_exit_reason(isa, val); + if (reason) + trace_seq_printf(s, "reason %s", reason); + else + trace_seq_printf(s, "reason UNKNOWN (%llu)", val); + return 0; +} + +static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record, + struct event_format *event, void *context) +{ + unsigned long long info1 = 0, info2 = 0; + + if (print_exit_reason(s, record, event, "exit_reason") < 0) + return -1; + + tep_print_num_field(s, " rip 0x%lx", event, "guest_rip", record, 1); + + if (tep_get_field_val(s, event, "info1", record, &info1, 0) >= 0 + && tep_get_field_val(s, event, "info2", record, &info2, 0) >= 0) + trace_seq_printf(s, " info %llx %llx", info1, info2); + + return 0; +} + +#define KVM_EMUL_INSN_F_CR0_PE (1 << 0) +#define KVM_EMUL_INSN_F_EFL_VM (1 << 1) +#define KVM_EMUL_INSN_F_CS_D (1 << 2) +#define KVM_EMUL_INSN_F_CS_L (1 << 3) + +static int kvm_emulate_insn_handler(struct trace_seq *s, + struct tep_record *record, + struct event_format *event, void *context) +{ + unsigned long long rip, csbase, len, flags, failed; + int llen; + uint8_t *insn; + const char *disasm; + + if (tep_get_field_val(s, event, "rip", record, &rip, 1) < 0) + return -1; + + if (tep_get_field_val(s, event, "csbase", record, &csbase, 1) < 0) + return -1; + + if (tep_get_field_val(s, event, "len", record, &len, 1) < 0) + return -1; + + if (tep_get_field_val(s, event, "flags", record, &flags, 1) < 0) + return -1; + + if (tep_get_field_val(s, event, "failed", record, &failed, 1) < 0) + return -1; + + insn = tep_get_field_raw(s, event, "insn", record, &llen, 1); + if (!insn) + return -1; + + disasm = disassemble(insn, len, rip, + flags & KVM_EMUL_INSN_F_CR0_PE, + flags & KVM_EMUL_INSN_F_EFL_VM, + flags & KVM_EMUL_INSN_F_CS_D, + flags & KVM_EMUL_INSN_F_CS_L); + + trace_seq_printf(s, "%llx:%llx: %s%s", csbase, rip, disasm, + failed ? " FAIL" : ""); + return 0; +} + + +static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_record *record, + struct event_format *event, void *context) +{ + if (print_exit_reason(s, record, event, "exit_code") < 0) + return -1; + + tep_print_num_field(s, " info1 %llx", event, "exit_info1", record, 1); + tep_print_num_field(s, " info2 %llx", event, "exit_info2", record, 1); + tep_print_num_field(s, " int_info %llx", event, "exit_int_info", record, 1); + tep_print_num_field(s, " int_info_err %llx", event, "exit_int_info_err", record, 1); + + return 0; +} + +static int kvm_nested_vmexit_handler(struct trace_seq *s, struct tep_record *record, + struct event_format *event, void *context) +{ + tep_print_num_field(s, "rip %llx ", event, "rip", record, 1); + + return kvm_nested_vmexit_inject_handler(s, record, event, context); +} + +union kvm_mmu_page_role { + unsigned word; + struct { + unsigned level:4; + unsigned cr4_pae:1; + unsigned quadrant:2; + unsigned direct:1; + unsigned access:3; + unsigned invalid:1; + unsigned nxe:1; + unsigned cr0_wp:1; + unsigned smep_and_not_wp:1; + unsigned smap_and_not_wp:1; + unsigned pad_for_nice_hex_output:8; + unsigned smm:8; + }; +}; + +static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record, + struct event_format *event, void *context) +{ + unsigned long long val; + static const char *access_str[] = { + "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" + }; + union kvm_mmu_page_role role; + + if (tep_get_field_val(s, event, "role", record, &val, 1) < 0) + return -1; + + role.word = (int)val; + + /* + * We can only use the structure if file is of the same + * endianess. + */ + if (tep_is_file_bigendian(event->pevent) == + tep_is_host_bigendian(event->pevent)) { + + trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s", + role.level, + role.quadrant, + role.direct ? " direct" : "", + access_str[role.access], + role.invalid ? " invalid" : "", + role.cr4_pae ? "" : "!", + role.nxe ? "" : "!", + role.cr0_wp ? "" : "!", + role.smep_and_not_wp ? " smep" : "", + role.smap_and_not_wp ? " smap" : "", + role.smm ? " smm" : ""); + } else + trace_seq_printf(s, "WORD: %08x", role.word); + + tep_print_num_field(s, " root %u ", event, + "root_count", record, 1); + + if (tep_get_field_val(s, event, "unsync", record, &val, 1) < 0) + return -1; + + trace_seq_printf(s, "%s%c", val ? "unsync" : "sync", 0); + return 0; +} + +static int kvm_mmu_get_page_handler(struct trace_seq *s, + struct tep_record *record, + struct event_format *event, void *context) +{ + unsigned long long val; + + if (tep_get_field_val(s, event, "created", record, &val, 1) < 0) + return -1; + + trace_seq_printf(s, "%s ", val ? "new" : "existing"); + + if (tep_get_field_val(s, event, "gfn", record, &val, 1) < 0) + return -1; + + trace_seq_printf(s, "sp gfn %llx ", val); + return kvm_mmu_print_role(s, record, event, context); +} + +#define PT_WRITABLE_SHIFT 1 +#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT) + +static unsigned long long +process_is_writable_pte(struct trace_seq *s, unsigned long long *args) +{ + unsigned long pte = args[0]; + return pte & PT_WRITABLE_MASK; +} + +int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +{ + init_disassembler(); + + tep_register_event_handler(pevent, -1, "kvm", "kvm_exit", + kvm_exit_handler, NULL); + + tep_register_event_handler(pevent, -1, "kvm", "kvm_emulate_insn", + kvm_emulate_insn_handler, NULL); + + tep_register_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit", + kvm_nested_vmexit_handler, NULL); + + tep_register_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit_inject", + kvm_nested_vmexit_inject_handler, NULL); + + tep_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page", + kvm_mmu_get_page_handler, NULL); + + tep_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_sync_page", + kvm_mmu_print_role, NULL); + + tep_register_event_handler(pevent, -1, + "kvmmmu", "kvm_mmu_unsync_page", + kvm_mmu_print_role, NULL); + + tep_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_zap_page", + kvm_mmu_print_role, NULL); + + tep_register_event_handler(pevent, -1, "kvmmmu", + "kvm_mmu_prepare_zap_page", kvm_mmu_print_role, + NULL); + + tep_register_print_function(pevent, + process_is_writable_pte, + TEP_FUNC_ARG_INT, + "is_writable_pte", + TEP_FUNC_ARG_LONG, + TEP_FUNC_ARG_VOID); + return 0; +} + +void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +{ + tep_unregister_event_handler(pevent, -1, "kvm", "kvm_exit", + kvm_exit_handler, NULL); + + tep_unregister_event_handler(pevent, -1, "kvm", "kvm_emulate_insn", + kvm_emulate_insn_handler, NULL); + + tep_unregister_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit", + kvm_nested_vmexit_handler, NULL); + + tep_unregister_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit_inject", + kvm_nested_vmexit_inject_handler, NULL); + + tep_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page", + kvm_mmu_get_page_handler, NULL); + + tep_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_sync_page", + kvm_mmu_print_role, NULL); + + tep_unregister_event_handler(pevent, -1, + "kvmmmu", "kvm_mmu_unsync_page", + kvm_mmu_print_role, NULL); + + tep_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_zap_page", + kvm_mmu_print_role, NULL); + + tep_unregister_event_handler(pevent, -1, "kvmmmu", + "kvm_mmu_prepare_zap_page", kvm_mmu_print_role, + NULL); + + tep_unregister_print_function(pevent, process_is_writable_pte, + "is_writable_pte"); +} diff --git a/tools/lib/traceevent/plugin_mac80211.c b/tools/lib/traceevent/plugin_mac80211.c new file mode 100644 index 000000000..de50a5316 --- /dev/null +++ b/tools/lib/traceevent/plugin_mac80211.c @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "event-parse.h" + +#define INDENT 65 + +static void print_string(struct trace_seq *s, struct event_format *event, + const char *name, const void *data) +{ + struct format_field *f = tep_find_field(event, name); + int offset; + int length; + + if (!f) { + trace_seq_printf(s, "NOTFOUND:%s", name); + return; + } + + offset = f->offset; + length = f->size; + + if (!strncmp(f->type, "__data_loc", 10)) { + unsigned long long v; + if (tep_read_number_field(f, data, &v)) { + trace_seq_printf(s, "invalid_data_loc"); + return; + } + offset = v & 0xffff; + length = v >> 16; + } + + trace_seq_printf(s, "%.*s", length, (char *)data + offset); +} + +#define SF(fn) tep_print_num_field(s, fn ":%d", event, fn, record, 0) +#define SFX(fn) tep_print_num_field(s, fn ":%#x", event, fn, record, 0) +#define SP() trace_seq_putc(s, ' ') + +static int drv_bss_info_changed(struct trace_seq *s, + struct tep_record *record, + struct event_format *event, void *context) +{ + void *data = record->data; + + print_string(s, event, "wiphy_name", data); + trace_seq_printf(s, " vif:"); + print_string(s, event, "vif_name", data); + tep_print_num_field(s, "(%d)", event, "vif_type", record, 1); + + trace_seq_printf(s, "\n%*s", INDENT, ""); + SF("assoc"); SP(); + SF("aid"); SP(); + SF("cts"); SP(); + SF("shortpre"); SP(); + SF("shortslot"); SP(); + SF("dtimper"); SP(); + trace_seq_printf(s, "\n%*s", INDENT, ""); + SF("bcnint"); SP(); + SFX("assoc_cap"); SP(); + SFX("basic_rates"); SP(); + SF("enable_beacon"); + trace_seq_printf(s, "\n%*s", INDENT, ""); + SF("ht_operation_mode"); + + return 0; +} + +int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +{ + tep_register_event_handler(pevent, -1, "mac80211", + "drv_bss_info_changed", + drv_bss_info_changed, NULL); + return 0; +} + +void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +{ + tep_unregister_event_handler(pevent, -1, "mac80211", + "drv_bss_info_changed", + drv_bss_info_changed, NULL); +} diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c new file mode 100644 index 000000000..eecb4bd95 --- /dev/null +++ b/tools/lib/traceevent/plugin_sched_switch.c @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "event-parse.h" + +static void write_state(struct trace_seq *s, int val) +{ + const char states[] = "SDTtZXxW"; + int found = 0; + int i; + + for (i = 0; i < (sizeof(states) - 1); i++) { + if (!(val & (1 << i))) + continue; + + if (found) + trace_seq_putc(s, '|'); + + found = 1; + trace_seq_putc(s, states[i]); + } + + if (!found) + trace_seq_putc(s, 'R'); +} + +static void write_and_save_comm(struct format_field *field, + struct tep_record *record, + struct trace_seq *s, int pid) +{ + const char *comm; + int len; + + comm = (char *)(record->data + field->offset); + len = s->len; + trace_seq_printf(s, "%.*s", + field->size, comm); + + /* make sure the comm has a \0 at the end. */ + trace_seq_terminate(s); + comm = &s->buffer[len]; + + /* Help out the comm to ids. This will handle dups */ + tep_register_comm(field->event->pevent, comm, pid); +} + +static int sched_wakeup_handler(struct trace_seq *s, + struct tep_record *record, + struct event_format *event, void *context) +{ + struct format_field *field; + unsigned long long val; + + if (tep_get_field_val(s, event, "pid", record, &val, 1)) + return trace_seq_putc(s, '!'); + + field = tep_find_any_field(event, "comm"); + if (field) { + write_and_save_comm(field, record, s, val); + trace_seq_putc(s, ':'); + } + trace_seq_printf(s, "%lld", val); + + if (tep_get_field_val(s, event, "prio", record, &val, 0) == 0) + trace_seq_printf(s, " [%lld]", val); + + if (tep_get_field_val(s, event, "success", record, &val, 1) == 0) + trace_seq_printf(s, " success=%lld", val); + + if (tep_get_field_val(s, event, "target_cpu", record, &val, 0) == 0) + trace_seq_printf(s, " CPU:%03llu", val); + + return 0; +} + +static int sched_switch_handler(struct trace_seq *s, + struct tep_record *record, + struct event_format *event, void *context) +{ + struct format_field *field; + unsigned long long val; + + if (tep_get_field_val(s, event, "prev_pid", record, &val, 1)) + return trace_seq_putc(s, '!'); + + field = tep_find_any_field(event, "prev_comm"); + if (field) { + write_and_save_comm(field, record, s, val); + trace_seq_putc(s, ':'); + } + trace_seq_printf(s, "%lld ", val); + + if (tep_get_field_val(s, event, "prev_prio", record, &val, 0) == 0) + trace_seq_printf(s, "[%d] ", (int) val); + + if (tep_get_field_val(s, event, "prev_state", record, &val, 0) == 0) + write_state(s, val); + + trace_seq_puts(s, " ==> "); + + if (tep_get_field_val(s, event, "next_pid", record, &val, 1)) + return trace_seq_putc(s, '!'); + + field = tep_find_any_field(event, "next_comm"); + if (field) { + write_and_save_comm(field, record, s, val); + trace_seq_putc(s, ':'); + } + trace_seq_printf(s, "%lld", val); + + if (tep_get_field_val(s, event, "next_prio", record, &val, 0) == 0) + trace_seq_printf(s, " [%d]", (int) val); + + return 0; +} + +int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +{ + tep_register_event_handler(pevent, -1, "sched", "sched_switch", + sched_switch_handler, NULL); + + tep_register_event_handler(pevent, -1, "sched", "sched_wakeup", + sched_wakeup_handler, NULL); + + tep_register_event_handler(pevent, -1, "sched", "sched_wakeup_new", + sched_wakeup_handler, NULL); + return 0; +} + +void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +{ + tep_unregister_event_handler(pevent, -1, "sched", "sched_switch", + sched_switch_handler, NULL); + + tep_unregister_event_handler(pevent, -1, "sched", "sched_wakeup", + sched_wakeup_handler, NULL); + + tep_unregister_event_handler(pevent, -1, "sched", "sched_wakeup_new", + sched_wakeup_handler, NULL); +} diff --git a/tools/lib/traceevent/plugin_scsi.c b/tools/lib/traceevent/plugin_scsi.c new file mode 100644 index 000000000..5ec346f6b --- /dev/null +++ b/tools/lib/traceevent/plugin_scsi.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include "event-parse.h" + +typedef unsigned long sector_t; +typedef uint64_t u64; +typedef unsigned int u32; + +/* + * SCSI opcodes + */ +#define TEST_UNIT_READY 0x00 +#define REZERO_UNIT 0x01 +#define REQUEST_SENSE 0x03 +#define FORMAT_UNIT 0x04 +#define READ_BLOCK_LIMITS 0x05 +#define REASSIGN_BLOCKS 0x07 +#define INITIALIZE_ELEMENT_STATUS 0x07 +#define READ_6 0x08 +#define WRITE_6 0x0a +#define SEEK_6 0x0b +#define READ_REVERSE 0x0f +#define WRITE_FILEMARKS 0x10 +#define SPACE 0x11 +#define INQUIRY 0x12 +#define RECOVER_BUFFERED_DATA 0x14 +#define MODE_SELECT 0x15 +#define RESERVE 0x16 +#define RELEASE 0x17 +#define COPY 0x18 +#define ERASE 0x19 +#define MODE_SENSE 0x1a +#define START_STOP 0x1b +#define RECEIVE_DIAGNOSTIC 0x1c +#define SEND_DIAGNOSTIC 0x1d +#define ALLOW_MEDIUM_REMOVAL 0x1e + +#define READ_FORMAT_CAPACITIES 0x23 +#define SET_WINDOW 0x24 +#define READ_CAPACITY 0x25 +#define READ_10 0x28 +#define WRITE_10 0x2a +#define SEEK_10 0x2b +#define POSITION_TO_ELEMENT 0x2b +#define WRITE_VERIFY 0x2e +#define VERIFY 0x2f +#define SEARCH_HIGH 0x30 +#define SEARCH_EQUAL 0x31 +#define SEARCH_LOW 0x32 +#define SET_LIMITS 0x33 +#define PRE_FETCH 0x34 +#define READ_POSITION 0x34 +#define SYNCHRONIZE_CACHE 0x35 +#define LOCK_UNLOCK_CACHE 0x36 +#define READ_DEFECT_DATA 0x37 +#define MEDIUM_SCAN 0x38 +#define COMPARE 0x39 +#define COPY_VERIFY 0x3a +#define WRITE_BUFFER 0x3b +#define READ_BUFFER 0x3c +#define UPDATE_BLOCK 0x3d +#define READ_LONG 0x3e +#define WRITE_LONG 0x3f +#define CHANGE_DEFINITION 0x40 +#define WRITE_SAME 0x41 +#define UNMAP 0x42 +#define READ_TOC 0x43 +#define READ_HEADER 0x44 +#define GET_EVENT_STATUS_NOTIFICATION 0x4a +#define LOG_SELECT 0x4c +#define LOG_SENSE 0x4d +#define XDWRITEREAD_10 0x53 +#define MODE_SELECT_10 0x55 +#define RESERVE_10 0x56 +#define RELEASE_10 0x57 +#define MODE_SENSE_10 0x5a +#define PERSISTENT_RESERVE_IN 0x5e +#define PERSISTENT_RESERVE_OUT 0x5f +#define VARIABLE_LENGTH_CMD 0x7f +#define REPORT_LUNS 0xa0 +#define SECURITY_PROTOCOL_IN 0xa2 +#define MAINTENANCE_IN 0xa3 +#define MAINTENANCE_OUT 0xa4 +#define MOVE_MEDIUM 0xa5 +#define EXCHANGE_MEDIUM 0xa6 +#define READ_12 0xa8 +#define SERVICE_ACTION_OUT_12 0xa9 +#define WRITE_12 0xaa +#define SERVICE_ACTION_IN_12 0xab +#define WRITE_VERIFY_12 0xae +#define VERIFY_12 0xaf +#define SEARCH_HIGH_12 0xb0 +#define SEARCH_EQUAL_12 0xb1 +#define SEARCH_LOW_12 0xb2 +#define SECURITY_PROTOCOL_OUT 0xb5 +#define READ_ELEMENT_STATUS 0xb8 +#define SEND_VOLUME_TAG 0xb6 +#define WRITE_LONG_2 0xea +#define EXTENDED_COPY 0x83 +#define RECEIVE_COPY_RESULTS 0x84 +#define ACCESS_CONTROL_IN 0x86 +#define ACCESS_CONTROL_OUT 0x87 +#define READ_16 0x88 +#define WRITE_16 0x8a +#define READ_ATTRIBUTE 0x8c +#define WRITE_ATTRIBUTE 0x8d +#define VERIFY_16 0x8f +#define SYNCHRONIZE_CACHE_16 0x91 +#define WRITE_SAME_16 0x93 +#define SERVICE_ACTION_BIDIRECTIONAL 0x9d +#define SERVICE_ACTION_IN_16 0x9e +#define SERVICE_ACTION_OUT_16 0x9f +/* values for service action in */ +#define SAI_READ_CAPACITY_16 0x10 +#define SAI_GET_LBA_STATUS 0x12 +/* values for VARIABLE_LENGTH_CMD service action codes + * see spc4r17 Section D.3.5, table D.7 and D.8 */ +#define VLC_SA_RECEIVE_CREDENTIAL 0x1800 +/* values for maintenance in */ +#define MI_REPORT_IDENTIFYING_INFORMATION 0x05 +#define MI_REPORT_TARGET_PGS 0x0a +#define MI_REPORT_ALIASES 0x0b +#define MI_REPORT_SUPPORTED_OPERATION_CODES 0x0c +#define MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS 0x0d +#define MI_REPORT_PRIORITY 0x0e +#define MI_REPORT_TIMESTAMP 0x0f +#define MI_MANAGEMENT_PROTOCOL_IN 0x10 +/* value for MI_REPORT_TARGET_PGS ext header */ +#define MI_EXT_HDR_PARAM_FMT 0x20 +/* values for maintenance out */ +#define MO_SET_IDENTIFYING_INFORMATION 0x06 +#define MO_SET_TARGET_PGS 0x0a +#define MO_CHANGE_ALIASES 0x0b +#define MO_SET_PRIORITY 0x0e +#define MO_SET_TIMESTAMP 0x0f +#define MO_MANAGEMENT_PROTOCOL_OUT 0x10 +/* values for variable length command */ +#define XDREAD_32 0x03 +#define XDWRITE_32 0x04 +#define XPWRITE_32 0x06 +#define XDWRITEREAD_32 0x07 +#define READ_32 0x09 +#define VERIFY_32 0x0a +#define WRITE_32 0x0b +#define WRITE_SAME_32 0x0d + +#define SERVICE_ACTION16(cdb) (cdb[1] & 0x1f) +#define SERVICE_ACTION32(cdb) ((cdb[8] << 8) | cdb[9]) + +static const char * +scsi_trace_misc(struct trace_seq *, unsigned char *, int); + +static const char * +scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len; + sector_t lba = 0, txlen = 0; + + lba |= ((cdb[1] & 0x1F) << 16); + lba |= (cdb[2] << 8); + lba |= cdb[3]; + txlen = cdb[4]; + + trace_seq_printf(p, "lba=%llu txlen=%llu", + (unsigned long long)lba, (unsigned long long)txlen); + trace_seq_putc(p, 0); + return ret; +} + +static const char * +scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len; + sector_t lba = 0, txlen = 0; + + lba |= (cdb[2] << 24); + lba |= (cdb[3] << 16); + lba |= (cdb[4] << 8); + lba |= cdb[5]; + txlen |= (cdb[7] << 8); + txlen |= cdb[8]; + + trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u", + (unsigned long long)lba, (unsigned long long)txlen, + cdb[1] >> 5); + + if (cdb[0] == WRITE_SAME) + trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1); + + trace_seq_putc(p, 0); + return ret; +} + +static const char * +scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len; + sector_t lba = 0, txlen = 0; + + lba |= (cdb[2] << 24); + lba |= (cdb[3] << 16); + lba |= (cdb[4] << 8); + lba |= cdb[5]; + txlen |= (cdb[6] << 24); + txlen |= (cdb[7] << 16); + txlen |= (cdb[8] << 8); + txlen |= cdb[9]; + + trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u", + (unsigned long long)lba, (unsigned long long)txlen, + cdb[1] >> 5); + trace_seq_putc(p, 0); + return ret; +} + +static const char * +scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len; + sector_t lba = 0, txlen = 0; + + lba |= ((u64)cdb[2] << 56); + lba |= ((u64)cdb[3] << 48); + lba |= ((u64)cdb[4] << 40); + lba |= ((u64)cdb[5] << 32); + lba |= (cdb[6] << 24); + lba |= (cdb[7] << 16); + lba |= (cdb[8] << 8); + lba |= cdb[9]; + txlen |= (cdb[10] << 24); + txlen |= (cdb[11] << 16); + txlen |= (cdb[12] << 8); + txlen |= cdb[13]; + + trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u", + (unsigned long long)lba, (unsigned long long)txlen, + cdb[1] >> 5); + + if (cdb[0] == WRITE_SAME_16) + trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1); + + trace_seq_putc(p, 0); + return ret; +} + +static const char * +scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len, *cmd; + sector_t lba = 0, txlen = 0; + u32 ei_lbrt = 0; + + switch (SERVICE_ACTION32(cdb)) { + case READ_32: + cmd = "READ"; + break; + case VERIFY_32: + cmd = "VERIFY"; + break; + case WRITE_32: + cmd = "WRITE"; + break; + case WRITE_SAME_32: + cmd = "WRITE_SAME"; + break; + default: + trace_seq_printf(p, "UNKNOWN"); + goto out; + } + + lba |= ((u64)cdb[12] << 56); + lba |= ((u64)cdb[13] << 48); + lba |= ((u64)cdb[14] << 40); + lba |= ((u64)cdb[15] << 32); + lba |= (cdb[16] << 24); + lba |= (cdb[17] << 16); + lba |= (cdb[18] << 8); + lba |= cdb[19]; + ei_lbrt |= (cdb[20] << 24); + ei_lbrt |= (cdb[21] << 16); + ei_lbrt |= (cdb[22] << 8); + ei_lbrt |= cdb[23]; + txlen |= (cdb[28] << 24); + txlen |= (cdb[29] << 16); + txlen |= (cdb[30] << 8); + txlen |= cdb[31]; + + trace_seq_printf(p, "%s_32 lba=%llu txlen=%llu protect=%u ei_lbrt=%u", + cmd, (unsigned long long)lba, + (unsigned long long)txlen, cdb[10] >> 5, ei_lbrt); + + if (SERVICE_ACTION32(cdb) == WRITE_SAME_32) + trace_seq_printf(p, " unmap=%u", cdb[10] >> 3 & 1); + +out: + trace_seq_putc(p, 0); + return ret; +} + +static const char * +scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len; + unsigned int regions = cdb[7] << 8 | cdb[8]; + + trace_seq_printf(p, "regions=%u", (regions - 8) / 16); + trace_seq_putc(p, 0); + return ret; +} + +static const char * +scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len, *cmd; + sector_t lba = 0; + u32 alloc_len = 0; + + switch (SERVICE_ACTION16(cdb)) { + case SAI_READ_CAPACITY_16: + cmd = "READ_CAPACITY_16"; + break; + case SAI_GET_LBA_STATUS: + cmd = "GET_LBA_STATUS"; + break; + default: + trace_seq_printf(p, "UNKNOWN"); + goto out; + } + + lba |= ((u64)cdb[2] << 56); + lba |= ((u64)cdb[3] << 48); + lba |= ((u64)cdb[4] << 40); + lba |= ((u64)cdb[5] << 32); + lba |= (cdb[6] << 24); + lba |= (cdb[7] << 16); + lba |= (cdb[8] << 8); + lba |= cdb[9]; + alloc_len |= (cdb[10] << 24); + alloc_len |= (cdb[11] << 16); + alloc_len |= (cdb[12] << 8); + alloc_len |= cdb[13]; + + trace_seq_printf(p, "%s lba=%llu alloc_len=%u", cmd, + (unsigned long long)lba, alloc_len); + +out: + trace_seq_putc(p, 0); + return ret; +} + +static const char * +scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len) +{ + switch (SERVICE_ACTION32(cdb)) { + case READ_32: + case VERIFY_32: + case WRITE_32: + case WRITE_SAME_32: + return scsi_trace_rw32(p, cdb, len); + default: + return scsi_trace_misc(p, cdb, len); + } +} + +static const char * +scsi_trace_misc(struct trace_seq *p, unsigned char *cdb, int len) +{ + const char *ret = p->buffer + p->len; + + trace_seq_printf(p, "-"); + trace_seq_putc(p, 0); + return ret; +} + +const char * +scsi_trace_parse_cdb(struct trace_seq *p, unsigned char *cdb, int len) +{ + switch (cdb[0]) { + case READ_6: + case WRITE_6: + return scsi_trace_rw6(p, cdb, len); + case READ_10: + case VERIFY: + case WRITE_10: + case WRITE_SAME: + return scsi_trace_rw10(p, cdb, len); + case READ_12: + case VERIFY_12: + case WRITE_12: + return scsi_trace_rw12(p, cdb, len); + case READ_16: + case VERIFY_16: + case WRITE_16: + case WRITE_SAME_16: + return scsi_trace_rw16(p, cdb, len); + case UNMAP: + return scsi_trace_unmap(p, cdb, len); + case SERVICE_ACTION_IN_16: + return scsi_trace_service_action_in(p, cdb, len); + case VARIABLE_LENGTH_CMD: + return scsi_trace_varlen(p, cdb, len); + default: + return scsi_trace_misc(p, cdb, len); + } +} + +unsigned long long process_scsi_trace_parse_cdb(struct trace_seq *s, + unsigned long long *args) +{ + scsi_trace_parse_cdb(s, (unsigned char *) (unsigned long) args[1], args[2]); + return 0; +} + +int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +{ + tep_register_print_function(pevent, + process_scsi_trace_parse_cdb, + TEP_FUNC_ARG_STRING, + "scsi_trace_parse_cdb", + TEP_FUNC_ARG_PTR, + TEP_FUNC_ARG_PTR, + TEP_FUNC_ARG_INT, + TEP_FUNC_ARG_VOID); + return 0; +} + +void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +{ + tep_unregister_print_function(pevent, process_scsi_trace_parse_cdb, + "scsi_trace_parse_cdb"); +} diff --git a/tools/lib/traceevent/plugin_xen.c b/tools/lib/traceevent/plugin_xen.c new file mode 100644 index 000000000..b2acbd6e9 --- /dev/null +++ b/tools/lib/traceevent/plugin_xen.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "event-parse.h" + +#define __HYPERVISOR_set_trap_table 0 +#define __HYPERVISOR_mmu_update 1 +#define __HYPERVISOR_set_gdt 2 +#define __HYPERVISOR_stack_switch 3 +#define __HYPERVISOR_set_callbacks 4 +#define __HYPERVISOR_fpu_taskswitch 5 +#define __HYPERVISOR_sched_op_compat 6 +#define __HYPERVISOR_dom0_op 7 +#define __HYPERVISOR_set_debugreg 8 +#define __HYPERVISOR_get_debugreg 9 +#define __HYPERVISOR_update_descriptor 10 +#define __HYPERVISOR_memory_op 12 +#define __HYPERVISOR_multicall 13 +#define __HYPERVISOR_update_va_mapping 14 +#define __HYPERVISOR_set_timer_op 15 +#define __HYPERVISOR_event_channel_op_compat 16 +#define __HYPERVISOR_xen_version 17 +#define __HYPERVISOR_console_io 18 +#define __HYPERVISOR_physdev_op_compat 19 +#define __HYPERVISOR_grant_table_op 20 +#define __HYPERVISOR_vm_assist 21 +#define __HYPERVISOR_update_va_mapping_otherdomain 22 +#define __HYPERVISOR_iret 23 /* x86 only */ +#define __HYPERVISOR_vcpu_op 24 +#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ +#define __HYPERVISOR_mmuext_op 26 +#define __HYPERVISOR_acm_op 27 +#define __HYPERVISOR_nmi_op 28 +#define __HYPERVISOR_sched_op 29 +#define __HYPERVISOR_callback_op 30 +#define __HYPERVISOR_xenoprof_op 31 +#define __HYPERVISOR_event_channel_op 32 +#define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_tmem_op 38 + +/* Architecture-specific hypercall definitions. */ +#define __HYPERVISOR_arch_0 48 +#define __HYPERVISOR_arch_1 49 +#define __HYPERVISOR_arch_2 50 +#define __HYPERVISOR_arch_3 51 +#define __HYPERVISOR_arch_4 52 +#define __HYPERVISOR_arch_5 53 +#define __HYPERVISOR_arch_6 54 +#define __HYPERVISOR_arch_7 55 + +#define N(x) [__HYPERVISOR_##x] = "("#x")" +static const char *xen_hypercall_names[] = { + N(set_trap_table), + N(mmu_update), + N(set_gdt), + N(stack_switch), + N(set_callbacks), + N(fpu_taskswitch), + N(sched_op_compat), + N(dom0_op), + N(set_debugreg), + N(get_debugreg), + N(update_descriptor), + N(memory_op), + N(multicall), + N(update_va_mapping), + N(set_timer_op), + N(event_channel_op_compat), + N(xen_version), + N(console_io), + N(physdev_op_compat), + N(grant_table_op), + N(vm_assist), + N(update_va_mapping_otherdomain), + N(iret), + N(vcpu_op), + N(set_segment_base), + N(mmuext_op), + N(acm_op), + N(nmi_op), + N(sched_op), + N(callback_op), + N(xenoprof_op), + N(event_channel_op), + N(physdev_op), + N(hvm_op), + +/* Architecture-specific hypercall definitions. */ + N(arch_0), + N(arch_1), + N(arch_2), + N(arch_3), + N(arch_4), + N(arch_5), + N(arch_6), + N(arch_7), +}; +#undef N + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +static const char *xen_hypercall_name(unsigned op) +{ + if (op < ARRAY_SIZE(xen_hypercall_names) && + xen_hypercall_names[op] != NULL) + return xen_hypercall_names[op]; + + return ""; +} + +unsigned long long process_xen_hypercall_name(struct trace_seq *s, + unsigned long long *args) +{ + unsigned int op = args[0]; + + trace_seq_printf(s, "%s", xen_hypercall_name(op)); + return 0; +} + +int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +{ + tep_register_print_function(pevent, + process_xen_hypercall_name, + TEP_FUNC_ARG_STRING, + "xen_hypercall_name", + TEP_FUNC_ARG_INT, + TEP_FUNC_ARG_VOID); + return 0; +} + +void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +{ + tep_unregister_print_function(pevent, process_xen_hypercall_name, + "xen_hypercall_name"); +} diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c new file mode 100644 index 000000000..e3bac4543 --- /dev/null +++ b/tools/lib/traceevent/trace-seq.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> + +#include <asm/bug.h> +#include "event-parse.h" +#include "event-utils.h" + +/* + * The TRACE_SEQ_POISON is to catch the use of using + * a trace_seq structure after it was destroyed. + */ +#define TRACE_SEQ_POISON ((void *)0xdeadbeef) +#define TRACE_SEQ_CHECK(s) \ +do { \ + if (WARN_ONCE((s)->buffer == TRACE_SEQ_POISON, \ + "Usage of trace_seq after it was destroyed")) \ + (s)->state = TRACE_SEQ__BUFFER_POISONED; \ +} while (0) + +#define TRACE_SEQ_CHECK_RET_N(s, n) \ +do { \ + TRACE_SEQ_CHECK(s); \ + if ((s)->state != TRACE_SEQ__GOOD) \ + return n; \ +} while (0) + +#define TRACE_SEQ_CHECK_RET(s) TRACE_SEQ_CHECK_RET_N(s, ) +#define TRACE_SEQ_CHECK_RET0(s) TRACE_SEQ_CHECK_RET_N(s, 0) + +/** + * trace_seq_init - initialize the trace_seq structure + * @s: a pointer to the trace_seq structure to initialize + */ +void trace_seq_init(struct trace_seq *s) +{ + s->len = 0; + s->readpos = 0; + s->buffer_size = TRACE_SEQ_BUF_SIZE; + s->buffer = malloc(s->buffer_size); + if (s->buffer != NULL) + s->state = TRACE_SEQ__GOOD; + else + s->state = TRACE_SEQ__MEM_ALLOC_FAILED; +} + +/** + * trace_seq_reset - re-initialize the trace_seq structure + * @s: a pointer to the trace_seq structure to reset + */ +void trace_seq_reset(struct trace_seq *s) +{ + if (!s) + return; + TRACE_SEQ_CHECK(s); + s->len = 0; + s->readpos = 0; +} + +/** + * trace_seq_destroy - free up memory of a trace_seq + * @s: a pointer to the trace_seq to free the buffer + * + * Only frees the buffer, not the trace_seq struct itself. + */ +void trace_seq_destroy(struct trace_seq *s) +{ + if (!s) + return; + TRACE_SEQ_CHECK_RET(s); + free(s->buffer); + s->buffer = TRACE_SEQ_POISON; +} + +static void expand_buffer(struct trace_seq *s) +{ + char *buf; + + buf = realloc(s->buffer, s->buffer_size + TRACE_SEQ_BUF_SIZE); + if (WARN_ONCE(!buf, "Can't allocate trace_seq buffer memory")) { + s->state = TRACE_SEQ__MEM_ALLOC_FAILED; + return; + } + + s->buffer = buf; + s->buffer_size += TRACE_SEQ_BUF_SIZE; +} + +/** + * trace_seq_printf - sequence printing of trace information + * @s: trace sequence descriptor + * @fmt: printf format string + * + * It returns 0 if the trace oversizes the buffer's free + * space, 1 otherwise. + * + * The tracer may use either sequence operations or its own + * copy to user routines. To simplify formating of a trace + * trace_seq_printf is used to store strings into a special + * buffer (@s). Then the output may be either used by + * the sequencer or pulled into another buffer. + */ +int +trace_seq_printf(struct trace_seq *s, const char *fmt, ...) +{ + va_list ap; + int len; + int ret; + + try_again: + TRACE_SEQ_CHECK_RET0(s); + + len = (s->buffer_size - 1) - s->len; + + va_start(ap, fmt); + ret = vsnprintf(s->buffer + s->len, len, fmt, ap); + va_end(ap); + + if (ret >= len) { + expand_buffer(s); + goto try_again; + } + + s->len += ret; + + return 1; +} + +/** + * trace_seq_vprintf - sequence printing of trace information + * @s: trace sequence descriptor + * @fmt: printf format string + * + * The tracer may use either sequence operations or its own + * copy to user routines. To simplify formating of a trace + * trace_seq_printf is used to store strings into a special + * buffer (@s). Then the output may be either used by + * the sequencer or pulled into another buffer. + */ +int +trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) +{ + int len; + int ret; + + try_again: + TRACE_SEQ_CHECK_RET0(s); + + len = (s->buffer_size - 1) - s->len; + + ret = vsnprintf(s->buffer + s->len, len, fmt, args); + + if (ret >= len) { + expand_buffer(s); + goto try_again; + } + + s->len += ret; + + return len; +} + +/** + * trace_seq_puts - trace sequence printing of simple string + * @s: trace sequence descriptor + * @str: simple string to record + * + * The tracer may use either the sequence operations or its own + * copy to user routines. This function records a simple string + * into a special buffer (@s) for later retrieval by a sequencer + * or other mechanism. + */ +int trace_seq_puts(struct trace_seq *s, const char *str) +{ + int len; + + TRACE_SEQ_CHECK_RET0(s); + + len = strlen(str); + + while (len > ((s->buffer_size - 1) - s->len)) + expand_buffer(s); + + TRACE_SEQ_CHECK_RET0(s); + + memcpy(s->buffer + s->len, str, len); + s->len += len; + + return len; +} + +int trace_seq_putc(struct trace_seq *s, unsigned char c) +{ + TRACE_SEQ_CHECK_RET0(s); + + while (s->len >= (s->buffer_size - 1)) + expand_buffer(s); + + TRACE_SEQ_CHECK_RET0(s); + + s->buffer[s->len++] = c; + + return 1; +} + +void trace_seq_terminate(struct trace_seq *s) +{ + TRACE_SEQ_CHECK_RET(s); + + /* There's always one character left on the buffer */ + s->buffer[s->len] = 0; +} + +int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp) +{ + TRACE_SEQ_CHECK(s); + + switch (s->state) { + case TRACE_SEQ__GOOD: + return fprintf(fp, "%.*s", s->len, s->buffer); + case TRACE_SEQ__BUFFER_POISONED: + fprintf(fp, "%s\n", "Usage of trace_seq after it was destroyed"); + break; + case TRACE_SEQ__MEM_ALLOC_FAILED: + fprintf(fp, "%s\n", "Can't allocate trace_seq buffer memory"); + break; + } + return -1; +} + +int trace_seq_do_printf(struct trace_seq *s) +{ + return trace_seq_do_fprintf(s, stdout); +} diff --git a/tools/lib/vsprintf.c b/tools/lib/vsprintf.c new file mode 100644 index 000000000..e08ee147e --- /dev/null +++ b/tools/lib/vsprintf.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <sys/types.h> +#include <linux/kernel.h> +#include <stdio.h> + +int vscnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + int i = vsnprintf(buf, size, fmt, args); + ssize_t ssize = size; + + return (i >= ssize) ? (ssize - 1) : i; +} + +int scnprintf(char * buf, size_t size, const char * fmt, ...) +{ + ssize_t ssize = size; + va_list args; + int i; + + va_start(args, fmt); + i = vsnprintf(buf, size, fmt, args); + va_end(args); + + return (i >= ssize) ? (ssize - 1) : i; +} |