Diffstat
-rw-r--r--   src/perf/perf.cpp          266
-rw-r--r--   src/perf/perf.h             76
-rw-r--r--   src/perf/perf_bundle.cpp   348
-rw-r--r--   src/perf/perf_bundle.h      59
-rw-r--r--   src/perf/perf_event.h      910
5 files changed, 1659 insertions, 0 deletions
diff --git a/src/perf/perf.cpp b/src/perf/perf.cpp
new file mode 100644
index 0000000..9ed0ba8
--- /dev/null
+++ b/src/perf/perf.cpp
@@ -0,0 +1,266 @@
+/*
+ * Copyright 2010, Intel Corporation
+ *
+ * This file is part of PowerTOP
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ * or just google for it.
+ *
+ * Authors:
+ *	Arjan van de Ven <arjan@linux.intel.com>
+ */
+
+#include <iostream>
+#include <fstream>
+
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+
+#include <fcntl.h>
+
+#include "perf_event.h"
+#include "perf.h"
+#include "../lib.h"
+#include "../display.h"
+
+struct pevent *perf_event::pevent;
+
+static int get_trace_type(const char *eventname)
+{
+	string str;
+	int this_trace;
+
+	str = read_sysfs_string("/sys/kernel/debug/tracing/events/%s/id",
+				eventname);
+	if (str.length() < 1)
+		return -1;
+
+	this_trace = strtoull(str.c_str(), NULL, 10);
+	return this_trace;
+}
+
+static inline int sys_perf_event_open(struct perf_event_attr *attr,
+				      pid_t pid, int cpu, int group_fd,
+				      unsigned long flags)
+{
+	attr->size = sizeof(*attr);
+	return syscall(__NR_perf_event_open, attr, pid, cpu,
+		       group_fd, flags);
+}
+
+void perf_event::create_perf_event(char *eventname, int _cpu)
+{
+	struct perf_event_attr attr;
+	int ret;
+	int err;
+
+	struct {
+		__u64 count;
+		__u64 time_enabled;
+		__u64 time_running;
+		__u64 id;
+	} read_data;
+
+	if (perf_fd != -1)
+		clear();
+
+	memset(&attr, 0, sizeof(attr));
+
+	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+			   PERF_FORMAT_TOTAL_TIME_RUNNING |
+			   PERF_FORMAT_ID;
+
+	attr.sample_freq = 0;
+	attr.sample_period = 1;
+	attr.sample_type |= PERF_SAMPLE_RAW | PERF_SAMPLE_CPU | PERF_SAMPLE_TIME;
+
+	attr.mmap = 1;
+	attr.comm = 1;
+	attr.inherit = 0;
+	attr.disabled = 1;
+
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.config = trace_type;
+
+	if (attr.config <= 0)
+		return;
+
+	perf_fd = sys_perf_event_open(&attr, -1, _cpu, -1, 0);
+
+	if (perf_fd < 0) {
+		err = errno;
+		reset_display();
+		if (err == EMFILE)
+			fprintf(stderr, _("Too many open files, please increase the limit of open file descriptors.\n"));
+		else {
+			fprintf(stderr, _("PowerTOP %s needs the kernel to support the 'perf' subsystem\n"), PACKAGE_VERSION);
+			fprintf(stderr, _("as well as support for trace points in the kernel:\n"));
+			fprintf(stderr, "CONFIG_PERF_EVENTS=y\nCONFIG_PERF_COUNTERS=y\nCONFIG_TRACEPOINTS=y\nCONFIG_TRACING=y\n");
+		}
+		exit(EXIT_FAILURE);
+	}
+	if (read(perf_fd, &read_data, sizeof(read_data)) == -1) {
+		reset_display();
+		perror("Unable to read perf file descriptor\n");
+		exit(-1);
+	}
+
+	fcntl(perf_fd, F_SETFL, O_NONBLOCK);
+
+	perf_mmap = mmap(NULL, (bufsize+1)*getpagesize(),
+			 PROT_READ | PROT_WRITE, MAP_SHARED, perf_fd, 0);
+	if (perf_mmap == MAP_FAILED) {
+		fprintf(stderr, "failed to mmap with %d (%s)\n", errno, strerror(errno));
+		return;
+	}
+
+	ret = ioctl(perf_fd, PERF_EVENT_IOC_ENABLE, 0);
+
+	if (ret < 0) {
+		fprintf(stderr, "failed to enable perf\n");
+	}
+
+	pc = (perf_event_mmap_page *)perf_mmap;
+	data_mmap = (unsigned char *)perf_mmap + getpagesize();
+}
+
+void perf_event::set_event_name(const char *event_name)
+{
+	free(name);
+	name = strdup(event_name);
+	if (!name) {
+		fprintf(stderr, "failed to allocate event name\n");
+		return;
+	}
+
+	char *c;
+
+	c = strchr(name, ':');
+	if (c)
+		*c = '/';
+
+	trace_type = get_trace_type(name);
+}
+
+perf_event::~perf_event(void)
+{
+	free(name);
+
+	if (perf_event::pevent->ref_count == 1) {
+		pevent_free(perf_event::pevent);
+		perf_event::pevent = NULL;
+		clear();
+	} else
+		pevent_unref(perf_event::pevent);
+}
+
+void perf_event::set_cpu(int _cpu)
+{
+	cpu = _cpu;
+}
+
+static void allocate_pevent(void)
+{
+	if (!perf_event::pevent)
+		perf_event::pevent = pevent_alloc();
+	else
+		pevent_ref(perf_event::pevent);
+}
+
+perf_event::perf_event(const char *event_name, int _cpu, int buffer_size)
+{
+	allocate_pevent();
+	name = NULL;
+	perf_fd = -1;
+	bufsize = buffer_size;
+	cpu = _cpu;
+	perf_mmap = NULL;
+	trace_type = 0;
+	set_event_name(event_name);
+}
+
+perf_event::perf_event(void)
+{
+	allocate_pevent();
+	name = NULL;
+	perf_fd = -1;
+	bufsize = 128;
+	perf_mmap = NULL;
+	cpu = 0;
+	trace_type = 0;
+}
+
+void perf_event::start(void)
+{
+	create_perf_event(name, cpu);
+}
+
+void perf_event::stop(void)
+{
+	int ret;
+	ret = ioctl(perf_fd, PERF_EVENT_IOC_DISABLE, 0);
+	if (ret)
+		cout << "stop failed\n";
+}
+
+void perf_event::process(void *cookie)
+{
+	struct perf_event_header *header;
+
+	if (perf_fd < 0)
+		return;
+
+	while (pc->data_tail != pc->data_head) {
+		while (pc->data_tail >= (unsigned int)bufsize * getpagesize())
+			pc->data_tail -= bufsize * getpagesize();
+
+		header = (struct perf_event_header *)((unsigned char *)data_mmap + pc->data_tail);
+
+		if (header->size == 0)
+			break;
+
+		pc->data_tail += header->size;
+
+		while (pc->data_tail >= (unsigned int)bufsize * getpagesize())
+			pc->data_tail -= bufsize * getpagesize();
+
+		if (header->type == PERF_RECORD_SAMPLE)
+			handle_event(header, cookie);
+	}
+	pc->data_tail = pc->data_head;
+}
+
+void perf_event::clear(void)
+{
+	if (perf_mmap) {
+//		memset(perf_mmap, 0, (bufsize)*getpagesize());
+		munmap(perf_mmap, (bufsize+1)*getpagesize());
+		perf_mmap = NULL;
+	}
+	if (perf_fd != -1)
+		close(perf_fd);
+	perf_fd = -1;
+}
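Editor's note: perf_event::process() above drains the kernel ring buffer by advancing data_tail toward data_head, folding the offset back into the data area whenever it runs past the end (the kernel publishes monotonically increasing offsets). A minimal standalone sketch of that wrap arithmetic, assuming the default 128-page buffer and a 4096-byte page; the helper name is illustrative, not part of this commit:

#include <cassert>
#include <cstdint>

/* Fold a monotonically increasing ring-buffer offset back into the
 * data area, mirroring the wrap loop in perf_event::process(). */
static uint64_t wrap_offset(uint64_t tail, uint64_t bufsize_pages, uint64_t pagesize)
{
	const uint64_t area = bufsize_pages * pagesize;
	while (tail >= area)
		tail -= area;
	return tail;
}

int main(void)
{
	/* 128 pages of 4096 bytes -> a 524288-byte data area */
	assert(wrap_offset(10, 128, 4096) == 10);       /* no wrap needed */
	assert(wrap_offset(530000, 128, 4096) == 5712); /* wrapped once */
	return 0;
}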
diff --git a/src/perf/perf.h b/src/perf/perf.h
new file mode 100644
index 0000000..ee072ae
--- /dev/null
+++ b/src/perf/perf.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2010, Intel Corporation
+ *
+ * This file is part of PowerTOP
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ * or just google for it.
+ *
+ * Authors:
+ *	Arjan van de Ven <arjan@linux.intel.com>
+ */
+#ifndef _INCLUDE_GUARD_PERF_H_
+#define _INCLUDE_GUARD_PERF_H_
+
+#include <iostream>
+
+extern "C" {
+	#include "../traceevent/event-parse.h"
+}
+
+using namespace std;
+
+class perf_event {
+protected:
+	int perf_fd;
+	void *perf_mmap;
+	void *data_mmap;
+	struct perf_event_mmap_page *pc;
+
+	int bufsize;
+	char *name;
+	int cpu;
+	void create_perf_event(char *eventname, int cpu);
+
+public:
+	unsigned int trace_type;
+
+	perf_event(void);
+	perf_event(const char *event_name, int cpu = 0, int buffer_size = 128);
+
+	virtual ~perf_event(void);
+
+	void set_event_name(const char *event_name);
+	void set_cpu(int cpu);
+
+	void start(void);
+	void stop(void);
+	void clear(void);
+
+	void process(void *cookie);
+
+	virtual void handle_event(struct perf_event_header *header, void *cookie) { };
+
+	static struct pevent *pevent;
+};
+
+#endif
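Editor's note: handle_event() is the extension point of this class: process() invokes it once per PERF_RECORD_SAMPLE record. A hedged sketch of a consumer built on the API above; the subclass name and the tracepoint are illustrative, not part of this commit:

#include "perf.h"

/* Hypothetical subclass: counts the samples delivered for one tracepoint. */
class counting_event : public perf_event {
public:
	unsigned long seen;

	counting_event(const char *event_name, int cpu)
		: perf_event(event_name, cpu), seen(0) { }

	virtual void handle_event(struct perf_event_header *header, void *cookie)
	{
		seen++;	/* a real consumer would decode the raw payload here */
	}
};

/* Usage, under the same assumptions:
 *	counting_event ev("sched:sched_wakeup", 0);
 *	ev.start();
 *	... measurement interval ...
 *	ev.stop();
 *	ev.process(NULL);	// handle_event() fires per sample
 */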
diff --git a/src/perf/perf_bundle.cpp b/src/perf/perf_bundle.cpp
new file mode 100644
index 0000000..3d216ff
--- /dev/null
+++ b/src/perf/perf_bundle.cpp
@@ -0,0 +1,348 @@
+/*
+ * Copyright 2010, Intel Corporation
+ *
+ * This file is part of PowerTOP
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ * or just google for it.
+ *
+ * Authors:
+ *	Arjan van de Ven <arjan@linux.intel.com>
+ */
+#include <iostream>
+#include <malloc.h>
+#include <algorithm>
+#include <string.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include "perf_bundle.h"
+#include "perf_event.h"
+#include "perf.h"
+
+#include "../cpu/cpu.h"
+
+#if defined(__GXX_EXPERIMENTAL_CXX0X__) || (__cplusplus >= 201103L)
+# define USE_DECLTYPE
+#endif
+
+class perf_bundle_event: public perf_event
+{
+public:
+	perf_bundle_event(void);
+	virtual void handle_event(struct perf_event_header *header, void *cookie);
+};
+
+perf_bundle_event::perf_bundle_event(void) : perf_event()
+{
+}
+
+void perf_bundle_event::handle_event(struct perf_event_header *header, void *cookie)
+{
+	unsigned char *buffer;
+	vector<void *> *vector;
+
+	buffer = (unsigned char *)malloc(header->size);
+	memcpy(buffer, header, header->size);
+
+#ifdef USE_DECLTYPE
+	vector = (decltype(vector))cookie;
+#else
+	vector = (typeof(vector))cookie;
+#endif
+	vector->push_back(buffer);
+}
+
+void perf_bundle::release(void)
+{
+	class perf_event *ev;
+	unsigned int i = 0;
+
+	for (i = 0; i < events.size(); i++) {
+		ev = events[i];
+		if (!ev)
+			continue;
+		ev->clear();
+		delete ev;
+	}
+	events.clear();
+
+	for (i = 0; i < event_names.size(); i++) {
+		free((void*)event_names[i]);
+	}
+	event_names.clear();
+
+	for (i = 0; i < records.size(); i++) {
+		free(records[i]);
+	}
+	records.clear();
+}
+
+static char *read_file(const char *file)
+{
+	char *buffer = NULL; /* quiet gcc */
+	char buf[4096];
+	int len = 0;
+	int fd;
+	int r;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0)
+		exit(-1);
+
+	while ((r = read(fd, buf, 4096)) > 0) {
+		if (len) {
+			char *tmp = (char *)realloc(buffer, len + r + 1);
+			if (!tmp)
+				free(buffer);
+			buffer = tmp;
+		} else
+			buffer = (char *)malloc(r + 1);
+		if (!buffer)
+			goto out;
+		memcpy(buffer + len, buf, r);
+		len += r;
+		buffer[len] = '\0';
+	}
+out:
+	close(fd);
+	return buffer;
+}
+
+static void parse_event_format(const char *event_name)
+{
+	char *tptr;
+	char *name = strdup(event_name);
+	char *sys = strtok_r(name, ":", &tptr);
+	char *event = strtok_r(NULL, ":", &tptr);
+	char *file;
+	char *buf;
+
+	file = (char *)malloc(strlen(sys) + strlen(event) +
+			strlen("/sys/kernel/debug/tracing/events////format") + 2);
+	sprintf(file, "/sys/kernel/debug/tracing/events/%s/%s/format", sys, event);
+
+	buf = read_file(file);
+	free(file);
+	if (!buf) {
+		free(name);
+		return;
+	}
+
+	pevent_parse_event(perf_event::pevent, buf, strlen(buf), sys);
+	free(name);
+	free(buf);
+}
+
+bool perf_bundle::add_event(const char *event_name)
+{
+	unsigned int i;
+	bool event_added = false;
+	class perf_event *ev;
+
+	for (i = 0; i < all_cpus.size(); i++) {
+
+		if (!all_cpus[i])
+			continue;
+
+		ev = new class perf_bundle_event();
+
+		ev->set_event_name(event_name);
+		ev->set_cpu(i);
+
+		if ((int)ev->trace_type >= 0) {
+			if (event_names.find(ev->trace_type) == event_names.end()) {
+				event_names[ev->trace_type] = strdup(event_name);
+				parse_event_format(event_name);
+			}
+			events.push_back(ev);
+			event_added = true;
+		} else {
+			delete ev;
+		}
+	}
+	return event_added;
+}
+
+void perf_bundle::start(void)
+{
+	unsigned int i;
+	class perf_event *ev;
+
+	for (i = 0; i < events.size(); i++) {
+		ev = events[i];
+		if (!ev)
+			continue;
+		ev->start();
+	}
+}
+
+void perf_bundle::stop(void)
+{
+	unsigned int i;
+	class perf_event *ev;
+
+	for (i = 0; i < events.size(); i++) {
+		ev = events[i];
+		if (!ev)
+			continue;
+		ev->stop();
+	}
+}
+
+void perf_bundle::clear(void)
+{
+	unsigned int i;
+	class perf_event *ev;
+
+	for (i = 0; i < events.size(); i++) {
+		ev = events[i];
+		if (!ev)
+			continue;
+		ev->clear();
+	}
+
+	for (i = 0; i < records.size(); i++) {
+		free(records[i]);
+	}
+	records.resize(0);
+}
+
+struct trace_entry {
+	uint64_t	time;
+	uint32_t	cpu;
+	uint32_t	res;
+	__u32		size;
+} __attribute__((packed));
+
+struct perf_sample {
+	struct perf_event_header	header;
+	struct trace_entry		trace;
+	unsigned char			data[0];
+} __attribute__((packed));
+
+static uint64_t timestamp(perf_event_header *event)
+{
+	struct perf_sample *sample;
+
+	if (event->type != PERF_RECORD_SAMPLE)
+		return 0;
+
+	sample = (struct perf_sample *)event;
+
+#if 0
+	int i;
+	unsigned char *x;
+
+	printf("header:\n");
+	printf("  type is %x \n", sample->header.type);
+	printf("  misc is %x \n", sample->header.misc);
+	printf("  size is %i \n", sample->header.size);
+	printf("sample:\n");
+	printf("  time is %llx \n", sample->trace.time);
+	printf("  cpu is %i / %x \n", sample->trace.cpu, sample->trace.cpu);
+	printf("  res is %i / %x \n", sample->trace.res, sample->trace.res);
+	printf("  size is %i / %x \n", sample->trace.size, sample->trace.size);
+	printf("  type is %i / %x \n", sample->trace.type, sample->trace.type);
+	printf("  flags is %i / %x \n", sample->trace.flags, sample->trace.flags);
+	printf("  p/c is %i / %x \n", sample->trace.preempt_count, sample->trace.preempt_count);
+	printf("  pid is %i / %x \n", sample->trace.pid, sample->trace.pid);
+	printf("  lock depth is %i / %x \n", sample->trace.lock_depth, sample->trace.lock_depth);
+
+	x = (unsigned char *)sample;
+	for (i = 0; i < sample->header.size; i++)
+		printf("%02x ", *(x+i));
+	printf("\n");
+#endif
+	return sample->trace.time;
+}
+
+static bool event_sort_function(void *i, void *j)
+{
+	struct perf_event_header *I, *J;
+
+	I = (struct perf_event_header *) i;
+	J = (struct perf_event_header *) j;
+	return (timestamp(I) < timestamp(J));
+}
+
+/*
+ * A sample's PERF_SAMPLE_CPU cpu nr is a raw_smp_processor_id() at the
+ * time of perf_event_output(), which may differ from the struct perf_event
+ * cpu, thus we need to fix up sample->trace.cpu.
+ */
+static void fixup_sample_trace_cpu(struct perf_sample *sample)
+{
+	struct event_format *event;
+	struct pevent_record rec;
+	unsigned long long cpu_nr;
+	int type;
+	int ret;
+
+	rec.data = &sample->data;
+	type = pevent_data_type(perf_event::pevent, &rec);
+	event = pevent_find_event(perf_event::pevent, type);
+	if (!event)
+		return;
+	/* don't touch the trace if the event does not contain a cpu_id field */
+	ret = pevent_get_field_val(NULL, event, "cpu_id", &rec, &cpu_nr, 0);
+	if (ret < 0)
+		return;
+	sample->trace.cpu = cpu_nr;
+}
+
+void perf_bundle::process(void)
+{
+	unsigned int i;
+	class perf_event *ev;
+
+	/* fixme: reserve enough space in the array in one go */
+	for (i = 0; i < events.size(); i++) {
+		ev = events[i];
+		if (!ev)
+			continue;
+		ev->process(&records);
+	}
+	sort(records.begin(), records.end(), event_sort_function);
+
+	for (i = 0; i < records.size(); i++) {
+		struct perf_sample *sample;
+
+		sample = (struct perf_sample *)records[i];
+		if (!sample)
+			continue;
+
+		if (sample->header.type != PERF_RECORD_SAMPLE)
+			continue;
+
+		fixup_sample_trace_cpu(sample);
+		handle_trace_point(&sample->data, sample->trace.cpu, sample->trace.time);
+	}
+}
+
+void perf_bundle::handle_trace_point(void *trace, int cpu, uint64_t time)
+{
+	printf("UH OH... abstract handle_trace_point called\n");
+}
diff --git a/src/perf/perf_bundle.h b/src/perf/perf_bundle.h
new file mode 100644
index 0000000..ec50744
--- /dev/null
+++ b/src/perf/perf_bundle.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2010, Intel Corporation
+ *
+ * This file is part of PowerTOP
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ * or just google for it.
+ *
+ * Authors:
+ *	Arjan van de Ven <arjan@linux.intel.com>
+ */
+#ifndef _INCLUDE_GUARD_PERF_BUNDLE_H_
+#define _INCLUDE_GUARD_PERF_BUNDLE_H_
+
+#include <iostream>
+#include <vector>
+#include <map>
+
+using namespace std;
+
+#include "perf.h"
+class perf_event;
+
+class perf_bundle {
+protected:
+	vector<class perf_event *> events;
+	std::map<int, char*> event_names;
+public:
+	vector<void *> records;
+	virtual ~perf_bundle() {};
+
+	virtual void release(void);
+	bool add_event(const char *event_name);
+
+	void start(void);
+	void stop(void);
+	void clear(void);
+
+	void process(void);
+
+	virtual void handle_trace_point(void *trace, int cpu = 0, uint64_t time = 0);
+};
+
+#endif
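Editor's note: perf_bundle fans a single event name out to one perf_event per online CPU, merges all the per-CPU records, time-sorts them, and hands each sample to handle_trace_point(). A sketch of the intended usage, with a hypothetical subclass and the standard sched:sched_switch tracepoint as the example:

#include <stdint.h>
#include <cstdio>
#include "perf_bundle.h"

/* Hypothetical bundle: prints the cpu/timestamp of each trace point. */
class printing_bundle : public perf_bundle {
public:
	virtual void handle_trace_point(void *trace, int cpu, uint64_t time)
	{
		printf("sample on cpu %d at %llu\n", cpu, (unsigned long long)time);
	}
};

/* Usage, under the same assumptions:
 *	printing_bundle b;
 *	if (b.add_event("sched:sched_switch")) {
 *		b.start();
 *		... measurement interval ...
 *		b.stop();
 *		b.process();	// sorted callbacks arrive here
 *		b.clear();
 *	}
 */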
diff --git a/src/perf/perf_event.h b/src/perf/perf_event.h
new file mode 100644
index 0000000..92a38b8
--- /dev/null
+++ b/src/perf/perf_event.h
@@ -0,0 +1,910 @@
+/*
+ * Copyright 2010, Intel Corporation
+ *
+ * This file is part of PowerTOP
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ * or just google for it.
+ *
+ * Authors:
+ *	Arjan van de Ven <arjan@linux.intel.com>
+ */
+/*
+ * Performance events:
+ *
+ *	Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
+ *	Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
+ *	Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
+ *
+ * Data type definitions, declarations, prototypes.
+ *
+ * Started by: Thomas Gleixner and Ingo Molnar
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_PERF_EVENT_H
+#define _LINUX_PERF_EVENT_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/byteorder.h>
+#include <sys/syscall.h>
+
+/*
+ * User-space ABI bits:
+ */
+
+/*
+ * attr.type
+ */
+enum perf_type_id {
+	PERF_TYPE_HARDWARE = 0,
+	PERF_TYPE_SOFTWARE = 1,
+	PERF_TYPE_TRACEPOINT = 2,
+	PERF_TYPE_HW_CACHE = 3,
+	PERF_TYPE_RAW = 4,
+
+	PERF_TYPE_MAX,	/* non-ABI */
+};
+
+/*
+ * Generalized performance event event_id types, used by the
+ * attr.event_id parameter of the sys_perf_event_open()
+ * syscall:
+ */
+enum perf_hw_id {
+	/*
+	 * Common hardware events, generalized by the kernel:
+	 */
+	PERF_COUNT_HW_CPU_CYCLES = 0,
+	PERF_COUNT_HW_INSTRUCTIONS = 1,
+	PERF_COUNT_HW_CACHE_REFERENCES = 2,
+	PERF_COUNT_HW_CACHE_MISSES = 3,
+	PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
+	PERF_COUNT_HW_BRANCH_MISSES = 5,
+	PERF_COUNT_HW_BUS_CYCLES = 6,
+
+	PERF_COUNT_HW_MAX,	/* non-ABI */
+};
+
+/*
+ * Generalized hardware cache events:
+ *
+ *	{ L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
+ *	{ read, write, prefetch } x
+ *	{ accesses, misses }
+ */
+enum perf_hw_cache_id {
+	PERF_COUNT_HW_CACHE_L1D = 0,
+	PERF_COUNT_HW_CACHE_L1I = 1,
+	PERF_COUNT_HW_CACHE_LL = 2,
+	PERF_COUNT_HW_CACHE_DTLB = 3,
+	PERF_COUNT_HW_CACHE_ITLB = 4,
+	PERF_COUNT_HW_CACHE_BPU = 5,
+
+	PERF_COUNT_HW_CACHE_MAX,	/* non-ABI */
+};
+
+enum perf_hw_cache_op_id {
+	PERF_COUNT_HW_CACHE_OP_READ = 0,
+	PERF_COUNT_HW_CACHE_OP_WRITE = 1,
+	PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
+
+	PERF_COUNT_HW_CACHE_OP_MAX,	/* non-ABI */
+};
+
+enum perf_hw_cache_op_result_id {
+	PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
+	PERF_COUNT_HW_CACHE_RESULT_MISS = 1,
+
+	PERF_COUNT_HW_CACHE_RESULT_MAX,	/* non-ABI */
+};
+
+/*
+ * Special "software" events provided by the kernel, even if the hardware
+ * does not support performance events. These events measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum perf_sw_ids {
+	PERF_COUNT_SW_CPU_CLOCK = 0,
+	PERF_COUNT_SW_TASK_CLOCK = 1,
+	PERF_COUNT_SW_PAGE_FAULTS = 2,
+	PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
+	PERF_COUNT_SW_CPU_MIGRATIONS = 4,
+	PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
+	PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
+
+	PERF_COUNT_SW_MAX,	/* non-ABI */
+};
+
+/*
+ * Bits that can be set in attr.sample_type to request information
+ * in the overflow packets.
+ */
+enum perf_event_sample_format {
+	PERF_SAMPLE_IP = 1U << 0,
+	PERF_SAMPLE_TID = 1U << 1,
+	PERF_SAMPLE_TIME = 1U << 2,
+	PERF_SAMPLE_ADDR = 1U << 3,
+	PERF_SAMPLE_READ = 1U << 4,
+	PERF_SAMPLE_CALLCHAIN = 1U << 5,
+	PERF_SAMPLE_ID = 1U << 6,
+	PERF_SAMPLE_CPU = 1U << 7,
+	PERF_SAMPLE_PERIOD = 1U << 8,
+	PERF_SAMPLE_STREAM_ID = 1U << 9,
+	PERF_SAMPLE_RAW = 1U << 10,
+
+	PERF_SAMPLE_MAX = 1U << 11,	/* non-ABI */
+};
+
+/*
+ * The format of the data returned by read() on a perf event fd,
+ * as specified by attr.read_format:
+ *
+ * struct read_format {
+ *	{ u64		value;
+ *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
+ *	  { u64		time_running; } && PERF_FORMAT_RUNNING
+ *	  { u64		id;           } && PERF_FORMAT_ID
+ *	} && !PERF_FORMAT_GROUP
+ *
+ *	{ u64		nr;
+ *	  { u64		time_enabled; } && PERF_FORMAT_ENABLED
+ *	  { u64		time_running; } && PERF_FORMAT_RUNNING
+ *	  { u64		value;
+ *	    { u64	id;           } && PERF_FORMAT_ID
+ *	  }		cntr[nr];
+ *	} && PERF_FORMAT_GROUP
+ * };
+ */
+enum perf_event_read_format {
+	PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
+	PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
+	PERF_FORMAT_ID = 1U << 2,
+	PERF_FORMAT_GROUP = 1U << 3,
+
+	PERF_FORMAT_MAX = 1U << 4,	/* non-ABI */
+};
+
+#define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
+
+/*
+ * Hardware event_id to monitor via a performance monitoring event:
+ */
+struct perf_event_attr {
+
+	/*
+	 * Major type: hardware/software/tracepoint/etc.
+	 */
+	__u32 type;
+
+	/*
+	 * Size of the attr structure, for fwd/bwd compat.
+	 */
+	__u32 size;
+
+	/*
+	 * Type specific configuration information.
+	 */
+	__u64 config;
+
+	union {
+		__u64 sample_period;
+		__u64 sample_freq;
+	};
+
+	__u64 sample_type;
+	__u64 read_format;
+
+	__u64 disabled       : 1,	/* off by default        */
+	      inherit	     : 1,	/* children inherit it   */
+	      pinned	     : 1,	/* must always be on PMU */
+	      exclusive      : 1,	/* only group on PMU     */
+	      exclude_user   : 1,	/* don't count user      */
+	      exclude_kernel : 1,	/* ditto kernel          */
+	      exclude_hv     : 1,	/* ditto hypervisor      */
+	      exclude_idle   : 1,	/* don't count when idle */
+	      mmap           : 1,	/* include mmap data     */
+	      comm	     : 1,	/* include comm data     */
+	      freq           : 1,	/* use freq, not period  */
+	      inherit_stat   : 1,	/* per task counts       */
+	      enable_on_exec : 1,	/* next exec enables     */
+	      task           : 1,	/* trace fork/exit       */
+	      watermark      : 1,	/* wakeup_watermark      */
+
+	      __reserved_1   : 49;
+
+	union {
+		__u32 wakeup_events;	/* wakeup every n events */
+		__u32 wakeup_watermark;	/* bytes before wakeup   */
+	};
+	__u32 __reserved_2;
+
+	__u64 __reserved_3;
+};
+
+/*
+ * Ioctls that can be done on a perf event fd:
+ */
+#define PERF_EVENT_IOC_ENABLE		_IO ('$', 0)
+#define PERF_EVENT_IOC_DISABLE		_IO ('$', 1)
+#define PERF_EVENT_IOC_REFRESH		_IO ('$', 2)
+#define PERF_EVENT_IOC_RESET		_IO ('$', 3)
+#define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, u64)
+#define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
+
+enum perf_event_ioc_flags {
+	PERF_IOC_FLAG_GROUP = 1U << 0,
+};
+
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_event_mmap_page {
+	__u32	version;		/* version number of this structure */
+	__u32	compat_version;		/* lowest version this is compat with */
+
+	/*
+	 * Bits needed to read the hw events in user-space.
+	 *
+	 *   u32 seq;
+	 *   s64 count;
+	 *
+	 *   do {
+	 *     seq = pc->lock;
+	 *
+	 *     barrier()
+	 *     if (pc->index) {
+	 *       count = pmc_read(pc->index - 1);
+	 *       count += pc->offset;
+	 *     } else
+	 *       goto regular_read;
+	 *
+	 *     barrier();
+	 *   } while (pc->lock != seq);
+	 *
+	 * NOTE: for obvious reason this only works on self-monitoring
+	 *       processes.
+	 */
+	__u32	lock;			/* seqlock for synchronization */
+	__u32	index;			/* hardware event identifier */
+	__s64	offset;			/* add to hardware event value */
+	__u64	time_enabled;		/* time event active */
+	__u64	time_running;		/* time event on cpu */
+
+	/*
+	 * Hole for extension of the self monitor capabilities
+	 */
+
+	__u64	__reserved[123];	/* align to 1k */
+
+	/*
+	 * Control data for the mmap() data buffer.
+	 *
+	 * User-space reading the @data_head value should issue an rmb(), on
+	 * SMP capable platforms, after reading this value -- see
+	 * perf_event_wakeup().
+	 *
+	 * When the mapping is PROT_WRITE the @data_tail value should be
+	 * written by userspace to reflect the last read data. In this case
+	 * the kernel will not over-write unread data.
+	 */
+	__u64	data_head;		/* head in the data section */
+	__u64	data_tail;		/* user-space written tail */
+};
+
+#define PERF_RECORD_MISC_CPUMODE_MASK		(3 << 0)
+#define PERF_RECORD_MISC_CPUMODE_UNKNOWN	(0 << 0)
+#define PERF_RECORD_MISC_KERNEL			(1 << 0)
+#define PERF_RECORD_MISC_USER			(2 << 0)
+#define PERF_RECORD_MISC_HYPERVISOR		(3 << 0)
+
+struct perf_event_header {
+	__u32	type;
+	__u16	misc;
+	__u16	size;
+};
+
+enum perf_event_type {
+
+	/*
+	 * The MMAP events record the PROT_EXEC mappings so that we can
+	 * correlate userspace IPs to code. They have the following structure:
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u32				pid, tid;
+	 *	u64				addr;
+	 *	u64				len;
+	 *	u64				pgoff;
+	 *	char				filename[];
+	 * };
+	 */
+	PERF_RECORD_MMAP = 1,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				id;
+	 *	u64				lost;
+	 * };
+	 */
+	PERF_RECORD_LOST = 2,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u32				pid, tid;
+	 *	char				comm[];
+	 * };
+	 */
+	PERF_RECORD_COMM = 3,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid, ppid;
+	 *	u32				tid, ptid;
+	 *	u64				time;
+	 * };
+	 */
+	PERF_RECORD_EXIT = 4,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				time;
+	 *	u64				id;
+	 *	u64				stream_id;
+	 * };
+	 */
+	PERF_RECORD_THROTTLE = 5,
+	PERF_RECORD_UNTHROTTLE = 6,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid, ppid;
+	 *	u32				tid, ptid;
+	 *	u64				time;
+	 * };
+	 */
+	PERF_RECORD_FORK = 7,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid, tid;
+	 *
+	 *	struct read_format		values;
+	 * };
+	 */
+	PERF_RECORD_READ = 8,
+
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	{ u64			ip;	  } && PERF_SAMPLE_IP
+	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
+	 *	{ u64			time;     } && PERF_SAMPLE_TIME
+	 *	{ u64			addr;     } && PERF_SAMPLE_ADDR
+	 *	{ u64			id;	  } && PERF_SAMPLE_ID
+	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
+	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
+	 *	{ u64			period;   } && PERF_SAMPLE_PERIOD
+	 *
+	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
+	 *
+	 *	{ u64			nr,
+	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
+	 *
+	 *	#
+	 *	# The RAW record below is opaque data wrt the ABI
+	 *	#
+	 *	# That is, the ABI doesn't make any promises wrt to
+	 *	# the stability of its content, it may vary depending
+	 *	# on event, hardware, kernel version and phase of
+	 *	# the moon.
+	 *	#
+	 *	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
+	 *	#
+	 *
+	 *	{ u32			size;
+	 *	  char			data[size];}&& PERF_SAMPLE_RAW
+	 * };
+	 */
+	PERF_RECORD_SAMPLE = 9,
+
+	PERF_RECORD_MAX,	/* non-ABI */
+};
+
+enum perf_callchain_context {
+	PERF_CONTEXT_HV = (__u64)-32,
+	PERF_CONTEXT_KERNEL = (__u64)-128,
+	PERF_CONTEXT_USER = (__u64)-512,
+
+	PERF_CONTEXT_GUEST = (__u64)-2048,
+	PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
+	PERF_CONTEXT_GUEST_USER = (__u64)-2560,
+
+	PERF_CONTEXT_MAX = (__u64)-4095,
+};
+
+#define PERF_FLAG_FD_NO_GROUP	(1U << 0)
+#define PERF_FLAG_FD_OUTPUT	(1U << 1)
+
+#ifdef __KERNEL__
+/*
+ * Kernel-internal data types and definitions:
+ */
+
+#ifdef CONFIG_PERF_EVENTS
+# include <asm/perf_event.h>
+#endif
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <linux/hrtimer.h>
+#include <linux/fs.h>
+#include <linux/pid_namespace.h>
+#include <linux/workqueue.h>
+#include <asm/atomic.h>
+
+#define PERF_MAX_STACK_DEPTH	255
+
+struct perf_callchain_entry {
+	__u64	nr;
+	__u64	ip[PERF_MAX_STACK_DEPTH];
+};
+
+struct perf_raw_record {
+	u32	size;
+	void	*data;
+};
+
+struct task_struct;
+
+/**
+ * struct hw_perf_event - performance event hardware details:
+ */
+struct hw_perf_event {
+#ifdef CONFIG_PERF_EVENTS
+	union {
+		struct { /* hardware */
+			u64		config;
+			unsigned long	config_base;
+			unsigned long	event_base;
+			int		idx;
+		};
+		struct { /* software */
+			s64		remaining;
+			struct hrtimer	hrtimer;
+		};
+	};
+	atomic64_t	prev_count;
+	u64		sample_period;
+	u64		last_period;
+	atomic64_t	period_left;
+	u64		interrupts;
+
+	u64		freq_count;
+	u64		freq_interrupts;
+	u64		freq_stamp;
+#endif
+};
+
+struct perf_event;
+
+/**
+ * struct pmu - generic performance monitoring unit
+ */
+struct pmu {
+	int (*enable)		(struct perf_event *event);
+	void (*disable)		(struct perf_event *event);
+	void (*read)		(struct perf_event *event);
+	void (*unthrottle)	(struct perf_event *event);
+};
+
+/**
+ * enum perf_event_active_state - the states of a event
+ */
+enum perf_event_active_state {
+	PERF_EVENT_STATE_ERROR = -2,
+	PERF_EVENT_STATE_OFF = -1,
+	PERF_EVENT_STATE_INACTIVE = 0,
+	PERF_EVENT_STATE_ACTIVE = 1,
+};
+
+struct file;
+
+struct perf_mmap_data {
+	struct rcu_head			rcu_head;
+#ifdef CONFIG_PERF_USE_VMALLOC
+	struct work_struct		work;
+#endif
+	int				data_order;
+	int				nr_pages;	/* nr of data pages */
+	int				writable;	/* are we writable */
+	int				nr_locked;	/* nr pages mlocked */
+
+	atomic_t			poll;		/* POLL_ for wakeups */
+	atomic_t			events;		/* event_id limit */
+
+	atomic_long_t			head;		/* write position */
+	atomic_long_t			done_head;	/* completed head */
+
+	atomic_t			lock;		/* concurrent writes */
+	atomic_t			wakeup;		/* needs a wakeup */
+	atomic_t			lost;		/* nr records lost */
+
+	long				watermark;	/* wakeup watermark */
+
+	struct perf_event_mmap_page	*user_page;
+	void				*data_pages[0];
+};
+
+struct perf_pending_entry {
+	struct perf_pending_entry *next;
+	void (*func)(struct perf_pending_entry *);
+};
+
+/**
+ * struct perf_event - performance event kernel representation:
+ */
+struct perf_event {
+#ifdef CONFIG_PERF_EVENTS
+	struct list_head		group_entry;
+	struct list_head		event_entry;
+	struct list_head		sibling_list;
+	int				nr_siblings;
+	struct perf_event		*group_leader;
+	struct perf_event		*output;
+	const struct pmu		*pmu;
+
+	enum perf_event_active_state	state;
+	atomic64_t			count;
+
+	/*
+	 * These are the total time in nanoseconds that the event
+	 * has been enabled (i.e. eligible to run, and the task has
+	 * been scheduled in, if this is a per-task event)
+	 * and running (scheduled onto the CPU), respectively.
+	 *
+	 * They are computed from tstamp_enabled, tstamp_running and
+	 * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
+	 */
+	u64				total_time_enabled;
+	u64				total_time_running;
+
+	/*
+	 * These are timestamps used for computing total_time_enabled
+	 * and total_time_running when the event is in INACTIVE or
+	 * ACTIVE state, measured in nanoseconds from an arbitrary point
+	 * in time.
+	 * tstamp_enabled: the notional time when the event was enabled
+	 * tstamp_running: the notional time when the event was scheduled on
+	 * tstamp_stopped: in INACTIVE state, the notional time when the
+	 *	event was scheduled off.
+	 */
+	u64				tstamp_enabled;
+	u64				tstamp_running;
+	u64				tstamp_stopped;
+
+	struct perf_event_attr		attr;
+	struct hw_perf_event		hw;
+
+	struct perf_event_context	*ctx;
+	struct file			*filp;
+
+	/*
+	 * These accumulate total time (in nanoseconds) that children
+	 * events have been enabled and running, respectively.
+	 */
+	atomic64_t			child_total_time_enabled;
+	atomic64_t			child_total_time_running;
+
+	/*
+	 * Protect attach/detach and child_list:
+	 */
+	struct mutex			child_mutex;
+	struct list_head		child_list;
+	struct perf_event		*parent;
+
+	int				oncpu;
+	int				cpu;
+
+	struct list_head		owner_entry;
+	struct task_struct		*owner;
+
+	/* mmap bits */
+	struct mutex			mmap_mutex;
+	atomic_t			mmap_count;
+	struct perf_mmap_data		*data;
+
+	/* poll related */
+	wait_queue_head_t		waitq;
+	struct fasync_struct		*fasync;
+
+	/* delayed work for NMIs and such */
+	int				pending_wakeup;
+	int				pending_kill;
+	int				pending_disable;
+	struct perf_pending_entry	pending;
+
+	atomic_t			event_limit;
+
+	void (*destroy)(struct perf_event *);
+	struct rcu_head			rcu_head;
+
+	struct pid_namespace		*ns;
+	u64				id;
+#endif
+};
+
+/**
+ * struct perf_event_context - event context structure
+ *
+ * Used as a container for task events and CPU events as well:
+ */
+struct perf_event_context {
+	/*
+	 * Protect the states of the events in the list,
+	 * nr_active, and the list:
+	 */
+	spinlock_t			lock;
+	/*
+	 * Protect the list of events. Locking either mutex or lock
+	 * is sufficient to ensure the list doesn't change; to change
+	 * the list you need to lock both the mutex and the spinlock.
+	 */
+	struct mutex			mutex;
+
+	struct list_head		group_list;
+	struct list_head		event_list;
+	int				nr_events;
+	int				nr_active;
+	int				is_active;
+	int				nr_stat;
+	atomic_t			refcount;
+	struct task_struct		*task;
+
+	/*
+	 * Context clock, runs when context enabled.
+	 */
+	u64				time;
+	u64				timestamp;
+
+	/*
+	 * These fields let us detect when two contexts have both
+	 * been cloned (inherited) from a common ancestor.
+	 */
+	struct perf_event_context	*parent_ctx;
+	u64				parent_gen;
+	u64				generation;
+	int				pin_count;
+	struct rcu_head			rcu_head;
+};
+
+/**
+ * struct perf_event_cpu_context - per cpu event context structure
+ */
+struct perf_cpu_context {
+	struct perf_event_context	ctx;
+	struct perf_event_context	*task_ctx;
+	int				active_oncpu;
+	int				max_pertask;
+	int				exclusive;
+
+	/*
+	 * Recursion avoidance:
+	 *
+	 * task, softirq, irq, nmi context
+	 */
+	int				recursion[4];
+};
+
+struct perf_output_handle {
+	struct perf_event	*event;
+	struct perf_mmap_data	*data;
+	unsigned long		head;
+	unsigned long		offset;
+	int			nmi;
+	int			sample;
+	int			locked;
+	unsigned long		flags;
+};
+
+#ifdef CONFIG_PERF_EVENTS
+
+/*
+ * Set by architecture code:
+ */
+extern int perf_max_events;
+
+extern const struct pmu *hw_perf_event_init(struct perf_event *event);
+
+extern void perf_event_task_sched_in(struct task_struct *task, int cpu);
+extern void perf_event_task_sched_out(struct task_struct *task,
+					struct task_struct *next, int cpu);
+extern void perf_event_task_tick(struct task_struct *task, int cpu);
+extern int perf_event_init_task(struct task_struct *child);
+extern void perf_event_exit_task(struct task_struct *child);
+extern void perf_event_free_task(struct task_struct *task);
+extern void set_perf_event_pending(void);
+extern void perf_event_do_pending(void);
+extern void perf_event_print_debug(void);
+extern void __perf_disable(void);
+extern bool __perf_enable(void);
+extern void perf_disable(void);
+extern void perf_enable(void);
+extern int perf_event_task_disable(void);
+extern int perf_event_task_enable(void);
+extern int hw_perf_group_sched_in(struct perf_event *group_leader,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_event_context *ctx, int cpu);
+extern void perf_event_update_userpage(struct perf_event *event);
+
+struct perf_sample_data {
+	u64				type;
+
+	u64				ip;
+	struct {
+		u32	pid;
+		u32	tid;
+	}				tid_entry;
+	u64				time;
+	u64				addr;
+	u64				id;
+	u64				stream_id;
+	struct {
+		u32	cpu;
+		u32	reserved;
+	}				cpu_entry;
+	u64				period;
+	struct perf_callchain_entry	*callchain;
+	struct perf_raw_record		*raw;
+};
+
+extern void perf_output_sample(struct perf_output_handle *handle,
+			       struct perf_event_header *header,
+			       struct perf_sample_data *data,
+			       struct perf_event *event);
+extern void perf_prepare_sample(struct perf_event_header *header,
+				struct perf_sample_data *data,
+				struct perf_event *event,
+				struct pt_regs *regs);
+
+extern int perf_event_overflow(struct perf_event *event, int nmi,
+				 struct perf_sample_data *data,
+				 struct pt_regs *regs);
+
+/*
+ * Return 1 for a software event, 0 for a hardware event
+ */
+static inline int is_software_event(struct perf_event *event)
+{
+	return (event->attr.type != PERF_TYPE_RAW) &&
+		(event->attr.type != PERF_TYPE_HARDWARE) &&
+		(event->attr.type != PERF_TYPE_HW_CACHE);
+}
+
+extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
+
+extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
+
+static inline void
+perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
+{
+	if (atomic_read(&perf_swevent_enabled[event_id]))
+		__perf_sw_event(event_id, nr, nmi, regs, addr);
+}
+
+extern void __perf_event_mmap(struct vm_area_struct *vma);
+
+static inline void perf_event_mmap(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_EXEC)
+		__perf_event_mmap(vma);
+}
+
+extern void perf_event_comm(struct task_struct *tsk);
+extern void perf_event_fork(struct task_struct *tsk);
+
+extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
+
+extern int sysctl_perf_event_paranoid;
+extern int sysctl_perf_event_mlock;
+extern int sysctl_perf_event_sample_rate;
+
+extern void perf_event_init(void);
+extern void perf_tp_event(int event_id, u64 addr, u64 count,
+				 void *record, int entry_size);
+
+#ifndef perf_misc_flags
+#define perf_misc_flags(regs)	(user_mode(regs) ? PERF_RECORD_MISC_USER : \
+				 PERF_RECORD_MISC_KERNEL)
+#define perf_instruction_pointer(regs)	instruction_pointer(regs)
+#endif
+
+extern int perf_output_begin(struct perf_output_handle *handle,
+			     struct perf_event *event, unsigned int size,
+			     int nmi, int sample);
+extern void perf_output_end(struct perf_output_handle *handle);
+extern void perf_output_copy(struct perf_output_handle *handle,
+			     const void *buf, unsigned int len);
+#else
+static inline void
+perf_event_task_sched_in(struct task_struct *task, int cpu)		{ }
+static inline void
+perf_event_task_sched_out(struct task_struct *task,
+			    struct task_struct *next, int cpu)		{ }
+static inline void
+perf_event_task_tick(struct task_struct *task, int cpu)		{ }
+static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
+static inline void perf_event_exit_task(struct task_struct *child)	{ }
+static inline void perf_event_free_task(struct task_struct *task)	{ }
+static inline void perf_event_do_pending(void)				{ }
+static inline void perf_event_print_debug(void)				{ }
+static inline void perf_disable(void)					{ }
+static inline void perf_enable(void)					{ }
+static inline int perf_event_task_disable(void)				{ return -EINVAL; }
+static inline int perf_event_task_enable(void)				{ return -EINVAL; }
+
+static inline void
+perf_sw_event(u32 event_id, u64 nr, int nmi,
+		     struct pt_regs *regs, u64 addr)			{ }
+
+static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
+static inline void perf_event_comm(struct task_struct *tsk)		{ }
+static inline void perf_event_fork(struct task_struct *tsk)		{ }
+static inline void perf_event_init(void)				{ }
+
+#endif
+
+#define perf_output_put(handle, x) \
+	perf_output_copy((handle), &(x), sizeof(x))
+
+#endif /* __KERNEL__ */
+
+#if 0
+/*
+ * trace_flag_type is an enumeration that holds different
+ * states when a trace occurs. These are:
+ *  IRQS_OFF		- interrupts were disabled
+ *  IRQS_NOSUPPORT	- arch does not support irqs_disabled_flags
+ *  NEED_RESCHED	- reschedule is requested
+ *  HARDIRQ		- inside an interrupt handler
+ *  SOFTIRQ		- inside a softirq handler
+ */
+enum trace_flag_type {
+	TRACE_FLAG_IRQS_OFF = 0x01,
+	TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
+	TRACE_FLAG_NEED_RESCHED = 0x04,
+	TRACE_FLAG_HARDIRQ = 0x08,
+	TRACE_FLAG_SOFTIRQ = 0x10,
+};
+#endif
+
+#endif /* _LINUX_PERF_EVENT_H */
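Editor's note: the data_head/data_tail contract documented in perf_event_mmap_page above is the core of the mmap ABI: read data_head, issue an rmb(), consume records, then publish data_tail so the kernel knows not to overwrite them. A minimal reader sketch under those rules; the rmb() stand-in is illustrative, and the assumption that records never straddle the end of the data area is a simplification this commit's perf.cpp also makes:

#include <stdint.h>
#include <stddef.h>
#include "perf_event.h"

#define rmb() __sync_synchronize()	/* stand-in for the arch rmb() primitive */

/* One pass over the mmap'ed ring buffer, following the
 * perf_event_mmap_page rules quoted in the header above. */
static void drain_ring(struct perf_event_mmap_page *pc, unsigned char *data,
		       size_t data_size, void (*emit)(struct perf_event_header *))
{
	uint64_t head = pc->data_head;
	rmb();				/* order the record reads after data_head */

	uint64_t tail = pc->data_tail;
	while (tail != head) {
		struct perf_event_header *hdr =
			(struct perf_event_header *)(data + (tail % data_size));
		if (hdr->size == 0)
			break;
		emit(hdr);
		tail += hdr->size;	/* offsets grow monotonically; wrap via % */
	}
	pc->data_tail = tail;		/* tell the kernel these records are consumed */
}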