// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
/*
 * Copyright 2013-2019 IBM Corp.
 */

#include <config.h>
#include <stdlib.h>
#include <assert.h>
#include <sched.h>
#include <stdint.h>
#include <unistd.h>
#include <stdio.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <skiboot-valgrind.h>

/* Don't include these: PPC-specific */
#define __CPU_H
#define __TIME_H
#define __PROCESSOR_H

#if defined(__i386__) || defined(__x86_64__)
/* This is more than a lwsync, but it'll work */
static void full_barrier(void)
{
	asm volatile("mfence" : : : "memory");
}
#define lwsync full_barrier
#elif defined(__powerpc__) || defined(__powerpc64__)
static inline void lwsync(void)
{
	asm volatile("lwsync" : : : "memory");
}
#else
#error "Define lwsync for this arch"
#endif

#define zalloc(size) calloc((size), 1)

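/*
 * Minimal stand-in for skiboot's struct cpu_thread: just the fields
 * the trace code touches, so each fake CPU can carry its own buffer.
 */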
struct cpu_thread {
	uint32_t pir;
	uint32_t chip_id;
	struct trace_info *trace;
	uint32_t server_no;
	bool is_secondary;
	struct cpu_thread *primary;
};
static struct cpu_thread *this_cpu(void);

#define CPUS 4

static struct cpu_thread fake_cpus[CPUS];

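/*
 * CPU iterator over the fake_cpus array, standing in for the real
 * topology walk that for_each_cpu() does in skiboot proper.
 */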
static inline struct cpu_thread *next_cpu(struct cpu_thread *cpu)
{
	if (cpu == NULL)
		return &fake_cpus[0];
	cpu++;
	if (cpu == &fake_cpus[CPUS])
		return NULL;
	return cpu;
}

#define first_cpu() next_cpu(NULL)

#define for_each_cpu(cpu) \
	for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu))

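/* mftb() normally reads the PPC timebase; here the tests drive its value. */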
static unsigned long timestamp;
static unsigned long mftb(void)
{
	return timestamp;
}

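/* Host-side stand-in for skiboot's allocator: ignore chip_id entirely. */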
static void *local_alloc(unsigned int chip_id,
			 size_t size, size_t align)
{
	void *p;

	(void)chip_id;
	if (posix_memalign(&p, align, size))
		p = NULL;
	return p;
}

struct dt_node;
extern struct dt_node *opal_node;

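/*
 * Compile the code under test directly into this file so it picks up
 * the host-side stubs above instead of the PPC-specific headers.
 */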
#include "../trace.c"

#include "../external/trace/trace.c"
static struct trace_reader trace_readers[CPUS];
struct trace_reader *my_trace_reader;
#include "../device.c"

char __rodata_start[1], __rodata_end[1];
struct dt_node *opal_node;
struct debug_descriptor debug_descriptor = {
	.trace_mask = -1
};

const char *nvram_query_safe(const char *key __unused)
{
	return NULL;
}

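/*
 * Uncontended lock stubs: every trace buffer has exactly one writer,
 * so the asserts catch any attempt to take a held lock or to drop a
 * free one.
 */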
void lock_caller(struct lock *l, const char *caller)
{
	(void)caller;
	assert(!l->lock_val);
	l->lock_val = 1;
}

void unlock(struct lock *l)
{
	assert(l->lock_val);
	l->lock_val = 0;
}

struct cpu_thread *my_fake_cpu;
static struct cpu_thread *this_cpu(void)
{
	return my_fake_cpu;
}

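/* Write far fewer entries under valgrind, so the test finishes in
 * reasonable time. */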
#include <sys/mman.h>
#define PER_CHILD_TRACES ((RUNNING_ON_VALGRIND) ? (1024*16) : (1024*1024))

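/*
 * Child body for the parallel test: emit PER_CHILD_TRACES entries
 * whose timestamps satisfy timestamp % CPUS == id, then a sentinel
 * entry (type 0x70) so the parent knows this writer has finished.
 */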
static void write_trace_entries(int id)
{
	void exit(int);
	unsigned int i;
	union trace trace;

	timestamp = id;
	for (i = 0; i < PER_CHILD_TRACES; i++) {
		timestamp = i * CPUS + id;
		assert(sizeof(trace.hdr) % 8 == 0);
		/* First child never repeats, second repeats once, etc. */
		trace_add(&trace, 3 + ((i / (id + 1)) % 0x40),
			  sizeof(trace.hdr));
	}

	/* Final entry has special type, so parent knows it's over. */
	trace_add(&trace, 0x70, sizeof(trace.hdr));
	exit(0);
}

static bool all_done(const bool done[])
{
	unsigned int i;

	for (i = 0; i < CPUS; i++)
		if (!done[i])
			return false;
	return true;
}

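/*
 * Fork one writer per fake CPU into a shared (MAP_SHARED) region and
 * read all CPUS streams back in the parent, tallying normal, repeat
 * and overflow records for each child.
 */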
static void test_parallel(void)
{
	void *p;
	unsigned int cpu;
	unsigned int i, counts[CPUS] = { 0 }, overflows[CPUS] = { 0 };
	unsigned int repeats[CPUS] = { 0 }, num_overflows[CPUS] = { 0 };
	bool done[CPUS] = { false };
	size_t len = sizeof(struct trace_info) + TBUF_SZ + sizeof(union trace);
	int last = 0;

	/* Use a shared mmap to test actual parallel buffers. */
	i = (CPUS * len + getpagesize() - 1) & ~(getpagesize() - 1);
	p = mmap(NULL, i, PROT_READ|PROT_WRITE,
		 MAP_ANONYMOUS|MAP_SHARED, -1, 0);
	assert(p != MAP_FAILED);

	for (i = 0; i < CPUS; i++) {
		fake_cpus[i].trace = p + i * len;
		fake_cpus[i].trace->tb.buf_size = cpu_to_be64(TBUF_SZ);
		fake_cpus[i].trace->tb.max_size = cpu_to_be32(sizeof(union trace));
		fake_cpus[i].is_secondary = false;
		memset(&trace_readers[i], 0, sizeof(struct trace_reader));
		trace_readers[i].tb = &fake_cpus[i].trace->tb;
	}

	for (i = 0; i < CPUS; i++) {
		if (!fork()) {
			/* Child. */
			my_fake_cpu = &fake_cpus[i];
			write_trace_entries(i);
		}
	}

	while (!all_done(done)) {
		union trace t;

		for (i = 0; i < CPUS; i++) {
			if (trace_get(&t, &trace_readers[(i + last) % CPUS]))
				break;
		}

		if (i == CPUS) {
			sched_yield();
			continue;
		}
		i = (i + last) % CPUS;
		last = i;

		if (t.hdr.type == TRACE_OVERFLOW) {
			/* Conveniently, each record is 16 bytes here. */
			assert(be64_to_cpu(t.overflow.bytes_missed) % 16 == 0);
			overflows[i] += be64_to_cpu(t.overflow.bytes_missed) / 16;
			num_overflows[i]++;
			continue;
		}

		assert(be16_to_cpu(t.hdr.cpu) < CPUS);
		assert(!done[be16_to_cpu(t.hdr.cpu)]);
		assert(be64_to_cpu(t.hdr.timestamp) % CPUS == be16_to_cpu(t.hdr.cpu));
		if (t.hdr.type == TRACE_REPEAT) {
			assert(t.hdr.len_div_8 * 8 == sizeof(t.repeat));
			assert(be16_to_cpu(t.repeat.num) != 0);
			assert(be16_to_cpu(t.repeat.num) <= be16_to_cpu(t.hdr.cpu));
			repeats[be16_to_cpu(t.hdr.cpu)] += be16_to_cpu(t.repeat.num);
		} else if (t.hdr.type == 0x70) {
			cpu = be16_to_cpu(t.hdr.cpu);
			assert(cpu < CPUS);
			done[cpu] = true;
		} else {
			cpu = be16_to_cpu(t.hdr.cpu);
			assert(cpu < CPUS);
			counts[cpu]++;
		}
	}

	/* Gather children. */
	for (i = 0; i < CPUS; i++) {
		int status;
		wait(&status);
	}

	for (i = 0; i < CPUS; i++) {
		printf("Child %u: %u produced, %u overflows, %llu total\n", i,
		       counts[i], overflows[i],
		       (unsigned long long)be64_to_cpu(fake_cpus[i].trace->tb.end));
		assert(counts[i] + repeats[i] <= PER_CHILD_TRACES);
	}
	/* Child 0 never repeats. */
	assert(repeats[0] == 0);
	assert(counts[0] + overflows[0] == PER_CHILD_TRACES);

	/*
	 * FIXME: Other children have some fuzz, since overflows may
	 * include repeat records we already read.  And odd-numbered
	 * overflows may include more repeat records than normal
	 * records (they alternate).
	 */
}

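/*
 * Serial tests: drive a single reader through wrap-around, overflow,
 * odd-length and repeat cases before running the parallel test.
 */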
int main(void)
{
	union trace minimal;
	union trace large;
	union trace trace;
	unsigned int i, j;

	opal_node = dt_new_root("opal");
	dt_new(dt_new(opal_node, "firmware"), "exports");
	for (i = 0; i < CPUS; i++) {
		fake_cpus[i].server_no = i;
		fake_cpus[i].pir = i;
		fake_cpus[i].is_secondary = (i & 0x1);
		fake_cpus[i].primary = &fake_cpus[i & ~0x1];
	}
	my_fake_cpu = &fake_cpus[0];
	my_trace_reader = &trace_readers[0];
	init_trace_buffers();

	for (i = 0; i < CPUS; i++) {
		trace_readers[i].tb = &fake_cpus[i].trace->tb;
		assert(trace_empty(&trace_readers[i]));
		assert(!trace_get(&trace, &trace_readers[i]));
	}

	assert(sizeof(trace.hdr) % 8 == 0);
	timestamp = 1;
	trace_add(&minimal, 100, sizeof(trace.hdr));
	assert(trace_get(&trace, my_trace_reader));
	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
	assert(be64_to_cpu(trace.hdr.timestamp) == timestamp);

	/* Make it wrap once. */
	for (i = 0; i < TBUF_SZ / (minimal.hdr.len_div_8 * 8) + 1; i++) {
		timestamp = i;
		trace_add(&minimal, 99 + (i % 2), sizeof(trace.hdr));
	}

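	/*
	 * We wrote one entry more than the buffer holds, so the reader
	 * must first see an overflow record for the single lost entry.
	 */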
	assert(trace_get(&trace, my_trace_reader));
	/* First one must be overflow marker. */
	assert(trace.hdr.type == TRACE_OVERFLOW);
	assert(trace.hdr.len_div_8 * 8 == sizeof(trace.overflow));
	assert(be64_to_cpu(trace.overflow.bytes_missed) == minimal.hdr.len_div_8 * 8);

	for (i = 0; i < TBUF_SZ / (minimal.hdr.len_div_8 * 8); i++) {
		assert(trace_get(&trace, my_trace_reader));
		assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
		assert(be64_to_cpu(trace.hdr.timestamp) == i + 1);
		assert(trace.hdr.type == 99 + ((i + 1) % 2));
	}
	assert(!trace_get(&trace, my_trace_reader));

	/* Now put in some weird-length ones, to test overlap.
	 * Last power of 2, minus 8. */
	for (j = 0; (1 << j) < sizeof(large); j++);
	for (i = 0; i < TBUF_SZ; i++) {
		timestamp = i;
		trace_add(&large, 100 + (i % 2), (1 << (j - 1)));
	}
	assert(trace_get(&trace, my_trace_reader));
	assert(trace.hdr.type == TRACE_OVERFLOW);
	assert(trace_get(&trace, my_trace_reader));
	assert(trace.hdr.len_div_8 == large.hdr.len_div_8);
	i = be64_to_cpu(trace.hdr.timestamp);
	while (trace_get(&trace, my_trace_reader))
		assert(be64_to_cpu(trace.hdr.timestamp) == ++i);

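	/*
	 * trace.repeat.num is 16 bits, so 65538 identical entries must
	 * surface as a record, a repeat of 65535, another record, and
	 * a repeat of 1.
	 */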
	/* Test repeats. */
	for (i = 0; i < 65538; i++) {
		timestamp = i;
		trace_add(&minimal, 100, sizeof(trace.hdr));
	}
	timestamp = i;
	trace_add(&minimal, 101, sizeof(trace.hdr));
	timestamp = i + 1;
	trace_add(&minimal, 101, sizeof(trace.hdr));

	assert(trace_get(&trace, my_trace_reader));
	assert(be64_to_cpu(trace.hdr.timestamp) == 0);
	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
	assert(trace.hdr.type == 100);
	assert(trace_get(&trace, my_trace_reader));
	assert(trace.hdr.type == TRACE_REPEAT);
	assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
	assert(be16_to_cpu(trace.repeat.num) == 65535);
	assert(be64_to_cpu(trace.repeat.timestamp) == 65535);
	assert(trace_get(&trace, my_trace_reader));
	assert(be64_to_cpu(trace.hdr.timestamp) == 65536);
	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
	assert(trace.hdr.type == 100);
	assert(trace_get(&trace, my_trace_reader));
	assert(trace.hdr.type == TRACE_REPEAT);
	assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
	assert(be16_to_cpu(trace.repeat.num) == 1);
	assert(be64_to_cpu(trace.repeat.timestamp) == 65537);

	assert(trace_get(&trace, my_trace_reader));
	assert(be64_to_cpu(trace.hdr.timestamp) == 65538);
	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
	assert(trace.hdr.type == 101);
	assert(trace_get(&trace, my_trace_reader));
	assert(trace.hdr.type == TRACE_REPEAT);
	assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
	assert(be16_to_cpu(trace.repeat.num) == 1);
	assert(be64_to_cpu(trace.repeat.timestamp) == 65539);

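	/*
	 * With the reader keeping pace, each duplicate entry becomes a
	 * repeat of 1, except every 65536th, where the repeat count
	 * would wrap and a fresh record is emitted instead.
	 */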
	/* Now, test adding repeat while we're reading... */
	timestamp = 0;
	trace_add(&minimal, 100, sizeof(trace.hdr));
	assert(trace_get(&trace, my_trace_reader));
	assert(be64_to_cpu(trace.hdr.timestamp) == 0);
	assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
	assert(trace.hdr.type == 100);

	for (i = 1; i < TBUF_SZ; i++) {
		timestamp = i;
		trace_add(&minimal, 100, sizeof(trace.hdr));
		assert(trace_get(&trace, my_trace_reader));
		if (i % 65536 == 0) {
			assert(trace.hdr.type == 100);
			assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
		} else {
			assert(trace.hdr.type == TRACE_REPEAT);
			assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
			assert(be16_to_cpu(trace.repeat.num) == 1);
		}
		assert(be64_to_cpu(trace.repeat.timestamp) == i);
		assert(!trace_get(&trace, my_trace_reader));
	}

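	/* Secondary threads share their primary's buffer: free primaries only. */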
	for (i = 0; i < CPUS; i++)
		if (!fake_cpus[i].is_secondary)
			free(fake_cpus[i].trace);

	test_parallel();

	return 0;
}