From 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 27 Apr 2024 12:05:51 +0200 Subject: Adding upstream version 5.10.209. Signed-off-by: Daniel Baumann --- kernel/trace/trace_events_filter.c | 2371 ++++++++++++++++++++++++++++++++++++ 1 file changed, 2371 insertions(+) create mode 100644 kernel/trace/trace_events_filter.c (limited to 'kernel/trace/trace_events_filter.c') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c new file mode 100644 index 000000000..c1db5b62d --- /dev/null +++ b/kernel/trace/trace_events_filter.c @@ -0,0 +1,2371 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * trace_events_filter - generic event filtering + * + * Copyright (C) 2009 Tom Zanussi + */ + +#include +#include +#include +#include +#include +#include + +#include "trace.h" +#include "trace_output.h" + +#define DEFAULT_SYS_FILTER_MESSAGE \ + "### global filter ###\n" \ + "# Use this to set filters for multiple events.\n" \ + "# Only events with the given fields will be affected.\n" \ + "# If no events are modified, an error message will be displayed here" + +/* Due to token parsing '<=' must be before '<' and '>=' must be before '>' */ +#define OPS \ + C( OP_GLOB, "~" ), \ + C( OP_NE, "!=" ), \ + C( OP_EQ, "==" ), \ + C( OP_LE, "<=" ), \ + C( OP_LT, "<" ), \ + C( OP_GE, ">=" ), \ + C( OP_GT, ">" ), \ + C( OP_BAND, "&" ), \ + C( OP_MAX, NULL ) + +#undef C +#define C(a, b) a + +enum filter_op_ids { OPS }; + +#undef C +#define C(a, b) b + +static const char * ops[] = { OPS }; + +/* + * pred functions are OP_LE, OP_LT, OP_GE, OP_GT, and OP_BAND + * pred_funcs_##type below must match the order of them above. 
+ */ +#define PRED_FUNC_START OP_LE +#define PRED_FUNC_MAX (OP_BAND - PRED_FUNC_START) + +#define ERRORS \ + C(NONE, "No error"), \ + C(INVALID_OP, "Invalid operator"), \ + C(TOO_MANY_OPEN, "Too many '('"), \ + C(TOO_MANY_CLOSE, "Too few '('"), \ + C(MISSING_QUOTE, "Missing matching quote"), \ + C(OPERAND_TOO_LONG, "Operand too long"), \ + C(EXPECT_STRING, "Expecting string field"), \ + C(EXPECT_DIGIT, "Expecting numeric field"), \ + C(ILLEGAL_FIELD_OP, "Illegal operation for field type"), \ + C(FIELD_NOT_FOUND, "Field not found"), \ + C(ILLEGAL_INTVAL, "Illegal integer value"), \ + C(BAD_SUBSYS_FILTER, "Couldn't find or set field in one of a subsystem's events"), \ + C(TOO_MANY_PREDS, "Too many terms in predicate expression"), \ + C(INVALID_FILTER, "Meaningless filter expression"), \ + C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \ + C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \ + C(ERRNO, "Error"), \ + C(NO_FILTER, "No filter found") + +#undef C +#define C(a, b) FILT_ERR_##a + +enum { ERRORS }; + +#undef C +#define C(a, b) b + +static const char *err_text[] = { ERRORS }; + +/* Called after a '!' character but "!=" and "!~" are not "not"s */ +static bool is_not(const char *str) +{ + switch (str[1]) { + case '=': + case '~': + return false; + } + return true; +} + +/** + * prog_entry - a singe entry in the filter program + * @target: Index to jump to on a branch (actually one minus the index) + * @when_to_branch: The value of the result of the predicate to do a branch + * @pred: The predicate to execute. 
+ */ +struct prog_entry { + int target; + int when_to_branch; + struct filter_pred *pred; +}; + +/** + * update_preds- assign a program entry a label target + * @prog: The program array + * @N: The index of the current entry in @prog + * @when_to_branch: What to assign a program entry for its branch condition + * + * The program entry at @N has a target that points to the index of a program + * entry that can have its target and when_to_branch fields updated. + * Update the current program entry denoted by index @N target field to be + * that of the updated entry. This will denote the entry to update if + * we are processing an "||" after an "&&" + */ +static void update_preds(struct prog_entry *prog, int N, int invert) +{ + int t, s; + + t = prog[N].target; + s = prog[t].target; + prog[t].when_to_branch = invert; + prog[t].target = N; + prog[N].target = s; +} + +struct filter_parse_error { + int lasterr; + int lasterr_pos; +}; + +static void parse_error(struct filter_parse_error *pe, int err, int pos) +{ + pe->lasterr = err; + pe->lasterr_pos = pos; +} + +typedef int (*parse_pred_fn)(const char *str, void *data, int pos, + struct filter_parse_error *pe, + struct filter_pred **pred); + +enum { + INVERT = 1, + PROCESS_AND = 2, + PROCESS_OR = 4, +}; + +/* + * Without going into a formal proof, this explains the method that is used in + * parsing the logical expressions. 
+ * + * For example, if we have: "a && !(!b || (c && g)) || d || e && !f" + * The first pass will convert it into the following program: + * + * n1: r=a; l1: if (!r) goto l4; + * n2: r=b; l2: if (!r) goto l4; + * n3: r=c; r=!r; l3: if (r) goto l4; + * n4: r=g; r=!r; l4: if (r) goto l5; + * n5: r=d; l5: if (r) goto T + * n6: r=e; l6: if (!r) goto l7; + * n7: r=f; r=!r; l7: if (!r) goto F + * T: return TRUE + * F: return FALSE + * + * To do this, we use a data structure to represent each of the above + * predicate and conditions that has: + * + * predicate, when_to_branch, invert, target + * + * The "predicate" will hold the function to determine the result "r". + * The "when_to_branch" denotes what "r" should be if a branch is to be taken + * "&&" would contain "!r" or (0) and "||" would contain "r" or (1). + * The "invert" holds whether the value should be reversed before testing. + * The "target" contains the label "l#" to jump to. + * + * A stack is created to hold values when parentheses are used. + * + * To simplify the logic, the labels will start at 0 and not 1. + * + * The possible invert values are 1 and 0. The number of "!"s that are in scope + * before the predicate determines the invert value, if the number is odd then + * the invert value is 1 and 0 otherwise. This means the invert value only + * needs to be toggled when a new "!" is introduced compared to what is stored + * on the stack, where parentheses were used. + * + * The top of the stack and "invert" are initialized to zero. + * + * ** FIRST PASS ** + * + * #1 A loop through all the tokens is done: + * + * #2 If the token is an "(", the stack is push, and the current stack value + * gets the current invert value, and the loop continues to the next token. + * The top of the stack saves the "invert" value to keep track of what + * the current inversion is. As "!(a && !b || c)" would require all + * predicates being affected separately by the "!" before the parentheses. 
+ * And that would end up being equivalent to "(!a || b) && !c" + * + * #3 If the token is an "!", the current "invert" value gets inverted, and + * the loop continues. Note, if the next token is a predicate, then + * this "invert" value is only valid for the current program entry, + * and does not affect other predicates later on. + * + * The only other acceptable token is the predicate string. + * + * #4 A new entry into the program is added saving: the predicate and the + * current value of "invert". The target is currently assigned to the + * previous program index (this will not be its final value). + * + * #5 We now enter another loop and look at the next token. The only valid + * tokens are ")", "&&", "||" or end of the input string "\0". + * + * #6 The invert variable is reset to the current value saved on the top of + * the stack. + * + * #7 The top of the stack holds not only the current invert value, but also + * if a "&&" or "||" needs to be processed. Note, the "&&" takes higher + * precedence than "||". That is "a && b || c && d" is equivalent to + * "(a && b) || (c && d)". Thus the first thing to do is to see if "&&" needs + * to be processed. This is the case if an "&&" was the last token. If it was + * then we call update_preds(). This takes the program, the current index in + * the program, and the current value of "invert". More will be described + * below about this function. + * + * #8 If the next token is "&&" then we set a flag in the top of the stack + * that denotes that "&&" needs to be processed, break out of this loop + * and continue with the outer loop. + * + * #9 Otherwise, if a "||" needs to be processed then update_preds() is called. + * This is called with the program, the current index in the program, but + * this time with an inverted value of "invert" (that is !invert). This is + * because the value taken will become the "when_to_branch" value of the + * program. + * Note, this is called when the next token is not an "&&". 
As stated before, + * "&&" takes higher precedence, and "||" should not be processed yet if the + * next logical operation is "&&". + * + * #10 If the next token is "||" then we set a flag in the top of the stack + * that denotes that "||" needs to be processed, break out of this loop + * and continue with the outer loop. + * + * #11 If this is the end of the input string "\0" then we break out of both + * loops. + * + * #12 Otherwise, the next token is ")", where we pop the stack and continue + * this inner loop. + * + * Now to discuss the update_preds() function, as that is key to the setting up + * of the program. Remember the "target" of the program is initialized to the + * previous index and not the "l" label. The target holds the index into the + * program that gets affected by the operand. Thus if we have something like + * "a || b && c", when we process "a" the target will be "-1" (undefined). + * When we process "b", its target is "0", which is the index of "a", as that's + * the predicate that is affected by "||". But because the next token after "b" + * is "&&" we don't call update_preds(). Instead continue to "c". As the + * next token after "c" is not "&&" but the end of input, we first process the + * "&&" by calling update_preds() for the "&&" then we process the "||" by + * calling update_preds() with the values for processing "||". + * + * What does that mean? What update_preds() does is to first save the "target" + * of the program entry indexed by the current program entry's "target" + * (remember the "target" is initialized to previous program entry), and then + * sets that "target" to the current index which represents the label "l#". + * That entry's "when_to_branch" is set to the value passed in (the "invert" + * or "!invert"). Then it sets the current program entry's target to the saved + * "target" value (the old value of the program that had its "target" updated + * to the label). 
+ * + * Looking back at "a || b && c", we have the following steps: + * "a" - prog[0] = { "a", X, -1 } // pred, when_to_branch, target + * "||" - flag that we need to process "||"; continue outer loop + * "b" - prog[1] = { "b", X, 0 } + * "&&" - flag that we need to process "&&"; continue outer loop + * (Notice we did not process "||") + * "c" - prog[2] = { "c", X, 1 } + * update_preds(prog, 2, 0); // invert = 0 as we are processing "&&" + * t = prog[2].target; // t = 1 + * s = prog[t].target; // s = 0 + * prog[t].target = 2; // Set target to "l2" + * prog[t].when_to_branch = 0; + * prog[2].target = s; + * update_preds(prog, 2, 1); // invert = 1 as we are now processing "||" + * t = prog[2].target; // t = 0 + * s = prog[t].target; // s = -1 + * prog[t].target = 2; // Set target to "l2" + * prog[t].when_to_branch = 1; + * prog[2].target = s; + * + * #13 Which brings us to the final step of the first pass, which is to set + * the last program entry's when_to_branch and target, which will be + * when_to_branch = 0; target = N; ( the label after the program entry after + * the last program entry processed above). + * + * If we denote "TRUE" to be the entry after the last program entry processed, + * and "FALSE" the program entry after that, we are now done with the first + * pass. + * + * Making the above "a || b && c" have a program of: + * prog[0] = { "a", 1, 2 } + * prog[1] = { "b", 0, 2 } + * prog[2] = { "c", 0, 3 } + * + * Which translates into: + * n0: r = a; l0: if (r) goto l2; + * n1: r = b; l1: if (!r) goto l2; + * n2: r = c; l2: if (!r) goto l3; // Which is the same as "goto F;" + * T: return TRUE; l3: + * F: return FALSE + * + * Although, after the first pass, the program is correct, it is + * inefficient. The simple sample of "a || b && c" could easily have been + * converted into: + * n0: r = a; if (r) goto T + * n1: r = b; if (!r) goto F + * n2: r = c; if (!r) goto F + * T: return TRUE; + * F: return FALSE; + * + * The First Pass is over the input string. 
The next two passes are over + * the program itself. + * + * ** SECOND PASS ** + * + * Which brings us to the second pass. If a jump to a label has the + * same condition as that label, it can instead jump to its target. + * The original example of "a && !(!b || (c && g)) || d || e && !f" + * where the first pass gives us: + * + * n1: r=a; l1: if (!r) goto l4; + * n2: r=b; l2: if (!r) goto l4; + * n3: r=c; r=!r; l3: if (r) goto l4; + * n4: r=g; r=!r; l4: if (r) goto l5; + * n5: r=d; l5: if (r) goto T + * n6: r=e; l6: if (!r) goto l7; + * n7: r=f; r=!r; l7: if (!r) goto F; + * T: return TRUE; + * F: return FALSE + * + * We can see that "l3: if (r) goto l4;" and at l4, we have "if (r) goto l5;". + * And "l5: if (r) goto T", we could optimize this by converting l3 and l4 + * to go directly to T. To accomplish this, we start from the last + * entry in the program and work our way back. If the target of the entry + * has the same "when_to_branch" then we could use that entry's target. + * Doing this, the above would end up as: + * + * n1: r=a; l1: if (!r) goto l4; + * n2: r=b; l2: if (!r) goto l4; + * n3: r=c; r=!r; l3: if (r) goto T; + * n4: r=g; r=!r; l4: if (r) goto T; + * n5: r=d; l5: if (r) goto T; + * n6: r=e; l6: if (!r) goto F; + * n7: r=f; r=!r; l7: if (!r) goto F; + * T: return TRUE + * F: return FALSE + * + * In that same pass, if the "when_to_branch" doesn't match, we can simply + * go to the program entry after the label. That is, "l2: if (!r) goto l4;" + * where "l4: if (r) goto T;", then we can convert l2 to be: + * "l2: if (!r) goto n5;". + * + * This will have the second pass give us: + * n1: r=a; l1: if (!r) goto n5; + * n2: r=b; l2: if (!r) goto n5; + * n3: r=c; r=!r; l3: if (r) goto T; + * n4: r=g; r=!r; l4: if (r) goto T; + * n5: r=d; l5: if (r) goto T + * n6: r=e; l6: if (!r) goto F; + * n7: r=f; r=!r; l7: if (!r) goto F + * T: return TRUE + * F: return FALSE + * + * Notice, all the "l#" labels are no longer used, and they can now + * be discarded. 
+ * + * ** THIRD PASS ** + * + * For the third pass we deal with the inverts. As they simply just + * make the "when_to_branch" get inverted, a simple loop over the + * program that does: "when_to_branch ^= invert;" will do the + * job, leaving us with: + * n1: r=a; if (!r) goto n5; + * n2: r=b; if (!r) goto n5; + * n3: r=c; if (!r) goto T; + * n4: r=g; if (!r) goto T; + * n5: r=d; if (r) goto T + * n6: r=e; if (!r) goto F; + * n7: r=f; if (r) goto F + * T: return TRUE + * F: return FALSE + * + * As "r = a; if (!r) goto n5;" is obviously the same as + * "if (!a) goto n5;" without doing anything we can interpret the + * program as: + * n1: if (!a) goto n5; + * n2: if (!b) goto n5; + * n3: if (!c) goto T; + * n4: if (!g) goto T; + * n5: if (d) goto T + * n6: if (!e) goto F; + * n7: if (f) goto F + * T: return TRUE + * F: return FALSE + * + * Since the inverts are discarded at the end, there's no reason to store + * them in the program array (and waste memory). A separate array to hold + * the inverts is used and freed at the end. 
+ */ +static struct prog_entry * +predicate_parse(const char *str, int nr_parens, int nr_preds, + parse_pred_fn parse_pred, void *data, + struct filter_parse_error *pe) +{ + struct prog_entry *prog_stack; + struct prog_entry *prog; + const char *ptr = str; + char *inverts = NULL; + int *op_stack; + int *top; + int invert = 0; + int ret = -ENOMEM; + int len; + int N = 0; + int i; + + nr_preds += 2; /* For TRUE and FALSE */ + + op_stack = kmalloc_array(nr_parens, sizeof(*op_stack), GFP_KERNEL); + if (!op_stack) + return ERR_PTR(-ENOMEM); + prog_stack = kcalloc(nr_preds, sizeof(*prog_stack), GFP_KERNEL); + if (!prog_stack) { + parse_error(pe, -ENOMEM, 0); + goto out_free; + } + inverts = kmalloc_array(nr_preds, sizeof(*inverts), GFP_KERNEL); + if (!inverts) { + parse_error(pe, -ENOMEM, 0); + goto out_free; + } + + top = op_stack; + prog = prog_stack; + *top = 0; + + /* First pass */ + while (*ptr) { /* #1 */ + const char *next = ptr++; + + if (isspace(*next)) + continue; + + switch (*next) { + case '(': /* #2 */ + if (top - op_stack > nr_parens) { + ret = -EINVAL; + goto out_free; + } + *(++top) = invert; + continue; + case '!': /* #3 */ + if (!is_not(next)) + break; + invert = !invert; + continue; + } + + if (N >= nr_preds) { + parse_error(pe, FILT_ERR_TOO_MANY_PREDS, next - str); + goto out_free; + } + + inverts[N] = invert; /* #4 */ + prog[N].target = N-1; + + len = parse_pred(next, data, ptr - str, pe, &prog[N].pred); + if (len < 0) { + ret = len; + goto out_free; + } + ptr = next + len; + + N++; + + ret = -1; + while (1) { /* #5 */ + next = ptr++; + if (isspace(*next)) + continue; + + switch (*next) { + case ')': + case '\0': + break; + case '&': + case '|': + /* accepting only "&&" or "||" */ + if (next[1] == next[0]) { + ptr++; + break; + } + fallthrough; + default: + parse_error(pe, FILT_ERR_TOO_MANY_PREDS, + next - str); + goto out_free; + } + + invert = *top & INVERT; + + if (*top & PROCESS_AND) { /* #7 */ + update_preds(prog, N - 1, invert); + *top &= 
~PROCESS_AND; + } + if (*next == '&') { /* #8 */ + *top |= PROCESS_AND; + break; + } + if (*top & PROCESS_OR) { /* #9 */ + update_preds(prog, N - 1, !invert); + *top &= ~PROCESS_OR; + } + if (*next == '|') { /* #10 */ + *top |= PROCESS_OR; + break; + } + if (!*next) /* #11 */ + goto out; + + if (top == op_stack) { + ret = -1; + /* Too few '(' */ + parse_error(pe, FILT_ERR_TOO_MANY_CLOSE, ptr - str); + goto out_free; + } + top--; /* #12 */ + } + } + out: + if (top != op_stack) { + /* Too many '(' */ + parse_error(pe, FILT_ERR_TOO_MANY_OPEN, ptr - str); + goto out_free; + } + + if (!N) { + /* No program? */ + ret = -EINVAL; + parse_error(pe, FILT_ERR_NO_FILTER, ptr - str); + goto out_free; + } + + prog[N].pred = NULL; /* #13 */ + prog[N].target = 1; /* TRUE */ + prog[N+1].pred = NULL; + prog[N+1].target = 0; /* FALSE */ + prog[N-1].target = N; + prog[N-1].when_to_branch = false; + + /* Second Pass */ + for (i = N-1 ; i--; ) { + int target = prog[i].target; + if (prog[i].when_to_branch == prog[target].when_to_branch) + prog[i].target = prog[target].target; + } + + /* Third Pass */ + for (i = 0; i < N; i++) { + invert = inverts[i] ^ prog[i].when_to_branch; + prog[i].when_to_branch = invert; + /* Make sure the program always moves forward */ + if (WARN_ON(prog[i].target <= i)) { + ret = -EINVAL; + goto out_free; + } + } + + kfree(op_stack); + kfree(inverts); + return prog; +out_free: + kfree(op_stack); + kfree(inverts); + if (prog_stack) { + for (i = 0; prog_stack[i].pred; i++) + kfree(prog_stack[i].pred); + kfree(prog_stack); + } + return ERR_PTR(ret); +} + +#define DEFINE_COMPARISON_PRED(type) \ +static int filter_pred_LT_##type(struct filter_pred *pred, void *event) \ +{ \ + type *addr = (type *)(event + pred->offset); \ + type val = (type)pred->val; \ + return *addr < val; \ +} \ +static int filter_pred_LE_##type(struct filter_pred *pred, void *event) \ +{ \ + type *addr = (type *)(event + pred->offset); \ + type val = (type)pred->val; \ + return *addr <= val; \ +} 
\ +static int filter_pred_GT_##type(struct filter_pred *pred, void *event) \ +{ \ + type *addr = (type *)(event + pred->offset); \ + type val = (type)pred->val; \ + return *addr > val; \ +} \ +static int filter_pred_GE_##type(struct filter_pred *pred, void *event) \ +{ \ + type *addr = (type *)(event + pred->offset); \ + type val = (type)pred->val; \ + return *addr >= val; \ +} \ +static int filter_pred_BAND_##type(struct filter_pred *pred, void *event) \ +{ \ + type *addr = (type *)(event + pred->offset); \ + type val = (type)pred->val; \ + return !!(*addr & val); \ +} \ +static const filter_pred_fn_t pred_funcs_##type[] = { \ + filter_pred_LE_##type, \ + filter_pred_LT_##type, \ + filter_pred_GE_##type, \ + filter_pred_GT_##type, \ + filter_pred_BAND_##type, \ +}; + +#define DEFINE_EQUALITY_PRED(size) \ +static int filter_pred_##size(struct filter_pred *pred, void *event) \ +{ \ + u##size *addr = (u##size *)(event + pred->offset); \ + u##size val = (u##size)pred->val; \ + int match; \ + \ + match = (val == *addr) ^ pred->not; \ + \ + return match; \ +} + +DEFINE_COMPARISON_PRED(s64); +DEFINE_COMPARISON_PRED(u64); +DEFINE_COMPARISON_PRED(s32); +DEFINE_COMPARISON_PRED(u32); +DEFINE_COMPARISON_PRED(s16); +DEFINE_COMPARISON_PRED(u16); +DEFINE_COMPARISON_PRED(s8); +DEFINE_COMPARISON_PRED(u8); + +DEFINE_EQUALITY_PRED(64); +DEFINE_EQUALITY_PRED(32); +DEFINE_EQUALITY_PRED(16); +DEFINE_EQUALITY_PRED(8); + +/* user space strings temp buffer */ +#define USTRING_BUF_SIZE 1024 + +struct ustring_buffer { + char buffer[USTRING_BUF_SIZE]; +}; + +static __percpu struct ustring_buffer *ustring_per_cpu; + +static __always_inline char *test_string(char *str) +{ + struct ustring_buffer *ubuf; + char *kstr; + + if (!ustring_per_cpu) + return NULL; + + ubuf = this_cpu_ptr(ustring_per_cpu); + kstr = ubuf->buffer; + + /* For safety, do not trust the string pointer */ + if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE)) + return NULL; + return kstr; +} + +static 
__always_inline char *test_ustring(char *str) +{ + struct ustring_buffer *ubuf; + char __user *ustr; + char *kstr; + + if (!ustring_per_cpu) + return NULL; + + ubuf = this_cpu_ptr(ustring_per_cpu); + kstr = ubuf->buffer; + + /* user space address? */ + ustr = (char __user *)str; + if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE)) + return NULL; + + return kstr; +} + +/* Filter predicate for fixed sized arrays of characters */ +static int filter_pred_string(struct filter_pred *pred, void *event) +{ + char *addr = (char *)(event + pred->offset); + int cmp, match; + + cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len); + + match = cmp ^ pred->not; + + return match; +} + +static __always_inline int filter_pchar(struct filter_pred *pred, char *str) +{ + int cmp, match; + int len; + + len = strlen(str) + 1; /* including tailing '\0' */ + cmp = pred->regex.match(str, &pred->regex, len); + + match = cmp ^ pred->not; + + return match; +} +/* Filter predicate for char * pointers */ +static int filter_pred_pchar(struct filter_pred *pred, void *event) +{ + char **addr = (char **)(event + pred->offset); + char *str; + + str = test_string(*addr); + if (!str) + return 0; + + return filter_pchar(pred, str); +} + +/* Filter predicate for char * pointers in user space*/ +static int filter_pred_pchar_user(struct filter_pred *pred, void *event) +{ + char **addr = (char **)(event + pred->offset); + char *str; + + str = test_ustring(*addr); + if (!str) + return 0; + + return filter_pchar(pred, str); +} + +/* + * Filter predicate for dynamic sized arrays of characters. + * These are implemented through a list of strings at the end + * of the entry. + * Also each of these strings have a field in the entry which + * contains its offset from the beginning of the entry. + * We have then first to get this field, dereference it + * and add it to the address of the entry, and at last we have + * the address of the string. 
+ */ +static int filter_pred_strloc(struct filter_pred *pred, void *event) +{ + u32 str_item = *(u32 *)(event + pred->offset); + int str_loc = str_item & 0xffff; + int str_len = str_item >> 16; + char *addr = (char *)(event + str_loc); + int cmp, match; + + cmp = pred->regex.match(addr, &pred->regex, str_len); + + match = cmp ^ pred->not; + + return match; +} + +/* Filter predicate for CPUs. */ +static int filter_pred_cpu(struct filter_pred *pred, void *event) +{ + int cpu, cmp; + + cpu = raw_smp_processor_id(); + cmp = pred->val; + + switch (pred->op) { + case OP_EQ: + return cpu == cmp; + case OP_NE: + return cpu != cmp; + case OP_LT: + return cpu < cmp; + case OP_LE: + return cpu <= cmp; + case OP_GT: + return cpu > cmp; + case OP_GE: + return cpu >= cmp; + default: + return 0; + } +} + +/* Filter predicate for COMM. */ +static int filter_pred_comm(struct filter_pred *pred, void *event) +{ + int cmp; + + cmp = pred->regex.match(current->comm, &pred->regex, + TASK_COMM_LEN); + return cmp ^ pred->not; +} + +static int filter_pred_none(struct filter_pred *pred, void *event) +{ + return 0; +} + +/* + * regex_match_foo - Basic regex callbacks + * + * @str: the string to be searched + * @r: the regex structure containing the pattern string + * @len: the length of the string to be searched (including '\0') + * + * Note: + * - @str might not be NULL-terminated if it's of type DYN_STRING + * or STATIC_STRING, unless @len is zero. 
+ */ + +static int regex_match_full(char *str, struct regex *r, int len) +{ + /* len of zero means str is dynamic and ends with '\0' */ + if (!len) + return strcmp(str, r->pattern) == 0; + + return strncmp(str, r->pattern, len) == 0; +} + +static int regex_match_front(char *str, struct regex *r, int len) +{ + if (len && len < r->len) + return 0; + + return strncmp(str, r->pattern, r->len) == 0; +} + +static int regex_match_middle(char *str, struct regex *r, int len) +{ + if (!len) + return strstr(str, r->pattern) != NULL; + + return strnstr(str, r->pattern, len) != NULL; +} + +static int regex_match_end(char *str, struct regex *r, int len) +{ + int strlen = len - 1; + + if (strlen >= r->len && + memcmp(str + strlen - r->len, r->pattern, r->len) == 0) + return 1; + return 0; +} + +static int regex_match_glob(char *str, struct regex *r, int len __maybe_unused) +{ + if (glob_match(r->pattern, str)) + return 1; + return 0; +} + +/** + * filter_parse_regex - parse a basic regex + * @buff: the raw regex + * @len: length of the regex + * @search: will point to the beginning of the string to compare + * @not: tell whether the match will have to be inverted + * + * This passes in a buffer containing a regex and this function will + * set search to point to the search part of the buffer and + * return the type of search it is (see enum above). + * This does modify buff. + * + * Returns enum type. + * search returns the pointer to use for comparison. + * not returns 1 if buff started with a '!' + * 0 otherwise. 
+ */ +enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not) +{ + int type = MATCH_FULL; + int i; + + if (buff[0] == '!') { + *not = 1; + buff++; + len--; + } else + *not = 0; + + *search = buff; + + if (isdigit(buff[0])) + return MATCH_INDEX; + + for (i = 0; i < len; i++) { + if (buff[i] == '*') { + if (!i) { + type = MATCH_END_ONLY; + } else if (i == len - 1) { + if (type == MATCH_END_ONLY) + type = MATCH_MIDDLE_ONLY; + else + type = MATCH_FRONT_ONLY; + buff[i] = 0; + break; + } else { /* pattern continues, use full glob */ + return MATCH_GLOB; + } + } else if (strchr("[?\\", buff[i])) { + return MATCH_GLOB; + } + } + if (buff[0] == '*') + *search = buff + 1; + + return type; +} + +static void filter_build_regex(struct filter_pred *pred) +{ + struct regex *r = &pred->regex; + char *search; + enum regex_type type = MATCH_FULL; + + if (pred->op == OP_GLOB) { + type = filter_parse_regex(r->pattern, r->len, &search, &pred->not); + r->len = strlen(search); + memmove(r->pattern, search, r->len+1); + } + + switch (type) { + /* MATCH_INDEX should not happen, but if it does, match full */ + case MATCH_INDEX: + case MATCH_FULL: + r->match = regex_match_full; + break; + case MATCH_FRONT_ONLY: + r->match = regex_match_front; + break; + case MATCH_MIDDLE_ONLY: + r->match = regex_match_middle; + break; + case MATCH_END_ONLY: + r->match = regex_match_end; + break; + case MATCH_GLOB: + r->match = regex_match_glob; + break; + } +} + +/* return 1 if event matches, 0 otherwise (discard) */ +int filter_match_preds(struct event_filter *filter, void *rec) +{ + struct prog_entry *prog; + int i; + + /* no filter is considered a match */ + if (!filter) + return 1; + + /* Protected by either SRCU(tracepoint_srcu) or preempt_disable */ + prog = rcu_dereference_raw(filter->prog); + if (!prog) + return 1; + + for (i = 0; prog[i].pred; i++) { + struct filter_pred *pred = prog[i].pred; + int match = pred->fn(pred, rec); + if (match == prog[i].when_to_branch) + i = 
prog[i].target; + } + return prog[i].target; +} +EXPORT_SYMBOL_GPL(filter_match_preds); + +static void remove_filter_string(struct event_filter *filter) +{ + if (!filter) + return; + + kfree(filter->filter_string); + filter->filter_string = NULL; +} + +static void append_filter_err(struct trace_array *tr, + struct filter_parse_error *pe, + struct event_filter *filter) +{ + struct trace_seq *s; + int pos = pe->lasterr_pos; + char *buf; + int len; + + if (WARN_ON(!filter->filter_string)) + return; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return; + trace_seq_init(s); + + len = strlen(filter->filter_string); + if (pos > len) + pos = len; + + /* indexing is off by one */ + if (pos) + pos++; + + trace_seq_puts(s, filter->filter_string); + if (pe->lasterr > 0) { + trace_seq_printf(s, "\n%*s", pos, "^"); + trace_seq_printf(s, "\nparse_error: %s\n", err_text[pe->lasterr]); + tracing_log_err(tr, "event filter parse error", + filter->filter_string, err_text, + pe->lasterr, pe->lasterr_pos); + } else { + trace_seq_printf(s, "\nError: (%d)\n", pe->lasterr); + tracing_log_err(tr, "event filter parse error", + filter->filter_string, err_text, + FILT_ERR_ERRNO, 0); + } + trace_seq_putc(s, 0); + buf = kmemdup_nul(s->buffer, s->seq.len, GFP_KERNEL); + if (buf) { + kfree(filter->filter_string); + filter->filter_string = buf; + } + kfree(s); +} + +static inline struct event_filter *event_filter(struct trace_event_file *file) +{ + return file->filter; +} + +/* caller must hold event_mutex */ +void print_event_filter(struct trace_event_file *file, struct trace_seq *s) +{ + struct event_filter *filter = event_filter(file); + + if (filter && filter->filter_string) + trace_seq_printf(s, "%s\n", filter->filter_string); + else + trace_seq_puts(s, "none\n"); +} + +void print_subsystem_event_filter(struct event_subsystem *system, + struct trace_seq *s) +{ + struct event_filter *filter; + + mutex_lock(&event_mutex); + filter = system->filter; + if (filter && filter->filter_string) + 
trace_seq_printf(s, "%s\n", filter->filter_string); + else + trace_seq_puts(s, DEFAULT_SYS_FILTER_MESSAGE "\n"); + mutex_unlock(&event_mutex); +} + +static void free_prog(struct event_filter *filter) +{ + struct prog_entry *prog; + int i; + + prog = rcu_access_pointer(filter->prog); + if (!prog) + return; + + for (i = 0; prog[i].pred; i++) + kfree(prog[i].pred); + kfree(prog); +} + +static void filter_disable(struct trace_event_file *file) +{ + unsigned long old_flags = file->flags; + + file->flags &= ~EVENT_FILE_FL_FILTERED; + + if (old_flags != file->flags) + trace_buffered_event_disable(); +} + +static void __free_filter(struct event_filter *filter) +{ + if (!filter) + return; + + free_prog(filter); + kfree(filter->filter_string); + kfree(filter); +} + +void free_event_filter(struct event_filter *filter) +{ + __free_filter(filter); +} + +static inline void __remove_filter(struct trace_event_file *file) +{ + filter_disable(file); + remove_filter_string(file->filter); +} + +static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir, + struct trace_array *tr) +{ + struct trace_event_file *file; + + list_for_each_entry(file, &tr->events, list) { + if (file->system != dir) + continue; + __remove_filter(file); + } +} + +static inline void __free_subsystem_filter(struct trace_event_file *file) +{ + __free_filter(file->filter); + file->filter = NULL; +} + +static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, + struct trace_array *tr) +{ + struct trace_event_file *file; + + list_for_each_entry(file, &tr->events, list) { + if (file->system != dir) + continue; + __free_subsystem_filter(file); + } +} + +int filter_assign_type(const char *type) +{ + if (strstr(type, "__data_loc") && strstr(type, "char")) + return FILTER_DYN_STRING; + + if (strchr(type, '[') && strstr(type, "char")) + return FILTER_STATIC_STRING; + + if (strcmp(type, "char *") == 0 || strcmp(type, "const char *") == 0) + return FILTER_PTR_STRING; + + return FILTER_OTHER; +} 
+ +static filter_pred_fn_t select_comparison_fn(enum filter_op_ids op, + int field_size, int field_is_signed) +{ + filter_pred_fn_t fn = NULL; + int pred_func_index = -1; + + switch (op) { + case OP_EQ: + case OP_NE: + break; + default: + if (WARN_ON_ONCE(op < PRED_FUNC_START)) + return NULL; + pred_func_index = op - PRED_FUNC_START; + if (WARN_ON_ONCE(pred_func_index > PRED_FUNC_MAX)) + return NULL; + } + + switch (field_size) { + case 8: + if (pred_func_index < 0) + fn = filter_pred_64; + else if (field_is_signed) + fn = pred_funcs_s64[pred_func_index]; + else + fn = pred_funcs_u64[pred_func_index]; + break; + case 4: + if (pred_func_index < 0) + fn = filter_pred_32; + else if (field_is_signed) + fn = pred_funcs_s32[pred_func_index]; + else + fn = pred_funcs_u32[pred_func_index]; + break; + case 2: + if (pred_func_index < 0) + fn = filter_pred_16; + else if (field_is_signed) + fn = pred_funcs_s16[pred_func_index]; + else + fn = pred_funcs_u16[pred_func_index]; + break; + case 1: + if (pred_func_index < 0) + fn = filter_pred_8; + else if (field_is_signed) + fn = pred_funcs_s8[pred_func_index]; + else + fn = pred_funcs_u8[pred_func_index]; + break; + } + + return fn; +} + +/* Called when a predicate is encountered by predicate_parse() */ +static int parse_pred(const char *str, void *data, + int pos, struct filter_parse_error *pe, + struct filter_pred **pred_ptr) +{ + struct trace_event_call *call = data; + struct ftrace_event_field *field; + struct filter_pred *pred = NULL; + char num_buf[24]; /* Big enough to hold an address */ + char *field_name; + bool ustring = false; + char q; + u64 val; + int len; + int ret; + int op; + int s; + int i = 0; + + /* First find the field to associate to */ + while (isspace(str[i])) + i++; + s = i; + + while (isalnum(str[i]) || str[i] == '_') + i++; + + len = i - s; + + if (!len) + return -1; + + field_name = kmemdup_nul(str + s, len, GFP_KERNEL); + if (!field_name) + return -ENOMEM; + + /* Make sure that the field exists */ + + 
field = trace_find_event_field(call, field_name); + kfree(field_name); + if (!field) { + parse_error(pe, FILT_ERR_FIELD_NOT_FOUND, pos + i); + return -EINVAL; + } + + /* See if the field is a user space string */ + if ((len = str_has_prefix(str + i, ".ustring"))) { + ustring = true; + i += len; + } + + while (isspace(str[i])) + i++; + + /* Make sure this op is supported */ + for (op = 0; ops[op]; op++) { + /* This is why '<=' must come before '<' in ops[] */ + if (strncmp(str + i, ops[op], strlen(ops[op])) == 0) + break; + } + + if (!ops[op]) { + parse_error(pe, FILT_ERR_INVALID_OP, pos + i); + goto err_free; + } + + i += strlen(ops[op]); + + while (isspace(str[i])) + i++; + + s = i; + + pred = kzalloc(sizeof(*pred), GFP_KERNEL); + if (!pred) + return -ENOMEM; + + pred->field = field; + pred->offset = field->offset; + pred->op = op; + + if (ftrace_event_is_function(call)) { + /* + * Perf does things different with function events. + * It only allows an "ip" field, and expects a string. + * But the string does not need to be surrounded by quotes. + * If it is a string, the assigned function as a nop, + * (perf doesn't use it) and grab everything. + */ + if (strcmp(field->name, "ip") != 0) { + parse_error(pe, FILT_ERR_IP_FIELD_ONLY, pos + i); + goto err_free; + } + pred->fn = filter_pred_none; + + /* + * Quotes are not required, but if they exist then we need + * to read them till we hit a matching one. 
+ */ + if (str[i] == '\'' || str[i] == '"') + q = str[i]; + else + q = 0; + + for (i++; str[i]; i++) { + if (q && str[i] == q) + break; + if (!q && (str[i] == ')' || str[i] == '&' || + str[i] == '|')) + break; + } + /* Skip quotes */ + if (q) + s++; + len = i - s; + if (len >= MAX_FILTER_STR_VAL) { + parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i); + goto err_free; + } + + pred->regex.len = len; + strncpy(pred->regex.pattern, str + s, len); + pred->regex.pattern[len] = 0; + + /* This is either a string, or an integer */ + } else if (str[i] == '\'' || str[i] == '"') { + char q = str[i]; + + /* Make sure the op is OK for strings */ + switch (op) { + case OP_NE: + pred->not = 1; + fallthrough; + case OP_GLOB: + case OP_EQ: + break; + default: + parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i); + goto err_free; + } + + /* Make sure the field is OK for strings */ + if (!is_string_field(field)) { + parse_error(pe, FILT_ERR_EXPECT_DIGIT, pos + i); + goto err_free; + } + + for (i++; str[i]; i++) { + if (str[i] == q) + break; + } + if (!str[i]) { + parse_error(pe, FILT_ERR_MISSING_QUOTE, pos + i); + goto err_free; + } + + /* Skip quotes */ + s++; + len = i - s; + if (len >= MAX_FILTER_STR_VAL) { + parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i); + goto err_free; + } + + pred->regex.len = len; + strncpy(pred->regex.pattern, str + s, len); + pred->regex.pattern[len] = 0; + + filter_build_regex(pred); + + if (field->filter_type == FILTER_COMM) { + pred->fn = filter_pred_comm; + + } else if (field->filter_type == FILTER_STATIC_STRING) { + pred->fn = filter_pred_string; + pred->regex.field_len = field->size; + + } else if (field->filter_type == FILTER_DYN_STRING) + pred->fn = filter_pred_strloc; + else { + + if (!ustring_per_cpu) { + /* Once allocated, keep it around for good */ + ustring_per_cpu = alloc_percpu(struct ustring_buffer); + if (!ustring_per_cpu) + goto err_mem; + } + + if (ustring) + pred->fn = filter_pred_pchar_user; + else + pred->fn = 
filter_pred_pchar; + } + /* go past the last quote */ + i++; + + } else if (isdigit(str[i]) || str[i] == '-') { + + /* Make sure the field is not a string */ + if (is_string_field(field)) { + parse_error(pe, FILT_ERR_EXPECT_STRING, pos + i); + goto err_free; + } + + if (op == OP_GLOB) { + parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i); + goto err_free; + } + + if (str[i] == '-') + i++; + + /* We allow 0xDEADBEEF */ + while (isalnum(str[i])) + i++; + + len = i - s; + /* 0xfeedfacedeadbeef is 18 chars max */ + if (len >= sizeof(num_buf)) { + parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i); + goto err_free; + } + + strncpy(num_buf, str + s, len); + num_buf[len] = 0; + + /* Make sure it is a value */ + if (field->is_signed) + ret = kstrtoll(num_buf, 0, &val); + else + ret = kstrtoull(num_buf, 0, &val); + if (ret) { + parse_error(pe, FILT_ERR_ILLEGAL_INTVAL, pos + s); + goto err_free; + } + + pred->val = val; + + if (field->filter_type == FILTER_CPU) + pred->fn = filter_pred_cpu; + else { + pred->fn = select_comparison_fn(pred->op, field->size, + field->is_signed); + if (pred->op == OP_NE) + pred->not = 1; + } + + } else { + parse_error(pe, FILT_ERR_INVALID_VALUE, pos + i); + goto err_free; + } + + *pred_ptr = pred; + return i; + +err_free: + kfree(pred); + return -EINVAL; +err_mem: + kfree(pred); + return -ENOMEM; +} + +enum { + TOO_MANY_CLOSE = -1, + TOO_MANY_OPEN = -2, + MISSING_QUOTE = -3, +}; + +/* + * Read the filter string once to calculate the number of predicates + * as well as how deep the parentheses go. 
+ * + * Returns: + * 0 - everything is fine (err is undefined) + * -1 - too many ')' + * -2 - too many '(' + * -3 - No matching quote + */ +static int calc_stack(const char *str, int *parens, int *preds, int *err) +{ + bool is_pred = false; + int nr_preds = 0; + int open = 1; /* Count the expression as "(E)" */ + int last_quote = 0; + int max_open = 1; + int quote = 0; + int i; + + *err = 0; + + for (i = 0; str[i]; i++) { + if (isspace(str[i])) + continue; + if (quote) { + if (str[i] == quote) + quote = 0; + continue; + } + + switch (str[i]) { + case '\'': + case '"': + quote = str[i]; + last_quote = i; + break; + case '|': + case '&': + if (str[i+1] != str[i]) + break; + is_pred = false; + continue; + case '(': + is_pred = false; + open++; + if (open > max_open) + max_open = open; + continue; + case ')': + is_pred = false; + if (open == 1) { + *err = i; + return TOO_MANY_CLOSE; + } + open--; + continue; + } + if (!is_pred) { + nr_preds++; + is_pred = true; + } + } + + if (quote) { + *err = last_quote; + return MISSING_QUOTE; + } + + if (open != 1) { + int level = open; + + /* find the bad open */ + for (i--; i; i--) { + if (quote) { + if (str[i] == quote) + quote = 0; + continue; + } + switch (str[i]) { + case '(': + if (level == open) { + *err = i; + return TOO_MANY_OPEN; + } + level--; + break; + case ')': + level++; + break; + case '\'': + case '"': + quote = str[i]; + break; + } + } + /* First character is the '(' with missing ')' */ + *err = 0; + return TOO_MANY_OPEN; + } + + /* Set the size of the required stacks */ + *parens = max_open; + *preds = nr_preds; + return 0; +} + +static int process_preds(struct trace_event_call *call, + const char *filter_string, + struct event_filter *filter, + struct filter_parse_error *pe) +{ + struct prog_entry *prog; + int nr_parens; + int nr_preds; + int index; + int ret; + + ret = calc_stack(filter_string, &nr_parens, &nr_preds, &index); + if (ret < 0) { + switch (ret) { + case MISSING_QUOTE: + parse_error(pe, 
FILT_ERR_MISSING_QUOTE, index); + break; + case TOO_MANY_OPEN: + parse_error(pe, FILT_ERR_TOO_MANY_OPEN, index); + break; + default: + parse_error(pe, FILT_ERR_TOO_MANY_CLOSE, index); + } + return ret; + } + + if (!nr_preds) + return -EINVAL; + + prog = predicate_parse(filter_string, nr_parens, nr_preds, + parse_pred, call, pe); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + rcu_assign_pointer(filter->prog, prog); + return 0; +} + +static inline void event_set_filtered_flag(struct trace_event_file *file) +{ + unsigned long old_flags = file->flags; + + file->flags |= EVENT_FILE_FL_FILTERED; + + if (old_flags != file->flags) + trace_buffered_event_enable(); +} + +static inline void event_set_filter(struct trace_event_file *file, + struct event_filter *filter) +{ + rcu_assign_pointer(file->filter, filter); +} + +static inline void event_clear_filter(struct trace_event_file *file) +{ + RCU_INIT_POINTER(file->filter, NULL); +} + +static inline void +event_set_no_set_filter_flag(struct trace_event_file *file) +{ + file->flags |= EVENT_FILE_FL_NO_SET_FILTER; +} + +static inline void +event_clear_no_set_filter_flag(struct trace_event_file *file) +{ + file->flags &= ~EVENT_FILE_FL_NO_SET_FILTER; +} + +static inline bool +event_no_set_filter_flag(struct trace_event_file *file) +{ + if (file->flags & EVENT_FILE_FL_NO_SET_FILTER) + return true; + + return false; +} + +struct filter_list { + struct list_head list; + struct event_filter *filter; +}; + +static int process_system_preds(struct trace_subsystem_dir *dir, + struct trace_array *tr, + struct filter_parse_error *pe, + char *filter_string) +{ + struct trace_event_file *file; + struct filter_list *filter_item; + struct event_filter *filter = NULL; + struct filter_list *tmp; + LIST_HEAD(filter_list); + bool fail = true; + int err; + + list_for_each_entry(file, &tr->events, list) { + + if (file->system != dir) + continue; + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + goto fail_mem; + + 
filter->filter_string = kstrdup(filter_string, GFP_KERNEL); + if (!filter->filter_string) + goto fail_mem; + + err = process_preds(file->event_call, filter_string, filter, pe); + if (err) { + filter_disable(file); + parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0); + append_filter_err(tr, pe, filter); + } else + event_set_filtered_flag(file); + + + filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL); + if (!filter_item) + goto fail_mem; + + list_add_tail(&filter_item->list, &filter_list); + /* + * Regardless of if this returned an error, we still + * replace the filter for the call. + */ + filter_item->filter = event_filter(file); + event_set_filter(file, filter); + filter = NULL; + + fail = false; + } + + if (fail) + goto fail; + + /* + * The calls can still be using the old filters. + * Do a synchronize_rcu() and to ensure all calls are + * done with them before we free them. + */ + tracepoint_synchronize_unregister(); + list_for_each_entry_safe(filter_item, tmp, &filter_list, list) { + __free_filter(filter_item->filter); + list_del(&filter_item->list); + kfree(filter_item); + } + return 0; + fail: + /* No call succeeded */ + list_for_each_entry_safe(filter_item, tmp, &filter_list, list) { + list_del(&filter_item->list); + kfree(filter_item); + } + parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0); + return -EINVAL; + fail_mem: + __free_filter(filter); + /* If any call succeeded, we still need to sync */ + if (!fail) + tracepoint_synchronize_unregister(); + list_for_each_entry_safe(filter_item, tmp, &filter_list, list) { + __free_filter(filter_item->filter); + list_del(&filter_item->list); + kfree(filter_item); + } + return -ENOMEM; +} + +static int create_filter_start(char *filter_string, bool set_str, + struct filter_parse_error **pse, + struct event_filter **filterp) +{ + struct event_filter *filter; + struct filter_parse_error *pe = NULL; + int err = 0; + + if (WARN_ON_ONCE(*pse || *filterp)) + return -EINVAL; + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); 
+ if (filter && set_str) { + filter->filter_string = kstrdup(filter_string, GFP_KERNEL); + if (!filter->filter_string) + err = -ENOMEM; + } + + pe = kzalloc(sizeof(*pe), GFP_KERNEL); + + if (!filter || !pe || err) { + kfree(pe); + __free_filter(filter); + return -ENOMEM; + } + + /* we're committed to creating a new filter */ + *filterp = filter; + *pse = pe; + + return 0; +} + +static void create_filter_finish(struct filter_parse_error *pe) +{ + kfree(pe); +} + +/** + * create_filter - create a filter for a trace_event_call + * @call: trace_event_call to create a filter for + * @filter_str: filter string + * @set_str: remember @filter_str and enable detailed error in filter + * @filterp: out param for created filter (always updated on return) + * Must be a pointer that references a NULL pointer. + * + * Creates a filter for @call with @filter_str. If @set_str is %true, + * @filter_str is copied and recorded in the new filter. + * + * On success, returns 0 and *@filterp points to the new filter. On + * failure, returns -errno and *@filterp may point to %NULL or to a new + * filter. In the latter case, the returned filter contains error + * information if @set_str is %true and the caller is responsible for + * freeing it. 
+ */ +static int create_filter(struct trace_array *tr, + struct trace_event_call *call, + char *filter_string, bool set_str, + struct event_filter **filterp) +{ + struct filter_parse_error *pe = NULL; + int err; + + /* filterp must point to NULL */ + if (WARN_ON(*filterp)) + *filterp = NULL; + + err = create_filter_start(filter_string, set_str, &pe, filterp); + if (err) + return err; + + err = process_preds(call, filter_string, *filterp, pe); + if (err && set_str) + append_filter_err(tr, pe, *filterp); + create_filter_finish(pe); + + return err; +} + +int create_event_filter(struct trace_array *tr, + struct trace_event_call *call, + char *filter_str, bool set_str, + struct event_filter **filterp) +{ + return create_filter(tr, call, filter_str, set_str, filterp); +} + +/** + * create_system_filter - create a filter for an event_subsystem + * @system: event_subsystem to create a filter for + * @filter_str: filter string + * @filterp: out param for created filter (always updated on return) + * + * Identical to create_filter() except that it creates a subsystem filter + * and always remembers @filter_str. 
+ */ +static int create_system_filter(struct trace_subsystem_dir *dir, + struct trace_array *tr, + char *filter_str, struct event_filter **filterp) +{ + struct filter_parse_error *pe = NULL; + int err; + + err = create_filter_start(filter_str, true, &pe, filterp); + if (!err) { + err = process_system_preds(dir, tr, pe, filter_str); + if (!err) { + /* System filters just show a default message */ + kfree((*filterp)->filter_string); + (*filterp)->filter_string = NULL; + } else { + append_filter_err(tr, pe, *filterp); + } + } + create_filter_finish(pe); + + return err; +} + +/* caller must hold event_mutex */ +int apply_event_filter(struct trace_event_file *file, char *filter_string) +{ + struct trace_event_call *call = file->event_call; + struct event_filter *filter = NULL; + int err; + + if (file->flags & EVENT_FILE_FL_FREED) + return -ENODEV; + + if (!strcmp(strstrip(filter_string), "0")) { + filter_disable(file); + filter = event_filter(file); + + if (!filter) + return 0; + + event_clear_filter(file); + + /* Make sure the filter is not being used */ + tracepoint_synchronize_unregister(); + __free_filter(filter); + + return 0; + } + + err = create_filter(file->tr, call, filter_string, true, &filter); + + /* + * Always swap the call filter with the new filter + * even if there was an error. 
If there was an error + * in the filter, we disable the filter and show the error + * string + */ + if (filter) { + struct event_filter *tmp; + + tmp = event_filter(file); + if (!err) + event_set_filtered_flag(file); + else + filter_disable(file); + + event_set_filter(file, filter); + + if (tmp) { + /* Make sure the call is done with the filter */ + tracepoint_synchronize_unregister(); + __free_filter(tmp); + } + } + + return err; +} + +int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, + char *filter_string) +{ + struct event_subsystem *system = dir->subsystem; + struct trace_array *tr = dir->tr; + struct event_filter *filter = NULL; + int err = 0; + + mutex_lock(&event_mutex); + + /* Make sure the system still has events */ + if (!dir->nr_events) { + err = -ENODEV; + goto out_unlock; + } + + if (!strcmp(strstrip(filter_string), "0")) { + filter_free_subsystem_preds(dir, tr); + remove_filter_string(system->filter); + filter = system->filter; + system->filter = NULL; + /* Ensure all filters are no longer used */ + tracepoint_synchronize_unregister(); + filter_free_subsystem_filters(dir, tr); + __free_filter(filter); + goto out_unlock; + } + + err = create_system_filter(dir, tr, filter_string, &filter); + if (filter) { + /* + * No event actually uses the system filter + * we can free it without synchronize_rcu(). 
+ */ + __free_filter(system->filter); + system->filter = filter; + } +out_unlock: + mutex_unlock(&event_mutex); + + return err; +} + +#ifdef CONFIG_PERF_EVENTS + +void ftrace_profile_free_filter(struct perf_event *event) +{ + struct event_filter *filter = event->filter; + + event->filter = NULL; + __free_filter(filter); +} + +struct function_filter_data { + struct ftrace_ops *ops; + int first_filter; + int first_notrace; +}; + +#ifdef CONFIG_FUNCTION_TRACER +static char ** +ftrace_function_filter_re(char *buf, int len, int *count) +{ + char *str, **re; + + str = kstrndup(buf, len, GFP_KERNEL); + if (!str) + return NULL; + + /* + * The argv_split function takes white space + * as a separator, so convert ',' into spaces. + */ + strreplace(str, ',', ' '); + + re = argv_split(GFP_KERNEL, str, count); + kfree(str); + return re; +} + +static int ftrace_function_set_regexp(struct ftrace_ops *ops, int filter, + int reset, char *re, int len) +{ + int ret; + + if (filter) + ret = ftrace_set_filter(ops, re, len, reset); + else + ret = ftrace_set_notrace(ops, re, len, reset); + + return ret; +} + +static int __ftrace_function_set_filter(int filter, char *buf, int len, + struct function_filter_data *data) +{ + int i, re_cnt, ret = -EINVAL; + int *reset; + char **re; + + reset = filter ? &data->first_filter : &data->first_notrace; + + /* + * The 'ip' field could have multiple filters set, separated + * either by space or comma. We first cut the filter and apply + * all pieces separatelly. 
+ */ + re = ftrace_function_filter_re(buf, len, &re_cnt); + if (!re) + return -EINVAL; + + for (i = 0; i < re_cnt; i++) { + ret = ftrace_function_set_regexp(data->ops, filter, *reset, + re[i], strlen(re[i])); + if (ret) + break; + + if (*reset) + *reset = 0; + } + + argv_free(re); + return ret; +} + +static int ftrace_function_check_pred(struct filter_pred *pred) +{ + struct ftrace_event_field *field = pred->field; + + /* + * Check the predicate for function trace, verify: + * - only '==' and '!=' is used + * - the 'ip' field is used + */ + if ((pred->op != OP_EQ) && (pred->op != OP_NE)) + return -EINVAL; + + if (strcmp(field->name, "ip")) + return -EINVAL; + + return 0; +} + +static int ftrace_function_set_filter_pred(struct filter_pred *pred, + struct function_filter_data *data) +{ + int ret; + + /* Checking the node is valid for function trace. */ + ret = ftrace_function_check_pred(pred); + if (ret) + return ret; + + return __ftrace_function_set_filter(pred->op == OP_EQ, + pred->regex.pattern, + pred->regex.len, + data); +} + +static bool is_or(struct prog_entry *prog, int i) +{ + int target; + + /* + * Only "||" is allowed for function events, thus, + * all true branches should jump to true, and any + * false branch should jump to false. 
+ */ + target = prog[i].target + 1; + /* True and false have NULL preds (all prog entries should jump to one */ + if (prog[target].pred) + return false; + + /* prog[target].target is 1 for TRUE, 0 for FALSE */ + return prog[i].when_to_branch == prog[target].target; +} + +static int ftrace_function_set_filter(struct perf_event *event, + struct event_filter *filter) +{ + struct prog_entry *prog = rcu_dereference_protected(filter->prog, + lockdep_is_held(&event_mutex)); + struct function_filter_data data = { + .first_filter = 1, + .first_notrace = 1, + .ops = &event->ftrace_ops, + }; + int i; + + for (i = 0; prog[i].pred; i++) { + struct filter_pred *pred = prog[i].pred; + + if (!is_or(prog, i)) + return -EINVAL; + + if (ftrace_function_set_filter_pred(pred, &data) < 0) + return -EINVAL; + } + return 0; +} +#else +static int ftrace_function_set_filter(struct perf_event *event, + struct event_filter *filter) +{ + return -ENODEV; +} +#endif /* CONFIG_FUNCTION_TRACER */ + +int ftrace_profile_set_filter(struct perf_event *event, int event_id, + char *filter_str) +{ + int err; + struct event_filter *filter = NULL; + struct trace_event_call *call; + + mutex_lock(&event_mutex); + + call = event->tp_event; + + err = -EINVAL; + if (!call) + goto out_unlock; + + err = -EEXIST; + if (event->filter) + goto out_unlock; + + err = create_filter(NULL, call, filter_str, false, &filter); + if (err) + goto free_filter; + + if (ftrace_event_is_function(call)) + err = ftrace_function_set_filter(event, filter); + else + event->filter = filter; + +free_filter: + if (err || ftrace_event_is_function(call)) + __free_filter(filter); + +out_unlock: + mutex_unlock(&event_mutex); + + return err; +} + +#endif /* CONFIG_PERF_EVENTS */ + +#ifdef CONFIG_FTRACE_STARTUP_TEST + +#include +#include + +#define CREATE_TRACE_POINTS +#include "trace_events_filter_test.h" + +#define DATA_REC(m, va, vb, vc, vd, ve, vf, vg, vh, nvisit) \ +{ \ + .filter = FILTER, \ + .rec = { .a = va, .b = vb, .c = vc, .d = vd, \ 
+ .e = ve, .f = vf, .g = vg, .h = vh }, \ + .match = m, \ + .not_visited = nvisit, \ +} +#define YES 1 +#define NO 0 + +static struct test_filter_data_t { + char *filter; + struct trace_event_raw_ftrace_test_filter rec; + int match; + char *not_visited; +} test_filter_data[] = { +#define FILTER "a == 1 && b == 1 && c == 1 && d == 1 && " \ + "e == 1 && f == 1 && g == 1 && h == 1" + DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, ""), + DATA_REC(NO, 0, 1, 1, 1, 1, 1, 1, 1, "bcdefgh"), + DATA_REC(NO, 1, 1, 1, 1, 1, 1, 1, 0, ""), +#undef FILTER +#define FILTER "a == 1 || b == 1 || c == 1 || d == 1 || " \ + "e == 1 || f == 1 || g == 1 || h == 1" + DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 0, ""), + DATA_REC(YES, 0, 0, 0, 0, 0, 0, 0, 1, ""), + DATA_REC(YES, 1, 0, 0, 0, 0, 0, 0, 0, "bcdefgh"), +#undef FILTER +#define FILTER "(a == 1 || b == 1) && (c == 1 || d == 1) && " \ + "(e == 1 || f == 1) && (g == 1 || h == 1)" + DATA_REC(NO, 0, 0, 1, 1, 1, 1, 1, 1, "dfh"), + DATA_REC(YES, 0, 1, 0, 1, 0, 1, 0, 1, ""), + DATA_REC(YES, 1, 0, 1, 0, 0, 1, 0, 1, "bd"), + DATA_REC(NO, 1, 0, 1, 0, 0, 1, 0, 0, "bd"), +#undef FILTER +#define FILTER "(a == 1 && b == 1) || (c == 1 && d == 1) || " \ + "(e == 1 && f == 1) || (g == 1 && h == 1)" + DATA_REC(YES, 1, 0, 1, 1, 1, 1, 1, 1, "efgh"), + DATA_REC(YES, 0, 0, 0, 0, 0, 0, 1, 1, ""), + DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 1, ""), +#undef FILTER +#define FILTER "(a == 1 && b == 1) && (c == 1 && d == 1) && " \ + "(e == 1 && f == 1) || (g == 1 && h == 1)" + DATA_REC(YES, 1, 1, 1, 1, 1, 1, 0, 0, "gh"), + DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 1, ""), + DATA_REC(YES, 1, 1, 1, 1, 1, 0, 1, 1, ""), +#undef FILTER +#define FILTER "((a == 1 || b == 1) || (c == 1 || d == 1) || " \ + "(e == 1 || f == 1)) && (g == 1 || h == 1)" + DATA_REC(YES, 1, 1, 1, 1, 1, 1, 0, 1, "bcdef"), + DATA_REC(NO, 0, 0, 0, 0, 0, 0, 0, 0, ""), + DATA_REC(YES, 1, 1, 1, 1, 1, 0, 1, 1, "h"), +#undef FILTER +#define FILTER "((((((((a == 1) && (b == 1)) || (c == 1)) && (d == 1)) || " \ + "(e == 1)) && (f == 
1)) || (g == 1)) && (h == 1))" + DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, "ceg"), + DATA_REC(NO, 0, 1, 0, 1, 0, 1, 0, 1, ""), + DATA_REC(NO, 1, 0, 1, 0, 1, 0, 1, 0, ""), +#undef FILTER +#define FILTER "((((((((a == 1) || (b == 1)) && (c == 1)) || (d == 1)) && " \ + "(e == 1)) || (f == 1)) && (g == 1)) || (h == 1))" + DATA_REC(YES, 1, 1, 1, 1, 1, 1, 1, 1, "bdfh"), + DATA_REC(YES, 0, 1, 0, 1, 0, 1, 0, 1, ""), + DATA_REC(YES, 1, 0, 1, 0, 1, 0, 1, 0, "bdfh"), +}; + +#undef DATA_REC +#undef FILTER +#undef YES +#undef NO + +#define DATA_CNT ARRAY_SIZE(test_filter_data) + +static int test_pred_visited; + +static int test_pred_visited_fn(struct filter_pred *pred, void *event) +{ + struct ftrace_event_field *field = pred->field; + + test_pred_visited = 1; + printk(KERN_INFO "\npred visited %s\n", field->name); + return 1; +} + +static void update_pred_fn(struct event_filter *filter, char *fields) +{ + struct prog_entry *prog = rcu_dereference_protected(filter->prog, + lockdep_is_held(&event_mutex)); + int i; + + for (i = 0; prog[i].pred; i++) { + struct filter_pred *pred = prog[i].pred; + struct ftrace_event_field *field = pred->field; + + WARN_ON_ONCE(!pred->fn); + + if (!field) { + WARN_ONCE(1, "all leafs should have field defined %d", i); + continue; + } + + if (!strchr(fields, *field->name)) + continue; + + pred->fn = test_pred_visited_fn; + } +} + +static __init int ftrace_test_event_filter(void) +{ + int i; + + printk(KERN_INFO "Testing ftrace filter: "); + + for (i = 0; i < DATA_CNT; i++) { + struct event_filter *filter = NULL; + struct test_filter_data_t *d = &test_filter_data[i]; + int err; + + err = create_filter(NULL, &event_ftrace_test_filter, + d->filter, false, &filter); + if (err) { + printk(KERN_INFO + "Failed to get filter for '%s', err %d\n", + d->filter, err); + __free_filter(filter); + break; + } + + /* Needed to dereference filter->prog */ + mutex_lock(&event_mutex); + /* + * The preemption disabling is not really needed for self + * tests, but the rcu 
dereference will complain without it. + */ + preempt_disable(); + if (*d->not_visited) + update_pred_fn(filter, d->not_visited); + + test_pred_visited = 0; + err = filter_match_preds(filter, &d->rec); + preempt_enable(); + + mutex_unlock(&event_mutex); + + __free_filter(filter); + + if (test_pred_visited) { + printk(KERN_INFO + "Failed, unwanted pred visited for filter %s\n", + d->filter); + break; + } + + if (err != d->match) { + printk(KERN_INFO + "Failed to match filter '%s', expected %d\n", + d->filter, d->match); + break; + } + } + + if (i == DATA_CNT) + printk(KERN_CONT "OK\n"); + + return 0; +} + +late_initcall(ftrace_test_event_filter); + +#endif /* CONFIG_FTRACE_STARTUP_TEST */ -- cgit v1.2.3